3579 files changed, 81222 insertions, 57708 deletions
diff --git a/.gitignore b/.gitignore
index c2ed4ecb0acd..0c39aa20b6ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,6 +33,7 @@
 *.lzo
 *.patch
 *.gcno
+*.ll
 modules.builtin
 Module.symvers
 *.dwo
diff --git a/.mailmap b/.mailmap
index d2aeb146efed..5273cfd70ad6 100644
--- a/.mailmap
+++ b/.mailmap
@@ -146,6 +146,8 @@ Santosh Shilimkar <ssantosh@kernel.org>
 Santosh Shilimkar <santosh.shilimkar@oracle.org>
 Sascha Hauer <s.hauer@pengutronix.de>
 S.Çağlar Onur <caglar@pardus.org.tr>
+Sebastian Reichel <sre@kernel.org> <sre@debian.org>
+Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk>
 Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
 Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
 Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 793acf999e9e..ed3e5e949fce 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -412,6 +412,8 @@ sysctl/
 	- directory with info on the /proc/sys/* files.
 target/
 	- directory with info on generating TCM v4 fabric .ko modules
+tee.txt
+	- info on the TEE subsystem and drivers
 this_cpu_ops.txt
 	- List rationale behind and the way to use this_cpu operations.
 thermal/
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index da5c087462b1..c3c705591532 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -819,7 +819,7 @@ printk(KERN_INFO "my ip: %pI4\n", &amp;ipaddress);
    certain condition is true.  They must be used carefully to ensure
    there is no race condition.  You declare a
    <type>wait_queue_head_t</type>, and then processes which want to
-   wait for that condition declare a <type>wait_queue_t</type>
+   wait for that condition declare a <type>wait_queue_entry_t</type>
    referring to themselves, and place that in the queue.
   </para>
 
diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX
index f773a264ae02..f46980c060aa 100644
--- a/Documentation/RCU/00-INDEX
+++ b/Documentation/RCU/00-INDEX
@@ -17,7 +17,7 @@ rcu_dereference.txt
 rcubarrier.txt
 	- RCU and Unloadable Modules
 rculist_nulls.txt
-	- RCU list primitives for use with SLAB_DESTROY_BY_RCU
+	- RCU list primitives for use with SLAB_TYPESAFE_BY_RCU
 rcuref.txt
 	- Reference-count design for elements of lists/arrays protected by RCU
 rcu.txt
@@ -28,8 +28,6 @@ stallwarn.txt
 	- RCU CPU stall warnings (module parameter rcu_cpu_stall_suppress)
 torture.txt
 	- RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST)
-trace.txt
-	- CONFIG_RCU_TRACE debugfs files and formats
 UP.txt
 	- RCU on Uniprocessor Systems
 whatisRCU.txt
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
index d583c653a703..38d6d800761f 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -19,6 +19,8 @@ to each other.
 	The <tt>rcu_state</tt> Structure</a>
 <li>	<a href="#The rcu_node Structure">
 	The <tt>rcu_node</tt> Structure</a>
+<li>	<a href="#The rcu_segcblist Structure">
+	The <tt>rcu_segcblist</tt> Structure</a>
 <li>	<a href="#The rcu_data Structure">
 	The <tt>rcu_data</tt> Structure</a>
 <li>	<a href="#The rcu_dynticks Structure">
@@ -841,6 +843,134 @@ for lockdep lock-class names.
 Finally, lines&nbsp;64-66 produce an error if the maximum number of
 CPUs is too large for the specified fanout.
 
+<h3><a name="The rcu_segcblist Structure">
+The <tt>rcu_segcblist</tt> Structure</a></h3>
+
+The <tt>rcu_segcblist</tt> structure maintains a segmented list of
+callbacks as follows:
+
+<pre>
+ 1 #define RCU_DONE_TAIL        0
+ 2 #define RCU_WAIT_TAIL        1
+ 3 #define RCU_NEXT_READY_TAIL  2
+ 4 #define RCU_NEXT_TAIL        3
+ 5 #define RCU_CBLIST_NSEGS     4
+ 6
+ 7 struct rcu_segcblist {
+ 8   struct rcu_head *head;
+ 9   struct rcu_head **tails[RCU_CBLIST_NSEGS];
+10   unsigned long gp_seq[RCU_CBLIST_NSEGS];
+11   long len;
+12   long len_lazy;
+13 };
+</pre>
+
+<p>
+The segments are as follows:
+
+<ol>
+<li>	<tt>RCU_DONE_TAIL</tt>: Callbacks whose grace periods have elapsed.
+	These callbacks are ready to be invoked.
+<li>	<tt>RCU_WAIT_TAIL</tt>: Callbacks that are waiting for the
+	current grace period.
+	Note that different CPUs can have different ideas about which
+	grace period is current, hence the <tt>-&gt;gp_seq</tt> field.
+<li>	<tt>RCU_NEXT_READY_TAIL</tt>: Callbacks waiting for the next
+	grace period to start.
+<li>	<tt>RCU_NEXT_TAIL</tt>: Callbacks that have not yet been
+	associated with a grace period.
+</ol>
+
+<p>
+The <tt>-&gt;head</tt> pointer references the first callback or
+is <tt>NULL</tt> if the list contains no callbacks (which is
+<i>not</i> the same as being empty).
+Each element of the <tt>-&gt;tails[]</tt> array references the
+<tt>-&gt;next</tt> pointer of the last callback in the corresponding
+segment of the list, or the list's <tt>-&gt;head</tt> pointer if
+that segment and all previous segments are empty.
+If the corresponding segment is empty but some previous segment is
+not empty, then the array element is identical to its predecessor.
+Older callbacks are closer to the head of the list, and new callbacks
+are added at the tail.
+This relationship between the <tt>-&gt;head</tt> pointer, the
+<tt>-&gt;tails[]</tt> array, and the callbacks is shown in this
+diagram:
+
+</p><p><img src="nxtlist.svg" alt="nxtlist.svg" width="40%">
+
+</p><p>In this figure, the <tt>-&gt;head</tt> pointer references the
+first
+RCU callback in the list.
+The <tt>-&gt;tails[RCU_DONE_TAIL]</tt> array element references
+the <tt>-&gt;head</tt> pointer itself, indicating that none
+of the callbacks is ready to invoke.
+The <tt>-&gt;tails[RCU_WAIT_TAIL]</tt> array element references callback
+CB&nbsp;2's <tt>-&gt;next</tt> pointer, which indicates that
+CB&nbsp;1 and CB&nbsp;2 are both waiting on the current grace period,
+give or take possible disagreements about exactly which grace period
+is the current one.
+The <tt>-&gt;tails[RCU_NEXT_READY_TAIL]</tt> array element
+references the same RCU callback that <tt>-&gt;tails[RCU_WAIT_TAIL]</tt>
+does, which indicates that there are no callbacks waiting on the next
+RCU grace period.
+The <tt>-&gt;tails[RCU_NEXT_TAIL]</tt> array element references
+CB&nbsp;4's <tt>-&gt;next</tt> pointer, indicating that all the
+remaining RCU callbacks have not yet been assigned to an RCU grace
+period.
+Note that the <tt>-&gt;tails[RCU_NEXT_TAIL]</tt> array element
+always references the last RCU callback's <tt>-&gt;next</tt> pointer
+unless the callback list is empty, in which case it references
+the <tt>-&gt;head</tt> pointer.
+
+<p>
+There is one additional important special case for the
+<tt>-&gt;tails[RCU_NEXT_TAIL]</tt> array element: It can be <tt>NULL</tt>
+when this list is <i>disabled</i>.
+Lists are disabled when the corresponding CPU is offline or when
+the corresponding CPU's callbacks are offloaded to a kthread,
+both of which are described elsewhere.
+
+</p><p>CPUs advance their callbacks from the
+<tt>RCU_NEXT_TAIL</tt> to the <tt>RCU_NEXT_READY_TAIL</tt> to the
+<tt>RCU_WAIT_TAIL</tt> to the <tt>RCU_DONE_TAIL</tt> list segments
+as grace periods advance.
+
+</p><p>The <tt>-&gt;gp_seq[]</tt> array records grace-period
+numbers corresponding to the list segments.
+This is what allows different CPUs to have different ideas as to
+which is the current grace period while still avoiding premature
+invocation of their callbacks.
+In particular, this allows CPUs that go idle for extended periods
+to determine which of their callbacks are ready to be invoked after
+reawakening.
+
+</p><p>The <tt>-&gt;len</tt> counter contains the number of
+callbacks in <tt>-&gt;head</tt>, and the
+<tt>-&gt;len_lazy</tt> contains the number of those callbacks that
+are known to only free memory, and whose invocation can therefore
+be safely deferred.
+
+<p><b>Important note</b>: It is the <tt>-&gt;len</tt> field that
+determines whether or not there are callbacks associated with
+this <tt>rcu_segcblist</tt> structure, <i>not</i> the <tt>-&gt;head</tt>
+pointer.
+The reason for this is that all the ready-to-invoke callbacks
+(that is, those in the <tt>RCU_DONE_TAIL</tt> segment) are extracted
+all at once at callback-invocation time.
+If callback invocation must be postponed, for example, because a
+high-priority process just woke up on this CPU, then the remaining
+callbacks are placed back on the <tt>RCU_DONE_TAIL</tt> segment.
+Either way, the <tt>-&gt;len</tt> and <tt>-&gt;len_lazy</tt> counts
+are adjusted after the corresponding callbacks have been invoked, and so
+again it is the <tt>-&gt;len</tt> count that accurately reflects whether
+or not there are callbacks associated with this <tt>rcu_segcblist</tt>
+structure.
+Of course, off-CPU sampling of the <tt>-&gt;len</tt> count requires
+the use of appropriate synchronization, for example, memory barriers.
+This synchronization can be a bit subtle, particularly in the case
+of <tt>rcu_barrier()</tt>.
+
 <h3><a name="The rcu_data Structure">
 The <tt>rcu_data</tt> Structure</a></h3>
 
@@ -983,62 +1113,18 @@ choice.
 as follows:
 
 <pre>
- 1 struct rcu_head *nxtlist;
- 2 struct rcu_head **nxttail[RCU_NEXT_SIZE];
- 3 unsigned long nxtcompleted[RCU_NEXT_SIZE];
- 4 long qlen_lazy;
- 5 long qlen;
- 6 long qlen_last_fqs_check;
+ 1 struct rcu_segcblist cblist;
+ 2 long qlen_last_fqs_check;
+ 3 unsigned long n_cbs_invoked;
+ 4 unsigned long n_nocbs_invoked;
+ 5 unsigned long n_cbs_orphaned;
+ 6 unsigned long n_cbs_adopted;
  7 unsigned long n_force_qs_snap;
- 8 unsigned long n_cbs_invoked;
- 9 unsigned long n_cbs_orphaned;
-10 unsigned long n_cbs_adopted;
-11 long blimit;
+ 8 long blimit;
 </pre>
 
-<p>The <tt>-&gt;nxtlist</tt> pointer and the
-<tt>-&gt;nxttail[]</tt> array form a four-segment list with
-older callbacks near the head and newer ones near the tail.
-Each segment contains callbacks with the corresponding relationship
-to the current grace period.
-The pointer out of the end of each of the four segments is referenced
-by the element of the <tt>-&gt;nxttail[]</tt> array indexed by
-<tt>RCU_DONE_TAIL</tt> (for callbacks handled by a prior grace period),
-<tt>RCU_WAIT_TAIL</tt> (for callbacks waiting on the current grace period),
-<tt>RCU_NEXT_READY_TAIL</tt> (for callbacks that will wait on the next
-grace period), and
-<tt>RCU_NEXT_TAIL</tt> (for callbacks that are not yet associated
-with a specific grace period)
-respectively, as shown in the following figure.
-
-</p><p><img src="nxtlist.svg" alt="nxtlist.svg" width="40%">
-
-</p><p>In this figure, the <tt>-&gt;nxtlist</tt> pointer references the
-first
-RCU callback in the list.
-The <tt>-&gt;nxttail[RCU_DONE_TAIL]</tt> array element references
-the <tt>-&gt;nxtlist</tt> pointer itself, indicating that none
-of the callbacks is ready to invoke.
-The <tt>-&gt;nxttail[RCU_WAIT_TAIL]</tt> array element references callback
-CB&nbsp;2's <tt>-&gt;next</tt> pointer, which indicates that
-CB&nbsp;1 and CB&nbsp;2 are both waiting on the current grace period.
-The <tt>-&gt;nxttail[RCU_NEXT_READY_TAIL]</tt> array element
-references the same RCU callback that <tt>-&gt;nxttail[RCU_WAIT_TAIL]</tt>
-does, which indicates that there are no callbacks waiting on the next
-RCU grace period.
-The <tt>-&gt;nxttail[RCU_NEXT_TAIL]</tt> array element references
-CB&nbsp;4's <tt>-&gt;next</tt> pointer, indicating that all the
-remaining RCU callbacks have not yet been assigned to an RCU grace
-period.
-Note that the <tt>-&gt;nxttail[RCU_NEXT_TAIL]</tt> array element
-always references the last RCU callback's <tt>-&gt;next</tt> pointer
-unless the callback list is empty, in which case it references
-the <tt>-&gt;nxtlist</tt> pointer.
-
-</p><p>CPUs advance their callbacks from the
-<tt>RCU_NEXT_TAIL</tt> to the <tt>RCU_NEXT_READY_TAIL</tt> to the
-<tt>RCU_WAIT_TAIL</tt> to the <tt>RCU_DONE_TAIL</tt> list segments
-as grace periods advance.
+<p>The <tt>-&gt;cblist</tt> structure is the segmented callback list
+described earlier.
 The CPU advances the callbacks in its <tt>rcu_data</tt> structure
 whenever it notices that another RCU grace period has completed.
 The CPU detects the completion of an RCU grace period by noticing
@@ -1049,16 +1135,7 @@ Recall that each <tt>rcu_node</tt> structure's
 <tt>-&gt;completed</tt> field is updated at the end of each
 grace period.
 
-</p><p>The <tt>-&gt;nxtcompleted[]</tt> array records grace-period
-numbers corresponding to the list segments.
-This allows CPUs that go idle for extended periods to determine
-which of their callbacks are ready to be invoked after reawakening.
-
-</p><p>The <tt>-&gt;qlen</tt> counter contains the number of
-callbacks in <tt>-&gt;nxtlist</tt>, and the
-<tt>-&gt;qlen_lazy</tt> contains the number of those callbacks that
-are known to only free memory, and whose invocation can therefore
-be safely deferred.
+<p>
 The <tt>-&gt;qlen_last_fqs_check</tt> and
 <tt>-&gt;n_force_qs_snap</tt> coordinate the forcing of quiescent
 states from <tt>call_rcu()</tt> and friends when callback
@@ -1069,6 +1146,10 @@ lists grow excessively long.
 fields count the number of callbacks invoked,
 sent to other CPUs when this CPU goes offline,
 and received from other CPUs when those other CPUs go offline.
+The <tt>-&gt;n_nocbs_invoked</tt> is used when the CPU's callbacks
+are offloaded to a kthread.
+
+<p>
 Finally, the <tt>-&gt;blimit</tt> counter is the maximum number of
 RCU callbacks that may be invoked at a given time.
 
@@ -1104,6 +1185,9 @@ Its fields are as follows:
   1   int dynticks_nesting;
   2   int dynticks_nmi_nesting;
   3   atomic_t dynticks;
+  4   bool rcu_need_heavy_qs;
+  5   unsigned long rcu_qs_ctr;
+  6   bool rcu_urgent_qs;
 </pre>
 
 <p>The <tt>-&gt;dynticks_nesting</tt> field counts the
@@ -1117,11 +1201,32 @@ NMIs are counted by the <tt>-&gt;dynticks_nmi_nesting</tt>
 field, except that NMIs that interrupt non-dyntick-idle execution
 are not counted.
 
-</p><p>Finally, the <tt>-&gt;dynticks</tt> field counts the corresponding
+</p><p>The <tt>-&gt;dynticks</tt> field counts the corresponding
 CPU's transitions to and from dyntick-idle mode, so that this counter
 has an even value when the CPU is in dyntick-idle mode and an odd
 value otherwise.
 
+</p><p>The <tt>-&gt;rcu_need_heavy_qs</tt> field is used
+to record the fact that the RCU core code would really like to
+see a quiescent state from the corresponding CPU, so much so that
+it is willing to call for heavy-weight dyntick-counter operations.
+This flag is checked by RCU's context-switch and <tt>cond_resched()</tt>
+code, which provide a momentary idle sojourn in response.
+
+</p><p>The <tt>-&gt;rcu_qs_ctr</tt> field is used to record
+quiescent states from <tt>cond_resched()</tt>.
+Because <tt>cond_resched()</tt> can execute quite frequently, this
+must be quite lightweight, as in a non-atomic increment of this
+per-CPU field.
+
+</p><p>Finally, the <tt>-&gt;rcu_urgent_qs</tt> field is used to record
+the fact that the RCU core code would really like to see a quiescent
+state from the corresponding CPU, with the various other fields indicating
+just how badly RCU wants this quiescent state.
+This flag is checked by RCU's context-switch and <tt>cond_resched()</tt>
+code, which, if nothing else, non-atomically increment <tt>-&gt;rcu_qs_ctr</tt>
+in response.
+
 <table>
 <tr><th>&nbsp;</th></tr>
 <tr><th align="left">Quick Quiz:</th></tr>
diff --git a/Documentation/RCU/Design/Data-Structures/nxtlist.svg b/Documentation/RCU/Design/Data-Structures/nxtlist.svg
index abc4cc73a097..0223e79c38e0 100644
--- a/Documentation/RCU/Design/Data-Structures/nxtlist.svg
+++ b/Documentation/RCU/Design/Data-Structures/nxtlist.svg
@@ -19,7 +19,7 @@
    id="svg2"
    version="1.1"
    inkscape:version="0.48.4 r9939"
-   sodipodi:docname="nxtlist.fig">
+   sodipodi:docname="segcblist.svg">
   <metadata
      id="metadata94">
     <rdf:RDF>
@@ -28,7 +28,7 @@
         <dc:format>image/svg+xml</dc:format>
         <dc:type
            rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
-        <dc:title></dc:title>
+        <dc:title />
       </cc:Work>
     </rdf:RDF>
   </metadata>
@@ -241,61 +241,51 @@
        xml:space="preserve"
        x="225"
        y="675"
-       fill="#000000"
-       font-family="Courier"
        font-style="normal"
        font-weight="bold"
        font-size="324"
-       text-anchor="start"
-       id="text64">nxtlist</text>
+       id="text64"
+       style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;head</text>
     <!-- Text -->
     <text
        xml:space="preserve"
        x="225"
        y="1800"
-       fill="#000000"
-       font-family="Courier"
        font-style="normal"
        font-weight="bold"
        font-size="324"
-       text-anchor="start"
-       id="text66">nxttail[RCU_DONE_TAIL]</text>
+       id="text66"
+       style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;tails[RCU_DONE_TAIL]</text>
     <!-- Text -->
     <text
        xml:space="preserve"
        x="225"
        y="2925"
-       fill="#000000"
-       font-family="Courier"
        font-style="normal"
        font-weight="bold"
        font-size="324"
-       text-anchor="start"
-       id="text68">nxttail[RCU_WAIT_TAIL]</text>
+       id="text68"
+       style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;tails[RCU_WAIT_TAIL]</text>
     <!-- Text -->
     <text
        xml:space="preserve"
        x="225"
        y="4050"
-       fill="#000000"
-       font-family="Courier"
        font-style="normal"
        font-weight="bold"
        font-size="324"
-       text-anchor="start"
-       id="text70">nxttail[RCU_NEXT_READY_TAIL]</text>
+       id="text70"
+       style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;tails[RCU_NEXT_READY_TAIL]</text>
     <!-- Text -->
     <text
        xml:space="preserve"
        x="225"
        y="5175"
-       fill="#000000"
-       font-family="Courier"
        font-style="normal"
        font-weight="bold"
        font-size="324"
-       text-anchor="start"
-       id="text72">nxttail[RCU_NEXT_TAIL]</text>
+       id="text72"
+       style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;tails[RCU_NEXT_TAIL]</text>
     <!-- Text -->
     <text
        xml:space="preserve"
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
index 7a3194c5559a..e5d0bbd0230b 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
@@ -284,6 +284,7 @@ Expedited Grace Period Refinements</a></h2>
 	Funnel locking and wait/wakeup</a>.
 <li>	<a href="#Use of Workqueues">Use of Workqueues</a>.
 <li>	<a href="#Stall Warnings">Stall warnings</a>.
+<li>	<a href="#Mid-Boot Operation">Mid-boot operation</a>.
 </ol>
 
 <h3><a name="Idle-CPU Checks">Idle-CPU Checks</a></h3>
@@ -524,7 +525,7 @@ their grace periods and carrying out their wakeups.
 In earlier implementations, the task requesting the expedited
 grace period also drove it to completion.
 This straightforward approach had the disadvantage of needing to
-account for signals sent to user tasks,
+account for POSIX signals sent to user tasks,
 so more recent implemementations use the Linux kernel's
 <a href="https://www.kernel.org/doc/Documentation/workqueue.txt">workqueues</a>.
 
@@ -533,8 +534,8 @@ The requesting task still does counter snapshotting and funnel-lock
 processing, but the task reaching the top of the funnel lock
 does a <tt>schedule_work()</tt> (from <tt>_synchronize_rcu_expedited()</tt>
 so that a workqueue kthread does the actual grace-period processing.
-Because workqueue kthreads do not accept signals, grace-period-wait
-processing need not allow for signals.
+Because workqueue kthreads do not accept POSIX signals, grace-period-wait
+processing need not allow for POSIX signals.
 
 In addition, this approach allows wakeups for the previous expedited
 grace period to be overlapped with processing for the next expedited
@@ -586,6 +587,46 @@ blocking the current grace period are printed.
 Each stall warning results in another pass through the loop, but the
 second and subsequent passes use longer stall times.
 
+<h3><a name="Mid-Boot Operation">Mid-boot operation</a></h3>
+
+<p>
+The use of workqueues has the advantage that the expedited
+grace-period code need not worry about POSIX signals.
+Unfortunately, it has the
+corresponding disadvantage that workqueues cannot be used until
+they are initialized, which does not happen until some time after
+the scheduler spawns the first task.
+Given that there are parts of the kernel that really do want to
+execute grace periods during this mid-boot &ldquo;dead zone&rdquo;,
+expedited grace periods must do something else during thie time.
+
+<p>
+What they do is to fall back to the old practice of requiring that the
+requesting task drive the expedited grace period, as was the case
+before the use of workqueues.
+However, the requesting task is only required to drive the grace period
+during the mid-boot dead zone.
+Before mid-boot, a synchronous grace period is a no-op.
+Some time after mid-boot, workqueues are used.
+
+<p>
+Non-expedited non-SRCU synchronous grace periods must also operate
+normally during mid-boot.
+This is handled by causing non-expedited grace periods to take the
+expedited code path during mid-boot.
+
+<p>
+The current code assumes that there are no POSIX signals during
+the mid-boot dead zone.
+However, if an overwhelming need for POSIX signals somehow arises,
+appropriate adjustments can be made to the expedited stall-warning code.
+One such adjustment would reinstate the pre-workqueue stall-warning
+checks, but only during the mid-boot dead zone.
+
+<p>
+With this refinement, synchronous grace periods can now be used from
+task context pretty much any time during the life of the kernel.
+
 <h3><a name="Summary">
 Summary</a></h3>
 
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index 21593496aca6..95b30fa25d56 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -559,9 +559,7 @@ The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
 	For <tt>remove_gp_synchronous()</tt>, as long as all modifications
 	to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
 	the above optimizations are harmless.
-	However,
-	with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
-	<tt>sparse</tt> will complain if you
+	However, <tt>sparse</tt> will complain if you
 	define <tt>gp</tt> with <tt>__rcu</tt> and then
 	access it without using
 	either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
@@ -659,8 +657,9 @@ systems with more than one CPU:
 	In other words, a given instance of <tt>synchronize_rcu()</tt>
 	can avoid waiting on a given RCU read-side critical section only
 	if it can prove that <tt>synchronize_rcu()</tt> started first.
+	</font>
 
-	<p>
+	<p><font color="ffffff">
 	A related question is &ldquo;When <tt>rcu_read_lock()</tt>
 	doesn't generate any code, why does it matter how it relates
 	to a grace period?&rdquo;
@@ -675,8 +674,9 @@ systems with more than one CPU:
 	within the critical section, in which case none of the accesses
 	within the critical section may observe the effects of any
 	access following the grace period.
+	</font>
 
-	<p>
+	<p><font color="ffffff">
 	As of late 2016, mathematical models of RCU take this
 	viewpoint, for example, see slides&nbsp;62 and&nbsp;63
 	of the
@@ -1616,8 +1616,8 @@ CPUs should at least make reasonable forward progress.
 In return for its shorter latencies, <tt>synchronize_rcu_expedited()</tt>
 is permitted to impose modest degradation of real-time latency
 on non-idle online CPUs.
-That said, it will likely be necessary to take further steps to reduce this
-degradation, hopefully to roughly that of a scheduling-clock interrupt.
+Here, &ldquo;modest&rdquo; means roughly the same latency
+degradation as a scheduling-clock interrupt.
 
 <p>
 There are a number of situations where even
@@ -1847,7 +1847,8 @@ mass storage, or user patience, whichever comes first.
 If the nesting is not visible to the compiler, as is the case with
 mutually recursive functions each in its own translation unit,
 stack overflow will result.
-If the nesting takes the form of loops, either the control variable
+If the nesting takes the form of loops, perhaps in the guise of tail
+recursion, either the control variable
 will overflow or (in the Linux kernel) you will get an RCU CPU stall warning.
 Nevertheless, this class of RCU implementations is one
 of the most composable constructs in existence.
@@ -1913,12 +1914,9 @@ This requirement is another factor driving batching of grace periods,
 but it is also the driving force behind the checks for large numbers
 of queued RCU callbacks in the <tt>call_rcu()</tt> code path.
 Finally, high update rates should not delay RCU read-side critical
-sections, although some read-side delays can occur when using
+sections, although some small read-side delays can occur when using
 <tt>synchronize_rcu_expedited()</tt>, courtesy of this function's use
-of <tt>try_stop_cpus()</tt>.
-(In the future, <tt>synchronize_rcu_expedited()</tt> will be
-converted to use lighter-weight inter-processor interrupts (IPIs),
-but this will still disturb readers, though to a much smaller degree.)
+of <tt>smp_call_function_single()</tt>.
 
 <p>
 Although all three of these corner cases were understood in the early
@@ -1978,9 +1976,8 @@ guard against mishaps and misuse:
 	and <tt>rcu_dereference()</tt>, perhaps (incorrectly)
 	substituting a simple assignment.
 	To catch this sort of error, a given RCU-protected pointer may be
-	tagged with <tt>__rcu</tt>, after which running sparse
-	with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt> will complain
-	about simple-assignment accesses to that pointer.
+	tagged with <tt>__rcu</tt>, after which sparse
+	will complain about simple-assignment accesses to that pointer.
 	Arnd Bergmann made me aware of this requirement, and also
 	supplied the needed
 	<a href="https://lwn.net/Articles/376011/">patch series</a>.
@@ -2037,7 +2034,7 @@ guard against mishaps and misuse:
 	some other synchronization mechanism, for example, reference
 	counting.
 <li>	In kernels built with <tt>CONFIG_RCU_TRACE=y</tt>, RCU-related
-	information is provided via both debugfs and event tracing.
+	information is provided via event tracing.
 <li>	Open-coded use of <tt>rcu_assign_pointer()</tt> and
 	<tt>rcu_dereference()</tt> to create typical linked
 	data structures can be surprisingly error-prone.
@@ -2154,7 +2151,8 @@ as will <tt>rcu_assign_pointer()</tt>.
 <p>
 Although <tt>call_rcu()</tt> may be invoked at any
 time during boot, callbacks are not guaranteed to be invoked until after
-the scheduler is fully up and running.
+all of RCU's kthreads have been spawned, which occurs at
+<tt>early_initcall()</tt> time.
 This delay in callback invocation is due to the fact that RCU does not
 invoke callbacks until it is fully initialized, and this full initialization
 cannot occur until after the scheduler has initialized itself to the
@@ -2167,8 +2165,10 @@ on what operations those callbacks could invoke.
 Perhaps surprisingly, <tt>synchronize_rcu()</tt>,
 <a href="#Bottom-Half Flavor"><tt>synchronize_rcu_bh()</tt></a>
 (<a href="#Bottom-Half Flavor">discussed below</a>),
-and
-<a href="#Sched Flavor"><tt>synchronize_sched()</tt></a>
+<a href="#Sched Flavor"><tt>synchronize_sched()</tt></a>,
+<tt>synchronize_rcu_expedited()</tt>,
+<tt>synchronize_rcu_bh_expedited()</tt>, and
+<tt>synchronize_sched_expedited()</tt>
 will all operate normally
 during very early boot, the reason being that there is only one CPU
 and preemption is disabled.
@@ -2178,45 +2178,59 @@ state and thus a grace period, so the early-boot implementation can
 be a no-op.
 
 <p>
-Both <tt>synchronize_rcu_bh()</tt> and <tt>synchronize_sched()</tt>
-continue to operate normally through the remainder of boot, courtesy
-of the fact that preemption is disabled across their RCU read-side
-critical sections and also courtesy of the fact that there is still
-only one CPU.
-However, once the scheduler starts initializing, preemption is enabled.
-There is still only a single CPU, but the fact that preemption is enabled
-means that the no-op implementation of <tt>synchronize_rcu()</tt> no
-longer works in <tt>CONFIG_PREEMPT=y</tt> kernels.
-Therefore, as soon as the scheduler starts initializing, the early-boot
-fastpath is disabled.
-This means that <tt>synchronize_rcu()</tt> switches to its runtime
-mode of operation where it posts callbacks, which in turn means that
-any call to <tt>synchronize_rcu()</tt> will block until the corresponding
-callback is invoked.
-Unfortunately, the callback cannot be invoked until RCU's runtime
-grace-period machinery is up and running, which cannot happen until
-the scheduler has initialized itself sufficiently to allow RCU's
-kthreads to be spawned.
-Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler
-initialization can result in deadlock.
+However, once the scheduler has spawned its first kthread, this early
+boot trick fails for <tt>synchronize_rcu()</tt> (as well as for
+<tt>synchronize_rcu_expedited()</tt>) in <tt>CONFIG_PREEMPT=y</tt>
+kernels.
+The reason is that an RCU read-side critical section might be preempted,
+which means that a subsequent <tt>synchronize_rcu()</tt> really does have
+to wait for something, as opposed to simply returning immediately.
+Unfortunately, <tt>synchronize_rcu()</tt> can't do this until all of
+its kthreads are spawned, which doesn't happen until some time during
+<tt>early_initcalls()</tt> time.
+But this is no excuse:  RCU is nevertheless required to correctly handle
+synchronous grace periods during this time period.
+Once all of its kthreads are up and running, RCU starts running
+normally.
 
 <table>
 <tr><th>&nbsp;</th></tr>
 <tr><th align="left">Quick Quiz:</th></tr>
 <tr><td>
-	So what happens with <tt>synchronize_rcu()</tt> during
-	scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
-	kernels?
+	How can RCU possibly handle grace periods before all of its
+	kthreads have been spawned???
 </td></tr>
 <tr><th align="left">Answer:</th></tr>
 <tr><td bgcolor="#ffffff"><font color="ffffff">
-	In <tt>CONFIG_PREEMPT=n</tt> kernel, <tt>synchronize_rcu()</tt>
-	maps directly to <tt>synchronize_sched()</tt>.
-	Therefore, <tt>synchronize_rcu()</tt> works normally throughout
-	boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
-	However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
-	so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
-	during scheduler initialization.
+	Very carefully!
+	</font>
+
+	<p><font color="ffffff">
+	During the &ldquo;dead zone&rdquo; between the time that the
+	scheduler spawns the first task and the time that all of RCU's
+	kthreads have been spawned, all synchronous grace periods are
+	handled by the expedited grace-period mechanism.
+	At runtime, this expedited mechanism relies on workqueues, but
+	during the dead zone the requesting task itself drives the
+	desired expedited grace period.
+	Because dead-zone execution takes place within task context,
+	everything works.
+	Once the dead zone ends, expedited grace periods go back to
+	using workqueues, as is required to avoid problems that would
+	otherwise occur when a user task received a POSIX signal while
+	driving an expedited grace period.
+	</font>
+
+	<p><font color="ffffff">
+	And yes, this does mean that it is unhelpful to send POSIX
+	signals to random tasks between the time that the scheduler
+	spawns its first kthread and the time that RCU's kthreads
+	have all been spawned.
+	If there ever turns out to be a good reason for sending POSIX
+	signals during that time, appropriate adjustments will be made.
+	(If it turns out that POSIX signals are sent during this time for
+	no good reason, other adjustments will be made, appropriate
+	or otherwise.)
 </font></td></tr>
 <tr><td>&nbsp;</td></tr>
 </table>
@@ -2295,12 +2309,61 @@ situation, and Dipankar Sarma incorporated <tt>rcu_barrier()</tt> into RCU.
 The need for <tt>rcu_barrier()</tt> for module unloading became
 apparent later.
 
+<p>
+<b>Important note</b>: The <tt>rcu_barrier()</tt> function is not,
+repeat, <i>not</i>, obligated to wait for a grace period.
+It is instead only required to wait for RCU callbacks that have
+already been posted.
+Therefore, if there are no RCU callbacks posted anywhere in the system,
+<tt>rcu_barrier()</tt> is within its rights to return immediately.
+Even if there are callbacks posted, <tt>rcu_barrier()</tt> does not
+necessarily need to wait for a grace period.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Wait a minute!
+	Each RCU callbacks must wait for a grace period to complete,
+	and <tt>rcu_barrier()</tt> must wait for each pre-existing
+	callback to be invoked.
+	Doesn't <tt>rcu_barrier()</tt> therefore need to wait for
+	a full grace period if there is even one callback posted anywhere
+	in the system?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Absolutely not!!!
+	</font>
+
+	<p><font color="ffffff">
+	Yes, each RCU callbacks must wait for a grace period to complete,
+	but it might well be partly (or even completely) finished waiting
+	by the time <tt>rcu_barrier()</tt> is invoked.
+	In that case, <tt>rcu_barrier()</tt> need only wait for the
+	remaining portion of the grace period to elapse.
+	So even if there are quite a few callbacks posted,
+	<tt>rcu_barrier()</tt> might well return quite quickly.
+	</font>
+
+	<p><font color="ffffff">
+	So if you need to wait for a grace period as well as for all
+	pre-existing callbacks, you will need to invoke both
+	<tt>synchronize_rcu()</tt> and <tt>rcu_barrier()</tt>.
+	If latency is a concern, you can always use workqueues
+	to invoke them concurrently.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
 <h3><a name="Hotplug CPU">Hotplug CPU</a></h3>
 
 <p>
 The Linux kernel supports CPU hotplug, which means that CPUs
 can come and go.
-It is of course illegal to use any RCU API member from an offline CPU.
+It is of course illegal to use any RCU API member from an offline CPU,
+with the exception of <a href="#Sleepable RCU">SRCU</a> read-side
+critical sections.
 This requirement was present from day one in DYNIX/ptx, but
 on the other hand, the Linux kernel's CPU-hotplug implementation
 is &ldquo;interesting.&rdquo;
@@ -2310,19 +2373,18 @@ The Linux-kernel CPU-hotplug implementation has notifiers that
 are used to allow the various kernel subsystems (including RCU)
 to respond appropriately to a given CPU-hotplug operation.
 Most RCU operations may be invoked from CPU-hotplug notifiers,
-including even normal synchronous grace-period operations
-such as <tt>synchronize_rcu()</tt>.
-However, expedited grace-period operations such as
-<tt>synchronize_rcu_expedited()</tt> are not supported,
-due to the fact that current implementations block CPU-hotplug
-operations, which could result in deadlock.
+including even synchronous grace-period operations such as
+<tt>synchronize_rcu()</tt> and <tt>synchronize_rcu_expedited()</tt>.
 
 <p>
-In addition, all-callback-wait operations such as
+However, all-callback-wait operations such as
 <tt>rcu_barrier()</tt> are also not supported, due to the
 fact that there are phases of CPU-hotplug operations where
 the outgoing CPU's callbacks will not be invoked until after
 the CPU-hotplug operation ends, which could also result in deadlock.
+Furthermore, <tt>rcu_barrier()</tt> blocks CPU-hotplug operations
+during its execution, which results in another type of deadlock
+when invoked from a CPU-hotplug notifier.
 
 <h3><a name="Scheduler and RCU">Scheduler and RCU</a></h3>
 
@@ -2455,11 +2517,7 @@ It is similarly socially unacceptable to interrupt an
 <tt>nohz_full</tt> CPU running in userspace.
 RCU must therefore track <tt>nohz_full</tt> userspace
 execution.
-And in
-<a href="https://lwn.net/Articles/558284/"><tt>CONFIG_NO_HZ_FULL_SYSIDLE=y</tt></a>
-kernels, RCU must separately track idle CPUs on the one hand and
-CPUs that are either idle or executing in userspace on the other.
-In both cases, RCU must be able to sample state at two points in
+RCU must therefore be able to sample state at two points in
 time, and be able to determine whether or not some other CPU spent
 any time idle and/or executing in userspace.
 
@@ -2864,6 +2922,41 @@ API, which, in combination with <tt>srcu_read_unlock()</tt>,
 guarantees a full memory barrier.
 
 <p>
+Also unlike other RCU flavors, SRCU's callbacks-wait function
+<tt>srcu_barrier()</tt> may be invoked from CPU-hotplug notifiers,
+though this is not necessarily a good idea.
+The reason that this is possible is that SRCU is insensitive
+to whether or not a CPU is online, which means that <tt>srcu_barrier()</tt>
+need not exclude CPU-hotplug operations.
+
+<p>
+SRCU also differs from other RCU flavors in that SRCU's expedited and
+non-expedited grace periods are implemented by the same mechanism.
+This means that in the current SRCU implementation, expediting a
+future grace period has the side effect of expediting all prior
+grace periods that have not yet completed.
+(But please note that this is a property of the current implementation,
+not necessarily of future implementations.)
+In addition, if SRCU has been idle for longer than the interval
+specified by the <tt>srcutree.exp_holdoff</tt> kernel boot parameter
+(25&nbsp;microseconds by default),
+and if a <tt>synchronize_srcu()</tt> invocation ends this idle period,
+that invocation will be automatically expedited.
+
+<p>
+As of v4.12, SRCU's callbacks are maintained per-CPU, eliminating
+a locking bottleneck present in prior kernel versions.
+Although this will allow users to put much heavier stress on
+<tt>call_srcu()</tt>, it is important to note that SRCU does not
+yet take any special steps to deal with callback flooding.
+So if you are posting (say) 10,000 SRCU callbacks per second per CPU,
+you are probably totally OK, but if you intend to post (say) 1,000,000
+SRCU callbacks per second per CPU, please run some tests first.
+SRCU just might need a few adjustment to deal with that sort of load.
+Of course, your mileage may vary based on the speed of your CPUs and
+the size of your memory.
+
+<p>
 The
 <a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">SRCU API</a>
 includes
@@ -3021,8 +3114,8 @@ to do some redesign to avoid this scalability problem.
 
 <p>
 RCU disables CPU hotplug in a few places, perhaps most notably in the
-expedited grace-period and <tt>rcu_barrier()</tt> operations.
-If there is a strong reason to use expedited grace periods in CPU-hotplug
+<tt>rcu_barrier()</tt> operations.
+If there is a strong reason to use <tt>rcu_barrier()</tt> in CPU-hotplug
 notifiers, it will be necessary to avoid disabling CPU hotplug.
 This would introduce some complexity, so there had better be a <i>very</i>
 good reason.
@@ -3096,9 +3189,5 @@ Andy Lutomirski for their help in rendering
 this article human readable, and to Michelle Rankin for her support
 of this effort.
 Other contributions are acknowledged in the Linux kernel's git archive.
-The cartoon is copyright (c) 2013 by Melissa Broussard,
-and is provided
-under the terms of the Creative Commons Attribution-Share Alike 3.0
-United States license.
 
 </body></html>
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 877947130ebe..6beda556faf3 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -413,11 +413,11 @@ over a rather long period of time, but improvements are always welcome!
 	read-side critical sections.  It is the responsibility of the
 	RCU update-side primitives to deal with this.
 
-17.	Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
-	__rcu sparse checks (enabled by CONFIG_SPARSE_RCU_POINTER) to
-	validate your RCU code.  These can help find problems as follows:
+17.	Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
+	__rcu sparse checks to validate your RCU code.	These can help
+	find problems as follows:
 
-	CONFIG_PROVE_RCU: check that accesses to RCU-protected data
+	CONFIG_PROVE_LOCKING: check that accesses to RCU-protected data
 		structures are carried out under the proper RCU
 		read-side critical section, while holding the right
 		combination of locks, or whatever other conditions
diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt
index c0bf2441a2ba..b2a613f16d74 100644
--- a/Documentation/RCU/rcu_dereference.txt
+++ b/Documentation/RCU/rcu_dereference.txt
@@ -138,6 +138,15 @@ o	Be very careful about comparing pointers obtained from
 		This sort of comparison occurs frequently when scanning
 		RCU-protected circular linked lists.
 
+		Note that if checks for being within an RCU read-side
+		critical section are not required and the pointer is never
+		dereferenced, rcu_access_pointer() should be used in place
+		of rcu_dereference(). The rcu_access_pointer() primitive
+		does not require an enclosing read-side critical section,
+		and also omits the smp_read_barrier_depends() included in
+		rcu_dereference(), which in turn should provide a small
+		performance gain in some CPUs (e.g., the DEC Alpha).
+
 	o	The comparison is against a pointer that references memory
 		that was initialized "a long time ago."  The reason
 		this is safe is that even if misordering occurs, the
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
index 18f9651ff23d..8151f0195f76 100644
--- a/Documentation/RCU/rculist_nulls.txt
+++ b/Documentation/RCU/rculist_nulls.txt
@@ -1,5 +1,5 @@
 Using hlist_nulls to protect read-mostly linked lists and
-objects using SLAB_DESTROY_BY_RCU allocations.
+objects using SLAB_TYPESAFE_BY_RCU allocations.
 
 Please read the basics in Documentation/RCU/listRCU.txt
 
@@ -7,7 +7,7 @@ Using special makers (called 'nulls') is a convenient way
 to solve following problem :
 
 A typical RCU linked list managing objects which are
-allocated with SLAB_DESTROY_BY_RCU kmem_cache can
+allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can
 use following algos :
 
 1) Lookup algo
@@ -96,7 +96,7 @@ unlock_chain(); // typically a spin_unlock()
 3) Remove algo
 --------------
 Nothing special here, we can use a standard RCU hlist deletion.
-But thanks to SLAB_DESTROY_BY_RCU, beware a deleted object can be reused
+But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused
 very very fast (before the end of RCU grace period)
 
 if (put_last_reference_on(obj) {
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index e93d04133fe7..96a3d81837e1 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -1,9 +1,102 @@
 Using RCU's CPU Stall Detector
 
-The rcu_cpu_stall_suppress module parameter enables RCU's CPU stall
-detector, which detects conditions that unduly delay RCU grace periods.
-This module parameter enables CPU stall detection by default, but
-may be overridden via boot-time parameter or at runtime via sysfs.
+This document first discusses what sorts of issues RCU's CPU stall
+detector can locate, and then discusses kernel parameters and Kconfig
+options that can be used to fine-tune the detector's operation.  Finally,
+this document explains the stall detector's "splat" format.
+
+
+What Causes RCU CPU Stall Warnings?
+
+So your kernel printed an RCU CPU stall warning.  The next question is
+"What caused it?"  The following problems can result in RCU CPU stall
+warnings:
+
+o	A CPU looping in an RCU read-side critical section.
+
+o	A CPU looping with interrupts disabled.
+
+o	A CPU looping with preemption disabled.  This condition can
+	result in RCU-sched stalls and, if ksoftirqd is in use, RCU-bh
+	stalls.
+
+o	A CPU looping with bottom halves disabled.  This condition can
+	result in RCU-sched and RCU-bh stalls.
+
+o	For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the
+	kernel without invoking schedule().  Note that cond_resched()
+	does not necessarily prevent RCU CPU stall warnings.  Therefore,
+	if the looping in the kernel is really expected and desirable
+	behavior, you might need to replace some of the cond_resched()
+	calls with calls to cond_resched_rcu_qs().
+
+o	Booting Linux using a console connection that is too slow to
+	keep up with the boot-time console-message rate.  For example,
+	a 115Kbaud serial console can be -way- too slow to keep up
+	with boot-time message rates, and will frequently result in
+	RCU CPU stall warning messages.  Especially if you have added
+	debug printk()s.
+
+o	Anything that prevents RCU's grace-period kthreads from running.
+	This can result in the "All QSes seen" console-log message.
+	This message will include information on when the kthread last
+	ran and how often it should be expected to run.
+
+o	A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
+	happen to preempt a low-priority task in the middle of an RCU
+	read-side critical section.   This is especially damaging if
+	that low-priority task is not permitted to run on any other CPU,
+	in which case the next RCU grace period can never complete, which
+	will eventually cause the system to run out of memory and hang.
+	While the system is in the process of running itself out of
+	memory, you might see stall-warning messages.
+
+o	A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
+	is running at a higher priority than the RCU softirq threads.
+	This will prevent RCU callbacks from ever being invoked,
+	and in a CONFIG_PREEMPT_RCU kernel will further prevent
+	RCU grace periods from ever completing.  Either way, the
+	system will eventually run out of memory and hang.  In the
+	CONFIG_PREEMPT_RCU case, you might see stall-warning
+	messages.
+
+o	A hardware or software issue shuts off the scheduler-clock
+	interrupt on a CPU that is not in dyntick-idle mode.  This
+	problem really has happened, and seems to be most likely to
+	result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels.
+
+o	A bug in the RCU implementation.
+
+o	A hardware failure.  This is quite unlikely, but has occurred
+	at least once in real life.  A CPU failed in a running system,
+	becoming unresponsive, but not causing an immediate crash.
+	This resulted in a series of RCU CPU stall warnings, eventually
+	leading the realization that the CPU had failed.
+
+The RCU, RCU-sched, RCU-bh, and RCU-tasks implementations have CPU stall
+warning.  Note that SRCU does -not- have CPU stall warnings.  Please note
+that RCU only detects CPU stalls when there is a grace period in progress.
+No grace period, no CPU stall warnings.
+
+To diagnose the cause of the stall, inspect the stack traces.
+The offending function will usually be near the top of the stack.
+If you have a series of stall warnings from a single extended stall,
+comparing the stack traces can often help determine where the stall
+is occurring, which will usually be in the function nearest the top of
+that portion of the stack which remains the same from trace to trace.
+If you can reliably trigger the stall, ftrace can be quite helpful.
+
+RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE
+and with RCU's event tracing.  For information on RCU's event tracing,
+see include/trace/events/rcu.h.
+
+
+Fine-Tuning the RCU CPU Stall Detector
+
+The rcuupdate.rcu_cpu_stall_suppress module parameter disables RCU's
+CPU stall detector, which detects conditions that unduly delay RCU grace
+periods.  This module parameter enables CPU stall detection by default,
+but may be overridden via boot-time parameter or at runtime via sysfs.
 The stall detector's idea of what constitutes "unduly delayed" is
 controlled by a set of kernel configuration variables and cpp macros:
 
@@ -56,6 +149,9 @@ rcupdate.rcu_task_stall_timeout
 	And continues with the output of sched_show_task() for each
 	task stalling the current RCU-tasks grace period.
 
+
+Interpreting RCU's CPU Stall-Detector "Splats"
+
 For non-RCU-tasks flavors of RCU, when a CPU detects that it is stalling,
 it will print a message similar to the following:
 
@@ -178,89 +274,3 @@ grace period is in flight.
 
 It is entirely possible to see stall warnings from normal and from
 expedited grace periods at about the same time from the same run.
-
-
-What Causes RCU CPU Stall Warnings?
-
-So your kernel printed an RCU CPU stall warning.  The next question is
-"What caused it?"  The following problems can result in RCU CPU stall
-warnings:
-
-o	A CPU looping in an RCU read-side critical section.
-	
-o	A CPU looping with interrupts disabled.  This condition can
-	result in RCU-sched and RCU-bh stalls.
-
-o	A CPU looping with preemption disabled.  This condition can
-	result in RCU-sched stalls and, if ksoftirqd is in use, RCU-bh
-	stalls.
-
-o	A CPU looping with bottom halves disabled.  This condition can
-	result in RCU-sched and RCU-bh stalls.
-
-o	For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the
-	kernel without invoking schedule().  Note that cond_resched()
-	does not necessarily prevent RCU CPU stall warnings.  Therefore,
-	if the looping in the kernel is really expected and desirable
-	behavior, you might need to replace some of the cond_resched()
-	calls with calls to cond_resched_rcu_qs().
-
-o	Booting Linux using a console connection that is too slow to
-	keep up with the boot-time console-message rate.  For example,
-	a 115Kbaud serial console can be -way- too slow to keep up
-	with boot-time message rates, and will frequently result in
-	RCU CPU stall warning messages.  Especially if you have added
-	debug printk()s.
-
-o	Anything that prevents RCU's grace-period kthreads from running.
-	This can result in the "All QSes seen" console-log message.
-	This message will include information on when the kthread last
-	ran and how often it should be expected to run.
-
-o	A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
-	happen to preempt a low-priority task in the middle of an RCU
-	read-side critical section.   This is especially damaging if
-	that low-priority task is not permitted to run on any other CPU,
-	in which case the next RCU grace period can never complete, which
-	will eventually cause the system to run out of memory and hang.
-	While the system is in the process of running itself out of
-	memory, you might see stall-warning messages.
-
-o	A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
-	is running at a higher priority than the RCU softirq threads.
-	This will prevent RCU callbacks from ever being invoked,
-	and in a CONFIG_PREEMPT_RCU kernel will further prevent
-	RCU grace periods from ever completing.  Either way, the
-	system will eventually run out of memory and hang.  In the
-	CONFIG_PREEMPT_RCU case, you might see stall-warning
-	messages.
-
-o	A hardware or software issue shuts off the scheduler-clock
-	interrupt on a CPU that is not in dyntick-idle mode.  This
-	problem really has happened, and seems to be most likely to
-	result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels.
-
-o	A bug in the RCU implementation.
-
-o	A hardware failure.  This is quite unlikely, but has occurred
-	at least once in real life.  A CPU failed in a running system,
-	becoming unresponsive, but not causing an immediate crash.
-	This resulted in a series of RCU CPU stall warnings, eventually
-	leading the realization that the CPU had failed.
-
-The RCU, RCU-sched, RCU-bh, and RCU-tasks implementations have CPU stall
-warning.  Note that SRCU does -not- have CPU stall warnings.  Please note
-that RCU only detects CPU stalls when there is a grace period in progress.
-No grace period, no CPU stall warnings.
-
-To diagnose the cause of the stall, inspect the stack traces.
-The offending function will usually be near the top of the stack.
-If you have a series of stall warnings from a single extended stall,
-comparing the stack traces can often help determine where the stall
-is occurring, which will usually be in the function nearest the top of
-that portion of the stack which remains the same from trace to trace.
-If you can reliably trigger the stall, ftrace can be quite helpful.
-
-RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE
-and with RCU's event tracing.  For information on RCU's event tracing,
-see include/trace/events/rcu.h.
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
deleted file mode 100644
index 6549012033f9..000000000000
--- a/Documentation/RCU/trace.txt
+++ /dev/null
@@ -1,535 +0,0 @@
-CONFIG_RCU_TRACE debugfs Files and Formats
-
-
-The rcutree and rcutiny implementations of RCU provide debugfs trace
-output that summarizes counters and state.  This information is useful for
-debugging RCU itself, and can sometimes also help to debug abuses of RCU.
-The following sections describe the debugfs files and formats, first
-for rcutree and next for rcutiny.
-
-
-CONFIG_TREE_RCU and CONFIG_PREEMPT_RCU debugfs Files and Formats
-
-These implementations of RCU provide several debugfs directories under the
-top-level directory "rcu":
-
-rcu/rcu_bh
-rcu/rcu_preempt
-rcu/rcu_sched
-
-Each directory contains files for the corresponding flavor of RCU.
-Note that rcu/rcu_preempt is only present for CONFIG_PREEMPT_RCU.
-For CONFIG_TREE_RCU, the RCU flavor maps onto the RCU-sched flavor,
-so that activity for both appears in rcu/rcu_sched.
-
-In addition, the following file appears in the top-level directory:
-rcu/rcutorture.  This file displays rcutorture test progress.  The output
-of "cat rcu/rcutorture" looks as follows:
-
-rcutorture test sequence: 0 (test in progress)
-rcutorture update version number: 615
-
-The first line shows the number of rcutorture tests that have completed
-since boot.  If a test is currently running, the "(test in progress)"
-string will appear as shown above.  The second line shows the number of
-update cycles that the current test has started, or zero if there is
-no test in progress.
-
-
-Within each flavor directory (rcu/rcu_bh, rcu/rcu_sched, and possibly
-also rcu/rcu_preempt) the following files will be present:
-
-rcudata:
-	Displays fields in struct rcu_data.
-rcuexp:
-	Displays statistics for expedited grace periods.
-rcugp:
-	Displays grace-period counters.
-rcuhier:
-	Displays the struct rcu_node hierarchy.
-rcu_pending:
-	Displays counts of the reasons rcu_pending() decided that RCU had
-	work to do.
-rcuboost:
-	Displays RCU boosting statistics.  Only present if
-	CONFIG_RCU_BOOST=y.
-
-The output of "cat rcu/rcu_preempt/rcudata" looks as follows:
-
-  0!c=30455 g=30456 cnq=1/0:1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716
-  1!c=30719 g=30720 cnq=1/0:0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982
-  2!c=30150 g=30151 cnq=1/1:1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458
-  3 c=31249 g=31250 cnq=1/1:0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622
-  4!c=29502 g=29503 cnq=1/0:1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521
-  5 c=31201 g=31202 cnq=1/0:1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698
-  6!c=30253 g=30254 cnq=1/0:1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353
-  7 c=31178 g=31178 cnq=1/0:0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969
-
-This file has one line per CPU, or eight for this 8-CPU system.
-The fields are as follows:
-
-o	The number at the beginning of each line is the CPU number.
-	CPUs numbers followed by an exclamation mark are offline,
-	but have been online at least once since boot.	There will be
-	no output for CPUs that have never been online, which can be
-	a good thing in the surprisingly common case where NR_CPUS is
-	substantially larger than the number of actual CPUs.
-
-o	"c" is the count of grace periods that this CPU believes have
-	completed.  Offlined CPUs and CPUs in dynticks idle mode may lag
-	quite a ways behind, for example, CPU 4 under "rcu_sched" above,
-	which has been offline through 16 RCU grace periods.  It is not
-	unusual to see offline CPUs lagging by thousands of grace periods.
-	Note that although the grace-period number is an unsigned long,
-	it is printed out as a signed long to allow more human-friendly
-	representation near boot time.
-
-o	"g" is the count of grace periods that this CPU believes have
-	started.  Again, offlined CPUs and CPUs in dynticks idle mode
-	may lag behind.  If the "c" and "g" values are equal, this CPU
-	has already reported a quiescent state for the last RCU grace
-	period that it is aware of, otherwise, the CPU believes that it
-	owes RCU a quiescent state.
-
-o	"pq" indicates that this CPU has passed through a quiescent state
-	for the current grace period.  It is possible for "pq" to be
-	"1" and "c" different than "g", which indicates that although
-	the CPU has passed through a quiescent state, either (1) this
-	CPU has not yet reported that fact, (2) some other CPU has not
-	yet reported for this grace period, or (3) both.
-
-o	"qp" indicates that RCU still expects a quiescent state from
-	this CPU.  Offlined CPUs and CPUs in dyntick idle mode might
-	well have qp=1, which is OK: RCU is still ignoring them.
-
-o	"dt" is the current value of the dyntick counter that is incremented
-	when entering or leaving idle, either due to a context switch or
-	due to an interrupt.  This number is even if the CPU is in idle
-	from RCU's viewpoint and odd otherwise.  The number after the
-	first "/" is the interrupt nesting depth when in idle state,
-	or a large number added to the interrupt-nesting depth when
-	running a non-idle task.  Some architectures do not accurately
-	count interrupt nesting when running in non-idle kernel context,
-	which can result in interesting anomalies such as negative
-	interrupt-nesting levels.  The number after the second "/"
-	is the NMI nesting depth.
-
-o	"df" is the number of times that some other CPU has forced a
-	quiescent state on behalf of this CPU due to this CPU being in
-	idle state.
-
-o	"of" is the number of times that some other CPU has forced a
-	quiescent state on behalf of this CPU due to this CPU being
-	offline.  In a perfect world, this might never happen, but it
-	turns out that offlining and onlining a CPU can take several grace
-	periods, and so there is likely to be an extended period of time
-	when RCU believes that the CPU is online when it really is not.
-	Please note that erring in the other direction (RCU believing a
-	CPU is offline when it is really alive and kicking) is a fatal
-	error, so it makes sense to err conservatively.
-
-o	"ql" is the number of RCU callbacks currently residing on
-	this CPU.  The first number is the number of "lazy" callbacks
-	that are known to RCU to only be freeing memory, and the number
-	after the "/" is the total number of callbacks, lazy or not.
-	These counters count callbacks regardless of what phase of
-	grace-period processing that they are in (new, waiting for
-	grace period to start, waiting for grace period to end, ready
-	to invoke).
-
-o	"qs" gives an indication of the state of the callback queue
-	with four characters:
-
-	"N"	Indicates that there are callbacks queued that are not
-		ready to be handled by the next grace period, and thus
-		will be handled by the grace period following the next
-		one.
-
-	"R"	Indicates that there are callbacks queued that are
-		ready to be handled by the next grace period.
-
-	"W"	Indicates that there are callbacks queued that are
-		waiting on the current grace period.
-
-	"D"	Indicates that there are callbacks queued that have
-		already been handled by a prior grace period, and are
-		thus waiting to be invoked.  Note that callbacks in
-		the process of being invoked are not counted here.
-		Callbacks in the process of being invoked are those
-		that have been removed from the rcu_data structures
-		queues by rcu_do_batch(), but which have not yet been
-		invoked.
-
-	If there are no callbacks in a given one of the above states,
-	the corresponding character is replaced by ".".
-
-o	"b" is the batch limit for this CPU.  If more than this number
-	of RCU callbacks is ready to invoke, then the remainder will
-	be deferred.
-
-o	"ci" is the number of RCU callbacks that have been invoked for
-	this CPU.  Note that ci+nci+ql is the number of callbacks that have
-	been registered in absence of CPU-hotplug activity.
-
-o	"nci" is the number of RCU callbacks that have been offloaded from
-	this CPU.  This will always be zero unless the kernel was built
-	with CONFIG_RCU_NOCB_CPU=y and the "rcu_nocbs=" kernel boot
-	parameter was specified.
-
-o	"co" is the number of RCU callbacks that have been orphaned due to
-	this CPU going offline.  These orphaned callbacks have been moved
-	to an arbitrarily chosen online CPU.
-
-o	"ca" is the number of RCU callbacks that have been adopted by this
-	CPU due to other CPUs going offline.  Note that ci+co-ca+ql is
-	the number of RCU callbacks registered on this CPU.
-
-
-Kernels compiled with CONFIG_RCU_BOOST=y display the following from
-/debug/rcu/rcu_preempt/rcudata:
-
-  0!c=12865 g=12866 cnq=1/0:1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871
-  1 c=14407 g=14408 cnq=1/0:0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485
-  2 c=14407 g=14408 cnq=1/0:0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490
-  3 c=14407 g=14408 cnq=1/0:0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290
-  4 c=14405 g=14406 cnq=1/0:1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114
-  5!c=14168 g=14169 cnq=1/0:0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722
-  6 c=14404 g=14405 cnq=1/0:0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811
-  7 c=14407 g=14408 cnq=1/0:1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042
-
-This is similar to the output discussed above, but contains the following
-additional fields:
-
-o	"kt" is the per-CPU kernel-thread state.  The digit preceding
-	the first slash is zero if there is no work pending and 1
-	otherwise.  The character between the first pair of slashes is
-	as follows:
-
-	"S"	The kernel thread is stopped, in other words, all
-		CPUs corresponding to this rcu_node structure are
-		offline.
-
-	"R"	The kernel thread is running.
-
-	"W"	The kernel thread is waiting because there is no work
-		for it to do.
-
-	"O"	The kernel thread is waiting because it has been
-		forced off of its designated CPU or because its
-		->cpus_allowed mask permits it to run on other than
-		its designated CPU.
-
-	"Y"	The kernel thread is yielding to avoid hogging CPU.
-
-	"?"	Unknown value, indicates a bug.
-
-	The number after the final slash is the CPU that the kthread
-	is actually running on.
-
-	This field is displayed only for CONFIG_RCU_BOOST kernels.
-
-o	"ktl" is the low-order 16 bits (in hexadecimal) of the count of
-	the number of times that this CPU's per-CPU kthread has gone
-	through its loop servicing invoke_rcu_cpu_kthread() requests.
-
-	This field is displayed only for CONFIG_RCU_BOOST kernels.
-
-
-The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
-
-s=21872 wd1=0 wd2=0 wd3=5 enq=0 sc=21872
-
-These fields are as follows:
-
-o	"s" is the sequence number, with an odd number indicating that
-	an expedited grace period is in progress.
-
-o	"wd1", "wd2", and "wd3" are the number of times that an attempt
-	to start an expedited grace period found that someone else had
-	completed an expedited grace period that satisfies the attempted
-	request.  "Our work is done."
-
-o	"enq" is the number of quiescent states still outstanding.
-
-o	"sc" is the number of times that the attempt to start a
-	new expedited grace period succeeded.
-
-
-The output of "cat rcu/rcu_preempt/rcugp" looks as follows:
-
-completed=31249  gpnum=31250  age=1  max=18
-
-These fields are taken from the rcu_state structure, and are as follows:
-
-o	"completed" is the number of grace periods that have completed.
-	It is comparable to the "c" field from rcu/rcudata in that a
-	CPU whose "c" field matches the value of "completed" is aware
-	that the corresponding RCU grace period has completed.
-
-o	"gpnum" is the number of grace periods that have started.  It is
-	similarly comparable to the "g" field from rcu/rcudata in that
-	a CPU whose "g" field matches the value of "gpnum" is aware that
-	the corresponding RCU grace period has started.
-
-	If these two fields are equal, then there is no grace period
-	in progress, in other words, RCU is idle.  On the other hand,
-	if the two fields differ (as they are above), then an RCU grace
-	period is in progress.
-
-o	"age" is the number of jiffies that the current grace period
-	has extended for, or zero if there is no grace period currently
-	in effect.
-
-o	"max" is the age in jiffies of the longest-duration grace period
-	thus far.
-
-The output of "cat rcu/rcu_preempt/rcuhier" looks as follows:
-
-c=14407 g=14408 s=0 jfq=2 j=c863 nfqs=12040/nfqsng=0(12040) fqlh=1051 oqlen=0/0
-3/3 ..>. 0:7 ^0
-e/e ..>. 0:3 ^0    d/d ..>. 4:7 ^1
-
-The fields are as follows:
-
-o	"c" is exactly the same as "completed" under rcu/rcu_preempt/rcugp.
-
-o	"g" is exactly the same as "gpnum" under rcu/rcu_preempt/rcugp.
-
-o	"s" is the current state of the force_quiescent_state()
-	state machine.
-
-o	"jfq" is the number of jiffies remaining for this grace period
-	before force_quiescent_state() is invoked to help push things
-	along.	Note that CPUs in idle mode throughout the grace period
-	will not report on their own, but rather must be check by some
-	other CPU via force_quiescent_state().
-
-o	"j" is the low-order four hex digits of the jiffies counter.
-	Yes, Paul did run into a number of problems that turned out to
-	be due to the jiffies counter no longer counting.  Why do you ask?
-
-o	"nfqs" is the number of calls to force_quiescent_state() since
-	boot.
-
-o	"nfqsng" is the number of useless calls to force_quiescent_state(),
-	where there wasn't actually a grace period active.  This can
-	no longer happen due to grace-period processing being pushed
-	into a kthread.  The number in parentheses is the difference
-	between "nfqs" and "nfqsng", or the number of times that
-	force_quiescent_state() actually did some real work.
-
-o	"fqlh" is the number of calls to force_quiescent_state() that
-	exited immediately (without even being counted in nfqs above)
-	due to contention on ->fqslock.
-
-o	Each element of the form "3/3 ..>. 0:7 ^0" represents one rcu_node
-	structure.  Each line represents one level of the hierarchy,
-	from root to leaves.  It is best to think of the rcu_data
-	structures as forming yet another level after the leaves.
-	Note that there might be either one, two, three, or even four
-	levels of rcu_node structures, depending on the relationship
-	between CONFIG_RCU_FANOUT, CONFIG_RCU_FANOUT_LEAF (possibly
-	adjusted using the rcu_fanout_leaf kernel boot parameter), and
-	CONFIG_NR_CPUS (possibly adjusted using the nr_cpu_ids count of
-	possible CPUs for the booting hardware).
-
-	o	The numbers separated by the "/" are the qsmask followed
-		by the qsmaskinit.  The qsmask will have one bit
-		set for each entity in the next lower level that has
-		not yet checked in for the current grace period ("e"
-		indicating CPUs 5, 6, and 7 in the example above).
-		The qsmaskinit will have one bit for each entity that is
-		currently expected to check in during each grace period.
-		The value of qsmaskinit is assigned to that of qsmask
-		at the beginning of each grace period.
-
-	o	The characters separated by the ">" indicate the state
-		of the blocked-tasks lists.  A "G" preceding the ">"
-		indicates that at least one task blocked in an RCU
-		read-side critical section blocks the current grace
-		period, while a "E" preceding the ">" indicates that
-		at least one task blocked in an RCU read-side critical
-		section blocks the current expedited grace period.
-		A "T" character following the ">" indicates that at
-		least one task is blocked within an RCU read-side
-		critical section, regardless of whether any current
-		grace period (expedited or normal) is inconvenienced.
-		A "." character appears if the corresponding condition
-		does not hold, so that "..>." indicates that no tasks
-		are blocked.  In contrast, "GE>T" indicates maximal
-		inconvenience from blocked tasks.  CONFIG_TREE_RCU
-		builds of the kernel will always show "..>.".
-
-	o	The numbers separated by the ":" are the range of CPUs
-		served by this struct rcu_node.  This can be helpful
-		in working out how the hierarchy is wired together.
-
-		For example, the example rcu_node structure shown above
-		has "0:7", indicating that it covers CPUs 0 through 7.
-
-	o	The number after the "^" indicates the bit in the
-		next higher level rcu_node structure that this rcu_node
-		structure corresponds to.  For example, the "d/d ..>. 4:7
-		^1" has a "1" in this position, indicating that it
-		corresponds to the "1" bit in the "3" shown in the
-		"3/3 ..>. 0:7 ^0" entry on the next level up.
-
-
-The output of "cat rcu/rcu_sched/rcu_pending" looks as follows:
-
-  0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903 ndw=0
-  1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113 ndw=0
-  2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889 ndw=0
-  3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469 ndw=0
-  4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042 ndw=0
-  5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422 ndw=0
-  6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699 ndw=0
-  7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147 ndw=0
-
-The fields are as follows:
-
-o	The leading number is the CPU number, with "!" indicating
-	an offline CPU.
-
-o	"np" is the number of times that __rcu_pending() has been invoked
-	for the corresponding flavor of RCU.
-
-o	"qsp" is the number of times that the RCU was waiting for a
-	quiescent state from this CPU.
-
-o	"rpq" is the number of times that the CPU had passed through
-	a quiescent state, but not yet reported it to RCU.
-
-o	"cbr" is the number of times that this CPU had RCU callbacks
-	that had passed through a grace period, and were thus ready
-	to be invoked.
-
-o	"cng" is the number of times that this CPU needed another
-	grace period while RCU was idle.
-
-o	"gpc" is the number of times that an old grace period had
-	completed, but this CPU was not yet aware of it.
-
-o	"gps" is the number of times that a new grace period had started,
-	but this CPU was not yet aware of it.
-
-o	"ndw" is the number of times that a wakeup of an rcuo
-	callback-offload kthread had to be deferred in order to avoid
-	deadlock.
-
-o	"nn" is the number of times that this CPU needed nothing.
-
-
-The output of "cat rcu/rcuboost" looks as follows:
-
-0:3 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894
-    balk: nt=0 egt=4695 bt=0 nb=0 ny=56 nos=0
-4:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894
-    balk: nt=0 egt=6541 bt=0 nb=0 ny=126 nos=0
-
-This information is output only for rcu_preempt.  Each two-line entry
-corresponds to a leaf rcu_node structure.  The fields are as follows:
-
-o	"n:m" is the CPU-number range for the corresponding two-line
-	entry.  In the sample output above, the first entry covers
-	CPUs zero through three and the second entry covers CPUs four
-	through seven.
-
-o	"tasks=TNEB" gives the state of the various segments of the
-	rnp->blocked_tasks list:
-
-	"T"	This indicates that there are some tasks that blocked
-		while running on one of the corresponding CPUs while
-		in an RCU read-side critical section.
-
-	"N"	This indicates that some of the blocked tasks are preventing
-		the current normal (non-expedited) grace period from
-		completing.
-
-	"E"	This indicates that some of the blocked tasks are preventing
-		the current expedited grace period from completing.
-
-	"B"	This indicates that some of the blocked tasks are in
-		need of RCU priority boosting.
-
-	Each character is replaced with "." if the corresponding
-	condition does not hold.
-
-o	"kt" is the state of the RCU priority-boosting kernel
-	thread associated with the corresponding rcu_node structure.
-	The state can be one of the following:
-
-	"S"	The kernel thread is stopped, in other words, all
-		CPUs corresponding to this rcu_node structure are
-		offline.
-
-	"R"	The kernel thread is running.
-
-	"W"	The kernel thread is waiting because there is no work
-		for it to do.
-
-	"Y"	The kernel thread is yielding to avoid hogging CPU.
-
-	"?"	Unknown value, indicates a bug.
-
-o	"ntb" is the number of tasks boosted.
-
-o	"neb" is the number of tasks boosted in order to complete an
-	expedited grace period.
-
-o	"nnb" is the number of tasks boosted in order to complete a
-	normal (non-expedited) grace period.  When boosting a task
-	that was blocking both an expedited and a normal grace period,
-	it is counted against the expedited total above.
-
-o	"j" is the low-order 16 bits of the jiffies counter in
-	hexadecimal.
-
-o	"bt" is the low-order 16 bits of the value that the jiffies
-	counter will have when we next start boosting, assuming that
-	the current grace period does not end beforehand.  This is
-	also in hexadecimal.
-
-o	"balk: nt" counts the number of times we didn't boost (in
-	other words, we balked) even though it was time to boost because
-	there were no blocked tasks to boost.  This situation occurs
-	when there is one blocked task on one rcu_node structure and
-	none on some other rcu_node structure.
-
-o	"egt" counts the number of times we balked because although
-	there were blocked tasks, none of them were blocking the
-	current grace period, whether expedited or otherwise.
-
-o	"bt" counts the number of times we balked because boosting
-	had already been initiated for the current grace period.
-
-o	"nb" counts the number of times we balked because there
-	was at least one task blocking the current non-expedited grace
-	period that never had blocked.  If it is already running, it
-	just won't help to boost its priority!
-
-o	"ny" counts the number of times we balked because it was
-	not yet time to start boosting.
-
-o	"nos" counts the number of times we balked for other
-	reasons, e.g., the grace period ended first.
-
-
-CONFIG_TINY_RCU debugfs Files and Formats
-
-These implementations of RCU provides a single debugfs file under the
-top-level directory RCU, namely rcu/rcudata, which displays fields in
-rcu_bh_ctrlblk and rcu_sched_ctrlblk.
-
-The output of "cat rcu/rcudata" is as follows:
-
-rcu_sched: qlen: 0
-rcu_bh: qlen: 0
-
-This is split into rcu_sched and rcu_bh sections.  The field is as
-follows:
-
-o	"qlen" is the number of RCU callbacks currently waiting either
-	for an RCU grace period or waiting to be invoked.  This is the
-	only field present for rcu_sched and rcu_bh, due to the
-	short-circuiting of grace period in those two cases.
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 5cbd8b2395b8..8ed6c9f6133c 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -562,7 +562,9 @@ This section presents a "toy" RCU implementation that is based on
 familiar locking primitives.  Its overhead makes it a non-starter for
 real-life use, as does its lack of scalability.  It is also unsuitable
 for realtime use, since it allows scheduling latency to "bleed" from
-one read-side critical section to another.
+one read-side critical section to another.  It also assumes recursive
+reader-writer locks:  If you try this with non-recursive locks, and
+you allow nested rcu_read_lock() calls, you can deadlock.
 
 However, it is probably the easiest implementation to relate to, so is
 a good starting point.
@@ -587,20 +589,21 @@ It is extremely simple:
 		write_unlock(&rcu_gp_mutex);
 	}
 
-[You can ignore rcu_assign_pointer() and rcu_dereference() without
-missing much.  But here they are anyway.  And whatever you do, don't
-forget about them when submitting patches making use of RCU!]
+[You can ignore rcu_assign_pointer() and rcu_dereference() without missing
+much.  But here are simplified versions anyway.  And whatever you do,
+don't forget about them when submitting patches making use of RCU!]
 
-	#define rcu_assign_pointer(p, v)	({ \
-							smp_wmb(); \
-							(p) = (v); \
-						})
+	#define rcu_assign_pointer(p, v) \
+	({ \
+		smp_store_release(&(p), (v)); \
+	})
 
-	#define rcu_dereference(p)     ({ \
-					typeof(p) _________p1 = p; \
-					smp_read_barrier_depends(); \
-					(_________p1); \
-					})
+	#define rcu_dereference(p) \
+	({ \
+		typeof(p) _________p1 = p; \
+		smp_read_barrier_depends(); \
+		(_________p1); \
+	})
 
 
 The rcu_read_lock() and rcu_read_unlock() primitive read-acquire
@@ -925,7 +928,8 @@ d.	Do you need RCU grace periods to complete even in the face
 
 e.	Is your workload too update-intensive for normal use of
 	RCU, but inappropriate for other synchronization mechanisms?
-	If so, consider SLAB_DESTROY_BY_RCU.  But please be careful!
+	If so, consider SLAB_TYPESAFE_BY_RCU (which was originally
+	named SLAB_DESTROY_BY_RCU).  But please be careful!
 
 f.	Do you need read-side critical sections that are respected
 	even though they are in the middle of the idle loop, during
diff --git a/Documentation/acpi/acpi-lid.txt b/Documentation/acpi/acpi-lid.txt
index 22cb3091f297..effe7af3a5af 100644
--- a/Documentation/acpi/acpi-lid.txt
+++ b/Documentation/acpi/acpi-lid.txt
@@ -59,20 +59,28 @@ button driver uses the following 3 modes in order not to trigger issues.
 If the userspace hasn't been prepared to ignore the unreliable "opened"
 events and the unreliable initial state notification, Linux users can use
 the following kernel parameters to handle the possible issues:
-A. button.lid_init_state=open:
+A. button.lid_init_state=method:
+   When this option is specified, the ACPI button driver reports the
+   initial lid state using the returning value of the _LID control method
+   and whether the "opened"/"closed" events are paired fully relies on the
+   firmware implementation.
+   This option can be used to fix some platforms where the returning value
+   of the _LID control method is reliable but the initial lid state
+   notification is missing.
+   This option is the default behavior during the period the userspace
+   isn't ready to handle the buggy AML tables.
+B. button.lid_init_state=open:
    When this option is specified, the ACPI button driver always reports the
    initial lid state as "opened" and whether the "opened"/"closed" events
    are paired fully relies on the firmware implementation.
    This may fix some platforms where the returning value of the _LID
    control method is not reliable and the initial lid state notification is
    missing.
-   This option is the default behavior during the period the userspace
-   isn't ready to handle the buggy AML tables.
 
 If the userspace has been prepared to ignore the unreliable "opened" events
 and the unreliable initial state notification, Linux users should always
 use the following kernel parameter:
-B. button.lid_init_state=ignore:
+C. button.lid_init_state=ignore:
    When this option is specified, the ACPI button driver never reports the
    initial lid state and there is a compensation mechanism implemented to
    ensure that the reliable "closed" notifications can always be delievered
diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst
index 02f639aab06e..b96e80f79e85 100644
--- a/Documentation/admin-guide/README.rst
+++ b/Documentation/admin-guide/README.rst
@@ -362,7 +362,7 @@ If something goes wrong
    as is, otherwise you will have to use the ``ksymoops`` program to make
    sense of the dump (but compiling with CONFIG_KALLSYMS is usually preferred).
    This utility can be downloaded from
-   ftp://ftp.<country>.kernel.org/pub/linux/utils/kernel/ksymoops/ .
+   https://www.kernel.org/pub/linux/utils/kernel/ksymoops/ .
    Alternatively, you can do the dump lookup by hand:
 
  - In debugging dumps like the above, it helps enormously if you can
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 130e7ecaf9a6..9b0b3dea6326 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -866,6 +866,15 @@
 
 	dscc4.setup=	[NET]
 
+	dt_cpu_ftrs=	[PPC]
+			Format: {"off" | "known"}
+			Control how the dt_cpu_ftrs device-tree binding is
+			used for CPU feature discovery and setup (if it
+			exists).
+			off: Do not use it, fall back to legacy cpu table.
+			known: Do not pass through unknown features to guests
+			or userspace, only those that the kernel is aware of.
+
 	dump_apple_properties	[X86]
 			Dump name and content of EFI device properties on
 			x86 Macs.  Useful for driver authors to determine
@@ -972,7 +981,7 @@
 			A valid base address must be provided, and the serial
 			port must already be setup and configured.
 
-		armada3700_uart,<addr>
+		ar3700_uart,<addr>
 			Start an early, polled-mode console on the
 			Armada 3700 serial port at the specified
 			address. The serial port must already be setup
@@ -2127,6 +2136,12 @@
 	memmap=nn[KMG]@ss[KMG]
 			[KNL] Force usage of a specific region of memory.
 			Region of memory to be used is from ss to ss+nn.
+			If @ss[KMG] is omitted, it is equivalent to mem=nn[KMG],
+			which limits max address to nn[KMG].
+			Multiple different regions can be specified,
+			comma delimited.
+			Example:
+				memmap=100M@2G,100M#3G,1G!1024G
 
 	memmap=nn[KMG]#ss[KMG]
 			[KNL,ACPI] Mark specific memory as ACPI data.
@@ -2139,6 +2154,9 @@
 			         memmap=64K$0x18690000
 			         or
 			         memmap=0x10000$0x18690000
+			Some bootloaders may need an escape character before '$',
+			like Grub2, otherwise '$' and the following number
+			will be eaten.
 
 	memmap=nn[KMG]!ss[KMG]
 			[KNL,X86] Mark specific memory as protected.
@@ -2434,12 +2452,6 @@
 			and gids from such clients.  This is intended to ease
 			migration from NFSv2/v3.
 
-	objlayoutdriver.osd_login_prog=
-			[NFS] [OBJLAYOUT] sets the pathname to the program which
-			is used to automatically discover and login into new
-			osd-targets. Please see:
-			Documentation/filesystems/pnfs.txt for more explanations
-
 	nmi_debug=	[KNL,SH] Specify one or more actions to take
 			when a NMI is triggered.
 			Format: [state][,regs][,debounce][,die]
@@ -3235,21 +3247,17 @@
 
 	rcutree.gp_cleanup_delay=	[KNL]
 			Set the number of jiffies to delay each step of
-			RCU grace-period cleanup.  This only has effect
-			when CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP is set.
+			RCU grace-period cleanup.
 
 	rcutree.gp_init_delay=	[KNL]
 			Set the number of jiffies to delay each step of
-			RCU grace-period initialization.  This only has
-			effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT
-			is set.
+			RCU grace-period initialization.
 
 	rcutree.gp_preinit_delay=	[KNL]
 			Set the number of jiffies to delay each step of
 			RCU grace-period pre-initialization, that is,
 			the propagation of recent CPU-hotplug changes up
-			the rcu_node combining tree.  This only has effect
-			when CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT is set.
+			the rcu_node combining tree.
 
 	rcutree.rcu_fanout_exact= [KNL]
 			Disable autobalancing of the rcu_node combining
@@ -3325,6 +3333,17 @@
 			This wake_up() will be accompanied by a
 			WARN_ONCE() splat and an ftrace_dump().
 
+	rcuperf.gp_async= [KNL]
+			Measure performance of asynchronous
+			grace-period primitives such as call_rcu().
+
+	rcuperf.gp_async_max= [KNL]
+			Specify the maximum number of outstanding
+			callbacks per writer thread.  When a writer
+			thread exceeds this limit, it invokes the
+			corresponding flavor of rcu_barrier() to allow
+			previously posted callbacks to drain.
+
 	rcuperf.gp_exp= [KNL]
 			Measure performance of expedited synchronous
 			grace-period primitives.
@@ -3352,17 +3371,22 @@
 	rcuperf.perf_runnable= [BOOT]
 			Start rcuperf running at boot time.
 
+	rcuperf.perf_type= [KNL]
+			Specify the RCU implementation to test.
+
 	rcuperf.shutdown= [KNL]
 			Shut the system down after performance tests
 			complete.  This is useful for hands-off automated
 			testing.
 
-	rcuperf.perf_type= [KNL]
-			Specify the RCU implementation to test.
-
 	rcuperf.verbose= [KNL]
 			Enable additional printk() statements.
 
+	rcuperf.writer_holdoff= [KNL]
+			Write-side holdoff between grace periods,
+			in microseconds.  The default of zero says
+			no holdoff.
+
 	rcutorture.cbflood_inter_holdoff= [KNL]
 			Set holdoff time (jiffies) between successive
 			callback-flood tests.
@@ -3800,6 +3824,30 @@
 	spia_pedr=
 	spia_peddr=
 
+	srcutree.counter_wrap_check [KNL]
+			Specifies how frequently to check for
+			grace-period sequence counter wrap for the
+			srcu_data structure's ->srcu_gp_seq_needed field.
+			The greater the number of bits set in this kernel
+			parameter, the less frequently counter wrap will
+			be checked for.  Note that the bottom two bits
+			are ignored.
+
+	srcutree.exp_holdoff [KNL]
+			Specifies how many nanoseconds must elapse
+			since the end of the last SRCU grace period for
+			a given srcu_struct until the next normal SRCU
+			grace period will be considered for automatic
+			expediting.  Set to zero to disable automatic
+			expediting.
+
+	stack_guard_gap=	[MM]
+			override the default stack gap protection. The value
+			is in page units and it defines how many pages prior
+			to (for stacks growing down) resp. after (for stacks
+			growing up) the main stack are reserved for no other
+			mapping. Default value is 256 pages.
+
 	stacktrace	[FTRACE]
 			Enabled the stack tracer on boot up.
 
diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
index 289c80f7760e..09aa2e949787 100644
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@@ -1,4 +1,5 @@
 .. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy <cpufreq_policy>`
+.. |intel_pstate| replace:: :doc:`intel_pstate <intel_pstate>`
 
 =======================
 CPU Performance Scaling
@@ -75,7 +76,7 @@ feedback registers, as that information is typically specific to the hardware
 interface it comes from and may not be easily represented in an abstract,
 platform-independent way.  For this reason, ``CPUFreq`` allows scaling drivers
 to bypass the governor layer and implement their own performance scaling
-algorithms.  That is done by the ``intel_pstate`` scaling driver.
+algorithms.  That is done by the |intel_pstate| scaling driver.
 
 
 ``CPUFreq`` Policy Objects
@@ -174,13 +175,13 @@ necessary to restart the scaling governor so that it can take the new online CPU
 into account.  That is achieved by invoking the governor's ``->stop`` and
 ``->start()`` callbacks, in this order, for the entire policy.
 
-As mentioned before, the ``intel_pstate`` scaling driver bypasses the scaling
+As mentioned before, the |intel_pstate| scaling driver bypasses the scaling
 governor layer of ``CPUFreq`` and provides its own P-state selection algorithms.
-Consequently, if ``intel_pstate`` is used, scaling governors are not attached to
+Consequently, if |intel_pstate| is used, scaling governors are not attached to
 new policy objects.  Instead, the driver's ``->setpolicy()`` callback is invoked
 to register per-CPU utilization update callbacks for each policy.  These
 callbacks are invoked by the CPU scheduler in the same way as for scaling
-governors, but in the ``intel_pstate`` case they both determine the P-state to
+governors, but in the |intel_pstate| case they both determine the P-state to
 use and change the hardware configuration accordingly in one go from scheduler
 context.
 
@@ -257,7 +258,7 @@ are the following:
 
 ``scaling_available_governors``
 	List of ``CPUFreq`` scaling governors present in the kernel that can
-	be attached to this policy or (if the ``intel_pstate`` scaling driver is
+	be attached to this policy or (if the |intel_pstate| scaling driver is
 	in use) list of scaling algorithms provided by the driver that can be
 	applied to this policy.
 
@@ -274,7 +275,7 @@ are the following:
 	the CPU is actually running at (due to hardware design and other
 	limitations).
 
-	Some scaling drivers (e.g. ``intel_pstate``) attempt to provide
+	Some scaling drivers (e.g. |intel_pstate|) attempt to provide
 	information more precisely reflecting the current CPU frequency through
 	this attribute, but that still may not be the exact current CPU
 	frequency as seen by the hardware at the moment.
@@ -284,13 +285,13 @@ are the following:
 
 ``scaling_governor``
 	The scaling governor currently attached to this policy or (if the
-	``intel_pstate`` scaling driver is in use) the scaling algorithm
+	|intel_pstate| scaling driver is in use) the scaling algorithm
 	provided by the driver that is currently applied to this policy.
 
 	This attribute is read-write and writing to it will cause a new scaling
 	governor to be attached to this policy or a new scaling algorithm
 	provided by the scaling driver to be applied to it (in the
-	``intel_pstate`` case), as indicated by the string written to this
+	|intel_pstate| case), as indicated by the string written to this
 	attribute (which must be one of the names listed by the
 	``scaling_available_governors`` attribute described above).
 
@@ -619,7 +620,7 @@ This file is located under :file:`/sys/devices/system/cpu/cpufreq/` and controls
 the "boost" setting for the whole system.  It is not present if the underlying
 scaling driver does not support the frequency boost mechanism (or supports it,
 but provides a driver-specific interface for controlling it, like
-``intel_pstate``).
+|intel_pstate|).
 
 If the value in this file is 1, the frequency boost mechanism is enabled.  This
 means that either the hardware can be put into states in which it is able to
diff --git a/Documentation/admin-guide/pm/index.rst b/Documentation/admin-guide/pm/index.rst
index c80f087321fc..7f148f76f432 100644
--- a/Documentation/admin-guide/pm/index.rst
+++ b/Documentation/admin-guide/pm/index.rst
@@ -6,6 +6,7 @@ Power Management
    :maxdepth: 2
 
    cpufreq
+   intel_pstate
 
 .. only::  subproject and html
 
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
new file mode 100644
index 000000000000..33d703989ea8
--- /dev/null
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -0,0 +1,755 @@
+===============================================
+``intel_pstate`` CPU Performance Scaling Driver
+===============================================
+
+::
+
+ Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+
+General Information
+===================
+
+``intel_pstate`` is a part of the
+:doc:`CPU performance scaling subsystem <cpufreq>` in the Linux kernel
+(``CPUFreq``).  It is a scaling driver for the Sandy Bridge and later
+generations of Intel processors.  Note, however, that some of those processors
+may not be supported.  [To understand ``intel_pstate`` it is necessary to know
+how ``CPUFreq`` works in general, so this is the time to read :doc:`cpufreq` if
+you have not done that yet.]
+
+For the processors supported by ``intel_pstate``, the P-state concept is broader
+than just an operating frequency or an operating performance point (see the
+`LinuxCon Europe 2015 presentation by Kristen Accardi <LCEU2015_>`_ for more
+information about that).  For this reason, the representation of P-states used
+by ``intel_pstate`` internally follows the hardware specification (for details
+refer to `Intel® 64 and IA-32 Architectures Software Developer’s Manual
+Volume 3: System Programming Guide <SDM_>`_).  However, the ``CPUFreq`` core
+uses frequencies for identifying operating performance points of CPUs and
+frequencies are involved in the user space interface exposed by it, so
+``intel_pstate`` maps its internal representation of P-states to frequencies too
+(fortunately, that mapping is unambiguous).  At the same time, it would not be
+practical for ``intel_pstate`` to supply the ``CPUFreq`` core with a table of
+available frequencies due to the possible size of it, so the driver does not do
+that.  Some functionality of the core is limited by that.
+
+Since the hardware P-state selection interface used by ``intel_pstate`` is
+available at the logical CPU level, the driver always works with individual
+CPUs.  Consequently, if ``intel_pstate`` is in use, every ``CPUFreq`` policy
+object corresponds to one logical CPU and ``CPUFreq`` policies are effectively
+equivalent to CPUs.  In particular, this means that they become "inactive" every
+time the corresponding CPU is taken offline and need to be re-initialized when
+it goes back online.
+
+``intel_pstate`` is not modular, so it cannot be unloaded, which means that the
+only way to pass early-configuration-time parameters to it is via the kernel
+command line.  However, its configuration can be adjusted via ``sysfs`` to a
+great extent.  In some configurations it even is possible to unregister it via
+``sysfs`` which allows another ``CPUFreq`` scaling driver to be loaded and
+registered (see `below <status_attr_>`_).
+
+
+Operation Modes
+===============
+
+``intel_pstate`` can operate in three different modes: in the active mode with
+or without hardware-managed P-states support and in the passive mode.  Which of
+them will be in effect depends on what kernel command line options are used and
+on the capabilities of the processor.
+
+Active Mode
+-----------
+
+This is the default operation mode of ``intel_pstate``.  If it works in this
+mode, the ``scaling_driver`` policy attribute in ``sysfs`` for all ``CPUFreq``
+policies contains the string "intel_pstate".
+
+In this mode the driver bypasses the scaling governors layer of ``CPUFreq`` and
+provides its own scaling algorithms for P-state selection.  Those algorithms
+can be applied to ``CPUFreq`` policies in the same way as generic scaling
+governors (that is, through the ``scaling_governor`` policy attribute in
+``sysfs``).  [Note that different P-state selection algorithms may be chosen for
+different policies, but that is not recommended.]
+
+They are not generic scaling governors, but their names are the same as the
+names of some of those governors.  Moreover, confusingly enough, they generally
+do not work in the same way as the generic governors they share the names with.
+For example, the ``powersave`` P-state selection algorithm provided by
+``intel_pstate`` is not a counterpart of the generic ``powersave`` governor
+(roughly, it corresponds to the ``schedutil`` and ``ondemand`` governors).
+
+There are two P-state selection algorithms provided by ``intel_pstate`` in the
+active mode: ``powersave`` and ``performance``.  The way they both operate
+depends on whether or not the hardware-managed P-states (HWP) feature has been
+enabled in the processor and possibly on the processor model.
+
+Which of the P-state selection algorithms is used by default depends on the
+:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option.
+Namely, if that option is set, the ``performance`` algorithm will be used by
+default, and the other one will be used by default if it is not set.
+
+Active Mode With HWP
+~~~~~~~~~~~~~~~~~~~~
+
+If the processor supports the HWP feature, it will be enabled during the
+processor initialization and cannot be disabled after that.  It is possible
+to avoid enabling it by passing the ``intel_pstate=no_hwp`` argument to the
+kernel in the command line.
+
+If the HWP feature has been enabled, ``intel_pstate`` relies on the processor to
+select P-states by itself, but still it can give hints to the processor's
+internal P-state selection logic.  What those hints are depends on which P-state
+selection algorithm has been applied to the given policy (or to the CPU it
+corresponds to).
+
+Even though the P-state selection is carried out by the processor automatically,
+``intel_pstate`` registers utilization update callbacks with the CPU scheduler
+in this mode.  However, they are not used for running a P-state selection
+algorithm, but for periodic updates of the current CPU frequency information to
+be made available from the ``scaling_cur_freq`` policy attribute in ``sysfs``.
+
+HWP + ``performance``
+.....................
+
+In this configuration ``intel_pstate`` will write 0 to the processor's
+Energy-Performance Preference (EPP) knob (if supported) or its
+Energy-Performance Bias (EPB) knob (otherwise), which means that the processor's
+internal P-state selection logic is expected to focus entirely on performance.
+
+This will override the EPP/EPB setting coming from the ``sysfs`` interface
+(see `Energy vs Performance Hints`_ below).
+
+Also, in this configuration the range of P-states available to the processor's
+internal P-state selection logic is always restricted to the upper boundary
+(that is, the maximum P-state that the driver is allowed to use).
+
+HWP + ``powersave``
+...................
+
+In this configuration ``intel_pstate`` will set the processor's
+Energy-Performance Preference (EPP) knob (if supported) or its
+Energy-Performance Bias (EPB) knob (otherwise) to whatever value it was
+previously set to via ``sysfs`` (or whatever default value it was
+set to by the platform firmware).  This usually causes the processor's
+internal P-state selection logic to be less performance-focused.
+
+Active Mode Without HWP
+~~~~~~~~~~~~~~~~~~~~~~~
+
+This is the default operation mode for processors that do not support the HWP
+feature.  It also is used by default with the ``intel_pstate=no_hwp`` argument
+in the kernel command line.  However, in this mode ``intel_pstate`` may refuse
+to work with the given processor if it does not recognize it.  [Note that
+``intel_pstate`` will never refuse to work with any processor with the HWP
+feature enabled.]
+
+In this mode ``intel_pstate`` registers utilization update callbacks with the
+CPU scheduler in order to run a P-state selection algorithm, either
+``powersave`` or ``performance``, depending on the ``scaling_cur_freq`` policy
+setting in ``sysfs``.  The current CPU frequency information to be made
+available from the ``scaling_cur_freq`` policy attribute in ``sysfs`` is
+periodically updated by those utilization update callbacks too.
+
+``performance``
+...............
+
+Without HWP, this P-state selection algorithm is always the same regardless of
+the processor model and platform configuration.
+
+It selects the maximum P-state it is allowed to use, subject to limits set via
+``sysfs``, every time the P-state selection computations are carried out by the
+driver's utilization update callback for the given CPU (that does not happen
+more often than every 10 ms), but the hardware configuration will not be changed
+if the new P-state is the same as the current one.
+
+This is the default P-state selection algorithm if the
+:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option
+is set.
+
+``powersave``
+.............
+
+Without HWP, this P-state selection algorithm generally depends on the
+processor model and/or the system profile setting in the ACPI tables and there
+are two variants of it.
+
+One of them is used with processors from the Atom line and (regardless of the
+processor model) on platforms with the system profile in the ACPI tables set to
+"mobile" (laptops mostly), "tablet", "appliance PC", "desktop", or
+"workstation".  It is also used with processors supporting the HWP feature if
+that feature has not been enabled (that is, with the ``intel_pstate=no_hwp``
+argument in the kernel command line).  It is similar to the algorithm
+implemented by the generic ``schedutil`` scaling governor except that the
+utilization metric used by it is based on numbers coming from feedback
+registers of the CPU.  It generally selects P-states proportional to the
+current CPU utilization, so it is referred to as the "proportional" algorithm.
+
+The second variant of the ``powersave`` P-state selection algorithm, used in all
+of the other cases (generally, on processors from the Core line, so it is
+referred to as the "Core" algorithm), is based on the values read from the APERF
+and MPERF feedback registers and the previously requested target P-state.
+It does not really take CPU utilization into account explicitly, but as a rule
+it causes the CPU P-state to ramp up very quickly in response to increased
+utilization which is generally desirable in server environments.
+
+Regardless of the variant, this algorithm is run by the driver's utilization
+update callback for the given CPU when it is invoked by the CPU scheduler, but
+not more often than every 10 ms (that can be tweaked via ``debugfs`` in `this
+particular case <Tuning Interface in debugfs_>`_).  Like in the ``performance``
+case, the hardware configuration is not touched if the new P-state turns out to
+be the same as the current one.
+
+This is the default P-state selection algorithm if the
+:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option
+is not set.
+
+Passive Mode
+------------
+
+This mode is used if the ``intel_pstate=passive`` argument is passed to the
+kernel in the command line (it implies the ``intel_pstate=no_hwp`` setting too).
+Like in the active mode without HWP support, in this mode ``intel_pstate`` may
+refuse to work with the given processor if it does not recognize it.
+
+If the driver works in this mode, the ``scaling_driver`` policy attribute in
+``sysfs`` for all ``CPUFreq`` policies contains the string "intel_cpufreq".
+Then, the driver behaves like a regular ``CPUFreq`` scaling driver.  That is,
+it is invoked by generic scaling governors when necessary to talk to the
+hardware in order to change the P-state of a CPU (in particular, the
+``schedutil`` governor can invoke it directly from scheduler context).
+
+While in this mode, ``intel_pstate`` can be used with all of the (generic)
+scaling governors listed by the ``scaling_available_governors`` policy attribute
+in ``sysfs`` (and the P-state selection algorithms described above are not
+used).  Then, it is responsible for the configuration of policy objects
+corresponding to CPUs and provides the ``CPUFreq`` core (and the scaling
+governors attached to the policy objects) with accurate information on the
+maximum and minimum operating frequencies supported by the hardware (including
+the so-called "turbo" frequency ranges).  In other words, in the passive mode
+the entire range of available P-states is exposed by ``intel_pstate`` to the
+``CPUFreq`` core.  However, in this mode the driver does not register
+utilization update callbacks with the CPU scheduler and the ``scaling_cur_freq``
+information comes from the ``CPUFreq`` core (and is the last frequency selected
+by the current scaling governor for the given policy).
+
+
+.. _turbo:
+
+Turbo P-states Support
+======================
+
+In the majority of cases, the entire range of P-states available to
+``intel_pstate`` can be divided into two sub-ranges that correspond to
+different types of processor behavior, above and below a boundary that
+will be referred to as the "turbo threshold" in what follows.
+
+The P-states above the turbo threshold are referred to as "turbo P-states" and
+the whole sub-range of P-states they belong to is referred to as the "turbo
+range".  These names are related to the Turbo Boost technology allowing a
+multicore processor to opportunistically increase the P-state of one or more
+cores if there is enough power to do that and if that is not going to cause the
+thermal envelope of the processor package to be exceeded.
+
+Specifically, if software sets the P-state of a CPU core within the turbo range
+(that is, above the turbo threshold), the processor is permitted to take over
+performance scaling control for that core and put it into turbo P-states of its
+choice going forward.  However, that permission is interpreted differently by
+different processor generations.  Namely, the Sandy Bridge generation of
+processors will never use any P-states above the last one set by software for
+the given core, even if it is within the turbo range, whereas all of the later
+processor generations will take it as a license to use any P-states from the
+turbo range, even above the one set by software.  In other words, on those
+processors setting any P-state from the turbo range will enable the processor
+to put the given core into all turbo P-states up to and including the maximum
+supported one as it sees fit.
+
+One important property of turbo P-states is that they are not sustainable.  More
+precisely, there is no guarantee that any CPUs will be able to stay in any of
+those states indefinitely, because the power distribution within the processor
+package may change over time  or the thermal envelope it was designed for might
+be exceeded if a turbo P-state was used for too long.
+
+In turn, the P-states below the turbo threshold generally are sustainable.  In
+fact, if one of them is set by software, the processor is not expected to change
+it to a lower one unless in a thermal stress or a power limit violation
+situation (a higher P-state may still be used if it is set for another CPU in
+the same package at the same time, for example).
+
+Some processors allow multiple cores to be in turbo P-states at the same time,
+but the maximum P-state that can be set for them generally depends on the number
+of cores running concurrently.  The maximum turbo P-state that can be set for 3
+cores at the same time usually is lower than the analogous maximum P-state for
+2 cores, which in turn usually is lower than the maximum turbo P-state that can
+be set for 1 core.  The one-core maximum turbo P-state is thus the maximum
+supported one overall.
+
+The maximum supported turbo P-state, the turbo threshold (the maximum supported
+non-turbo P-state) and the minimum supported P-state are specific to the
+processor model and can be determined by reading the processor's model-specific
+registers (MSRs).  Moreover, some processors support the Configurable TDP
+(Thermal Design Power) feature and, when that feature is enabled, the turbo
+threshold effectively becomes a configurable value that can be set by the
+platform firmware.
+
+Unlike ``_PSS`` objects in the ACPI tables, ``intel_pstate`` always exposes
+the entire range of available P-states, including the whole turbo range, to the
+``CPUFreq`` core and (in the passive mode) to generic scaling governors.  This
+generally causes turbo P-states to be set more often when ``intel_pstate`` is
+used relative to ACPI-based CPU performance scaling (see `below <acpi-cpufreq_>`_
+for more information).
+
+Moreover, since ``intel_pstate`` always knows what the real turbo threshold is
+(even if the Configurable TDP feature is enabled in the processor), its
+``no_turbo`` attribute in ``sysfs`` (described `below <no_turbo_attr_>`_) should
+work as expected in all cases (that is, if set to disable turbo P-states, it
+always should prevent ``intel_pstate`` from using them).
+
+
+Processor Support
+=================
+
+To handle a given processor ``intel_pstate`` requires a number of different
+pieces of information on it to be known, including:
+
+ * The minimum supported P-state.
+
+ * The maximum supported `non-turbo P-state <turbo_>`_.
+
+ * Whether or not turbo P-states are supported at all.
+
+ * The maximum supported `one-core turbo P-state <turbo_>`_ (if turbo P-states
+   are supported).
+
+ * The scaling formula to translate the driver's internal representation
+   of P-states into frequencies and the other way around.
+
+Generally, ways to obtain that information are specific to the processor model
+or family.  Although it often is possible to obtain all of it from the processor
+itself (using model-specific registers), there are cases in which hardware
+manuals need to be consulted to get to it too.
+
+For this reason, there is a list of supported processors in ``intel_pstate`` and
+the driver initialization will fail if the detected processor is not in that
+list, unless it supports the `HWP feature <Active Mode_>`_.  [The interface to
+obtain all of the information listed above is the same for all of the processors
+supporting the HWP feature, which is why they all are supported by
+``intel_pstate``.]
+
+
+User Space Interface in ``sysfs``
+=================================
+
+Global Attributes
+-----------------
+
+``intel_pstate`` exposes several global attributes (files) in ``sysfs`` to
+control its functionality at the system level.  They are located in the
+``/sys/devices/system/cpu/cpufreq/intel_pstate/`` directory and affect all
+CPUs.
+
+Some of them are not present if the ``intel_pstate=per_cpu_perf_limits``
+argument is passed to the kernel in the command line.
+
+``max_perf_pct``
+	Maximum P-state the driver is allowed to set in percent of the
+	maximum supported performance level (the highest supported `turbo
+	P-state <turbo_>`_).
+
+	This attribute will not be exposed if the
+	``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel
+	command line.
+
+``min_perf_pct``
+	Minimum P-state the driver is allowed to set in percent of the
+	maximum supported performance level (the highest supported `turbo
+	P-state <turbo_>`_).
+
+	This attribute will not be exposed if the
+	``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel
+	command line.
+
+``num_pstates``
+	Number of P-states supported by the processor (between 0 and 255
+	inclusive) including both turbo and non-turbo P-states (see
+	`Turbo P-states Support`_).
+
+	The value of this attribute is not affected by the ``no_turbo``
+	setting described `below <no_turbo_attr_>`_.
+
+	This attribute is read-only.
+
+``turbo_pct``
+	Ratio of the `turbo range <turbo_>`_ size to the size of the entire
+	range of supported P-states, in percent.
+
+	This attribute is read-only.
+
+.. _no_turbo_attr:
+
+``no_turbo``
+	If set (equal to 1), the driver is not allowed to set any turbo P-states
+	(see `Turbo P-states Support`_).  If unset (equalt to 0, which is the
+	default), turbo P-states can be set by the driver.
+	[Note that ``intel_pstate`` does not support the general ``boost``
+	attribute (supported by some other scaling drivers) which is replaced
+	by this one.]
+
+	This attrubute does not affect the maximum supported frequency value
+	supplied to the ``CPUFreq`` core and exposed via the policy interface,
+	but it affects the maximum possible value of per-policy P-state	limits
+	(see `Interpretation of Policy Attributes`_ below for details).
+
+.. _status_attr:
+
+``status``
+	Operation mode of the driver: "active", "passive" or "off".
+
+	"active"
+		The driver is functional and in the `active mode
+		<Active Mode_>`_.
+
+	"passive"
+		The driver is functional and in the `passive mode
+		<Passive Mode_>`_.
+
+	"off"
+		The driver is not functional (it is not registered as a scaling
+		driver with the ``CPUFreq`` core).
+
+	This attribute can be written to in order to change the driver's
+	operation mode or to unregister it.  The string written to it must be
+	one of the possible values of it and, if successful, the write will
+	cause the driver to switch over to the operation mode represented by
+	that string - or to be unregistered in the "off" case.  [Actually,
+	switching over from the active mode to the passive mode or the other
+	way around causes the driver to be unregistered and registered again
+	with a different set of callbacks, so all of its settings (the global
+	as well as the per-policy ones) are then reset to their default
+	values, possibly depending on the target operation mode.]
+
+	That only is supported in some configurations, though (for example, if
+	the `HWP feature is enabled in the processor <Active Mode With HWP_>`_,
+	the operation mode of the driver cannot be changed), and if it is not
+	supported in the current configuration, writes to this attribute with
+	fail with an appropriate error.
+
+Interpretation of Policy Attributes
+-----------------------------------
+
+The interpretation of some ``CPUFreq`` policy attributes described in
+:doc:`cpufreq` is special with ``intel_pstate`` as the current scaling driver
+and it generally depends on the driver's `operation mode <Operation Modes_>`_.
+
+First of all, the values of the ``cpuinfo_max_freq``, ``cpuinfo_min_freq`` and
+``scaling_cur_freq`` attributes are produced by applying a processor-specific
+multiplier to the internal P-state representation used by ``intel_pstate``.
+Also, the values of the ``scaling_max_freq`` and ``scaling_min_freq``
+attributes are capped by the frequency corresponding to the maximum P-state that
+the driver is allowed to set.
+
+If the ``no_turbo`` `global attribute <no_turbo_attr_>`_ is set, the driver is
+not allowed to use turbo P-states, so the maximum value of ``scaling_max_freq``
+and ``scaling_min_freq`` is limited to the maximum non-turbo P-state frequency.
+Accordingly, setting ``no_turbo`` causes ``scaling_max_freq`` and
+``scaling_min_freq`` to go down to that value if they were above it before.
+However, the old values of ``scaling_max_freq`` and ``scaling_min_freq`` will be
+restored after unsetting ``no_turbo``, unless these attributes have been written
+to after ``no_turbo`` was set.
+
+If ``no_turbo`` is not set, the maximum possible value of ``scaling_max_freq``
+and ``scaling_min_freq`` corresponds to the maximum supported turbo P-state,
+which also is the value of ``cpuinfo_max_freq`` in either case.
+
+Next, the following policy attributes have special meaning if
+``intel_pstate`` works in the `active mode <Active Mode_>`_:
+
+``scaling_available_governors``
+	List of P-state selection algorithms provided by ``intel_pstate``.
+
+``scaling_governor``
+	P-state selection algorithm provided by ``intel_pstate`` currently in
+	use with the given policy.
+
+``scaling_cur_freq``
+	Frequency of the average P-state of the CPU represented by the given
+	policy for the time interval between the last two invocations of the
+	driver's utilization update callback by the CPU scheduler for that CPU.
+
+The meaning of these attributes in the `passive mode <Passive Mode_>`_ is the
+same as for other scaling drivers.
+
+Additionally, the value of the ``scaling_driver`` attribute for ``intel_pstate``
+depends on the operation mode of the driver.  Namely, it is either
+"intel_pstate" (in the `active mode <Active Mode_>`_) or "intel_cpufreq" (in the
+`passive mode <Passive Mode_>`_).
+
+Coordination of P-State Limits
+------------------------------
+
+``intel_pstate`` allows P-state limits to be set in two ways: with the help of
+the ``max_perf_pct`` and ``min_perf_pct`` `global attributes
+<Global Attributes_>`_ or via the ``scaling_max_freq`` and ``scaling_min_freq``
+``CPUFreq`` policy attributes.  The coordination between those limits is based
+on the following rules, regardless of the current operation mode of the driver:
+
+ 1. All CPUs are affected by the global limits (that is, none of them can be
+    requested to run faster than the global maximum and none of them can be
+    requested to run slower than the global minimum).
+
+ 2. Each individual CPU is affected by its own per-policy limits (that is, it
+    cannot be requested to run faster than its own per-policy maximum and it
+    cannot be requested to run slower than its own per-policy minimum).
+
+ 3. The global and per-policy limits can be set independently.
+
+If the `HWP feature is enabled in the processor <Active Mode With HWP_>`_, the
+resulting effective values are written into its registers whenever the limits
+change in order to request its internal P-state selection logic to always set
+P-states within these limits.  Otherwise, the limits are taken into account by
+scaling governors (in the `passive mode <Passive Mode_>`_) and by the driver
+every time before setting a new P-state for a CPU.
+
+Additionally, if the ``intel_pstate=per_cpu_perf_limits`` command line argument
+is passed to the kernel, ``max_perf_pct`` and ``min_perf_pct`` are not exposed
+at all and the only way to set the limits is by using the policy attributes.
+
+
+Energy vs Performance Hints
+---------------------------
+
+If ``intel_pstate`` works in the `active mode with the HWP feature enabled
+<Active Mode With HWP_>`_ in the processor, additional attributes are present
+in every ``CPUFreq`` policy directory in ``sysfs``.  They are intended to allow
+user space to help ``intel_pstate`` to adjust the processor's internal P-state
+selection logic by focusing it on performance or on energy-efficiency, or
+somewhere between the two extremes:
+
+``energy_performance_preference``
+	Current value of the energy vs performance hint for the given policy
+	(or the CPU represented by it).
+
+	The hint can be changed by writing to this attribute.
+
+``energy_performance_available_preferences``
+	List of strings that can be written to the
+	``energy_performance_preference`` attribute.
+
+	They represent different energy vs performance hints and should be
+	self-explanatory, except that ``default`` represents whatever hint
+	value was set by the platform firmware.
+
+Strings written to the ``energy_performance_preference`` attribute are
+internally translated to integer values written to the processor's
+Energy-Performance Preference (EPP) knob (if supported) or its
+Energy-Performance Bias (EPB) knob.
+
+[Note that tasks may by migrated from one CPU to another by the scheduler's
+load-balancing algorithm and if different energy vs performance hints are
+set for those CPUs, that may lead to undesirable outcomes.  To avoid such
+issues it is better to set the same energy vs performance hint for all CPUs
+or to pin every task potentially sensitive to them to a specific CPU.]
+
+.. _acpi-cpufreq:
+
+``intel_pstate`` vs ``acpi-cpufreq``
+====================================
+
+On the majority of systems supported by ``intel_pstate``, the ACPI tables
+provided by the platform firmware contain ``_PSS`` objects returning information
+that can be used for CPU performance scaling (refer to the `ACPI specification`_
+for details on the ``_PSS`` objects and the format of the information returned
+by them).
+
+The information returned by the ACPI ``_PSS`` objects is used by the
+``acpi-cpufreq`` scaling driver.  On systems supported by ``intel_pstate``
+the ``acpi-cpufreq`` driver uses the same hardware CPU performance scaling
+interface, but the set of P-states it can use is limited by the ``_PSS``
+output.
+
+On those systems each ``_PSS`` object returns a list of P-states supported by
+the corresponding CPU which basically is a subset of the P-states range that can
+be used by ``intel_pstate`` on the same system, with one exception: the whole
+`turbo range <turbo_>`_ is represented by one item in it (the topmost one).  By
+convention, the frequency returned by ``_PSS`` for that item is greater by 1 MHz
+than the frequency of the highest non-turbo P-state listed by it, but the
+corresponding P-state representation (following the hardware specification)
+returned for it matches the maximum supported turbo P-state (or is the
+special value 255 meaning essentially "go as high as you can get").
+
+The list of P-states returned by ``_PSS`` is reflected by the table of
+available frequencies supplied by ``acpi-cpufreq`` to the ``CPUFreq`` core and
+scaling governors and the minimum and maximum supported frequencies reported by
+it come from that list as well.  In particular, given the special representation
+of the turbo range described above, this means that the maximum supported
+frequency reported by ``acpi-cpufreq`` is higher by 1 MHz than the frequency
+of the highest supported non-turbo P-state listed by ``_PSS`` which, of course,
+affects decisions made by the scaling governors, except for ``powersave`` and
+``performance``.
+
+For example, if a given governor attempts to select a frequency proportional to
+estimated CPU load and maps the load of 100% to the maximum supported frequency
+(possibly multiplied by a constant), then it will tend to choose P-states below
+the turbo threshold if ``acpi-cpufreq`` is used as the scaling driver, because
+in that case the turbo range corresponds to a small fraction of the frequency
+band it can use (1 MHz vs 1 GHz or more).  In consequence, it will only go to
+the turbo range for the highest loads and the other loads above 50% that might
+benefit from running at turbo frequencies will be given non-turbo P-states
+instead.
+
+One more issue related to that may appear on systems supporting the
+`Configurable TDP feature <turbo_>`_ allowing the platform firmware to set the
+turbo threshold.  Namely, if that is not coordinated with the lists of P-states
+returned by ``_PSS`` properly, there may be more than one item corresponding to
+a turbo P-state in those lists and there may be a problem with avoiding the
+turbo range (if desirable or necessary).  Usually, to avoid using turbo
+P-states overall, ``acpi-cpufreq`` simply avoids using the topmost state listed
+by ``_PSS``, but that is not sufficient when there are other turbo P-states in
+the list returned by it.
+
+Apart from the above, ``acpi-cpufreq`` works like ``intel_pstate`` in the
+`passive mode <Passive Mode_>`_, except that the number of P-states it can set
+is limited to the ones listed by the ACPI ``_PSS`` objects.
+
+
+Kernel Command Line Options for ``intel_pstate``
+================================================
+
+Several kernel command line options can be used to pass early-configuration-time
+parameters to ``intel_pstate`` in order to enforce specific behavior of it.  All
+of them have to be prepended with the ``intel_pstate=`` prefix.
+
+``disable``
+	Do not register ``intel_pstate`` as the scaling driver even if the
+	processor is supported by it.
+
+``passive``
+	Register ``intel_pstate`` in the `passive mode <Passive Mode_>`_ to
+	start with.
+
+	This option implies the ``no_hwp`` one described below.
+
+``force``
+	Register ``intel_pstate`` as the scaling driver instead of
+	``acpi-cpufreq`` even if the latter is preferred on the given system.
+
+	This may prevent some platform features (such as thermal controls and
+	power capping) that rely on the availability of ACPI P-states
+	information from functioning as expected, so it should be used with
+	caution.
+
+	This option does not work with processors that are not supported by
+	``intel_pstate`` and on platforms where the ``pcc-cpufreq`` scaling
+	driver is used instead of ``acpi-cpufreq``.
+
+``no_hwp``
+	Do not enable the `hardware-managed P-states (HWP) feature
+	<Active Mode With HWP_>`_ even if it is supported by the processor.
+
+``hwp_only``
+	Register ``intel_pstate`` as the scaling driver only if the
+	`hardware-managed P-states (HWP) feature <Active Mode With HWP_>`_ is
+	supported by the processor.
+
+``support_acpi_ppc``
+	Take ACPI ``_PPC`` performance limits into account.
+
+	If the preferred power management profile in the FADT (Fixed ACPI
+	Description Table) is set to "Enterprise Server" or "Performance
+	Server", the ACPI ``_PPC`` limits are taken into account by default
+	and this option has no effect.
+
+``per_cpu_perf_limits``
+	Use per-logical-CPU P-State limits (see `Coordination of P-state
+	Limits`_ for details).
+
+
+Diagnostics and Tuning
+======================
+
+Trace Events
+------------
+
+There are two static trace events that can be used for ``intel_pstate``
+diagnostics.  One of them is the ``cpu_frequency`` trace event generally used
+by ``CPUFreq``, and the other one is the ``pstate_sample`` trace event specific
+to ``intel_pstate``.  Both of them are triggered by ``intel_pstate`` only if
+it works in the `active mode <Active Mode_>`_.
+
+The following sequence of shell commands can be used to enable them and see
+their output (if the kernel is generally configured to support event tracing)::
+
+ # cd /sys/kernel/debug/tracing/
+ # echo 1 > events/power/pstate_sample/enable
+ # echo 1 > events/power/cpu_frequency/enable
+ # cat trace
+ gnome-terminal--4510  [001] ..s.  1177.680733: pstate_sample: core_busy=107 scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 freq=2474476
+ cat-5235  [002] ..s.  1177.681723: cpu_frequency: state=2900000 cpu_id=2
+
+If ``intel_pstate`` works in the `passive mode <Passive Mode_>`_, the
+``cpu_frequency`` trace event will be triggered either by the ``schedutil``
+scaling governor (for the policies it is attached to), or by the ``CPUFreq``
+core (for the policies with other scaling governors).
+
+``ftrace``
+----------
+
+The ``ftrace`` interface can be used for low-level diagnostics of
+``intel_pstate``.  For example, to check how often the function to set a
+P-state is called, the ``ftrace`` filter can be set to to
+:c:func:`intel_pstate_set_pstate`::
+
+ # cd /sys/kernel/debug/tracing/
+ # cat available_filter_functions | grep -i pstate
+ intel_pstate_set_pstate
+ intel_pstate_cpu_init
+ ...
+ # echo intel_pstate_set_pstate > set_ftrace_filter
+ # echo function > current_tracer
+ # cat trace | head -15
+ # tracer: function
+ #
+ # entries-in-buffer/entries-written: 80/80   #P:4
+ #
+ #                              _-----=> irqs-off
+ #                             / _----=> need-resched
+ #                            | / _---=> hardirq/softirq
+ #                            || / _--=> preempt-depth
+ #                            ||| /     delay
+ #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
+ #              | |       |   ||||       |         |
+             Xorg-3129  [000] ..s.  2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func
+  gnome-terminal--4510  [002] ..s.  2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func
+      gnome-shell-3409  [001] ..s.  2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func
+           <idle>-0     [000] ..s.  2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func
+
+Tuning Interface in ``debugfs``
+-------------------------------
+
+The ``powersave`` algorithm provided by ``intel_pstate`` for `the Core line of
+processors in the active mode <powersave_>`_ is based on a `PID controller`_
+whose parameters were chosen to address a number of different use cases at the
+same time.  However, it still is possible to fine-tune it to a specific workload
+and the ``debugfs`` interface under ``/sys/kernel/debug/pstate_snb/`` is
+provided for this purpose.  [Note that the ``pstate_snb`` directory will be
+present only if the specific P-state selection algorithm matching the interface
+in it actually is in use.]
+
+The following files present in that directory can be used to modify the PID
+controller parameters at run time:
+
+| ``deadband``
+| ``d_gain_pct``
+| ``i_gain_pct``
+| ``p_gain_pct``
+| ``sample_rate_ms``
+| ``setpoint``
+
+Note, however, that achieving desirable results this way generally requires
+expert-level understanding of the power vs performance tradeoff, so extra care
+is recommended when attempting to do that.
+
+
+.. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
+.. _SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html
+.. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf
+.. _PID controller: https://en.wikipedia.org/wiki/PID_controller
diff --git a/Documentation/arm64/tagged-pointers.txt b/Documentation/arm64/tagged-pointers.txt
index d9995f1f51b3..a25a99e82bb1 100644
--- a/Documentation/arm64/tagged-pointers.txt
+++ b/Documentation/arm64/tagged-pointers.txt
@@ -11,24 +11,56 @@ in AArch64 Linux.
 The kernel configures the translation tables so that translations made
 via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of
 the virtual address ignored by the translation hardware. This frees up
-this byte for application use, with the following caveats:
+this byte for application use.
 
-	(1) The kernel requires that all user addresses passed to EL1
-	    are tagged with tag 0x00. This means that any syscall
-	    parameters containing user virtual addresses *must* have
-	    their top byte cleared before trapping to the kernel.
 
-	(2) Non-zero tags are not preserved when delivering signals.
-	    This means that signal handlers in applications making use
-	    of tags cannot rely on the tag information for user virtual
-	    addresses being maintained for fields inside siginfo_t.
-	    One exception to this rule is for signals raised in response
-	    to watchpoint debug exceptions, where the tag information
-	    will be preserved.
+Passing tagged addresses to the kernel
+--------------------------------------
 
-	(3) Special care should be taken when using tagged pointers,
-	    since it is likely that C compilers will not hazard two
-	    virtual addresses differing only in the upper byte.
+All interpretation of userspace memory addresses by the kernel assumes
+an address tag of 0x00.
+
+This includes, but is not limited to, addresses found in:
+
+ - pointer arguments to system calls, including pointers in structures
+   passed to system calls,
+
+ - the stack pointer (sp), e.g. when interpreting it to deliver a
+   signal,
+
+ - the frame pointer (x29) and frame records, e.g. when interpreting
+   them to generate a backtrace or call graph.
+
+Using non-zero address tags in any of these locations may result in an
+error code being returned, a (fatal) signal being raised, or other modes
+of failure.
+
+For these reasons, passing non-zero address tags to the kernel via
+system calls is forbidden, and using a non-zero address tag for sp is
+strongly discouraged.
+
+Programs maintaining a frame pointer and frame records that use non-zero
+address tags may suffer impaired or inaccurate debug and profiling
+visibility.
+
+
+Preserving tags
+---------------
+
+Non-zero tags are not preserved when delivering signals. This means that
+signal handlers in applications making use of tags cannot rely on the
+tag information for user virtual addresses being maintained for fields
+inside siginfo_t. One exception to this rule is for signals raised in
+response to watchpoint debug exceptions, where the tag information will
+be preserved.
 
 The architecture prevents the use of a tagged PC, so the upper byte will
 be set to a sign-extension of bit 55 on exception return.
+
+
+Other considerations
+--------------------
+
+Special care should be taken when using tagged pointers, since it is
+likely that C compilers will not hazard two virtual addresses differing
+only in the upper byte.
diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 1b87df6cd476..05e2822a80b3 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -11,6 +11,13 @@ controllers), BFQ's main features are:
   groups (switching back to time distribution when needed to keep
   throughput high).
 
+In its default configuration, BFQ privileges latency over
+throughput. So, when needed for achieving a lower latency, BFQ builds
+schedules that may lead to a lower throughput. If your main or only
+goal, for a given device, is to achieve the maximum-possible
+throughput at all times, then do switch off all low-latency heuristics
+for that device, by setting low_latency to 0. Full details in Section 3.
+
 On average CPUs, the current version of BFQ can handle devices
 performing at most ~30K IOPS; at most ~50 KIOPS on faster CPUs. As a
 reference, 30-50 KIOPS correspond to very high bandwidths with
@@ -375,11 +382,19 @@ default, low latency mode is enabled. If enabled, interactive and soft
 real-time applications are privileged and experience a lower latency,
 as explained in more detail in the description of how BFQ works.
 
-DO NOT enable this mode if you need full control on bandwidth
+DISABLE this mode if you need full control on bandwidth
 distribution. In fact, if it is enabled, then BFQ automatically
 increases the bandwidth share of privileged applications, as the main
 means to guarantee a lower latency to them.
 
+In addition, as already highlighted at the beginning of this document,
+DISABLE this mode if your only goal is to achieve a high throughput.
+In fact, privileging the I/O of some application over the rest may
+entail a lower throughput. To achieve the highest-possible throughput
+on a non-rotational device, setting slice_idle to 0 may be needed too
+(at the cost of giving up any strong guarantee on fairness and low
+latency).
+
 timeout_sync
 ------------
 
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 01ddeaf64b0f..9490f2845f06 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -632,7 +632,7 @@ to i/o submission, if the bio fields are likely to be accessed after the
 i/o is issued (since the bio may otherwise get freed in case i/o completion
 happens in the meantime).
 
-The bio_clone() routine may be used to duplicate a bio, where the clone
+The bio_clone_fast() routine may be used to duplicate a bio, where the clone
 shares the bio_vec_list with the original bio (i.e. both point to the
 same bio_vec_list). This would typically be used for splitting i/o requests
 in lvm or md.
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index e50b95c25868..dc5e2dcdbef4 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -918,6 +918,18 @@ PAGE_SIZE multiple when read back.
 
 		Number of major page faults incurred
 
+	  workingset_refault
+
+		Number of refaults of previously evicted pages
+
+	  workingset_activate
+
+		Number of refaulted pages that were immediately activated
+
+	  workingset_nodereclaim
+
+		Number of times a shadow node has been reclaimed
+
   memory.swap.current
 
 	A read-only single value file which exists on non-root
diff --git a/Documentation/core-api/atomic_ops.rst b/Documentation/core-api/atomic_ops.rst
index 55e43f1c80de..fce929144ccd 100644
--- a/Documentation/core-api/atomic_ops.rst
+++ b/Documentation/core-api/atomic_ops.rst
@@ -303,6 +303,11 @@ defined which accomplish this::
 	void smp_mb__before_atomic(void);
 	void smp_mb__after_atomic(void);
 
+Preceding a non-value-returning read-modify-write atomic operation with
+smp_mb__before_atomic() and following it with smp_mb__after_atomic()
+provides the same full ordering that is provided by value-returning
+read-modify-write atomic operations.
+
 For example, smp_mb__before_atomic() can be used like so::
 
 	obj->dead = 1;
diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt
deleted file mode 100644
index 3fdcdfd968ba..000000000000
--- a/Documentation/cpu-freq/intel-pstate.txt
+++ /dev/null
@@ -1,281 +0,0 @@
-Intel P-State driver
---------------------
-
-This driver provides an interface to control the P-State selection for the
-SandyBridge+ Intel processors.
-
-The following document explains P-States:
-http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
-As stated in the document, P-State doesn’t exactly mean a frequency. However, for
-the sake of the relationship with cpufreq, P-State and frequency are used
-interchangeably.
-
-Understanding the cpufreq core governors and policies are important before
-discussing more details about the Intel P-State driver. Based on what callbacks
-a cpufreq driver provides to the cpufreq core, it can support two types of
-drivers:
-- with target_index() callback: In this mode, the drivers using cpufreq core
-simply provide the minimum and maximum frequency limits and an additional
-interface target_index() to set the current frequency. The cpufreq subsystem
-has a number of scaling governors ("performance", "powersave", "ondemand",
-etc.). Depending on which governor is in use, cpufreq core will call for
-transitions to a specific frequency using target_index() callback.
-- setpolicy() callback: In this mode, drivers do not provide target_index()
-callback, so cpufreq core can't request a transition to a specific frequency.
-The driver provides minimum and maximum frequency limits and callbacks to set a
-policy. The policy in cpufreq sysfs is referred to as the "scaling governor".
-The cpufreq core can request the driver to operate in any of the two policies:
-"performance" and "powersave". The driver decides which frequency to use based
-on the above policy selection considering minimum and maximum frequency limits.
-
-The Intel P-State driver falls under the latter category, which implements the
-setpolicy() callback. This driver decides what P-State to use based on the
-requested policy from the cpufreq core. If the processor is capable of
-selecting its next P-State internally, then the driver will offload this
-responsibility to the processor (aka HWP: Hardware P-States). If not, the
-driver implements algorithms to select the next P-State.
-
-Since these policies are implemented in the driver, they are not same as the
-cpufreq scaling governors implementation, even if they have the same name in
-the cpufreq sysfs (scaling_governors). For example the "performance" policy is
-similar to cpufreq’s "performance" governor, but "powersave" is completely
-different than the cpufreq "powersave" governor. The strategy here is similar
-to cpufreq "ondemand", where the requested P-State is related to the system load.
-
-Sysfs Interface
-
-In addition to the frequency-controlling interfaces provided by the cpufreq
-core, the driver provides its own sysfs files to control the P-State selection.
-These files have been added to /sys/devices/system/cpu/intel_pstate/.
-Any changes made to these files are applicable to all CPUs (even in a
-multi-package system, Refer to later section on placing "Per-CPU limits").
-
-      max_perf_pct: Limits the maximum P-State that will be requested by
-      the driver. It states it as a percentage of the available performance. The
-      available (P-State) performance may be reduced by the no_turbo
-      setting described below.
-
-      min_perf_pct: Limits the minimum P-State that will be requested by
-      the driver. It states it as a percentage of the max (non-turbo)
-      performance level.
-
-      no_turbo: Limits the driver to selecting P-State below the turbo
-      frequency range.
-
-      turbo_pct: Displays the percentage of the total performance that
-      is supported by hardware that is in the turbo range. This number
-      is independent of whether turbo has been disabled or not.
-
-      num_pstates: Displays the number of P-States that are supported
-      by hardware. This number is independent of whether turbo has
-      been disabled or not.
-
-For example, if a system has these parameters:
-	Max 1 core turbo ratio: 0x21 (Max 1 core ratio is the maximum P-State)
-	Max non turbo ratio: 0x17
-	Minimum ratio : 0x08 (Here the ratio is called max efficiency ratio)
-
-Sysfs will show :
-	max_perf_pct:100, which corresponds to 1 core ratio
-	min_perf_pct:24, max_efficiency_ratio / max 1 Core ratio
-	no_turbo:0, turbo is not disabled
-	num_pstates:26 = (max 1 Core ratio - Max Efficiency Ratio + 1)
-	turbo_pct:39 = (max 1 core ratio - max non turbo ratio) / num_pstates
-
-Refer to "Intel® 64 and IA-32 Architectures Software Developer’s Manual
-Volume 3: System Programming Guide" to understand ratios.
-
-There is one more sysfs attribute in /sys/devices/system/cpu/intel_pstate/
-that can be used for controlling the operation mode of the driver:
-
-      status: Three settings are possible:
-      "off"     - The driver is not in use at this time.
-      "active"  - The driver works as a P-state governor (default).
-      "passive" - The driver works as a regular cpufreq one and collaborates
-                  with the generic cpufreq governors (it sets P-states as
-                  requested by those governors).
-      The current setting is returned by reads from this attribute.  Writing one
-      of the above strings to it changes the operation mode as indicated by that
-      string, if possible.  If HW-managed P-states (HWP) are enabled, it is not
-      possible to change the driver's operation mode and attempts to write to
-      this attribute will fail.
-
-cpufreq sysfs for Intel P-State
-
-Since this driver registers with cpufreq, cpufreq sysfs is also presented.
-There are some important differences, which need to be considered.
-
-scaling_cur_freq: This displays the real frequency which was used during
-the last sample period instead of what is requested. Some other cpufreq driver,
-like acpi-cpufreq, displays what is requested (Some changes are on the
-way to fix this for acpi-cpufreq driver). The same is true for frequencies
-displayed at /proc/cpuinfo.
-
-scaling_governor: This displays current active policy. Since each CPU has a
-cpufreq sysfs, it is possible to set a scaling governor to each CPU. But this
-is not possible with Intel P-States, as there is one common policy for all
-CPUs. Here, the last requested policy will be applicable to all CPUs. It is
-suggested that one use the cpupower utility to change policy to all CPUs at the
-same time.
-
-scaling_setspeed: This attribute can never be used with Intel P-State.
-
-scaling_max_freq/scaling_min_freq: This interface can be used similarly to
-the max_perf_pct/min_perf_pct of Intel P-State sysfs. However since frequencies
-are converted to nearest possible P-State, this is prone to rounding errors.
-This method is not preferred to limit performance.
-
-affected_cpus: Not used
-related_cpus: Not used
-
-For contemporary Intel processors, the frequency is controlled by the
-processor itself and the P-State exposed to software is related to
-performance levels.  The idea that frequency can be set to a single
-frequency is fictional for Intel Core processors. Even if the scaling
-driver selects a single P-State, the actual frequency the processor
-will run at is selected by the processor itself.
-
-Per-CPU limits
-
-The kernel command line option "intel_pstate=per_cpu_perf_limits" forces
-the intel_pstate driver to use per-CPU performance limits.  When it is set,
-the sysfs control interface described above is subject to limitations.
-- The following controls are not available for both read and write
-	/sys/devices/system/cpu/intel_pstate/max_perf_pct
-	/sys/devices/system/cpu/intel_pstate/min_perf_pct
-- The following controls can be used to set performance limits, as far as the
-architecture of the processor permits:
-	/sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq
-	/sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq
-	/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
-- User can still observe turbo percent and number of P-States from
-	/sys/devices/system/cpu/intel_pstate/turbo_pct
-	/sys/devices/system/cpu/intel_pstate/num_pstates
-- User can read write system wide turbo status
-	/sys/devices/system/cpu/no_turbo
-
-Support of energy performance hints
-It is possible to provide hints to the HWP algorithms in the processor
-to be more performance centric to more energy centric. When the driver
-is using HWP, two additional cpufreq sysfs attributes are presented for
-each logical CPU.
-These attributes are:
-	- energy_performance_available_preferences
-	- energy_performance_preference
-
-To get list of supported hints:
-$ cat energy_performance_available_preferences
-    default performance balance_performance balance_power power
-
-The current preference can be read or changed via cpufreq sysfs
-attribute "energy_performance_preference". Reading from this attribute
-will display current effective setting. User can write any of the valid
-preference string to this attribute. User can always restore to power-on
-default by writing "default".
-
-Since threads can migrate to different CPUs, this is possible that the
-new CPU may have different energy performance preference than the previous
-one. To avoid such issues, either threads can be pinned to specific CPUs
-or set the same energy performance preference value to all CPUs.
-
-Tuning Intel P-State driver
-
-When the performance can be tuned using PID (Proportional Integral
-Derivative) controller, debugfs files are provided for adjusting performance.
-They are presented under:
-/sys/kernel/debug/pstate_snb/
-
-The PID tunable parameters are:
-      deadband
-      d_gain_pct
-      i_gain_pct
-      p_gain_pct
-      sample_rate_ms
-      setpoint
-
-To adjust these parameters, some understanding of driver implementation is
-necessary. There are some tweeks described here, but be very careful. Adjusting
-them requires expert level understanding of power and performance relationship.
-These limits are only useful when the "powersave" policy is active.
-
--To make the system more responsive to load changes, sample_rate_ms can
-be adjusted  (current default is 10ms).
--To make the system use higher performance, even if the load is lower, setpoint
-can be adjusted to a lower number. This will also lead to faster ramp up time
-to reach the maximum P-State.
-If there are no derivative and integral coefficients, The next P-State will be
-equal to:
-	current P-State - ((setpoint - current cpu load) * p_gain_pct)
-
-For example, if the current PID parameters are (Which are defaults for the core
-processors like SandyBridge):
-      deadband = 0
-      d_gain_pct = 0
-      i_gain_pct = 0
-      p_gain_pct = 20
-      sample_rate_ms = 10
-      setpoint = 97
-
-If the current P-State = 0x08 and current load = 100, this will result in the
-next P-State = 0x08 - ((97 - 100) * 0.2) = 8.6 (rounded to 9). Here the P-State
-goes up by only 1. If during next sample interval the current load doesn't
-change and still 100, then P-State goes up by one again. This process will
-continue as long as the load is more than the setpoint until the maximum P-State
-is reached.
-
-For the same load at setpoint = 60, this will result in the next P-State
-= 0x08 - ((60 - 100) * 0.2) = 16
-So by changing the setpoint from 97 to 60, there is an increase of the
-next P-State from 9 to 16. So this will make processor execute at higher
-P-State for the same CPU load. If the load continues to be more than the
-setpoint during next sample intervals, then P-State will go up again till the
-maximum P-State is reached. But the ramp up time to reach the maximum P-State
-will be much faster when the setpoint is 60 compared to 97.
-
-Debugging Intel P-State driver
-
-Event tracing
-To debug P-State transition, the Linux event tracing interface can be used.
-There are two specific events, which can be enabled (Provided the kernel
-configs related to event tracing are enabled).
-
-# cd /sys/kernel/debug/tracing/
-# echo 1 > events/power/pstate_sample/enable
-# echo 1 > events/power/cpu_frequency/enable
-# cat trace
-gnome-terminal--4510  [001] ..s.  1177.680733: pstate_sample: core_busy=107
-	scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618
-		freq=2474476
-cat-5235  [002] ..s.  1177.681723: cpu_frequency: state=2900000 cpu_id=2
-
-
-Using ftrace
-
-If function level tracing is required, the Linux ftrace interface can be used.
-For example if we want to check how often a function to set a P-State is
-called, we can set ftrace filter to intel_pstate_set_pstate.
-
-# cd /sys/kernel/debug/tracing/
-# cat available_filter_functions | grep -i pstate
-intel_pstate_set_pstate
-intel_pstate_cpu_init
-...
-
-# echo intel_pstate_set_pstate > set_ftrace_filter
-# echo function > current_tracer
-# cat trace | head -15
-# tracer: function
-#
-# entries-in-buffer/entries-written: 80/80   #P:4
-#
-#                              _-----=> irqs-off
-#                             / _----=> need-resched
-#                            | / _---=> hardirq/softirq
-#                            || / _--=> preempt-depth
-#                            ||| /     delay
-#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
-#              | |       |   ||||       |         |
-            Xorg-3129  [000] ..s.  2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func
- gnome-terminal--4510  [002] ..s.  2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func
-     gnome-shell-3409  [001] ..s.  2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func
-          <idle>-0     [000] ..s.  2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func
diff --git a/Documentation/dev-tools/sparse.rst b/Documentation/dev-tools/sparse.rst
index ffdcc97f6f5a..78aa00a604a0 100644
--- a/Documentation/dev-tools/sparse.rst
+++ b/Documentation/dev-tools/sparse.rst
@@ -103,9 +103,3 @@ have already built it.
 
 The optional make variable CF can be used to pass arguments to sparse.  The
 build system passes -Wbitwise to sparse automatically.
-
-Checking RCU annotations
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-RCU annotations are not checked by default.  To enable RCU annotation
-checks, include -DCONFIG_SPARSE_RCU_POINTER in your CF flags.
diff --git a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt
new file mode 100644
index 000000000000..d38834c67dff
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt
@@ -0,0 +1,31 @@
+OP-TEE Device Tree Bindings
+
+OP-TEE is a piece of software using hardware features to provide a Trusted
+Execution Environment. The security can be provided with ARM TrustZone, but
+also by virtualization or a separate chip.
+
+We're using "linaro" as the first part of the compatible property for
+the reference implementation maintained by Linaro.
+
+* OP-TEE based on ARM TrustZone required properties:
+
+- compatible     : should contain "linaro,optee-tz"
+
+- method         : The method of calling the OP-TEE Trusted OS. Permitted
+                   values are:
+
+                   "smc" : SMC #0, with the register assignments specified
+		           in drivers/tee/optee/optee_smc.h
+
+                   "hvc" : HVC #0, with the register assignments specified
+		           in drivers/tee/optee/optee_smc.h
+
+
+
+Example:
+	firmware {
+		optee {
+			compatible = "linaro,optee-tz";
+			method = "smc";
+		};
+	};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt
index cb0054ac7121..cd977db7630c 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt
@@ -7,6 +7,7 @@ Required Properties:
 
 - compatible: Should be one of:
 	- "mediatek,mt2701-apmixedsys"
+	- "mediatek,mt6797-apmixedsys"
 	- "mediatek,mt8135-apmixedsys"
 	- "mediatek,mt8173-apmixedsys"
 - #clock-cells: Must be 1
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt
index f6a916686f4c..047b11ae5f45 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt
@@ -7,6 +7,7 @@ Required Properties:
 
 - compatible: Should be one of:
 	- "mediatek,mt2701-imgsys", "syscon"
+	- "mediatek,mt6797-imgsys", "syscon"
 	- "mediatek,mt8173-imgsys", "syscon"
 - #clock-cells: Must be 1
 
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt
index 1620ec2a5a3f..58d58e2006b8 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt
@@ -8,6 +8,7 @@ Required Properties:
 
 - compatible: Should be one of:
 	- "mediatek,mt2701-infracfg", "syscon"
+	- "mediatek,mt6797-infracfg", "syscon"
 	- "mediatek,mt8135-infracfg", "syscon"
 	- "mediatek,mt8173-infracfg", "syscon"
 - #clock-cells: Must be 1
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt
index 67dd2e473d25..70529e0b58e9 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt
@@ -7,6 +7,7 @@ Required Properties:
 
 - compatible: Should be one of:
 	- "mediatek,mt2701-mmsys", "syscon"
+	- "mediatek,mt6797-mmsys", "syscon"
 	- "mediatek,mt8173-mmsys", "syscon"
 - #clock-cells: Must be 1
 
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt
index 9f2fe7860114..ec93ecbb9f3c 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt
@@ -7,6 +7,7 @@ Required Properties:
 
 - compatible: Should be one of:
 	- "mediatek,mt2701-topckgen"
+	- "mediatek,mt6797-topckgen"
 	- "mediatek,mt8135-topckgen"
 	- "mediatek,mt8173-topckgen"
 - #clock-cells: Must be 1
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt
index 2440f73450c3..d150104f928a 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt
@@ -7,6 +7,7 @@ Required Properties:
 
 - compatible: Should be one of:
 	- "mediatek,mt2701-vdecsys", "syscon"
+	- "mediatek,mt6797-vdecsys", "syscon"
 	- "mediatek,mt8173-vdecsys", "syscon"
 - #clock-cells: Must be 1
 
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt
index 5bb2866a2b50..8a93be643647 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt
@@ -5,7 +5,8 @@ The Mediatek vencsys controller provides various clocks to the system.
 
 Required Properties:
 
-- compatible: Should be:
+- compatible: Should be one of:
+	- "mediatek,mt6797-vencsys", "syscon"
 	- "mediatek,mt8173-vencsys", "syscon"
 - #clock-cells: Must be 1
 
diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt b/Documentation/devicetree/bindings/clock/idt,versaclock5.txt
index 87e9c47a89a3..53d7e50ed875 100644
--- a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt
+++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.txt
@@ -6,18 +6,21 @@ from 3 to 12 output clocks.
 ==I2C device node==
 
 Required properties:
-- compatible:	shall be one of "idt,5p49v5923" , "idt,5p49v5933".
+- compatible:	shall be one of "idt,5p49v5923" , "idt,5p49v5933" ,
+		"idt,5p49v5935".
 - reg:		i2c device address, shall be 0x68 or 0x6a.
 - #clock-cells:	from common clock binding; shall be set to 1.
 - clocks:	from common clock binding; list of parent clock handles,
 		- 5p49v5923: (required) either or both of XTAL or CLKIN
 					reference clock.
-		- 5p49v5933: (optional) property not present (internal
+		- 5p49v5933 and
+		- 5p49v5935: (optional) property not present (internal
 					Xtal used) or CLKIN reference
 					clock.
 - clock-names:	from common clock binding; clock input names, can be
 		- 5p49v5923: (required) either or both of "xin", "clkin".
-		- 5p49v5933: (optional) property not present or "clkin".
+		- 5p49v5933 and
+		- 5p49v5935: (optional) property not present or "clkin".
 
 ==Mapping between clock specifier and physical pins==
 
@@ -34,6 +37,13 @@ clock specifier, the following mapping applies:
 	1 -- OUT1
 	2 -- OUT4
 
+5P49V5935:
+	0 -- OUT0_SEL_I2CB
+	1 -- OUT1
+	2 -- OUT2
+	3 -- OUT3
+	4 -- OUT4
+
 ==Example==
 
 /* 25MHz reference crystal */
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk1108-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.txt
index 4da126116cf0..161326a4f9c1 100644
--- a/Documentation/devicetree/bindings/clock/rockchip,rk1108-cru.txt
+++ b/Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.txt
@@ -1,12 +1,12 @@
-* Rockchip RK1108 Clock and Reset Unit
+* Rockchip RV1108 Clock and Reset Unit
 
-The RK1108 clock controller generates and supplies clock to various
+The RV1108 clock controller generates and supplies clock to various
 controllers within the SoC and also implements a reset controller for SoC
 peripherals.
 
 Required Properties:
 
-- compatible: should be "rockchip,rk1108-cru"
+- compatible: should be "rockchip,rv1108-cru"
 - reg: physical base address of the controller and length of memory mapped
   region.
 - #clock-cells: should be 1.
@@ -19,7 +19,7 @@ Optional Properties:
 
 Each clock is assigned an identifier and client nodes can use this identifier
 to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/rk1108-cru.h headers and can be
+preprocessor macros in the dt-bindings/clock/rv1108-cru.h headers and can be
 used in device tree sources. Similar macros exist for the reset sources in
 these files.
 
@@ -38,7 +38,7 @@ clock-output-names:
 Example: Clock controller node:
 
 	cru: cru@20200000 {
-		compatible = "rockchip,rk1108-cru";
+		compatible = "rockchip,rv1108-cru";
 		reg = <0x20200000 0x1000>;
 		rockchip,grf = <&grf>;
 
@@ -50,7 +50,7 @@ Example: UART controller node that consumes the clock generated by the clock
   controller:
 
 	uart0: serial@10230000 {
-		compatible = "rockchip,rk1108-uart", "snps,dw-apb-uart";
+		compatible = "rockchip,rv1108-uart", "snps,dw-apb-uart";
 		reg = <0x10230000 0x100>;
 		interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
 		reg-shift = <2>;
diff --git a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
index bae5668cf427..f465647a4dd2 100644
--- a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
+++ b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
@@ -7,9 +7,12 @@ Required properties :
 		- "allwinner,sun8i-a23-ccu"
 		- "allwinner,sun8i-a33-ccu"
 		- "allwinner,sun8i-h3-ccu"
+		- "allwinner,sun8i-h3-r-ccu"
 		- "allwinner,sun8i-v3s-ccu"
 		- "allwinner,sun9i-a80-ccu"
 		- "allwinner,sun50i-a64-ccu"
+		- "allwinner,sun50i-a64-r-ccu"
+		- "allwinner,sun50i-h5-ccu"
 
 - reg: Must contain the registers base address and length
 - clocks: phandle to the oscillators feeding the CCU. Two are needed:
@@ -19,7 +22,11 @@ Required properties :
 - #clock-cells : must contain 1
 - #reset-cells : must contain 1
 
-Example:
+For the PRCM CCUs on H3/A64, two more clocks are needed:
+- "pll-periph": the SoC's peripheral PLL from the main CCU
+- "iosc": the SoC's internal frequency oscillator
+
+Example for generic CCU:
 ccu: clock@01c20000 {
 	compatible = "allwinner,sun8i-h3-ccu";
 	reg = <0x01c20000 0x400>;
@@ -28,3 +35,13 @@ ccu: clock@01c20000 {
 	#clock-cells = <1>;
 	#reset-cells = <1>;
 };
+
+Example for PRCM CCU:
+r_ccu: clock@01f01400 {
+	compatible = "allwinner,sun50i-a64-r-ccu";
+	reg = <0x01f01400 0x100>;
+	clocks = <&osc24M>, <&osc32k>, <&iosc>, <&ccu CLK_PLL_PERIPH0>;
+	clock-names = "hosc", "losc", "iosc", "pll-periph";
+	#clock-cells = <1>;
+	#reset-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt b/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt
index 7a5c0e204c8e..e5a8b363d829 100644
--- a/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt
@@ -13,6 +13,8 @@ Required nodes:
 	Additional, the display node has to define properties:
 	- bits-per-pixel: Bits per pixel
 	- fsl,pcr: LCDC PCR value
+	A display node may optionally define
+	- fsl,aus-mode: boolean to enable AUS mode (only for imx21)
 
 Optional properties:
 - lcd-supply: Regulator for LCD supply voltage.
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
index 42c3bb2d53e8..01e331a5f3e7 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
@@ -41,9 +41,9 @@ Required properties:
 Optional properties:
 
 In order to use the GPIO lines in PWM mode, some additional optional
-properties are required. Only Armada 370 and XP support these properties.
+properties are required.
 
-- compatible: Must contain "marvell,armada-370-xp-gpio"
+- compatible: Must contain "marvell,armada-370-gpio"
 
 - reg: an additional register set is needed, for the GPIO Blink
   Counter on/off registers.
@@ -71,7 +71,7 @@ Example:
 		};
 
 		gpio1: gpio@18140 {
-			compatible = "marvell,armada-370-xp-gpio";
+			compatible = "marvell,armada-370-gpio";
 			reg = <0x18140 0x40>, <0x181c8 0x08>;
 			reg-names = "gpio", "pwm";
 			ngpios = <17>;
diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt
index 6db22103e2dd..025cf8c9324a 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt
@@ -36,7 +36,7 @@ Optional properties:
                 control gpios
 
  - threshold:   allows setting the "click"-threshold in the range
-                from 20 to 80.
+                from 0 to 80.
 
  - gain:        allows setting the sensitivity in the range from 0 to
                 31. Note that lower values indicate higher
diff --git a/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt b/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt
index 05485699d70e..9630ac0e4b56 100644
--- a/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt
+++ b/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt
@@ -16,6 +16,11 @@ Required properties:
 - reg:                  Base address of PMIC on Hi6220 SoC.
 - interrupt-controller: Hi655x has internal IRQs (has own IRQ domain).
 - pmic-gpios:           The GPIO used by PMIC IRQ.
+- #clock-cells:		From common clock binding; shall be set to 0
+
+Optional properties:
+- clock-output-names: From common clock binding to override the
+  default output clock name
 
 Example:
 	pmic: pmic@f8000000 {
@@ -24,4 +29,5 @@ Example:
 		interrupt-controller;
 		#interrupt-cells = <2>;
 		pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
+		#clock-cells = <0>;
 	}
diff --git a/Documentation/devicetree/bindings/mfd/stm32-timers.txt b/Documentation/devicetree/bindings/mfd/stm32-timers.txt
index bbd083f5600a..1db6e0057a63 100644
--- a/Documentation/devicetree/bindings/mfd/stm32-timers.txt
+++ b/Documentation/devicetree/bindings/mfd/stm32-timers.txt
@@ -31,7 +31,7 @@ Example:
 		compatible = "st,stm32-timers";
 		reg = <0x40010000 0x400>;
 		clocks = <&rcc 0 160>;
-		clock-names = "clk_int";
+		clock-names = "int";
 
 		pwm {
 			compatible = "st,stm32-pwm";
diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt
index e25436861867..9029b45b8a22 100644
--- a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt
+++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt
@@ -18,6 +18,8 @@ Optional properties:
   "ext_clock" (External clock provided to the card).
 - post-power-on-delay-ms : Delay in ms after powering the card and
 	de-asserting the reset-gpios (if any)
+- power-off-delay-us : Delay in us after asserting the reset-gpios (if any)
+	during power off of the card.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/mtd/atmel-nand.txt b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
index 3e7ee99d3949..f6bee57e453a 100644
--- a/Documentation/devicetree/bindings/mtd/atmel-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
@@ -1,4 +1,109 @@
-Atmel NAND flash
+Atmel NAND flash controller bindings
+
+The NAND flash controller node should be defined under the EBI bus (see
+Documentation/devicetree/bindings/memory-controllers/atmel,ebi.txt).
+One or several NAND devices can be defined under this NAND controller.
+The NAND controller might be connected to an ECC engine.
+
+* NAND controller bindings:
+
+Required properties:
+- compatible: should be one of the following
+	"atmel,at91rm9200-nand-controller"
+	"atmel,at91sam9260-nand-controller"
+	"atmel,at91sam9261-nand-controller"
+	"atmel,at91sam9g45-nand-controller"
+	"atmel,sama5d3-nand-controller"
+- ranges: empty ranges property to forward EBI ranges definitions.
+- #address-cells: should be set to 2.
+- #size-cells: should be set to 1.
+- atmel,nfc-io: phandle to the NFC IO block. Only required for sama5d3
+		controllers.
+- atmel,nfc-sram: phandle to the NFC SRAM block. Only required for sama5d3
+		  controllers.
+
+Optional properties:
+- ecc-engine: phandle to the PMECC block. Only meaningful if the SoC embeds
+	      a PMECC engine.
+
+* NAND device/chip bindings:
+
+Required properties:
+- reg: describes the CS lines assigned to the NAND device. If the NAND device
+       exposes multiple CS lines (multi-dies chips), your reg property will
+       contain X tuples of 3 entries.
+       1st entry: the CS line this NAND chip is connected to
+       2nd entry: the base offset of the memory region assigned to this
+		  device (always 0)
+       3rd entry: the memory region size (always 0x800000)
+
+Optional properties:
+- rb-gpios: the GPIO(s) used to check the Ready/Busy status of the NAND.
+- cs-gpios: the GPIO(s) used to control the CS line.
+- det-gpios: the GPIO used to detect if a Smartmedia Card is present.
+- atmel,rb: an integer identifying the native Ready/Busy pin. Only meaningful
+	    on sama5 SoCs.
+
+All generic properties described in
+Documentation/devicetree/bindings/mtd/{common,nand}.txt also apply to the NAND
+device node, and NAND partitions should be defined under the NAND node as
+described in Documentation/devicetree/bindings/mtd/partition.txt.
+
+* ECC engine (PMECC) bindings:
+
+Required properties:
+- compatible: should be one of the following
+	"atmel,at91sam9g45-pmecc"
+	"atmel,sama5d4-pmecc"
+	"atmel,sama5d2-pmecc"
+- reg: should contain 2 register ranges. The first one is pointing to the PMECC
+       block, and the second one to the PMECC_ERRLOC block.
+
+Example:
+
+	pmecc: ecc-engine@ffffc070 {
+		compatible = "atmel,at91sam9g45-pmecc";
+                reg = <0xffffc070 0x490>,
+                      <0xffffc500 0x100>;
+	};
+
+	ebi: ebi@10000000 {
+		compatible = "atmel,sama5d3-ebi";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		atmel,smc = <&hsmc>;
+		reg = <0x10000000 0x10000000
+		       0x40000000 0x30000000>;
+		ranges = <0x0 0x0 0x10000000 0x10000000
+			  0x1 0x0 0x40000000 0x10000000
+			  0x2 0x0 0x50000000 0x10000000
+			  0x3 0x0 0x60000000 0x10000000>;
+		clocks = <&mck>;
+
+                nand_controller: nand-controller {
+			compatible = "atmel,sama5d3-nand-controller";
+			atmel,nfc-sram = <&nfc_sram>;
+			atmel,nfc-io = <&nfc_io>;
+			ecc-engine = <&pmecc>;
+			#address-cells = <2>;
+			#size-cells = <1>;
+			ranges;
+
+			nand@3 {
+				reg = <0x3 0x0 0x800000>;
+				atmel,rb = <0>;
+
+				/*
+				 * Put generic NAND/MTD properties and
+				 * subnodes here.
+				 */
+			};
+		};
+	};
+
+-----------------------------------------------------------------------
+
+Deprecated bindings (should not be used in new device trees):
 
 Required properties:
 - compatible: The possible values are:
diff --git a/Documentation/devicetree/bindings/mtd/denali-nand.txt b/Documentation/devicetree/bindings/mtd/denali-nand.txt
index b04d03a1d499..e593bbeb2115 100644
--- a/Documentation/devicetree/bindings/mtd/denali-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/denali-nand.txt
@@ -1,11 +1,11 @@
 * Denali NAND controller
 
 Required properties:
-  - compatible : should be "denali,denali-nand-dt"
+  - compatible : should be one of the following:
+      "altr,socfpga-denali-nand"            - for Altera SOCFPGA
   - reg : should contain registers location and length for data and reg.
   - reg-names: Should contain the reg names "nand_data" and "denali_reg"
   - interrupts : The interrupt number.
-  - dm-mask : DMA bit mask
 
 The device tree may optionally contain sub-nodes describing partitions of the
 address space. See partition.txt for more detail.
@@ -15,9 +15,8 @@ Examples:
 nand: nand@ff900000 {
 	#address-cells = <1>;
 	#size-cells = <1>;
-	compatible = "denali,denali-nand-dt";
+	compatible = "altr,socfpga-denali-nand";
 	reg = <0xff900000 0x100000>, <0xffb80000 0x10000>;
 	reg-names = "nand_data", "denali_reg";
 	interrupts = <0 144 4>;
-	dma-mask = <0xffffffff>;
 };
diff --git a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
index af8915b41ccf..486a17d533d7 100644
--- a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
@@ -12,7 +12,7 @@ Required properties:
 - #address-cells, #size-cells : Must be present if the device has sub-nodes
   representing partitions.
 - gpios : Specifies the GPIO pins to control the NAND device.  The order of
-  GPIO references is:  RDY, nCE, ALE, CLE, and an optional nWP.
+  GPIO references is:  RDY, nCE, ALE, CLE, and nWP. nCE and nWP are optional.
 
 Optional properties:
 - bank-width : Width (in bytes) of the device.  If not present, the width
@@ -36,7 +36,7 @@ gpio-nand@1,0 {
 	#address-cells = <1>;
 	#size-cells = <1>;
 	gpios = <&banka 1 0>,	/* RDY */
-		<&banka 2 0>, 	/* nCE */
+		<0>, 		/* nCE */
 		<&banka 3 0>, 	/* ALE */
 		<&banka 4 0>, 	/* CLE */
 		<0>;		/* nWP */
diff --git a/Documentation/devicetree/bindings/mtd/stm32-quadspi.txt b/Documentation/devicetree/bindings/mtd/stm32-quadspi.txt
new file mode 100644
index 000000000000..ddd18c135148
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/stm32-quadspi.txt
@@ -0,0 +1,43 @@
+* STMicroelectronics Quad Serial Peripheral Interface(QuadSPI)
+
+Required properties:
+- compatible: should be "st,stm32f469-qspi"
+- reg: the first contains the register location and length.
+       the second contains the memory mapping address and length
+- reg-names: should contain the reg names "qspi" "qspi_mm"
+- interrupts: should contain the interrupt for the device
+- clocks: the phandle of the clock needed by the QSPI controller
+- A pinctrl must be defined to set pins in mode of operation for QSPI transfer
+
+Optional properties:
+- resets: must contain the phandle to the reset controller.
+
+A spi flash must be a child of the nor_flash node and could have some
+properties. Also see jedec,spi-nor.txt.
+
+Required properties:
+- reg: chip-Select number (QSPI controller may connect 2 nor flashes)
+- spi-max-frequency: max frequency of spi bus
+
+Optional property:
+- spi-rx-bus-width: see ../spi/spi-bus.txt for the description
+
+Example:
+
+qspi: spi@a0001000 {
+	compatible = "st,stm32f469-qspi";
+	reg = <0xa0001000 0x1000>, <0x90000000 0x10000000>;
+	reg-names = "qspi", "qspi_mm";
+	interrupts = <91>;
+	resets = <&rcc STM32F4_AHB3_RESET(QSPI)>;
+	clocks = <&rcc 0 STM32F4_AHB3_CLOCK(QSPI)>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_qspi0>;
+
+	flash@0 {
+		reg = <0>;
+		spi-rx-bus-width = <4>;
+		spi-max-frequency = <108000000>;
+		...
+	};
+};
diff --git a/Documentation/devicetree/bindings/net/dsa/b53.txt b/Documentation/devicetree/bindings/net/dsa/b53.txt
index d6c6e41648d4..8ec2ca21adeb 100644
--- a/Documentation/devicetree/bindings/net/dsa/b53.txt
+++ b/Documentation/devicetree/bindings/net/dsa/b53.txt
@@ -34,7 +34,7 @@ Required properties:
       "brcm,bcm6328-switch"
       "brcm,bcm6368-switch" and the mandatory "brcm,bcm63xx-switch"
 
-See Documentation/devicetree/bindings/dsa/dsa.txt for a list of additional
+See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional
 required and optional properties.
 
 Examples:
diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt
index 7ef9dbb08957..1d4d0f49c9d0 100644
--- a/Documentation/devicetree/bindings/net/dsa/marvell.txt
+++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt
@@ -26,6 +26,10 @@ Optional properties:
 - interrupt-controller	: Indicates the switch is itself an interrupt
 			  controller. This is used for the PHY interrupts.
 #interrupt-cells = <2>	: Controller uses two cells, number and flag
+- eeprom-length		: Set to the length of an EEPROM connected to the
+			  switch. Must be set if the switch can not detect
+			  the presence and/or size of a connected EEPROM,
+			  otherwise optional.
 - mdio			: Container of PHY and devices on the switches MDIO
 			  bus.
 - mdio?		: Container of PHYs and devices on the external MDIO
diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt
index a1e3693cca16..6f55bdd52f8a 100644
--- a/Documentation/devicetree/bindings/net/fsl-fec.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fec.txt
@@ -15,6 +15,10 @@ Optional properties:
 - phy-reset-active-high : If present then the reset sequence using the GPIO
   specified in the "phy-reset-gpios" property is reversed (H=reset state,
   L=operation state).
+- phy-reset-post-delay : Post reset delay in milliseconds. If present then
+  a delay of phy-reset-post-delay milliseconds will be observed after the
+  phy-reset-gpios has been toggled. Can be omitted thus no delay is
+  observed. Delay is in range of 1ms to 1000ms. Other delays are invalid.
 - phy-supply : regulator that powers the Ethernet PHY.
 - phy-handle : phandle to the PHY device connected to this device.
 - fixed-link : Assume a fixed link. See fixed-link.txt in the same directory.
diff --git a/Documentation/devicetree/bindings/net/smsc911x.txt b/Documentation/devicetree/bindings/net/smsc911x.txt
index 16c3a9501f5d..acfafc8e143c 100644
--- a/Documentation/devicetree/bindings/net/smsc911x.txt
+++ b/Documentation/devicetree/bindings/net/smsc911x.txt
@@ -27,6 +27,7 @@ Optional properties:
   of the device. On many systems this is wired high so the device goes
   out of reset at power-on, but if it is under program control, this
   optional GPIO can wake up in response to it.
+- vdd33a-supply, vddvario-supply : 3.3V analog and IO logic power supplies
 
 Examples:
 
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
index 71a3c134af1b..f01d154090da 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
@@ -247,7 +247,6 @@ bias-bus-hold		- latch weakly
 bias-pull-up		- pull up the pin
 bias-pull-down		- pull down the pin
 bias-pull-pin-default	- use pin-default pull state
-bi-directional		- pin supports simultaneous input/output operations
 drive-push-pull		- drive actively high and low
 drive-open-drain	- drive with open drain
 drive-open-source	- drive with open source
@@ -260,7 +259,6 @@ input-debounce		- debounce mode with debound time X
 power-source		- select between different power supplies
 low-power-enable	- enable low power mode
 low-power-disable	- disable low power mode
-output-enable		- enable output on pin regardless of output value
 output-low		- set the pin to output mode with low level
 output-high		- set the pin to output mode with high level
 slew-rate		- set the slew rate
diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt
index 940707d095cc..14bd9e945ff6 100644
--- a/Documentation/devicetree/bindings/power/power_domain.txt
+++ b/Documentation/devicetree/bindings/power/power_domain.txt
@@ -81,7 +81,7 @@ Example 3:
 	child: power-controller@12341000 {
 		compatible = "foo,power-controller";
 		reg = <0x12341000 0x1000>;
-		power-domains = <&parent 0>;
+		power-domains = <&parent>;
 		#power-domain-cells = <0>;
 		domain-idle-states = <&DOMAIN_PWR_DN>;
 	};
diff --git a/Documentation/devicetree/bindings/power/supply/axp20x_battery.txt b/Documentation/devicetree/bindings/power/supply/axp20x_battery.txt
new file mode 100644
index 000000000000..c24886676a60
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/axp20x_battery.txt
@@ -0,0 +1,20 @@
+AXP20x and AXP22x battery power supply
+
+Required Properties:
+ - compatible, one of:
+			"x-powers,axp209-battery-power-supply"
+			"x-powers,axp221-battery-power-supply"
+
+This node is a subnode of the axp20x/axp22x PMIC.
+
+The AXP20X and AXP22X can read the battery voltage, charge and discharge
+currents of the battery by reading ADC channels from the AXP20X/AXP22X
+ADC.
+
+Example:
+
+&axp209 {
+	battery_power_supply: battery-power-supply {
+		compatible = "x-powers,axp209-battery-power-supply";
+	}
+};
diff --git a/Documentation/devicetree/bindings/powerpc/ibm,powerpc-cpu-features.txt b/Documentation/devicetree/bindings/powerpc/ibm,powerpc-cpu-features.txt
new file mode 100644
index 000000000000..5af426e13334
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/ibm,powerpc-cpu-features.txt
@@ -0,0 +1,248 @@
+*** NOTE ***
+This document is copied from OPAL firmware
+(skiboot/doc/device-tree/ibm,powerpc-cpu-features/binding.txt)
+
+There is more complete overview and documentation of features in that
+source tree.  All patches and modifications should go there.
+************
+
+ibm,powerpc-cpu-features binding
+================================
+
+This device tree binding describes CPU features available to software, with
+enablement, privilege, and compatibility metadata.
+
+More general description of design and implementation of this binding is
+found in design.txt, which also points to documentation of specific features.
+
+
+/cpus/ibm,powerpc-cpu-features node binding
+-------------------------------------------
+
+Node: ibm,powerpc-cpu-features
+
+Description: Container of CPU feature nodes.
+
+The node name must be "ibm,powerpc-cpu-features".
+
+It is implemented as a child of the node "/cpus", but this must not be
+assumed by parsers.
+
+The node is optional but should be provided by new OPAL firmware.
+
+Properties:
+
+- compatible
+  Usage: required
+  Value type: string
+  Definition: "ibm,powerpc-cpu-features"
+
+  This compatibility refers to backwards compatibility of the overall
+  design with parsers that behave according to these guidelines. This can
+  be extended in a backward compatible manner which would not warrant a
+  revision of the compatible property.
+
+- isa
+  Usage: required
+  Value type: <u32>
+  Definition:
+
+  isa that the CPU is currently running in. This provides instruction set
+  compatibility, less the individual feature nodes. For example, an ISA v3.0
+  implementation that lacks the "transactional-memory" cpufeature node
+  should not use transactional memory facilities.
+
+  Value corresponds to the "Power ISA Version" multiplied by 1000.
+  For example, <3000> corresponds to Version 3.0, <2070> to Version 2.07.
+  The minor digit is available for revisions.
+
+- display-name
+  Usage: optional
+  Value type: string
+  Definition:
+
+  A human readable name for the CPU.
+
+/cpus/ibm,powerpc-cpu-features/example-feature node bindings
+----------------------------------------------------------------
+
+Each child node of cpu-features represents a CPU feature / capability.
+
+Node: A string describing an architected CPU feature, e.g., "floating-point".
+
+Description: A feature or capability supported by the CPUs.
+
+The name of the node is a human readable string that forms the interface
+used to describe features to software. Features are currently documented
+in the code where they are implemented in skiboot/core/cpufeatures.c
+
+Presence of the node indicates the feature is available.
+
+Properties:
+
+- isa
+  Usage: required
+  Value type: <u32>
+  Definition:
+
+  First level of the Power ISA that the feature appears in.
+  Software should filter out features when constraining the
+  environment to a particular ISA version.
+
+  Value is defined similarly to /cpus/features/isa
+
+- usable-privilege
+  Usage: required
+  Value type: <u32> bit mask
+  Definition:
+              Bit numbers are LSB0
+              bit 0 - PR (problem state / user mode)
+              bit 1 - OS (privileged state)
+              bit 2 - HV (hypervisor state)
+              All other bits reserved and should be zero.
+
+  This property describes the privilege levels and/or software components
+  that can use the feature.
+
+  If bit 0 is set, then the hwcap-bit-nr property will exist.
+
+
+- hv-support
+  Usage: optional
+  Value type: <u32> bit mask
+  Definition:
+              Bit numbers are LSB0
+              bit 0 -  HFSCR
+              All other bits reserved and should be zero.
+
+  This property describes the HV privilege support required to enable the
+  feature to lesser privilege levels. If the property does not exist then no
+  support is required.
+
+  If no bits are set, the hypervisor must have explicit/custom support for
+  this feature.
+
+  If the HFSCR bit is set, then the hfscr-bit-nr property will exist and
+  the feature may be enabled by setting this bit in the HFSCR register.
+
+
+- os-support
+  Usage: optional
+  Value type: <u32> bit mask
+  Definition:
+              Bit numbers are LSB0
+              bit 0 -  FSCR
+              All other bits reserved and should be zero.
+
+  This property describes the OS privilege support required to enable the
+  feature to lesser privilege levels. If the property does not exist then no
+  support is required.
+
+  If no bits are set, the operating system must have explicit/custom support
+  for this feature.
+
+  If the FSCR bit is set, then the fscr-bit-nr property will exist and
+  the feature may be enabled by setting this bit in the FSCR register.
+
+
+- hfscr-bit-nr
+  Usage: optional
+  Value type: <u32>
+  Definition: HFSCR bit position (LSB0)
+
+  This property exists when the hv-support property HFSCR bit is set. This
+  property describes the bit number in the HFSCR register that the
+  hypervisor must set in order to enable this feature.
+
+  This property also exists if an HFSCR bit corresponds with this feature.
+  This makes CPU feature parsing slightly simpler.
+
+
+- fscr-bit-nr
+  Usage: optional
+  Value type: <u32>
+  Definition: FSCR bit position (LSB0)
+
+  This property exists when the os-support property FSCR bit is set. This
+  property describes the bit number in the FSCR register that the
+  operating system must set in order to enable this feature.
+
+  This property also exists if an FSCR bit corresponds with this feature.
+  This makes CPU feature parsing slightly simpler.
+
+
+- hwcap-bit-nr
+  Usage: optional
+  Value type: <u32>
+  Definition: Linux ELF AUX vector bit position (LSB0)
+
+  This property may exist when the usable-privilege property value has PR bit set.
+  This property describes the bit number that should be set in the ELF AUX
+  hardware capability vectors in order to advertise this feature to userspace.
+  Bits 0-31 correspond to bits 0-31 in AT_HWCAP vector. Bits 32-63 correspond
+  to 0-31 in AT_HWCAP2 vector, and so on.  Missing AT_HWCAPx vectors implies
+  that the feature is not enabled or can not be advertised. Operating systems
+  may provide a number of unassigned hardware capability bits to allow for new
+  features to be advertised.
+
+  Some properties representing features created before this binding are
+  advertised to userspace without a one-to-one hwcap bit number may not specify
+  this bit. Operating system will handle those bits specifically.  All new
+  features usable by userspace will have a hwcap-bit-nr property.
+
+
+- dependencies
+  Usage: optional
+  Value type: <prop-encoded-array>
+  Definition:
+
+  If this property exists then it is a list of phandles to cpu feature
+  nodes that must be enabled for this feature to be enabled.
+
+
+Example
+-------
+
+	/cpus/ibm,powerpc-cpu-features {
+		compatible = "ibm,powerpc-cpu-features";
+
+		isa = <3020>;
+
+		darn {
+			isa = <3000>;
+			usable-privilege = <1 | 2 | 4>;
+			hwcap-bit-nr = <xx>;
+		};
+
+		scv {
+			isa = <3000>;
+			usable-privilege = <1 | 2>;
+			os-support = <0>;
+			hwcap-bit-nr = <xx>;
+		};
+
+		stop {
+			isa = <3000>;
+			usable-privilege = <2 | 4>;
+			hv-support = <0>;
+			os-support = <0>;
+		};
+
+		vsx2 (hypothetical) {
+			isa = <3010>;
+			usable-privilege = <1 | 2 | 4>;
+			hv-support = <0>;
+			os-support = <0>;
+			hwcap-bit-nr = <xx>;
+		};
+
+		vsx2-newinsns {
+			isa = <3020>;
+			usable-privilege = <1 | 2 | 4>;
+			os-support = <1>;
+			fscr-bit-nr = <xx>;
+			hwcap-bit-nr = <xx>;
+			dependencies = <&vsx2>;
+		};
+
+	};
diff --git a/Documentation/devicetree/bindings/rtc/cpcap-rtc.txt b/Documentation/devicetree/bindings/rtc/cpcap-rtc.txt
new file mode 100644
index 000000000000..45750ff3112d
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/cpcap-rtc.txt
@@ -0,0 +1,18 @@
+Motorola CPCAP PMIC RTC
+-----------------------
+
+This module is part of the CPCAP. For more details about the whole
+chip see Documentation/devicetree/bindings/mfd/motorola-cpcap.txt.
+
+Requires node properties:
+- compatible: should contain "motorola,cpcap-rtc"
+- interrupts: An interrupt specifier for alarm and 1 Hz irq
+
+Example:
+
+&cpcap {
+	cpcap_rtc: rtc {
+		compatible = "motorola,cpcap-rtc";
+		interrupts = <39 IRQ_TYPE_NONE>, <26 IRQ_TYPE_NONE>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/rtc/rtc-sh.txt b/Documentation/devicetree/bindings/rtc/rtc-sh.txt
new file mode 100644
index 000000000000..7676c7d28874
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/rtc-sh.txt
@@ -0,0 +1,28 @@
+* Real Time Clock for Renesas SH and ARM SoCs
+
+Required properties:
+- compatible: Should be "renesas,r7s72100-rtc" and "renesas,sh-rtc" as a
+  fallback.
+- reg: physical base address and length of memory mapped region.
+- interrupts: 3 interrupts for alarm, period, and carry.
+- interrupt-names: The interrupts should be labeled as "alarm", "period", and
+  "carry".
+- clocks: The functional clock source for the RTC controller must be listed
+  first (if exists). Additionally, potential clock counting sources are to be
+  listed.
+- clock-names: The functional clock must be labeled as "fck". Other clocks
+  may be named in accordance to the SoC hardware manuals.
+
+
+Example:
+rtc: rtc@fcff1000 {
+	compatible = "renesas,r7s72100-rtc", "renesas,sh-rtc";
+	reg = <0xfcff1000 0x2e>;
+	interrupts = <GIC_SPI 276 IRQ_TYPE_EDGE_RISING
+		      GIC_SPI 277 IRQ_TYPE_EDGE_RISING
+		      GIC_SPI 278 IRQ_TYPE_EDGE_RISING>;
+	interrupt-names = "alarm", "period", "carry";
+	clocks = <&mstp6_clks R7S72100_CLK_RTC>, <&rtc_x1_clk>,
+		 <&rtc_x3_clk>, <&extal_clk>;
+	clock-names = "fck", "rtc_x1", "rtc_x3", "extal";
+};
diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt
index 349f79fd7076..626e1afa64a6 100644
--- a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt
+++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt
@@ -13,8 +13,17 @@ Required properties:
 - #gpio-cells : Should be two. The first cell is the pin number and the
   second cell is used to specify optional parameters (currently unused).
 - gpio-controller : Marks the port as GPIO controller.
+Optional properties:
+- fsl,cpm1-gpio-irq-mask : For banks having interrupt capability (like port C
+  on CPM1), this item tells which ports have an associated interrupt (ports are
+  listed in the same order as in PCINT register)
+- interrupts : This property provides the list of interrupt for each GPIO having
+  one as described by the fsl,cpm1-gpio-irq-mask property. There should be as
+  many interrupts as number of ones in the mask property. The first interrupt in
+  the list corresponds to the most significant bit of the mask.
+- interrupt-parent : Parent for the above interrupt property.
 
-Example of three SOC GPIO banks defined as gpio-controller nodes:
+Example of four SOC GPIO banks defined as gpio-controller nodes:
 
 	CPM1_PIO_A: gpio-controller@950 {
 		#gpio-cells = <2>;
@@ -30,6 +39,16 @@ Example of three SOC GPIO banks defined as gpio-controller nodes:
 		gpio-controller;
 	};
 
+	CPM1_PIO_C: gpio-controller@960 {
+		#gpio-cells = <2>;
+		compatible = "fsl,cpm1-pario-bank-c";
+		reg = <0x960 0x10>;
+		fsl,cpm1-gpio-irq-mask = <0x0fff>;
+		interrupts = <1 2 6 9 10 11 14 15 23 24 26 31>;
+		interrupt-parent = <&CPM_PIC>;
+		gpio-controller;
+	};
+
 	CPM1_PIO_E: gpio-controller@ac8 {
 		#gpio-cells = <2>;
 		compatible = "fsl,cpm1-pario-bank-e";
diff --git a/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt b/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt
deleted file mode 100644
index c59e27c632c1..000000000000
--- a/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Hi6220 SoC ION
-===================================================================
-Required properties:
-- compatible : "hisilicon,hi6220-ion"
-- list of the ION heaps
-	- heap name : maybe heap_sys_user@0
-	- heap id   : id should be unique in the system.
-	- heap base : base ddr address of the heap,0 means that
-	it is dynamic.
-	- heap size : memory size and 0 means it is dynamic.
-	- heap type : the heap type of the heap, please also
-	see the define in ion.h(drivers/staging/android/uapi/ion.h)
--------------------------------------------------------------------
-Example:
-	hi6220-ion {
-		compatible = "hisilicon,hi6220-ion";
-		heap_sys_user@0 {
-			heap-name = "sys_user";
-			heap-id   = <0x0>;
-			heap-base = <0x0>;
-			heap-size = <0x0>;
-			heap-type = "ion_system";
-		};
-		heap_sys_contig@0 {
-			heap-name = "sys_contig";
-			heap-id   = <0x1>;
-			heap-base = <0x0>;
-			heap-size = <0x0>;
-			heap-type = "ion_system_contig";
-		};
-	};
diff --git a/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt b/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt
index 474531d2b2c5..da8c5b73ad10 100644
--- a/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt
@@ -3,15 +3,39 @@ Binding for Thermal Sensor driver for BCM2835 SoCs.
 Required parameters:
 -------------------
 
-compatible: 	should be one of: "brcm,bcm2835-thermal",
-		"brcm,bcm2836-thermal" or "brcm,bcm2837-thermal"
-reg:		Address range of the thermal registers.
-clocks: 	Phandle of the clock used by the thermal sensor.
+compatible: 		should be one of: "brcm,bcm2835-thermal",
+			"brcm,bcm2836-thermal" or "brcm,bcm2837-thermal"
+reg:			Address range of the thermal registers.
+clocks: 		Phandle of the clock used by the thermal sensor.
+#thermal-sensor-cells:	should be 0 (see thermal.txt)
 
 Example:
 
+thermal-zones {
+	cpu_thermal: cpu-thermal {
+		polling-delay-passive = <0>;
+		polling-delay = <1000>;
+
+		thermal-sensors = <&thermal>;
+
+		trips {
+			cpu-crit {
+				temperature	= <80000>;
+				hysteresis	= <0>;
+				type		= "critical";
+			};
+		};
+
+		coefficients = <(-538)	407000>;
+
+		cooling-maps {
+		};
+	};
+};
+
 thermal: thermal@7e212000 {
 	compatible = "brcm,bcm2835-thermal";
 	reg = <0x7e212000 0x8>;
 	clocks = <&clocks BCM2835_CLOCK_TSENS>;
+	#thermal-sensor-cells = <0>;
 };
diff --git a/Documentation/devicetree/bindings/thermal/brcm,ns-thermal b/Documentation/devicetree/bindings/thermal/brcm,ns-thermal
new file mode 100644
index 000000000000..68e047170039
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/brcm,ns-thermal
@@ -0,0 +1,37 @@
+* Broadcom Northstar Thermal
+
+This binding describes thermal sensor that is part of Northstar's DMU (Device
+Management Unit).
+
+Required properties:
+- compatible : Must be "brcm,ns-thermal"
+- reg : iomem address range of PVTMON registers
+- #thermal-sensor-cells : Should be <0>
+
+Example:
+
+thermal: thermal@1800c2c0 {
+	compatible = "brcm,ns-thermal";
+	reg = <0x1800c2c0 0x10>;
+	#thermal-sensor-cells = <0>;
+};
+
+thermal-zones {
+	cpu_thermal: cpu-thermal {
+		polling-delay-passive = <0>;
+		polling-delay = <1000>;
+		coefficients = <(-556) 418000>;
+		thermal-sensors = <&thermal>;
+
+		trips {
+			cpu-crit {
+				temperature	= <125000>;
+				hysteresis	= <0>;
+				type		= "critical";
+			};
+		};
+
+		cooling-maps {
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/thermal/da9062-thermal.txt b/Documentation/devicetree/bindings/thermal/da9062-thermal.txt
new file mode 100644
index 000000000000..e241bb5a5584
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/da9062-thermal.txt
@@ -0,0 +1,36 @@
+* Dialog DA9062/61 TJUNC Thermal Module
+
+This module is part of the DA9061/DA9062. For more details about entire
+DA9062 and DA9061 chips see Documentation/devicetree/bindings/mfd/da9062.txt
+
+Junction temperature thermal module uses an interrupt signal to identify
+high THERMAL_TRIP_HOT temperatures for the PMIC device.
+
+Required properties:
+
+- compatible: should be one of the following valid compatible string lines:
+        "dlg,da9061-thermal", "dlg,da9062-thermal"
+        "dlg,da9062-thermal"
+
+Optional properties:
+
+- polling-delay-passive : Specify the polling period, measured in
+    milliseconds, between thermal zone device update checks.
+
+Example: DA9062
+
+	pmic0: da9062@58 {
+		thermal {
+			compatible = "dlg,da9062-thermal";
+			polling-delay-passive = <3000>;
+		};
+	};
+
+Example: DA9061 using a fall-back compatible for the DA9062 onkey driver
+
+	pmic0: da9061@58 {
+		thermal {
+			compatible = "dlg,da9061-thermal", "dlg,da9062-thermal";
+			polling-delay-passive = <3000>;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/trivial-devices.txt b/Documentation/devicetree/bindings/trivial-devices.txt
index ad10fbe61562..3e0a34c88e07 100644
--- a/Documentation/devicetree/bindings/trivial-devices.txt
+++ b/Documentation/devicetree/bindings/trivial-devices.txt
@@ -160,6 +160,7 @@ sii,s35390a		2-wire CMOS real-time clock
 silabs,si7020		Relative Humidity and Temperature Sensors
 skyworks,sky81452	Skyworks SKY81452: Six-Channel White LED Driver with Touch Panel Bias Supply
 st,24c256		i2c serial eeprom  (24cxx)
+st,m41t0		Serial real-time clock (RTC)
 st,m41t00		Serial real-time clock (RTC)
 st,m41t62		Serial real-time clock (RTC) with alarm
 st,m41t80		M41T80 - SERIAL ACCESS RTC WITH ALARMS
diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt b/Documentation/devicetree/bindings/usb/dwc2.txt
index 00bea038639e..fcf199b64d3d 100644
--- a/Documentation/devicetree/bindings/usb/dwc2.txt
+++ b/Documentation/devicetree/bindings/usb/dwc2.txt
@@ -10,6 +10,7 @@ Required properties:
   - "rockchip,rk3288-usb", "rockchip,rk3066-usb", "snps,dwc2": for rk3288 Soc;
   - "lantiq,arx100-usb": The DWC2 USB controller instance in Lantiq ARX SoCs;
   - "lantiq,xrx200-usb": The DWC2 USB controller instance in Lantiq XRX SoCs;
+  - "amlogic,meson8-usb": The DWC2 USB controller instance in Amlogic Meson8 SoCs;
   - "amlogic,meson8b-usb": The DWC2 USB controller instance in Amlogic Meson8b SoCs;
   - "amlogic,meson-gxbb-usb": The DWC2 USB controller instance in Amlogic S905 SoCs;
   - "amcc,dwc-otg": The DWC2 USB controller instance in AMCC Canyonlands 460EX SoCs;
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index f9fe94535b46..c03d20140366 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -173,6 +173,7 @@ lego	LEGO Systems A/S
 lenovo	Lenovo Group Ltd.
 lg	LG Corporation
 licheepi	Lichee Pi
+linaro	Linaro Limited
 linux	Linux-specific binding
 lltc	Linear Technology Corporation
 lsi	LSI Corp. (LSI Logic)
@@ -196,6 +197,7 @@ minix	MINIX Technology Ltd.
 miramems	MiraMEMS Sensing Technology Co., Ltd.
 mitsubishi	Mitsubishi Electric Corporation
 mosaixtech	Mosaix Technologies, Inc.
+motorola	Motorola, Inc.
 moxa	Moxa
 mpl	MPL AG
 mqmaker	mqmaker Inc.
diff --git a/Documentation/filesystems/autofs4.txt b/Documentation/filesystems/autofs4.txt
index f10dd590f69f..8444dc3d57e8 100644
--- a/Documentation/filesystems/autofs4.txt
+++ b/Documentation/filesystems/autofs4.txt
@@ -316,7 +316,7 @@ For version 5, the format of the message is:
         struct autofs_v5_packet {
                 int proto_version;                /* Protocol version */
                 int type;                        /* Type of packet */
-                autofs_wqt_t wait_queue_token;
+                autofs_wqt_t wait_queue_entry_token;
                 __u32 dev;
                 __u64 ino;
                 __u32 uid;
@@ -341,12 +341,12 @@ The pipe will be set to "packet mode" (equivalent to passing
 `O_DIRECT`) to _pipe2(2)_ so that a read from the pipe will return at
 most one packet, and any unread portion of a packet will be discarded.
 
-The `wait_queue_token` is a unique number which can identify a
+The `wait_queue_entry_token` is a unique number which can identify a
 particular request to be acknowledged.  When a message is sent over
 the pipe the affected dentry is marked as either "active" or
 "expiring" and other accesses to it block until the message is
 acknowledged using one of the ioctls below and the relevant
-`wait_queue_token`.
+`wait_queue_entry_token`.
 
 Communicating with autofs: root directory ioctls
 ------------------------------------------------
@@ -358,7 +358,7 @@ capability, or must be the automount daemon.
 The available ioctl commands are:
 
 - **AUTOFS_IOC_READY**: a notification has been handled.  The argument
-    to the ioctl command is the "wait_queue_token" number
+    to the ioctl command is the "wait_queue_entry_token" number
     corresponding to the notification being acknowledged.
 - **AUTOFS_IOC_FAIL**: similar to above, but indicates failure with
     the error code `ENOENT`.
@@ -382,14 +382,14 @@ The available ioctl commands are:
         struct autofs_packet_expire_multi {
                 int proto_version;              /* Protocol version */
                 int type;                       /* Type of packet */
-                autofs_wqt_t wait_queue_token;
+                autofs_wqt_t wait_queue_entry_token;
                 int len;
                 char name[NAME_MAX+1];
         };
 
      is required.  This is filled in with the name of something
      that can be unmounted or removed.  If nothing can be expired,
-     `errno` is set to `EAGAIN`.  Even though a `wait_queue_token`
+     `errno` is set to `EAGAIN`.  Even though a `wait_queue_entry_token`
      is present in the structure, no "wait queue" is established
      and no acknowledgment is needed.
 - **AUTOFS_IOC_EXPIRE_MULTI**:  This is similar to
diff --git a/Documentation/filesystems/bfs.txt b/Documentation/filesystems/bfs.txt
index 78043d5a8fc3..843ce91a2e40 100644
--- a/Documentation/filesystems/bfs.txt
+++ b/Documentation/filesystems/bfs.txt
@@ -54,4 +54,4 @@ The first 4 bytes should be 0x1badface.
 If you have any patches, questions or suggestions regarding this BFS
 implementation please contact the author:
 
-Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+Tigran Aivazian <aivazian.tigran@gmail.com>
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt
index 8de578a98222..80dc0bdc302a 100644
--- a/Documentation/filesystems/nfs/pnfs.txt
+++ b/Documentation/filesystems/nfs/pnfs.txt
@@ -64,46 +64,9 @@ table which are called by the nfs-client pnfs-core to implement the
 different layout types.
 
 Files-layout-driver code is in: fs/nfs/filelayout/.. directory
-Objects-layout-driver code is in: fs/nfs/objlayout/.. directory
 Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory
 Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory
 
-objects-layout setup
---------------------
-
-As part of the full STD implementation the objlayoutdriver.ko needs, at times,
-to automatically login to yet undiscovered iscsi/osd devices. For this the
-driver makes up-calles to a user-mode script called *osd_login*
-
-The path_name of the script to use is by default:
-	/sbin/osd_login.
-This name can be overridden by the Kernel module parameter:
-	objlayoutdriver.osd_login_prog
-
-If Kernel does not find the osd_login_prog path it will zero it out
-and will not attempt farther logins. An admin can then write new value
-to the objlayoutdriver.osd_login_prog Kernel parameter to re-enable it.
-
-The /sbin/osd_login is part of the nfs-utils package, and should usually
-be installed on distributions that support this Kernel version.
-
-The API to the login script is as follows:
-	Usage: $0 -u <URI> -o <OSDNAME> -s <SYSTEMID>
-	Options:
-		-u		target uri e.g. iscsi://<ip>:<port>
-				(always exists)
-				(More protocols can be defined in the future.
-				 The client does not interpret this string it is
-				 passed unchanged as received from the Server)
-		-o		osdname of the requested target OSD
-				(Might be empty)
-				(A string which denotes the OSD name, there is a
-				 limit of 64 chars on this string)
-		-s 		systemid of the requested target OSD
-				(Might be empty)
-				(This string, if not empty is always an hex
-				 representation of the 20 bytes osd_system_id)
-
 blocks-layout setup
 -------------------
 
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index 634d03e20c2d..c9e884b52698 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -21,12 +21,19 @@ from accessing the corresponding object from the original filesystem.
 This is most obvious from the 'st_dev' field returned by stat(2).
 
 While directories will report an st_dev from the overlay-filesystem,
-all non-directory objects will report an st_dev from the lower or
+non-directory objects may report an st_dev from the lower filesystem or
 upper filesystem that is providing the object.  Similarly st_ino will
 only be unique when combined with st_dev, and both of these can change
 over the lifetime of a non-directory object.  Many applications and
 tools ignore these values and will not be affected.
 
+In the special case of all overlay layers on the same underlying
+filesystem, all objects will report an st_dev from the overlay
+filesystem and st_ino from the underlying filesystem.  This will
+make the overlay mount more compliant with filesystem scanners and
+overlay objects will be distinguishable from the corresponding
+objects in the original filesystem.
+
 Upper and Lower
 ---------------
 
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 61306a22888d..bc67dbf76eb0 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -67,6 +67,7 @@ needed).
    driver-api/index
    core-api/index
    media/index
+   input/index
    gpu/index
    security/index
    sound/index
diff --git a/Documentation/input/devices/edt-ft5x06.rst b/Documentation/input/devices/edt-ft5x06.rst
index 2032f0b7a8fa..1ccc94b192b7 100644
--- a/Documentation/input/devices/edt-ft5x06.rst
+++ b/Documentation/input/devices/edt-ft5x06.rst
@@ -15,7 +15,7 @@ It has been tested with the following devices:
 The driver allows configuration of the touch screen via a set of sysfs files:
 
 /sys/class/input/eventX/device/device/threshold:
-    allows setting the "click"-threshold in the range from 20 to 80.
+    allows setting the "click"-threshold in the range from 0 to 80.
 
 /sys/class/input/eventX/device/device/gain:
     allows setting the sensitivity in the range from 0 to 31. Note that
diff --git a/Documentation/input/ff.rst b/Documentation/input/ff.rst
index 6a265a6934e6..26d461998e08 100644
--- a/Documentation/input/ff.rst
+++ b/Documentation/input/ff.rst
@@ -130,11 +130,11 @@ See <uapi/linux/input.h> for a description of the ff_effect struct.  You
 should also find help in a few sketches, contained in files shape.svg
 and interactive.svg:
 
-.. figure:: shape.svg
+.. kernel-figure:: shape.svg
 
     Shape
 
-.. figure:: interactive.svg
+.. kernel-figure:: interactive.svg
 
     Interactive
 
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index eccb675a2852..1e9fcb4d0ec8 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -309,6 +309,7 @@ Code  Seq#(hex)	Include File		Comments
 0xA3	80-8F	Port ACL		in development:
 					<mailto:tlewis@mindspring.com>
 0xA3	90-9F	linux/dtlk.h
+0xA4	00-1F	uapi/linux/tee.h	Generic TEE subsystem
 0xAA	00-3F	linux/uapi/linux/userfaultfd.h
 0xAB	00-1F	linux/nbd.h
 0xAC	00-1F	linux/raw.h
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index 9b9c4797fc55..e18daca65ccd 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -44,11 +44,11 @@ This document describes the Linux kernel Makefiles.
 	   --- 6.11 Post-link pass
 
 	=== 7 Kbuild syntax for exported headers
-		--- 7.1 header-y
+		--- 7.1 no-export-headers
 		--- 7.2 genhdr-y
-		--- 7.3 destination-y
-		--- 7.4 generic-y
-		--- 7.5 generated-y
+		--- 7.3 generic-y
+		--- 7.4 generated-y
+		--- 7.5 mandatory-y
 
 	=== 8 Kbuild Variables
 	=== 9 Makefile language
@@ -1236,7 +1236,7 @@ When kbuild executes, the following steps are followed (roughly):
 	that may be shared between individual architectures.
 	The recommended approach how to use a generic header file is
 	to list the file in the Kbuild file.
-	See "7.4 generic-y" for further info on syntax etc.
+	See "7.3 generic-y" for further info on syntax etc.
 
 --- 6.11 Post-link pass
 
@@ -1263,53 +1263,32 @@ The pre-processing does:
 - drop include of compiler.h
 - drop all sections that are kernel internal (guarded by ifdef __KERNEL__)
 
-Each relevant directory contains a file name "Kbuild" which specifies the
-headers to be exported.
-See subsequent chapter for the syntax of the Kbuild file.
-
-	--- 7.1 header-y
-
-	header-y specifies header files to be exported.
-
-		Example:
-			#include/linux/Kbuild
-			header-y += usb/
-			header-y += aio_abi.h
+All headers under include/uapi/, include/generated/uapi/,
+arch/<arch>/include/uapi/ and arch/<arch>/include/generated/uapi/
+are exported.
 
-	The convention is to list one file per line and
-	preferably in alphabetic order.
+A Kbuild file may be defined under arch/<arch>/include/uapi/asm/ and
+arch/<arch>/include/asm/ to list asm files coming from asm-generic.
+See subsequent chapter for the syntax of the Kbuild file.
 
-	header-y also specifies which subdirectories to visit.
-	A subdirectory is identified by a trailing '/' which
-	can be seen in the example above for the usb subdirectory.
+	--- 7.1 no-export-headers
 
-	Subdirectories are visited before their parent directories.
+	no-export-headers is essentially used by include/uapi/linux/Kbuild to
+	avoid exporting specific headers (e.g. kvm.h) on architectures that do
+	not support it. It should be avoided as much as possible.
 
 	--- 7.2 genhdr-y
 
-	genhdr-y specifies generated files to be exported.
-	Generated files are special as they need to be looked
-	up in another directory when doing 'make O=...' builds.
+	genhdr-y specifies asm files to be generated.
 
 		Example:
-			#include/linux/Kbuild
-			genhdr-y += version.h
+			#arch/x86/include/uapi/asm/Kbuild
+			genhdr-y += unistd_32.h
+			genhdr-y += unistd_64.h
+			genhdr-y += unistd_x32.h
 
-	--- 7.3 destination-y
 
-	When an architecture has a set of exported headers that needs to be
-	exported to a different directory destination-y is used.
-	destination-y specifies the destination directory for all exported
-	headers in the file where it is present.
-
-		Example:
-			#arch/xtensa/platforms/s6105/include/platform/Kbuild
-			destination-y := include/linux
-
-	In the example above all exported headers in the Kbuild file
-	will be located in the directory "include/linux" when exported.
-
-	--- 7.4 generic-y
+	--- 7.3 generic-y
 
 	If an architecture uses a verbatim copy of a header from
 	include/asm-generic then this is listed in the file
@@ -1336,7 +1315,7 @@ See subsequent chapter for the syntax of the Kbuild file.
 		Example: termios.h
 			#include <asm-generic/termios.h>
 
-	--- 7.5 generated-y
+	--- 7.4 generated-y
 
 	If an architecture generates other header files alongside generic-y
 	wrappers, and not included in genhdr-y, then generated-y specifies
@@ -1349,6 +1328,15 @@ See subsequent chapter for the syntax of the Kbuild file.
 			#arch/x86/include/asm/Kbuild
 			generated-y += syscalls_32.h
 
+	--- 7.5 mandatory-y
+
+	mandatory-y is essentially used by include/uapi/asm-generic/Kbuild.asm
+	to define the minimun set of headers that must be exported in
+	include/asm.
+
+	The convention is to list one subdir per line and
+	preferably in alphabetic order.
+
 === 8 Kbuild Variables
 
 The top Makefile exports the following variables:
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index df31e30b6a02..2cb7dc5c0e0d 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -109,13 +109,12 @@ SCHED_SOFTIRQ: Do all of the following:
 	on that CPU.  If a thread that expects to run on the de-jittered
 	CPU awakens, the scheduler will send an IPI that can result in
 	a subsequent SCHED_SOFTIRQ.
-2.	Build with CONFIG_RCU_NOCB_CPU=y, CONFIG_RCU_NOCB_CPU_ALL=y,
-	CONFIG_NO_HZ_FULL=y, and, in addition, ensure that the CPU
-	to be de-jittered is marked as an adaptive-ticks CPU using the
-	"nohz_full=" boot parameter.  This reduces the number of
-	scheduler-clock interrupts that the de-jittered CPU receives,
-	minimizing its chances of being selected to do the load balancing
-	work that runs in SCHED_SOFTIRQ context.
+2.	CONFIG_NO_HZ_FULL=y and ensure that the CPU to be de-jittered
+	is marked as an adaptive-ticks CPU using the "nohz_full="
+	boot parameter.  This reduces the number of scheduler-clock
+	interrupts that the de-jittered CPU receives, minimizing its
+	chances of being selected to do the load balancing work that
+	runs in SCHED_SOFTIRQ context.
 3.	To the extent possible, keep the CPU out of the kernel when it
 	is non-idle, for example, by avoiding system calls and by
 	forcing both kernel threads and interrupts to execute elsewhere.
@@ -135,11 +134,10 @@ HRTIMER_SOFTIRQ:  Do all of the following:
 RCU_SOFTIRQ:  Do at least one of the following:
 1.	Offload callbacks and keep the CPU in either dyntick-idle or
 	adaptive-ticks state by doing all of the following:
-	a.	Build with CONFIG_RCU_NOCB_CPU=y, CONFIG_RCU_NOCB_CPU_ALL=y,
-		CONFIG_NO_HZ_FULL=y, and, in addition ensure that the CPU
-		to be de-jittered is marked as an adaptive-ticks CPU using
-		the "nohz_full=" boot parameter.  Bind the rcuo kthreads
-		to housekeeping CPUs, which can tolerate OS jitter.
+	a.	CONFIG_NO_HZ_FULL=y and ensure that the CPU to be
+		de-jittered is marked as an adaptive-ticks CPU using the
+		"nohz_full=" boot parameter.  Bind the rcuo kthreads to
+		housekeeping CPUs, which can tolerate OS jitter.
 	b.	To the extent possible, keep the CPU out of the kernel
 		when it is non-idle, for example, by avoiding system
 		calls and by forcing both kernel threads and interrupts
@@ -236,11 +234,10 @@ To reduce its OS jitter, do at least one of the following:
 	is feasible only if your workload never requires RCU priority
 	boosting, for example, if you ensure frequent idle time on all
 	CPUs that might execute within the kernel.
-3.	Build with CONFIG_RCU_NOCB_CPU=y and CONFIG_RCU_NOCB_CPU_ALL=y,
-	which offloads all RCU callbacks to kthreads that can be moved
-	off of CPUs susceptible to OS jitter.  This approach prevents the
-	rcuc/%u kthreads from having any work to do, so that they are
-	never awakened.
+3.	Build with CONFIG_RCU_NOCB_CPU=y and boot with the rcu_nocbs=
+	boot parameter offloading RCU callbacks from all CPUs susceptible
+	to OS jitter.  This approach prevents the rcuc/%u kthreads from
+	having any work to do, so that they are never awakened.
 4.	Ensure that the CPU never enters the kernel, and, in particular,
 	avoid initiating any CPU hotplug operations on this CPU.  This is
 	another way of preventing any callbacks from being queued on the
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index d2b0a8d81258..9d5e0f853f08 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -27,7 +27,7 @@ The purpose of this document is twofold:
  (2) to provide a guide as to how to use the barriers that are available.
 
 Note that an architecture can provide more than the minimum requirement
-for any particular barrier, but if the architecure provides less than
+for any particular barrier, but if the architecture provides less than
 that, that architecture is incorrect.
 
 Note also that it is possible that a barrier may be a no-op for an
@@ -768,7 +768,7 @@ equal to zero, in which case the compiler is within its rights to
 transform the above code into the following:
 
 	q = READ_ONCE(a);
-	WRITE_ONCE(b, 1);
+	WRITE_ONCE(b, 2);
 	do_something_else();
 
 Given this transformation, the CPU is not required to respect the ordering
@@ -2373,7 +2373,7 @@ is performed:
 					spin_unlock(Q);
 
 
-See Documentation/DocBook/deviceiobook.tmpl for more information.
+See Documentation/driver-api/device-io.rst for more information.
 
 
 =================================
@@ -2614,7 +2614,7 @@ might be needed:
      relaxed memory access properties, then _mandatory_ memory barriers are
      required to enforce ordering.
 
-See Documentation/DocBook/deviceiobook.tmpl for more information.
+See Documentation/driver-api/device-io.rst for more information.
 
 
 INTERRUPTS
diff --git a/Documentation/networking/dpaa.txt b/Documentation/networking/dpaa.txt
new file mode 100644
index 000000000000..76e016d4d344
--- /dev/null
+++ b/Documentation/networking/dpaa.txt
@@ -0,0 +1,194 @@
+The QorIQ DPAA Ethernet Driver
+==============================
+
+Authors:
+Madalin Bucur <madalin.bucur@nxp.com>
+Camelia Groza <camelia.groza@nxp.com>
+
+Contents
+========
+
+	- DPAA Ethernet Overview
+	- DPAA Ethernet Supported SoCs
+	- Configuring DPAA Ethernet in your kernel
+	- DPAA Ethernet Frame Processing
+	- DPAA Ethernet Features
+	- Debugging
+
+DPAA Ethernet Overview
+======================
+
+DPAA stands for Data Path Acceleration Architecture and it is a
+set of networking acceleration IPs that are available on several
+generations of SoCs, both on PowerPC and ARM64.
+
+The Freescale DPAA architecture consists of a series of hardware blocks
+that support Ethernet connectivity. The Ethernet driver depends upon the
+following drivers in the Linux kernel:
+
+ - Peripheral Access Memory Unit (PAMU) (* needed only for PPC platforms)
+    drivers/iommu/fsl_*
+ - Frame Manager (FMan)
+    drivers/net/ethernet/freescale/fman
+ - Queue Manager (QMan), Buffer Manager (BMan)
+    drivers/soc/fsl/qbman
+
+A simplified view of the dpaa_eth interfaces mapped to FMan MACs:
+
+  dpaa_eth       /eth0\     ...       /ethN\
+  driver        |      |             |      |
+  -------------   ----   -----------   ----   -------------
+       -Ports  / Tx  Rx \    ...    / Tx  Rx \
+  FMan        |          |         |          |
+       -MACs  |   MAC0   |         |   MACN   |
+             /   dtsec0   \  ...  /   dtsecN   \ (or tgec)
+            /              \     /              \(or memac)
+  ---------  --------------  ---  --------------  ---------
+      FMan, FMan Port, FMan SP, FMan MURAM drivers
+  ---------------------------------------------------------
+      FMan HW blocks: MURAM, MACs, Ports, SP
+  ---------------------------------------------------------
+
+The dpaa_eth relation to the QMan, BMan and FMan:
+              ________________________________
+  dpaa_eth   /            eth0                \
+  driver    /                                  \
+  ---------   -^-   -^-   -^-   ---    ---------
+  QMan driver / \   / \   / \  \   /  | BMan    |
+             |Rx | |Rx | |Tx | |Tx |  | driver  |
+  ---------  |Dfl| |Err| |Cnf| |FQs|  |         |
+  QMan HW    |FQ | |FQ | |FQs| |   |  |         |
+             /   \ /   \ /   \  \ /   |         |
+  ---------   ---   ---   ---   -v-    ---------
+            |        FMan QMI         |         |
+            | FMan HW       FMan BMI  | BMan HW |
+              -----------------------   --------
+
+where the acronyms used above (and in the code) are:
+DPAA = Data Path Acceleration Architecture
+FMan = DPAA Frame Manager
+QMan = DPAA Queue Manager
+BMan = DPAA Buffers Manager
+QMI = QMan interface in FMan
+BMI = BMan interface in FMan
+FMan SP = FMan Storage Profiles
+MURAM = Multi-user RAM in FMan
+FQ = QMan Frame Queue
+Rx Dfl FQ = default reception FQ
+Rx Err FQ = Rx error frames FQ
+Tx Cnf FQ = Tx confirmation FQs
+Tx FQs = transmission frame queues
+dtsec = datapath three speed Ethernet controller (10/100/1000 Mbps)
+tgec = ten gigabit Ethernet controller (10 Gbps)
+memac = multirate Ethernet MAC (10/100/1000/10000)
+
+DPAA Ethernet Supported SoCs
+============================
+
+The DPAA drivers enable the Ethernet controllers present on the following SoCs:
+
+# PPC
+P1023
+P2041
+P3041
+P4080
+P5020
+P5040
+T1023
+T1024
+T1040
+T1042
+T2080
+T4240
+B4860
+
+# ARM
+LS1043A
+LS1046A
+
+Configuring DPAA Ethernet in your kernel
+========================================
+
+To enable the DPAA Ethernet driver, the following Kconfig options are required:
+
+# common for arch/arm64 and arch/powerpc platforms
+CONFIG_FSL_DPAA=y
+CONFIG_FSL_FMAN=y
+CONFIG_FSL_DPAA_ETH=y
+CONFIG_FSL_XGMAC_MDIO=y
+
+# for arch/powerpc only
+CONFIG_FSL_PAMU=y
+
+# common options needed for the PHYs used on the RDBs
+CONFIG_VITESSE_PHY=y
+CONFIG_REALTEK_PHY=y
+CONFIG_AQUANTIA_PHY=y
+
+DPAA Ethernet Frame Processing
+==============================
+
+On Rx, buffers for the incoming frames are retrieved from one of the three
+existing buffers pools. The driver initializes and seeds these, each with
+buffers of different sizes: 1KB, 2KB and 4KB.
+
+On Tx, all transmitted frames are returned to the driver through Tx
+confirmation frame queues. The driver is then responsible for freeing the
+buffers. In order to do this properly, a backpointer is added to the buffer
+before transmission that points to the skb. When the buffer returns to the
+driver on a confirmation FQ, the skb can be correctly consumed.
+
+DPAA Ethernet Features
+======================
+
+Currently the DPAA Ethernet driver enables the basic features required for
+a Linux Ethernet driver. The support for advanced features will be added
+gradually.
+
+The driver has Rx and Tx checksum offloading for UDP and TCP. Currently the Rx
+checksum offload feature is enabled by default and cannot be controlled through
+ethtool.
+
+The driver has support for multiple prioritized Tx traffic classes. Priorities
+range from 0 (lowest) to 3 (highest). These are mapped to HW workqueues with
+strict priority levels. Each traffic class contains NR_CPU TX queues. By
+default, only one traffic class is enabled and the lowest priority Tx queues
+are used. Higher priority traffic classes can be enabled with the mqprio
+qdisc. For example, all four traffic classes are enabled on an interface with
+the following command. Furthermore, skb priority levels are mapped to traffic
+classes as follows:
+
+	* priorities 0 to 3 - traffic class 0 (low priority)
+	* priorities 4 to 7 - traffic class 1 (medium-low priority)
+	* priorities 8 to 11 - traffic class 2 (medium-high priority)
+	* priorities 12 to 15 - traffic class 3 (high priority)
+
+tc qdisc add dev <int> root handle 1: \
+	 mqprio num_tc 4 map 0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 hw 1
+
+Debugging
+=========
+
+The following statistics are exported for each interface through ethtool:
+
+	- interrupt count per CPU
+	- Rx packets count per CPU
+	- Tx packets count per CPU
+	- Tx confirmed packets count per CPU
+	- Tx S/G frames count per CPU
+	- Tx error count per CPU
+	- Rx error count per CPU
+	- Rx error count per type
+	- congestion related statistics:
+		- congestion status
+		- time spent in congestion
+		- number of time the device entered congestion
+		- dropped packets count per cause
+
+The driver also exports the following information in sysfs:
+
+	- the FQ IDs for each FQ type
+	/sys/devices/platform/dpaa-ethernet.0/net/<int>/fqids
+
+	- the IDs of the buffer pools in use
+	/sys/devices/platform/dpaa-ethernet.0/net/<int>/bpids
diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt
index 59f4db2a0c85..f55639d71d35 100644
--- a/Documentation/networking/scaling.txt
+++ b/Documentation/networking/scaling.txt
@@ -122,7 +122,7 @@ associated flow of the packet. The hash is either provided by hardware
 or will be computed in the stack. Capable hardware can pass the hash in
 the receive descriptor for the packet; this would usually be the same
 hash used for RSS (e.g. computed Toeplitz hash). The hash is saved in
-skb->rx_hash and can be used elsewhere in the stack as a hash of the
+skb->hash and can be used elsewhere in the stack as a hash of the
 packet’s flow.
 
 Each receive hardware queue has an associated list of CPUs to which
diff --git a/Documentation/networking/tcp.txt b/Documentation/networking/tcp.txt
index bdc4c0db51e1..9c7139d57e57 100644
--- a/Documentation/networking/tcp.txt
+++ b/Documentation/networking/tcp.txt
@@ -1,7 +1,7 @@
 TCP protocol
 ============
 
-Last updated: 9 February 2008
+Last updated: 3 June 2017
 
 Contents
 ========
@@ -29,18 +29,19 @@ As of 2.6.13, Linux supports pluggable congestion control algorithms.
 A congestion control mechanism can be registered through functions in
 tcp_cong.c. The functions used by the congestion control mechanism are
 registered via passing a tcp_congestion_ops struct to
-tcp_register_congestion_control. As a minimum name, ssthresh,
-cong_avoid must be valid.
+tcp_register_congestion_control. As a minimum, the congestion control
+mechanism must provide a valid name and must implement either ssthresh,
+cong_avoid and undo_cwnd hooks or the "omnipotent" cong_control hook.
 
 Private data for a congestion control mechanism is stored in tp->ca_priv.
 tcp_ca(tp) returns a pointer to this space.  This is preallocated space - it
 is important to check the size of your private data will fit this space, or
-alternatively space could be allocated elsewhere and a pointer to it could
+alternatively, space could be allocated elsewhere and a pointer to it could
 be stored here.
 
 There are three kinds of congestion control algorithms currently: The
 simplest ones are derived from TCP reno (highspeed, scalable) and just
-provide an alternative the congestion window calculation. More complex
+provide an alternative congestion window calculation. More complex
 ones like BIC try to look at other events to provide better
 heuristics.  There are also round trip time based algorithms like
 Vegas and Westwood+.
@@ -49,21 +50,15 @@ Good TCP congestion control is a complex problem because the algorithm
 needs to maintain fairness and performance. Please review current
 research and RFC's before developing new modules.
 
-The method that is used to determine which congestion control mechanism is
-determined by the setting of the sysctl net.ipv4.tcp_congestion_control.
-The default congestion control will be the last one registered (LIFO);
-so if you built everything as modules, the default will be reno. If you
-build with the defaults from Kconfig, then CUBIC will be builtin (not a
-module) and it will end up the default.
+The default congestion control mechanism is chosen based on the
+DEFAULT_TCP_CONG Kconfig parameter. If you really want a particular default
+value then you can set it using sysctl net.ipv4.tcp_congestion_control. The
+module will be autoloaded if needed and you will get the expected protocol. If
+you ask for an unknown congestion method, then the sysctl attempt will fail.
 
-If you really want a particular default value then you will need
-to set it with the sysctl.  If you use a sysctl, the module will be autoloaded
-if needed and you will get the expected protocol. If you ask for an
-unknown congestion method, then the sysctl attempt will fail.
-
-If you remove a tcp congestion control module, then you will get the next
+If you remove a TCP congestion control module, then you will get the next
 available one. Since reno cannot be built as a module, and cannot be
-deleted, it will always be available.
+removed, it will always be available.
 
 How the new TCP output machine [nyi] works.
 ===========================================
diff --git a/Documentation/scheduler/sched-deadline.txt b/Documentation/scheduler/sched-deadline.txt
index cbc1b46cbf70..e89e36ec15a5 100644
--- a/Documentation/scheduler/sched-deadline.txt
+++ b/Documentation/scheduler/sched-deadline.txt
@@ -7,6 +7,8 @@ CONTENTS
  0. WARNING
  1. Overview
  2. Scheduling algorithm
+   2.1 Main algorithm
+   2.2 Bandwidth reclaiming
  3. Scheduling Real-Time Tasks
    3.1 Definitions
    3.2 Schedulability Analysis for Uniprocessor Systems
@@ -44,6 +46,9 @@ CONTENTS
 2. Scheduling algorithm
 ==================
 
+2.1 Main algorithm
+------------------
+
  SCHED_DEADLINE uses three parameters, named "runtime", "period", and
  "deadline", to schedule tasks. A SCHED_DEADLINE task should receive
  "runtime" microseconds of execution time every "period" microseconds, and
@@ -113,6 +118,160 @@ CONTENTS
          remaining runtime = remaining runtime + runtime
 
 
+2.2 Bandwidth reclaiming
+------------------------
+
+ Bandwidth reclaiming for deadline tasks is based on the GRUB (Greedy
+ Reclamation of Unused Bandwidth) algorithm [15, 16, 17] and it is enabled
+ when flag SCHED_FLAG_RECLAIM is set.
+
+ The following diagram illustrates the state names for tasks handled by GRUB:
+
+                             ------------
+                 (d)        |   Active   |
+              ------------->|            |
+              |             | Contending |
+              |              ------------
+              |                A      |
+          ----------           |      |
+         |          |          |      |
+         | Inactive |          |(b)   | (a)
+         |          |          |      |
+          ----------           |      |
+              A                |      V
+              |              ------------
+              |             |   Active   |
+              --------------|     Non    |
+                 (c)        | Contending |
+                             ------------
+
+ A task can be in one of the following states:
+
+  - ActiveContending: if it is ready for execution (or executing);
+
+  - ActiveNonContending: if it just blocked and has not yet surpassed the 0-lag
+    time;
+
+  - Inactive: if it is blocked and has surpassed the 0-lag time.
+
+ State transitions:
+
+  (a) When a task blocks, it does not become immediately inactive since its
+      bandwidth cannot be immediately reclaimed without breaking the
+      real-time guarantees. It therefore enters a transitional state called
+      ActiveNonContending. The scheduler arms the "inactive timer" to fire at
+      the 0-lag time, when the task's bandwidth can be reclaimed without
+      breaking the real-time guarantees.
+
+      The 0-lag time for a task entering the ActiveNonContending state is
+      computed as
+
+                        (runtime * dl_period)
+             deadline - ---------------------
+                             dl_runtime
+
+      where runtime is the remaining runtime, while dl_runtime and dl_period
+      are the reservation parameters.
+
+  (b) If the task wakes up before the inactive timer fires, the task re-enters
+      the ActiveContending state and the "inactive timer" is canceled.
+      In addition, if the task wakes up on a different runqueue, then
+      the task's utilization must be removed from the previous runqueue's active
+      utilization and must be added to the new runqueue's active utilization.
+      In order to avoid races between a task waking up on a runqueue while the
+       "inactive timer" is running on a different CPU, the "dl_non_contending"
+      flag is used to indicate that a task is not on a runqueue but is active
+      (so, the flag is set when the task blocks and is cleared when the
+      "inactive timer" fires or when the task  wakes up).
+
+  (c) When the "inactive timer" fires, the task enters the Inactive state and
+      its utilization is removed from the runqueue's active utilization.
+
+  (d) When an inactive task wakes up, it enters the ActiveContending state and
+      its utilization is added to the active utilization of the runqueue where
+      it has been enqueued.
+
+ For each runqueue, the algorithm GRUB keeps track of two different bandwidths:
+
+  - Active bandwidth (running_bw): this is the sum of the bandwidths of all
+    tasks in active state (i.e., ActiveContending or ActiveNonContending);
+
+  - Total bandwidth (this_bw): this is the sum of all tasks "belonging" to the
+    runqueue, including the tasks in Inactive state.
+
+
+ The algorithm reclaims the bandwidth of the tasks in Inactive state.
+ It does so by decrementing the runtime of the executing task Ti at a pace equal
+ to
+
+           dq = -max{ Ui, (1 - Uinact) } dt
+
+ where Uinact is the inactive utilization, computed as (this_bq - running_bw),
+ and Ui is the bandwidth of task Ti.
+
+
+ Let's now see a trivial example of two deadline tasks with runtime equal
+ to 4 and period equal to 8 (i.e., bandwidth equal to 0.5):
+
+     A            Task T1
+     |
+     |                               |
+     |                               |
+     |--------                       |----
+     |       |                       V
+     |---|---|---|---|---|---|---|---|--------->t
+     0   1   2   3   4   5   6   7   8
+
+
+     A            Task T2
+     |
+     |                               |
+     |                               |
+     |       ------------------------|
+     |       |                       V
+     |---|---|---|---|---|---|---|---|--------->t
+     0   1   2   3   4   5   6   7   8
+
+
+     A            running_bw
+     |
+   1 -----------------               ------
+     |               |               |
+  0.5-               -----------------
+     |                               |
+     |---|---|---|---|---|---|---|---|--------->t
+     0   1   2   3   4   5   6   7   8
+
+
+  - Time t = 0:
+
+    Both tasks are ready for execution and therefore in ActiveContending state.
+    Suppose Task T1 is the first task to start execution.
+    Since there are no inactive tasks, its runtime is decreased as dq = -1 dt.
+
+  - Time t = 2:
+
+    Suppose that task T1 blocks
+    Task T1 therefore enters the ActiveNonContending state. Since its remaining
+    runtime is equal to 2, its 0-lag time is equal to t = 4.
+    Task T2 start execution, with runtime still decreased as dq = -1 dt since
+    there are no inactive tasks.
+
+  - Time t = 4:
+
+    This is the 0-lag time for Task T1. Since it didn't woken up in the
+    meantime, it enters the Inactive state. Its bandwidth is removed from
+    running_bw.
+    Task T2 continues its execution. However, its runtime is now decreased as
+    dq = - 0.5 dt because Uinact = 0.5.
+    Task T2 therefore reclaims the bandwidth unused by Task T1.
+
+  - Time t = 8:
+
+    Task T1 wakes up. It enters the ActiveContending state again, and the
+    running_bw is incremented.
+
+
 3. Scheduling Real-Time Tasks
 =============================
 
@@ -330,6 +489,15 @@ CONTENTS
   14 - J. Erickson, U. Devi and S. Baruah. Improved tardiness bounds for
        Global EDF. Proceedings of the 22nd Euromicro Conference on
        Real-Time Systems, 2010.
+  15 - G. Lipari, S. Baruah, Greedy reclamation of unused bandwidth in
+       constant-bandwidth servers, 12th IEEE Euromicro Conference on Real-Time
+       Systems, 2000.
+  16 - L. Abeni, J. Lelli, C. Scordino, L. Palopoli, Greedy CPU reclaiming for
+       SCHED DEADLINE. In Proceedings of the Real-Time Linux Workshop (RTLWS),
+       Dusseldorf, Germany, 2014.
+  17 - L. Abeni, G. Lipari, A. Parri, Y. Sun, Multicore CPU reclaiming: parallel
+       or sequential?. In Proceedings of the 31st Annual ACM Symposium on Applied
+       Computing, 2016.
 
 
 4. Bandwidth management
diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst
index 5338673c88d9..773d2bfacc6c 100644
--- a/Documentation/sound/hd-audio/models.rst
+++ b/Documentation/sound/hd-audio/models.rst
@@ -16,6 +16,8 @@ ALC880
     6-jack in back, 2-jack in front
 6stack-digout
     6-jack with a SPDIF out
+6stack-automute
+    6-jack with headphone jack detection
 
 ALC260
 ======
@@ -62,6 +64,8 @@ lenovo-dock
     Enables docking station I/O for some Lenovos
 hp-gpio-led
     GPIO LED support on HP laptops
+hp-dock-gpio-mic1-led
+    HP dock with mic LED support
 dell-headset-multi
     Headset jack, which can also be used as mic-in
 dell-headset-dock
@@ -72,6 +76,12 @@ alc283-sense-combo
     Combo jack sensing on ALC283
 tpt440-dock
     Pin configs for Lenovo Thinkpad Dock support
+tpt440
+    Lenovo Thinkpad T440s setup
+tpt460
+    Lenovo Thinkpad T460/560 setup
+dual-codecs
+    Lenovo laptops with dual codecs
 
 ALC66x/67x/892
 ==============
@@ -97,6 +107,8 @@ inv-dmic
     Inverted internal mic workaround
 dell-headset-multi
     Headset jack, which can also be used as mic-in
+dual-codecs
+    Lenovo laptops with dual codecs
 
 ALC680
 ======
@@ -114,6 +126,8 @@ inv-dmic
     Inverted internal mic workaround
 no-primary-hp
     VAIO Z/VGC-LN51JGB workaround (for fixed speaker DAC)
+dual-codecs
+    ALC1220 dual codecs for Gaming mobos
 
 ALC861/660
 ==========
@@ -206,65 +220,47 @@ auto
 
 Conexant 5045
 =============
-laptop-hpsense
-    Laptop with HP sense (old model laptop)
-laptop-micsense
-    Laptop with Mic sense (old model fujitsu)
-laptop-hpmicsense
-    Laptop with HP and Mic senses
-benq
-    Benq R55E
-laptop-hp530
-    HP 530 laptop
-test
-    for testing/debugging purpose, almost all controls can be
-    adjusted.  Appearing only when compiled with $CONFIG_SND_DEBUG=y
+cap-mix-amp
+    Fix max input level on mixer widget
+toshiba-p105
+    Toshiba P105 quirk
+hp-530
+    HP 530 quirk
 
 Conexant 5047
 =============
-laptop
-    Basic Laptop config 
-laptop-hp
-    Laptop config for some HP models (subdevice 30A5)
-laptop-eapd
-    Laptop config with EAPD support
-test
-    for testing/debugging purpose, almost all controls can be
-    adjusted.  Appearing only when compiled with $CONFIG_SND_DEBUG=y
+cap-mix-amp
+    Fix max input level on mixer widget
 
 Conexant 5051
 =============
-laptop
-    Basic Laptop config (default)
-hp
-    HP Spartan laptop
-hp-dv6736
-    HP dv6736
-hp-f700
-    HP Compaq Presario F700
-ideapad
-    Lenovo IdeaPad laptop
-toshiba
-    Toshiba Satellite M300
+lenovo-x200
+    Lenovo X200 quirk
 
 Conexant 5066
 =============
-laptop
-    Basic Laptop config (default)
-hp-laptop
-    HP laptops, e g G60
-asus
-    Asus K52JU, Lenovo G560
-dell-laptop
-    Dell laptops
-dell-vostro
-    Dell Vostro
-olpc-xo-1_5
-    OLPC XO 1.5
-ideapad
-    Lenovo IdeaPad U150
+stereo-dmic
+    Workaround for inverted stereo digital mic
+gpio1
+    Enable GPIO1 pin
+headphone-mic-pin
+    Enable headphone mic NID 0x18 without detection
+tp410
+    Thinkpad T400 & co quirks
 thinkpad
-    Lenovo Thinkpad
+    Thinkpad mute/mic LED quirk
+lemote-a1004
+    Lemote A1004 quirk
+lemote-a1205
+    Lemote A1205 quirk
+olpc-xo
+    OLPC XO quirk
+mute-led-eapd
+    Mute LED control via EAPD
+hp-dock
+    HP dock support
+mute-led-gpio
+    Mute LED control via GPIO
 
 STAC9200
 ========
@@ -444,6 +440,8 @@ dell-eq
     Dell desktops/laptops
 alienware
     Alienware M17x
+asus-mobo
+    Pin configs for ASUS mobo with 5.1/SPDIF out
 auto
     BIOS setup (default)
 
@@ -477,6 +475,8 @@ hp-envy-ts-bass
     Pin fixup for HP Envy TS bass speaker (NID 0x10)
 hp-bnb13-eq
     Hardware equalizer setup for HP laptops
+hp-envy-ts-bass
+    HP Envy TS bass support
 auto
     BIOS setup (default)
 
@@ -496,10 +496,22 @@ auto
 
 Cirrus Logic CS4206/4207
 ========================
+mbp53
+    MacBook Pro 5,3
 mbp55
     MacBook Pro 5,5
 imac27
     IMac 27 Inch
+imac27_122
+    iMac 12,2
+apple
+    Generic Apple quirk
+mbp101
+    MacBookPro 10,1
+mbp81
+    MacBookPro 8,1
+mba42
+    MacBookAir 4,2
 auto
     BIOS setup (default)
 
@@ -509,6 +521,10 @@ mba6
     MacBook Air 6,1 and 6,2
 gpio0
     Enable GPIO 0 amp
+mbp11
+    MacBookPro 11,2
+macmini
+    MacMini 7,1
 auto
     BIOS setup (default)
 
diff --git a/Documentation/target/target-export-device b/Documentation/target/target-export-device
new file mode 100755
index 000000000000..b803f4f886b5
--- /dev/null
+++ b/Documentation/target/target-export-device
@@ -0,0 +1,80 @@
+#!/bin/sh
+#
+# This script illustrates the sequence of operations in configfs to
+# create a very simple LIO iSCSI target with a file or block device
+# backstore.
+#
+# (C) Copyright 2014 Christophe Vu-Brugier <cvubrugier@fastmail.fm>
+#
+
+print_usage() {
+    cat <<EOF
+Usage: $(basename $0) [-p PORTAL] DEVICE|FILE
+Export a block device or a file as an iSCSI target with a single LUN
+EOF
+}
+
+die() {
+    echo $1
+    exit 1
+}
+
+while getopts "hp:" arg; do
+    case $arg in
+        h) print_usage; exit 0;;
+        p) PORTAL=${OPTARG};;
+    esac
+done
+shift $(($OPTIND - 1))
+
+DEVICE=$1
+[ -n "$DEVICE" ] || die "Missing device or file argument"
+[ -b $DEVICE -o -f $DEVICE ] || die "Invalid device or file: ${DEVICE}"
+IQN="iqn.2003-01.org.linux-iscsi.$(hostname):$(basename $DEVICE)"
+[ -n "$PORTAL" ] || PORTAL="0.0.0.0:3260"
+
+CONFIGFS=/sys/kernel/config
+CORE_DIR=$CONFIGFS/target/core
+ISCSI_DIR=$CONFIGFS/target/iscsi
+
+# Load the target modules and mount the config file system
+lsmod | grep -q configfs || modprobe configfs
+lsmod | grep -q target_core_mod || modprobe target_core_mod
+mount | grep -q ^configfs || mount -t configfs none $CONFIGFS
+mkdir -p $ISCSI_DIR
+
+# Create a backstore
+if [ -b $DEVICE ]; then
+    BACKSTORE_DIR=$CORE_DIR/iblock_0/data
+    mkdir -p $BACKSTORE_DIR
+    echo "udev_path=${DEVICE}" > $BACKSTORE_DIR/control
+else
+    BACKSTORE_DIR=$CORE_DIR/fileio_0/data
+    mkdir -p $BACKSTORE_DIR
+    DEVICE_SIZE=$(du -b $DEVICE | cut -f1)
+    echo "fd_dev_name=${DEVICE}" > $BACKSTORE_DIR/control
+    echo "fd_dev_size=${DEVICE_SIZE}" > $BACKSTORE_DIR/control
+    echo 1 > $BACKSTORE_DIR/attrib/emulate_write_cache
+fi
+echo 1 > $BACKSTORE_DIR/enable
+
+# Create an iSCSI target and a target portal group (TPG)
+mkdir $ISCSI_DIR/$IQN
+mkdir $ISCSI_DIR/$IQN/tpgt_1/
+
+# Create a LUN
+mkdir $ISCSI_DIR/$IQN/tpgt_1/lun/lun_0
+ln -s $BACKSTORE_DIR $ISCSI_DIR/$IQN/tpgt_1/lun/lun_0/data
+echo 1 > $ISCSI_DIR/$IQN/tpgt_1/enable
+
+# Create a network portal
+mkdir $ISCSI_DIR/$IQN/tpgt_1/np/$PORTAL
+
+# Disable authentication
+echo 0 > $ISCSI_DIR/$IQN/tpgt_1/attrib/authentication
+echo 1 > $ISCSI_DIR/$IQN/tpgt_1/attrib/generate_node_acls
+
+# Allow write access for non authenticated initiators
+echo 0 > $ISCSI_DIR/$IQN/tpgt_1/attrib/demo_mode_write_protect
+
+echo "Target ${IQN}, portal ${PORTAL} has been created"
diff --git a/Documentation/tee.txt b/Documentation/tee.txt
new file mode 100644
index 000000000000..718599357596
--- /dev/null
+++ b/Documentation/tee.txt
@@ -0,0 +1,118 @@
+TEE subsystem
+This document describes the TEE subsystem in Linux.
+
+A TEE (Trusted Execution Environment) is a trusted OS running in some
+secure environment, for example, TrustZone on ARM CPUs, or a separate
+secure co-processor etc. A TEE driver handles the details needed to
+communicate with the TEE.
+
+This subsystem deals with:
+
+- Registration of TEE drivers
+
+- Managing shared memory between Linux and the TEE
+
+- Providing a generic API to the TEE
+
+The TEE interface
+=================
+
+include/uapi/linux/tee.h defines the generic interface to a TEE.
+
+User space (the client) connects to the driver by opening /dev/tee[0-9]* or
+/dev/teepriv[0-9]*.
+
+- TEE_IOC_SHM_ALLOC allocates shared memory and returns a file descriptor
+  which user space can mmap. When user space doesn't need the file
+  descriptor any more, it should be closed. When shared memory isn't needed
+  any longer it should be unmapped with munmap() to allow the reuse of
+  memory.
+
+- TEE_IOC_VERSION lets user space know which TEE this driver handles and
+  the its capabilities.
+
+- TEE_IOC_OPEN_SESSION opens a new session to a Trusted Application.
+
+- TEE_IOC_INVOKE invokes a function in a Trusted Application.
+
+- TEE_IOC_CANCEL may cancel an ongoing TEE_IOC_OPEN_SESSION or TEE_IOC_INVOKE.
+
+- TEE_IOC_CLOSE_SESSION closes a session to a Trusted Application.
+
+There are two classes of clients, normal clients and supplicants. The latter is
+a helper process for the TEE to access resources in Linux, for example file
+system access. A normal client opens /dev/tee[0-9]* and a supplicant opens
+/dev/teepriv[0-9].
+
+Much of the communication between clients and the TEE is opaque to the
+driver. The main job for the driver is to receive requests from the
+clients, forward them to the TEE and send back the results. In the case of
+supplicants the communication goes in the other direction, the TEE sends
+requests to the supplicant which then sends back the result.
+
+OP-TEE driver
+=============
+
+The OP-TEE driver handles OP-TEE [1] based TEEs. Currently it is only the ARM
+TrustZone based OP-TEE solution that is supported.
+
+Lowest level of communication with OP-TEE builds on ARM SMC Calling
+Convention (SMCCC) [2], which is the foundation for OP-TEE's SMC interface
+[3] used internally by the driver. Stacked on top of that is OP-TEE Message
+Protocol [4].
+
+OP-TEE SMC interface provides the basic functions required by SMCCC and some
+additional functions specific for OP-TEE. The most interesting functions are:
+
+- OPTEE_SMC_FUNCID_CALLS_UID (part of SMCCC) returns the version information
+  which is then returned by TEE_IOC_VERSION
+
+- OPTEE_SMC_CALL_GET_OS_UUID returns the particular OP-TEE implementation, used
+  to tell, for instance, a TrustZone OP-TEE apart from an OP-TEE running on a
+  separate secure co-processor.
+
+- OPTEE_SMC_CALL_WITH_ARG drives the OP-TEE message protocol
+
+- OPTEE_SMC_GET_SHM_CONFIG lets the driver and OP-TEE agree on which memory
+  range to used for shared memory between Linux and OP-TEE.
+
+The GlobalPlatform TEE Client API [5] is implemented on top of the generic
+TEE API.
+
+Picture of the relationship between the different components in the
+OP-TEE architecture.
+
+    User space                  Kernel                   Secure world
+    ~~~~~~~~~~                  ~~~~~~                   ~~~~~~~~~~~~
+ +--------+                                             +-------------+
+ | Client |                                             | Trusted     |
+ +--------+                                             | Application |
+    /\                                                  +-------------+
+    || +----------+                                           /\
+    || |tee-      |                                           ||
+    || |supplicant|                                           \/
+    || +----------+                                     +-------------+
+    \/      /\                                          | TEE Internal|
+ +-------+  ||                                          | API         |
+ + TEE   |  ||            +--------+--------+           +-------------+
+ | Client|  ||            | TEE    | OP-TEE |           | OP-TEE      |
+ | API   |  \/            | subsys | driver |           | Trusted OS  |
+ +-------+----------------+----+-------+----+-----------+-------------+
+ |      Generic TEE API        |       |     OP-TEE MSG               |
+ |      IOCTL (TEE_IOC_*)      |       |     SMCCC (OPTEE_SMC_CALL_*) |
+ +-----------------------------+       +------------------------------+
+
+RPC (Remote Procedure Call) are requests from secure world to kernel driver
+or tee-supplicant. An RPC is identified by a special range of SMCCC return
+values from OPTEE_SMC_CALL_WITH_ARG. RPC messages which are intended for the
+kernel are handled by the kernel driver. Other RPC messages will be forwarded to
+tee-supplicant without further involvement of the driver, except switching
+shared memory buffer representation.
+
+References:
+[1] https://github.com/OP-TEE/optee_os
+[2] http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html
+[3] drivers/tee/optee/optee_smc.h
+[4] drivers/tee/optee/optee_msg.h
+[5] http://www.globalplatform.org/specificationsdevice.asp look for
+    "TEE Client API Specification v1.0" and click download.
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index ef473dc7f55e..bb9a0a53e76b 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -582,3 +582,24 @@ platform data is provided, this uses the step_wise throttling policy.
 This function serves as an arbitrator to set the state of a cooling
 device. It sets the cooling device to the deepest cooling state if
 possible.
+
+6. thermal_emergency_poweroff:
+
+On an event of critical trip temperature crossing. Thermal framework
+allows the system to shutdown gracefully by calling orderly_poweroff().
+In the event of a failure of orderly_poweroff() to shut down the system
+we are in danger of keeping the system alive at undesirably high
+temperatures. To mitigate this high risk scenario we program a work
+queue to fire after a pre-determined number of seconds to start
+an emergency shutdown of the device using the kernel_power_off()
+function. In case kernel_power_off() fails then finally
+emergency_restart() is called in the worst case.
+
+The delay should be carefully profiled so as to give adequate time for
+orderly_poweroff(). In case of failure of an orderly_poweroff() the
+emergency poweroff kicks in after the delay has elapsed and shuts down
+the system.
+
+If set to 0 emergency poweroff will not be supported. So a carefully
+profiled non-zero positive value is a must for emergerncy poweroff to be
+triggered.
diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt
index 6eaf576294f3..2dcaf9adb7a7 100644
--- a/Documentation/timers/NO_HZ.txt
+++ b/Documentation/timers/NO_HZ.txt
@@ -194,32 +194,9 @@ that the RCU callbacks are processed in a timely fashion.
 
 Another approach is to offload RCU callback processing to "rcuo" kthreads
 using the CONFIG_RCU_NOCB_CPU=y Kconfig option.  The specific CPUs to
-offload may be selected via several methods:
-
-1.	One of three mutually exclusive Kconfig options specify a
-	build-time default for the CPUs to offload:
-
-	a.	The CONFIG_RCU_NOCB_CPU_NONE=y Kconfig option results in
-		no CPUs being offloaded.
-
-	b.	The CONFIG_RCU_NOCB_CPU_ZERO=y Kconfig option causes
-		CPU 0 to be offloaded.
-
-	c.	The CONFIG_RCU_NOCB_CPU_ALL=y Kconfig option causes all
-		CPUs to be offloaded.  Note that the callbacks will be
-		offloaded to "rcuo" kthreads, and that those kthreads
-		will in fact run on some CPU.  However, this approach
-		gives fine-grained control on exactly which CPUs the
-		callbacks run on, along with their scheduling priority
-		(including the default of SCHED_OTHER), and it further
-		allows this control to be varied dynamically at runtime.
-
-2.	The "rcu_nocbs=" kernel boot parameter, which takes a comma-separated
-	list of CPUs and CPU ranges, for example, "1,3-5" selects CPUs 1,
-	3, 4, and 5.  The specified CPUs will be offloaded in addition to
-	any CPUs specified as offloaded by CONFIG_RCU_NOCB_CPU_ZERO=y or
-	CONFIG_RCU_NOCB_CPU_ALL=y.  This means that the "rcu_nocbs=" boot
-	parameter has no effect for kernels built with RCU_NOCB_CPU_ALL=y.
+offload may be selected using The "rcu_nocbs=" kernel boot parameter,
+which takes a comma-separated list of CPUs and CPU ranges, for example,
+"1,3-5" selects CPUs 1, 3, 4, and 5.
 
 The offloaded CPUs will never queue RCU callbacks, and therefore RCU
 never prevents offloaded CPUs from entering either dyntick-idle mode
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 94a987bd2bc5..fff8ff6d4893 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1609,7 +1609,7 @@ Doing the same with chrt -r 5 and function-trace set.
   <idle>-0       3dN.2   14us : sched_avg_update <-__cpu_load_update
   <idle>-0       3dN.2   14us : _raw_spin_unlock <-cpu_load_update_nohz
   <idle>-0       3dN.2   14us : sub_preempt_count <-_raw_spin_unlock
-  <idle>-0       3dN.1   15us : calc_load_exit_idle <-tick_nohz_idle_exit
+  <idle>-0       3dN.1   15us : calc_load_nohz_stop <-tick_nohz_idle_exit
   <idle>-0       3dN.1   15us : touch_softlockup_watchdog <-tick_nohz_idle_exit
   <idle>-0       3dN.1   15us : hrtimer_cancel <-tick_nohz_idle_exit
   <idle>-0       3dN.1   15us : hrtimer_try_to_cancel <-hrtimer_cancel
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index ce0b48d69eaa..d05d4c54e8f7 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -2343,7 +2343,7 @@ ACQUIRE VS I/O 액세스
 					spin_unlock(Q);
 
 
-더 많은 정보를 위해선 Documenataion/DocBook/deviceiobook.tmpl 을 참고하세요.
+더 많은 정보를 위해선 Documentation/driver-api/device-io.rst 를 참고하세요.
 
 
 =========================
@@ -2578,7 +2578,7 @@ CPU 에서는 사용되는 어토믹 인스트럭션 자체에 메모리 배리�
  (2) 만약 액세스 함수들이 완화된 메모리 액세스 속성을 갖는 I/O 메모리 윈도우를
      사용한다면, 순서를 강제하기 위해선 _mandatory_ 메모리 배리어가 필요합니다.
 
-더 많은 정보를 위해선 Documentation/DocBook/deviceiobook.tmpl 을 참고하십시오.
+더 많은 정보를 위해선 Documentation/driver-api/device-io.rst 를 참고하십시오.
 
 
 인터럽트
diff --git a/Documentation/usb/typec.rst b/Documentation/usb/typec.rst
index b67a46779de9..8a7249f2ff04 100644
--- a/Documentation/usb/typec.rst
+++ b/Documentation/usb/typec.rst
@@ -114,8 +114,7 @@ the details during registration. The class offers the following API for
 registering/unregistering cables and their plugs:
 
 .. kernel-doc:: drivers/usb/typec/typec.c
-   :functions: typec_register_cable typec_unregister_cable typec_register_plug
-   typec_unregister_plug
+   :functions: typec_register_cable typec_unregister_cable typec_register_plug typec_unregister_plug
 
 The class will provide a handle to struct typec_cable and struct typec_plug if
 the registration is successful, or NULL if it isn't.
@@ -137,8 +136,7 @@ during connection of a partner or cable, the port driver must use the following
 APIs to report it to the class:
 
 .. kernel-doc:: drivers/usb/typec/typec.c
-   :functions: typec_set_data_role typec_set_pwr_role typec_set_vconn_role
-   typec_set_pwr_opmode
+   :functions: typec_set_data_role typec_set_pwr_role typec_set_vconn_role typec_set_pwr_opmode
 
 Alternate Modes
 ~~~~~~~~~~~~~~~
diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
index 6081a5b7fc1e..eb06beb75960 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
@@ -32,7 +32,128 @@ Groups:
     KVM_DEV_ARM_VGIC_CTRL_INIT
       request the initialization of the ITS, no additional parameter in
       kvm_device_attr.addr.
+
+    KVM_DEV_ARM_ITS_SAVE_TABLES
+      save the ITS table data into guest RAM, at the location provisioned
+      by the guest in corresponding registers/table entries.
+
+      The layout of the tables in guest memory defines an ABI. The entries
+      are laid out in little endian format as described in the last paragraph.
+
+    KVM_DEV_ARM_ITS_RESTORE_TABLES
+      restore the ITS tables from guest RAM to ITS internal structures.
+
+      The GICV3 must be restored before the ITS and all ITS registers but
+      the GITS_CTLR must be restored before restoring the ITS tables.
+
+      The GITS_IIDR read-only register must also be restored before
+      calling KVM_DEV_ARM_ITS_RESTORE_TABLES as the IIDR revision field
+      encodes the ABI revision.
+
+      The expected ordering when restoring the GICv3/ITS is described in section
+      "ITS Restore Sequence".
+
   Errors:
     -ENXIO:  ITS not properly configured as required prior to setting
              this attribute
     -ENOMEM: Memory shortage when allocating ITS internal data
+    -EINVAL: Inconsistent restored data
+    -EFAULT: Invalid guest ram access
+    -EBUSY:  One or more VCPUS are running
+
+  KVM_DEV_ARM_VGIC_GRP_ITS_REGS
+  Attributes:
+      The attr field of kvm_device_attr encodes the offset of the
+      ITS register, relative to the ITS control frame base address
+      (ITS_base).
+
+      kvm_device_attr.addr points to a __u64 value whatever the width
+      of the addressed register (32/64 bits). 64 bit registers can only
+      be accessed with full length.
+
+      Writes to read-only registers are ignored by the kernel except for:
+      - GITS_CREADR. It must be restored otherwise commands in the queue
+        will be re-executed after restoring CWRITER. GITS_CREADR must be
+        restored before restoring the GITS_CTLR which is likely to enable the
+        ITS. Also it must be restored after GITS_CBASER since a write to
+        GITS_CBASER resets GITS_CREADR.
+      - GITS_IIDR. The Revision field encodes the table layout ABI revision.
+        In the future we might implement direct injection of virtual LPIs.
+        This will require an upgrade of the table layout and an evolution of
+        the ABI. GITS_IIDR must be restored before calling
+        KVM_DEV_ARM_ITS_RESTORE_TABLES.
+
+      For other registers, getting or setting a register has the same
+      effect as reading/writing the register on real hardware.
+  Errors:
+    -ENXIO: Offset does not correspond to any supported register
+    -EFAULT: Invalid user pointer for attr->addr
+    -EINVAL: Offset is not 64-bit aligned
+    -EBUSY: one or more VCPUS are running
+
+ ITS Restore Sequence:
+ -------------------------
+
+The following ordering must be followed when restoring the GIC and the ITS:
+a) restore all guest memory and create vcpus
+b) restore all redistributors
+c) provide the its base address
+   (KVM_DEV_ARM_VGIC_GRP_ADDR)
+d) restore the ITS in the following order:
+   1. Restore GITS_CBASER
+   2. Restore all other GITS_ registers, except GITS_CTLR!
+   3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
+   4. Restore GITS_CTLR
+
+Then vcpus can be started.
+
+ ITS Table ABI REV0:
+ -------------------
+
+ Revision 0 of the ABI only supports the features of a virtual GICv3, and does
+ not support a virtual GICv4 with support for direct injection of virtual
+ interrupts for nested hypervisors.
+
+ The device table and ITT are indexed by the DeviceID and EventID,
+ respectively. The collection table is not indexed by CollectionID, and the
+ entries in the collection are listed in no particular order.
+ All entries are 8 bytes.
+
+ Device Table Entry (DTE):
+
+ bits:     | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
+ values:   | V |   next    | ITT_addr |  Size   |
+
+ where;
+ - V indicates whether the entry is valid. If not, other fields
+   are not meaningful.
+ - next: equals to 0 if this entry is the last one; otherwise it
+   corresponds to the DeviceID offset to the next DTE, capped by
+   2^14 -1.
+ - ITT_addr matches bits [51:8] of the ITT address (256 Byte aligned).
+ - Size specifies the supported number of bits for the EventID,
+   minus one
+
+ Collection Table Entry (CTE):
+
+ bits:     | 63| 62 ..  52  | 51 ... 16 | 15  ...   0 |
+ values:   | V |    RES0    |  RDBase   |    ICID     |
+
+ where:
+ - V indicates whether the entry is valid. If not, other fields are
+   not meaningful.
+ - RES0: reserved field with Should-Be-Zero-or-Preserved behavior.
+ - RDBase is the PE number (GICR_TYPER.Processor_Number semantic),
+ - ICID is the collection ID
+
+ Interrupt Translation Entry (ITE):
+
+ bits:     | 63 ... 48 | 47 ... 16 | 15 ... 0 |
+ values:   |    next   |   pINTID  |  ICID    |
+
+ where:
+ - next: equals to 0 if this entry is the last one; otherwise it corresponds
+   to the EventID offset to the next ITE capped by 2^16 -1.
+ - pINTID is the physical LPI ID; if zero, it means the entry is not valid
+   and other fields are not meaningful.
+ - ICID is the collection ID
diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
index c1a24612c198..9293b45abdb9 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
@@ -167,11 +167,17 @@ Groups:
     KVM_DEV_ARM_VGIC_CTRL_INIT
       request the initialization of the VGIC, no additional parameter in
       kvm_device_attr.addr.
+    KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES
+      save all LPI pending bits into guest RAM pending tables.
+
+      The first kB of the pending table is not altered by this operation.
   Errors:
     -ENXIO: VGIC not properly configured as required prior to calling
      this attribute
     -ENODEV: no online VCPU
     -ENOMEM: memory shortage when allocating vgic internal data
+    -EFAULT: Invalid guest ram access
+    -EBUSY:  One or more VCPUS are running
 
 
   KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt
index 4f7d86dd0a5d..914518aeb972 100644
--- a/Documentation/watchdog/watchdog-parameters.txt
+++ b/Documentation/watchdog/watchdog-parameters.txt
@@ -117,7 +117,7 @@ nowayout: Watchdog cannot be stopped once started
 -------------------------------------------------
 iTCO_wdt:
 heartbeat: Watchdog heartbeat in seconds.
-	(2<heartbeat<39 (TCO v1) or 613 (TCO v2), default=30)
+	(5<=heartbeat<=74 (TCO v1) or 1226 (TCO v2), default=30)
 nowayout: Watchdog cannot be stopped once started
 	(default=kernel config parameter)
 -------------------------------------------------
diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index 0f6d8477b66c..c491a1b82de2 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -295,7 +295,7 @@ kernel and the tasks running there get 50% of the cache. They should
 also get 50% of memory bandwidth assuming that the cores 4-7 are SMT
 siblings and only the real time threads are scheduled on the cores 4-7.
 
-# echo C0 > p0/cpus
+# echo F0 > p0/cpus
 
 4) Locking between applications
 
diff --git a/Kbuild b/Kbuild
index 3d0ae152af7c..94c752762bc2 100644
--- a/Kbuild
+++ b/Kbuild
@@ -7,31 +7,6 @@
 # 4) Check for missing system calls
 # 5) Generate constants.py (may need bounds.h)
 
-# Default sed regexp - multiline due to syntax constraints
-define sed-y
-	"/^->/{s:->#\(.*\):/* \1 */:; \
-	s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \
-	s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
-	s:->::; p;}"
-endef
-
-# Use filechk to avoid rebuilds when a header changes, but the resulting file
-# does not
-define filechk_offsets
-	(set -e; \
-	 echo "#ifndef $2"; \
-	 echo "#define $2"; \
-	 echo "/*"; \
-	 echo " * DO NOT MODIFY."; \
-	 echo " *"; \
-	 echo " * This file was generated by Kbuild"; \
-	 echo " */"; \
-	 echo ""; \
-	 sed -ne $(sed-y); \
-	 echo ""; \
-	 echo "#endif" )
-endef
-
 #####
 # 1) Generate bounds.h
 
diff --git a/MAINTAINERS b/MAINTAINERS
index f42daf74f541..d357695ee4fe 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -846,7 +846,6 @@ M:	Laura Abbott <labbott@redhat.com>
 M:	Sumit Semwal <sumit.semwal@linaro.org>
 L:	devel@driverdev.osuosl.org
 S:	Supported
-F:	Documentation/devicetree/bindings/staging/ion/
 F:	drivers/staging/android/ion
 F:	drivers/staging/android/uapi/ion.h
 F:	drivers/staging/android/uapi/ion_test.h
@@ -1085,6 +1084,16 @@ F: 	drivers/pinctrl/meson/
 F:	drivers/mmc/host/meson*
 N:	meson
 
+ARM/Amlogic Meson SoC CLOCK FRAMEWORK
+M:	Neil Armstrong <narmstrong@baylibre.com>
+M:	Jerome Brunet <jbrunet@baylibre.com>
+L:	linux-amlogic@lists.infradead.org
+S:	Maintained
+F:	drivers/clk/meson/
+F:	include/dt-bindings/clock/meson*
+F:	include/dt-bindings/clock/gxbb*
+F:	Documentation/devicetree/bindings/clock/amlogic*
+
 ARM/Annapurna Labs ALPINE ARCHITECTURE
 M:	Tsahee Zidenberg <tsahee@annapurnalabs.com>
 M:	Antoine Tenart <antoine.tenart@free-electrons.com>
@@ -1163,7 +1172,7 @@ N:	clps711x
 
 ARM/CIRRUS LOGIC EP93XX ARM ARCHITECTURE
 M:	Hartley Sweeten <hsweeten@visionengravers.com>
-M:	Ryan Mallon <rmallon@gmail.com>
+M:	Alexander Sverdlin <alexander.sverdlin@gmail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-ep93xx/
@@ -1480,13 +1489,15 @@ M:	Gregory Clement <gregory.clement@free-electrons.com>
 M:	Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
-F:	arch/arm/mach-mvebu/
-F:	drivers/rtc/rtc-armada38x.c
 F:	arch/arm/boot/dts/armada*
 F:	arch/arm/boot/dts/kirkwood*
+F:	arch/arm/configs/mvebu_*_defconfig
+F:	arch/arm/mach-mvebu/
 F:	arch/arm64/boot/dts/marvell/armada*
 F:	drivers/cpufreq/mvebu-cpufreq.c
-F:	arch/arm/configs/mvebu_*_defconfig
+F:	drivers/irqchip/irq-armada-370-xp.c
+F:	drivers/irqchip/irq-mvebu-*
+F:	drivers/rtc/rtc-armada38x.c
 
 ARM/Marvell Berlin SoC support
 M:	Jisheng Zhang <jszhang@marvell.com>
@@ -1712,7 +1723,6 @@ N:	rockchip
 ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
 M:	Kukjin Kim <kgene@kernel.org>
 M:	Krzysztof Kozlowski <krzk@kernel.org>
-R:	Javier Martinez Canillas <javier@osg.samsung.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 Q:	https://patchwork.kernel.org/project/linux-samsung-soc/list/
@@ -1820,7 +1830,6 @@ F:	drivers/edac/altera_edac.
 ARM/STI ARCHITECTURE
 M:	Patrice Chotard <patrice.chotard@st.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-L:	kernel@stlinux.com
 W:	http://www.stlinux.com
 S:	Maintained
 F:	arch/arm/mach-sti/
@@ -2264,7 +2273,7 @@ M:	Wenyou Yang <wenyou.yang@atmel.com>
 M:	Josh Wu <rainyfeeling@outlook.com>
 L:	linux-mtd@lists.infradead.org
 S:	Supported
-F:	drivers/mtd/nand/atmel_nand*
+F:	drivers/mtd/nand/atmel/*
 
 ATMEL SDMMC DRIVER
 M:	Ludovic Desroches <ludovic.desroches@microchip.com>
@@ -2313,6 +2322,15 @@ F:	Documentation/devicetree/bindings/input/atmel,maxtouch.txt
 F:	drivers/input/touchscreen/atmel_mxt_ts.c
 F:	include/linux/platform_data/atmel_mxt_ts.h
 
+ATOMIC INFRASTRUCTURE
+M:	Will Deacon <will.deacon@arm.com>
+M:	Peter Zijlstra <peterz@infradead.org>
+R:	Boqun Feng <boqun.feng@gmail.com>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	arch/*/include/asm/atomic*.h
+F:	include/*/atomic*.h
+
 ATTO EXPRESSSAS SAS/SATA RAID SCSI DRIVER
 M:	Bradley Grove <linuxdrivers@attotech.com>
 L:	linux-scsi@vger.kernel.org
@@ -2473,7 +2491,7 @@ S:	Maintained
 F:	drivers/net/ethernet/ec_bhf.c
 
 BFS FILE SYSTEM
-M:	"Tigran A. Aivazian" <tigran@aivazian.fsnet.co.uk>
+M:	"Tigran A. Aivazian" <aivazian.tigran@gmail.com>
 S:	Maintained
 F:	Documentation/filesystems/bfs.txt
 F:	fs/bfs/
@@ -2926,6 +2944,8 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs.git
 S:	Maintained
 F:	Documentation/filesystems/btrfs.txt
 F:	fs/btrfs/
+F:	include/linux/btrfs*
+F:	include/uapi/linux/btrfs*
 
 BTTV VIDEO4LINUX DRIVER
 M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
@@ -2953,7 +2973,7 @@ F:	sound/pci/oxygen/
 
 C6X ARCHITECTURE
 M:	Mark Salter <msalter@redhat.com>
-M:	Aurelien Jacquiot <a-jacquiot@ti.com>
+M:	Aurelien Jacquiot <jacquiot.aurelien@gmail.com>
 L:	linux-c6x-dev@linux-c6x.org
 W:	http://www.linux-c6x.org/wiki/index.php/Main_Page
 S:	Maintained
@@ -3104,6 +3124,14 @@ F:	drivers/net/ieee802154/cc2520.c
 F:	include/linux/spi/cc2520.h
 F:	Documentation/devicetree/bindings/net/ieee802154/cc2520.txt
 
+CCREE ARM TRUSTZONE CRYPTOCELL 700 REE DRIVER
+M:	Gilad Ben-Yossef <gilad@benyossef.com>
+L:	linux-crypto@vger.kernel.org
+L:	driverdev-devel@linuxdriverproject.org
+S:	Supported
+F:	drivers/staging/ccree/
+W:	https://developer.arm.com/products/system-ip/trustzone-cryptocell/cryptocell-700-family
+
 CEC FRAMEWORK
 M:	Hans Verkuil <hans.verkuil@cisco.com>
 L:	linux-media@vger.kernel.org
@@ -5588,6 +5616,7 @@ L:	linux-pm@vger.kernel.org
 S:	Supported
 F:	drivers/base/power/domain*.c
 F:	include/linux/pm_domain.h
+F:	Documentation/devicetree/bindings/power/power_domain.txt
 
 GENERIC UIO DRIVER FOR PCI DEVICES
 M:	"Michael S. Tsirkin" <mst@redhat.com>
@@ -5602,7 +5631,7 @@ F:	scripts/get_maintainer.pl
 
 GENWQE (IBM Generic Workqueue Card)
 M:	Frank Haverkamp <haver@linux.vnet.ibm.com>
-M:	Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
+M:	Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
 S:	Supported
 F:	drivers/misc/genwqe/
 
@@ -5647,7 +5676,6 @@ F:	tools/testing/selftests/gpio/
 
 GPIO SUBSYSTEM
 M:	Linus Walleij <linus.walleij@linaro.org>
-M:	Alexandre Courbot <gnurou@gmail.com>
 L:	linux-gpio@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
 S:	Maintained
@@ -5682,7 +5710,7 @@ M:	Alex Elder <elder@kernel.org>
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 S:	Maintained
 F:	drivers/staging/greybus/
-L:	greybus-dev@lists.linaro.org
+L:	greybus-dev@lists.linaro.org (moderated for non-subscribers)
 
 GREYBUS AUDIO PROTOCOLS DRIVERS
 M:	Vaibhav Agarwal <vaibhav.sr@gmail.com>
@@ -7123,7 +7151,7 @@ S:	Maintained
 F:	drivers/media/platform/rcar_jpu.c
 
 JSM Neo PCI based serial card
-M:	Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
+M:	Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
 L:	linux-serial@vger.kernel.org
 S:	Maintained
 F:	drivers/tty/serial/jsm/
@@ -7536,7 +7564,7 @@ S:	Maintained
 F:	drivers/ata/sata_promise.*
 
 LIBLOCKDEP
-M:	Sasha Levin <sasha.levin@oracle.com>
+M:	Sasha Levin <alexander.levin@verizon.com>
 S:	Maintained
 F:	tools/lib/lockdep/
 
@@ -7687,7 +7715,7 @@ F:	drivers/platform/x86/hp_accel.c
 
 LIVE PATCHING
 M:	Josh Poimboeuf <jpoimboe@redhat.com>
-M:	Jessica Yu <jeyu@redhat.com>
+M:	Jessica Yu <jeyu@kernel.org>
 M:	Jiri Kosina <jikos@kernel.org>
 M:	Miroslav Benes <mbenes@suse.cz>
 R:	Petr Mladek <pmladek@suse.com>
@@ -7910,7 +7938,7 @@ L:	linux-man@vger.kernel.org
 S:	Maintained
 
 MARDUK (CREATOR CI40) DEVICE TREE SUPPORT
-M:	Rahul Bedarkar <rahul.bedarkar@imgtec.com>
+M:	Rahul Bedarkar <rahulbedarkar89@gmail.com>
 L:	linux-mips@linux-mips.org
 S:	Maintained
 F:	arch/mips/boot/dts/img/pistachio_marduk.dts
@@ -8363,12 +8391,12 @@ M:	Brian Norris <computersforpeace@gmail.com>
 M:	Boris Brezillon <boris.brezillon@free-electrons.com>
 M:	Marek Vasut <marek.vasut@gmail.com>
 M:	Richard Weinberger <richard@nod.at>
-M:	Cyrille Pitchen <cyrille.pitchen@atmel.com>
+M:	Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
 L:	linux-mtd@lists.infradead.org
 W:	http://www.linux-mtd.infradead.org/
 Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
-T:	git git://git.infradead.org/linux-mtd.git
-T:	git git://git.infradead.org/l2-mtd.git
+T:	git git://git.infradead.org/linux-mtd.git master
+T:	git git://git.infradead.org/l2-mtd.git master
 S:	Maintained
 F:	Documentation/devicetree/bindings/mtd/
 F:	drivers/mtd/
@@ -8488,7 +8516,7 @@ S:	Odd Fixes
 F:	drivers/media/radio/radio-miropcm20*
 
 MELLANOX MLX4 core VPI driver
-M:	Yishai Hadas <yishaih@mellanox.com>
+M:	Tariq Toukan <tariqt@mellanox.com>
 L:	netdev@vger.kernel.org
 L:	linux-rdma@vger.kernel.org
 W:	http://www.mellanox.com
@@ -8496,7 +8524,6 @@ Q:	http://patchwork.ozlabs.org/project/netdev/list/
 S:	Supported
 F:	drivers/net/ethernet/mellanox/mlx4/
 F:	include/linux/mlx4/
-F:	include/uapi/rdma/mlx4-abi.h
 
 MELLANOX MLX4 IB driver
 M:	Yishai Hadas <yishaih@mellanox.com>
@@ -8506,6 +8533,7 @@ Q:	http://patchwork.kernel.org/project/linux-rdma/list/
 S:	Supported
 F:	drivers/infiniband/hw/mlx4/
 F:	include/linux/mlx4/
+F:	include/uapi/rdma/mlx4-abi.h
 
 MELLANOX MLX5 core VPI driver
 M:	Saeed Mahameed <saeedm@mellanox.com>
@@ -8518,7 +8546,6 @@ Q:	http://patchwork.ozlabs.org/project/netdev/list/
 S:	Supported
 F:	drivers/net/ethernet/mellanox/mlx5/core/
 F:	include/linux/mlx5/
-F:	include/uapi/rdma/mlx5-abi.h
 
 MELLANOX MLX5 IB driver
 M:	Matan Barak <matanb@mellanox.com>
@@ -8529,6 +8556,7 @@ Q:	http://patchwork.kernel.org/project/linux-rdma/list/
 S:	Supported
 F:	drivers/infiniband/hw/mlx5/
 F:	include/linux/mlx5/
+F:	include/uapi/rdma/mlx5-abi.h
 
 MELEXIS MLX90614 DRIVER
 M:	Crt Mori <cmo@melexis.com>
@@ -8568,7 +8596,7 @@ S:	Maintained
 F:	drivers/media/dvb-frontends/mn88473*
 
 MODULE SUPPORT
-M:	Jessica Yu <jeyu@redhat.com>
+M:	Jessica Yu <jeyu@kernel.org>
 M:	Rusty Russell <rusty@rustcorp.com.au>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git modules-next
 S:	Maintained
@@ -8743,7 +8771,8 @@ R:	Richard Weinberger <richard@nod.at>
 L:	linux-mtd@lists.infradead.org
 W:	http://www.linux-mtd.infradead.org/
 Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
-T:	git git://github.com/linux-nand/linux.git
+T:	git git://git.infradead.org/linux-mtd.git nand/fixes
+T:	git git://git.infradead.org/l2-mtd.git nand/next
 S:	Maintained
 F:	drivers/mtd/nand/
 F:	include/linux/mtd/nand*.h
@@ -9515,6 +9544,11 @@ F:	arch/*/oprofile/
 F:	drivers/oprofile/
 F:	include/linux/oprofile.h
 
+OP-TEE DRIVER
+M:	Jens Wiklander <jens.wiklander@linaro.org>
+S:	Maintained
+F:	drivers/tee/optee/
+
 ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
 M:	Mark Fasheh <mfasheh@versity.com>
 M:	Joel Becker <jlbec@evilplan.org>
@@ -9534,10 +9568,6 @@ F:	drivers/net/wireless/intersil/orinoco/
 
 OSD LIBRARY and FILESYSTEM
 M:	Boaz Harrosh <ooo@electrozaur.com>
-M:	Benny Halevy <bhalevy@primarydata.com>
-L:	osd-dev@open-osd.org
-W:	http://open-osd.org
-T:	git git://git.open-osd.org/open-osd.git
 S:	Maintained
 F:	drivers/scsi/osd/
 F:	include/scsi/osd_*
@@ -10428,7 +10458,7 @@ S:	Orphan
 
 PXA RTC DRIVER
 M:	Robert Jarzmik <robert.jarzmik@free.fr>
-L:	rtc-linux@googlegroups.com
+L:	linux-rtc@vger.kernel.org
 S:	Maintained
 
 QAT DRIVER
@@ -10735,7 +10765,7 @@ X:	kernel/torture.c
 REAL TIME CLOCK (RTC) SUBSYSTEM
 M:	Alessandro Zummo <a.zummo@towertech.it>
 M:	Alexandre Belloni <alexandre.belloni@free-electrons.com>
-L:	rtc-linux@googlegroups.com
+L:	linux-rtc@vger.kernel.org
 Q:	http://patchwork.ozlabs.org/project/rtc-linux/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git
 S:	Maintained
@@ -11246,7 +11276,6 @@ F:	drivers/media/rc/serial_ir.c
 
 STI CEC DRIVER
 M:	Benjamin Gaignard <benjamin.gaignard@linaro.org>
-L:	kernel@stlinux.com
 S:	Maintained
 F:	drivers/staging/media/st-cec/
 F:	Documentation/devicetree/bindings/media/stih-cec.txt
@@ -11296,6 +11325,14 @@ F:	drivers/hwtracing/stm/
 F:	include/linux/stm.h
 F:	include/uapi/linux/stm.h
 
+TEE SUBSYSTEM
+M:	Jens Wiklander <jens.wiklander@linaro.org>
+S:	Maintained
+F:	include/linux/tee_drv.h
+F:	include/uapi/linux/tee.h
+F:	drivers/tee/
+F:	Documentation/tee.txt
+
 THUNDERBOLT DRIVER
 M:	Andreas Noever <andreas.noever@gmail.com>
 S:	Maintained
@@ -11748,6 +11785,7 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/nsekhar/linux-davinci.git
 S:	Supported
 F:	arch/arm/mach-davinci/
 F:	drivers/i2c/busses/i2c-davinci.c
+F:	arch/arm/boot/dts/da850*
 
 TI DAVINCI SERIES MEDIA DRIVER
 M:	"Lad, Prabhakar" <prabhakar.csengg@gmail.com>
@@ -12087,7 +12125,7 @@ S:	Maintained
 F:	drivers/clk/spear/
 
 SPI NOR SUBSYSTEM
-M:	Cyrille Pitchen <cyrille.pitchen@atmel.com>
+M:	Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
 M:	Marek Vasut <marek.vasut@gmail.com>
 L:	linux-mtd@lists.infradead.org
 W:	http://www.linux-mtd.infradead.org/
@@ -13433,6 +13471,17 @@ W:	http://en.wikipedia.org/wiki/Util-linux
 T:	git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git
 S:	Maintained
 
+UUID HELPERS
+M:	Christoph Hellwig <hch@lst.de>
+R:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+L:	linux-kernel@vger.kernel.org
+T:	git git://git.infradead.org/users/hch/uuid.git
+F:	lib/uuid.c
+F:	lib/test_uuid.c
+F:	include/linux/uuid.h
+F:	include/uapi/linux/uuid.h
+S:	Maintained
+
 UVESAFB DRIVER
 M:	Michal Januszewski <spock@gentoo.org>
 L:	linux-fbdev@vger.kernel.org
@@ -13540,8 +13589,8 @@ F:	include/uapi/linux/virtio_*.h
 F:	drivers/crypto/virtio/
 
 VIRTIO DRIVERS FOR S390
-M:	Christian Borntraeger <borntraeger@de.ibm.com>
 M:	Cornelia Huck <cornelia.huck@de.ibm.com>
+M:	Halil Pasic <pasic@linux.vnet.ibm.com>
 L:	linux-s390@vger.kernel.org
 L:	virtualization@lists.linux-foundation.org
 L:	kvm@vger.kernel.org
@@ -13831,7 +13880,7 @@ S:	Odd fixes
 F:	drivers/net/wireless/wl3501*
 
 WOLFSON MICROELECTRONICS DRIVERS
-L:	patches@opensource.wolfsonmicro.com
+L:	patches@opensource.cirrus.com
 T:	git https://github.com/CirrusLogic/linux-drivers.git
 W:	https://github.com/CirrusLogic/linux-drivers/wiki
 S:	Supported
diff --git a/Makefile b/Makefile
index 220121fdca4d..283c6236438e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 VERSION = 4
-PATCHLEVEL = 11
+PATCHLEVEL = 12
 SUBLEVEL = 0
 EXTRAVERSION =
 NAME = Fearless Coyote
@@ -632,13 +632,9 @@ include arch/$(SRCARCH)/Makefile
 KBUILD_CFLAGS	+= $(call cc-option,-fno-delete-null-pointer-checks,)
 KBUILD_CFLAGS	+= $(call cc-disable-warning,frame-address,)
 
-ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
-KBUILD_CFLAGS	+= $(call cc-option,-ffunction-sections,)
-KBUILD_CFLAGS	+= $(call cc-option,-fdata-sections,)
-endif
-
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-KBUILD_CFLAGS	+= -Os $(call cc-disable-warning,maybe-uninitialized,)
+KBUILD_CFLAGS	+= $(call cc-option,-Oz,-Os)
+KBUILD_CFLAGS	+= $(call cc-disable-warning,maybe-uninitialized,)
 else
 ifdef CONFIG_PROFILE_ALL_BRANCHES
 KBUILD_CFLAGS	+= -O2 $(call cc-disable-warning,maybe-uninitialized,)
@@ -698,8 +694,16 @@ endif
 KBUILD_CFLAGS += $(stackp-flag)
 
 ifeq ($(cc-name),clang)
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET	:= -target $(notdir $(CROSS_COMPILE:%-=%))
+GCC_TOOLCHAIN	:= $(realpath $(dir $(shell which $(LD)))/..)
+endif
+ifneq ($(GCC_TOOLCHAIN),)
+CLANG_GCC_TC	:= -gcc-toolchain $(GCC_TOOLCHAIN)
+endif
+KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
+KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
-KBUILD_CPPFLAGS += $(call cc-option,-Wno-unknown-warning-option,)
 KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
 KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
 KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
@@ -710,10 +714,12 @@ KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
 # See modpost pattern 2
 KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
 KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
+KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
+KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
 else
 
 # These warnings generated too much noise in a regular build.
-# Use make W=1 to enable them (see scripts/Makefile.build)
+# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
 KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
 KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
 endif
@@ -773,6 +779,11 @@ ifdef CONFIG_DEBUG_SECTION_MISMATCH
 KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once)
 endif
 
+ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+KBUILD_CFLAGS	+= $(call cc-option,-ffunction-sections,)
+KBUILD_CFLAGS	+= $(call cc-option,-fdata-sections,)
+endif
+
 # arch Makefile may override CC so keep this after arch Makefile is included
 NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
 CHECKFLAGS     += $(NOSTDINC_FLAGS)
@@ -801,6 +812,9 @@ KBUILD_CFLAGS   += $(call cc-option,-Werror=date-time)
 # enforce correct pointer usage
 KBUILD_CFLAGS   += $(call cc-option,-Werror=incompatible-pointer-types)
 
+# Require designated initializers for all marked structures
+KBUILD_CFLAGS   += $(call cc-option,-Werror=designated-init)
+
 # use the deterministic mode of AR if available
 KBUILD_ARFLAGS := $(call ar-option,D)
 
@@ -815,7 +829,7 @@ KBUILD_AFLAGS   += $(ARCH_AFLAGS)   $(KAFLAGS)
 KBUILD_CFLAGS   += $(ARCH_CFLAGS)   $(KCFLAGS)
 
 # Use --build-id when available.
-LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\
+LDFLAGS_BUILD_ID := $(patsubst -Wl$(comma)%,%,\
 			      $(call cc-ldoption, -Wl$(comma)--build-id,))
 KBUILD_LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
 LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
@@ -1128,7 +1142,7 @@ firmware_install:
 export INSTALL_HDR_PATH = $(objtree)/usr
 
 # If we do an all arch process set dst to asm-$(hdr-arch)
-hdr-dst = $(if $(KBUILD_HEADERS), dst=include/asm-$(hdr-arch), dst=include/asm)
+hdr-dst = $(if $(KBUILD_HEADERS), dst=include/arch-$(hdr-arch), dst=include)
 
 PHONY += archheaders
 archheaders:
@@ -1149,7 +1163,7 @@ headers_install: __headers
 	$(if $(wildcard $(srctree)/arch/$(hdr-arch)/include/uapi/asm/Kbuild),, \
 	  $(error Headers not exportable for the $(SRCARCH) architecture))
 	$(Q)$(MAKE) $(hdr-inst)=include/uapi
-	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi/asm $(hdr-dst)
+	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi $(hdr-dst)
 
 PHONY += headers_check_all
 headers_check_all: headers_install_all
@@ -1158,7 +1172,7 @@ headers_check_all: headers_install_all
 PHONY += headers_check
 headers_check: headers_install
 	$(Q)$(MAKE) $(hdr-inst)=include/uapi HDRCHECK=1
-	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi/asm $(hdr-dst) HDRCHECK=1
+	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi $(hdr-dst) HDRCHECK=1
 
 # ---------------------------------------------------------------------------
 # Kernel selftest
@@ -1315,8 +1329,8 @@ PHONY += distclean
 distclean: mrproper
 	@find $(srctree) $(RCS_FIND_IGNORE) \
 		\( -name '*.orig' -o -name '*.rej' -o -name '*~' \
-		-o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \
-		-o -name '.*.rej' -o -name '*%'  -o -name 'core' \) \
+		-o -name '*.bak' -o -name '#*#' -o -name '*%' \
+		-o -name 'core' \) \
 		-type f -print | xargs rm -f
 
 
@@ -1361,6 +1375,8 @@ help:
 	@echo  '                    (default: $$(INSTALL_MOD_PATH)/lib/firmware)'
 	@echo  '  dir/            - Build all files in dir and below'
 	@echo  '  dir/file.[ois]  - Build specified target only'
+	@echo  '  dir/file.ll     - Build the LLVM assembly file'
+	@echo  '                    (requires compiler support for LLVM assembly generation)'
 	@echo  '  dir/file.lst    - Build specified mixed source/assembly target only'
 	@echo  '                    (requires a recent binutils and recent build (System.map))'
 	@echo  '  dir/file.ko     - Build module including final link'
@@ -1421,7 +1437,7 @@ help:
 	@echo  '  make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build'
 	@echo  '  make V=2   [targets] 2 => give reason for rebuild of target'
 	@echo  '  make O=dir [targets] Locate all output files in "dir", including .config'
-	@echo  '  make C=1   [targets] Check all c source with $$CHECK (sparse by default)'
+	@echo  '  make C=1   [targets] Check re-compiled c source with $$CHECK (sparse by default)'
 	@echo  '  make C=2   [targets] Force check of all c source with $$CHECK'
 	@echo  '  make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
 	@echo  '  make W=n   [targets] Enable extra gcc checks, n=1,2,3 where'
@@ -1549,6 +1565,7 @@ clean: $(clean-dirs)
 		-o -name '*.symtypes' -o -name 'modules.order' \
 		-o -name modules.builtin -o -name '.tmp_*.o.*' \
 		-o -name '*.c.[012]*.*' \
+		-o -name '*.ll' \
 		-o -name '*.gcno' \) -type f -print | xargs rm -f
 
 # Generate tags for editors
@@ -1652,6 +1669,8 @@ endif
 	$(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
 %.symtypes: %.c prepare scripts FORCE
 	$(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+%.ll: %.c prepare scripts FORCE
+	$(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
 
 # Modules
 /: prepare scripts FORCE
diff --git a/arch/Kconfig b/arch/Kconfig
index dcbd462b68b1..f76b214cf7ad 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -324,6 +324,9 @@ config HAVE_CMPXCHG_LOCAL
 config HAVE_CMPXCHG_DOUBLE
 	bool
 
+config ARCH_WEAK_RELEASE_ACQUIRE
+	bool
+
 config ARCH_WANT_IPC_PARSE_VERSION
 	bool
 
@@ -864,4 +867,13 @@ config STRICT_MODULE_RWX
 config ARCH_WANT_RELAX_ORDER
 	bool
 
+config REFCOUNT_FULL
+	bool "Perform full reference count validation at the expense of speed"
+	help
+	  Enabling this switches the refcounting infrastructure from a fast
+	  unchecked atomic_t implementation to a fully state checked
+	  implementation, which can be (slightly) slower but provides protections
+	  against various use-after-free conditions that can be used in
+	  security flaw exploits.
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild
index d96f2ef5b639..b15bf6bc0e94 100644
--- a/arch/alpha/include/uapi/asm/Kbuild
+++ b/arch/alpha/include/uapi/asm/Kbuild
@@ -1,43 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += a.out.h
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += compiler.h
-header-y += console.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += fpu.h
-header-y += gentrap.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += pal.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += reg.h
-header-y += regdef.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += sysinfo.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 9ec56dc97374..ce93124a850b 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1201,8 +1201,10 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options,
 	if (!access_ok(VERIFY_WRITE, ur, sizeof(*ur)))
 		return -EFAULT;
 
-	err = 0;
-	err |= put_user(status, ustatus);
+	err = put_user(status, ustatus);
+	if (ret < 0)
+		return err ? err : ret;
+
 	err |= __put_user(r.ru_utime.tv_sec, &ur->ru_utime.tv_sec);
 	err |= __put_user(r.ru_utime.tv_usec, &ur->ru_utime.tv_usec);
 	err |= __put_user(r.ru_stime.tv_sec, &ur->ru_stime.tv_sec);
diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile
index 59660743237c..7083434dd241 100644
--- a/arch/alpha/lib/Makefile
+++ b/arch/alpha/lib/Makefile
@@ -46,11 +46,6 @@ AFLAGS___remqu.o =       -DREM
 AFLAGS___divlu.o = -DDIV       -DINTSIZE
 AFLAGS___remlu.o =       -DREM -DINTSIZE
 
-$(obj)/__divqu.o: $(obj)/$(ev6-y)divide.S
-	$(cmd_as_o_S)
-$(obj)/__remqu.o: $(obj)/$(ev6-y)divide.S
-	$(cmd_as_o_S)
-$(obj)/__divlu.o: $(obj)/$(ev6-y)divide.S
-	$(cmd_as_o_S)
-$(obj)/__remlu.o: $(obj)/$(ev6-y)divide.S
-	$(cmd_as_o_S)
+$(addprefix $(obj)/,__divqu.o __remqu.o __divlu.o __remlu.o): \
+						$(src)/$(ev6-y)divide.S FORCE
+	$(call if_changed_rule,as_o_S)
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 19cce226d1a8..44ef35d33956 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -123,9 +123,9 @@ libs-y		+= arch/arc/lib/ $(LIBGCC)
 boot		:= arch/arc/boot
 
 #default target for make without any arguments.
-KBUILD_IMAGE	:= bootpImage
+KBUILD_IMAGE	:= $(boot)/bootpImage
 
-all:	$(KBUILD_IMAGE)
+all:	bootpImage
 bootpImage: vmlinux
 
 boot_targets += uImage uImage.bin uImage.gz
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 6e1242da0159..4104a0839214 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -86,8 +86,6 @@ struct task_struct;
 #define TSK_K_BLINK(tsk)	TSK_K_REG(tsk, 4)
 #define TSK_K_FP(tsk)		TSK_K_REG(tsk, 0)
 
-#define thread_saved_pc(tsk)	TSK_K_BLINK(tsk)
-
 extern void start_thread(struct pt_regs * regs, unsigned long pc,
 			 unsigned long usp);
 
diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild
index f50d02df78d5..b15bf6bc0e94 100644
--- a/arch/arc/include/uapi/asm/Kbuild
+++ b/arch/arc/include/uapi/asm/Kbuild
@@ -1,5 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-header-y += elf.h
-header-y += page.h
-header-y += cachectl.h
diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c
index 3e25e8d6486b..2e13683dfb24 100644
--- a/arch/arc/mm/mmap.c
+++ b/arch/arc/mm/mmap.c
@@ -65,7 +65,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4c1a35f15838..6491be556ddc 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1416,6 +1416,7 @@ choice
 	config VMSPLIT_3G
 		bool "3G/1G user/kernel split"
 	config VMSPLIT_3G_OPT
+		depends on !ARM_LPAE
 		bool "3G/1G user/kernel split (for full 1G low memory)"
 	config VMSPLIT_2G
 		bool "2G/2G user/kernel split"
@@ -1637,7 +1638,7 @@ config ARCH_SELECT_MEMORY_MODEL
 config HAVE_ARCH_PFN_VALID
 	def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
 
-config HAVE_GENERIC_RCU_GUP
+config HAVE_GENERIC_GUP
 	def_bool y
 	depends on ARM_LPAE
 
@@ -2061,6 +2062,23 @@ config EFI
 	  is only useful for kernels that may run on systems that have
 	  UEFI firmware.
 
+config DMI
+	bool "Enable support for SMBIOS (DMI) tables"
+	depends on EFI
+	default y
+	help
+	  This enables SMBIOS/DMI feature for systems.
+
+	  This option is only useful on systems that have UEFI firmware.
+	  However, even with this option, the resultant kernel should
+	  continue to boot on existing non-UEFI platforms.
+
+	  NOTE: This does *NOT* enable or encourage the use of DMI quirks,
+	  i.e., the the practice of identifying the platform via DMI to
+	  decide whether certain workarounds for buggy hardware and/or
+	  firmware need to be enabled. This would require the DMI subsystem
+	  to be enabled much earlier than we do on ARM, which is non-trivial.
+
 endmenu
 
 menu "CPU Power Management"
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index ab30cc634d02..65f4e2a4eb94 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -297,10 +297,11 @@ drivers-$(CONFIG_OPROFILE)      += arch/arm/oprofile/
 libs-y				:= arch/arm/lib/ $(libs-y)
 
 # Default target when executing plain make
+boot := arch/arm/boot
 ifeq ($(CONFIG_XIP_KERNEL),y)
-KBUILD_IMAGE := xipImage
+KBUILD_IMAGE := $(boot)/xipImage
 else
-KBUILD_IMAGE := zImage
+KBUILD_IMAGE := $(boot)/zImage
 endif
 
 # Build the DT binary blobs if we have OF configured
@@ -308,9 +309,8 @@ ifeq ($(CONFIG_USE_OF),y)
 KBUILD_DTBS := dtbs
 endif
 
-all:	$(KBUILD_IMAGE) $(KBUILD_DTBS)
+all:	$(notdir $(KBUILD_IMAGE)) $(KBUILD_DTBS)
 
-boot := arch/arm/boot
 
 archheaders:
 	$(Q)$(MAKE) $(build)=arch/arm/tools uapi
diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S
index 9d5dc4fda3c1..a17ca8d78656 100644
--- a/arch/arm/boot/compressed/efi-header.S
+++ b/arch/arm/boot/compressed/efi-header.S
@@ -17,14 +17,13 @@
 		@ there.
 		.inst	'M' | ('Z' << 8) | (0x1310 << 16)   @ tstne r0, #0x4d000
 #else
-		mov	r0, r0
+ AR_CLASS(	mov	r0, r0		)
+  M_CLASS(	nop.w			)
 #endif
 		.endm
 
 		.macro	__EFI_HEADER
 #ifdef CONFIG_EFI_STUB
-		b	__efi_start
-
 		.set	start_offset, __efi_start - start
 		.org	start + 0x3c
 		@
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 7c711ba61417..8a756870c238 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -130,19 +130,22 @@ start:
 		.rept	7
 		__nop
 		.endr
-   ARM(		mov	r0, r0		)
-   ARM(		b	1f		)
- THUMB(		badr	r12, 1f		)
- THUMB(		bx	r12		)
+#ifndef CONFIG_THUMB2_KERNEL
+		mov	r0, r0
+#else
+ AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
+  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
+		.thumb
+#endif
+		W(b)	1f
 
 		.word	_magic_sig	@ Magic numbers to help the loader
 		.word	_magic_start	@ absolute load/run zImage address
 		.word	_magic_end	@ zImage end address
 		.word	0x04030201	@ endianness flag
 
- THUMB(		.thumb			)
-1:		__EFI_HEADER
-
+		__EFI_HEADER
+1:
  ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
  AR_CLASS(	mrs	r9, cpsr	)
 #ifdef CONFIG_ARM_VIRT_EXT
diff --git a/arch/arm/boot/dts/am335x-sl50.dts b/arch/arm/boot/dts/am335x-sl50.dts
index c5d2589c55fc..fc864a855991 100644
--- a/arch/arm/boot/dts/am335x-sl50.dts
+++ b/arch/arm/boot/dts/am335x-sl50.dts
@@ -220,7 +220,7 @@
 
 	mmc1_pins: pinmux_mmc1_pins {
 		pinctrl-single,pins = <
-			AM33XX_IOPAD(0x960, PIN_INPUT | MUX_MODE7)		/* spi0_cs1.gpio0_6 */
+			AM33XX_IOPAD(0x96c, PIN_INPUT | MUX_MODE7)		/* uart0_rtsn.gpio1_9 */
 		>;
 	};
 
@@ -280,10 +280,6 @@
 			AM33XX_IOPAD(0x834, PIN_INPUT_PULLUP | MUX_MODE7)	/* nKbdReset - gpmc_ad13.gpio1_13 */
 			AM33XX_IOPAD(0x838, PIN_INPUT_PULLUP | MUX_MODE7)	/* nDispReset - gpmc_ad14.gpio1_14 */
 			AM33XX_IOPAD(0x844, PIN_INPUT_PULLUP | MUX_MODE7)	/* USB1_enPower - gpmc_a1.gpio1_17 */
-			/* AVR Programming - SPI Bus (bit bang) - Screen and Keyboard */
-			AM33XX_IOPAD(0x954, PIN_INPUT_PULLUP | MUX_MODE7)	/* Kbd/Disp/BattMOSI spi0_d0.gpio0_3 */
-			AM33XX_IOPAD(0x958, PIN_INPUT_PULLUP | MUX_MODE7)	/* Kbd/Disp/BattMISO spi0_d1.gpio0_4 */
-			AM33XX_IOPAD(0x950, PIN_INPUT_PULLUP | MUX_MODE7)	/* Kbd/Disp/BattSCLK spi0_clk.gpio0_2 */
 			/* PDI Bus - Battery system */
 			AM33XX_IOPAD(0x840, PIN_INPUT_PULLUP | MUX_MODE7)	/* nBattReset  gpmc_a0.gpio1_16 */
 			AM33XX_IOPAD(0x83c, PIN_INPUT_PULLUP | MUX_MODE7)	/* BattPDIData gpmc_ad15.gpio1_15 */
@@ -384,7 +380,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc1_pins>;
 	bus-width = <4>;
-	cd-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>;
+	cd-gpios = <&gpio1 9 GPIO_ACTIVE_LOW>;
 	vmmc-supply = <&vmmcsd_fixed>;
 };
 
diff --git a/arch/arm/boot/dts/bcm283x-rpi-smsc9512.dtsi b/arch/arm/boot/dts/bcm283x-rpi-smsc9512.dtsi
index 12c981e51134..9a0599f711ff 100644
--- a/arch/arm/boot/dts/bcm283x-rpi-smsc9512.dtsi
+++ b/arch/arm/boot/dts/bcm283x-rpi-smsc9512.dtsi
@@ -1,6 +1,6 @@
 / {
 	aliases {
-		ethernet = &ethernet;
+		ethernet0 = &ethernet;
 	};
 };
 
diff --git a/arch/arm/boot/dts/bcm283x-rpi-smsc9514.dtsi b/arch/arm/boot/dts/bcm283x-rpi-smsc9514.dtsi
index 3f0a56ebcf1f..dc7ae776db5f 100644
--- a/arch/arm/boot/dts/bcm283x-rpi-smsc9514.dtsi
+++ b/arch/arm/boot/dts/bcm283x-rpi-smsc9514.dtsi
@@ -1,6 +1,6 @@
 / {
 	aliases {
-		ethernet = &ethernet;
+		ethernet0 = &ethernet;
 	};
 };
 
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 35cea3fcaf5c..9444a9a9ba10 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -3,6 +3,11 @@
 #include <dt-bindings/clock/bcm2835-aux.h>
 #include <dt-bindings/gpio/gpio.h>
 
+/* firmware-provided startup stubs live here, where the secondary CPUs are
+ * spinning.
+ */
+/memreserve/ 0x00000000 0x00001000;
+
 /* This include file covers the common peripherals and configuration between
  * bcm2835 and bcm2836 implementations, leaving the CPU configuration to
  * bcm2835.dtsi and bcm2836.dtsi.
@@ -198,8 +203,8 @@
 				brcm,pins = <0 1>;
 				brcm,function = <BCM2835_FSEL_ALT0>;
 			};
-			i2c0_gpio32: i2c0_gpio32 {
-				brcm,pins = <32 34>;
+			i2c0_gpio28: i2c0_gpio28 {
+				brcm,pins = <28 29>;
 				brcm,function = <BCM2835_FSEL_ALT0>;
 			};
 			i2c0_gpio44: i2c0_gpio44 {
@@ -295,20 +300,28 @@
 			/* Separate from the uart0_gpio14 group
 			 * because it conflicts with spi1_gpio16, and
 			 * people often run uart0 on the two pins
-			 * without flow contrl.
+			 * without flow control.
 			 */
 			uart0_ctsrts_gpio16: uart0_ctsrts_gpio16 {
 				brcm,pins = <16 17>;
 				brcm,function = <BCM2835_FSEL_ALT3>;
 			};
-			uart0_gpio30: uart0_gpio30 {
+			uart0_ctsrts_gpio30: uart0_ctsrts_gpio30 {
 				brcm,pins = <30 31>;
 				brcm,function = <BCM2835_FSEL_ALT3>;
 			};
-			uart0_ctsrts_gpio32: uart0_ctsrts_gpio32 {
+			uart0_gpio32: uart0_gpio32 {
 				brcm,pins = <32 33>;
 				brcm,function = <BCM2835_FSEL_ALT3>;
 			};
+			uart0_gpio36: uart0_gpio36 {
+				brcm,pins = <36 37>;
+				brcm,function = <BCM2835_FSEL_ALT2>;
+			};
+			uart0_ctsrts_gpio38: uart0_ctsrts_gpio38 {
+				brcm,pins = <38 39>;
+				brcm,function = <BCM2835_FSEL_ALT2>;
+			};
 
 			uart1_gpio14: uart1_gpio14 {
 				brcm,pins = <14 15>;
@@ -326,10 +339,6 @@
 				brcm,pins = <30 31>;
 				brcm,function = <BCM2835_FSEL_ALT5>;
 			};
-			uart1_gpio36: uart1_gpio36 {
-				brcm,pins = <36 37 38 39>;
-				brcm,function = <BCM2835_FSEL_ALT2>;
-			};
 			uart1_gpio40: uart1_gpio40 {
 				brcm,pins = <40 41>;
 				brcm,function = <BCM2835_FSEL_ALT5>;
diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts
index 4bc4b575c99b..31a9e061ddd0 100644
--- a/arch/arm/boot/dts/dra7-evm.dts
+++ b/arch/arm/boot/dts/dra7-evm.dts
@@ -204,6 +204,8 @@
 	tps659038: tps659038@58 {
 		compatible = "ti,tps659038";
 		reg = <0x58>;
+		ti,palmas-override-powerhold;
+		ti,system-power-controller;
 
 		tps659038_pmic {
 			compatible = "ti,tps659038-pmic";
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 57892f264cea..e7144662af45 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -2017,4 +2017,8 @@
 	coefficients = <0 2000>;
 };
 
+&cpu_crit {
+	temperature = <120000>; /* milli Celsius */
+};
+
 /include/ "dra7xx-clocks.dtsi"
diff --git a/arch/arm/boot/dts/imx53-qsrb.dts b/arch/arm/boot/dts/imx53-qsrb.dts
index de2215832372..4e103a905dc9 100644
--- a/arch/arm/boot/dts/imx53-qsrb.dts
+++ b/arch/arm/boot/dts/imx53-qsrb.dts
@@ -23,7 +23,7 @@
 	imx53-qsrb {
 		pinctrl_pmic: pmicgrp {
 			fsl,pins = <
-				MX53_PAD_CSI0_DAT5__GPIO5_23	0x1e4 /* IRQ */
+				MX53_PAD_CSI0_DAT5__GPIO5_23	0x1c4 /* IRQ */
 			>;
 		};
 	};
diff --git a/arch/arm/boot/dts/imx6sx-sdb.dts b/arch/arm/boot/dts/imx6sx-sdb.dts
index 5bb8fd57e7f5..d71da30c9cff 100644
--- a/arch/arm/boot/dts/imx6sx-sdb.dts
+++ b/arch/arm/boot/dts/imx6sx-sdb.dts
@@ -12,23 +12,6 @@
 	model = "Freescale i.MX6 SoloX SDB RevB Board";
 };
 
-&cpu0 {
-	operating-points = <
-		/* kHz    uV */
-		996000  1250000
-		792000  1175000
-		396000  1175000
-		198000  1175000
-		>;
-	fsl,soc-operating-points = <
-		/* ARM kHz      SOC uV */
-		996000	1250000
-		792000	1175000
-		396000	1175000
-		198000  1175000
-	>;
-};
-
 &i2c1 {
 	clock-frequency = <100000>;
 	pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/imx6ul-14x14-evk.dts b/arch/arm/boot/dts/imx6ul-14x14-evk.dts
index f18e1f1d0ce2..d2be8aa3370b 100644
--- a/arch/arm/boot/dts/imx6ul-14x14-evk.dts
+++ b/arch/arm/boot/dts/imx6ul-14x14-evk.dts
@@ -120,10 +120,16 @@
 
 		ethphy0: ethernet-phy@2 {
 			reg = <2>;
+			micrel,led-mode = <1>;
+			clocks = <&clks IMX6UL_CLK_ENET_REF>;
+			clock-names = "rmii-ref";
 		};
 
 		ethphy1: ethernet-phy@1 {
 			reg = <1>;
+			micrel,led-mode = <1>;
+			clocks = <&clks IMX6UL_CLK_ENET2_REF>;
+			clock-names = "rmii-ref";
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/include/arm b/arch/arm/boot/dts/include/arm
deleted file mode 120000
index a96aa0ea9d8c..000000000000
--- a/arch/arm/boot/dts/include/arm
+++ /dev/null
@@ -1 +0,0 @@
-..
-\ No newline at end of file
diff --git a/arch/arm/boot/dts/include/arm64 b/arch/arm/boot/dts/include/arm64
deleted file mode 120000
index 074a835fca3e..000000000000
--- a/arch/arm/boot/dts/include/arm64
+++ /dev/null
@@ -1 +0,0 @@
-../../../../arm64/boot/dts
-\ No newline at end of file
diff --git a/arch/arm/boot/dts/include/dt-bindings b/arch/arm/boot/dts/include/dt-bindings
deleted file mode 120000
index 08c00e4972fa..000000000000
--- a/arch/arm/boot/dts/include/dt-bindings
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../include/dt-bindings
-\ No newline at end of file
diff --git a/arch/arm/boot/dts/keystone-k2l-netcp.dtsi b/arch/arm/boot/dts/keystone-k2l-netcp.dtsi
index b6f26824e83a..66f615a74118 100644
--- a/arch/arm/boot/dts/keystone-k2l-netcp.dtsi
+++ b/arch/arm/boot/dts/keystone-k2l-netcp.dtsi
@@ -137,8 +137,8 @@ netcp: netcp@26000000 {
 	/* NetCP address range */
 	ranges = <0 0x26000000 0x1000000>;
 
-	clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>, <&clkosr>;
-	clock-names = "pa_clk", "ethss_clk", "cpts", "osr_clk";
+	clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>;
+	clock-names = "pa_clk", "ethss_clk", "cpts";
 	dma-coherent;
 
 	ti,navigator-dmas = <&dma_gbe 0>,
diff --git a/arch/arm/boot/dts/keystone-k2l.dtsi b/arch/arm/boot/dts/keystone-k2l.dtsi
index b58e7ebc0919..148650406cf7 100644
--- a/arch/arm/boot/dts/keystone-k2l.dtsi
+++ b/arch/arm/boot/dts/keystone-k2l.dtsi
@@ -232,6 +232,14 @@
 			};
 		};
 
+		osr: sram@70000000 {
+			compatible = "mmio-sram";
+			reg = <0x70000000 0x10000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			clocks = <&clkosr>;
+		};
+
 		dspgpio0: keystone_dsp_gpio@02620240 {
 			compatible = "ti,keystone-dsp-gpio";
 			gpio-controller;
diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
index 08cce17a25a0..43e9364083de 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
@@ -249,9 +249,9 @@
 			OMAP3_CORE1_IOPAD(0x2110, PIN_INPUT | MUX_MODE0)   /* cam_xclka.cam_xclka */
 			OMAP3_CORE1_IOPAD(0x2112, PIN_INPUT | MUX_MODE0)   /* cam_pclk.cam_pclk */
 
-			OMAP3_CORE1_IOPAD(0x2114, PIN_INPUT | MUX_MODE0)   /* cam_d0.cam_d0 */
-			OMAP3_CORE1_IOPAD(0x2116, PIN_INPUT | MUX_MODE0)   /* cam_d1.cam_d1 */
-			OMAP3_CORE1_IOPAD(0x2118, PIN_INPUT | MUX_MODE0)   /* cam_d2.cam_d2 */
+			OMAP3_CORE1_IOPAD(0x2116, PIN_INPUT | MUX_MODE0)   /* cam_d0.cam_d0 */
+			OMAP3_CORE1_IOPAD(0x2118, PIN_INPUT | MUX_MODE0)   /* cam_d1.cam_d1 */
+			OMAP3_CORE1_IOPAD(0x211a, PIN_INPUT | MUX_MODE0)   /* cam_d2.cam_d2 */
 			OMAP3_CORE1_IOPAD(0x211c, PIN_INPUT | MUX_MODE0)   /* cam_d3.cam_d3 */
 			OMAP3_CORE1_IOPAD(0x211e, PIN_INPUT | MUX_MODE0)   /* cam_d4.cam_d4 */
 			OMAP3_CORE1_IOPAD(0x2120, PIN_INPUT | MUX_MODE0)   /* cam_d5.cam_d5 */
diff --git a/arch/arm/boot/dts/mt7623.dtsi b/arch/arm/boot/dts/mt7623.dtsi
index 402579ab70d2..3a9e9b6aea68 100644
--- a/arch/arm/boot/dts/mt7623.dtsi
+++ b/arch/arm/boot/dts/mt7623.dtsi
@@ -72,6 +72,8 @@
 			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>,
 			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>,
 			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+		clock-frequency = <13000000>;
+		arm,cpu-registers-not-fw-configured;
 	};
 
 	watchdog: watchdog@10007000 {
diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi
index b3a8b1f24499..9ec737069369 100644
--- a/arch/arm/boot/dts/omap3-gta04.dtsi
+++ b/arch/arm/boot/dts/omap3-gta04.dtsi
@@ -55,7 +55,8 @@
 		simple-audio-card,bitclock-master = <&telephony_link_master>;
 		simple-audio-card,frame-master = <&telephony_link_master>;
 		simple-audio-card,format = "i2s";
-
+		simple-audio-card,bitclock-inversion;
+		simple-audio-card,frame-inversion;
 		simple-audio-card,cpu {
 			sound-dai = <&mcbsp4>;
 		};
diff --git a/arch/arm/boot/dts/omap4-panda-a4.dts b/arch/arm/boot/dts/omap4-panda-a4.dts
index 78d363177762..f1a6476af371 100644
--- a/arch/arm/boot/dts/omap4-panda-a4.dts
+++ b/arch/arm/boot/dts/omap4-panda-a4.dts
@@ -13,7 +13,7 @@
 /* Pandaboard Rev A4+ have external pullups on SCL & SDA */
 &dss_hdmi_pins {
 	pinctrl-single,pins = <
-		OMAP4_IOPAD(0x09a, PIN_INPUT_PULLUP | MUX_MODE0)	/* hdmi_cec.hdmi_cec */
+		OMAP4_IOPAD(0x09a, PIN_INPUT | MUX_MODE0)		/* hdmi_cec.hdmi_cec */
 		OMAP4_IOPAD(0x09c, PIN_INPUT | MUX_MODE0)		/* hdmi_scl.hdmi_scl */
 		OMAP4_IOPAD(0x09e, PIN_INPUT | MUX_MODE0)		/* hdmi_sda.hdmi_sda */
 		>;
diff --git a/arch/arm/boot/dts/omap4-panda-es.dts b/arch/arm/boot/dts/omap4-panda-es.dts
index 119f8e657edc..940fe4f7c5f6 100644
--- a/arch/arm/boot/dts/omap4-panda-es.dts
+++ b/arch/arm/boot/dts/omap4-panda-es.dts
@@ -34,7 +34,7 @@
 /* PandaboardES has external pullups on SCL & SDA */
 &dss_hdmi_pins {
 	pinctrl-single,pins = <
-		OMAP4_IOPAD(0x09a, PIN_INPUT_PULLUP | MUX_MODE0)	/* hdmi_cec.hdmi_cec */
+		OMAP4_IOPAD(0x09a, PIN_INPUT | MUX_MODE0)		/* hdmi_cec.hdmi_cec */
 		OMAP4_IOPAD(0x09c, PIN_INPUT | MUX_MODE0)		/* hdmi_scl.hdmi_scl */
 		OMAP4_IOPAD(0x09e, PIN_INPUT | MUX_MODE0)		/* hdmi_sda.hdmi_sda */
 		>;
diff --git a/arch/arm/boot/dts/rk1108.dtsi b/arch/arm/boot/dts/rk1108.dtsi
index 6c8fc19d0ecd..1297924db6ad 100644
--- a/arch/arm/boot/dts/rk1108.dtsi
+++ b/arch/arm/boot/dts/rk1108.dtsi
@@ -41,7 +41,7 @@
 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/interrupt-controller/irq.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
-#include <dt-bindings/clock/rk1108-cru.h>
+#include <dt-bindings/clock/rv1108-cru.h>
 #include <dt-bindings/pinctrl/rockchip.h>
 / {
 	#address-cells = <1>;
diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
index 1aeeacb3a884..d4f600dbb7eb 100644
--- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi
+++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
@@ -558,10 +558,11 @@
 		};
 
 		r_ccu: clock@1f01400 {
-			compatible = "allwinner,sun50i-a64-r-ccu";
+			compatible = "allwinner,sun8i-h3-r-ccu";
 			reg = <0x01f01400 0x100>;
-			clocks = <&osc24M>, <&osc32k>, <&iosc>;
-			clock-names = "hosc", "losc", "iosc";
+			clocks = <&osc24M>, <&osc32k>, <&iosc>,
+				 <&ccu 9>;
+			clock-names = "hosc", "losc", "iosc", "pll-periph";
 			#clock-cells = <1>;
 			#reset-cells = <1>;
 		};
diff --git a/arch/arm/boot/dts/versatile-pb.dts b/arch/arm/boot/dts/versatile-pb.dts
index 33a8eb28374e..06e2331f666d 100644
--- a/arch/arm/boot/dts/versatile-pb.dts
+++ b/arch/arm/boot/dts/versatile-pb.dts
@@ -1,4 +1,4 @@
-#include <versatile-ab.dts>
+#include "versatile-ab.dts"
 
 / {
 	model = "ARM Versatile PB";
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index cf062472e07b..2b913f17d50f 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -235,7 +235,7 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster)
 	return ret;
 }
 
-typedef void (*phys_reset_t)(unsigned long);
+typedef typeof(cpu_reset) phys_reset_t;
 
 void mcpm_cpu_power_down(void)
 {
@@ -300,7 +300,7 @@ void mcpm_cpu_power_down(void)
 	 * on the CPU.
 	 */
 	phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
-	phys_reset(__pa_symbol(mcpm_entry_point));
+	phys_reset(__pa_symbol(mcpm_entry_point), false);
 
 	/* should never get here */
 	BUG();
@@ -389,7 +389,7 @@ static int __init nocache_trampoline(unsigned long _arg)
 	__mcpm_cpu_down(cpu, cluster);
 
 	phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
-	phys_reset(__pa_symbol(mcpm_entry_point));
+	phys_reset(__pa_symbol(mcpm_entry_point), false);
 	BUG();
 }
 
diff --git a/arch/arm/configs/gemini_defconfig b/arch/arm/configs/gemini_defconfig
new file mode 100644
index 000000000000..d2d75fa664a6
--- /dev/null
+++ b/arch/arm/configs/gemini_defconfig
@@ -0,0 +1,68 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_USER_NS=y
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARCH_MULTI_V4=y
+# CONFIG_ARCH_MULTI_V7 is not set
+CONFIG_ARCH_GEMINI=y
+CONFIG_PCI=y
+CONFIG_PREEMPT=y
+CONFIG_AEABI=y
+CONFIG_CMDLINE="console=ttyS0,115200n8"
+CONFIG_KEXEC=y
+CONFIG_BINFMT_MISC=y
+CONFIG_PM=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_CFI_STAA=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_KEYBOARD_GPIO=y
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_GEMINI_WATCHDOG=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_FOTG210_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GEMINI=y
+CONFIG_DMADEVICES=y
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_ROMFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_DEBUG_FS=y
diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
index 36ec9c8f6e16..3234fe9bba6e 100644
--- a/arch/arm/include/asm/device.h
+++ b/arch/arm/include/asm/device.h
@@ -19,7 +19,8 @@ struct dev_archdata {
 #ifdef CONFIG_XEN
 	const struct dma_map_ops *dev_dma_ops;
 #endif
-	bool dma_coherent;
+	unsigned int dma_coherent:1;
+	unsigned int dma_ops_setup:1;
 };
 
 struct omap_device;
diff --git a/arch/arm/include/asm/dmi.h b/arch/arm/include/asm/dmi.h
new file mode 100644
index 000000000000..df2d2ff06f5b
--- /dev/null
+++ b/arch/arm/include/asm/dmi.h
@@ -0,0 +1,19 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_DMI_H
+#define __ASM_DMI_H
+
+#include <linux/io.h>
+#include <linux/slab.h>
+
+#define dmi_early_remap(x, l)		memremap(x, l, MEMREMAP_WB)
+#define dmi_early_unmap(x, l)		memunmap(x)
+#define dmi_remap(x, l)			memremap(x, l, MEMREMAP_WB)
+#define dmi_unmap(x)			memunmap(x)
+#define dmi_alloc(l)			kzalloc(l, GFP_KERNEL)
+
+#endif
diff --git a/arch/arm/include/asm/kvm_coproc.h b/arch/arm/include/asm/kvm_coproc.h
index 4917c2f7e459..e74ab0fbab79 100644
--- a/arch/arm/include/asm/kvm_coproc.h
+++ b/arch/arm/include/asm/kvm_coproc.h
@@ -31,7 +31,8 @@ void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table);
 int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
 
diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h
index 302240c19a5a..a0d726a47c8a 100644
--- a/arch/arm/include/asm/pgtable-nommu.h
+++ b/arch/arm/include/asm/pgtable-nommu.h
@@ -66,6 +66,7 @@ typedef pte_t *pte_addr_t;
 #define pgprot_noncached(prot)	(prot)
 #define pgprot_writecombine(prot) (prot)
 #define pgprot_dmacoherent(prot) (prot)
+#define pgprot_device(prot)	(prot)
 
 
 /*
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 46a76cd6acb6..607f702c2d62 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -1,23 +1,6 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += auxvec.h
-header-y += byteorder.h
-header-y += fcntl.h
-header-y += hwcap.h
-header-y += ioctls.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += perf_regs.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += setup.h
-header-y += sigcontext.h
-header-y += signal.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += unistd.h
 genhdr-y += unistd-common.h
 genhdr-y += unistd-oabi.h
 genhdr-y += unistd-eabi.h
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index a88726359e5f..5e3c673fa3f4 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -196,13 +196,17 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS	8
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
 			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
 #define VGIC_LEVEL_INFO_LINE_LEVEL	0
 
-#define   KVM_DEV_ARM_VGIC_CTRL_INIT    0
+#define   KVM_DEV_ARM_VGIC_CTRL_INIT		0
+#define   KVM_DEV_ARM_ITS_SAVE_TABLES		1
+#define   KVM_DEV_ARM_ITS_RESTORE_TABLES	2
+#define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES	3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 80254b47dc34..3ff571c2c71c 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -40,8 +40,15 @@
 #ifdef CONFIG_MMU
 void *module_alloc(unsigned long size)
 {
-	void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-				GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+	gfp_t gfp_mask = GFP_KERNEL;
+	void *p;
+
+	/* Silence the initial allocation */
+	if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS))
+		gfp_mask |= __GFP_NOWARN;
+
+	p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+				gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
 				__builtin_return_address(0));
 	if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
 		return p;
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 32e1a9513dc7..4e80bf7420d4 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -315,7 +315,7 @@ static void __init cacheid_init(void)
 	if (arch >= CPU_ARCH_ARMv6) {
 		unsigned int cachetype = read_cpuid_cachetype();
 
-		if ((arch == CPU_ARCH_ARMv7M) && !cachetype) {
+		if ((arch == CPU_ARCH_ARMv7M) && !(cachetype & 0xf000f)) {
 			cacheid = 0;
 		} else if ((cachetype & (7 << 29)) == 4 << 29) {
 			/* ARMv7 register format */
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 572a8df1b766..c9a0a5299827 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -555,8 +555,7 @@ static DEFINE_RAW_SPINLOCK(stop_lock);
  */
 static void ipi_cpu_stop(unsigned int cpu)
 {
-	if (system_state == SYSTEM_BOOTING ||
-	    system_state == SYSTEM_RUNNING) {
+	if (system_state <= SYSTEM_RUNNING) {
 		raw_spin_lock(&stop_lock);
 		pr_crit("CPU%u: stopping\n", cpu);
 		dump_stack();
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 7b3670c2ae7b..d9beee652d36 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -18,9 +18,12 @@ KVM := ../../../virt/kvm
 kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
 
 obj-$(CONFIG_KVM_ARM_HOST) += hyp/
+
 obj-y += kvm-arm.o init.o interrupts.o
-obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o vgic-v3-coproc.o
+obj-y += handle_exit.o guest.o emulate.o reset.o
+obj-y += coproc.o coproc_a15.o coproc_a7.o   vgic-v3-coproc.o
+obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
+obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
 obj-y += $(KVM)/arm/aarch32.o
 
 obj-y += $(KVM)/arm/vgic/vgic.o
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 2c14b69511e9..6d1d2e26dfe5 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -32,6 +32,7 @@
 #include <asm/vfp.h>
 #include "../vfp/vfpinstr.h"
 
+#define CREATE_TRACE_POINTS
 #include "trace.h"
 #include "coproc.h"
 
@@ -111,12 +112,6 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 1;
 }
 
-int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-	kvm_inject_undefined(vcpu);
-	return 1;
-}
-
 static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
 {
 	/*
@@ -284,7 +279,7 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
  * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
  * all PM registers, which doesn't crash the guest kernel at least.
  */
-static bool pm_fake(struct kvm_vcpu *vcpu,
+static bool trap_raz_wi(struct kvm_vcpu *vcpu,
 		    const struct coproc_params *p,
 		    const struct coproc_reg *r)
 {
@@ -294,19 +289,19 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
 		return read_zero(vcpu, p);
 }
 
-#define access_pmcr pm_fake
-#define access_pmcntenset pm_fake
-#define access_pmcntenclr pm_fake
-#define access_pmovsr pm_fake
-#define access_pmselr pm_fake
-#define access_pmceid0 pm_fake
-#define access_pmceid1 pm_fake
-#define access_pmccntr pm_fake
-#define access_pmxevtyper pm_fake
-#define access_pmxevcntr pm_fake
-#define access_pmuserenr pm_fake
-#define access_pmintenset pm_fake
-#define access_pmintenclr pm_fake
+#define access_pmcr trap_raz_wi
+#define access_pmcntenset trap_raz_wi
+#define access_pmcntenclr trap_raz_wi
+#define access_pmovsr trap_raz_wi
+#define access_pmselr trap_raz_wi
+#define access_pmceid0 trap_raz_wi
+#define access_pmceid1 trap_raz_wi
+#define access_pmccntr trap_raz_wi
+#define access_pmxevtyper trap_raz_wi
+#define access_pmxevcntr trap_raz_wi
+#define access_pmuserenr trap_raz_wi
+#define access_pmintenset trap_raz_wi
+#define access_pmintenclr trap_raz_wi
 
 /* Architected CP15 registers.
  * CRn denotes the primary register number, but is copied to the CRm in the
@@ -532,12 +527,7 @@ static int emulate_cp15(struct kvm_vcpu *vcpu,
 	return 1;
 }
 
-/**
- * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
- * @vcpu: The VCPU pointer
- * @run:  The kvm_run struct
- */
-int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static struct coproc_params decode_64bit_hsr(struct kvm_vcpu *vcpu)
 {
 	struct coproc_params params;
 
@@ -551,9 +541,38 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
 	params.CRm = 0;
 
+	return params;
+}
+
+/**
+ * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct coproc_params params = decode_64bit_hsr(vcpu);
+
 	return emulate_cp15(vcpu, &params);
 }
 
+/**
+ * kvm_handle_cp14_64 -- handles a mrrc/mcrr trap on a guest CP14 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct coproc_params params = decode_64bit_hsr(vcpu);
+
+	/* raz_wi cp14 */
+	trap_raz_wi(vcpu, &params, NULL);
+
+	/* handled */
+	kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+	return 1;
+}
+
 static void reset_coproc_regs(struct kvm_vcpu *vcpu,
 			      const struct coproc_reg *table, size_t num)
 {
@@ -564,12 +583,7 @@ static void reset_coproc_regs(struct kvm_vcpu *vcpu,
 			table[i].reset(vcpu, &table[i]);
 }
 
-/**
- * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
- * @vcpu: The VCPU pointer
- * @run:  The kvm_run struct
- */
-int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static struct coproc_params decode_32bit_hsr(struct kvm_vcpu *vcpu)
 {
 	struct coproc_params params;
 
@@ -583,9 +597,37 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	params.Op2 = (kvm_vcpu_get_hsr(vcpu) >> 17) & 0x7;
 	params.Rt2 = 0;
 
+	return params;
+}
+
+/**
+ * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct coproc_params params = decode_32bit_hsr(vcpu);
 	return emulate_cp15(vcpu, &params);
 }
 
+/**
+ * kvm_handle_cp14_32 -- handles a mrc/mcr trap on a guest CP14 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct coproc_params params = decode_32bit_hsr(vcpu);
+
+	/* raz_wi cp14 */
+	trap_raz_wi(vcpu, &params, NULL);
+
+	/* handled */
+	kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+	return 1;
+}
+
 /******************************************************************************
  * Userspace API
  *****************************************************************************/
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 5fd7968cdae9..f86a9aaef462 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -95,9 +95,9 @@ static exit_handle_fn arm_exit_handlers[] = {
 	[HSR_EC_WFI]		= kvm_handle_wfx,
 	[HSR_EC_CP15_32]	= kvm_handle_cp15_32,
 	[HSR_EC_CP15_64]	= kvm_handle_cp15_64,
-	[HSR_EC_CP14_MR]	= kvm_handle_cp14_access,
+	[HSR_EC_CP14_MR]	= kvm_handle_cp14_32,
 	[HSR_EC_CP14_LS]	= kvm_handle_cp14_load_store,
-	[HSR_EC_CP14_64]	= kvm_handle_cp14_access,
+	[HSR_EC_CP14_64]	= kvm_handle_cp14_64,
 	[HSR_EC_CP_0_13]	= kvm_handle_cp_0_13_access,
 	[HSR_EC_CP10_ID]	= kvm_handle_cp10_id,
 	[HSR_EC_HVC]		= handle_hvc,
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
index 3023bb530edf..8679405b0b2b 100644
--- a/arch/arm/kvm/hyp/Makefile
+++ b/arch/arm/kvm/hyp/Makefile
@@ -2,6 +2,8 @@
 # Makefile for Kernel-based Virtual Machine module, HYP part
 #
 
+ccflags-y += -fno-stack-protector
+
 KVM=../../../../virt/kvm
 
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index 92678b7bd046..624a510d31df 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -48,7 +48,9 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host)
 	write_sysreg(HSTR_T(15), HSTR);
 	write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR);
 	val = read_sysreg(HDCR);
-	write_sysreg(val | HDCR_TPM | HDCR_TPMCR, HDCR);
+	val |= HDCR_TPM | HDCR_TPMCR; /* trap performance monitors */
+	val |= HDCR_TDRA | HDCR_TDOSA | HDCR_TDA; /* trap debug regs */
+	write_sysreg(val, HDCR);
 }
 
 static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 570ed4a9c261..5386528665b5 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -104,7 +104,6 @@ __do_hyp_init:
 	@  - Write permission implies XN: disabled
 	@  - Instruction cache: enabled
 	@  - Data/Unified cache: enabled
-	@  - Memory alignment checks: enabled
 	@  - MMU: enabled (this code must be run from an identity mapping)
 	mrc	p15, 4, r0, c1, c0, 0	@ HSCR
 	ldr	r2, =HSCTLR_MASK
@@ -112,8 +111,8 @@ __do_hyp_init:
 	mrc	p15, 0, r1, c1, c0, 0	@ SCTLR
 	ldr	r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
 	and	r1, r1, r2
- ARM(	ldr	r2, =(HSCTLR_M | HSCTLR_A)			)
- THUMB(	ldr	r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE)		)
+ ARM(	ldr	r2, =(HSCTLR_M)					)
+ THUMB(	ldr	r2, =(HSCTLR_M | HSCTLR_TE)			)
 	orr	r1, r1, r2
 	orr	r0, r0, r1
 	mcr	p15, 4, r0, c1, c0, 0	@ HSCR
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index c25a88598eb0..b0d10648c486 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -1,138 +1,11 @@
-#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_KVM_H
+#if !defined(_TRACE_ARM_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ARM_KVM_H
 
 #include <linux/tracepoint.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm
 
-/*
- * Tracepoints for entry/exit to guest
- */
-TRACE_EVENT(kvm_entry,
-	TP_PROTO(unsigned long vcpu_pc),
-	TP_ARGS(vcpu_pc),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	vcpu_pc		)
-	),
-
-	TP_fast_assign(
-		__entry->vcpu_pc		= vcpu_pc;
-	),
-
-	TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
-);
-
-TRACE_EVENT(kvm_exit,
-	TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc),
-	TP_ARGS(idx, exit_reason, vcpu_pc),
-
-	TP_STRUCT__entry(
-		__field(	int,		idx		)
-		__field(	unsigned int,	exit_reason	)
-		__field(	unsigned long,	vcpu_pc		)
-	),
-
-	TP_fast_assign(
-		__entry->idx			= idx;
-		__entry->exit_reason		= exit_reason;
-		__entry->vcpu_pc		= vcpu_pc;
-	),
-
-	TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
-		  __print_symbolic(__entry->idx, kvm_arm_exception_type),
-		  __entry->exit_reason,
-		  __print_symbolic(__entry->exit_reason, kvm_arm_exception_class),
-		  __entry->vcpu_pc)
-);
-
-TRACE_EVENT(kvm_guest_fault,
-	TP_PROTO(unsigned long vcpu_pc, unsigned long hsr,
-		 unsigned long hxfar,
-		 unsigned long long ipa),
-	TP_ARGS(vcpu_pc, hsr, hxfar, ipa),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	vcpu_pc		)
-		__field(	unsigned long,	hsr		)
-		__field(	unsigned long,	hxfar		)
-		__field(   unsigned long long,	ipa		)
-	),
-
-	TP_fast_assign(
-		__entry->vcpu_pc		= vcpu_pc;
-		__entry->hsr			= hsr;
-		__entry->hxfar			= hxfar;
-		__entry->ipa			= ipa;
-	),
-
-	TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
-		  __entry->ipa, __entry->hsr,
-		  __entry->hxfar, __entry->vcpu_pc)
-);
-
-TRACE_EVENT(kvm_access_fault,
-	TP_PROTO(unsigned long ipa),
-	TP_ARGS(ipa),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	ipa		)
-	),
-
-	TP_fast_assign(
-		__entry->ipa		= ipa;
-	),
-
-	TP_printk("IPA: %lx", __entry->ipa)
-);
-
-TRACE_EVENT(kvm_irq_line,
-	TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
-	TP_ARGS(type, vcpu_idx, irq_num, level),
-
-	TP_STRUCT__entry(
-		__field(	unsigned int,	type		)
-		__field(	int,		vcpu_idx	)
-		__field(	int,		irq_num		)
-		__field(	int,		level		)
-	),
-
-	TP_fast_assign(
-		__entry->type		= type;
-		__entry->vcpu_idx	= vcpu_idx;
-		__entry->irq_num	= irq_num;
-		__entry->level		= level;
-	),
-
-	TP_printk("Inject %s interrupt (%d), vcpu->idx: %d, num: %d, level: %d",
-		  (__entry->type == KVM_ARM_IRQ_TYPE_CPU) ? "CPU" :
-		  (__entry->type == KVM_ARM_IRQ_TYPE_PPI) ? "VGIC PPI" :
-		  (__entry->type == KVM_ARM_IRQ_TYPE_SPI) ? "VGIC SPI" : "UNKNOWN",
-		  __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level)
-);
-
-TRACE_EVENT(kvm_mmio_emulate,
-	TP_PROTO(unsigned long vcpu_pc, unsigned long instr,
-		 unsigned long cpsr),
-	TP_ARGS(vcpu_pc, instr, cpsr),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	vcpu_pc		)
-		__field(	unsigned long,	instr		)
-		__field(	unsigned long,	cpsr		)
-	),
-
-	TP_fast_assign(
-		__entry->vcpu_pc		= vcpu_pc;
-		__entry->instr			= instr;
-		__entry->cpsr			= cpsr;
-	),
-
-	TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)",
-		  __entry->vcpu_pc, __entry->instr, __entry->cpsr)
-);
-
 /* Architecturally implementation defined CP15 register access */
 TRACE_EVENT(kvm_emulate_cp15_imp,
 	TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn,
@@ -181,87 +54,6 @@ TRACE_EVENT(kvm_wfx,
 		__entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
 );
 
-TRACE_EVENT(kvm_unmap_hva,
-	TP_PROTO(unsigned long hva),
-	TP_ARGS(hva),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	hva		)
-	),
-
-	TP_fast_assign(
-		__entry->hva		= hva;
-	),
-
-	TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva)
-);
-
-TRACE_EVENT(kvm_unmap_hva_range,
-	TP_PROTO(unsigned long start, unsigned long end),
-	TP_ARGS(start, end),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	start		)
-		__field(	unsigned long,	end		)
-	),
-
-	TP_fast_assign(
-		__entry->start		= start;
-		__entry->end		= end;
-	),
-
-	TP_printk("mmu notifier unmap range: %#08lx -- %#08lx",
-		  __entry->start, __entry->end)
-);
-
-TRACE_EVENT(kvm_set_spte_hva,
-	TP_PROTO(unsigned long hva),
-	TP_ARGS(hva),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	hva		)
-	),
-
-	TP_fast_assign(
-		__entry->hva		= hva;
-	),
-
-	TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
-);
-
-TRACE_EVENT(kvm_age_hva,
-	TP_PROTO(unsigned long start, unsigned long end),
-	TP_ARGS(start, end),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	start		)
-		__field(	unsigned long,	end		)
-	),
-
-	TP_fast_assign(
-		__entry->start		= start;
-		__entry->end		= end;
-	),
-
-	TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
-		  __entry->start, __entry->end)
-);
-
-TRACE_EVENT(kvm_test_age_hva,
-	TP_PROTO(unsigned long hva),
-	TP_ARGS(hva),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	hva		)
-	),
-
-	TP_fast_assign(
-		__entry->hva		= hva;
-	),
-
-	TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
-);
-
 TRACE_EVENT(kvm_hvc,
 	TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
 	TP_ARGS(vcpu_pc, r0, imm),
@@ -282,49 +74,10 @@ TRACE_EVENT(kvm_hvc,
 		  __entry->vcpu_pc, __entry->r0, __entry->imm)
 );
 
-TRACE_EVENT(kvm_set_way_flush,
-	    TP_PROTO(unsigned long vcpu_pc, bool cache),
-	    TP_ARGS(vcpu_pc, cache),
-
-	    TP_STRUCT__entry(
-		    __field(	unsigned long,	vcpu_pc		)
-		    __field(	bool,		cache		)
-	    ),
-
-	    TP_fast_assign(
-		    __entry->vcpu_pc		= vcpu_pc;
-		    __entry->cache		= cache;
-	    ),
-
-	    TP_printk("S/W flush at 0x%016lx (cache %s)",
-		      __entry->vcpu_pc, __entry->cache ? "on" : "off")
-);
-
-TRACE_EVENT(kvm_toggle_cache,
-	    TP_PROTO(unsigned long vcpu_pc, bool was, bool now),
-	    TP_ARGS(vcpu_pc, was, now),
-
-	    TP_STRUCT__entry(
-		    __field(	unsigned long,	vcpu_pc		)
-		    __field(	bool,		was		)
-		    __field(	bool,		now		)
-	    ),
-
-	    TP_fast_assign(
-		    __entry->vcpu_pc		= vcpu_pc;
-		    __entry->was		= was;
-		    __entry->now		= now;
-	    ),
-
-	    TP_printk("VM op at 0x%016lx (cache was %s, now %s)",
-		      __entry->vcpu_pc, __entry->was ? "on" : "off",
-		      __entry->now ? "on" : "off")
-);
-
-#endif /* _TRACE_KVM_H */
+#endif /* _TRACE_ARM_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH arch/arm/kvm
+#define TRACE_INCLUDE_PATH .
 #undef TRACE_INCLUDE_FILE
 #define TRACE_INCLUDE_FILE trace
 
diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
index 841e924143f9..cbd959b73654 100644
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -1,6 +1,7 @@
 menuconfig ARCH_AT91
 	bool "Atmel SoCs"
 	depends on ARCH_MULTI_V4T || ARCH_MULTI_V5 || ARCH_MULTI_V7
+	select ARM_CPU_SUSPEND if PM
 	select COMMON_CLK_AT91
 	select GPIOLIB
 	select PINCTRL
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 2cd27c830ab6..283e79ab587d 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -335,7 +335,7 @@ static const struct ramc_info ramc_infos[] __initconst = {
 	{ .idle = sama5d3_ddr_standby, .memctrl = AT91_MEMCTRL_DDRSDR},
 };
 
-static const struct of_device_id const ramc_ids[] __initconst = {
+static const struct of_device_id ramc_ids[] __initconst = {
 	{ .compatible = "atmel,at91rm9200-sdramc", .data = &ramc_infos[0] },
 	{ .compatible = "atmel,at91sam9260-sdramc", .data = &ramc_infos[1] },
 	{ .compatible = "atmel,at91sam9g45-ddramc", .data = &ramc_infos[2] },
diff --git a/arch/arm/mach-bcm/bcm_kona_smc.c b/arch/arm/mach-bcm/bcm_kona_smc.c
index cf3f8658f0e5..a55a7ecf146a 100644
--- a/arch/arm/mach-bcm/bcm_kona_smc.c
+++ b/arch/arm/mach-bcm/bcm_kona_smc.c
@@ -33,7 +33,7 @@ struct bcm_kona_smc_data {
 	unsigned result;
 };
 
-static const struct of_device_id const bcm_kona_smc_ids[] __initconst = {
+static const struct of_device_id bcm_kona_smc_ids[] __initconst = {
 	{.compatible = "brcm,kona-smc"},
 	{.compatible = "bcm,kona-smc"}, /* deprecated name */
 	{},
diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c
index 03da3813f1ab..7d5a44a06648 100644
--- a/arch/arm/mach-cns3xxx/core.c
+++ b/arch/arm/mach-cns3xxx/core.c
@@ -346,7 +346,7 @@ static struct usb_ohci_pdata cns3xxx_usb_ohci_pdata = {
 	.power_off	= csn3xxx_usb_power_off,
 };
 
-static const struct of_dev_auxdata const cns3xxx_auxdata[] __initconst = {
+static const struct of_dev_auxdata cns3xxx_auxdata[] __initconst = {
 	{ "intel,usb-ehci", CNS3XXX_USB_BASE, "ehci-platform", &cns3xxx_usb_ehci_pdata },
 	{ "intel,usb-ohci", CNS3XXX_USB_OHCI_BASE, "ohci-platform", &cns3xxx_usb_ohci_pdata },
 	{ "cavium,cns3420-ahci", CNS3XXX_SATA2_BASE, "ahci", NULL },
diff --git a/arch/arm/mach-davinci/pm.c b/arch/arm/mach-davinci/pm.c
index efb80354f303..b5cc05dc2cb2 100644
--- a/arch/arm/mach-davinci/pm.c
+++ b/arch/arm/mach-davinci/pm.c
@@ -153,7 +153,8 @@ int __init davinci_pm_init(void)
 	davinci_sram_suspend = sram_alloc(davinci_cpu_suspend_sz, NULL);
 	if (!davinci_sram_suspend) {
 		pr_err("PM: cannot allocate SRAM memory\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto no_sram_mem;
 	}
 
 	davinci_sram_push(davinci_sram_suspend, davinci_cpu_suspend,
@@ -161,6 +162,10 @@ int __init davinci_pm_init(void)
 
 	suspend_set_ops(&davinci_pm_ops);
 
+	return 0;
+
+no_sram_mem:
+	iounmap(pm_config.ddrpsc_reg_base);
 no_ddrpsc_mem:
 	iounmap(pm_config.ddrpll_reg_base);
 no_ddrpll_mem:
diff --git a/arch/arm/mach-omap2/clkt2xxx_dpllcore.c b/arch/arm/mach-omap2/clkt2xxx_dpllcore.c
index 59cf310bc1e9..e8d417309f33 100644
--- a/arch/arm/mach-omap2/clkt2xxx_dpllcore.c
+++ b/arch/arm/mach-omap2/clkt2xxx_dpllcore.c
@@ -138,7 +138,8 @@ int omap2_reprogram_dpllcore(struct clk_hw *hw, unsigned long rate,
 		if (!dd)
 			return -EINVAL;
 
-		tmpset.cm_clksel1_pll = readl_relaxed(dd->mult_div1_reg);
+		tmpset.cm_clksel1_pll =
+			omap_clk_ll_ops.clk_readl(&dd->mult_div1_reg);
 		tmpset.cm_clksel1_pll &= ~(dd->mult_mask |
 					   dd->div1_mask);
 		div = ((curr_prcm_set->xtal_speed / 1000000) - 1);
diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c
index 1270afdcacdf..42881f21cede 100644
--- a/arch/arm/mach-omap2/clock.c
+++ b/arch/arm/mach-omap2/clock.c
@@ -54,9 +54,10 @@ u16 cpu_mask;
 #define OMAP3PLUS_DPLL_FINT_MIN		32000
 #define OMAP3PLUS_DPLL_FINT_MAX		52000000
 
-static struct ti_clk_ll_ops omap_clk_ll_ops = {
+struct ti_clk_ll_ops omap_clk_ll_ops = {
 	.clkdm_clk_enable = clkdm_clk_enable,
 	.clkdm_clk_disable = clkdm_clk_disable,
+	.clkdm_lookup = clkdm_lookup,
 	.cm_wait_module_ready = omap_cm_wait_module_ready,
 	.cm_split_idlest_reg = cm_split_idlest_reg,
 };
@@ -78,38 +79,6 @@ int __init omap2_clk_setup_ll_ops(void)
  * OMAP2+ specific clock functions
  */
 
-/* Public functions */
-
-/**
- * omap2_init_clk_clkdm - look up a clockdomain name, store pointer in clk
- * @clk: OMAP clock struct ptr to use
- *
- * Convert a clockdomain name stored in a struct clk 'clk' into a
- * clockdomain pointer, and save it into the struct clk.  Intended to be
- * called during clk_register().  No return value.
- */
-void omap2_init_clk_clkdm(struct clk_hw *hw)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	struct clockdomain *clkdm;
-	const char *clk_name;
-
-	if (!clk->clkdm_name)
-		return;
-
-	clk_name = __clk_get_name(hw->clk);
-
-	clkdm = clkdm_lookup(clk->clkdm_name);
-	if (clkdm) {
-		pr_debug("clock: associated clk %s to clkdm %s\n",
-			 clk_name, clk->clkdm_name);
-		clk->clkdm = clkdm;
-	} else {
-		pr_debug("clock: could not associate clk %s to clkdm %s\n",
-			 clk_name, clk->clkdm_name);
-	}
-}
-
 /**
  * ti_clk_init_features - init clock features struct for the SoC
  *
diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h
index 4e66295dca25..cf45550197e6 100644
--- a/arch/arm/mach-omap2/clock.h
+++ b/arch/arm/mach-omap2/clock.h
@@ -64,6 +64,8 @@
 #define OMAP4XXX_EN_DPLL_FRBYPASS		0x6
 #define OMAP4XXX_EN_DPLL_LOCKED			0x7
 
+extern struct ti_clk_ll_ops omap_clk_ll_ops;
+
 extern u16 cpu_mask;
 
 extern const struct clkops clkops_omap2_dflt_wait;
diff --git a/arch/arm/mach-omap2/cm.h b/arch/arm/mach-omap2/cm.h
index 1fe3e6b833d2..de75cbcdc9d1 100644
--- a/arch/arm/mach-omap2/cm.h
+++ b/arch/arm/mach-omap2/cm.h
@@ -23,6 +23,7 @@
 #define MAX_MODULE_READY_TIME		2000
 
 # ifndef __ASSEMBLER__
+#include <linux/clk/ti.h>
 extern void __iomem *cm_base;
 extern void __iomem *cm2_base;
 extern void omap2_set_globals_cm(void __iomem *cm, void __iomem *cm2);
@@ -50,7 +51,7 @@ extern void omap2_set_globals_cm(void __iomem *cm, void __iomem *cm2);
  * @module_disable: ptr to the SoC CM-specific module_disable impl
  */
 struct cm_ll_data {
-	int (*split_idlest_reg)(void __iomem *idlest_reg, s16 *prcm_inst,
+	int (*split_idlest_reg)(struct clk_omap_reg *idlest_reg, s16 *prcm_inst,
 				u8 *idlest_reg_id);
 	int (*wait_module_ready)(u8 part, s16 prcm_mod, u16 idlest_reg,
 				 u8 idlest_shift);
@@ -60,7 +61,7 @@ struct cm_ll_data {
 	void (*module_disable)(u8 part, u16 inst, u16 clkctrl_offs);
 };
 
-extern int cm_split_idlest_reg(void __iomem *idlest_reg, s16 *prcm_inst,
+extern int cm_split_idlest_reg(struct clk_omap_reg *idlest_reg, s16 *prcm_inst,
 			       u8 *idlest_reg_id);
 int omap_cm_wait_module_ready(u8 part, s16 prcm_mod, u16 idlest_reg,
 			      u8 idlest_shift);
diff --git a/arch/arm/mach-omap2/cm2xxx.c b/arch/arm/mach-omap2/cm2xxx.c
index 3e5fd3587eb1..cd90b4c6a06b 100644
--- a/arch/arm/mach-omap2/cm2xxx.c
+++ b/arch/arm/mach-omap2/cm2xxx.c
@@ -204,7 +204,7 @@ void omap2xxx_cm_apll96_disable(void)
  * XXX This function is only needed until absolute register addresses are
  * removed from the OMAP struct clk records.
  */
-static int omap2xxx_cm_split_idlest_reg(void __iomem *idlest_reg,
+static int omap2xxx_cm_split_idlest_reg(struct clk_omap_reg *idlest_reg,
 					s16 *prcm_inst,
 					u8 *idlest_reg_id)
 {
@@ -212,10 +212,7 @@ static int omap2xxx_cm_split_idlest_reg(void __iomem *idlest_reg,
 	u8 idlest_offs;
 	int i;
 
-	if (idlest_reg < cm_base || idlest_reg > (cm_base + 0x0fff))
-		return -EINVAL;
-
-	idlest_offs = (unsigned long)idlest_reg & 0xff;
+	idlest_offs = idlest_reg->offset & 0xff;
 	for (i = 0; i < ARRAY_SIZE(omap2xxx_cm_idlest_offs); i++) {
 		if (idlest_offs == omap2xxx_cm_idlest_offs[i]) {
 			*idlest_reg_id = i + 1;
@@ -226,7 +223,7 @@ static int omap2xxx_cm_split_idlest_reg(void __iomem *idlest_reg,
 	if (i == ARRAY_SIZE(omap2xxx_cm_idlest_offs))
 		return -EINVAL;
 
-	offs = idlest_reg - cm_base;
+	offs = idlest_reg->offset;
 	offs &= 0xff00;
 	*prcm_inst = offs;
 
diff --git a/arch/arm/mach-omap2/cm3xxx.c b/arch/arm/mach-omap2/cm3xxx.c
index d91ae8206d1e..55b046a719dc 100644
--- a/arch/arm/mach-omap2/cm3xxx.c
+++ b/arch/arm/mach-omap2/cm3xxx.c
@@ -118,7 +118,7 @@ static int omap3xxx_cm_wait_module_ready(u8 part, s16 prcm_mod, u16 idlest_id,
  * XXX This function is only needed until absolute register addresses are
  * removed from the OMAP struct clk records.
  */
-static int omap3xxx_cm_split_idlest_reg(void __iomem *idlest_reg,
+static int omap3xxx_cm_split_idlest_reg(struct clk_omap_reg *idlest_reg,
 					s16 *prcm_inst,
 					u8 *idlest_reg_id)
 {
@@ -126,11 +126,7 @@ static int omap3xxx_cm_split_idlest_reg(void __iomem *idlest_reg,
 	u8 idlest_offs;
 	int i;
 
-	if (idlest_reg < (cm_base + OMAP3430_IVA2_MOD) ||
-	    idlest_reg > (cm_base + 0x1ffff))
-		return -EINVAL;
-
-	idlest_offs = (unsigned long)idlest_reg & 0xff;
+	idlest_offs = idlest_reg->offset & 0xff;
 	for (i = 0; i < ARRAY_SIZE(omap3xxx_cm_idlest_offs); i++) {
 		if (idlest_offs == omap3xxx_cm_idlest_offs[i]) {
 			*idlest_reg_id = i + 1;
@@ -141,7 +137,7 @@ static int omap3xxx_cm_split_idlest_reg(void __iomem *idlest_reg,
 	if (i == ARRAY_SIZE(omap3xxx_cm_idlest_offs))
 		return -EINVAL;
 
-	offs = idlest_reg - cm_base;
+	offs = idlest_reg->offset;
 	offs &= 0xff00;
 	*prcm_inst = offs;
 
diff --git a/arch/arm/mach-omap2/cm_common.c b/arch/arm/mach-omap2/cm_common.c
index 23e8bcec34e3..bbe41f4c9dc8 100644
--- a/arch/arm/mach-omap2/cm_common.c
+++ b/arch/arm/mach-omap2/cm_common.c
@@ -65,7 +65,7 @@ void __init omap2_set_globals_cm(void __iomem *cm, void __iomem *cm2)
  * or 0 upon success.  XXX This function is only needed until absolute
  * register addresses are removed from the OMAP struct clk records.
  */
-int cm_split_idlest_reg(void __iomem *idlest_reg, s16 *prcm_inst,
+int cm_split_idlest_reg(struct clk_omap_reg *idlest_reg, s16 *prcm_inst,
 			u8 *idlest_reg_id)
 {
 	if (!cm_ll_data->split_idlest_reg) {
diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index 3089d3bfa19b..8cc6338fcb12 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -266,11 +266,12 @@ extern int omap4_cpu_kill(unsigned int cpu);
 extern const struct smp_operations omap4_smp_ops;
 #endif
 
+extern u32 omap4_get_cpu1_ns_pa_addr(void);
+
 #if defined(CONFIG_SMP) && defined(CONFIG_PM)
 extern int omap4_mpuss_init(void);
 extern int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state);
 extern int omap4_hotplug_cpu(unsigned int cpu, unsigned int power_state);
-extern u32 omap4_get_cpu1_ns_pa_addr(void);
 #else
 static inline int omap4_enter_lowpower(unsigned int cpu,
 					unsigned int power_state)
diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
index 03ec6d307c82..4cfc4f9b2c69 100644
--- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c
+++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
@@ -213,11 +213,6 @@ static void __init save_l2x0_context(void)
 {}
 #endif
 
-u32 omap4_get_cpu1_ns_pa_addr(void)
-{
-	return old_cpu1_ns_pa_addr;
-}
-
 /**
  * omap4_enter_lowpower: OMAP4 MPUSS Low Power Entry Function
  * The purpose of this function is to manage low power programming
@@ -457,6 +452,11 @@ int __init omap4_mpuss_init(void)
 
 #endif
 
+u32 omap4_get_cpu1_ns_pa_addr(void)
+{
+	return old_cpu1_ns_pa_addr;
+}
+
 /*
  * For kexec, we must set CPU1_WAKEUP_NS_PA_ADDR to point to
  * current kernel's secondary_startup() early before
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index 3faf454ba487..33e4953c61a8 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -306,7 +306,6 @@ static void __init omap4_smp_maybe_reset_cpu1(struct omap_smp_config *c)
 
 	cpu1_startup_pa = readl_relaxed(cfg.wakeupgen_base +
 					OMAP_AUX_CORE_BOOT_1);
-	cpu1_ns_pa_addr = omap4_get_cpu1_ns_pa_addr();
 
 	/* Did the configured secondary_startup() get overwritten? */
 	if (!omap4_smp_cpu1_startup_valid(cpu1_startup_pa))
@@ -316,9 +315,13 @@ static void __init omap4_smp_maybe_reset_cpu1(struct omap_smp_config *c)
 	 * If omap4 or 5 has NS_PA_ADDR configured, CPU1 may be in a
 	 * deeper idle state in WFI and will wake to an invalid address.
 	 */
-	if ((soc_is_omap44xx() || soc_is_omap54xx()) &&
-	    !omap4_smp_cpu1_startup_valid(cpu1_ns_pa_addr))
-		needs_reset = true;
+	if ((soc_is_omap44xx() || soc_is_omap54xx())) {
+		cpu1_ns_pa_addr = omap4_get_cpu1_ns_pa_addr();
+		if (!omap4_smp_cpu1_startup_valid(cpu1_ns_pa_addr))
+			needs_reset = true;
+	} else {
+		cpu1_ns_pa_addr = 0;
+	}
 
 	if (!needs_reset || !c->cpu1_rstctrl_va)
 		return;
diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c
index 2b138b65129a..dc11841ca334 100644
--- a/arch/arm/mach-omap2/prm_common.c
+++ b/arch/arm/mach-omap2/prm_common.c
@@ -711,7 +711,7 @@ static struct omap_prcm_init_data scrm_data __initdata = {
 };
 #endif
 
-static const struct of_device_id const omap_prcm_dt_match_table[] __initconst = {
+static const struct of_device_id omap_prcm_dt_match_table[] __initconst = {
 #ifdef CONFIG_SOC_AM33XX
 	{ .compatible = "ti,am3-prcm", .data = &am3_prm_data },
 #endif
diff --git a/arch/arm/mach-omap2/vc.c b/arch/arm/mach-omap2/vc.c
index 2028167fff31..d76b1e5eb8ba 100644
--- a/arch/arm/mach-omap2/vc.c
+++ b/arch/arm/mach-omap2/vc.c
@@ -559,7 +559,7 @@ struct i2c_init_data {
 	u8 hsscll_12;
 };
 
-static const struct i2c_init_data const omap4_i2c_timing_data[] __initconst = {
+static const struct i2c_init_data omap4_i2c_timing_data[] __initconst = {
 	{
 		.load = 50,
 		.loadbits = 0x3,
diff --git a/arch/arm/mach-spear/time.c b/arch/arm/mach-spear/time.c
index 4878ba90026d..289e036c9c30 100644
--- a/arch/arm/mach-spear/time.c
+++ b/arch/arm/mach-spear/time.c
@@ -204,7 +204,7 @@ static void __init spear_clockevent_init(int irq)
 	setup_irq(irq, &spear_timer_irq);
 }
 
-static const struct of_device_id const timer_of_match[] __initconst = {
+static const struct of_device_id timer_of_match[] __initconst = {
 	{ .compatible = "st,spear-timer", },
 	{ },
 };
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index c742dfd2967b..bd83c531828a 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2311,7 +2311,14 @@ int arm_iommu_attach_device(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(arm_iommu_attach_device);
 
-static void __arm_iommu_detach_device(struct device *dev)
+/**
+ * arm_iommu_detach_device
+ * @dev: valid struct device pointer
+ *
+ * Detaches the provided device from a previously attached map.
+ * This voids the dma operations (dma_map_ops pointer)
+ */
+void arm_iommu_detach_device(struct device *dev)
 {
 	struct dma_iommu_mapping *mapping;
 
@@ -2324,22 +2331,10 @@ static void __arm_iommu_detach_device(struct device *dev)
 	iommu_detach_device(mapping->domain, dev);
 	kref_put(&mapping->kref, release_iommu_mapping);
 	to_dma_iommu_mapping(dev) = NULL;
+	set_dma_ops(dev, NULL);
 
 	pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
 }
-
-/**
- * arm_iommu_detach_device
- * @dev: valid struct device pointer
- *
- * Detaches the provided device from a previously attached map.
- * This voids the dma operations (dma_map_ops pointer)
- */
-void arm_iommu_detach_device(struct device *dev)
-{
-	__arm_iommu_detach_device(dev);
-	set_dma_ops(dev, NULL);
-}
 EXPORT_SYMBOL_GPL(arm_iommu_detach_device);
 
 static const struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
@@ -2379,7 +2374,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev)
 	if (!mapping)
 		return;
 
-	__arm_iommu_detach_device(dev);
+	arm_iommu_detach_device(dev);
 	arm_iommu_release_mapping(mapping);
 }
 
@@ -2430,9 +2425,13 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 		dev->dma_ops = xen_dma_ops;
 	}
 #endif
+	dev->archdata.dma_ops_setup = true;
 }
 
 void arch_teardown_dma_ops(struct device *dev)
 {
+	if (!dev->archdata.dma_ops_setup)
+		return;
+
 	arm_teardown_iommu_dma_ops(dev);
 }
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 2239fde10b80..f0701d8d24df 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -90,7 +90,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -141,7 +141,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 31af3cb59a60..e46a6a446cdd 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1218,15 +1218,15 @@ void __init adjust_lowmem_bounds(void)
 
 	high_memory = __va(arm_lowmem_limit - 1) + 1;
 
+	if (!memblock_limit)
+		memblock_limit = arm_lowmem_limit;
+
 	/*
 	 * Round the memblock limit down to a pmd size.  This
 	 * helps to ensure that we will allocate memory from the
 	 * last full pmd, which should be mapped.
 	 */
-	if (memblock_limit)
-		memblock_limit = round_down(memblock_limit, PMD_SIZE);
-	if (!memblock_limit)
-		memblock_limit = arm_lowmem_limit;
+	memblock_limit = round_down(memblock_limit, PMD_SIZE);
 
 	if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) {
 		if (memblock_end_of_DRAM() > arm_lowmem_limit) {
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3dcd7ec69bca..95c7ed392003 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -205,7 +205,7 @@ config GENERIC_CALIBRATE_DELAY
 config ZONE_DMA
 	def_bool y
 
-config HAVE_GENERIC_RCU_GUP
+config HAVE_GENERIC_GUP
 	def_bool y
 
 config ARCH_DMA_ADDR_T_64BIT
@@ -1084,10 +1084,6 @@ config SYSVIPC_COMPAT
 	def_bool y
 	depends on COMPAT && SYSVIPC
 
-config KEYS_COMPAT
-	def_bool y
-	depends on COMPAT && KEYS
-
 endmenu
 
 menu "Power management options"
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 4afcffcb46cb..73272f43ca01 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -106,8 +106,13 @@ config ARCH_MVEBU
 	select ARMADA_AP806_SYSCON
 	select ARMADA_CP110_SYSCON
 	select ARMADA_37XX_CLK
+	select GPIOLIB
+	select GPIOLIB_IRQCHIP
 	select MVEBU_ODMI
 	select MVEBU_PIC
+	select OF_GPIO
+	select PINCTRL
+	select PINCTRL_ARMADA_37XX
 	help
 	  This enables support for Marvell EBU familly, including:
 	   - Armada 3700 SoC Family
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 7dedf2d8494e..f839ecd919f9 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -102,12 +102,12 @@ libs-y		:= arch/arm64/lib/ $(libs-y)
 core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
 
 # Default target when executing plain make
-KBUILD_IMAGE	:= Image.gz
+boot		:= arch/arm64/boot
+KBUILD_IMAGE	:= $(boot)/Image.gz
 KBUILD_DTBS	:= dtbs
 
-all:	$(KBUILD_IMAGE) $(KBUILD_DTBS)
+all:	Image.gz $(KBUILD_DTBS)
 
-boot := arch/arm64/boot
 
 Image: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
index c7f669f5884f..166c9ef884dc 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
@@ -406,8 +406,9 @@
 		r_ccu: clock@1f01400 {
 			compatible = "allwinner,sun50i-a64-r-ccu";
 			reg = <0x01f01400 0x100>;
-			clocks = <&osc24M>, <&osc32k>, <&iosc>;
-			clock-names = "hosc", "losc", "iosc";
+			clocks = <&osc24M>, <&osc32k>, <&iosc>,
+				 <&ccu 11>;
+			clock-names = "hosc", "losc", "iosc", "pll-periph";
 			#clock-cells = <1>;
 			#reset-cells = <1>;
 		};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
index 4d314a253fd9..732e2e06f503 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
@@ -40,7 +40,7 @@
  *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include "sunxi-h3-h5.dtsi"
+#include <arm/sunxi-h3-h5.dtsi>
 
 / {
 	cpus {
diff --git a/arch/arm64/boot/dts/allwinner/sunxi-h3-h5.dtsi b/arch/arm64/boot/dts/allwinner/sunxi-h3-h5.dtsi
deleted file mode 120000
index 036f01dc2b9b..000000000000
--- a/arch/arm64/boot/dts/allwinner/sunxi-h3-h5.dtsi
+++ /dev/null
@@ -1 +0,0 @@
-../../../../arm/boot/dts/sunxi-h3-h5.dtsi
-\ No newline at end of file
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
index 9b4ba7169210..49f6a6242cf9 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
@@ -81,6 +81,45 @@
 		};
 	};
 
+	reg_sys_5v: regulator@0 {
+		compatible = "regulator-fixed";
+		regulator-name = "SYS_5V";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		regulator-boot-on;
+		regulator-always-on;
+	};
+
+	reg_vdd_3v3: regulator@1 {
+		compatible = "regulator-fixed";
+		regulator-name = "VDD_3V3";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-boot-on;
+		regulator-always-on;
+		vin-supply = <&reg_sys_5v>;
+	};
+
+	reg_5v_hub: regulator@2 {
+		compatible = "regulator-fixed";
+		regulator-name = "5V_HUB";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		regulator-boot-on;
+		gpio = <&gpio0 7 0>;
+		regulator-always-on;
+		vin-supply = <&reg_sys_5v>;
+	};
+
+	wl1835_pwrseq: wl1835-pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		/* WLAN_EN GPIO */
+		reset-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>;
+		clocks = <&pmic>;
+		clock-names = "ext_clock";
+		power-off-delay-us = <10>;
+	};
+
 	soc {
 		spi0: spi@f7106000 {
 			status = "ok";
@@ -256,11 +295,31 @@
 
 		/* GPIO blocks 16 thru 19 do not appear to be routed to pins */
 
+		dwmmc_0: dwmmc0@f723d000 {
+			cap-mmc-highspeed;
+			non-removable;
+			bus-width = <0x8>;
+			vmmc-supply = <&ldo19>;
+		};
+
+		dwmmc_1: dwmmc1@f723e000 {
+			card-detect-delay = <200>;
+			cap-sd-highspeed;
+			sd-uhs-sdr12;
+			sd-uhs-sdr25;
+			sd-uhs-sdr50;
+			vqmmc-supply = <&ldo7>;
+			vmmc-supply = <&ldo10>;
+			bus-width = <0x4>;
+			disable-wp;
+			cd-gpios = <&gpio1 0 1>;
+		};
+
 		dwmmc_2: dwmmc2@f723f000 {
-			ti,non-removable;
+			bus-width = <0x4>;
 			non-removable;
-			/* WL_EN */
-			vmmc-supply = <&wlan_en_reg>;
+			vmmc-supply = <&reg_vdd_3v3>;
+			mmc-pwrseq = <&wl1835_pwrseq>;
 
 			#address-cells = <0x1>;
 			#size-cells = <0x0>;
@@ -272,18 +331,6 @@
 				interrupts = <3 IRQ_TYPE_EDGE_RISING>;
 			};
 		};
-
-		wlan_en_reg: regulator@1 {
-			compatible = "regulator-fixed";
-			regulator-name = "wlan-en-regulator";
-			regulator-min-microvolt = <1800000>;
-			regulator-max-microvolt = <1800000>;
-			/* WLAN_EN GPIO */
-			gpio = <&gpio0 5 0>;
-			/* WLAN card specific delay */
-			startup-delay-us = <70000>;
-			enable-active-high;
-		};
 	};
 
 	leds {
@@ -330,6 +377,7 @@
 	pmic: pmic@f8000000 {
 		compatible = "hisilicon,hi655x-pmic";
 		reg = <0x0 0xf8000000 0x0 0x1000>;
+		#clock-cells = <0>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
 		pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
@@ -411,6 +459,13 @@
 			};
 		};
 	};
+
+	firmware {
+		optee {
+			compatible = "linaro,optee-tz";
+			method = "smc";
+		};
+	};
 };
 
 &uart2 {
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
index 1e5129b19280..5013e4b2ea71 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
@@ -725,20 +725,10 @@
 			status = "disabled";
 		};
 
-		fixed_5v_hub: regulator@0 {
-			compatible = "regulator-fixed";
-			regulator-name = "fixed_5v_hub";
-			regulator-min-microvolt = <5000000>;
-			regulator-max-microvolt = <5000000>;
-			regulator-boot-on;
-			gpio = <&gpio0 7 0>;
-			regulator-always-on;
-		};
-
 		usb_phy: usbphy {
 			compatible = "hisilicon,hi6220-usb-phy";
 			#phy-cells = <0>;
-			phy-supply = <&fixed_5v_hub>;
+			phy-supply = <&reg_5v_hub>;
 			hisilicon,peripheral-syscon = <&sys_ctrl>;
 		};
 
@@ -766,17 +756,12 @@
 
 		dwmmc_0: dwmmc0@f723d000 {
 			compatible = "hisilicon,hi6220-dw-mshc";
-			num-slots = <0x1>;
-			cap-mmc-highspeed;
-			non-removable;
 			reg = <0x0 0xf723d000 0x0 0x1000>;
 			interrupts = <0x0 0x48 0x4>;
 			clocks = <&sys_ctrl 2>, <&sys_ctrl 1>;
 			clock-names = "ciu", "biu";
 			resets = <&sys_ctrl PERIPH_RSTDIS0_MMC0>;
 			reset-names = "reset";
-			bus-width = <0x8>;
-			vmmc-supply = <&ldo19>;
 			pinctrl-names = "default";
 			pinctrl-0 = <&emmc_pmx_func &emmc_clk_cfg_func
 				     &emmc_cfg_func &emmc_rst_cfg_func>;
@@ -784,13 +769,7 @@
 
 		dwmmc_1: dwmmc1@f723e000 {
 			compatible = "hisilicon,hi6220-dw-mshc";
-			num-slots = <0x1>;
-			card-detect-delay = <200>;
 			hisilicon,peripheral-syscon = <&ao_ctrl>;
-			cap-sd-highspeed;
-			sd-uhs-sdr12;
-			sd-uhs-sdr25;
-			sd-uhs-sdr50;
 			reg = <0x0 0xf723e000 0x0 0x1000>;
 			interrupts = <0x0 0x49 0x4>;
 			#address-cells = <0x1>;
@@ -799,11 +778,6 @@
 			clock-names = "ciu", "biu";
 			resets = <&sys_ctrl PERIPH_RSTDIS0_MMC1>;
 			reset-names = "reset";
-			vqmmc-supply = <&ldo7>;
-			vmmc-supply = <&ldo10>;
-			bus-width = <0x4>;
-			disable-wp;
-			cd-gpios = <&gpio1 0 1>;
 			pinctrl-names = "default", "idle";
 			pinctrl-0 = <&sd_pmx_func &sd_clk_cfg_func &sd_cfg_func>;
 			pinctrl-1 = <&sd_pmx_idle &sd_clk_cfg_idle &sd_cfg_idle>;
@@ -811,15 +785,12 @@
 
 		dwmmc_2: dwmmc2@f723f000 {
 			compatible = "hisilicon,hi6220-dw-mshc";
-			num-slots = <0x1>;
 			reg = <0x0 0xf723f000 0x0 0x1000>;
 			interrupts = <0x0 0x4a 0x4>;
 			clocks = <&sys_ctrl HI6220_MMC2_CIUCLK>, <&sys_ctrl HI6220_MMC2_CLK>;
 			clock-names = "ciu", "biu";
 			resets = <&sys_ctrl PERIPH_RSTDIS0_MMC2>;
 			reset-names = "reset";
-			bus-width = <0x4>;
-			broken-cd;
 			pinctrl-names = "default", "idle";
 			pinctrl-0 = <&sdio_pmx_func &sdio_clk_cfg_func &sdio_cfg_func>;
 			pinctrl-1 = <&sdio_pmx_idle &sdio_clk_cfg_idle &sdio_cfg_idle>;
diff --git a/arch/arm64/boot/dts/include/arm b/arch/arm64/boot/dts/include/arm
deleted file mode 120000
index cf63d80e2b93..000000000000
--- a/arch/arm64/boot/dts/include/arm
+++ /dev/null
@@ -1 +0,0 @@
-../../../../arm/boot/dts
-\ No newline at end of file
diff --git a/arch/arm64/boot/dts/include/arm64 b/arch/arm64/boot/dts/include/arm64
deleted file mode 120000
index a96aa0ea9d8c..000000000000
--- a/arch/arm64/boot/dts/include/arm64
+++ /dev/null
@@ -1 +0,0 @@
-..
-\ No newline at end of file
diff --git a/arch/arm64/boot/dts/include/dt-bindings b/arch/arm64/boot/dts/include/dt-bindings
deleted file mode 120000
index 08c00e4972fa..000000000000
--- a/arch/arm64/boot/dts/include/dt-bindings
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../include/dt-bindings
-\ No newline at end of file
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-db.dts b/arch/arm64/boot/dts/marvell/armada-3720-db.dts
index cef5f976bc0f..a89855f57091 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-db.dts
+++ b/arch/arm64/boot/dts/marvell/armada-3720-db.dts
@@ -79,6 +79,8 @@
 };
 
 &i2c0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&i2c1_pins>;
 	status = "okay";
 
 	gpio_exp: pca9555@22 {
@@ -113,6 +115,8 @@
 
 &spi0 {
 	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&spi_quad_pins>;
 
 	m25p80@0 {
 		compatible = "jedec,spi-nor";
@@ -143,6 +147,8 @@
 
 /* Exported on the micro USB connector CON32 through an FTDI */
 &uart0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart1_pins>;
 	status = "okay";
 };
 
@@ -184,6 +190,8 @@
 };
 
 &eth0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&rgmii_pins>;
 	phy-mode = "rgmii-id";
 	phy = <&phy0>;
 	status = "okay";
diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
index 58ae9e095af2..4d495ec39202 100644
--- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
@@ -161,16 +161,83 @@
 				#clock-cells = <1>;
 			};
 
-			gpio1: gpio@13800 {
-				compatible = "marvell,mvebu-gpio-3700",
+			pinctrl_nb: pinctrl@13800 {
+				compatible = "marvell,armada3710-nb-pinctrl",
 				"syscon", "simple-mfd";
-				reg = <0x13800 0x500>;
+				reg = <0x13800 0x100>, <0x13C00 0x20>;
+				gpionb: gpio {
+					#gpio-cells = <2>;
+					gpio-ranges = <&pinctrl_nb 0 0 36>;
+					gpio-controller;
+					interrupts =
+					<GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>;
+
+				};
 
 				xtalclk: xtal-clk {
 					compatible = "marvell,armada-3700-xtal-clock";
 					clock-output-names = "xtal";
 					#clock-cells = <0>;
 				};
+
+				spi_quad_pins: spi-quad-pins {
+					groups = "spi_quad";
+					function = "spi";
+				};
+
+				i2c1_pins: i2c1-pins {
+					groups = "i2c1";
+					function = "i2c";
+				};
+
+				i2c2_pins: i2c2-pins {
+					groups = "i2c2";
+					function = "i2c";
+				};
+
+				uart1_pins: uart1-pins {
+					groups = "uart1";
+					function = "uart";
+				};
+
+				uart2_pins: uart2-pins {
+					groups = "uart2";
+					function = "uart";
+				};
+			};
+
+			pinctrl_sb: pinctrl@18800 {
+				compatible = "marvell,armada3710-sb-pinctrl",
+				"syscon", "simple-mfd";
+				reg = <0x18800 0x100>, <0x18C00 0x20>;
+				gpiosb: gpio {
+					#gpio-cells = <2>;
+					gpio-ranges = <&pinctrl_sb 0 0 29>;
+					gpio-controller;
+					interrupts =
+					<GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 159 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>;
+				};
+
+				rgmii_pins: mii-pins {
+					groups = "rgmii";
+					function = "mii";
+				};
+
 			};
 
 			eth0: ethernet@30000 {
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
index ac8df5201cd6..b4bc42ece754 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
@@ -231,8 +231,7 @@
 			cpm_crypto: crypto@800000 {
 				compatible = "inside-secure,safexcel-eip197";
 				reg = <0x800000 0x200000>;
-				interrupts = <GIC_SPI 34 (IRQ_TYPE_EDGE_RISING
-				| IRQ_TYPE_LEVEL_HIGH)>,
+				interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
index 7740a75a8230..6e2058847ddc 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
@@ -221,8 +221,7 @@
 			cps_crypto: crypto@800000 {
 				compatible = "inside-secure,safexcel-eip197";
 				reg = <0x800000 0x200000>;
-				interrupts = <GIC_SPI 34 (IRQ_TYPE_EDGE_RISING
-				| IRQ_TYPE_LEVEL_HIGH)>,
+				interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 278 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 279 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 280 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
index 0ecaad4333a7..1c3634fa94bf 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
@@ -134,6 +134,9 @@
 	bus-width = <8>;
 	max-frequency = <50000000>;
 	cap-mmc-highspeed;
+	mediatek,hs200-cmd-int-delay=<26>;
+	mediatek,hs400-cmd-int-delay=<14>;
+	mediatek,hs400-cmd-resp-sel-rising;
 	vmmc-supply = <&mt6397_vemc_3v3_reg>;
 	vqmmc-supply = <&mt6397_vio18_reg>;
 	non-removable;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts
index 658bb9dc9dfd..7bd31066399b 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts
@@ -44,7 +44,7 @@
 
 /dts-v1/;
 #include "rk3399-gru.dtsi"
-#include <include/dt-bindings/input/linux-event-codes.h>
+#include <dt-bindings/input/linux-event-codes.h>
 
 /*
  * Kevin-specific things
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index ce072859e3b2..97c123e09e45 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -30,7 +30,6 @@ CONFIG_PROFILING=y
 CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 CONFIG_ARCH_SUNXI=y
 CONFIG_ARCH_ALPINE=y
@@ -62,16 +61,16 @@ CONFIG_ARCH_XGENE=y
 CONFIG_ARCH_ZX=y
 CONFIG_ARCH_ZYNQMP=y
 CONFIG_PCI=y
-CONFIG_PCI_MSI=y
 CONFIG_PCI_IOV=y
-CONFIG_PCI_AARDVARK=y
-CONFIG_PCIE_RCAR=y
-CONFIG_PCI_HOST_GENERIC=y
-CONFIG_PCI_XGENE=y
 CONFIG_PCI_LAYERSCAPE=y
 CONFIG_PCI_HISI=y
 CONFIG_PCIE_QCOM=y
 CONFIG_PCIE_ARMADA_8K=y
+CONFIG_PCI_AARDVARK=y
+CONFIG_PCIE_RCAR=y
+CONFIG_PCIE_ROCKCHIP=m
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCI_XGENE=y
 CONFIG_ARM64_VA_BITS_48=y
 CONFIG_SCHED_MC=y
 CONFIG_NUMA=y
@@ -80,12 +79,11 @@ CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
 CONFIG_SECCOMP=y
-CONFIG_XEN=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
+CONFIG_XEN=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
-CONFIG_CPU_IDLE=y
 CONFIG_HIBERNATION=y
 CONFIG_ARM_CPUIDLE=y
 CONFIG_CPU_FREQ=y
@@ -155,8 +153,8 @@ CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_NBD=m
 CONFIG_VIRTIO_BLK=y
-CONFIG_EEPROM_AT25=m
 CONFIG_SRAM=y
+CONFIG_EEPROM_AT25=m
 # CONFIG_SCSI_PROC_FS is not set
 CONFIG_BLK_DEV_SD=y
 CONFIG_SCSI_SAS_ATA=y
@@ -168,8 +166,8 @@ CONFIG_AHCI_CEVA=y
 CONFIG_AHCI_MVEBU=y
 CONFIG_AHCI_XGENE=y
 CONFIG_AHCI_QORIQ=y
-CONFIG_SATA_RCAR=y
 CONFIG_SATA_SIL24=y
+CONFIG_SATA_RCAR=y
 CONFIG_PATA_PLATFORM=y
 CONFIG_PATA_OF_PLATFORM=y
 CONFIG_NETDEVICES=y
@@ -186,18 +184,17 @@ CONFIG_HNS_ENET=y
 CONFIG_E1000E=y
 CONFIG_IGB=y
 CONFIG_IGBVF=y
-CONFIG_MVPP2=y
 CONFIG_MVNETA=y
+CONFIG_MVPP2=y
 CONFIG_SKY2=y
 CONFIG_RAVB=y
 CONFIG_SMC91X=y
 CONFIG_SMSC911X=y
 CONFIG_STMMAC_ETH=m
-CONFIG_REALTEK_PHY=m
+CONFIG_MDIO_BUS_MUX_MMIOREG=y
 CONFIG_MESON_GXL_PHY=m
 CONFIG_MICREL_PHY=y
-CONFIG_MDIO_BUS_MUX=y
-CONFIG_MDIO_BUS_MUX_MMIOREG=y
+CONFIG_REALTEK_PHY=m
 CONFIG_USB_PEGASUS=m
 CONFIG_USB_RTL8150=m
 CONFIG_USB_RTL8152=m
@@ -212,6 +209,8 @@ CONFIG_BRCMFMAC=m
 CONFIG_WL18XX=m
 CONFIG_WLCORE_SDIO=m
 CONFIG_INPUT_EVDEV=y
+CONFIG_KEYBOARD_ADC=m
+CONFIG_KEYBOARD_CROS_EC=y
 CONFIG_KEYBOARD_GPIO=y
 CONFIG_INPUT_MISC=y
 CONFIG_INPUT_PM8941_PWRKEY=y
@@ -230,14 +229,14 @@ CONFIG_SERIAL_8250_UNIPHIER=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_SERIAL_MESON=y
+CONFIG_SERIAL_MESON_CONSOLE=y
 CONFIG_SERIAL_SAMSUNG=y
 CONFIG_SERIAL_SAMSUNG_CONSOLE=y
 CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=11
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
-CONFIG_SERIAL_MESON=y
-CONFIG_SERIAL_MESON_CONSOLE=y
 CONFIG_SERIAL_MSM=y
 CONFIG_SERIAL_MSM_CONSOLE=y
 CONFIG_SERIAL_XILINX_PS_UART=y
@@ -261,14 +260,15 @@ CONFIG_I2C_UNIPHIER_F=y
 CONFIG_I2C_RCAR=y
 CONFIG_I2C_CROS_EC_TUNNEL=y
 CONFIG_SPI=y
-CONFIG_SPI_MESON_SPIFC=m
 CONFIG_SPI_BCM2835=m
 CONFIG_SPI_BCM2835AUX=m
+CONFIG_SPI_MESON_SPIFC=m
 CONFIG_SPI_ORION=y
 CONFIG_SPI_PL022=y
 CONFIG_SPI_QUP=y
-CONFIG_SPI_SPIDEV=m
+CONFIG_SPI_ROCKCHIP=y
 CONFIG_SPI_S3C64XX=y
+CONFIG_SPI_SPIDEV=m
 CONFIG_SPMI=y
 CONFIG_PINCTRL_SINGLE=y
 CONFIG_PINCTRL_MAX77620=y
@@ -286,33 +286,33 @@ CONFIG_GPIO_PCA953X=y
 CONFIG_GPIO_PCA953X_IRQ=y
 CONFIG_GPIO_MAX77620=y
 CONFIG_POWER_RESET_MSM=y
-CONFIG_BATTERY_BQ27XXX=y
 CONFIG_POWER_RESET_XGENE=y
 CONFIG_POWER_RESET_SYSCON=y
+CONFIG_BATTERY_BQ27XXX=y
+CONFIG_SENSORS_ARM_SCPI=y
 CONFIG_SENSORS_LM90=m
 CONFIG_SENSORS_INA2XX=m
-CONFIG_SENSORS_ARM_SCPI=y
-CONFIG_THERMAL=y
-CONFIG_THERMAL_EMULATION=y
 CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y
 CONFIG_CPU_THERMAL=y
-CONFIG_BCM2835_THERMAL=y
+CONFIG_THERMAL_EMULATION=y
 CONFIG_EXYNOS_THERMAL=y
+CONFIG_ROCKCHIP_THERMAL=m
 CONFIG_WATCHDOG=y
-CONFIG_BCM2835_WDT=y
-CONFIG_RENESAS_WDT=y
 CONFIG_S3C2410_WATCHDOG=y
 CONFIG_MESON_GXBB_WATCHDOG=m
 CONFIG_MESON_WATCHDOG=m
+CONFIG_RENESAS_WDT=y
+CONFIG_BCM2835_WDT=y
+CONFIG_MFD_CROS_EC=y
+CONFIG_MFD_CROS_EC_I2C=y
+CONFIG_MFD_CROS_EC_SPI=y
 CONFIG_MFD_EXYNOS_LPASS=m
+CONFIG_MFD_HI655X_PMIC=y
 CONFIG_MFD_MAX77620=y
-CONFIG_MFD_RK808=y
 CONFIG_MFD_SPMI_PMIC=y
+CONFIG_MFD_RK808=y
 CONFIG_MFD_SEC_CORE=y
-CONFIG_MFD_HI655X_PMIC=y
-CONFIG_REGULATOR=y
-CONFIG_MFD_CROS_EC=y
-CONFIG_MFD_CROS_EC_I2C=y
+CONFIG_REGULATOR_FAN53555=y
 CONFIG_REGULATOR_FIXED_VOLTAGE=y
 CONFIG_REGULATOR_GPIO=y
 CONFIG_REGULATOR_HI655X=y
@@ -345,13 +345,12 @@ CONFIG_DRM_EXYNOS_DSI=y
 CONFIG_DRM_EXYNOS_HDMI=y
 CONFIG_DRM_EXYNOS_MIC=y
 CONFIG_DRM_RCAR_DU=m
-CONFIG_DRM_RCAR_HDMI=y
 CONFIG_DRM_RCAR_LVDS=y
 CONFIG_DRM_RCAR_VSP=y
 CONFIG_DRM_TEGRA=m
-CONFIG_DRM_VC4=m
 CONFIG_DRM_PANEL_SIMPLE=m
 CONFIG_DRM_I2C_ADV7511=m
+CONFIG_DRM_VC4=m
 CONFIG_DRM_HISI_KIRIN=m
 CONFIG_DRM_MESON=m
 CONFIG_FB=y
@@ -366,39 +365,37 @@ CONFIG_SOUND=y
 CONFIG_SND=y
 CONFIG_SND_SOC=y
 CONFIG_SND_BCM2835_SOC_I2S=m
-CONFIG_SND_SOC_RCAR=y
 CONFIG_SND_SOC_SAMSUNG=y
+CONFIG_SND_SOC_RCAR=y
 CONFIG_SND_SOC_AK4613=y
 CONFIG_USB=y
 CONFIG_USB_OTG=y
 CONFIG_USB_XHCI_HCD=y
-CONFIG_USB_XHCI_PLATFORM=y
-CONFIG_USB_XHCI_RCAR=y
-CONFIG_USB_EHCI_EXYNOS=y
 CONFIG_USB_XHCI_TEGRA=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_MSM=y
+CONFIG_USB_EHCI_EXYNOS=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
-CONFIG_USB_OHCI_EXYNOS=y
 CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_EXYNOS=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_RENESAS_USBHS=m
 CONFIG_USB_STORAGE=y
-CONFIG_USB_DWC2=y
 CONFIG_USB_DWC3=y
+CONFIG_USB_DWC2=y
 CONFIG_USB_CHIPIDEA=y
 CONFIG_USB_CHIPIDEA_UDC=y
 CONFIG_USB_CHIPIDEA_HOST=y
 CONFIG_USB_ISP1760=y
 CONFIG_USB_HSIC_USB3503=y
 CONFIG_USB_MSM_OTG=y
+CONFIG_USB_QCOM_8X16_PHY=y
 CONFIG_USB_ULPI=y
 CONFIG_USB_GADGET=y
 CONFIG_USB_RENESAS_USBHS_UDC=m
 CONFIG_MMC=y
 CONFIG_MMC_BLOCK_MINORS=32
 CONFIG_MMC_ARMMMCI=y
-CONFIG_MMC_MESON_GX=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_ACPI=y
 CONFIG_MMC_SDHCI_PLTFM=y
@@ -406,6 +403,7 @@ CONFIG_MMC_SDHCI_OF_ARASAN=y
 CONFIG_MMC_SDHCI_OF_ESDHC=y
 CONFIG_MMC_SDHCI_CADENCE=y
 CONFIG_MMC_SDHCI_TEGRA=y
+CONFIG_MMC_MESON_GX=y
 CONFIG_MMC_SDHCI_MSM=y
 CONFIG_MMC_SPI=y
 CONFIG_MMC_SDHI=y
@@ -414,32 +412,31 @@ CONFIG_MMC_DW_EXYNOS=y
 CONFIG_MMC_DW_K3=y
 CONFIG_MMC_DW_ROCKCHIP=y
 CONFIG_MMC_SUNXI=y
-CONFIG_MMC_SDHCI_XENON=y
 CONFIG_MMC_BCM2835=y
+CONFIG_MMC_SDHCI_XENON=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_GPIO=y
 CONFIG_LEDS_PWM=y
 CONFIG_LEDS_SYSCON=y
-CONFIG_LEDS_TRIGGERS=y
-CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
 CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_LEDS_TRIGGER_CPU=y
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_MAX77686=y
+CONFIG_RTC_DRV_RK808=m
 CONFIG_RTC_DRV_S5M=y
 CONFIG_RTC_DRV_DS3232=y
 CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_S3C=y
 CONFIG_RTC_DRV_PL031=y
 CONFIG_RTC_DRV_SUN6I=y
-CONFIG_RTC_DRV_RK808=m
 CONFIG_RTC_DRV_TEGRA=y
 CONFIG_RTC_DRV_XGENE=y
-CONFIG_RTC_DRV_S3C=y
 CONFIG_DMADEVICES=y
+CONFIG_DMA_BCM2835=m
 CONFIG_MV_XOR_V2=y
 CONFIG_PL330_DMA=y
-CONFIG_DMA_BCM2835=m
 CONFIG_TEGRA20_APB_DMA=y
 CONFIG_QCOM_BAM_DMA=y
 CONFIG_QCOM_HIDMA_MGMT=y
@@ -452,52 +449,56 @@ CONFIG_VIRTIO_BALLOON=y
 CONFIG_VIRTIO_MMIO=y
 CONFIG_XEN_GNTDEV=y
 CONFIG_XEN_GRANT_DEV_ALLOC=y
+CONFIG_COMMON_CLK_RK808=y
 CONFIG_COMMON_CLK_SCPI=y
 CONFIG_COMMON_CLK_CS2000_CP=y
 CONFIG_COMMON_CLK_S2MPS11=y
-CONFIG_COMMON_CLK_PWM=y
-CONFIG_COMMON_CLK_RK808=y
 CONFIG_CLK_QORIQ=y
+CONFIG_COMMON_CLK_PWM=y
 CONFIG_COMMON_CLK_QCOM=y
+CONFIG_QCOM_CLK_SMD_RPM=y
 CONFIG_MSM_GCC_8916=y
 CONFIG_MSM_GCC_8994=y
 CONFIG_MSM_MMCC_8996=y
 CONFIG_HWSPINLOCK_QCOM=y
-CONFIG_MAILBOX=y
 CONFIG_ARM_MHU=y
 CONFIG_PLATFORM_MHU=y
 CONFIG_BCM2835_MBOX=y
 CONFIG_HI6220_MBOX=y
 CONFIG_ARM_SMMU=y
 CONFIG_ARM_SMMU_V3=y
+CONFIG_RPMSG_QCOM_SMD=y
 CONFIG_RASPBERRYPI_POWER=y
 CONFIG_QCOM_SMEM=y
-CONFIG_QCOM_SMD=y
 CONFIG_QCOM_SMD_RPM=y
+CONFIG_QCOM_SMP2P=y
+CONFIG_QCOM_SMSM=y
 CONFIG_ROCKCHIP_PM_DOMAINS=y
 CONFIG_ARCH_TEGRA_132_SOC=y
 CONFIG_ARCH_TEGRA_210_SOC=y
 CONFIG_ARCH_TEGRA_186_SOC=y
 CONFIG_EXTCON_USB_GPIO=y
+CONFIG_IIO=y
+CONFIG_EXYNOS_ADC=y
+CONFIG_ROCKCHIP_SARADC=m
 CONFIG_PWM=y
 CONFIG_PWM_BCM2835=m
+CONFIG_PWM_CROS_EC=m
+CONFIG_PWM_MESON=m
 CONFIG_PWM_ROCKCHIP=y
+CONFIG_PWM_SAMSUNG=y
 CONFIG_PWM_TEGRA=m
-CONFIG_PWM_MESON=m
-CONFIG_COMMON_RESET_HI6220=y
 CONFIG_PHY_RCAR_GEN3_USB2=y
 CONFIG_PHY_HI6220_USB=y
+CONFIG_PHY_SUN4I_USB=y
 CONFIG_PHY_ROCKCHIP_INNO_USB2=y
 CONFIG_PHY_ROCKCHIP_EMMC=y
-CONFIG_PHY_SUN4I_USB=y
+CONFIG_PHY_ROCKCHIP_PCIE=m
 CONFIG_PHY_XGENE=y
 CONFIG_PHY_TEGRA_XUSB=y
 CONFIG_ARM_SCPI_PROTOCOL=y
-CONFIG_ACPI=y
-CONFIG_IIO=y
-CONFIG_EXYNOS_ADC=y
-CONFIG_PWM_SAMSUNG=y
 CONFIG_RASPBERRYPI_FIRMWARE=y
+CONFIG_ACPI=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -511,7 +512,6 @@ CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
 CONFIG_OVERLAY_FS=m
 CONFIG_VFAT_FS=y
-CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y
 CONFIG_CONFIGFS_FS=y
 CONFIG_EFIVAR_FS=y
@@ -539,11 +539,9 @@ CONFIG_MEMTEST=y
 CONFIG_SECURITY=y
 CONFIG_CRYPTO_ECHAINIV=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
-CONFIG_CRYPTO_DEV_SAFEXCEL=m
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA2_ARM64_CE=y
 CONFIG_CRYPTO_GHASH_ARM64_CE=y
 CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
-# CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 0e99978da3f0..59cca1d6ec54 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -23,9 +23,9 @@
 #define ACPI_MADT_GICC_LENGTH	\
 	(acpi_gbl_FADT.header.revision < 6 ? 76 : 80)
 
-#define BAD_MADT_GICC_ENTRY(entry, end)						\
-	(!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) ||	\
-	 (entry)->header.length != ACPI_MADT_GICC_LENGTH)
+#define BAD_MADT_GICC_ENTRY(entry, end)					\
+	(!(entry) || (entry)->header.length != ACPI_MADT_GICC_LENGTH ||	\
+	(unsigned long)(entry) + ACPI_MADT_GICC_LENGTH > (end))
 
 /* Basic configuration for ACPI */
 #ifdef	CONFIG_ACPI
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index df411f3e083c..ecd9788cd298 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -62,4 +62,13 @@ alternative_if ARM64_ALT_PAN_NOT_UAO
 alternative_else_nop_endif
 	.endm
 
+/*
+ * Remove the address tag from a virtual address, if present.
+ */
+	.macro	clear_address_tag, dst, addr
+	tst	\addr, #(1 << 55)
+	bic	\dst, \addr, #(0xff << 56)
+	csel	\dst, \dst, \addr, eq
+	.endm
+
 #endif
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index f819fdcff1ac..f5a2d09afb38 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -264,7 +264,6 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,		\
 	"	st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n"	\
 	"	cbnz	%w[tmp], 1b\n"					\
 	"	" #mb "\n"						\
-	"	mov	%" #w "[oldval], %" #w "[old]\n"		\
 	"2:"								\
 	: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),			\
 	  [v] "+Q" (*(unsigned long *)ptr)				\
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 7457ce082b5f..99fa69c9c3cf 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -322,7 +322,7 @@ static inline void atomic64_and(long i, atomic64_t *v)
 #define ATOMIC64_FETCH_OP_AND(name, mb, cl...)				\
 static inline long atomic64_fetch_and##name(long i, atomic64_t *v)	\
 {									\
-	register long x0 asm ("w0") = i;				\
+	register long x0 asm ("x0") = i;				\
 	register atomic64_t *x1 asm ("x1") = v;				\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
@@ -394,7 +394,7 @@ ATOMIC64_OP_SUB_RETURN(        , al, "memory")
 #define ATOMIC64_FETCH_OP_SUB(name, mb, cl...)				\
 static inline long atomic64_fetch_sub##name(long i, atomic64_t *v)	\
 {									\
-	register long x0 asm ("w0") = i;				\
+	register long x0 asm ("x0") = i;				\
 	register atomic64_t *x1 asm ("x1") = v;				\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 4e0497f581a0..0fe7e43b7fbc 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -42,25 +42,35 @@
 #define __smp_rmb()	dmb(ishld)
 #define __smp_wmb()	dmb(ishst)
 
-#define __smp_store_release(p, v)						\
+#define __smp_store_release(p, v)					\
 do {									\
+	union { typeof(*p) __val; char __c[1]; } __u =			\
+		{ .__val = (__force typeof(*p)) (v) }; 			\
 	compiletime_assert_atomic_type(*p);				\
 	switch (sizeof(*p)) {						\
 	case 1:								\
 		asm volatile ("stlrb %w1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u8 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	case 2:								\
 		asm volatile ("stlrh %w1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u16 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	case 4:								\
 		asm volatile ("stlr %w1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u32 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	case 8:								\
 		asm volatile ("stlr %1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u64 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	}								\
 } while (0)
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 91b26d26af8a..ae852add053d 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -46,7 +46,7 @@ static inline unsigned long __xchg_case_##name(unsigned long x,		\
 	"	swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n"	\
 		__nops(3)						\
 	"	" #nop_lse)						\
-	: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)			\
+	: "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned long *)ptr)	\
 	: "r" (x)							\
 	: cl);								\
 									\
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index e7f84a7b4465..428ee1f2468c 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -115,6 +115,7 @@ struct arm64_cpu_capabilities {
 
 extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
+extern struct static_key_false arm64_const_caps_ready;
 
 bool this_cpu_has_cap(unsigned int cap);
 
@@ -124,7 +125,7 @@ static inline bool cpu_have_feature(unsigned int num)
 }
 
 /* System capability check for constant caps */
-static inline bool cpus_have_const_cap(int num)
+static inline bool __cpus_have_const_cap(int num)
 {
 	if (num >= ARM64_NCAPS)
 		return false;
@@ -138,6 +139,14 @@ static inline bool cpus_have_cap(unsigned int num)
 	return test_bit(num, cpu_hwcaps);
 }
 
+static inline bool cpus_have_const_cap(int num)
+{
+	if (static_branch_likely(&arm64_const_caps_ready))
+		return __cpus_have_const_cap(num);
+	else
+		return cpus_have_cap(num);
+}
+
 static inline void cpus_set_cap(unsigned int num)
 {
 	if (num >= ARM64_NCAPS) {
@@ -145,7 +154,6 @@ static inline void cpus_set_cap(unsigned int num)
 			num, ARM64_NCAPS);
 	} else {
 		__set_bit(num, cpu_hwcaps);
-		static_branch_enable(&cpu_hwcap_keys[num]);
 	}
 }
 
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index f5ea0ba70f07..fe39e6841326 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -240,6 +240,12 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
 }
 
+static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
+{
+	u32 esr = kvm_vcpu_get_hsr(vcpu);
+	return (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+}
+
 static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 {
 	return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5e19165c5fa8..1f252a95bc02 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -24,6 +24,7 @@
 
 #include <linux/types.h>
 #include <linux/kvm_types.h>
+#include <asm/cpufeature.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -355,9 +356,12 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 				       unsigned long vector_ptr)
 {
 	/*
-	 * Call initialization code, and switch to the full blown
-	 * HYP code.
+	 * Call initialization code, and switch to the full blown HYP code.
+	 * If the cpucaps haven't been finalized yet, something has gone very
+	 * wrong, and hyp will crash and burn when it uses any
+	 * cpus_have_const_cap() wrapper.
 	 */
+	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
 	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
 }
 
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 15c142ce991c..b4d13d9267ff 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -286,6 +286,10 @@
 #define SCTLR_ELx_A	(1 << 1)
 #define SCTLR_ELx_M	1
 
+#define SCTLR_EL2_RES1	((1 << 4)  | (1 << 5)  | (1 << 11) | (1 << 16) | \
+			 (1 << 16) | (1 << 18) | (1 << 22) | (1 << 23) | \
+			 (1 << 28) | (1 << 29))
+
 #define SCTLR_ELx_FLAGS	(SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
 			 SCTLR_ELx_SA | SCTLR_ELx_I)
 
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index ba497172610d..7b8a04789cef 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -69,20 +69,21 @@ static inline void set_fs(mm_segment_t fs)
  */
 #define __range_ok(addr, size)						\
 ({									\
+	unsigned long __addr = (unsigned long __force)(addr);		\
 	unsigned long flag, roksum;					\
 	__chk_user_ptr(addr);						\
 	asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls"		\
 		: "=&r" (flag), "=&r" (roksum)				\
-		: "1" (addr), "Ir" (size),				\
+		: "1" (__addr), "Ir" (size),				\
 		  "r" (current_thread_info()->addr_limit)		\
 		: "cc");						\
 	flag;								\
 })
 
 /*
- * When dealing with data aborts or instruction traps we may end up with
- * a tagged userland pointer. Clear the tag to get a sane pointer to pass
- * on to access_ok(), for instance.
+ * When dealing with data aborts, watchpoints, or instruction traps we may end
+ * up with a tagged userland pointer. Clear the tag to get a sane pointer to
+ * pass on to access_ok(), for instance.
  */
 #define untagged_addr(addr)		sign_extend64(addr, 55)
 
@@ -230,7 +231,7 @@ do {									\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 8:								\
-		__get_user_asm("ldr", "ldtr", "%",  __gu_val, (ptr),	\
+		__get_user_asm("ldr", "ldtr", "%x",  __gu_val, (ptr),	\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	default:							\
@@ -297,7 +298,7 @@ do {									\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 8:								\
-		__put_user_asm("str", "sttr", "%", __pu_val, (ptr),	\
+		__put_user_asm("str", "sttr", "%x", __pu_val, (ptr),	\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	default:							\
diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild
index 825b0fe51c2b..13a97aa2285f 100644
--- a/arch/arm64/include/uapi/asm/Kbuild
+++ b/arch/arm64/include/uapi/asm/Kbuild
@@ -2,21 +2,3 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += kvm_para.h
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += fcntl.h
-header-y += hwcap.h
-header-y += kvm_para.h
-header-y += perf_regs.h
-header-y += param.h
-header-y += ptrace.h
-header-y += setup.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += stat.h
-header-y += statfs.h
-header-y += ucontext.h
-header-y += unistd.h
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 869ee480deed..70eea2ecc663 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -216,13 +216,17 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
 			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK	0x3ff
 #define VGIC_LEVEL_INFO_LINE_LEVEL	0
 
-#define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
+#define   KVM_DEV_ARM_VGIC_CTRL_INIT		0
+#define   KVM_DEV_ARM_ITS_SAVE_TABLES           1
+#define   KVM_DEV_ARM_ITS_RESTORE_TABLES        2
+#define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES	3
 
 /* Device Control API on vcpu fd */
 #define KVM_ARM_VCPU_PMU_V3_CTRL	0
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 657977e77ec8..f0e6d717885b 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -306,7 +306,8 @@ do {								\
 	_ASM_EXTABLE(0b, 4b)					\
 	_ASM_EXTABLE(1b, 4b)					\
 	: "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2)	\
-	: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT),		\
+	: "r" ((unsigned long)addr), "i" (-EAGAIN),		\
+	  "i" (-EFAULT),					\
 	  "i" (__SWP_LL_SC_LOOPS)				\
 	: "memory");						\
 	uaccess_disable();					\
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 94b8f7fc3310..817ce3365e20 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -985,8 +985,16 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
  */
 void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 {
-	for (; caps->matches; caps++)
-		if (caps->enable && cpus_have_cap(caps->capability))
+	for (; caps->matches; caps++) {
+		unsigned int num = caps->capability;
+
+		if (!cpus_have_cap(num))
+			continue;
+
+		/* Ensure cpus_have_const_cap(num) works */
+		static_branch_enable(&cpu_hwcap_keys[num]);
+
+		if (caps->enable) {
 			/*
 			 * Use stop_machine() as it schedules the work allowing
 			 * us to modify PSTATE, instead of on_each_cpu() which
@@ -994,6 +1002,8 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 			 * we return.
 			 */
 			stop_machine(caps->enable, NULL, cpu_online_mask);
+		}
+	}
 }
 
 /*
@@ -1096,6 +1106,14 @@ static void __init setup_feature_capabilities(void)
 	enable_cpu_capabilities(arm64_features);
 }
 
+DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready);
+EXPORT_SYMBOL(arm64_const_caps_ready);
+
+static void __init mark_const_caps_ready(void)
+{
+	static_branch_enable(&arm64_const_caps_ready);
+}
+
 /*
  * Check if the current CPU has a given feature capability.
  * Should be called from non-preemptible context.
@@ -1131,6 +1149,7 @@ void __init setup_cpu_features(void)
 	/* Set the CPU feature capabilies */
 	setup_feature_capabilities();
 	enable_errata_workarounds();
+	mark_const_caps_ready();
 	setup_elf_hwcaps(arm64_elf_hwcaps);
 
 	if (system_supports_32bit_el0())
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 5d17f377d905..82cd07592519 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -11,7 +11,6 @@
  *
  */
 
-#include <linux/dmi.h>
 #include <linux/efi.h>
 #include <linux/init.h>
 
@@ -117,20 +116,6 @@ int __init efi_set_mapping_permissions(struct mm_struct *mm,
 				   set_permissions, md);
 }
 
-static int __init arm64_dmi_init(void)
-{
-	/*
-	 * On arm64, DMI depends on UEFI, and dmi_scan_machine() needs to
-	 * be called early because dmi_id_init(), which is an arch_initcall
-	 * itself, depends on dmi_scan_machine() having been called already.
-	 */
-	dmi_scan_machine();
-	if (dmi_available)
-		dmi_set_dump_stack_arch_desc();
-	return 0;
-}
-core_initcall(arm64_dmi_init);
-
 /*
  * UpdateCapsule() depends on the system being shutdown via
  * ResetSystem().
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 43512d4d7df2..b738880350f9 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -428,12 +428,13 @@ el1_da:
 	/*
 	 * Data abort handling
 	 */
-	mrs	x0, far_el1
+	mrs	x3, far_el1
 	enable_dbg
 	// re-enable interrupts if they were enabled in the aborted context
 	tbnz	x23, #7, 1f			// PSR_I_BIT
 	enable_irq
 1:
+	clear_address_tag x0, x3
 	mov	x2, sp				// struct pt_regs
 	bl	do_mem_abort
 
@@ -594,7 +595,7 @@ el0_da:
 	// enable interrupts before calling the main handler
 	enable_dbg_and_irq
 	ct_user_exit
-	bic	x0, x26, #(0xff << 56)
+	clear_address_tag x0, x26
 	mov	x1, x25
 	mov	x2, sp
 	bl	do_mem_abort
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 0296e7924240..749f81779420 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -36,6 +36,7 @@
 #include <asm/traps.h>
 #include <asm/cputype.h>
 #include <asm/system_misc.h>
+#include <asm/uaccess.h>
 
 /* Breakpoint currently in use for each BRP. */
 static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
@@ -721,6 +722,8 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
 	u64 wp_low, wp_high;
 	u32 lens, lene;
 
+	addr = untagged_addr(addr);
+
 	lens = __ffs(ctrl->len);
 	lene = __fls(ctrl->len);
 
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index c9a2ab446dc6..f035ff6fb223 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -32,11 +32,16 @@
 
 void *module_alloc(unsigned long size)
 {
+	gfp_t gfp_mask = GFP_KERNEL;
 	void *p;
 
+	/* Silence the initial allocation */
+	if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+		gfp_mask |= __GFP_NOWARN;
+
 	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
 				module_alloc_base + MODULES_VSIZE,
-				GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+				gfp_mask, PAGE_KERNEL_EXEC, 0,
 				NUMA_NO_NODE, __builtin_return_address(0));
 
 	if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index 4f0e3ebfea4b..c7e3e6387a49 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -191,8 +191,10 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 		return NULL;
 
 	root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node);
-	if (!root_ops)
+	if (!root_ops) {
+		kfree(ri);
 		return NULL;
+	}
 
 	ri->cfg = pci_acpi_setup_ecam_mapping(root);
 	if (!ri->cfg) {
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index bcc79471b38e..83a1b1ad189f 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -877,15 +877,24 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
 
 	if (attr->exclude_idle)
 		return -EPERM;
-	if (is_kernel_in_hyp_mode() &&
-	    attr->exclude_kernel != attr->exclude_hv)
-		return -EINVAL;
+
+	/*
+	 * If we're running in hyp mode, then we *are* the hypervisor.
+	 * Therefore we ignore exclude_hv in this configuration, since
+	 * there's no hypervisor to sample anyway. This is consistent
+	 * with other architectures (x86 and Power).
+	 */
+	if (is_kernel_in_hyp_mode()) {
+		if (!attr->exclude_kernel)
+			config_base |= ARMV8_PMU_INCLUDE_EL2;
+	} else {
+		if (attr->exclude_kernel)
+			config_base |= ARMV8_PMU_EXCLUDE_EL1;
+		if (!attr->exclude_hv)
+			config_base |= ARMV8_PMU_INCLUDE_EL2;
+	}
 	if (attr->exclude_user)
 		config_base |= ARMV8_PMU_EXCLUDE_EL0;
-	if (!is_kernel_in_hyp_mode() && attr->exclude_kernel)
-		config_base |= ARMV8_PMU_EXCLUDE_EL1;
-	if (!attr->exclude_hv)
-		config_base |= ARMV8_PMU_INCLUDE_EL2;
 
 	/*
 	 * Install the filter into config_base as this is used to
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6e0e16a3a7d4..321119881abf 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -961,8 +961,7 @@ void smp_send_stop(void)
 		cpumask_copy(&mask, cpu_online_mask);
 		cpumask_clear_cpu(smp_processor_id(), &mask);
 
-		if (system_state == SYSTEM_BOOTING ||
-		    system_state == SYSTEM_RUNNING)
+		if (system_state <= SYSTEM_RUNNING)
 			pr_crit("SMP: stopping secondary CPUs\n");
 		smp_cross_call(&mask, IPI_CPU_STOP);
 	}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index d4d6ae02cd55..0805b44f986a 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -443,7 +443,7 @@ int cpu_enable_cache_maint_trap(void *__unused)
 }
 
 #define __user_cache_maint(insn, address, res)			\
-	if (untagged_addr(address) >= user_addr_max()) {	\
+	if (address >= user_addr_max()) {			\
 		res = -EFAULT;					\
 	} else {						\
 		uaccess_ttbr0_enable();				\
@@ -469,7 +469,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 	int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
 	int ret = 0;
 
-	address = pt_regs_read_reg(regs, rt);
+	address = untagged_addr(pt_regs_read_reg(regs, rt));
 
 	switch (crm) {
 	case ESR_ELx_SYS64_ISS_CRM_DC_CVAU:	/* DC CVAU, gets promoted */
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 41b6e31f8f55..d0cb007fa482 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -221,10 +221,11 @@ void update_vsyscall(struct timekeeper *tk)
 		/* tkr_mono.cycle_last == tkr_raw.cycle_last */
 		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
 		vdso_data->raw_time_sec		= tk->raw_time.tv_sec;
-		vdso_data->raw_time_nsec	= tk->raw_time.tv_nsec;
+		vdso_data->raw_time_nsec	= (tk->raw_time.tv_nsec <<
+						   tk->tkr_raw.shift) +
+						  tk->tkr_raw.xtime_nsec;
 		vdso_data->xtime_clock_sec	= tk->xtime_sec;
 		vdso_data->xtime_clock_nsec	= tk->tkr_mono.xtime_nsec;
-		/* tkr_raw.xtime_nsec == 0 */
 		vdso_data->cs_mono_mult		= tk->tkr_mono.mult;
 		vdso_data->cs_raw_mult		= tk->tkr_raw.mult;
 		/* tkr_mono.shift == tkr_raw.shift */
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index e00b4671bd7c..76320e920965 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -256,7 +256,6 @@ monotonic_raw:
 	seqcnt_check fail=monotonic_raw
 
 	/* All computations are done with left-shifted nsecs. */
-	lsl	x14, x14, x12
 	get_nsec_per_sec res=x9
 	lsl	x9, x9, x12
 
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index afd51bebb9c5..5d9810086c25 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -7,14 +7,13 @@ CFLAGS_arm.o := -I.
 CFLAGS_mmu.o := -I.
 
 KVM=../../../virt/kvm
-ARM=../../../arch/arm/kvm
 
 obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
 obj-$(CONFIG_KVM_ARM_HOST) += hyp/
 
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
 
 kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 839425c24b1c..3f9615582377 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -106,10 +106,13 @@ __do_hyp_init:
 	tlbi	alle2
 	dsb	sy
 
-	mrs	x4, sctlr_el2
-	and	x4, x4, #SCTLR_ELx_EE	// preserve endianness of EL2
-	ldr	x5, =SCTLR_ELx_FLAGS
-	orr	x4, x4, x5
+	/*
+	 * Preserve all the RES1 bits while setting the default flags,
+	 * as well as the EE bit on BE. Drop the A flag since the compiler
+	 * is allowed to generate unaligned accesses.
+	 */
+	ldr	x4, =(SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
+CPU_BE(	orr	x4, x4, #SCTLR_ELx_EE)
 	msr	sctlr_el2, x4
 	isb
 
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index aaf42ae8d8c3..14c4e3b14bcb 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -2,6 +2,8 @@
 # Makefile for Kernel-based Virtual Machine module, HYP part
 #
 
+ccflags-y += -fno-stack-protector
+
 KVM=../../../../virt/kvm
 
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index efbe9e8e7a78..0fe27024a2e1 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1529,8 +1529,8 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
 {
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
-	int Rt = (hsr >> 5) & 0xf;
-	int Rt2 = (hsr >> 10) & 0xf;
+	int Rt = kvm_vcpu_sys_get_rt(vcpu);
+	int Rt2 = (hsr >> 10) & 0x1f;
 
 	params.is_aarch32 = true;
 	params.is_32bit = false;
@@ -1586,7 +1586,7 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
 {
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
-	int Rt  = (hsr >> 5) & 0xf;
+	int Rt  = kvm_vcpu_sys_get_rt(vcpu);
 
 	params.is_aarch32 = true;
 	params.is_32bit = true;
@@ -1688,7 +1688,7 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	struct sys_reg_params params;
 	unsigned long esr = kvm_vcpu_get_hsr(vcpu);
-	int Rt = (esr >> 5) & 0x1f;
+	int Rt = kvm_vcpu_sys_get_rt(vcpu);
 	int ret;
 
 	trace_kvm_handle_sys_reg(esr);
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 79f37e37d367..6260b69e5622 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -65,8 +65,8 @@ static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		 * Here set VMCR.CTLR in ICC_CTLR_EL1 layout.
 		 * The vgic_set_vmcr() will convert to ICH_VMCR layout.
 		 */
-		vmcr.ctlr = val & ICC_CTLR_EL1_CBPR_MASK;
-		vmcr.ctlr |= val & ICC_CTLR_EL1_EOImode_MASK;
+		vmcr.cbpr = (val & ICC_CTLR_EL1_CBPR_MASK) >> ICC_CTLR_EL1_CBPR_SHIFT;
+		vmcr.eoim = (val & ICC_CTLR_EL1_EOImode_MASK) >> ICC_CTLR_EL1_EOImode_SHIFT;
 		vgic_set_vmcr(vcpu, &vmcr);
 	} else {
 		val = 0;
@@ -83,8 +83,8 @@ static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		 * The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
 		 * Extract it directly using ICC_CTLR_EL1 reg definitions.
 		 */
-		val |= vmcr.ctlr & ICC_CTLR_EL1_CBPR_MASK;
-		val |= vmcr.ctlr & ICC_CTLR_EL1_EOImode_MASK;
+		val |= (vmcr.cbpr << ICC_CTLR_EL1_CBPR_SHIFT) & ICC_CTLR_EL1_CBPR_MASK;
+		val |= (vmcr.eoim << ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK;
 
 		p->regval = val;
 	}
@@ -135,7 +135,7 @@ static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		p->regval = 0;
 
 	vgic_get_vmcr(vcpu, &vmcr);
-	if (!((vmcr.ctlr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT)) {
+	if (!vmcr.cbpr) {
 		if (p->is_write) {
 			vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >>
 				     ICC_BPR1_EL1_SHIFT;
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index c6e53580aefe..c870d6f01ac2 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -36,6 +36,7 @@ int bpf_jit_enable __read_mostly;
 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
 #define TCALL_CNT (MAX_BPF_JIT_REG + 2)
+#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
 
 /* Map BPF registers to A64 registers */
 static const int bpf2a64[] = {
@@ -57,6 +58,7 @@ static const int bpf2a64[] = {
 	/* temporary registers for internal BPF JIT */
 	[TMP_REG_1] = A64_R(10),
 	[TMP_REG_2] = A64_R(11),
+	[TMP_REG_3] = A64_R(12),
 	/* tail_call_cnt */
 	[TCALL_CNT] = A64_R(26),
 	/* temporary register for blinding constants */
@@ -253,8 +255,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	 */
 	off = offsetof(struct bpf_array, ptrs);
 	emit_a64_mov_i64(tmp, off, ctx);
-	emit(A64_LDR64(tmp, r2, tmp), ctx);
-	emit(A64_LDR64(prg, tmp, r3), ctx);
+	emit(A64_ADD(1, tmp, r2, tmp), ctx);
+	emit(A64_LSL(1, prg, r3, 3), ctx);
+	emit(A64_LDR64(prg, tmp, prg), ctx);
 	emit(A64_CBZ(1, prg, jmp_offset), ctx);
 
 	/* goto *(prog->bpf_func + prologue_size); */
@@ -318,6 +321,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	const u8 src = bpf2a64[insn->src_reg];
 	const u8 tmp = bpf2a64[TMP_REG_1];
 	const u8 tmp2 = bpf2a64[TMP_REG_2];
+	const u8 tmp3 = bpf2a64[TMP_REG_3];
 	const s16 off = insn->off;
 	const s32 imm = insn->imm;
 	const int i = insn - ctx->prog->insnsi;
@@ -688,10 +692,10 @@ emit_cond_jmp:
 		emit(A64_PRFM(tmp, PST, L1, STRM), ctx);
 		emit(A64_LDXR(isdw, tmp2, tmp), ctx);
 		emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
-		emit(A64_STXR(isdw, tmp2, tmp, tmp2), ctx);
+		emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx);
 		jmp_offset = -3;
 		check_imm19(jmp_offset);
-		emit(A64_CBNZ(0, tmp2, jmp_offset), ctx);
+		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
 		break;
 
 	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h
index 85d4af97c986..dbdbb8a558df 100644
--- a/arch/blackfin/include/asm/processor.h
+++ b/arch/blackfin/include/asm/processor.h
@@ -75,11 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk)	(tsk->thread.pc)
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)							\
diff --git a/arch/blackfin/include/uapi/asm/Kbuild b/arch/blackfin/include/uapi/asm/Kbuild
index 0bd28f77abc3..b15bf6bc0e94 100644
--- a/arch/blackfin/include/uapi/asm/Kbuild
+++ b/arch/blackfin/include/uapi/asm/Kbuild
@@ -1,19 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += bfin_sport.h
-header-y += byteorder.h
-header-y += cachectl.h
-header-y += fcntl.h
-header-y += fixed_code.h
-header-y += ioctls.h
-header-y += kvm_para.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += stat.h
-header-y += swab.h
-header-y += unistd.h
diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h
index b9eb3da7f278..7c87b5be53b5 100644
--- a/arch/c6x/include/asm/processor.h
+++ b/arch/c6x/include/asm/processor.h
@@ -96,11 +96,6 @@ static inline void release_thread(struct task_struct *dead_task)
 #define release_segments(mm)		do { } while (0)
 
 /*
- * saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc)
-
-/*
  * saved kernel SP and DP of a blocked thread.
  */
 #ifdef _BIG_ENDIAN
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
index e9bc2b2b8147..13a97aa2285f 100644
--- a/arch/c6x/include/uapi/asm/Kbuild
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -2,11 +2,3 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += kvm_para.h
-
-header-y += byteorder.h
-header-y += kvm_para.h
-header-y += ptrace.h
-header-y += setup.h
-header-y += sigcontext.h
-header-y += swab.h
-header-y += unistd.h
diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c
index e299d30105b5..a2cdb1521aca 100644
--- a/arch/cris/arch-v10/kernel/process.c
+++ b/arch/cris/arch-v10/kernel/process.c
@@ -69,14 +69,6 @@ void hard_reset_now (void)
 	while(1) /* waiting for RETRIBUTION! */ ;
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return task_pt_regs(t)->irp;
-}
-
 /* setup the child's kernel stack with a pt_regs and switch_stack on it.
  * it will be un-nested during _resume and _ret_from_sys_call when the
  * new thread is scheduled.
diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index 2735eb7671a5..b7cd6b9209a9 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig
@@ -136,7 +136,6 @@ config ETRAX_NANDFLASH
 	bool "NAND flash support"
 	depends on ETRAX_ARCH_V32
 	select MTD_NAND
-	select MTD_NAND_IDS
 	help
 	  This option enables MTD mapping of NAND flash devices.  Needed to use
 	  NAND flash memories.  If unsure, say Y.
diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c
index c530a8fa87ce..fe87b383fbf3 100644
--- a/arch/cris/arch-v32/kernel/process.c
+++ b/arch/cris/arch-v32/kernel/process.c
@@ -85,14 +85,6 @@ hard_reset_now(void)
 }
 
 /*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return task_pt_regs(t)->erp;
-}
-
-/*
  * Setup the child's kernel stack with a pt_regs and call switch_stack() on it.
  * It will be unnested during _resume and _ret_from_sys_call when the new thread
  * is scheduled.
diff --git a/arch/cris/boot/dts/include/dt-bindings b/arch/cris/boot/dts/include/dt-bindings
deleted file mode 120000
index 08c00e4972fa..000000000000
--- a/arch/cris/boot/dts/include/dt-bindings
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../include/dt-bindings
-\ No newline at end of file
diff --git a/arch/cris/include/arch-v10/arch/Kbuild b/arch/cris/include/arch-v10/arch/Kbuild
deleted file mode 100644
index 1f0fc7a66f5f..000000000000
--- a/arch/cris/include/arch-v10/arch/Kbuild
+++ /dev/null
@@ -1 +0,0 @@
-# CRISv10 arch
diff --git a/arch/cris/include/arch-v32/arch/Kbuild b/arch/cris/include/arch-v32/arch/Kbuild
deleted file mode 100644
index 2fd65c7e15c9..000000000000
--- a/arch/cris/include/arch-v32/arch/Kbuild
+++ /dev/null
@@ -1 +0,0 @@
-# CRISv32 arch
diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h
index 15b815df29c1..bc2729e4b2c9 100644
--- a/arch/cris/include/asm/processor.h
+++ b/arch/cris/include/asm/processor.h
@@ -52,8 +52,6 @@ unsigned long get_wchan(struct task_struct *p);
 
 #define KSTK_ESP(tsk)   ((tsk) == current ? rdusp() : (tsk)->thread.usp)
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 /* Free all resources held by a thread. */
 static inline void release_thread(struct task_struct *dead_task)
 {
diff --git a/arch/cris/include/uapi/arch-v10/arch/Kbuild b/arch/cris/include/uapi/arch-v10/arch/Kbuild
deleted file mode 100644
index 9048c87a782b..000000000000
--- a/arch/cris/include/uapi/arch-v10/arch/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-# UAPI Header export list
-header-y += sv_addr.agh
-header-y += sv_addr_ag.h
-header-y += svinto.h
-header-y += user.h
diff --git a/arch/cris/include/uapi/arch-v32/arch/Kbuild b/arch/cris/include/uapi/arch-v32/arch/Kbuild
deleted file mode 100644
index 59efffd16b61..000000000000
--- a/arch/cris/include/uapi/arch-v32/arch/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-# UAPI Header export list
-header-y += cryptocop.h
-header-y += user.h
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index d5564a0ae66a..b15bf6bc0e94 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -1,44 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += ../arch-v10/arch/
-header-y += ../arch-v32/arch/
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += elf.h
-header-y += elf_v10.h
-header-y += elf_v32.h
-header-y += errno.h
-header-y += ethernet.h
-header-y += etraxgpio.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += ptrace_v10.h
-header-y += ptrace_v32.h
-header-y += resource.h
-header-y += rs485.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += sync_serial.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h
index ddaeb9cc9143..e4d08d74ed9f 100644
--- a/arch/frv/include/asm/processor.h
+++ b/arch/frv/include/asm/processor.h
@@ -96,11 +96,6 @@ extern asmlinkage void *restore_user_regs(const struct user_context *target, ...
 #define release_segments(mm)		do { } while (0)
 #define forget_segments()		do { } while (0)
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	((tsk)->thread.frame0->pc)
diff --git a/arch/frv/include/asm/timex.h b/arch/frv/include/asm/timex.h
index a89bddefdacf..139093fab326 100644
--- a/arch/frv/include/asm/timex.h
+++ b/arch/frv/include/asm/timex.h
@@ -16,5 +16,11 @@ static inline cycles_t get_cycles(void)
 #define vxtime_lock()		do {} while (0)
 #define vxtime_unlock()		do {} while (0)
 
+/* This attribute is used in include/linux/jiffies.h alongside with
+ * __cacheline_aligned_in_smp. It is assumed that __cacheline_aligned_in_smp
+ * for frv does not contain another section specification.
+ */
+#define __jiffy_arch_data	__attribute__((__section__(".data")))
+
 #endif
 
diff --git a/arch/frv/include/uapi/asm/Kbuild b/arch/frv/include/uapi/asm/Kbuild
index 42a2b33461c0..b15bf6bc0e94 100644
--- a/arch/frv/include/uapi/asm/Kbuild
+++ b/arch/frv/include/uapi/asm/Kbuild
@@ -1,35 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += registers.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/frv/kernel/asm-offsets.c b/arch/frv/kernel/asm-offsets.c
index 8414293f213a..20c5b79b55f9 100644
--- a/arch/frv/kernel/asm-offsets.c
+++ b/arch/frv/kernel/asm-offsets.c
@@ -14,21 +14,10 @@
 #include <asm/thread_info.h>
 #include <asm/gdb-stub.h>
 
-#define DEF_PTREG(sym, reg) \
-        asm volatile("\n->" #sym " %0 offsetof(struct pt_regs, " #reg ")" \
-		     : : "i" (offsetof(struct pt_regs, reg)))
-
-#define DEF_IREG(sym, reg) \
-        asm volatile("\n->" #sym " %0 offsetof(struct user_context, " #reg ")" \
-		     : : "i" (offsetof(struct user_context, reg)))
-
-#define DEF_FREG(sym, reg) \
-        asm volatile("\n->" #sym " %0 offsetof(struct user_context, " #reg ")" \
-		     : : "i" (offsetof(struct user_context, reg)))
-
-#define DEF_0REG(sym, reg) \
-        asm volatile("\n->" #sym " %0 offsetof(struct frv_frame0, " #reg ")" \
-		     : : "i" (offsetof(struct frv_frame0, reg)))
+#define DEF_PTREG(sym, reg) OFFSET(sym, pt_regs, reg)
+#define DEF_IREG(sym, reg) OFFSET(sym, user_context, reg)
+#define DEF_FREG(sym, reg) OFFSET(sym, user_context, reg)
+#define DEF_0REG(sym, reg) OFFSET(sym, frv_frame0, reg)
 
 void foo(void)
 {
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 5a4c92abc99e..a957b374e3a6 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -198,15 +198,6 @@ unsigned long get_wchan(struct task_struct *p)
 	return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	/* Check whether the thread is blocked in resume() */
-	if (in_sched_functions(tsk->thread.pc))
-		return ((unsigned long *)tsk->thread.fp)[2];
-	else
-		return tsk->thread.pc;
-}
-
 int elf_check_arch(const struct elf32_hdr *hdr)
 {
 	unsigned long hsr0 = __get_HSR(0);
diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c
index da82c25301e7..46aa289c5102 100644
--- a/arch/frv/mm/elf-fdpic.c
+++ b/arch/frv/mm/elf-fdpic.c
@@ -75,7 +75,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(current->mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			goto success;
 	}
 
diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h
index 65132d7ae9e5..afa53147e66a 100644
--- a/arch/h8300/include/asm/processor.h
+++ b/arch/h8300/include/asm/processor.h
@@ -110,10 +110,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk);
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	\
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index fb6101a5d4f1..b15bf6bc0e94 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -1,30 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += siginfo.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/h8300/include/asm/bitsperlong.h b/arch/h8300/include/uapi/asm/bitsperlong.h
index e140e46729ac..34212608371e 100644
--- a/arch/h8300/include/asm/bitsperlong.h
+++ b/arch/h8300/include/uapi/asm/bitsperlong.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_H8300_BITS_PER_LONG
-#define __ASM_H8300_BITS_PER_LONG
+#ifndef _UAPI__ASM_H8300_BITS_PER_LONG
+#define _UAPI__ASM_H8300_BITS_PER_LONG
 
 #include <asm-generic/bitsperlong.h>
 
@@ -11,4 +11,4 @@ typedef long		__kernel_ssize_t;
 typedef long		__kernel_ptrdiff_t;
 #endif
 
-#endif /* __ASM_H8300_BITS_PER_LONG */
+#endif /* _UAPI__ASM_H8300_BITS_PER_LONG */
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 0f5db5bb561b..d1ddcabbbe83 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -129,11 +129,6 @@ int copy_thread(unsigned long clone_flags,
 	return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return ((struct pt_regs *)tsk->thread.esp0)->pc;
-}
-
 unsigned long get_wchan(struct task_struct *p)
 {
 	unsigned long fp, pc;
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index a2036bfda8af..6b45ef79eb8f 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -1,6 +1,3 @@
-
-header-y += ucontext.h
-
 generic-y += auxvec.h
 generic-y += barrier.h
 generic-y += bug.h
diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h
index 45a825402f63..ce67940860a5 100644
--- a/arch/hexagon/include/asm/processor.h
+++ b/arch/hexagon/include/asm/processor.h
@@ -33,9 +33,6 @@
 /*  task_struct, defined elsewhere, is the "process descriptor" */
 struct task_struct;
 
-/*  this is defined in arch/process.c  */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
 
 /*
diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild
index c31706c38631..b15bf6bc0e94 100644
--- a/arch/hexagon/include/uapi/asm/Kbuild
+++ b/arch/hexagon/include/uapi/asm/Kbuild
@@ -1,15 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += kvm_para.h
-header-y += param.h
-header-y += ptrace.h
-header-y += registers.h
-header-y += setup.h
-header-y += sigcontext.h
-header-y += signal.h
-header-y += swab.h
-header-y += unistd.h
-header-y += user.h
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index de715bab7956..656050c2e6a0 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -61,14 +61,6 @@ void arch_cpu_idle(void)
 }
 
 /*
- *  Return saved PC of a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return 0;
-}
-
-/*
  * Copy architecture-specific thread state
  */
 int copy_thread(unsigned long clone_flags, unsigned long usp,
diff --git a/arch/hexagon/mm/uaccess.c b/arch/hexagon/mm/uaccess.c
index ec90afdb3ad0..c599eb126c9e 100644
--- a/arch/hexagon/mm/uaccess.c
+++ b/arch/hexagon/mm/uaccess.c
@@ -37,15 +37,14 @@ __kernel_size_t __clear_user_hexagon(void __user *dest, unsigned long count)
 	long uncleared;
 
 	while (count > PAGE_SIZE) {
-		uncleared = __copy_to_user_hexagon(dest, &empty_zero_page,
-						PAGE_SIZE);
+		uncleared = raw_copy_to_user(dest, &empty_zero_page, PAGE_SIZE);
 		if (uncleared)
 			return count - (PAGE_SIZE - uncleared);
 		count -= PAGE_SIZE;
 		dest += PAGE_SIZE;
 	}
 	if (count)
-		count = __copy_to_user_hexagon(dest, &empty_zero_page, count);
+		count = raw_copy_to_user(dest, &empty_zero_page, count);
 
 	return count;
 }
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index 26a63d69c599..ab982f07ea68 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -602,23 +602,6 @@ ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat)
 }
 
 /*
- * Return saved PC of a blocked thread.
- * Note that the only way T can block is through a call to schedule() -> switch_to().
- */
-static inline unsigned long
-thread_saved_pc (struct task_struct *t)
-{
-	struct unw_frame_info info;
-	unsigned long ip;
-
-	unw_init_from_blocked_task(&info, t);
-	if (unw_unwind(&info) < 0)
-		return 0;
-	unw_get_ip(&info, &ip);
-	return ip;
-}
-
-/*
  * Get the current instruction/program counter value.
  */
 #define current_text_addr() \
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild
index 891002bbb995..13a97aa2285f 100644
--- a/arch/ia64/include/uapi/asm/Kbuild
+++ b/arch/ia64/include/uapi/asm/Kbuild
@@ -2,48 +2,3 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += kvm_para.h
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += break.h
-header-y += byteorder.h
-header-y += cmpxchg.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += fpu.h
-header-y += gcc_intrin.h
-header-y += ia64regs.h
-header-y += intel_intrin.h
-header-y += intrinsics.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += perfmon.h
-header-y += perfmon_default_smpl.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += ptrace_offsets.h
-header-y += resource.h
-header-y += rse.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += ucontext.h
-header-y += unistd.h
-header-y += ustack.h
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 3686d6abafde..9edda5466020 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -50,32 +50,10 @@ CFLAGS_traps.o  += -mfixed-range=f2-f5,f16-f31
 # The gate DSO image is built using a special linker script.
 include $(src)/Makefile.gate
 
-# Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config
-define sed-y
-	"/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
-endef
-quiet_cmd_nr_irqs = GEN     $@
-define cmd_nr_irqs
-	(set -e; \
-	 echo "#ifndef __ASM_NR_IRQS_H__"; \
-	 echo "#define __ASM_NR_IRQS_H__"; \
-	 echo "/*"; \
-	 echo " * DO NOT MODIFY."; \
-	 echo " *"; \
-	 echo " * This file was generated by Kbuild"; \
-	 echo " *"; \
-	 echo " */"; \
-	 echo ""; \
-	 sed -ne $(sed-y) $<; \
-	 echo ""; \
-	 echo "#endif" ) > $@
-endef
-
 # We use internal kbuild rules to avoid the "is up to date" message from make
 arch/$(SRCARCH)/kernel/nr-irqs.s: arch/$(SRCARCH)/kernel/nr-irqs.c
 	$(Q)mkdir -p $(dir $@)
 	$(call if_changed_dep,cc_s_c)
 
-include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s
-	$(Q)mkdir -p $(dir $@)
-	$(call cmd,nr_irqs)
+include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s FORCE
+	$(call filechk,offsets,__ASM_NR_IRQS_H__)
diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate
index ceeffc509764..a32903ada016 100644
--- a/arch/ia64/kernel/Makefile.gate
+++ b/arch/ia64/kernel/Makefile.gate
@@ -6,7 +6,7 @@ extra-y += gate.so gate-syms.o gate.lds gate.o
 
 CPPFLAGS_gate.lds := -P -C -U$(ARCH)
 
-quiet_cmd_gate = GATE $@
+quiet_cmd_gate = GATE    $@
       cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
 
 GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h
index 5767367550c6..657874eeeccc 100644
--- a/arch/m32r/include/asm/processor.h
+++ b/arch/m32r/include/asm/processor.h
@@ -122,8 +122,6 @@ extern void release_thread(struct task_struct *);
 extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
 extern void release_segments(struct mm_struct * mm);
 
-extern unsigned long thread_saved_pc(struct task_struct *);
-
 /* Copy and release all segment info associated with a VM */
 #define copy_segments(p, mm)  do { } while (0)
 #define release_segments(mm)  do { } while (0)
diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild
index 43937a61d6cf..b15bf6bc0e94 100644
--- a/arch/m32r/include/uapi/asm/Kbuild
+++ b/arch/m32r/include/uapi/asm/Kbuild
@@ -1,33 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index d8ffcfec599c..8cd7e03f4370 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -39,14 +39,6 @@
 
 #include <linux/err.h>
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return tsk->thread.lr;
-}
-
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 531cb9eb3319..ddff1164aff0 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y
 CONFIG_SYSV68_PARTITION=y
 CONFIG_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
 CONFIG_KEXEC=y
 CONFIG_BOOTINFO_PROC=y
 CONFIG_M68020=y
@@ -349,7 +351,6 @@ CONFIG_SCSI_A4000T=y
 CONFIG_SCSI_ZORRO7XX=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
@@ -361,6 +362,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
 CONFIG_TARGET_CORE=m
 CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_FILEIO=m
@@ -414,6 +416,7 @@ CONFIG_ZORRO8390=y
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -572,6 +575,8 @@ CONFIG_DLM=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_TEST_HEXDUMP=m
@@ -590,6 +595,7 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index ca91d39555da..17384dc959a5 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -27,6 +27,8 @@ CONFIG_SUN_PARTITION=y
 CONFIG_SYSV68_PARTITION=y
 CONFIG_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
 CONFIG_KEXEC=y
 CONFIG_BOOTINFO_PROC=y
 CONFIG_M68020=y
@@ -331,7 +333,6 @@ CONFIG_ISCSI_TCP=m
 CONFIG_ISCSI_BOOT_SYSFS=m
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
@@ -343,6 +344,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
 CONFIG_TARGET_CORE=m
 CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_FILEIO=m
@@ -388,6 +390,7 @@ CONFIG_VETH=m
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -531,6 +534,8 @@ CONFIG_DLM=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_TEST_HEXDUMP=m
@@ -549,6 +554,7 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 23a3d8a691e2..53a641d62f85 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y
 CONFIG_SYSV68_PARTITION=y
 CONFIG_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
 CONFIG_KEXEC=y
 CONFIG_BOOTINFO_PROC=y
 CONFIG_M68020=y
@@ -340,7 +342,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m
 CONFIG_ATARI_SCSI=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
@@ -352,6 +353,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
 CONFIG_TARGET_CORE=m
 CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_FILEIO=m
@@ -399,6 +401,7 @@ CONFIG_SMC91X=y
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -552,6 +555,8 @@ CONFIG_DLM=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_TEST_HEXDUMP=m
@@ -570,6 +575,7 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 95deb95140fe..3925ae3a5eb3 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y
 # CONFIG_EFI_PARTITION is not set
 CONFIG_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
 CONFIG_KEXEC=y
 CONFIG_BOOTINFO_PROC=y
 CONFIG_M68040=y
@@ -330,7 +332,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m
 CONFIG_BVME6000_SCSI=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
@@ -342,6 +343,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
 CONFIG_TARGET_CORE=m
 CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_FILEIO=m
@@ -387,6 +389,7 @@ CONFIG_BVME6000_NET=y
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -523,6 +526,8 @@ CONFIG_DLM=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_TEST_HEXDUMP=m
@@ -541,6 +546,7 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index afae6958db2d..f4a134b390b4 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -27,6 +27,8 @@ CONFIG_SUN_PARTITION=y
 CONFIG_SYSV68_PARTITION=y
 CONFIG_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
 CONFIG_KEXEC=y
 CONFIG_BOOTINFO_PROC=y
 CONFIG_M68020=y
@@ -331,7 +333,6 @@ CONFIG_ISCSI_TCP=m
 CONFIG_ISCSI_BOOT_SYSFS=m
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
@@ -343,6 +344,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
 CONFIG_TARGET_CORE=m
 CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_FILEIO=m
@@ -389,6 +391,7 @@ CONFIG_HPLANCE=y
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -533,6 +536,8 @@ CONFIG_DLM=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_TEST_HEXDUMP=m
@@ -551,6 +556,7 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index b010734729a7..9ed0cef632b7 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y
 CONFIG_SYSV68_PARTITION=y
 CONFIG_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
 CONFIG_KEXEC=y
 CONFIG_BOOTINFO_PROC=y
 CONFIG_M68020=y
@@ -340,7 +342,6 @@ CONFIG_MAC_SCSI=y
 CONFIG_SCSI_MAC_ESP=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
@@ -352,6 +353,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
 CONFIG_TARGET_CORE=m
 CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_FILEIO=m
@@ -408,6 +410,7 @@ CONFIG_MAC8390=y
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -555,6 +558,8 @@ CONFIG_DLM=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_TEST_HEXDUMP=m
@@ -573,6 +578,7 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 0e414549b235..efed0d48fd53 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -22,6 +22,8 @@ CONFIG_UNIXWARE_DISKLABEL=y
 # CONFIG_EFI_PARTITION is not set
 CONFIG_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
 CONFIG_KEXEC=y
 CONFIG_BOOTINFO_PROC=y
 CONFIG_M68020=y
@@ -373,7 +375,6 @@ CONFIG_BVME6000_SCSI=y
 CONFIG_SUN3X_ESP=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
@@ -385,6 +386,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
 CONFIG_TARGET_CORE=m
 CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_FILEIO=m
@@ -454,6 +456,7 @@ CONFIG_SMC91X=y
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PLIP=m
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -635,6 +638,8 @@ CONFIG_DLM=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_TEST_HEXDUMP=m
@@ -653,6 +658,7 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index b2e687a0ec3d..9040457c7f9c 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y
 # CONFIG_EFI_PARTITION is not set
 CONFIG_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
 CONFIG_KEXEC=y
 CONFIG_BOOTINFO_PROC=y
 CONFIG_M68030=y
@@ -329,7 +331,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m
 CONFIG_MVME147_SCSI=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
@@ -341,6 +342,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
 CONFIG_TARGET_CORE=m
 CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_FILEIO=m
@@ -387,6 +389,7 @@ CONFIG_MVME147_NET=y
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -523,6 +526,8 @@ CONFIG_DLM=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_TEST_HEXDUMP=m
@@ -541,6 +546,7 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index cbd8ee24d1bc..8b17f00e0484 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y
 # CONFIG_EFI_PARTITION is not set
 CONFIG_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
 CONFIG_KEXEC=y
 CONFIG_BOOTINFO_PROC=y
 CONFIG_M68040=y
@@ -330,7 +332,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m
 CONFIG_MVME16x_SCSI=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
@@ -342,6 +343,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
 CONFIG_TARGET_CORE=m
 CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_FILEIO=m
@@ -387,6 +389,7 @@ CONFIG_MVME16x_NET=y
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -523,6 +526,8 @@ CONFIG_DLM=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_TEST_HEXDUMP=m
@@ -541,6 +546,7 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 1e82cc944339..5f3718c62c85 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -27,6 +27,8 @@ CONFIG_SUN_PARTITION=y
 CONFIG_SYSV68_PARTITION=y
 CONFIG_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
 CONFIG_KEXEC=y
 CONFIG_BOOTINFO_PROC=y
 CONFIG_M68040=y
@@ -336,7 +338,6 @@ CONFIG_ISCSI_TCP=m
 CONFIG_ISCSI_BOOT_SYSFS=m
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
@@ -348,6 +349,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
 CONFIG_TARGET_CORE=m
 CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_FILEIO=m
@@ -398,6 +400,7 @@ CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PLIP=m
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -546,6 +549,8 @@ CONFIG_DLM=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_TEST_HEXDUMP=m
@@ -564,6 +569,7 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index f9e77f57a972..8c979a68fca5 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -26,6 +26,8 @@ CONFIG_UNIXWARE_DISKLABEL=y
 CONFIG_SYSV68_PARTITION=y
 CONFIG_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
 CONFIG_KEXEC=y
 CONFIG_BOOTINFO_PROC=y
 CONFIG_SUN3=y
@@ -327,7 +329,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m
 CONFIG_SUN3_SCSI=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
@@ -339,6 +340,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
 CONFIG_TARGET_CORE=m
 CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_FILEIO=m
@@ -385,6 +387,7 @@ CONFIG_SUN3_82586=y
 # CONFIG_NET_VENDOR_SUN is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -525,6 +528,8 @@ CONFIG_DLM=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_TEST_HEXDUMP=m
@@ -542,6 +547,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 3c394fcfb368..a1e79530e806 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -26,6 +26,8 @@ CONFIG_UNIXWARE_DISKLABEL=y
 CONFIG_SYSV68_PARTITION=y
 CONFIG_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
 CONFIG_KEXEC=y
 CONFIG_BOOTINFO_PROC=y
 CONFIG_SUN3X=y
@@ -327,7 +329,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m
 CONFIG_SUN3X_ESP=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
@@ -339,6 +340,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
 CONFIG_TARGET_CORE=m
 CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_FILEIO=m
@@ -385,6 +387,7 @@ CONFIG_SUN3LANCE=y
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -525,6 +528,8 @@ CONFIG_DLM=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_TEST_HEXDUMP=m
@@ -543,6 +548,7 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index 77239e81379b..94c36030440c 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -130,8 +130,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	\
diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index 8c8ce5e1ee0e..3bc64d02ba5f 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -62,9 +62,4 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)
 
 #endif /* !CONFIG_CPU_HAS_NO_BITFIELDS */
 
-#ifndef __uClinux__
-extern void ptrace_signal_deliver(void);
-#define ptrace_signal_deliver ptrace_signal_deliver
-#endif /* __uClinux__ */
-
 #endif /* _M68K_SIGNAL_H */
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index 6a2d257bdfb2..64368077235a 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -9,27 +9,3 @@ generic-y += socket.h
 generic-y += sockios.h
 generic-y += termbits.h
 generic-y += termios.h
-
-header-y += a.out.h
-header-y += bootinfo.h
-header-y += bootinfo-amiga.h
-header-y += bootinfo-apollo.h
-header-y += bootinfo-atari.h
-header-y += bootinfo-hp300.h
-header-y += bootinfo-mac.h
-header-y += bootinfo-q40.h
-header-y += bootinfo-vme.h
-header-y += byteorder.h
-header-y += cachectl.h
-header-y += fcntl.h
-header-y += ioctls.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += setup.h
-header-y += sigcontext.h
-header-y += signal.h
-header-y += stat.h
-header-y += swab.h
-header-y += unistd.h
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index e475c945c8b2..7df92f8b0781 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -40,20 +40,6 @@
 asmlinkage void ret_from_fork(void);
 asmlinkage void ret_from_kernel_thread(void);
 
-
-/*
- * Return saved PC from a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp;
-	/* Check whether the thread is blocked in resume() */
-	if (in_sched_functions(sw->retpc))
-		return ((unsigned long *)sw->a6)[1];
-	else
-		return sw->retpc;
-}
-
 void arch_cpu_idle(void)
 {
 #if defined(MACH_ATARI_ONLY)
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 6f945bb5ffbd..e79421f5b9cd 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -109,22 +109,6 @@ int fixup_exception(struct pt_regs *regs)
 	return 1;
 }
 
-void ptrace_signal_deliver(void)
-{
-	struct pt_regs *regs = signal_pt_regs();
-	if (regs->orig_d0 < 0)
-		return;
-	switch (regs->d0) {
-	case -ERESTARTNOHAND:
-	case -ERESTARTSYS:
-	case -ERESTARTNOINTR:
-		regs->d0 = regs->orig_d0;
-		regs->orig_d0 = -1;
-		regs->pc -= 2;
-		break;
-	}
-}
-
 static inline void push_cache (unsigned long vaddr)
 {
 	/*
diff --git a/arch/metag/boot/dts/include/dt-bindings b/arch/metag/boot/dts/include/dt-bindings
deleted file mode 120000
index 08c00e4972fa..000000000000
--- a/arch/metag/boot/dts/include/dt-bindings
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../include/dt-bindings
-\ No newline at end of file
diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h
index 5ebc2850690e..9c8fbf8fb5aa 100644
--- a/arch/metag/include/asm/uaccess.h
+++ b/arch/metag/include/asm/uaccess.h
@@ -24,24 +24,32 @@
 
 #define segment_eq(a, b)	((a).seg == (b).seg)
 
-#define __kernel_ok (uaccess_kernel())
-/*
- * Explicitly allow NULL pointers here. Parts of the kernel such
- * as readv/writev use access_ok to validate pointers, but want
- * to allow NULL pointers for various reasons. NULL pointers are
- * safe to allow through because the first page is not mappable on
- * Meta.
- *
- * We also wish to avoid letting user code access the system area
- * and the kernel half of the address space.
- */
-#define __user_bad(addr, size) (((addr) > 0 && (addr) < META_MEMORY_BASE) || \
-				((addr) > PAGE_OFFSET &&		\
-				 (addr) < LINCORE_BASE))
-
 static inline int __access_ok(unsigned long addr, unsigned long size)
 {
-	return __kernel_ok || !__user_bad(addr, size);
+	/*
+	 * Allow access to the user mapped memory area, but not the system area
+	 * before it. The check extends to the top of the address space when
+	 * kernel access is allowed (there's no real reason to user copy to the
+	 * system area in any case).
+	 */
+	if (likely(addr >= META_MEMORY_BASE && addr < get_fs().seg &&
+		   size <= get_fs().seg - addr))
+		return true;
+	/*
+	 * Explicitly allow NULL pointers here. Parts of the kernel such
+	 * as readv/writev use access_ok to validate pointers, but want
+	 * to allow NULL pointers for various reasons. NULL pointers are
+	 * safe to allow through because the first page is not mappable on
+	 * Meta.
+	 */
+	if (!addr)
+		return true;
+	/* Allow access to core code memory area... */
+	if (addr >= LINCORE_CODE_BASE && addr <= LINCORE_CODE_LIMIT &&
+	    size <= LINCORE_CODE_LIMIT + 1 - addr)
+		return true;
+	/* ... but no other areas. */
+	return false;
 }
 
 #define access_ok(type, addr, size) __access_ok((unsigned long)(addr),	\
@@ -113,7 +121,8 @@ extern long __get_user_bad(void);
 
 #define __get_user_nocheck(x, ptr, size)			\
 ({                                                              \
-	long __gu_err, __gu_val;                                \
+	long __gu_err;						\
+	long long __gu_val;					\
 	__get_user_size(__gu_val, (ptr), (size), __gu_err);	\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;             \
 	__gu_err;                                               \
@@ -121,7 +130,8 @@ extern long __get_user_bad(void);
 
 #define __get_user_check(x, ptr, size)					\
 ({                                                                      \
-	long __gu_err = -EFAULT, __gu_val = 0;                          \
+	long __gu_err = -EFAULT;					\
+	long long __gu_val = 0;						\
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
 	if (access_ok(VERIFY_READ, __gu_addr, size))			\
 		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
@@ -132,6 +142,7 @@ extern long __get_user_bad(void);
 extern unsigned char __get_user_asm_b(const void __user *addr, long *err);
 extern unsigned short __get_user_asm_w(const void __user *addr, long *err);
 extern unsigned int __get_user_asm_d(const void __user *addr, long *err);
+extern unsigned long long __get_user_asm_l(const void __user *addr, long *err);
 
 #define __get_user_size(x, ptr, size, retval)			\
 do {                                                            \
@@ -143,6 +154,8 @@ do {                                                            \
 		x = __get_user_asm_w(ptr, &retval); break;	\
 	case 4:							\
 		x = __get_user_asm_d(ptr, &retval); break;	\
+	case 8:							\
+		x = __get_user_asm_l(ptr, &retval); break;	\
 	default:						\
 		(x) = __get_user_bad();				\
 	}                                                       \
@@ -161,8 +174,13 @@ do {                                                            \
 extern long __must_check __strncpy_from_user(char *dst, const char __user *src,
 					     long count);
 
-#define strncpy_from_user(dst, src, count) __strncpy_from_user(dst, src, count)
-
+static inline long
+strncpy_from_user(char *dst, const char __user *src, long count)
+{
+	if (!access_ok(VERIFY_READ, src, 1))
+		return -EFAULT;
+	return __strncpy_from_user(dst, src, count);
+}
 /*
  * Return the size of a string (including the ending 0)
  *
diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild
index ab78be2b6eb0..b29731ebd7a9 100644
--- a/arch/metag/include/uapi/asm/Kbuild
+++ b/arch/metag/include/uapi/asm/Kbuild
@@ -1,14 +1,6 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += byteorder.h
-header-y += ech.h
-header-y += ptrace.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += swab.h
-header-y += unistd.h
-
 generic-y += mman.h
 generic-y += resource.h
 generic-y += setup.h
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index 232a12bf3f99..2dbbb7c66043 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -567,8 +567,7 @@ static void stop_this_cpu(void *data)
 {
 	unsigned int cpu = smp_processor_id();
 
-	if (system_state == SYSTEM_BOOTING ||
-	    system_state == SYSTEM_RUNNING) {
+	if (system_state <= SYSTEM_RUNNING) {
 		spin_lock(&stop_lock);
 		pr_crit("CPU%u: stopping\n", cpu);
 		dump_stack();
diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c
index e8a4ea83cabb..c941abdb8f85 100644
--- a/arch/metag/lib/usercopy.c
+++ b/arch/metag/lib/usercopy.c
@@ -246,65 +246,47 @@
 #define __asm_copy_user_64bit_rapf_loop(				\
 		to, from, ret, n, id, FIXUP)				\
 	asm volatile (							\
-		".balign 8\n"						\
-		"MOV	RAPF, %1\n"					\
-		"MSETL	[A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n"	\
-		"MOV	D0Ar6, #0\n"					\
-		"LSR	D1Ar5, %3, #6\n"				\
-		"SUB	TXRPT, D1Ar5, #2\n"				\
-		"MOV	RAPF, %1\n"					\
+			".balign 8\n"					\
+		"	MOV	RAPF, %1\n"				\
+		"	MSETL	[A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n" \
+		"	MOV	D0Ar6, #0\n"				\
+		"	LSR	D1Ar5, %3, #6\n"			\
+		"	SUB	TXRPT, D1Ar5, #2\n"			\
+		"	MOV	RAPF, %1\n"				\
 		"$Lloop"id":\n"						\
-		"ADD	RAPF, %1, #64\n"				\
-		"21:\n"							\
-		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"22:\n"							\
-		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"23:\n"							\
-		"SUB	%3, %3, #32\n"					\
-		"24:\n"							\
-		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"25:\n"							\
-		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"26:\n"							\
-		"SUB	%3, %3, #32\n"					\
-		"DCACHE	[%1+#-64], D0Ar6\n"				\
-		"BR	$Lloop"id"\n"					\
+		"	ADD	RAPF, %1, #64\n"			\
+		"21:	MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"22:	MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"23:	SUB	%3, %3, #32\n"				\
+		"24:	MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"25:	MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"26:	SUB	%3, %3, #32\n"				\
+		"	DCACHE	[%1+#-64], D0Ar6\n"			\
+		"	BR	$Lloop"id"\n"				\
 									\
-		"MOV	RAPF, %1\n"					\
-		"27:\n"							\
-		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"28:\n"							\
-		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"29:\n"							\
-		"SUB	%3, %3, #32\n"					\
-		"30:\n"							\
-		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"31:\n"							\
-		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"32:\n"							\
-		"SUB	%0, %0, #8\n"					\
-		"33:\n"							\
-		"SETL	[%0++], D0.7, D1.7\n"				\
-		"SUB	%3, %3, #32\n"					\
-		"1:"							\
-		"DCACHE	[%1+#-64], D0Ar6\n"				\
-		"GETL    D0Ar6, D1Ar5, [A0StP+#-40]\n"			\
-		"GETL    D0FrT, D1RtP, [A0StP+#-32]\n"			\
-		"GETL    D0.5, D1.5, [A0StP+#-24]\n"			\
-		"GETL    D0.6, D1.6, [A0StP+#-16]\n"			\
-		"GETL    D0.7, D1.7, [A0StP+#-8]\n"			\
-		"SUB A0StP, A0StP, #40\n"				\
+		"	MOV	RAPF, %1\n"				\
+		"27:	MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"28:	MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"29:	SUB	%3, %3, #32\n"				\
+		"30:	MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"31:	MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"32:	SETL	[%0+#-8], D0.7, D1.7\n"			\
+		"	SUB	%3, %3, #32\n"				\
+		"1:	DCACHE	[%1+#-64], D0Ar6\n"			\
+		"	GETL	D0Ar6, D1Ar5, [A0StP+#-40]\n"		\
+		"	GETL	D0FrT, D1RtP, [A0StP+#-32]\n"		\
+		"	GETL	D0.5, D1.5, [A0StP+#-24]\n"		\
+		"	GETL	D0.6, D1.6, [A0StP+#-16]\n"		\
+		"	GETL	D0.7, D1.7, [A0StP+#-8]\n"		\
+		"	SUB	A0StP, A0StP, #40\n"			\
 		"	.section .fixup,\"ax\"\n"			\
-		"4:\n"							\
-		"	ADD	%0, %0, #8\n"				\
-		"3:\n"							\
-		"	MOV	D0Ar2, TXSTATUS\n"			\
+		"3:	MOV	D0Ar2, TXSTATUS\n"			\
 		"	MOV	D1Ar1, TXSTATUS\n"			\
 		"	AND	D1Ar1, D1Ar1, #0xFFFFF8FF\n"		\
 		"	MOV	TXSTATUS, D1Ar1\n"			\
 			FIXUP						\
-		"	MOVT    D0Ar2,#HI(1b)\n"			\
-		"	JUMP    D0Ar2,#LO(1b)\n"			\
+		"	MOVT	D0Ar2, #HI(1b)\n"			\
+		"	JUMP	D0Ar2, #LO(1b)\n"			\
 		"	.previous\n"					\
 		"	.section __ex_table,\"a\"\n"			\
 		"	.long 21b,3b\n"					\
@@ -319,7 +301,6 @@
 		"	.long 30b,3b\n"					\
 		"	.long 31b,3b\n"					\
 		"	.long 32b,3b\n"					\
-		"	.long 33b,4b\n"					\
 		"	.previous\n"					\
 		: "=r" (to), "=r" (from), "=r" (ret), "=d" (n)		\
 		: "0" (to), "1" (from), "2" (ret), "3" (n)		\
@@ -397,89 +378,59 @@
 #define __asm_copy_user_32bit_rapf_loop(				\
 			to,	from, ret, n, id, FIXUP)		\
 	asm volatile (							\
-		".balign 8\n"						\
-		"MOV	RAPF, %1\n"					\
-		"MSETL	[A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n"	\
-		"MOV	D0Ar6, #0\n"					\
-		"LSR	D1Ar5, %3, #6\n"				\
-		"SUB	TXRPT, D1Ar5, #2\n"				\
-		"MOV	RAPF, %1\n"					\
-	"$Lloop"id":\n"							\
-		"ADD	RAPF, %1, #64\n"				\
-		"21:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"22:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"23:\n"							\
-		"SUB	%3, %3, #16\n"					\
-		"24:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"25:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"26:\n"							\
-		"SUB	%3, %3, #16\n"					\
-		"27:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"28:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"29:\n"							\
-		"SUB	%3, %3, #16\n"					\
-		"30:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"31:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"32:\n"							\
-		"SUB	%3, %3, #16\n"					\
-		"DCACHE	[%1+#-64], D0Ar6\n"				\
-		"BR	$Lloop"id"\n"					\
+			".balign 8\n"					\
+		"	MOV	RAPF, %1\n"				\
+		"	MSETL	[A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n" \
+		"	MOV	D0Ar6, #0\n"				\
+		"	LSR	D1Ar5, %3, #6\n"			\
+		"	SUB	TXRPT, D1Ar5, #2\n"			\
+		"	MOV	RAPF, %1\n"				\
+		"$Lloop"id":\n"						\
+		"	ADD	RAPF, %1, #64\n"			\
+		"21:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"22:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"23:	SUB	%3, %3, #16\n"				\
+		"24:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"25:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"26:	SUB	%3, %3, #16\n"				\
+		"27:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"28:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"29:	SUB	%3, %3, #16\n"				\
+		"30:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"31:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"32:	SUB	%3, %3, #16\n"				\
+		"	DCACHE	[%1+#-64], D0Ar6\n"			\
+		"	BR	$Lloop"id"\n"				\
 									\
-		"MOV	RAPF, %1\n"					\
-		"33:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"34:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"35:\n"							\
-		"SUB	%3, %3, #16\n"					\
-		"36:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"37:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"38:\n"							\
-		"SUB	%3, %3, #16\n"					\
-		"39:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"40:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"41:\n"							\
-		"SUB	%3, %3, #16\n"					\
-		"42:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"43:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"44:\n"							\
-		"SUB	%0, %0, #4\n"					\
-		"45:\n"							\
-		"SETD	[%0++], D0.7\n"					\
-		"SUB	%3, %3, #16\n"					\
-		"1:"							\
-		"DCACHE	[%1+#-64], D0Ar6\n"				\
-		"GETL    D0Ar6, D1Ar5, [A0StP+#-40]\n"			\
-		"GETL    D0FrT, D1RtP, [A0StP+#-32]\n"			\
-		"GETL    D0.5, D1.5, [A0StP+#-24]\n"			\
-		"GETL    D0.6, D1.6, [A0StP+#-16]\n"			\
-		"GETL    D0.7, D1.7, [A0StP+#-8]\n"			\
-		"SUB A0StP, A0StP, #40\n"				\
+		"	MOV	RAPF, %1\n"				\
+		"33:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"34:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"35:	SUB	%3, %3, #16\n"				\
+		"36:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"37:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"38:	SUB	%3, %3, #16\n"				\
+		"39:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"40:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"41:	SUB	%3, %3, #16\n"				\
+		"42:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"43:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"44:	SETD	[%0+#-4], D0.7\n"			\
+		"	SUB	%3, %3, #16\n"				\
+		"1:	DCACHE	[%1+#-64], D0Ar6\n"			\
+		"	GETL	D0Ar6, D1Ar5, [A0StP+#-40]\n"		\
+		"	GETL	D0FrT, D1RtP, [A0StP+#-32]\n"		\
+		"	GETL	D0.5, D1.5, [A0StP+#-24]\n"		\
+		"	GETL	D0.6, D1.6, [A0StP+#-16]\n"		\
+		"	GETL	D0.7, D1.7, [A0StP+#-8]\n"		\
+		"	SUB A0StP, A0StP, #40\n"			\
 		"	.section .fixup,\"ax\"\n"			\
-		"4:\n"							\
-		"	ADD		%0, %0, #4\n"			\
-		"3:\n"							\
-		"	MOV	D0Ar2, TXSTATUS\n"			\
+		"3:	MOV	D0Ar2, TXSTATUS\n"			\
 		"	MOV	D1Ar1, TXSTATUS\n"			\
 		"	AND	D1Ar1, D1Ar1, #0xFFFFF8FF\n"		\
 		"	MOV	TXSTATUS, D1Ar1\n"			\
 			FIXUP						\
-		"	MOVT    D0Ar2,#HI(1b)\n"			\
-		"	JUMP    D0Ar2,#LO(1b)\n"			\
+		"	MOVT	D0Ar2, #HI(1b)\n"			\
+		"	JUMP	D0Ar2, #LO(1b)\n"			\
 		"	.previous\n"					\
 		"	.section __ex_table,\"a\"\n"			\
 		"	.long 21b,3b\n"					\
@@ -506,7 +457,6 @@
 		"	.long 42b,3b\n"					\
 		"	.long 43b,3b\n"					\
 		"	.long 44b,3b\n"					\
-		"	.long 45b,4b\n"					\
 		"	.previous\n"					\
 		: "=r" (to), "=r" (from), "=r" (ret), "=d" (n)		\
 		: "0" (to), "1" (from), "2" (ret), "3" (n)		\
@@ -1094,6 +1044,30 @@ unsigned int __get_user_asm_d(const void __user *addr, long *err)
 }
 EXPORT_SYMBOL(__get_user_asm_d);
 
+unsigned long long __get_user_asm_l(const void __user *addr, long *err)
+{
+	register unsigned long long x asm ("D0Re0") = 0;
+	asm volatile (
+		"	GETL %0,%t0,[%2]\n"
+		"1:\n"
+		"	GETL %0,%t0,[%2]\n"
+		"2:\n"
+		"	.section .fixup,\"ax\"\n"
+		"3:	MOV     D0FrT,%3\n"
+		"	SETD    [%1],D0FrT\n"
+		"	MOVT    D0FrT,#HI(2b)\n"
+		"	JUMP    D0FrT,#LO(2b)\n"
+		"	.previous\n"
+		"	.section __ex_table,\"a\"\n"
+		"	.long 1b,3b\n"
+		"	.previous\n"
+		: "=r" (x)
+		: "r" (err), "r" (addr), "P" (-EFAULT)
+		: "D0FrT");
+	return x;
+}
+EXPORT_SYMBOL(__get_user_asm_l);
+
 long __put_user_asm_b(unsigned int x, void __user *addr)
 {
 	register unsigned int err asm ("D0Re0") = 0;
diff --git a/arch/metag/mm/mmu-meta1.c b/arch/metag/mm/mmu-meta1.c
index 91f4255bcb5c..62ebab90924d 100644
--- a/arch/metag/mm/mmu-meta1.c
+++ b/arch/metag/mm/mmu-meta1.c
@@ -152,6 +152,5 @@ void __init mmu_init(unsigned long mem_end)
 
 		p_swapper_pg_dir++;
 		addr += PGDIR_SIZE;
-		entry++;
 	}
 }
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index dc5dd5b69fde..92fd4e95b488 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -1,8 +1,6 @@
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
 CONFIG_AUDIT=y
-CONFIG_AUDIT_LOGINUID_IMMUTABLE=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_SYSFS_DEPRECATED=y
@@ -33,10 +31,12 @@ CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
-# CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
+CONFIG_BRIDGE=m
 CONFIG_MTD=y
-CONFIG_PROC_DEVICETREE=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
 CONFIG_NETDEVICES=y
@@ -47,9 +47,9 @@ CONFIG_XILINX_LL_TEMAC=y
 # CONFIG_VT is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_UARTLITE=y
 CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 CONFIG_XILINX_HWICAP=y
 CONFIG_I2C=y
@@ -66,7 +66,6 @@ CONFIG_FB=y
 CONFIG_FB_XILINX=y
 # CONFIG_USB_SUPPORT is not set
 CONFIG_UIO=y
-CONFIG_UIO_PDRV=y
 CONFIG_UIO_PDRV_GENIRQ=y
 CONFIG_UIO_DMEM_GENIRQ=y
 CONFIG_EXT2_FS=y
@@ -77,14 +76,13 @@ CONFIG_NFS_FS=y
 CONFIG_CIFS=y
 CONFIG_CIFS_STATS=y
 CONFIG_CIFS_STATS2=y
-CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_SLAB=y
+CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_INFO=y
 CONFIG_KGDB=y
 CONFIG_KGDB_TESTS=y
 CONFIG_KGDB_KDB=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_KEYS=y
 CONFIG_ENCRYPTED_KEYS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig
index 4cdaf565e638..06d69a6e192d 100644
--- a/arch/microblaze/configs/nommu_defconfig
+++ b/arch/microblaze/configs/nommu_defconfig
@@ -1,9 +1,6 @@
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
 CONFIG_AUDIT=y
-CONFIG_AUDIT_LOGINUID_IMMUTABLE=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
 CONFIG_IKCONFIG=y
@@ -34,18 +31,15 @@ CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
-# CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_UCLINUX=y
-CONFIG_PROC_DEVICETREE=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
 CONFIG_NETDEVICES=y
@@ -56,9 +50,9 @@ CONFIG_XILINX_LL_TEMAC=y
 # CONFIG_VT is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_UARTLITE=y
 CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 CONFIG_XILINX_HWICAP=y
 CONFIG_I2C=y
@@ -74,10 +68,6 @@ CONFIG_XILINX_WATCHDOG=y
 CONFIG_FB=y
 CONFIG_FB_XILINX=y
 # CONFIG_USB_SUPPORT is not set
-CONFIG_UIO=y
-CONFIG_UIO_PDRV=y
-CONFIG_UIO_PDRV_GENIRQ=y
-CONFIG_UIO_DMEM_GENIRQ=y
 CONFIG_EXT2_FS=y
 # CONFIG_DNOTIFY is not set
 CONFIG_CRAMFS=y
@@ -85,10 +75,10 @@ CONFIG_ROMFS_FS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NLS=y
-CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_SLAB=y
+CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_INFO=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_KEYS=y
 CONFIG_ENCRYPTED_KEYS=y
@@ -97,4 +87,3 @@ CONFIG_CRYPTO_MD4=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_ARC4=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 56830ff65333..83a4ef3a2495 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -1,14 +1,57 @@
 
 generic-y += barrier.h
+generic-y += bitops.h
+generic-y += bitsperlong.h
+generic-y += bug.h
+generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += device.h
+generic-y += div64.h
+generic-y += emergency-restart.h
+generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
+generic-y += fb.h
+generic-y += fcntl.h
+generic-y += hardirq.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += irq_regs.h
 generic-y += irq_work.h
+generic-y += kdebug.h
+generic-y += kmap_types.h
+generic-y += kprobes.h
+generic-y += linkage.h
+generic-y += local.h
+generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += parport.h
+generic-y += percpu.h
+generic-y += poll.h
 generic-y += preempt.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += serial.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += siginfo.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += swab.h
 generic-y += syscalls.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += topology.h
 generic-y += trace_clock.h
+generic-y += ucontext.h
+generic-y += vga.h
 generic-y += word-at-a-time.h
-generic-y += kprobes.h
+generic-y += xor.h
diff --git a/arch/microblaze/include/asm/bitops.h b/arch/microblaze/include/asm/bitops.h
deleted file mode 100644
index a72468f15c8b..000000000000
--- a/arch/microblaze/include/asm/bitops.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bitops.h>
diff --git a/arch/microblaze/include/asm/bug.h b/arch/microblaze/include/asm/bug.h
deleted file mode 100644
index b12fd89e42e9..000000000000
--- a/arch/microblaze/include/asm/bug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bug.h>
diff --git a/arch/microblaze/include/asm/bugs.h b/arch/microblaze/include/asm/bugs.h
deleted file mode 100644
index 61791e1ad9f5..000000000000
--- a/arch/microblaze/include/asm/bugs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bugs.h>
diff --git a/arch/microblaze/include/asm/div64.h b/arch/microblaze/include/asm/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/arch/microblaze/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/microblaze/include/asm/emergency-restart.h b/arch/microblaze/include/asm/emergency-restart.h
deleted file mode 100644
index 3711bd9d50bd..000000000000
--- a/arch/microblaze/include/asm/emergency-restart.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/emergency-restart.h>
diff --git a/arch/microblaze/include/asm/fb.h b/arch/microblaze/include/asm/fb.h
deleted file mode 100644
index 3a4988e8df45..000000000000
--- a/arch/microblaze/include/asm/fb.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fb.h>
diff --git a/arch/microblaze/include/asm/hardirq.h b/arch/microblaze/include/asm/hardirq.h
deleted file mode 100644
index fb3c05a0cbbf..000000000000
--- a/arch/microblaze/include/asm/hardirq.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/hardirq.h>
diff --git a/arch/microblaze/include/asm/irq_regs.h b/arch/microblaze/include/asm/irq_regs.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/microblaze/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/microblaze/include/asm/kdebug.h b/arch/microblaze/include/asm/kdebug.h
deleted file mode 100644
index 6ece1b037665..000000000000
--- a/arch/microblaze/include/asm/kdebug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
diff --git a/arch/microblaze/include/asm/kmap_types.h b/arch/microblaze/include/asm/kmap_types.h
deleted file mode 100644
index 25975252d83d..000000000000
--- a/arch/microblaze/include/asm/kmap_types.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_MICROBLAZE_KMAP_TYPES_H
-#define _ASM_MICROBLAZE_KMAP_TYPES_H
-
-#include <asm-generic/kmap_types.h>
-
-#endif /* _ASM_MICROBLAZE_KMAP_TYPES_H */
diff --git a/arch/microblaze/include/asm/linkage.h b/arch/microblaze/include/asm/linkage.h
deleted file mode 100644
index 0540bbaad897..000000000000
--- a/arch/microblaze/include/asm/linkage.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/linkage.h>
diff --git a/arch/microblaze/include/asm/local.h b/arch/microblaze/include/asm/local.h
deleted file mode 100644
index c11c530f74d0..000000000000
--- a/arch/microblaze/include/asm/local.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local.h>
diff --git a/arch/microblaze/include/asm/local64.h b/arch/microblaze/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc239..000000000000
--- a/arch/microblaze/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/microblaze/include/asm/parport.h b/arch/microblaze/include/asm/parport.h
deleted file mode 100644
index cf252af64590..000000000000
--- a/arch/microblaze/include/asm/parport.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/parport.h>
diff --git a/arch/microblaze/include/asm/percpu.h b/arch/microblaze/include/asm/percpu.h
deleted file mode 100644
index 06a959d67234..000000000000
--- a/arch/microblaze/include/asm/percpu.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/percpu.h>
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index 37ef196e4519..330d556860ba 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -69,8 +69,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 # define KSTK_EIP(tsk)	(0)
@@ -121,10 +119,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/* Return saved (kernel) PC of a blocked thread.  */
-#  define thread_saved_pc(tsk)	\
-	((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0)
-
 unsigned long get_wchan(struct task_struct *p);
 
 /* The size allocated for kernel stacks. This _must_ be a power of two! */
diff --git a/arch/microblaze/include/asm/serial.h b/arch/microblaze/include/asm/serial.h
deleted file mode 100644
index a0cb0caff152..000000000000
--- a/arch/microblaze/include/asm/serial.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/serial.h>
diff --git a/arch/microblaze/include/asm/shmparam.h b/arch/microblaze/include/asm/shmparam.h
deleted file mode 100644
index 93f30deb95d0..000000000000
--- a/arch/microblaze/include/asm/shmparam.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/shmparam.h>
diff --git a/arch/microblaze/include/asm/topology.h b/arch/microblaze/include/asm/topology.h
deleted file mode 100644
index 5428f333a02c..000000000000
--- a/arch/microblaze/include/asm/topology.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/topology.h>
diff --git a/arch/microblaze/include/asm/ucontext.h b/arch/microblaze/include/asm/ucontext.h
deleted file mode 100644
index 9bc07b9f30fb..000000000000
--- a/arch/microblaze/include/asm/ucontext.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ucontext.h>
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index 032fed71223f..9774e1d9507b 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -38,6 +38,6 @@
 
 #endif /* __ASSEMBLY__ */
 
-#define __NR_syscalls         398
+#define __NR_syscalls         399
 
 #endif /* _ASM_MICROBLAZE_UNISTD_H */
diff --git a/arch/microblaze/include/asm/vga.h b/arch/microblaze/include/asm/vga.h
deleted file mode 100644
index 89d82fd8fcf1..000000000000
--- a/arch/microblaze/include/asm/vga.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/vga.h>
diff --git a/arch/microblaze/include/asm/xor.h b/arch/microblaze/include/asm/xor.h
deleted file mode 100644
index c82eb12a5b18..000000000000
--- a/arch/microblaze/include/asm/xor.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/xor.h>
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index 1aac99f87df1..2178c78c7c1a 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -2,35 +2,3 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += types.h
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += elf.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += unistd.h
diff --git a/arch/microblaze/include/uapi/asm/bitsperlong.h b/arch/microblaze/include/uapi/asm/bitsperlong.h
deleted file mode 100644
index 6dc0bb0c13b2..000000000000
--- a/arch/microblaze/include/uapi/asm/bitsperlong.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bitsperlong.h>
diff --git a/arch/microblaze/include/uapi/asm/errno.h b/arch/microblaze/include/uapi/asm/errno.h
deleted file mode 100644
index 4c82b503d92f..000000000000
--- a/arch/microblaze/include/uapi/asm/errno.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/errno.h>
diff --git a/arch/microblaze/include/uapi/asm/fcntl.h b/arch/microblaze/include/uapi/asm/fcntl.h
deleted file mode 100644
index 46ab12db5739..000000000000
--- a/arch/microblaze/include/uapi/asm/fcntl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fcntl.h>
diff --git a/arch/microblaze/include/uapi/asm/ioctl.h b/arch/microblaze/include/uapi/asm/ioctl.h
deleted file mode 100644
index b279fe06dfe5..000000000000
--- a/arch/microblaze/include/uapi/asm/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/arch/microblaze/include/uapi/asm/ioctls.h b/arch/microblaze/include/uapi/asm/ioctls.h
deleted file mode 100644
index ec34c760665e..000000000000
--- a/arch/microblaze/include/uapi/asm/ioctls.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctls.h>
diff --git a/arch/microblaze/include/uapi/asm/ipcbuf.h b/arch/microblaze/include/uapi/asm/ipcbuf.h
deleted file mode 100644
index 84c7e51cb6d0..000000000000
--- a/arch/microblaze/include/uapi/asm/ipcbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/microblaze/include/uapi/asm/kvm_para.h b/arch/microblaze/include/uapi/asm/kvm_para.h
deleted file mode 100644
index 14fab8f0b957..000000000000
--- a/arch/microblaze/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kvm_para.h>
diff --git a/arch/microblaze/include/uapi/asm/mman.h b/arch/microblaze/include/uapi/asm/mman.h
deleted file mode 100644
index 8eebf89f5ab1..000000000000
--- a/arch/microblaze/include/uapi/asm/mman.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mman.h>
diff --git a/arch/microblaze/include/uapi/asm/msgbuf.h b/arch/microblaze/include/uapi/asm/msgbuf.h
deleted file mode 100644
index 809134c644a6..000000000000
--- a/arch/microblaze/include/uapi/asm/msgbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/msgbuf.h>
diff --git a/arch/microblaze/include/uapi/asm/param.h b/arch/microblaze/include/uapi/asm/param.h
deleted file mode 100644
index 965d45427975..000000000000
--- a/arch/microblaze/include/uapi/asm/param.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/param.h>
diff --git a/arch/microblaze/include/uapi/asm/poll.h b/arch/microblaze/include/uapi/asm/poll.h
deleted file mode 100644
index c98509d3149e..000000000000
--- a/arch/microblaze/include/uapi/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/arch/microblaze/include/uapi/asm/resource.h b/arch/microblaze/include/uapi/asm/resource.h
deleted file mode 100644
index 04bc4db8921b..000000000000
--- a/arch/microblaze/include/uapi/asm/resource.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/resource.h>
diff --git a/arch/microblaze/include/uapi/asm/sembuf.h b/arch/microblaze/include/uapi/asm/sembuf.h
deleted file mode 100644
index 7673b83cfef7..000000000000
--- a/arch/microblaze/include/uapi/asm/sembuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/sembuf.h>
diff --git a/arch/microblaze/include/uapi/asm/shmbuf.h b/arch/microblaze/include/uapi/asm/shmbuf.h
deleted file mode 100644
index 83c05fc2de38..000000000000
--- a/arch/microblaze/include/uapi/asm/shmbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/shmbuf.h>
diff --git a/arch/microblaze/include/uapi/asm/siginfo.h b/arch/microblaze/include/uapi/asm/siginfo.h
deleted file mode 100644
index 0815d29d82e5..000000000000
--- a/arch/microblaze/include/uapi/asm/siginfo.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/siginfo.h>
diff --git a/arch/microblaze/include/uapi/asm/signal.h b/arch/microblaze/include/uapi/asm/signal.h
deleted file mode 100644
index 7b1573ce19de..000000000000
--- a/arch/microblaze/include/uapi/asm/signal.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/signal.h>
diff --git a/arch/microblaze/include/uapi/asm/socket.h b/arch/microblaze/include/uapi/asm/socket.h
deleted file mode 100644
index 6b71384b9d8b..000000000000
--- a/arch/microblaze/include/uapi/asm/socket.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/socket.h>
diff --git a/arch/microblaze/include/uapi/asm/sockios.h b/arch/microblaze/include/uapi/asm/sockios.h
deleted file mode 100644
index def6d4746ee7..000000000000
--- a/arch/microblaze/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/sockios.h>
diff --git a/arch/microblaze/include/uapi/asm/stat.h b/arch/microblaze/include/uapi/asm/stat.h
deleted file mode 100644
index 3dc90fa92c70..000000000000
--- a/arch/microblaze/include/uapi/asm/stat.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/stat.h>
diff --git a/arch/microblaze/include/uapi/asm/statfs.h b/arch/microblaze/include/uapi/asm/statfs.h
deleted file mode 100644
index 0b91fe198c20..000000000000
--- a/arch/microblaze/include/uapi/asm/statfs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/statfs.h>
diff --git a/arch/microblaze/include/uapi/asm/swab.h b/arch/microblaze/include/uapi/asm/swab.h
deleted file mode 100644
index 7847e563ab66..000000000000
--- a/arch/microblaze/include/uapi/asm/swab.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/swab.h>
diff --git a/arch/microblaze/include/uapi/asm/termbits.h b/arch/microblaze/include/uapi/asm/termbits.h
deleted file mode 100644
index 3935b106de79..000000000000
--- a/arch/microblaze/include/uapi/asm/termbits.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termbits.h>
diff --git a/arch/microblaze/include/uapi/asm/termios.h b/arch/microblaze/include/uapi/asm/termios.h
deleted file mode 100644
index 280d78a9d966..000000000000
--- a/arch/microblaze/include/uapi/asm/termios.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termios.h>
diff --git a/arch/microblaze/include/uapi/asm/unistd.h b/arch/microblaze/include/uapi/asm/unistd.h
index d8086159d996..a88b3c11cc20 100644
--- a/arch/microblaze/include/uapi/asm/unistd.h
+++ b/arch/microblaze/include/uapi/asm/unistd.h
@@ -413,5 +413,6 @@
 #define __NR_pkey_mprotect	395
 #define __NR_pkey_alloc		396
 #define __NR_pkey_free		397
+#define __NR_statx		398
 
 #endif /* _UAPI_ASM_MICROBLAZE_UNISTD_H */
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index 12e093a03e60..e45ada8fb006 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -65,8 +65,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
 		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
 			continue;
 
-		__dma_sync(page_to_phys(sg_page(sg)) + sg->offset,
-							sg->length, direction);
+		__dma_sync(sg_phys(sg), sg->length, direction);
 	}
 
 	return nents;
diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S
index ef548510b951..4e1b567becd6 100644
--- a/arch/microblaze/kernel/entry.S
+++ b/arch/microblaze/kernel/entry.S
@@ -208,9 +208,7 @@ syscall_debug_table:
 	mfs	r11, rmsr;		/* save MSR */			\
 	swi	r11, r1, PT_MSR;
 
-#define RESTORE_REGS \
-	lwi	r11, r1, PT_MSR;					\
-	mts	rmsr , r11;						\
+#define RESTORE_REGS_GP \
 	lwi	r2, r1, PT_R2;	/* restore SDA */		\
 	lwi	r3, r1, PT_R3;					\
 	lwi	r4, r1, PT_R4;					\
@@ -242,6 +240,18 @@ syscall_debug_table:
 	lwi	r30, r1, PT_R30;					\
 	lwi	r31, r1, PT_R31;	/* Restore cur task reg */
 
+#define RESTORE_REGS \
+	lwi	r11, r1, PT_MSR;					\
+	mts	rmsr , r11;						\
+	RESTORE_REGS_GP
+
+#define RESTORE_REGS_RTBD \
+	lwi	r11, r1, PT_MSR;					\
+	andni	r11, r11, MSR_EIP;          /* clear EIP */             \
+	ori	r11, r11, MSR_EE | MSR_BIP; /* set EE and BIP */        \
+	mts	rmsr , r11;						\
+	RESTORE_REGS_GP
+
 #define SAVE_STATE	\
 	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)); /* save stack */	\
 	/* See if already in kernel mode.*/				\
@@ -427,7 +437,7 @@ C_ENTRY(ret_from_trap):
 	swi	CURRENT_TASK, r0, PER_CPU(CURRENT_SAVE); /* save current */
 	VM_OFF;
 	tophys(r1,r1);
-	RESTORE_REGS;
+	RESTORE_REGS_RTBD;
 	addik	r1, r1, PT_SIZE		/* Clean up stack space.  */
 	lwi	r1, r1, PT_R1 - PT_SIZE;/* Restore user stack pointer. */
 	bri	6f;
@@ -436,7 +446,7 @@ C_ENTRY(ret_from_trap):
 2:	set_bip;			/*  Ints masked for state restore */
 	VM_OFF;
 	tophys(r1,r1);
-	RESTORE_REGS;
+	RESTORE_REGS_RTBD;
 	addik	r1, r1, PT_SIZE		/* Clean up stack space.  */
 	tovirt(r1,r1);
 6:
@@ -612,7 +622,7 @@ C_ENTRY(ret_from_exc):
 	VM_OFF;
 	tophys(r1,r1);
 
-	RESTORE_REGS;
+	RESTORE_REGS_RTBD;
 	addik	r1, r1, PT_SIZE		/* Clean up stack space.  */
 
 	lwi	r1, r1, PT_R1 - PT_SIZE; /* Restore user stack pointer. */
@@ -621,7 +631,7 @@ C_ENTRY(ret_from_exc):
 2:	set_bip;			/* Ints masked for state restore */
 	VM_OFF;
 	tophys(r1,r1);
-	RESTORE_REGS;
+	RESTORE_REGS_RTBD;
 	addik	r1, r1, PT_SIZE		/* Clean up stack space.  */
 
 	tovirt(r1,r1);
@@ -847,7 +857,7 @@ dbtrap_call: /* Return point for kernel/user entry + 8 because of rtsd r15, 8 */
 	VM_OFF;
 	tophys(r1,r1);
 	/* MS: Restore all regs */
-	RESTORE_REGS
+	RESTORE_REGS_RTBD
 	addik	r1, r1, PT_SIZE	 /* Clean up stack space */
 	lwi	r1, r1, PT_R1 - PT_SIZE; /* Restore user stack pointer */
 DBTRAP_return_user: /* MS: Make global symbol for debugging */
@@ -858,7 +868,7 @@ DBTRAP_return_user: /* MS: Make global symbol for debugging */
 2:	VM_OFF;
 	tophys(r1,r1);
 	/* MS: Restore all regs */
-	RESTORE_REGS
+	RESTORE_REGS_RTBD
 	lwi	r14, r1, PT_R14;
 	lwi	r16, r1, PT_PC;
 	addik	r1, r1, PT_SIZE; /* MS: Clean up stack space */
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index e92a817e645f..6527ec22f158 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -119,23 +119,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	return 0;
 }
 
-#ifndef CONFIG_MMU
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct cpu_context *ctx =
-		&(((struct thread_info *)(tsk->stack))->cpu_context);
-
-	/* Check whether the thread is blocked in resume() */
-	if (in_sched_functions(ctx->r15))
-		return (unsigned long)ctx->r15;
-	else
-		return ctx->r14;
-}
-#endif
-
 unsigned long get_wchan(struct task_struct *p)
 {
 /* TBD (used by procfs) */
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index 6841c2df14d9..c48ff4ad2070 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -398,3 +398,4 @@ ENTRY(sys_call_table)
 	.long sys_pkey_mprotect		/* 395 */
 	.long sys_pkey_alloc
 	.long sys_pkey_free
+	.long sys_statx
diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c
index 999066192715..545ccd46edb3 100644
--- a/arch/microblaze/kernel/timer.c
+++ b/arch/microblaze/kernel/timer.c
@@ -178,8 +178,10 @@ static __init int xilinx_clockevent_init(void)
 				clockevent_xilinx_timer.shift);
 	clockevent_xilinx_timer.max_delta_ns =
 		clockevent_delta2ns((u32)~0, &clockevent_xilinx_timer);
+	clockevent_xilinx_timer.max_delta_ticks = (u32)~0;
 	clockevent_xilinx_timer.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_xilinx_timer);
+	clockevent_xilinx_timer.min_delta_ticks = 1;
 	clockevent_xilinx_timer.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_xilinx_timer);
 
diff --git a/arch/microblaze/mm/highmem.c b/arch/microblaze/mm/highmem.c
index 2fcc5a52d84d..ed4454c5ce35 100644
--- a/arch/microblaze/mm/highmem.c
+++ b/arch/microblaze/mm/highmem.c
@@ -60,6 +60,7 @@ void __kunmap_atomic(void *kvaddr)
 {
 	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
 	int type;
+	unsigned int idx;
 
 	if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
 		pagefault_enable();
@@ -68,21 +69,18 @@ void __kunmap_atomic(void *kvaddr)
 	}
 
 	type = kmap_atomic_idx();
-#ifdef CONFIG_DEBUG_HIGHMEM
-	{
-		unsigned int idx;
-
-		idx = type + KM_TYPE_NR * smp_processor_id();
-		BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
 
-		/*
-		 * force other mappings to Oops if they'll try to access
-		 * this pte without first remap it
-		 */
-		pte_clear(&init_mm, vaddr, kmap_pte-idx);
-		local_flush_tlb_page(NULL, vaddr);
-	}
+	idx = type + KM_TYPE_NR * smp_processor_id();
+#ifdef CONFIG_DEBUG_HIGHMEM
+	BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
 #endif
+	/*
+	 * force other mappings to Oops if they'll try to access
+	 * this pte without first remap it
+	 */
+	pte_clear(&init_mm, vaddr, kmap_pte-idx);
+	local_flush_tlb_page(NULL, vaddr);
+
 	kmap_atomic_idx_pop();
 	pagefault_enable();
 	preempt_enable();
diff --git a/arch/mips/Kbuild b/arch/mips/Kbuild
index 5c3f688a5232..5cef58651db0 100644
--- a/arch/mips/Kbuild
+++ b/arch/mips/Kbuild
@@ -1,7 +1,9 @@
 # Fail on warnings - also for files referenced in subdirs
 # -Werror can be disabled for specific files using:
 # CFLAGS_<file.o> := -Wno-error
+ifeq ($(W),)
 subdir-ccflags-y := -Werror
+endif
 
 # platform specific definitions
 include arch/mips/Kbuild.platforms
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 4e9ebf65d071..2828ecde133d 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -46,6 +46,7 @@ config MIPS
 	select ARCH_DISCARD_MEMBLOCK
 	select GENERIC_SMP_IDLE_THREAD
 	select BUILDTIME_EXTABLE_SORT
+	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
 	select GENERIC_CMOS_UPDATE
@@ -68,6 +69,7 @@ config MIPS
 	select HANDLE_DOMAIN_IRQ
 	select HAVE_EXIT_THREAD
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_COPY_THREAD_TLS
 
 menu "Machine selection"
 
@@ -1039,14 +1041,6 @@ config RWSEM_GENERIC_SPINLOCK
 config RWSEM_XCHGADD_ALGORITHM
 	bool
 
-config ARCH_HAS_ILOG2_U32
-	bool
-	default n
-
-config ARCH_HAS_ILOG2_U64
-	bool
-	default n
-
 config GENERIC_HWEIGHT
 	bool
 	default y
@@ -1372,6 +1366,7 @@ config CPU_LOONGSON3
 	select WEAK_ORDERING
 	select WEAK_REORDERING_BEYOND_LLSC
 	select MIPS_PGD_C0_CONTEXT
+	select MIPS_L1_CACHE_SHIFT_6
 	select GPIOLIB
 	help
 		The Loongson 3 processor implements the MIPS64R2 instruction
@@ -2120,10 +2115,13 @@ config MIPS_VA_BITS_48
 	bool "48 bits virtual memory"
 	depends on 64BIT
 	help
-	  Support a maximum at least 48 bits of application virtual memory.
-	  Default is 40 bits or less, depending on the CPU.
-	  This option result in a small memory overhead for page tables.
-	  This option is only supported with 16k and 64k page sizes.
+	  Support a maximum at least 48 bits of application virtual
+	  memory.  Default is 40 bits or less, depending on the CPU.
+	  For page sizes 16k and above, this option results in a small
+	  memory overhead for page tables.  For 4k page size, a fourth
+	  level of page tables is added which imposes both a memory
+	  overhead as well as slower TLB fault handling.
+
 	  If unsure, say N.
 
 choice
@@ -2133,7 +2131,6 @@ choice
 config PAGE_SIZE_4KB
 	bool "4kB"
 	depends on !CPU_LOONGSON2 && !CPU_LOONGSON3
-	depends on !MIPS_VA_BITS_48
 	help
 	 This option select the standard 4kB Linux page size.  On some
 	 R3000-family processors this is the only available page size.  Using
@@ -2982,6 +2979,7 @@ config HAVE_LATENCYTOP_SUPPORT
 
 config PGTABLE_LEVELS
 	int
+	default 4 if PAGE_SIZE_4KB && MIPS_VA_BITS_48
 	default 3 if 64BIT && !PAGE_SIZE_64KB
 	default 2
 
diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index 7f975b20b20c..42a97c59200f 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -82,7 +82,7 @@ config CMDLINE_OVERRIDE
 config SB1XXX_CORELIS
 	bool "Corelis Debugger"
 	depends on SIBYTE_SB1xxx_SOC
-	select DEBUG_INFO
+	select DEBUG_INFO if !COMPILE_TEST
 	help
 	  Select compile flags that produce code that can be processed by the
 	  Corelis mksym utility and UDB Emulator.
diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile
index 2728a9a9c7c5..145b5ce8eb7e 100644
--- a/arch/mips/boot/Makefile
+++ b/arch/mips/boot/Makefile
@@ -128,19 +128,19 @@ quiet_cmd_cpp_its_S = ITS     $@
 			-DADDR_BITS=$(ADDR_BITS) \
 			-DADDR_CELLS=$(itb_addr_cells)
 
-$(obj)/vmlinux.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
 	$(call if_changed_dep,cpp_its_S,none,vmlinux.bin)
 
-$(obj)/vmlinux.gz.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.gz.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
 	$(call if_changed_dep,cpp_its_S,gzip,vmlinux.bin.gz)
 
-$(obj)/vmlinux.bz2.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.bz2.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX)  FORCE
 	$(call if_changed_dep,cpp_its_S,bzip2,vmlinux.bin.bz2)
 
-$(obj)/vmlinux.lzma.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.lzma.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
 	$(call if_changed_dep,cpp_its_S,lzma,vmlinux.bin.lzma)
 
-$(obj)/vmlinux.lzo.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.lzo.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
 	$(call if_changed_dep,cpp_its_S,lzo,vmlinux.bin.lzo)
 
 quiet_cmd_itb-image = ITB     $@
diff --git a/arch/mips/boot/dts/include/dt-bindings b/arch/mips/boot/dts/include/dt-bindings
deleted file mode 120000
index 08c00e4972fa..000000000000
--- a/arch/mips/boot/dts/include/dt-bindings
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../include/dt-bindings
-\ No newline at end of file
diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index c370426a7322..5c0b56203bae 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -25,15 +25,6 @@ endif # CPU_CAVIUM_OCTEON
 
 if CAVIUM_OCTEON_SOC
 
-config CAVIUM_OCTEON_2ND_KERNEL
-	bool "Build the kernel to be used as a 2nd kernel on the same chip"
-	default "n"
-	help
-	  This option configures this kernel to be linked at a different
-	  address and use the 2nd uart for output. This allows a kernel built
-	  with this option to be run at the same time as one built without this
-	  option.
-
 config CAVIUM_OCTEON_LOCK_L2
 	bool "Lock often used kernel code in the L2"
 	default "y"
diff --git a/arch/mips/cavium-octeon/Platform b/arch/mips/cavium-octeon/Platform
index 8a301cb12d68..45be853700e6 100644
--- a/arch/mips/cavium-octeon/Platform
+++ b/arch/mips/cavium-octeon/Platform
@@ -4,8 +4,4 @@
 platform-$(CONFIG_CAVIUM_OCTEON_SOC)	+= cavium-octeon/
 cflags-$(CONFIG_CAVIUM_OCTEON_SOC)	+=				\
 		-I$(srctree)/arch/mips/include/asm/mach-cavium-octeon
-ifdef CONFIG_CAVIUM_OCTEON_2ND_KERNEL
-load-$(CONFIG_CAVIUM_OCTEON_SOC)	+= 0xffffffff84100000
-else
 load-$(CONFIG_CAVIUM_OCTEON_SOC)	+= 0xffffffff81100000
-endif
diff --git a/arch/mips/cavium-octeon/executive/cvmx-l2c.c b/arch/mips/cavium-octeon/executive/cvmx-l2c.c
index 89b5273299ab..f091c9b70603 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-l2c.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-l2c.c
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2010 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -239,6 +239,7 @@ uint64_t cvmx_l2c_read_perf(uint32_t counter)
 		else {
 			uint64_t counter = 0;
 			int tad;
+
 			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
 				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC0(tad));
 			return counter;
@@ -249,6 +250,7 @@ uint64_t cvmx_l2c_read_perf(uint32_t counter)
 		else {
 			uint64_t counter = 0;
 			int tad;
+
 			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
 				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC1(tad));
 			return counter;
@@ -259,6 +261,7 @@ uint64_t cvmx_l2c_read_perf(uint32_t counter)
 		else {
 			uint64_t counter = 0;
 			int tad;
+
 			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
 				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC2(tad));
 			return counter;
@@ -270,6 +273,7 @@ uint64_t cvmx_l2c_read_perf(uint32_t counter)
 		else {
 			uint64_t counter = 0;
 			int tad;
+
 			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
 				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC3(tad));
 			return counter;
@@ -301,7 +305,7 @@ static void fault_in(uint64_t addr, int len)
 	 */
 	CVMX_DCACHE_INVALIDATE;
 	while (len > 0) {
-		ACCESS_ONCE(*ptr);
+		READ_ONCE(*ptr);
 		len -= CVMX_CACHE_LINE_SIZE;
 		ptr += CVMX_CACHE_LINE_SIZE;
 	}
@@ -375,7 +379,9 @@ int cvmx_l2c_lock_line(uint64_t addr)
 		if (((union cvmx_l2c_cfg)(cvmx_read_csr(CVMX_L2C_CFG))).s.idxalias) {
 			int alias_shift = CVMX_L2C_IDX_ADDR_SHIFT + 2 * CVMX_L2_SET_BITS - 1;
 			uint64_t addr_tmp = addr ^ (addr & ((1 << alias_shift) - 1)) >> CVMX_L2_SET_BITS;
+
 			lckbase.s.lck_base = addr_tmp >> 7;
+
 		} else {
 			lckbase.s.lck_base = addr >> 7;
 		}
@@ -435,6 +441,7 @@ void cvmx_l2c_flush(void)
 		/* These may look like constants, but they aren't... */
 		int assoc_shift = CVMX_L2C_TAG_ADDR_ALIAS_SHIFT;
 		int set_shift = CVMX_L2C_IDX_ADDR_SHIFT;
+
 		for (set = 0; set < n_set; set++) {
 			for (assoc = 0; assoc < n_assoc; assoc++) {
 				address = CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
@@ -519,89 +526,49 @@ int cvmx_l2c_unlock_mem_region(uint64_t start, uint64_t len)
 union __cvmx_l2c_tag {
 	uint64_t u64;
 	struct cvmx_l2c_tag_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved:40;
-		uint64_t V:1;		/* Line valid */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t L:1;		/* Line locked */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t addr:20;	/* Phys mem addr (33..14) */
-#else
-		uint64_t addr:20;	/* Phys mem addr (33..14) */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t L:1;		/* Line locked */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t V:1;		/* Line valid */
-		uint64_t reserved:40;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved:40,
+		__BITFIELD_FIELD(uint64_t V:1,		/* Line valid */
+		__BITFIELD_FIELD(uint64_t D:1,		/* Line dirty */
+		__BITFIELD_FIELD(uint64_t L:1,		/* Line locked */
+		__BITFIELD_FIELD(uint64_t U:1,		/* Use, LRU eviction */
+		__BITFIELD_FIELD(uint64_t addr:20,	/* Phys addr (33..14) */
+		;))))))
 	} cn50xx;
 	struct cvmx_l2c_tag_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved:41;
-		uint64_t V:1;		/* Line valid */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t L:1;		/* Line locked */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t addr:19;	/* Phys mem addr (33..15) */
-#else
-		uint64_t addr:19;	/* Phys mem addr (33..15) */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t L:1;		/* Line locked */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t V:1;		/* Line valid */
-		uint64_t reserved:41;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved:41,
+		__BITFIELD_FIELD(uint64_t V:1,		/* Line valid */
+		__BITFIELD_FIELD(uint64_t D:1,		/* Line dirty */
+		__BITFIELD_FIELD(uint64_t L:1,		/* Line locked */
+		__BITFIELD_FIELD(uint64_t U:1,		/* Use, LRU eviction */
+		__BITFIELD_FIELD(uint64_t addr:19,	/* Phys addr (33..15) */
+		;))))))
 	} cn30xx;
 	struct cvmx_l2c_tag_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved:42;
-		uint64_t V:1;		/* Line valid */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t L:1;		/* Line locked */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t addr:18;	/* Phys mem addr (33..16) */
-#else
-		uint64_t addr:18;	/* Phys mem addr (33..16) */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t L:1;		/* Line locked */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t V:1;		/* Line valid */
-		uint64_t reserved:42;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved:42,
+		__BITFIELD_FIELD(uint64_t V:1,		/* Line valid */
+		__BITFIELD_FIELD(uint64_t D:1,		/* Line dirty */
+		__BITFIELD_FIELD(uint64_t L:1,		/* Line locked */
+		__BITFIELD_FIELD(uint64_t U:1,		/* Use, LRU eviction */
+		__BITFIELD_FIELD(uint64_t addr:18,	/* Phys addr (33..16) */
+		;))))))
 	} cn31xx;
 	struct cvmx_l2c_tag_cn38xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved:43;
-		uint64_t V:1;		/* Line valid */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t L:1;		/* Line locked */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t addr:17;	/* Phys mem addr (33..17) */
-#else
-		uint64_t addr:17;	/* Phys mem addr (33..17) */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t L:1;		/* Line locked */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t V:1;		/* Line valid */
-		uint64_t reserved:43;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved:43,
+		__BITFIELD_FIELD(uint64_t V:1,		/* Line valid */
+		__BITFIELD_FIELD(uint64_t D:1,		/* Line dirty */
+		__BITFIELD_FIELD(uint64_t L:1,		/* Line locked */
+		__BITFIELD_FIELD(uint64_t U:1,		/* Use, LRU eviction */
+		__BITFIELD_FIELD(uint64_t addr:17,	/* Phys addr (33..17) */
+		;))))))
 	} cn38xx;
 	struct cvmx_l2c_tag_cn58xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved:44;
-		uint64_t V:1;		/* Line valid */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t L:1;		/* Line locked */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t addr:16;	/* Phys mem addr (33..18) */
-#else
-		uint64_t addr:16;	/* Phys mem addr (33..18) */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t L:1;		/* Line locked */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t V:1;		/* Line valid */
-		uint64_t reserved:44;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved:44,
+		__BITFIELD_FIELD(uint64_t V:1,		/* Line valid */
+		__BITFIELD_FIELD(uint64_t D:1,		/* Line dirty */
+		__BITFIELD_FIELD(uint64_t L:1,		/* Line locked */
+		__BITFIELD_FIELD(uint64_t U:1,		/* Use, LRU eviction */
+		__BITFIELD_FIELD(uint64_t addr:16,	/* Phys addr (33..18) */
+		;))))))
 	} cn58xx;
 	struct cvmx_l2c_tag_cn58xx cn56xx;	/* 2048 sets */
 	struct cvmx_l2c_tag_cn31xx cn52xx;	/* 512 sets */
@@ -629,8 +596,8 @@ static union __cvmx_l2c_tag __read_l2_tag(uint64_t assoc, uint64_t index)
 	union __cvmx_l2c_tag tag_val;
 	uint64_t dbg_addr = CVMX_L2C_DBG;
 	unsigned long flags;
-
 	union cvmx_l2c_dbg debug_val;
+
 	debug_val.u64 = 0;
 	/*
 	 * For low core count parts, the core number is always small
@@ -683,8 +650,8 @@ static union __cvmx_l2c_tag __read_l2_tag(uint64_t assoc, uint64_t index)
 union cvmx_l2c_tag cvmx_l2c_get_tag(uint32_t association, uint32_t index)
 {
 	union cvmx_l2c_tag tag;
-	tag.u64 = 0;
 
+	tag.u64 = 0;
 	if ((int)association >= cvmx_l2c_get_num_assoc()) {
 		cvmx_dprintf("ERROR: cvmx_l2c_get_tag association out of range\n");
 		return tag;
@@ -767,10 +734,12 @@ uint32_t cvmx_l2c_address_to_index(uint64_t addr)
 
 	if (OCTEON_IS_MODEL(OCTEON_CN6XXX)) {
 		union cvmx_l2c_ctl l2c_ctl;
+
 		l2c_ctl.u64 = cvmx_read_csr(CVMX_L2C_CTL);
 		indxalias = !l2c_ctl.s.disidxalias;
 	} else {
 		union cvmx_l2c_cfg l2c_cfg;
+
 		l2c_cfg.u64 = cvmx_read_csr(CVMX_L2C_CFG);
 		indxalias = l2c_cfg.s.idxalias;
 	}
@@ -778,6 +747,7 @@ uint32_t cvmx_l2c_address_to_index(uint64_t addr)
 	if (indxalias) {
 		if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
 			uint32_t a_14_12 = (idx / (CVMX_L2C_MEMBANK_SELECT_SIZE/(1<<CVMX_L2C_IDX_ADDR_SHIFT))) & 0x7;
+
 			idx ^= idx / cvmx_l2c_get_num_sets();
 			idx ^= a_14_12;
 		} else {
@@ -801,6 +771,7 @@ int cvmx_l2c_get_cache_size_bytes(void)
 int cvmx_l2c_get_set_bits(void)
 {
 	int l2_set_bits;
+
 	if (OCTEON_IS_MODEL(OCTEON_CN56XX) || OCTEON_IS_MODEL(OCTEON_CN58XX))
 		l2_set_bits = 11;	/* 2048 sets */
 	else if (OCTEON_IS_MODEL(OCTEON_CN38XX) || OCTEON_IS_MODEL(OCTEON_CN63XX))
@@ -828,6 +799,7 @@ int cvmx_l2c_get_num_sets(void)
 int cvmx_l2c_get_num_assoc(void)
 {
 	int l2_assoc;
+
 	if (OCTEON_IS_MODEL(OCTEON_CN56XX) ||
 	    OCTEON_IS_MODEL(OCTEON_CN52XX) ||
 	    OCTEON_IS_MODEL(OCTEON_CN58XX) ||
@@ -869,16 +841,17 @@ int cvmx_l2c_get_num_assoc(void)
 		else if (mio_fus_dat3.s.l2c_crip == 1)
 			l2_assoc = 12;
 	} else {
-		union cvmx_l2d_fus3 val;
-		val.u64 = cvmx_read_csr(CVMX_L2D_FUS3);
+		uint64_t l2d_fus3;
+
+		l2d_fus3 = cvmx_read_csr(CVMX_L2D_FUS3);
 		/*
 		 * Using shifts here, as bit position names are
 		 * different for each model but they all mean the
 		 * same.
 		 */
-		if ((val.u64 >> 35) & 0x1)
+		if ((l2d_fus3 >> 35) & 0x1)
 			l2_assoc = l2_assoc >> 2;
-		else if ((val.u64 >> 34) & 0x1)
+		else if ((l2d_fus3 >> 34) & 0x1)
 			l2_assoc = l2_assoc >> 1;
 	}
 	return l2_assoc;
diff --git a/arch/mips/cavium-octeon/executive/octeon-model.c b/arch/mips/cavium-octeon/executive/octeon-model.c
index d08a2bce653c..341052387b49 100644
--- a/arch/mips/cavium-octeon/executive/octeon-model.c
+++ b/arch/mips/cavium-octeon/executive/octeon-model.c
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2010 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -63,16 +63,15 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 	char pass[4];
 	int clock_mhz;
 	const char *suffix;
-	union cvmx_l2d_fus3 fus3;
 	int num_cores;
 	union cvmx_mio_fus_dat2 fus_dat2;
 	union cvmx_mio_fus_dat3 fus_dat3;
 	char fuse_model[10];
 	uint32_t fuse_data = 0;
+	uint64_t l2d_fus3 = 0;
 
-	fus3.u64 = 0;
 	if (OCTEON_IS_MODEL(OCTEON_CN3XXX) || OCTEON_IS_MODEL(OCTEON_CN5XXX))
-		fus3.u64 = cvmx_read_csr(CVMX_L2D_FUS3);
+		l2d_fus3 = (cvmx_read_csr(CVMX_L2D_FUS3) >> 34) & 0x3;
 	fus_dat2.u64 = cvmx_read_csr(CVMX_MIO_FUS_DAT2);
 	fus_dat3.u64 = cvmx_read_csr(CVMX_MIO_FUS_DAT3);
 	num_cores = cvmx_octeon_num_cores();
@@ -192,7 +191,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 	/* Now figure out the family, the first two digits */
 	switch ((chip_id >> 8) & 0xff) {
 	case 0:		/* CN38XX, CN37XX or CN36XX */
-		if (fus3.cn38xx.crip_512k) {
+		if (l2d_fus3) {
 			/*
 			 * For some unknown reason, the 16 core one is
 			 * called 37 instead of 36.
@@ -223,7 +222,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 		}
 		break;
 	case 1:		/* CN31XX or CN3020 */
-		if ((chip_id & 0x10) || fus3.cn31xx.crip_128k)
+		if ((chip_id & 0x10) || l2d_fus3)
 			family = "30";
 		else
 			family = "31";
@@ -246,7 +245,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 	case 2:		/* CN3010 or CN3005 */
 		family = "30";
 		/* A chip with half cache is an 05 */
-		if (fus3.cn30xx.crip_64k)
+		if (l2d_fus3)
 			core_model = "05";
 		/*
 		 * This series of chips didn't follow the standard
@@ -267,7 +266,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 	case 3:		/* CN58XX */
 		family = "58";
 		/* Special case. 4 core, half cache (CP with half cache) */
-		if ((num_cores == 4) && fus3.cn58xx.crip_1024k && !strncmp(suffix, "CP", 2))
+		if ((num_cores == 4) && l2d_fus3 && !strncmp(suffix, "CP", 2))
 			core_model = "29";
 
 		/* Pass 1 uses different encodings for pass numbers */
@@ -290,7 +289,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 		break;
 	case 4:		/* CN57XX, CN56XX, CN55XX, CN54XX */
 		if (fus_dat2.cn56xx.raid_en) {
-			if (fus3.cn56xx.crip_1024k)
+			if (l2d_fus3)
 				family = "55";
 			else
 				family = "57";
@@ -309,7 +308,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 				if (fus_dat3.cn56xx.bar2_en)
 					suffix = "NSPB2";
 			}
-			if (fus3.cn56xx.crip_1024k)
+			if (l2d_fus3)
 				family = "54";
 			else
 				family = "56";
@@ -319,7 +318,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 		family = "50";
 		break;
 	case 7:		/* CN52XX */
-		if (fus3.cn52xx.crip_256k)
+		if (l2d_fus3)
 			family = "51";
 		else
 			family = "52";
diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index 3375e61daa19..8505db478904 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -3,71 +3,27 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2004-2016 Cavium Networks
+ * Copyright (C) 2004-2017 Cavium, Inc.
  * Copyright (C) 2008 Wind River Systems
  */
 
-#include <linux/init.h>
-#include <linux/delay.h>
 #include <linux/etherdevice.h>
 #include <linux/of_platform.h>
 #include <linux/of_fdt.h>
 #include <linux/libfdt.h>
-#include <linux/usb/ehci_def.h>
-#include <linux/usb/ehci_pdriver.h>
-#include <linux/usb/ohci_pdriver.h>
 
 #include <asm/octeon/octeon.h>
 #include <asm/octeon/cvmx-helper-board.h>
+
+#ifdef CONFIG_USB
+#include <linux/usb/ehci_def.h>
+#include <linux/usb/ehci_pdriver.h>
+#include <linux/usb/ohci_pdriver.h>
 #include <asm/octeon/cvmx-uctlx-defs.h>
 
 #define CVMX_UAHCX_EHCI_USBCMD	(CVMX_ADD_IO_SEG(0x00016F0000000010ull))
 #define CVMX_UAHCX_OHCI_USBCMD	(CVMX_ADD_IO_SEG(0x00016F0000000408ull))
 
-/* Octeon Random Number Generator.  */
-static int __init octeon_rng_device_init(void)
-{
-	struct platform_device *pd;
-	int ret = 0;
-
-	struct resource rng_resources[] = {
-		{
-			.flags	= IORESOURCE_MEM,
-			.start	= XKPHYS_TO_PHYS(CVMX_RNM_CTL_STATUS),
-			.end	= XKPHYS_TO_PHYS(CVMX_RNM_CTL_STATUS) + 0xf
-		}, {
-			.flags	= IORESOURCE_MEM,
-			.start	= cvmx_build_io_address(8, 0),
-			.end	= cvmx_build_io_address(8, 0) + 0x7
-		}
-	};
-
-	pd = platform_device_alloc("octeon_rng", -1);
-	if (!pd) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = platform_device_add_resources(pd, rng_resources,
-					    ARRAY_SIZE(rng_resources));
-	if (ret)
-		goto fail;
-
-	ret = platform_device_add(pd);
-	if (ret)
-		goto fail;
-
-	return ret;
-fail:
-	platform_device_put(pd);
-
-out:
-	return ret;
-}
-device_initcall(octeon_rng_device_init);
-
-#ifdef CONFIG_USB
-
 static DEFINE_MUTEX(octeon2_usb_clocks_mutex);
 
 static int octeon2_usb_clock_start_cnt;
@@ -440,8 +396,49 @@ device_initcall(octeon_ohci_device_init);
 
 #endif /* CONFIG_USB */
 
+/* Octeon Random Number Generator.  */
+static int __init octeon_rng_device_init(void)
+{
+	struct platform_device *pd;
+	int ret = 0;
 
-static struct of_device_id __initdata octeon_ids[] = {
+	struct resource rng_resources[] = {
+		{
+			.flags	= IORESOURCE_MEM,
+			.start	= XKPHYS_TO_PHYS(CVMX_RNM_CTL_STATUS),
+			.end	= XKPHYS_TO_PHYS(CVMX_RNM_CTL_STATUS) + 0xf
+		}, {
+			.flags	= IORESOURCE_MEM,
+			.start	= cvmx_build_io_address(8, 0),
+			.end	= cvmx_build_io_address(8, 0) + 0x7
+		}
+	};
+
+	pd = platform_device_alloc("octeon_rng", -1);
+	if (!pd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = platform_device_add_resources(pd, rng_resources,
+					    ARRAY_SIZE(rng_resources));
+	if (ret)
+		goto fail;
+
+	ret = platform_device_add(pd);
+	if (ret)
+		goto fail;
+
+	return ret;
+fail:
+	platform_device_put(pd);
+
+out:
+	return ret;
+}
+device_initcall(octeon_rng_device_init);
+
+const struct of_device_id octeon_ids[] __initconst = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "cavium,octeon-6335-uctl", },
 	{ .compatible = "cavium,octeon-5750-usbn", },
@@ -481,6 +478,7 @@ static void __init octeon_fdt_set_phy(int eth, int phy_addr)
 	alt_phy_handle = fdt_getprop(initial_boot_params, eth, "cavium,alt-phy-handle", NULL);
 	if (alt_phy_handle) {
 		u32 alt_phandle = be32_to_cpup(alt_phy_handle);
+
 		alt_phy = fdt_node_offset_by_phandle(initial_boot_params, alt_phandle);
 	} else {
 		alt_phy = -1;
@@ -579,6 +577,7 @@ static void __init octeon_fdt_rm_ethernet(int node)
 	if (phy_handle) {
 		u32 ph = be32_to_cpup(phy_handle);
 		int p = fdt_node_offset_by_phandle(initial_boot_params, ph);
+
 		if (p >= 0)
 			fdt_nop_node(initial_boot_params, p);
 	}
@@ -728,6 +727,7 @@ int __init octeon_prune_device_tree(void)
 
 	for (i = 0; i < 2; i++) {
 		int mgmt;
+
 		snprintf(name_buffer, sizeof(name_buffer),
 			 "mix%d", i);
 		alias_prop = fdt_getprop(initial_boot_params, aliases,
@@ -743,6 +743,7 @@ int __init octeon_prune_device_tree(void)
 						 name_buffer);
 			} else {
 				int phy_addr = cvmx_helper_board_get_mii_address(CVMX_HELPER_BOARD_MGMT_IPD_PORT + i);
+
 				octeon_fdt_set_phy(mgmt, phy_addr);
 			}
 		}
@@ -751,6 +752,7 @@ int __init octeon_prune_device_tree(void)
 	pip_path = fdt_getprop(initial_boot_params, aliases, "pip", NULL);
 	if (pip_path) {
 		int pip = fdt_path_offset(initial_boot_params, pip_path);
+
 		if (pip	 >= 0)
 			for (i = 0; i <= 4; i++)
 				octeon_fdt_pip_iface(pip, i);
@@ -767,6 +769,7 @@ int __init octeon_prune_device_tree(void)
 
 	for (i = 0; i < 2; i++) {
 		int i2c;
+
 		snprintf(name_buffer, sizeof(name_buffer),
 			 "twsi%d", i);
 		alias_prop = fdt_getprop(initial_boot_params, aliases,
@@ -797,11 +800,11 @@ int __init octeon_prune_device_tree(void)
 
 	for (i = 0; i < 2; i++) {
 		int i2c;
+
 		snprintf(name_buffer, sizeof(name_buffer),
 			 "smi%d", i);
 		alias_prop = fdt_getprop(initial_boot_params, aliases,
 					name_buffer, NULL);
-
 		if (alias_prop) {
 			i2c = fdt_path_offset(initial_boot_params, alias_prop);
 			if (i2c < 0)
@@ -824,6 +827,7 @@ int __init octeon_prune_device_tree(void)
 
 	for (i = 0; i < 3; i++) {
 		int uart;
+
 		snprintf(name_buffer, sizeof(name_buffer),
 			 "uart%d", i);
 		alias_prop = fdt_getprop(initial_boot_params, aliases,
@@ -863,6 +867,7 @@ int __init octeon_prune_device_tree(void)
 		int len;
 
 		int cf = fdt_path_offset(initial_boot_params, alias_prop);
+
 		base_ptr = 0;
 		if (octeon_bootinfo->major_version == 1
 			&& octeon_bootinfo->minor_version >= 1) {
@@ -912,6 +917,7 @@ int __init octeon_prune_device_tree(void)
 			fdt_nop_property(initial_boot_params, cf, "cavium,dma-engine-handle");
 			if (!is_16bit) {
 				__be32 width = cpu_to_be32(8);
+
 				fdt_setprop_inplace(initial_boot_params, cf,
 						"cavium,bus-width", &width, sizeof(width));
 			}
@@ -1004,6 +1010,7 @@ end_led:
 		;
 	}
 
+#ifdef CONFIG_USB
 	/* OHCI/UHCI USB */
 	alias_prop = fdt_getprop(initial_boot_params, aliases,
 				 "uctl", NULL);
@@ -1036,6 +1043,7 @@ end_led:
 		} else  {
 			__be32 new_f[1];
 			enum cvmx_helper_board_usb_clock_types c;
+
 			c = __cvmx_helper_board_usb_get_clock_type();
 			switch (c) {
 			case USB_CLOCK_TYPE_REF_48:
@@ -1052,6 +1060,7 @@ end_led:
 			}
 		}
 	}
+#endif
 
 	return 0;
 }
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index d9dbeb0b165b..a8034d0dcade 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -374,14 +374,8 @@ void octeon_write_lcd(const char *s)
  */
 int octeon_get_boot_uart(void)
 {
-	int uart;
-#ifdef CONFIG_CAVIUM_OCTEON_2ND_KERNEL
-	uart = 1;
-#else
-	uart = (octeon_boot_desc_ptr->flags & OCTEON_BL_FLAG_CONSOLE_UART1) ?
+	return (octeon_boot_desc_ptr->flags & OCTEON_BL_FLAG_CONSOLE_UART1) ?
 		1 : 0;
-#endif
-	return uart;
 }
 
 /**
@@ -901,14 +895,10 @@ void __init prom_init(void)
 	}
 
 	if (strstr(arcs_cmdline, "console=") == NULL) {
-#ifdef CONFIG_CAVIUM_OCTEON_2ND_KERNEL
-		strcat(arcs_cmdline, " console=ttyS0,115200");
-#else
 		if (octeon_uart == 1)
 			strcat(arcs_cmdline, " console=ttyS1,115200");
 		else
 			strcat(arcs_cmdline, " console=ttyS0,115200");
-#endif
 	}
 
 	mips_hpt_frequency = octeon_get_clock_rate();
diff --git a/arch/mips/configs/generic_defconfig b/arch/mips/configs/generic_defconfig
index c95d94c7838b..91aacf2ef26d 100644
--- a/arch/mips/configs/generic_defconfig
+++ b/arch/mips/configs/generic_defconfig
@@ -36,6 +36,8 @@ CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
 CONFIG_NETFILTER=y
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
@@ -80,6 +82,7 @@ CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_NFS_V4_1=y
 CONFIG_NFS_V4_2=y
+CONFIG_ROOT_NFS=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_INFO_REDUCED=y
diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h
index b4db69fbc40c..fc67947ed658 100644
--- a/arch/mips/include/asm/cache.h
+++ b/arch/mips/include/asm/cache.h
@@ -9,14 +9,9 @@
 #ifndef _ASM_CACHE_H
 #define _ASM_CACHE_H
 
-#include <kmalloc.h>
-
 #define L1_CACHE_SHIFT		CONFIG_MIPS_L1_CACHE_SHIFT
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
-#define SMP_CACHE_SHIFT		L1_CACHE_SHIFT
-#define SMP_CACHE_BYTES		L1_CACHE_BYTES
-
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
 #endif /* _ASM_CACHE_H */
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index be3b4c25f335..cd6efb07c980 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -12,10 +12,9 @@
 #ifndef __ASM_CPU_INFO_H
 #define __ASM_CPU_INFO_H
 
+#include <linux/cache.h>
 #include <linux/types.h>
 
-#include <asm/cache.h>
-
 /*
  * Descriptor for a cache
  */
diff --git a/arch/mips/include/asm/cpufeature.h b/arch/mips/include/asm/cpufeature.h
new file mode 100644
index 000000000000..c63ec05313c1
--- /dev/null
+++ b/arch/mips/include/asm/cpufeature.h
@@ -0,0 +1,26 @@
+/*
+ * CPU feature definitions for module loading, used by
+ * module_cpu_feature_match(), see uapi/asm/hwcap.h for MIPS CPU features.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ASM_CPUFEATURE_H
+#define __ASM_CPUFEATURE_H
+
+#include <uapi/asm/hwcap.h>
+#include <asm/elf.h>
+
+#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap))
+
+#define cpu_feature(x)		ilog2(HWCAP_ ## x)
+
+static inline bool cpu_have_feature(unsigned int num)
+{
+	return elf_hwcap & (1UL << num);
+}
+
+#endif /* __ASM_CPUFEATURE_H */
diff --git a/arch/mips/include/asm/highmem.h b/arch/mips/include/asm/highmem.h
index d34536e7653f..279b6d14ffeb 100644
--- a/arch/mips/include/asm/highmem.h
+++ b/arch/mips/include/asm/highmem.h
@@ -35,7 +35,12 @@ extern pte_t *pkmap_page_table;
  * easily, subsequent pte tables have to be allocated in one physical
  * chunk of RAM.
  */
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+#define LAST_PKMAP 512
+#else
 #define LAST_PKMAP 1024
+#endif
+
 #define LAST_PKMAP_MASK (LAST_PKMAP-1)
 #define PKMAP_NR(virt)	((virt-PKMAP_BASE) >> PAGE_SHIFT)
 #define PKMAP_ADDR(nr)	(PKMAP_BASE + ((nr) << PAGE_SHIFT))
diff --git a/arch/mips/include/asm/kprobes.h b/arch/mips/include/asm/kprobes.h
index 291846d9ba83..ad1a99948f27 100644
--- a/arch/mips/include/asm/kprobes.h
+++ b/arch/mips/include/asm/kprobes.h
@@ -43,7 +43,8 @@ typedef union mips_instruction kprobe_opcode_t;
 
 #define flush_insn_slot(p)						\
 do {									\
-	flush_icache_range((unsigned long)p->addr,			\
+	if (p->addr)							\
+		flush_icache_range((unsigned long)p->addr,		\
 			   (unsigned long)p->addr +			\
 			   (MAX_INSN_SIZE * sizeof(kprobe_opcode_t)));	\
 } while (0)
diff --git a/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h b/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h
index 98cf40417c5d..d38be668e338 100644
--- a/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h
@@ -10,8 +10,6 @@
 #ifndef __ASM_MACH_RM200_CPU_FEATURE_OVERRIDES_H
 #define __ASM_MACH_RM200_CPU_FEATURE_OVERRIDES_H
 
-#include <cpu-feature-overrides.h>
-
 #define cpu_has_tlb		1
 #define cpu_has_4kex		1
 #define cpu_has_4k_cache	1
diff --git a/arch/mips/include/asm/octeon/cvmx-l2c-defs.h b/arch/mips/include/asm/octeon/cvmx-l2c-defs.h
index 10262cb6ff50..d045973ddb33 100644
--- a/arch/mips/include/asm/octeon/cvmx-l2c-defs.h
+++ b/arch/mips/include/asm/octeon/cvmx-l2c-defs.h
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2012 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -28,3140 +28,177 @@
 #ifndef __CVMX_L2C_DEFS_H__
 #define __CVMX_L2C_DEFS_H__
 
-#define CVMX_L2C_BIG_CTL (CVMX_ADD_IO_SEG(0x0001180080800030ull))
-#define CVMX_L2C_BST (CVMX_ADD_IO_SEG(0x00011800808007F8ull))
-#define CVMX_L2C_BST0 (CVMX_ADD_IO_SEG(0x00011800800007F8ull))
-#define CVMX_L2C_BST1 (CVMX_ADD_IO_SEG(0x00011800800007F0ull))
-#define CVMX_L2C_BST2 (CVMX_ADD_IO_SEG(0x00011800800007E8ull))
-#define CVMX_L2C_BST_MEMX(block_id) (CVMX_ADD_IO_SEG(0x0001180080C007F8ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_BST_TDTX(block_id) (CVMX_ADD_IO_SEG(0x0001180080A007F0ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_BST_TTGX(block_id) (CVMX_ADD_IO_SEG(0x0001180080A007F8ull) + ((block_id) & 3) * 0x40000ull)
+#include <uapi/asm/bitfield.h>
+
+#define CVMX_L2C_DBG (CVMX_ADD_IO_SEG(0x0001180080000030ull))
 #define CVMX_L2C_CFG (CVMX_ADD_IO_SEG(0x0001180080000000ull))
-#define CVMX_L2C_COP0_MAPX(offset) (CVMX_ADD_IO_SEG(0x0001180080940000ull) + ((offset) & 16383) * 8)
 #define CVMX_L2C_CTL (CVMX_ADD_IO_SEG(0x0001180080800000ull))
-#define CVMX_L2C_DBG (CVMX_ADD_IO_SEG(0x0001180080000030ull))
-#define CVMX_L2C_DUT (CVMX_ADD_IO_SEG(0x0001180080000050ull))
-#define CVMX_L2C_DUT_MAPX(offset) (CVMX_ADD_IO_SEG(0x0001180080E00000ull) + ((offset) & 8191) * 8)
-#define CVMX_L2C_ERR_TDTX(block_id) (CVMX_ADD_IO_SEG(0x0001180080A007E0ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_ERR_TTGX(block_id) (CVMX_ADD_IO_SEG(0x0001180080A007E8ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_ERR_VBFX(block_id) (CVMX_ADD_IO_SEG(0x0001180080C007F0ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_ERR_XMC (CVMX_ADD_IO_SEG(0x00011800808007D8ull))
-#define CVMX_L2C_GRPWRR0 (CVMX_ADD_IO_SEG(0x00011800800000C8ull))
-#define CVMX_L2C_GRPWRR1 (CVMX_ADD_IO_SEG(0x00011800800000D0ull))
-#define CVMX_L2C_INT_EN (CVMX_ADD_IO_SEG(0x0001180080000100ull))
-#define CVMX_L2C_INT_ENA (CVMX_ADD_IO_SEG(0x0001180080800020ull))
-#define CVMX_L2C_INT_REG (CVMX_ADD_IO_SEG(0x0001180080800018ull))
-#define CVMX_L2C_INT_STAT (CVMX_ADD_IO_SEG(0x00011800800000F8ull))
-#define CVMX_L2C_IOCX_PFC(block_id) (CVMX_ADD_IO_SEG(0x0001180080800420ull))
-#define CVMX_L2C_IORX_PFC(block_id) (CVMX_ADD_IO_SEG(0x0001180080800428ull))
 #define CVMX_L2C_LCKBASE (CVMX_ADD_IO_SEG(0x0001180080000058ull))
 #define CVMX_L2C_LCKOFF (CVMX_ADD_IO_SEG(0x0001180080000060ull))
-#define CVMX_L2C_LFB0 (CVMX_ADD_IO_SEG(0x0001180080000038ull))
-#define CVMX_L2C_LFB1 (CVMX_ADD_IO_SEG(0x0001180080000040ull))
-#define CVMX_L2C_LFB2 (CVMX_ADD_IO_SEG(0x0001180080000048ull))
-#define CVMX_L2C_LFB3 (CVMX_ADD_IO_SEG(0x00011800800000B8ull))
-#define CVMX_L2C_OOB (CVMX_ADD_IO_SEG(0x00011800800000D8ull))
-#define CVMX_L2C_OOB1 (CVMX_ADD_IO_SEG(0x00011800800000E0ull))
-#define CVMX_L2C_OOB2 (CVMX_ADD_IO_SEG(0x00011800800000E8ull))
-#define CVMX_L2C_OOB3 (CVMX_ADD_IO_SEG(0x00011800800000F0ull))
+#define CVMX_L2C_PFCTL (CVMX_ADD_IO_SEG(0x0001180080000090ull))
+#define CVMX_L2C_PFCX(offset) (CVMX_ADD_IO_SEG(0x0001180080000098ull) +	       \
+		((offset) & 3) * 8)
 #define CVMX_L2C_PFC0 CVMX_L2C_PFCX(0)
 #define CVMX_L2C_PFC1 CVMX_L2C_PFCX(1)
 #define CVMX_L2C_PFC2 CVMX_L2C_PFCX(2)
 #define CVMX_L2C_PFC3 CVMX_L2C_PFCX(3)
-#define CVMX_L2C_PFCTL (CVMX_ADD_IO_SEG(0x0001180080000090ull))
-#define CVMX_L2C_PFCX(offset) (CVMX_ADD_IO_SEG(0x0001180080000098ull) + ((offset) & 3) * 8)
-#define CVMX_L2C_PPGRP (CVMX_ADD_IO_SEG(0x00011800800000C0ull))
-#define CVMX_L2C_QOS_IOBX(offset) (CVMX_ADD_IO_SEG(0x0001180080880200ull) + ((offset) & 1) * 8)
-#define CVMX_L2C_QOS_PPX(offset) (CVMX_ADD_IO_SEG(0x0001180080880000ull) + ((offset) & 31) * 8)
-#define CVMX_L2C_QOS_WGT (CVMX_ADD_IO_SEG(0x0001180080800008ull))
-#define CVMX_L2C_RSCX_PFC(offset) (CVMX_ADD_IO_SEG(0x0001180080800410ull) + ((offset) & 3) * 64)
-#define CVMX_L2C_RSDX_PFC(offset) (CVMX_ADD_IO_SEG(0x0001180080800418ull) + ((offset) & 3) * 64)
 #define CVMX_L2C_SPAR0 (CVMX_ADD_IO_SEG(0x0001180080000068ull))
 #define CVMX_L2C_SPAR1 (CVMX_ADD_IO_SEG(0x0001180080000070ull))
 #define CVMX_L2C_SPAR2 (CVMX_ADD_IO_SEG(0x0001180080000078ull))
 #define CVMX_L2C_SPAR3 (CVMX_ADD_IO_SEG(0x0001180080000080ull))
 #define CVMX_L2C_SPAR4 (CVMX_ADD_IO_SEG(0x0001180080000088ull))
-#define CVMX_L2C_TADX_ECC0(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00018ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_ECC1(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00020ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_IEN(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00000ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_INT(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00028ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_PFC0(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00400ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_PFC1(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00408ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_PFC2(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00410ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_PFC3(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00418ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_PRF(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00008ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_TAG(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00010ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_VER_ID (CVMX_ADD_IO_SEG(0x00011800808007E0ull))
-#define CVMX_L2C_VER_IOB (CVMX_ADD_IO_SEG(0x00011800808007F0ull))
-#define CVMX_L2C_VER_MSC (CVMX_ADD_IO_SEG(0x00011800808007D0ull))
-#define CVMX_L2C_VER_PP (CVMX_ADD_IO_SEG(0x00011800808007E8ull))
-#define CVMX_L2C_VIRTID_IOBX(offset) (CVMX_ADD_IO_SEG(0x00011800808C0200ull) + ((offset) & 1) * 8)
-#define CVMX_L2C_VIRTID_PPX(offset) (CVMX_ADD_IO_SEG(0x00011800808C0000ull) + ((offset) & 31) * 8)
-#define CVMX_L2C_VRT_CTL (CVMX_ADD_IO_SEG(0x0001180080800010ull))
-#define CVMX_L2C_VRT_MEMX(offset) (CVMX_ADD_IO_SEG(0x0001180080900000ull) + ((offset) & 1023) * 8)
-#define CVMX_L2C_WPAR_IOBX(offset) (CVMX_ADD_IO_SEG(0x0001180080840200ull) + ((offset) & 1) * 8)
-#define CVMX_L2C_WPAR_PPX(offset) (CVMX_ADD_IO_SEG(0x0001180080840000ull) + ((offset) & 31) * 8)
-#define CVMX_L2C_XMCX_PFC(offset) (CVMX_ADD_IO_SEG(0x0001180080800400ull) + ((offset) & 3) * 64)
-#define CVMX_L2C_XMC_CMD (CVMX_ADD_IO_SEG(0x0001180080800028ull))
-#define CVMX_L2C_XMDX_PFC(offset) (CVMX_ADD_IO_SEG(0x0001180080800408ull) + ((offset) & 3) * 64)
-
-union cvmx_l2c_big_ctl {
-	uint64_t u64;
-	struct cvmx_l2c_big_ctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_8_63:56;
-		uint64_t maxdram:4;
-		uint64_t reserved_1_3:3;
-		uint64_t disable:1;
-#else
-		uint64_t disable:1;
-		uint64_t reserved_1_3:3;
-		uint64_t maxdram:4;
-		uint64_t reserved_8_63:56;
-#endif
-	} s;
-	struct cvmx_l2c_big_ctl_s cn61xx;
-	struct cvmx_l2c_big_ctl_s cn63xx;
-	struct cvmx_l2c_big_ctl_s cn66xx;
-	struct cvmx_l2c_big_ctl_s cn68xx;
-	struct cvmx_l2c_big_ctl_s cn68xxp1;
-	struct cvmx_l2c_big_ctl_s cnf71xx;
-};
-
-union cvmx_l2c_bst {
-	uint64_t u64;
-	struct cvmx_l2c_bst_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t dutfl:32;
-		uint64_t rbffl:4;
-		uint64_t xbffl:4;
-		uint64_t tdpfl:4;
-		uint64_t ioccmdfl:4;
-		uint64_t iocdatfl:4;
-		uint64_t dutresfl:4;
-		uint64_t vrtfl:4;
-		uint64_t tdffl:4;
-#else
-		uint64_t tdffl:4;
-		uint64_t vrtfl:4;
-		uint64_t dutresfl:4;
-		uint64_t iocdatfl:4;
-		uint64_t ioccmdfl:4;
-		uint64_t tdpfl:4;
-		uint64_t xbffl:4;
-		uint64_t rbffl:4;
-		uint64_t dutfl:32;
-#endif
-	} s;
-	struct cvmx_l2c_bst_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_36_63:28;
-		uint64_t dutfl:4;
-		uint64_t reserved_17_31:15;
-		uint64_t ioccmdfl:1;
-		uint64_t reserved_13_15:3;
-		uint64_t iocdatfl:1;
-		uint64_t reserved_9_11:3;
-		uint64_t dutresfl:1;
-		uint64_t reserved_5_7:3;
-		uint64_t vrtfl:1;
-		uint64_t reserved_1_3:3;
-		uint64_t tdffl:1;
-#else
-		uint64_t tdffl:1;
-		uint64_t reserved_1_3:3;
-		uint64_t vrtfl:1;
-		uint64_t reserved_5_7:3;
-		uint64_t dutresfl:1;
-		uint64_t reserved_9_11:3;
-		uint64_t iocdatfl:1;
-		uint64_t reserved_13_15:3;
-		uint64_t ioccmdfl:1;
-		uint64_t reserved_17_31:15;
-		uint64_t dutfl:4;
-		uint64_t reserved_36_63:28;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_bst_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_38_63:26;
-		uint64_t dutfl:6;
-		uint64_t reserved_17_31:15;
-		uint64_t ioccmdfl:1;
-		uint64_t reserved_13_15:3;
-		uint64_t iocdatfl:1;
-		uint64_t reserved_9_11:3;
-		uint64_t dutresfl:1;
-		uint64_t reserved_5_7:3;
-		uint64_t vrtfl:1;
-		uint64_t reserved_1_3:3;
-		uint64_t tdffl:1;
-#else
-		uint64_t tdffl:1;
-		uint64_t reserved_1_3:3;
-		uint64_t vrtfl:1;
-		uint64_t reserved_5_7:3;
-		uint64_t dutresfl:1;
-		uint64_t reserved_9_11:3;
-		uint64_t iocdatfl:1;
-		uint64_t reserved_13_15:3;
-		uint64_t ioccmdfl:1;
-		uint64_t reserved_17_31:15;
-		uint64_t dutfl:6;
-		uint64_t reserved_38_63:26;
-#endif
-	} cn63xx;
-	struct cvmx_l2c_bst_cn63xx cn63xxp1;
-	struct cvmx_l2c_bst_cn66xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_42_63:22;
-		uint64_t dutfl:10;
-		uint64_t reserved_17_31:15;
-		uint64_t ioccmdfl:1;
-		uint64_t reserved_13_15:3;
-		uint64_t iocdatfl:1;
-		uint64_t reserved_9_11:3;
-		uint64_t dutresfl:1;
-		uint64_t reserved_5_7:3;
-		uint64_t vrtfl:1;
-		uint64_t reserved_1_3:3;
-		uint64_t tdffl:1;
-#else
-		uint64_t tdffl:1;
-		uint64_t reserved_1_3:3;
-		uint64_t vrtfl:1;
-		uint64_t reserved_5_7:3;
-		uint64_t dutresfl:1;
-		uint64_t reserved_9_11:3;
-		uint64_t iocdatfl:1;
-		uint64_t reserved_13_15:3;
-		uint64_t ioccmdfl:1;
-		uint64_t reserved_17_31:15;
-		uint64_t dutfl:10;
-		uint64_t reserved_42_63:22;
-#endif
-	} cn66xx;
-	struct cvmx_l2c_bst_s cn68xx;
-	struct cvmx_l2c_bst_s cn68xxp1;
-	struct cvmx_l2c_bst_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_bst0 {
-	uint64_t u64;
-	struct cvmx_l2c_bst0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_24_63:40;
-		uint64_t dtbnk:1;
-		uint64_t wlb_msk:4;
-		uint64_t dtcnt:13;
-		uint64_t dt:1;
-		uint64_t stin_msk:1;
-		uint64_t wlb_dat:4;
-#else
-		uint64_t wlb_dat:4;
-		uint64_t stin_msk:1;
-		uint64_t dt:1;
-		uint64_t dtcnt:13;
-		uint64_t wlb_msk:4;
-		uint64_t dtbnk:1;
-		uint64_t reserved_24_63:40;
-#endif
-	} s;
-	struct cvmx_l2c_bst0_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_23_63:41;
-		uint64_t wlb_msk:4;
-		uint64_t reserved_15_18:4;
-		uint64_t dtcnt:9;
-		uint64_t dt:1;
-		uint64_t reserved_4_4:1;
-		uint64_t wlb_dat:4;
-#else
-		uint64_t wlb_dat:4;
-		uint64_t reserved_4_4:1;
-		uint64_t dt:1;
-		uint64_t dtcnt:9;
-		uint64_t reserved_15_18:4;
-		uint64_t wlb_msk:4;
-		uint64_t reserved_23_63:41;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_bst0_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_23_63:41;
-		uint64_t wlb_msk:4;
-		uint64_t reserved_16_18:3;
-		uint64_t dtcnt:10;
-		uint64_t dt:1;
-		uint64_t stin_msk:1;
-		uint64_t wlb_dat:4;
-#else
-		uint64_t wlb_dat:4;
-		uint64_t stin_msk:1;
-		uint64_t dt:1;
-		uint64_t dtcnt:10;
-		uint64_t reserved_16_18:3;
-		uint64_t wlb_msk:4;
-		uint64_t reserved_23_63:41;
-#endif
-	} cn31xx;
-	struct cvmx_l2c_bst0_cn38xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_19_63:45;
-		uint64_t dtcnt:13;
-		uint64_t dt:1;
-		uint64_t stin_msk:1;
-		uint64_t wlb_dat:4;
-#else
-		uint64_t wlb_dat:4;
-		uint64_t stin_msk:1;
-		uint64_t dt:1;
-		uint64_t dtcnt:13;
-		uint64_t reserved_19_63:45;
-#endif
-	} cn38xx;
-	struct cvmx_l2c_bst0_cn38xx cn38xxp2;
-	struct cvmx_l2c_bst0_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_24_63:40;
-		uint64_t dtbnk:1;
-		uint64_t wlb_msk:4;
-		uint64_t reserved_16_18:3;
-		uint64_t dtcnt:10;
-		uint64_t dt:1;
-		uint64_t stin_msk:1;
-		uint64_t wlb_dat:4;
-#else
-		uint64_t wlb_dat:4;
-		uint64_t stin_msk:1;
-		uint64_t dt:1;
-		uint64_t dtcnt:10;
-		uint64_t reserved_16_18:3;
-		uint64_t wlb_msk:4;
-		uint64_t dtbnk:1;
-		uint64_t reserved_24_63:40;
-#endif
-	} cn50xx;
-	struct cvmx_l2c_bst0_cn50xx cn52xx;
-	struct cvmx_l2c_bst0_cn50xx cn52xxp1;
-	struct cvmx_l2c_bst0_s cn56xx;
-	struct cvmx_l2c_bst0_s cn56xxp1;
-	struct cvmx_l2c_bst0_s cn58xx;
-	struct cvmx_l2c_bst0_s cn58xxp1;
-};
-
-union cvmx_l2c_bst1 {
-	uint64_t u64;
-	struct cvmx_l2c_bst1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_9_63:55;
-		uint64_t l2t:9;
-#else
-		uint64_t l2t:9;
-		uint64_t reserved_9_63:55;
-#endif
-	} s;
-	struct cvmx_l2c_bst1_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t vwdf:4;
-		uint64_t lrf:2;
-		uint64_t vab_vwcf:1;
-		uint64_t reserved_5_8:4;
-		uint64_t l2t:5;
-#else
-		uint64_t l2t:5;
-		uint64_t reserved_5_8:4;
-		uint64_t vab_vwcf:1;
-		uint64_t lrf:2;
-		uint64_t vwdf:4;
-		uint64_t reserved_16_63:48;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_bst1_cn30xx cn31xx;
-	struct cvmx_l2c_bst1_cn38xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t vwdf:4;
-		uint64_t lrf:2;
-		uint64_t vab_vwcf:1;
-		uint64_t l2t:9;
-#else
-		uint64_t l2t:9;
-		uint64_t vab_vwcf:1;
-		uint64_t lrf:2;
-		uint64_t vwdf:4;
-		uint64_t reserved_16_63:48;
-#endif
-	} cn38xx;
-	struct cvmx_l2c_bst1_cn38xx cn38xxp2;
-	struct cvmx_l2c_bst1_cn38xx cn50xx;
-	struct cvmx_l2c_bst1_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_19_63:45;
-		uint64_t plc2:1;
-		uint64_t plc1:1;
-		uint64_t plc0:1;
-		uint64_t vwdf:4;
-		uint64_t reserved_11_11:1;
-		uint64_t ilc:1;
-		uint64_t vab_vwcf:1;
-		uint64_t l2t:9;
-#else
-		uint64_t l2t:9;
-		uint64_t vab_vwcf:1;
-		uint64_t ilc:1;
-		uint64_t reserved_11_11:1;
-		uint64_t vwdf:4;
-		uint64_t plc0:1;
-		uint64_t plc1:1;
-		uint64_t plc2:1;
-		uint64_t reserved_19_63:45;
-#endif
-	} cn52xx;
-	struct cvmx_l2c_bst1_cn52xx cn52xxp1;
-	struct cvmx_l2c_bst1_cn56xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_24_63:40;
-		uint64_t plc2:1;
-		uint64_t plc1:1;
-		uint64_t plc0:1;
-		uint64_t ilc:1;
-		uint64_t vwdf1:4;
-		uint64_t vwdf0:4;
-		uint64_t vab_vwcf1:1;
-		uint64_t reserved_10_10:1;
-		uint64_t vab_vwcf0:1;
-		uint64_t l2t:9;
-#else
-		uint64_t l2t:9;
-		uint64_t vab_vwcf0:1;
-		uint64_t reserved_10_10:1;
-		uint64_t vab_vwcf1:1;
-		uint64_t vwdf0:4;
-		uint64_t vwdf1:4;
-		uint64_t ilc:1;
-		uint64_t plc0:1;
-		uint64_t plc1:1;
-		uint64_t plc2:1;
-		uint64_t reserved_24_63:40;
-#endif
-	} cn56xx;
-	struct cvmx_l2c_bst1_cn56xx cn56xxp1;
-	struct cvmx_l2c_bst1_cn38xx cn58xx;
-	struct cvmx_l2c_bst1_cn38xx cn58xxp1;
-};
-
-union cvmx_l2c_bst2 {
-	uint64_t u64;
-	struct cvmx_l2c_bst2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t mrb:4;
-		uint64_t reserved_4_11:8;
-		uint64_t ipcbst:1;
-		uint64_t picbst:1;
-		uint64_t xrdmsk:1;
-		uint64_t xrddat:1;
-#else
-		uint64_t xrddat:1;
-		uint64_t xrdmsk:1;
-		uint64_t picbst:1;
-		uint64_t ipcbst:1;
-		uint64_t reserved_4_11:8;
-		uint64_t mrb:4;
-		uint64_t reserved_16_63:48;
-#endif
-	} s;
-	struct cvmx_l2c_bst2_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t mrb:4;
-		uint64_t rmdf:4;
-		uint64_t reserved_4_7:4;
-		uint64_t ipcbst:1;
-		uint64_t reserved_2_2:1;
-		uint64_t xrdmsk:1;
-		uint64_t xrddat:1;
-#else
-		uint64_t xrddat:1;
-		uint64_t xrdmsk:1;
-		uint64_t reserved_2_2:1;
-		uint64_t ipcbst:1;
-		uint64_t reserved_4_7:4;
-		uint64_t rmdf:4;
-		uint64_t mrb:4;
-		uint64_t reserved_16_63:48;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_bst2_cn30xx cn31xx;
-	struct cvmx_l2c_bst2_cn38xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t mrb:4;
-		uint64_t rmdf:4;
-		uint64_t rhdf:4;
-		uint64_t ipcbst:1;
-		uint64_t picbst:1;
-		uint64_t xrdmsk:1;
-		uint64_t xrddat:1;
-#else
-		uint64_t xrddat:1;
-		uint64_t xrdmsk:1;
-		uint64_t picbst:1;
-		uint64_t ipcbst:1;
-		uint64_t rhdf:4;
-		uint64_t rmdf:4;
-		uint64_t mrb:4;
-		uint64_t reserved_16_63:48;
-#endif
-	} cn38xx;
-	struct cvmx_l2c_bst2_cn38xx cn38xxp2;
-	struct cvmx_l2c_bst2_cn30xx cn50xx;
-	struct cvmx_l2c_bst2_cn30xx cn52xx;
-	struct cvmx_l2c_bst2_cn30xx cn52xxp1;
-	struct cvmx_l2c_bst2_cn56xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t mrb:4;
-		uint64_t rmdb:4;
-		uint64_t rhdb:4;
-		uint64_t ipcbst:1;
-		uint64_t picbst:1;
-		uint64_t xrdmsk:1;
-		uint64_t xrddat:1;
-#else
-		uint64_t xrddat:1;
-		uint64_t xrdmsk:1;
-		uint64_t picbst:1;
-		uint64_t ipcbst:1;
-		uint64_t rhdb:4;
-		uint64_t rmdb:4;
-		uint64_t mrb:4;
-		uint64_t reserved_16_63:48;
-#endif
-	} cn56xx;
-	struct cvmx_l2c_bst2_cn56xx cn56xxp1;
-	struct cvmx_l2c_bst2_cn56xx cn58xx;
-	struct cvmx_l2c_bst2_cn56xx cn58xxp1;
-};
-
-union cvmx_l2c_bst_memx {
-	uint64_t u64;
-	struct cvmx_l2c_bst_memx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t start_bist:1;
-		uint64_t clear_bist:1;
-		uint64_t reserved_5_61:57;
-		uint64_t rdffl:1;
-		uint64_t vbffl:4;
-#else
-		uint64_t vbffl:4;
-		uint64_t rdffl:1;
-		uint64_t reserved_5_61:57;
-		uint64_t clear_bist:1;
-		uint64_t start_bist:1;
-#endif
-	} s;
-	struct cvmx_l2c_bst_memx_s cn61xx;
-	struct cvmx_l2c_bst_memx_s cn63xx;
-	struct cvmx_l2c_bst_memx_s cn63xxp1;
-	struct cvmx_l2c_bst_memx_s cn66xx;
-	struct cvmx_l2c_bst_memx_s cn68xx;
-	struct cvmx_l2c_bst_memx_s cn68xxp1;
-	struct cvmx_l2c_bst_memx_s cnf71xx;
-};
-
-union cvmx_l2c_bst_tdtx {
-	uint64_t u64;
-	struct cvmx_l2c_bst_tdtx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t fbfrspfl:8;
-		uint64_t sbffl:8;
-		uint64_t fbffl:8;
-		uint64_t l2dfl:8;
-#else
-		uint64_t l2dfl:8;
-		uint64_t fbffl:8;
-		uint64_t sbffl:8;
-		uint64_t fbfrspfl:8;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_l2c_bst_tdtx_s cn61xx;
-	struct cvmx_l2c_bst_tdtx_s cn63xx;
-	struct cvmx_l2c_bst_tdtx_cn63xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_24_63:40;
-		uint64_t sbffl:8;
-		uint64_t fbffl:8;
-		uint64_t l2dfl:8;
-#else
-		uint64_t l2dfl:8;
-		uint64_t fbffl:8;
-		uint64_t sbffl:8;
-		uint64_t reserved_24_63:40;
-#endif
-	} cn63xxp1;
-	struct cvmx_l2c_bst_tdtx_s cn66xx;
-	struct cvmx_l2c_bst_tdtx_s cn68xx;
-	struct cvmx_l2c_bst_tdtx_s cn68xxp1;
-	struct cvmx_l2c_bst_tdtx_s cnf71xx;
-};
+#define CVMX_L2C_TADX_PFCX(offset, block_id)				       \
+		(CVMX_ADD_IO_SEG(0x0001180080A00400ull) + (((offset) & 3) +    \
+		((block_id) & 7) * 0x8000ull) * 8)
+#define CVMX_L2C_TADX_PFC0(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00400ull) + \
+		((block_id) & 3) * 0x40000ull)
+#define CVMX_L2C_TADX_PFC1(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00408ull) + \
+		((block_id) & 3) * 0x40000ull)
+#define CVMX_L2C_TADX_PFC2(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00410ull) + \
+		((block_id) & 3) * 0x40000ull)
+#define CVMX_L2C_TADX_PFC3(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00418ull) + \
+		((block_id) & 3) * 0x40000ull)
+#define CVMX_L2C_TADX_PRF(offset) (CVMX_ADD_IO_SEG(0x0001180080A00008ull)    + \
+		((offset) & 7) * 0x40000ull)
+#define CVMX_L2C_TADX_TAG(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00010ull)  + \
+		((block_id) & 3) * 0x40000ull)
+#define CVMX_L2C_WPAR_IOBX(offset) (CVMX_ADD_IO_SEG(0x0001180080840200ull)   + \
+		((offset) & 1) * 8)
+#define CVMX_L2C_WPAR_PPX(offset) (CVMX_ADD_IO_SEG(0x0001180080840000ull)    + \
+		((offset) & 31) * 8)
+#define CVMX_L2D_FUS3 (CVMX_ADD_IO_SEG(0x00011800800007B8ull))
 
-union cvmx_l2c_bst_ttgx {
-	uint64_t u64;
-	struct cvmx_l2c_bst_ttgx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_17_63:47;
-		uint64_t lrufl:1;
-		uint64_t tagfl:16;
-#else
-		uint64_t tagfl:16;
-		uint64_t lrufl:1;
-		uint64_t reserved_17_63:47;
-#endif
-	} s;
-	struct cvmx_l2c_bst_ttgx_s cn61xx;
-	struct cvmx_l2c_bst_ttgx_s cn63xx;
-	struct cvmx_l2c_bst_ttgx_s cn63xxp1;
-	struct cvmx_l2c_bst_ttgx_s cn66xx;
-	struct cvmx_l2c_bst_ttgx_s cn68xx;
-	struct cvmx_l2c_bst_ttgx_s cn68xxp1;
-	struct cvmx_l2c_bst_ttgx_s cnf71xx;
-};
 
 union cvmx_l2c_cfg {
 	uint64_t u64;
 	struct cvmx_l2c_cfg_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_20_63:44;
-		uint64_t bstrun:1;
-		uint64_t lbist:1;
-		uint64_t xor_bank:1;
-		uint64_t dpres1:1;
-		uint64_t dpres0:1;
-		uint64_t dfill_dis:1;
-		uint64_t fpexp:4;
-		uint64_t fpempty:1;
-		uint64_t fpen:1;
-		uint64_t idxalias:1;
-		uint64_t mwf_crd:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t lrf_arb_mode:1;
-#else
-		uint64_t lrf_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t mwf_crd:4;
-		uint64_t idxalias:1;
-		uint64_t fpen:1;
-		uint64_t fpempty:1;
-		uint64_t fpexp:4;
-		uint64_t dfill_dis:1;
-		uint64_t dpres0:1;
-		uint64_t dpres1:1;
-		uint64_t xor_bank:1;
-		uint64_t lbist:1;
-		uint64_t bstrun:1;
-		uint64_t reserved_20_63:44;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_20_63:44,
+		__BITFIELD_FIELD(uint64_t bstrun:1,
+		__BITFIELD_FIELD(uint64_t lbist:1,
+		__BITFIELD_FIELD(uint64_t xor_bank:1,
+		__BITFIELD_FIELD(uint64_t dpres1:1,
+		__BITFIELD_FIELD(uint64_t dpres0:1,
+		__BITFIELD_FIELD(uint64_t dfill_dis:1,
+		__BITFIELD_FIELD(uint64_t fpexp:4,
+		__BITFIELD_FIELD(uint64_t fpempty:1,
+		__BITFIELD_FIELD(uint64_t fpen:1,
+		__BITFIELD_FIELD(uint64_t idxalias:1,
+		__BITFIELD_FIELD(uint64_t mwf_crd:4,
+		__BITFIELD_FIELD(uint64_t rsp_arb_mode:1,
+		__BITFIELD_FIELD(uint64_t rfb_arb_mode:1,
+		__BITFIELD_FIELD(uint64_t lrf_arb_mode:1,
+		;)))))))))))))))
 	} s;
-	struct cvmx_l2c_cfg_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t fpexp:4;
-		uint64_t fpempty:1;
-		uint64_t fpen:1;
-		uint64_t idxalias:1;
-		uint64_t mwf_crd:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t lrf_arb_mode:1;
-#else
-		uint64_t lrf_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t mwf_crd:4;
-		uint64_t idxalias:1;
-		uint64_t fpen:1;
-		uint64_t fpempty:1;
-		uint64_t fpexp:4;
-		uint64_t reserved_14_63:50;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_cfg_cn30xx cn31xx;
-	struct cvmx_l2c_cfg_cn30xx cn38xx;
-	struct cvmx_l2c_cfg_cn30xx cn38xxp2;
-	struct cvmx_l2c_cfg_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_20_63:44;
-		uint64_t bstrun:1;
-		uint64_t lbist:1;
-		uint64_t reserved_14_17:4;
-		uint64_t fpexp:4;
-		uint64_t fpempty:1;
-		uint64_t fpen:1;
-		uint64_t idxalias:1;
-		uint64_t mwf_crd:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t lrf_arb_mode:1;
-#else
-		uint64_t lrf_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t mwf_crd:4;
-		uint64_t idxalias:1;
-		uint64_t fpen:1;
-		uint64_t fpempty:1;
-		uint64_t fpexp:4;
-		uint64_t reserved_14_17:4;
-		uint64_t lbist:1;
-		uint64_t bstrun:1;
-		uint64_t reserved_20_63:44;
-#endif
-	} cn50xx;
-	struct cvmx_l2c_cfg_cn50xx cn52xx;
-	struct cvmx_l2c_cfg_cn50xx cn52xxp1;
-	struct cvmx_l2c_cfg_s cn56xx;
-	struct cvmx_l2c_cfg_s cn56xxp1;
-	struct cvmx_l2c_cfg_cn58xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_20_63:44;
-		uint64_t bstrun:1;
-		uint64_t lbist:1;
-		uint64_t reserved_15_17:3;
-		uint64_t dfill_dis:1;
-		uint64_t fpexp:4;
-		uint64_t fpempty:1;
-		uint64_t fpen:1;
-		uint64_t idxalias:1;
-		uint64_t mwf_crd:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t lrf_arb_mode:1;
-#else
-		uint64_t lrf_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t mwf_crd:4;
-		uint64_t idxalias:1;
-		uint64_t fpen:1;
-		uint64_t fpempty:1;
-		uint64_t fpexp:4;
-		uint64_t dfill_dis:1;
-		uint64_t reserved_15_17:3;
-		uint64_t lbist:1;
-		uint64_t bstrun:1;
-		uint64_t reserved_20_63:44;
-#endif
-	} cn58xx;
-	struct cvmx_l2c_cfg_cn58xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_15_63:49;
-		uint64_t dfill_dis:1;
-		uint64_t fpexp:4;
-		uint64_t fpempty:1;
-		uint64_t fpen:1;
-		uint64_t idxalias:1;
-		uint64_t mwf_crd:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t lrf_arb_mode:1;
-#else
-		uint64_t lrf_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t mwf_crd:4;
-		uint64_t idxalias:1;
-		uint64_t fpen:1;
-		uint64_t fpempty:1;
-		uint64_t fpexp:4;
-		uint64_t dfill_dis:1;
-		uint64_t reserved_15_63:49;
-#endif
-	} cn58xxp1;
-};
-
-union cvmx_l2c_cop0_mapx {
-	uint64_t u64;
-	struct cvmx_l2c_cop0_mapx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t data:64;
-#else
-		uint64_t data:64;
-#endif
-	} s;
-	struct cvmx_l2c_cop0_mapx_s cn61xx;
-	struct cvmx_l2c_cop0_mapx_s cn63xx;
-	struct cvmx_l2c_cop0_mapx_s cn63xxp1;
-	struct cvmx_l2c_cop0_mapx_s cn66xx;
-	struct cvmx_l2c_cop0_mapx_s cn68xx;
-	struct cvmx_l2c_cop0_mapx_s cn68xxp1;
-	struct cvmx_l2c_cop0_mapx_s cnf71xx;
 };
 
 union cvmx_l2c_ctl {
 	uint64_t u64;
 	struct cvmx_l2c_ctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_30_63:34;
-		uint64_t sepcmt:1;
-		uint64_t rdf_fast:1;
-		uint64_t disstgl2i:1;
-		uint64_t l2dfsbe:1;
-		uint64_t l2dfdbe:1;
-		uint64_t discclk:1;
-		uint64_t maxvab:4;
-		uint64_t maxlfb:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t ef_ena:1;
-		uint64_t ef_cnt:7;
-		uint64_t vab_thresh:4;
-		uint64_t disecc:1;
-		uint64_t disidxalias:1;
-#else
-		uint64_t disidxalias:1;
-		uint64_t disecc:1;
-		uint64_t vab_thresh:4;
-		uint64_t ef_cnt:7;
-		uint64_t ef_ena:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t maxlfb:4;
-		uint64_t maxvab:4;
-		uint64_t discclk:1;
-		uint64_t l2dfdbe:1;
-		uint64_t l2dfsbe:1;
-		uint64_t disstgl2i:1;
-		uint64_t rdf_fast:1;
-		uint64_t sepcmt:1;
-		uint64_t reserved_30_63:34;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_30_63:34,
+		__BITFIELD_FIELD(uint64_t sepcmt:1,
+		__BITFIELD_FIELD(uint64_t rdf_fast:1,
+		__BITFIELD_FIELD(uint64_t disstgl2i:1,
+		__BITFIELD_FIELD(uint64_t l2dfsbe:1,
+		__BITFIELD_FIELD(uint64_t l2dfdbe:1,
+		__BITFIELD_FIELD(uint64_t discclk:1,
+		__BITFIELD_FIELD(uint64_t maxvab:4,
+		__BITFIELD_FIELD(uint64_t maxlfb:4,
+		__BITFIELD_FIELD(uint64_t rsp_arb_mode:1,
+		__BITFIELD_FIELD(uint64_t xmc_arb_mode:1,
+		__BITFIELD_FIELD(uint64_t ef_ena:1,
+		__BITFIELD_FIELD(uint64_t ef_cnt:7,
+		__BITFIELD_FIELD(uint64_t vab_thresh:4,
+		__BITFIELD_FIELD(uint64_t disecc:1,
+		__BITFIELD_FIELD(uint64_t disidxalias:1,
+		;))))))))))))))))
 	} s;
-	struct cvmx_l2c_ctl_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_29_63:35;
-		uint64_t rdf_fast:1;
-		uint64_t disstgl2i:1;
-		uint64_t l2dfsbe:1;
-		uint64_t l2dfdbe:1;
-		uint64_t discclk:1;
-		uint64_t maxvab:4;
-		uint64_t maxlfb:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t ef_ena:1;
-		uint64_t ef_cnt:7;
-		uint64_t vab_thresh:4;
-		uint64_t disecc:1;
-		uint64_t disidxalias:1;
-#else
-		uint64_t disidxalias:1;
-		uint64_t disecc:1;
-		uint64_t vab_thresh:4;
-		uint64_t ef_cnt:7;
-		uint64_t ef_ena:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t maxlfb:4;
-		uint64_t maxvab:4;
-		uint64_t discclk:1;
-		uint64_t l2dfdbe:1;
-		uint64_t l2dfsbe:1;
-		uint64_t disstgl2i:1;
-		uint64_t rdf_fast:1;
-		uint64_t reserved_29_63:35;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_ctl_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_28_63:36;
-		uint64_t disstgl2i:1;
-		uint64_t l2dfsbe:1;
-		uint64_t l2dfdbe:1;
-		uint64_t discclk:1;
-		uint64_t maxvab:4;
-		uint64_t maxlfb:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t ef_ena:1;
-		uint64_t ef_cnt:7;
-		uint64_t vab_thresh:4;
-		uint64_t disecc:1;
-		uint64_t disidxalias:1;
-#else
-		uint64_t disidxalias:1;
-		uint64_t disecc:1;
-		uint64_t vab_thresh:4;
-		uint64_t ef_cnt:7;
-		uint64_t ef_ena:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t maxlfb:4;
-		uint64_t maxvab:4;
-		uint64_t discclk:1;
-		uint64_t l2dfdbe:1;
-		uint64_t l2dfsbe:1;
-		uint64_t disstgl2i:1;
-		uint64_t reserved_28_63:36;
-#endif
-	} cn63xx;
-	struct cvmx_l2c_ctl_cn63xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_25_63:39;
-		uint64_t discclk:1;
-		uint64_t maxvab:4;
-		uint64_t maxlfb:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t ef_ena:1;
-		uint64_t ef_cnt:7;
-		uint64_t vab_thresh:4;
-		uint64_t disecc:1;
-		uint64_t disidxalias:1;
-#else
-		uint64_t disidxalias:1;
-		uint64_t disecc:1;
-		uint64_t vab_thresh:4;
-		uint64_t ef_cnt:7;
-		uint64_t ef_ena:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t maxlfb:4;
-		uint64_t maxvab:4;
-		uint64_t discclk:1;
-		uint64_t reserved_25_63:39;
-#endif
-	} cn63xxp1;
-	struct cvmx_l2c_ctl_cn61xx cn66xx;
-	struct cvmx_l2c_ctl_s cn68xx;
-	struct cvmx_l2c_ctl_cn63xx cn68xxp1;
-	struct cvmx_l2c_ctl_cn61xx cnf71xx;
 };
 
 union cvmx_l2c_dbg {
 	uint64_t u64;
 	struct cvmx_l2c_dbg_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_15_63:49;
-		uint64_t lfb_enum:4;
-		uint64_t lfb_dmp:1;
-		uint64_t ppnum:4;
-		uint64_t set:3;
-		uint64_t finv:1;
-		uint64_t l2d:1;
-		uint64_t l2t:1;
-#else
-		uint64_t l2t:1;
-		uint64_t l2d:1;
-		uint64_t finv:1;
-		uint64_t set:3;
-		uint64_t ppnum:4;
-		uint64_t lfb_dmp:1;
-		uint64_t lfb_enum:4;
-		uint64_t reserved_15_63:49;
-#endif
-	} s;
-	struct cvmx_l2c_dbg_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_13_63:51;
-		uint64_t lfb_enum:2;
-		uint64_t lfb_dmp:1;
-		uint64_t reserved_7_9:3;
-		uint64_t ppnum:1;
-		uint64_t reserved_5_5:1;
-		uint64_t set:2;
-		uint64_t finv:1;
-		uint64_t l2d:1;
-		uint64_t l2t:1;
-#else
-		uint64_t l2t:1;
-		uint64_t l2d:1;
-		uint64_t finv:1;
-		uint64_t set:2;
-		uint64_t reserved_5_5:1;
-		uint64_t ppnum:1;
-		uint64_t reserved_7_9:3;
-		uint64_t lfb_dmp:1;
-		uint64_t lfb_enum:2;
-		uint64_t reserved_13_63:51;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_dbg_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t lfb_enum:3;
-		uint64_t lfb_dmp:1;
-		uint64_t reserved_7_9:3;
-		uint64_t ppnum:1;
-		uint64_t reserved_5_5:1;
-		uint64_t set:2;
-		uint64_t finv:1;
-		uint64_t l2d:1;
-		uint64_t l2t:1;
-#else
-		uint64_t l2t:1;
-		uint64_t l2d:1;
-		uint64_t finv:1;
-		uint64_t set:2;
-		uint64_t reserved_5_5:1;
-		uint64_t ppnum:1;
-		uint64_t reserved_7_9:3;
-		uint64_t lfb_dmp:1;
-		uint64_t lfb_enum:3;
-		uint64_t reserved_14_63:50;
-#endif
-	} cn31xx;
-	struct cvmx_l2c_dbg_s cn38xx;
-	struct cvmx_l2c_dbg_s cn38xxp2;
-	struct cvmx_l2c_dbg_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t lfb_enum:3;
-		uint64_t lfb_dmp:1;
-		uint64_t reserved_7_9:3;
-		uint64_t ppnum:1;
-		uint64_t set:3;
-		uint64_t finv:1;
-		uint64_t l2d:1;
-		uint64_t l2t:1;
-#else
-		uint64_t l2t:1;
-		uint64_t l2d:1;
-		uint64_t finv:1;
-		uint64_t set:3;
-		uint64_t ppnum:1;
-		uint64_t reserved_7_9:3;
-		uint64_t lfb_dmp:1;
-		uint64_t lfb_enum:3;
-		uint64_t reserved_14_63:50;
-#endif
-	} cn50xx;
-	struct cvmx_l2c_dbg_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t lfb_enum:3;
-		uint64_t lfb_dmp:1;
-		uint64_t reserved_8_9:2;
-		uint64_t ppnum:2;
-		uint64_t set:3;
-		uint64_t finv:1;
-		uint64_t l2d:1;
-		uint64_t l2t:1;
-#else
-		uint64_t l2t:1;
-		uint64_t l2d:1;
-		uint64_t finv:1;
-		uint64_t set:3;
-		uint64_t ppnum:2;
-		uint64_t reserved_8_9:2;
-		uint64_t lfb_dmp:1;
-		uint64_t lfb_enum:3;
-		uint64_t reserved_14_63:50;
-#endif
-	} cn52xx;
-	struct cvmx_l2c_dbg_cn52xx cn52xxp1;
-	struct cvmx_l2c_dbg_s cn56xx;
-	struct cvmx_l2c_dbg_s cn56xxp1;
-	struct cvmx_l2c_dbg_s cn58xx;
-	struct cvmx_l2c_dbg_s cn58xxp1;
-};
-
-union cvmx_l2c_dut {
-	uint64_t u64;
-	struct cvmx_l2c_dut_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t dtena:1;
-		uint64_t reserved_30_30:1;
-		uint64_t dt_vld:1;
-		uint64_t dt_tag:29;
-#else
-		uint64_t dt_tag:29;
-		uint64_t dt_vld:1;
-		uint64_t reserved_30_30:1;
-		uint64_t dtena:1;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_l2c_dut_s cn30xx;
-	struct cvmx_l2c_dut_s cn31xx;
-	struct cvmx_l2c_dut_s cn38xx;
-	struct cvmx_l2c_dut_s cn38xxp2;
-	struct cvmx_l2c_dut_s cn50xx;
-	struct cvmx_l2c_dut_s cn52xx;
-	struct cvmx_l2c_dut_s cn52xxp1;
-	struct cvmx_l2c_dut_s cn56xx;
-	struct cvmx_l2c_dut_s cn56xxp1;
-	struct cvmx_l2c_dut_s cn58xx;
-	struct cvmx_l2c_dut_s cn58xxp1;
-};
-
-union cvmx_l2c_dut_mapx {
-	uint64_t u64;
-	struct cvmx_l2c_dut_mapx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_38_63:26;
-		uint64_t tag:28;
-		uint64_t reserved_1_9:9;
-		uint64_t valid:1;
-#else
-		uint64_t valid:1;
-		uint64_t reserved_1_9:9;
-		uint64_t tag:28;
-		uint64_t reserved_38_63:26;
-#endif
-	} s;
-	struct cvmx_l2c_dut_mapx_s cn61xx;
-	struct cvmx_l2c_dut_mapx_s cn63xx;
-	struct cvmx_l2c_dut_mapx_s cn63xxp1;
-	struct cvmx_l2c_dut_mapx_s cn66xx;
-	struct cvmx_l2c_dut_mapx_s cn68xx;
-	struct cvmx_l2c_dut_mapx_s cn68xxp1;
-	struct cvmx_l2c_dut_mapx_s cnf71xx;
-};
-
-union cvmx_l2c_err_tdtx {
-	uint64_t u64;
-	struct cvmx_l2c_err_tdtx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t dbe:1;
-		uint64_t sbe:1;
-		uint64_t vdbe:1;
-		uint64_t vsbe:1;
-		uint64_t syn:10;
-		uint64_t reserved_22_49:28;
-		uint64_t wayidx:18;
-		uint64_t reserved_2_3:2;
-		uint64_t type:2;
-#else
-		uint64_t type:2;
-		uint64_t reserved_2_3:2;
-		uint64_t wayidx:18;
-		uint64_t reserved_22_49:28;
-		uint64_t syn:10;
-		uint64_t vsbe:1;
-		uint64_t vdbe:1;
-		uint64_t sbe:1;
-		uint64_t dbe:1;
-#endif
-	} s;
-	struct cvmx_l2c_err_tdtx_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t dbe:1;
-		uint64_t sbe:1;
-		uint64_t vdbe:1;
-		uint64_t vsbe:1;
-		uint64_t syn:10;
-		uint64_t reserved_20_49:30;
-		uint64_t wayidx:16;
-		uint64_t reserved_2_3:2;
-		uint64_t type:2;
-#else
-		uint64_t type:2;
-		uint64_t reserved_2_3:2;
-		uint64_t wayidx:16;
-		uint64_t reserved_20_49:30;
-		uint64_t syn:10;
-		uint64_t vsbe:1;
-		uint64_t vdbe:1;
-		uint64_t sbe:1;
-		uint64_t dbe:1;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_err_tdtx_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t dbe:1;
-		uint64_t sbe:1;
-		uint64_t vdbe:1;
-		uint64_t vsbe:1;
-		uint64_t syn:10;
-		uint64_t reserved_21_49:29;
-		uint64_t wayidx:17;
-		uint64_t reserved_2_3:2;
-		uint64_t type:2;
-#else
-		uint64_t type:2;
-		uint64_t reserved_2_3:2;
-		uint64_t wayidx:17;
-		uint64_t reserved_21_49:29;
-		uint64_t syn:10;
-		uint64_t vsbe:1;
-		uint64_t vdbe:1;
-		uint64_t sbe:1;
-		uint64_t dbe:1;
-#endif
-	} cn63xx;
-	struct cvmx_l2c_err_tdtx_cn63xx cn63xxp1;
-	struct cvmx_l2c_err_tdtx_cn63xx cn66xx;
-	struct cvmx_l2c_err_tdtx_s cn68xx;
-	struct cvmx_l2c_err_tdtx_s cn68xxp1;
-	struct cvmx_l2c_err_tdtx_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_err_ttgx {
-	uint64_t u64;
-	struct cvmx_l2c_err_ttgx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t dbe:1;
-		uint64_t sbe:1;
-		uint64_t noway:1;
-		uint64_t reserved_56_60:5;
-		uint64_t syn:6;
-		uint64_t reserved_22_49:28;
-		uint64_t wayidx:15;
-		uint64_t reserved_2_6:5;
-		uint64_t type:2;
-#else
-		uint64_t type:2;
-		uint64_t reserved_2_6:5;
-		uint64_t wayidx:15;
-		uint64_t reserved_22_49:28;
-		uint64_t syn:6;
-		uint64_t reserved_56_60:5;
-		uint64_t noway:1;
-		uint64_t sbe:1;
-		uint64_t dbe:1;
-#endif
-	} s;
-	struct cvmx_l2c_err_ttgx_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t dbe:1;
-		uint64_t sbe:1;
-		uint64_t noway:1;
-		uint64_t reserved_56_60:5;
-		uint64_t syn:6;
-		uint64_t reserved_20_49:30;
-		uint64_t wayidx:13;
-		uint64_t reserved_2_6:5;
-		uint64_t type:2;
-#else
-		uint64_t type:2;
-		uint64_t reserved_2_6:5;
-		uint64_t wayidx:13;
-		uint64_t reserved_20_49:30;
-		uint64_t syn:6;
-		uint64_t reserved_56_60:5;
-		uint64_t noway:1;
-		uint64_t sbe:1;
-		uint64_t dbe:1;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_err_ttgx_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t dbe:1;
-		uint64_t sbe:1;
-		uint64_t noway:1;
-		uint64_t reserved_56_60:5;
-		uint64_t syn:6;
-		uint64_t reserved_21_49:29;
-		uint64_t wayidx:14;
-		uint64_t reserved_2_6:5;
-		uint64_t type:2;
-#else
-		uint64_t type:2;
-		uint64_t reserved_2_6:5;
-		uint64_t wayidx:14;
-		uint64_t reserved_21_49:29;
-		uint64_t syn:6;
-		uint64_t reserved_56_60:5;
-		uint64_t noway:1;
-		uint64_t sbe:1;
-		uint64_t dbe:1;
-#endif
-	} cn63xx;
-	struct cvmx_l2c_err_ttgx_cn63xx cn63xxp1;
-	struct cvmx_l2c_err_ttgx_cn63xx cn66xx;
-	struct cvmx_l2c_err_ttgx_s cn68xx;
-	struct cvmx_l2c_err_ttgx_s cn68xxp1;
-	struct cvmx_l2c_err_ttgx_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_err_vbfx {
-	uint64_t u64;
-	struct cvmx_l2c_err_vbfx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_62_63:2;
-		uint64_t vdbe:1;
-		uint64_t vsbe:1;
-		uint64_t vsyn:10;
-		uint64_t reserved_2_49:48;
-		uint64_t type:2;
-#else
-		uint64_t type:2;
-		uint64_t reserved_2_49:48;
-		uint64_t vsyn:10;
-		uint64_t vsbe:1;
-		uint64_t vdbe:1;
-		uint64_t reserved_62_63:2;
-#endif
-	} s;
-	struct cvmx_l2c_err_vbfx_s cn61xx;
-	struct cvmx_l2c_err_vbfx_s cn63xx;
-	struct cvmx_l2c_err_vbfx_s cn63xxp1;
-	struct cvmx_l2c_err_vbfx_s cn66xx;
-	struct cvmx_l2c_err_vbfx_s cn68xx;
-	struct cvmx_l2c_err_vbfx_s cn68xxp1;
-	struct cvmx_l2c_err_vbfx_s cnf71xx;
-};
-
-union cvmx_l2c_err_xmc {
-	uint64_t u64;
-	struct cvmx_l2c_err_xmc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t cmd:6;
-		uint64_t reserved_54_57:4;
-		uint64_t sid:6;
-		uint64_t reserved_38_47:10;
-		uint64_t addr:38;
-#else
-		uint64_t addr:38;
-		uint64_t reserved_38_47:10;
-		uint64_t sid:6;
-		uint64_t reserved_54_57:4;
-		uint64_t cmd:6;
-#endif
-	} s;
-	struct cvmx_l2c_err_xmc_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t cmd:6;
-		uint64_t reserved_52_57:6;
-		uint64_t sid:4;
-		uint64_t reserved_38_47:10;
-		uint64_t addr:38;
-#else
-		uint64_t addr:38;
-		uint64_t reserved_38_47:10;
-		uint64_t sid:4;
-		uint64_t reserved_52_57:6;
-		uint64_t cmd:6;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_err_xmc_cn61xx cn63xx;
-	struct cvmx_l2c_err_xmc_cn61xx cn63xxp1;
-	struct cvmx_l2c_err_xmc_cn66xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t cmd:6;
-		uint64_t reserved_53_57:5;
-		uint64_t sid:5;
-		uint64_t reserved_38_47:10;
-		uint64_t addr:38;
-#else
-		uint64_t addr:38;
-		uint64_t reserved_38_47:10;
-		uint64_t sid:5;
-		uint64_t reserved_53_57:5;
-		uint64_t cmd:6;
-#endif
-	} cn66xx;
-	struct cvmx_l2c_err_xmc_s cn68xx;
-	struct cvmx_l2c_err_xmc_s cn68xxp1;
-	struct cvmx_l2c_err_xmc_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_grpwrr0 {
-	uint64_t u64;
-	struct cvmx_l2c_grpwrr0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t plc1rmsk:32;
-		uint64_t plc0rmsk:32;
-#else
-		uint64_t plc0rmsk:32;
-		uint64_t plc1rmsk:32;
-#endif
-	} s;
-	struct cvmx_l2c_grpwrr0_s cn52xx;
-	struct cvmx_l2c_grpwrr0_s cn52xxp1;
-	struct cvmx_l2c_grpwrr0_s cn56xx;
-	struct cvmx_l2c_grpwrr0_s cn56xxp1;
-};
-
-union cvmx_l2c_grpwrr1 {
-	uint64_t u64;
-	struct cvmx_l2c_grpwrr1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t ilcrmsk:32;
-		uint64_t plc2rmsk:32;
-#else
-		uint64_t plc2rmsk:32;
-		uint64_t ilcrmsk:32;
-#endif
-	} s;
-	struct cvmx_l2c_grpwrr1_s cn52xx;
-	struct cvmx_l2c_grpwrr1_s cn52xxp1;
-	struct cvmx_l2c_grpwrr1_s cn56xx;
-	struct cvmx_l2c_grpwrr1_s cn56xxp1;
-};
-
-union cvmx_l2c_int_en {
-	uint64_t u64;
-	struct cvmx_l2c_int_en_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_9_63:55;
-		uint64_t lck2ena:1;
-		uint64_t lckena:1;
-		uint64_t l2ddeden:1;
-		uint64_t l2dsecen:1;
-		uint64_t l2tdeden:1;
-		uint64_t l2tsecen:1;
-		uint64_t oob3en:1;
-		uint64_t oob2en:1;
-		uint64_t oob1en:1;
-#else
-		uint64_t oob1en:1;
-		uint64_t oob2en:1;
-		uint64_t oob3en:1;
-		uint64_t l2tsecen:1;
-		uint64_t l2tdeden:1;
-		uint64_t l2dsecen:1;
-		uint64_t l2ddeden:1;
-		uint64_t lckena:1;
-		uint64_t lck2ena:1;
-		uint64_t reserved_9_63:55;
-#endif
-	} s;
-	struct cvmx_l2c_int_en_s cn52xx;
-	struct cvmx_l2c_int_en_s cn52xxp1;
-	struct cvmx_l2c_int_en_s cn56xx;
-	struct cvmx_l2c_int_en_s cn56xxp1;
-};
-
-union cvmx_l2c_int_ena {
-	uint64_t u64;
-	struct cvmx_l2c_int_ena_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_8_63:56;
-		uint64_t bigrd:1;
-		uint64_t bigwr:1;
-		uint64_t vrtpe:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtwr:1;
-		uint64_t holewr:1;
-		uint64_t holerd:1;
-#else
-		uint64_t holerd:1;
-		uint64_t holewr:1;
-		uint64_t vrtwr:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtpe:1;
-		uint64_t bigwr:1;
-		uint64_t bigrd:1;
-		uint64_t reserved_8_63:56;
-#endif
-	} s;
-	struct cvmx_l2c_int_ena_s cn61xx;
-	struct cvmx_l2c_int_ena_s cn63xx;
-	struct cvmx_l2c_int_ena_cn63xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_6_63:58;
-		uint64_t vrtpe:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtwr:1;
-		uint64_t holewr:1;
-		uint64_t holerd:1;
-#else
-		uint64_t holerd:1;
-		uint64_t holewr:1;
-		uint64_t vrtwr:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtpe:1;
-		uint64_t reserved_6_63:58;
-#endif
-	} cn63xxp1;
-	struct cvmx_l2c_int_ena_s cn66xx;
-	struct cvmx_l2c_int_ena_s cn68xx;
-	struct cvmx_l2c_int_ena_s cn68xxp1;
-	struct cvmx_l2c_int_ena_s cnf71xx;
-};
-
-union cvmx_l2c_int_reg {
-	uint64_t u64;
-	struct cvmx_l2c_int_reg_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_20_63:44;
-		uint64_t tad3:1;
-		uint64_t tad2:1;
-		uint64_t tad1:1;
-		uint64_t tad0:1;
-		uint64_t reserved_8_15:8;
-		uint64_t bigrd:1;
-		uint64_t bigwr:1;
-		uint64_t vrtpe:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtwr:1;
-		uint64_t holewr:1;
-		uint64_t holerd:1;
-#else
-		uint64_t holerd:1;
-		uint64_t holewr:1;
-		uint64_t vrtwr:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtpe:1;
-		uint64_t bigwr:1;
-		uint64_t bigrd:1;
-		uint64_t reserved_8_15:8;
-		uint64_t tad0:1;
-		uint64_t tad1:1;
-		uint64_t tad2:1;
-		uint64_t tad3:1;
-		uint64_t reserved_20_63:44;
-#endif
-	} s;
-	struct cvmx_l2c_int_reg_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_17_63:47;
-		uint64_t tad0:1;
-		uint64_t reserved_8_15:8;
-		uint64_t bigrd:1;
-		uint64_t bigwr:1;
-		uint64_t vrtpe:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtwr:1;
-		uint64_t holewr:1;
-		uint64_t holerd:1;
-#else
-		uint64_t holerd:1;
-		uint64_t holewr:1;
-		uint64_t vrtwr:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtpe:1;
-		uint64_t bigwr:1;
-		uint64_t bigrd:1;
-		uint64_t reserved_8_15:8;
-		uint64_t tad0:1;
-		uint64_t reserved_17_63:47;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_int_reg_cn61xx cn63xx;
-	struct cvmx_l2c_int_reg_cn63xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_17_63:47;
-		uint64_t tad0:1;
-		uint64_t reserved_6_15:10;
-		uint64_t vrtpe:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtwr:1;
-		uint64_t holewr:1;
-		uint64_t holerd:1;
-#else
-		uint64_t holerd:1;
-		uint64_t holewr:1;
-		uint64_t vrtwr:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtpe:1;
-		uint64_t reserved_6_15:10;
-		uint64_t tad0:1;
-		uint64_t reserved_17_63:47;
-#endif
-	} cn63xxp1;
-	struct cvmx_l2c_int_reg_cn61xx cn66xx;
-	struct cvmx_l2c_int_reg_s cn68xx;
-	struct cvmx_l2c_int_reg_s cn68xxp1;
-	struct cvmx_l2c_int_reg_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_int_stat {
-	uint64_t u64;
-	struct cvmx_l2c_int_stat_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_9_63:55;
-		uint64_t lck2:1;
-		uint64_t lck:1;
-		uint64_t l2dded:1;
-		uint64_t l2dsec:1;
-		uint64_t l2tded:1;
-		uint64_t l2tsec:1;
-		uint64_t oob3:1;
-		uint64_t oob2:1;
-		uint64_t oob1:1;
-#else
-		uint64_t oob1:1;
-		uint64_t oob2:1;
-		uint64_t oob3:1;
-		uint64_t l2tsec:1;
-		uint64_t l2tded:1;
-		uint64_t l2dsec:1;
-		uint64_t l2dded:1;
-		uint64_t lck:1;
-		uint64_t lck2:1;
-		uint64_t reserved_9_63:55;
-#endif
-	} s;
-	struct cvmx_l2c_int_stat_s cn52xx;
-	struct cvmx_l2c_int_stat_s cn52xxp1;
-	struct cvmx_l2c_int_stat_s cn56xx;
-	struct cvmx_l2c_int_stat_s cn56xxp1;
-};
-
-union cvmx_l2c_iocx_pfc {
-	uint64_t u64;
-	struct cvmx_l2c_iocx_pfc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_iocx_pfc_s cn61xx;
-	struct cvmx_l2c_iocx_pfc_s cn63xx;
-	struct cvmx_l2c_iocx_pfc_s cn63xxp1;
-	struct cvmx_l2c_iocx_pfc_s cn66xx;
-	struct cvmx_l2c_iocx_pfc_s cn68xx;
-	struct cvmx_l2c_iocx_pfc_s cn68xxp1;
-	struct cvmx_l2c_iocx_pfc_s cnf71xx;
-};
-
-union cvmx_l2c_iorx_pfc {
-	uint64_t u64;
-	struct cvmx_l2c_iorx_pfc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_iorx_pfc_s cn61xx;
-	struct cvmx_l2c_iorx_pfc_s cn63xx;
-	struct cvmx_l2c_iorx_pfc_s cn63xxp1;
-	struct cvmx_l2c_iorx_pfc_s cn66xx;
-	struct cvmx_l2c_iorx_pfc_s cn68xx;
-	struct cvmx_l2c_iorx_pfc_s cn68xxp1;
-	struct cvmx_l2c_iorx_pfc_s cnf71xx;
-};
-
-union cvmx_l2c_lckbase {
-	uint64_t u64;
-	struct cvmx_l2c_lckbase_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_31_63:33;
-		uint64_t lck_base:27;
-		uint64_t reserved_1_3:3;
-		uint64_t lck_ena:1;
-#else
-		uint64_t lck_ena:1;
-		uint64_t reserved_1_3:3;
-		uint64_t lck_base:27;
-		uint64_t reserved_31_63:33;
-#endif
-	} s;
-	struct cvmx_l2c_lckbase_s cn30xx;
-	struct cvmx_l2c_lckbase_s cn31xx;
-	struct cvmx_l2c_lckbase_s cn38xx;
-	struct cvmx_l2c_lckbase_s cn38xxp2;
-	struct cvmx_l2c_lckbase_s cn50xx;
-	struct cvmx_l2c_lckbase_s cn52xx;
-	struct cvmx_l2c_lckbase_s cn52xxp1;
-	struct cvmx_l2c_lckbase_s cn56xx;
-	struct cvmx_l2c_lckbase_s cn56xxp1;
-	struct cvmx_l2c_lckbase_s cn58xx;
-	struct cvmx_l2c_lckbase_s cn58xxp1;
-};
-
-union cvmx_l2c_lckoff {
-	uint64_t u64;
-	struct cvmx_l2c_lckoff_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_10_63:54;
-		uint64_t lck_offset:10;
-#else
-		uint64_t lck_offset:10;
-		uint64_t reserved_10_63:54;
-#endif
-	} s;
-	struct cvmx_l2c_lckoff_s cn30xx;
-	struct cvmx_l2c_lckoff_s cn31xx;
-	struct cvmx_l2c_lckoff_s cn38xx;
-	struct cvmx_l2c_lckoff_s cn38xxp2;
-	struct cvmx_l2c_lckoff_s cn50xx;
-	struct cvmx_l2c_lckoff_s cn52xx;
-	struct cvmx_l2c_lckoff_s cn52xxp1;
-	struct cvmx_l2c_lckoff_s cn56xx;
-	struct cvmx_l2c_lckoff_s cn56xxp1;
-	struct cvmx_l2c_lckoff_s cn58xx;
-	struct cvmx_l2c_lckoff_s cn58xxp1;
-};
-
-union cvmx_l2c_lfb0 {
-	uint64_t u64;
-	struct cvmx_l2c_lfb0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t stcpnd:1;
-		uint64_t stpnd:1;
-		uint64_t stinv:1;
-		uint64_t stcfl:1;
-		uint64_t vam:1;
-		uint64_t inxt:4;
-		uint64_t itl:1;
-		uint64_t ihd:1;
-		uint64_t set:3;
-		uint64_t vabnum:4;
-		uint64_t sid:9;
-		uint64_t cmd:4;
-		uint64_t vld:1;
-#else
-		uint64_t vld:1;
-		uint64_t cmd:4;
-		uint64_t sid:9;
-		uint64_t vabnum:4;
-		uint64_t set:3;
-		uint64_t ihd:1;
-		uint64_t itl:1;
-		uint64_t inxt:4;
-		uint64_t vam:1;
-		uint64_t stcfl:1;
-		uint64_t stinv:1;
-		uint64_t stpnd:1;
-		uint64_t stcpnd:1;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_l2c_lfb0_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t stcpnd:1;
-		uint64_t stpnd:1;
-		uint64_t stinv:1;
-		uint64_t stcfl:1;
-		uint64_t vam:1;
-		uint64_t reserved_25_26:2;
-		uint64_t inxt:2;
-		uint64_t itl:1;
-		uint64_t ihd:1;
-		uint64_t reserved_20_20:1;
-		uint64_t set:2;
-		uint64_t reserved_16_17:2;
-		uint64_t vabnum:2;
-		uint64_t sid:9;
-		uint64_t cmd:4;
-		uint64_t vld:1;
-#else
-		uint64_t vld:1;
-		uint64_t cmd:4;
-		uint64_t sid:9;
-		uint64_t vabnum:2;
-		uint64_t reserved_16_17:2;
-		uint64_t set:2;
-		uint64_t reserved_20_20:1;
-		uint64_t ihd:1;
-		uint64_t itl:1;
-		uint64_t inxt:2;
-		uint64_t reserved_25_26:2;
-		uint64_t vam:1;
-		uint64_t stcfl:1;
-		uint64_t stinv:1;
-		uint64_t stpnd:1;
-		uint64_t stcpnd:1;
-		uint64_t reserved_32_63:32;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_lfb0_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t stcpnd:1;
-		uint64_t stpnd:1;
-		uint64_t stinv:1;
-		uint64_t stcfl:1;
-		uint64_t vam:1;
-		uint64_t reserved_26_26:1;
-		uint64_t inxt:3;
-		uint64_t itl:1;
-		uint64_t ihd:1;
-		uint64_t reserved_20_20:1;
-		uint64_t set:2;
-		uint64_t reserved_17_17:1;
-		uint64_t vabnum:3;
-		uint64_t sid:9;
-		uint64_t cmd:4;
-		uint64_t vld:1;
-#else
-		uint64_t vld:1;
-		uint64_t cmd:4;
-		uint64_t sid:9;
-		uint64_t vabnum:3;
-		uint64_t reserved_17_17:1;
-		uint64_t set:2;
-		uint64_t reserved_20_20:1;
-		uint64_t ihd:1;
-		uint64_t itl:1;
-		uint64_t inxt:3;
-		uint64_t reserved_26_26:1;
-		uint64_t vam:1;
-		uint64_t stcfl:1;
-		uint64_t stinv:1;
-		uint64_t stpnd:1;
-		uint64_t stcpnd:1;
-		uint64_t reserved_32_63:32;
-#endif
-	} cn31xx;
-	struct cvmx_l2c_lfb0_s cn38xx;
-	struct cvmx_l2c_lfb0_s cn38xxp2;
-	struct cvmx_l2c_lfb0_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t stcpnd:1;
-		uint64_t stpnd:1;
-		uint64_t stinv:1;
-		uint64_t stcfl:1;
-		uint64_t vam:1;
-		uint64_t reserved_26_26:1;
-		uint64_t inxt:3;
-		uint64_t itl:1;
-		uint64_t ihd:1;
-		uint64_t set:3;
-		uint64_t reserved_17_17:1;
-		uint64_t vabnum:3;
-		uint64_t sid:9;
-		uint64_t cmd:4;
-		uint64_t vld:1;
-#else
-		uint64_t vld:1;
-		uint64_t cmd:4;
-		uint64_t sid:9;
-		uint64_t vabnum:3;
-		uint64_t reserved_17_17:1;
-		uint64_t set:3;
-		uint64_t ihd:1;
-		uint64_t itl:1;
-		uint64_t inxt:3;
-		uint64_t reserved_26_26:1;
-		uint64_t vam:1;
-		uint64_t stcfl:1;
-		uint64_t stinv:1;
-		uint64_t stpnd:1;
-		uint64_t stcpnd:1;
-		uint64_t reserved_32_63:32;
-#endif
-	} cn50xx;
-	struct cvmx_l2c_lfb0_cn50xx cn52xx;
-	struct cvmx_l2c_lfb0_cn50xx cn52xxp1;
-	struct cvmx_l2c_lfb0_s cn56xx;
-	struct cvmx_l2c_lfb0_s cn56xxp1;
-	struct cvmx_l2c_lfb0_s cn58xx;
-	struct cvmx_l2c_lfb0_s cn58xxp1;
-};
-
-union cvmx_l2c_lfb1 {
-	uint64_t u64;
-	struct cvmx_l2c_lfb1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_19_63:45;
-		uint64_t dsgoing:1;
-		uint64_t bid:2;
-		uint64_t wtrsp:1;
-		uint64_t wtdw:1;
-		uint64_t wtdq:1;
-		uint64_t wtwhp:1;
-		uint64_t wtwhf:1;
-		uint64_t wtwrm:1;
-		uint64_t wtstm:1;
-		uint64_t wtrda:1;
-		uint64_t wtstdt:1;
-		uint64_t wtstrsp:1;
-		uint64_t wtstrsc:1;
-		uint64_t wtvtm:1;
-		uint64_t wtmfl:1;
-		uint64_t prbrty:1;
-		uint64_t wtprb:1;
-		uint64_t vld:1;
-#else
-		uint64_t vld:1;
-		uint64_t wtprb:1;
-		uint64_t prbrty:1;
-		uint64_t wtmfl:1;
-		uint64_t wtvtm:1;
-		uint64_t wtstrsc:1;
-		uint64_t wtstrsp:1;
-		uint64_t wtstdt:1;
-		uint64_t wtrda:1;
-		uint64_t wtstm:1;
-		uint64_t wtwrm:1;
-		uint64_t wtwhf:1;
-		uint64_t wtwhp:1;
-		uint64_t wtdq:1;
-		uint64_t wtdw:1;
-		uint64_t wtrsp:1;
-		uint64_t bid:2;
-		uint64_t dsgoing:1;
-		uint64_t reserved_19_63:45;
-#endif
-	} s;
-	struct cvmx_l2c_lfb1_s cn30xx;
-	struct cvmx_l2c_lfb1_s cn31xx;
-	struct cvmx_l2c_lfb1_s cn38xx;
-	struct cvmx_l2c_lfb1_s cn38xxp2;
-	struct cvmx_l2c_lfb1_s cn50xx;
-	struct cvmx_l2c_lfb1_s cn52xx;
-	struct cvmx_l2c_lfb1_s cn52xxp1;
-	struct cvmx_l2c_lfb1_s cn56xx;
-	struct cvmx_l2c_lfb1_s cn56xxp1;
-	struct cvmx_l2c_lfb1_s cn58xx;
-	struct cvmx_l2c_lfb1_s cn58xxp1;
-};
-
-union cvmx_l2c_lfb2 {
-	uint64_t u64;
-	struct cvmx_l2c_lfb2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_0_63:64;
-#else
-		uint64_t reserved_0_63:64;
-#endif
-	} s;
-	struct cvmx_l2c_lfb2_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_27_63:37;
-		uint64_t lfb_tag:19;
-		uint64_t lfb_idx:8;
-#else
-		uint64_t lfb_idx:8;
-		uint64_t lfb_tag:19;
-		uint64_t reserved_27_63:37;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_lfb2_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_27_63:37;
-		uint64_t lfb_tag:17;
-		uint64_t lfb_idx:10;
-#else
-		uint64_t lfb_idx:10;
-		uint64_t lfb_tag:17;
-		uint64_t reserved_27_63:37;
-#endif
-	} cn31xx;
-	struct cvmx_l2c_lfb2_cn31xx cn38xx;
-	struct cvmx_l2c_lfb2_cn31xx cn38xxp2;
-	struct cvmx_l2c_lfb2_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_27_63:37;
-		uint64_t lfb_tag:20;
-		uint64_t lfb_idx:7;
-#else
-		uint64_t lfb_idx:7;
-		uint64_t lfb_tag:20;
-		uint64_t reserved_27_63:37;
-#endif
-	} cn50xx;
-	struct cvmx_l2c_lfb2_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_27_63:37;
-		uint64_t lfb_tag:18;
-		uint64_t lfb_idx:9;
-#else
-		uint64_t lfb_idx:9;
-		uint64_t lfb_tag:18;
-		uint64_t reserved_27_63:37;
-#endif
-	} cn52xx;
-	struct cvmx_l2c_lfb2_cn52xx cn52xxp1;
-	struct cvmx_l2c_lfb2_cn56xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_27_63:37;
-		uint64_t lfb_tag:16;
-		uint64_t lfb_idx:11;
-#else
-		uint64_t lfb_idx:11;
-		uint64_t lfb_tag:16;
-		uint64_t reserved_27_63:37;
-#endif
-	} cn56xx;
-	struct cvmx_l2c_lfb2_cn56xx cn56xxp1;
-	struct cvmx_l2c_lfb2_cn56xx cn58xx;
-	struct cvmx_l2c_lfb2_cn56xx cn58xxp1;
-};
-
-union cvmx_l2c_lfb3 {
-	uint64_t u64;
-	struct cvmx_l2c_lfb3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_5_63:59;
-		uint64_t stpartdis:1;
-		uint64_t lfb_hwm:4;
-#else
-		uint64_t lfb_hwm:4;
-		uint64_t stpartdis:1;
-		uint64_t reserved_5_63:59;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_15_63:49,
+		__BITFIELD_FIELD(uint64_t lfb_enum:4,
+		__BITFIELD_FIELD(uint64_t lfb_dmp:1,
+		__BITFIELD_FIELD(uint64_t ppnum:4,
+		__BITFIELD_FIELD(uint64_t set:3,
+		__BITFIELD_FIELD(uint64_t finv:1,
+		__BITFIELD_FIELD(uint64_t l2d:1,
+		__BITFIELD_FIELD(uint64_t l2t:1,
+		;))))))))
 	} s;
-	struct cvmx_l2c_lfb3_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_5_63:59;
-		uint64_t stpartdis:1;
-		uint64_t reserved_2_3:2;
-		uint64_t lfb_hwm:2;
-#else
-		uint64_t lfb_hwm:2;
-		uint64_t reserved_2_3:2;
-		uint64_t stpartdis:1;
-		uint64_t reserved_5_63:59;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_lfb3_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_5_63:59;
-		uint64_t stpartdis:1;
-		uint64_t reserved_3_3:1;
-		uint64_t lfb_hwm:3;
-#else
-		uint64_t lfb_hwm:3;
-		uint64_t reserved_3_3:1;
-		uint64_t stpartdis:1;
-		uint64_t reserved_5_63:59;
-#endif
-	} cn31xx;
-	struct cvmx_l2c_lfb3_s cn38xx;
-	struct cvmx_l2c_lfb3_s cn38xxp2;
-	struct cvmx_l2c_lfb3_cn31xx cn50xx;
-	struct cvmx_l2c_lfb3_cn31xx cn52xx;
-	struct cvmx_l2c_lfb3_cn31xx cn52xxp1;
-	struct cvmx_l2c_lfb3_s cn56xx;
-	struct cvmx_l2c_lfb3_s cn56xxp1;
-	struct cvmx_l2c_lfb3_s cn58xx;
-	struct cvmx_l2c_lfb3_s cn58xxp1;
-};
-
-union cvmx_l2c_oob {
-	uint64_t u64;
-	struct cvmx_l2c_oob_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_2_63:62;
-		uint64_t dwbena:1;
-		uint64_t stena:1;
-#else
-		uint64_t stena:1;
-		uint64_t dwbena:1;
-		uint64_t reserved_2_63:62;
-#endif
-	} s;
-	struct cvmx_l2c_oob_s cn52xx;
-	struct cvmx_l2c_oob_s cn52xxp1;
-	struct cvmx_l2c_oob_s cn56xx;
-	struct cvmx_l2c_oob_s cn56xxp1;
-};
-
-union cvmx_l2c_oob1 {
-	uint64_t u64;
-	struct cvmx_l2c_oob1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t fadr:27;
-		uint64_t fsrc:1;
-		uint64_t reserved_34_35:2;
-		uint64_t sadr:14;
-		uint64_t reserved_14_19:6;
-		uint64_t size:14;
-#else
-		uint64_t size:14;
-		uint64_t reserved_14_19:6;
-		uint64_t sadr:14;
-		uint64_t reserved_34_35:2;
-		uint64_t fsrc:1;
-		uint64_t fadr:27;
-#endif
-	} s;
-	struct cvmx_l2c_oob1_s cn52xx;
-	struct cvmx_l2c_oob1_s cn52xxp1;
-	struct cvmx_l2c_oob1_s cn56xx;
-	struct cvmx_l2c_oob1_s cn56xxp1;
-};
-
-union cvmx_l2c_oob2 {
-	uint64_t u64;
-	struct cvmx_l2c_oob2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t fadr:27;
-		uint64_t fsrc:1;
-		uint64_t reserved_34_35:2;
-		uint64_t sadr:14;
-		uint64_t reserved_14_19:6;
-		uint64_t size:14;
-#else
-		uint64_t size:14;
-		uint64_t reserved_14_19:6;
-		uint64_t sadr:14;
-		uint64_t reserved_34_35:2;
-		uint64_t fsrc:1;
-		uint64_t fadr:27;
-#endif
-	} s;
-	struct cvmx_l2c_oob2_s cn52xx;
-	struct cvmx_l2c_oob2_s cn52xxp1;
-	struct cvmx_l2c_oob2_s cn56xx;
-	struct cvmx_l2c_oob2_s cn56xxp1;
-};
-
-union cvmx_l2c_oob3 {
-	uint64_t u64;
-	struct cvmx_l2c_oob3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t fadr:27;
-		uint64_t fsrc:1;
-		uint64_t reserved_34_35:2;
-		uint64_t sadr:14;
-		uint64_t reserved_14_19:6;
-		uint64_t size:14;
-#else
-		uint64_t size:14;
-		uint64_t reserved_14_19:6;
-		uint64_t sadr:14;
-		uint64_t reserved_34_35:2;
-		uint64_t fsrc:1;
-		uint64_t fadr:27;
-#endif
-	} s;
-	struct cvmx_l2c_oob3_s cn52xx;
-	struct cvmx_l2c_oob3_s cn52xxp1;
-	struct cvmx_l2c_oob3_s cn56xx;
-	struct cvmx_l2c_oob3_s cn56xxp1;
-};
-
-union cvmx_l2c_pfcx {
-	uint64_t u64;
-	struct cvmx_l2c_pfcx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_36_63:28;
-		uint64_t pfcnt0:36;
-#else
-		uint64_t pfcnt0:36;
-		uint64_t reserved_36_63:28;
-#endif
-	} s;
-	struct cvmx_l2c_pfcx_s cn30xx;
-	struct cvmx_l2c_pfcx_s cn31xx;
-	struct cvmx_l2c_pfcx_s cn38xx;
-	struct cvmx_l2c_pfcx_s cn38xxp2;
-	struct cvmx_l2c_pfcx_s cn50xx;
-	struct cvmx_l2c_pfcx_s cn52xx;
-	struct cvmx_l2c_pfcx_s cn52xxp1;
-	struct cvmx_l2c_pfcx_s cn56xx;
-	struct cvmx_l2c_pfcx_s cn56xxp1;
-	struct cvmx_l2c_pfcx_s cn58xx;
-	struct cvmx_l2c_pfcx_s cn58xxp1;
 };
 
 union cvmx_l2c_pfctl {
 	uint64_t u64;
 	struct cvmx_l2c_pfctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_36_63:28;
-		uint64_t cnt3rdclr:1;
-		uint64_t cnt2rdclr:1;
-		uint64_t cnt1rdclr:1;
-		uint64_t cnt0rdclr:1;
-		uint64_t cnt3ena:1;
-		uint64_t cnt3clr:1;
-		uint64_t cnt3sel:6;
-		uint64_t cnt2ena:1;
-		uint64_t cnt2clr:1;
-		uint64_t cnt2sel:6;
-		uint64_t cnt1ena:1;
-		uint64_t cnt1clr:1;
-		uint64_t cnt1sel:6;
-		uint64_t cnt0ena:1;
-		uint64_t cnt0clr:1;
-		uint64_t cnt0sel:6;
-#else
-		uint64_t cnt0sel:6;
-		uint64_t cnt0clr:1;
-		uint64_t cnt0ena:1;
-		uint64_t cnt1sel:6;
-		uint64_t cnt1clr:1;
-		uint64_t cnt1ena:1;
-		uint64_t cnt2sel:6;
-		uint64_t cnt2clr:1;
-		uint64_t cnt2ena:1;
-		uint64_t cnt3sel:6;
-		uint64_t cnt3clr:1;
-		uint64_t cnt3ena:1;
-		uint64_t cnt0rdclr:1;
-		uint64_t cnt1rdclr:1;
-		uint64_t cnt2rdclr:1;
-		uint64_t cnt3rdclr:1;
-		uint64_t reserved_36_63:28;
-#endif
-	} s;
-	struct cvmx_l2c_pfctl_s cn30xx;
-	struct cvmx_l2c_pfctl_s cn31xx;
-	struct cvmx_l2c_pfctl_s cn38xx;
-	struct cvmx_l2c_pfctl_s cn38xxp2;
-	struct cvmx_l2c_pfctl_s cn50xx;
-	struct cvmx_l2c_pfctl_s cn52xx;
-	struct cvmx_l2c_pfctl_s cn52xxp1;
-	struct cvmx_l2c_pfctl_s cn56xx;
-	struct cvmx_l2c_pfctl_s cn56xxp1;
-	struct cvmx_l2c_pfctl_s cn58xx;
-	struct cvmx_l2c_pfctl_s cn58xxp1;
-};
-
-union cvmx_l2c_ppgrp {
-	uint64_t u64;
-	struct cvmx_l2c_ppgrp_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_24_63:40;
-		uint64_t pp11grp:2;
-		uint64_t pp10grp:2;
-		uint64_t pp9grp:2;
-		uint64_t pp8grp:2;
-		uint64_t pp7grp:2;
-		uint64_t pp6grp:2;
-		uint64_t pp5grp:2;
-		uint64_t pp4grp:2;
-		uint64_t pp3grp:2;
-		uint64_t pp2grp:2;
-		uint64_t pp1grp:2;
-		uint64_t pp0grp:2;
-#else
-		uint64_t pp0grp:2;
-		uint64_t pp1grp:2;
-		uint64_t pp2grp:2;
-		uint64_t pp3grp:2;
-		uint64_t pp4grp:2;
-		uint64_t pp5grp:2;
-		uint64_t pp6grp:2;
-		uint64_t pp7grp:2;
-		uint64_t pp8grp:2;
-		uint64_t pp9grp:2;
-		uint64_t pp10grp:2;
-		uint64_t pp11grp:2;
-		uint64_t reserved_24_63:40;
-#endif
-	} s;
-	struct cvmx_l2c_ppgrp_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_8_63:56;
-		uint64_t pp3grp:2;
-		uint64_t pp2grp:2;
-		uint64_t pp1grp:2;
-		uint64_t pp0grp:2;
-#else
-		uint64_t pp0grp:2;
-		uint64_t pp1grp:2;
-		uint64_t pp2grp:2;
-		uint64_t pp3grp:2;
-		uint64_t reserved_8_63:56;
-#endif
-	} cn52xx;
-	struct cvmx_l2c_ppgrp_cn52xx cn52xxp1;
-	struct cvmx_l2c_ppgrp_s cn56xx;
-	struct cvmx_l2c_ppgrp_s cn56xxp1;
-};
-
-union cvmx_l2c_qos_iobx {
-	uint64_t u64;
-	struct cvmx_l2c_qos_iobx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_7_63:57;
-		uint64_t dwblvl:3;
-		uint64_t reserved_3_3:1;
-		uint64_t lvl:3;
-#else
-		uint64_t lvl:3;
-		uint64_t reserved_3_3:1;
-		uint64_t dwblvl:3;
-		uint64_t reserved_7_63:57;
-#endif
-	} s;
-	struct cvmx_l2c_qos_iobx_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_6_63:58;
-		uint64_t dwblvl:2;
-		uint64_t reserved_2_3:2;
-		uint64_t lvl:2;
-#else
-		uint64_t lvl:2;
-		uint64_t reserved_2_3:2;
-		uint64_t dwblvl:2;
-		uint64_t reserved_6_63:58;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_qos_iobx_cn61xx cn63xx;
-	struct cvmx_l2c_qos_iobx_cn61xx cn63xxp1;
-	struct cvmx_l2c_qos_iobx_cn61xx cn66xx;
-	struct cvmx_l2c_qos_iobx_s cn68xx;
-	struct cvmx_l2c_qos_iobx_s cn68xxp1;
-	struct cvmx_l2c_qos_iobx_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_qos_ppx {
-	uint64_t u64;
-	struct cvmx_l2c_qos_ppx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_3_63:61;
-		uint64_t lvl:3;
-#else
-		uint64_t lvl:3;
-		uint64_t reserved_3_63:61;
-#endif
-	} s;
-	struct cvmx_l2c_qos_ppx_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_2_63:62;
-		uint64_t lvl:2;
-#else
-		uint64_t lvl:2;
-		uint64_t reserved_2_63:62;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_qos_ppx_cn61xx cn63xx;
-	struct cvmx_l2c_qos_ppx_cn61xx cn63xxp1;
-	struct cvmx_l2c_qos_ppx_cn61xx cn66xx;
-	struct cvmx_l2c_qos_ppx_s cn68xx;
-	struct cvmx_l2c_qos_ppx_s cn68xxp1;
-	struct cvmx_l2c_qos_ppx_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_qos_wgt {
-	uint64_t u64;
-	struct cvmx_l2c_qos_wgt_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t wgt7:8;
-		uint64_t wgt6:8;
-		uint64_t wgt5:8;
-		uint64_t wgt4:8;
-		uint64_t wgt3:8;
-		uint64_t wgt2:8;
-		uint64_t wgt1:8;
-		uint64_t wgt0:8;
-#else
-		uint64_t wgt0:8;
-		uint64_t wgt1:8;
-		uint64_t wgt2:8;
-		uint64_t wgt3:8;
-		uint64_t wgt4:8;
-		uint64_t wgt5:8;
-		uint64_t wgt6:8;
-		uint64_t wgt7:8;
-#endif
-	} s;
-	struct cvmx_l2c_qos_wgt_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t wgt3:8;
-		uint64_t wgt2:8;
-		uint64_t wgt1:8;
-		uint64_t wgt0:8;
-#else
-		uint64_t wgt0:8;
-		uint64_t wgt1:8;
-		uint64_t wgt2:8;
-		uint64_t wgt3:8;
-		uint64_t reserved_32_63:32;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_qos_wgt_cn61xx cn63xx;
-	struct cvmx_l2c_qos_wgt_cn61xx cn63xxp1;
-	struct cvmx_l2c_qos_wgt_cn61xx cn66xx;
-	struct cvmx_l2c_qos_wgt_s cn68xx;
-	struct cvmx_l2c_qos_wgt_s cn68xxp1;
-	struct cvmx_l2c_qos_wgt_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_rscx_pfc {
-	uint64_t u64;
-	struct cvmx_l2c_rscx_pfc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_rscx_pfc_s cn61xx;
-	struct cvmx_l2c_rscx_pfc_s cn63xx;
-	struct cvmx_l2c_rscx_pfc_s cn63xxp1;
-	struct cvmx_l2c_rscx_pfc_s cn66xx;
-	struct cvmx_l2c_rscx_pfc_s cn68xx;
-	struct cvmx_l2c_rscx_pfc_s cn68xxp1;
-	struct cvmx_l2c_rscx_pfc_s cnf71xx;
-};
-
-union cvmx_l2c_rsdx_pfc {
-	uint64_t u64;
-	struct cvmx_l2c_rsdx_pfc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_rsdx_pfc_s cn61xx;
-	struct cvmx_l2c_rsdx_pfc_s cn63xx;
-	struct cvmx_l2c_rsdx_pfc_s cn63xxp1;
-	struct cvmx_l2c_rsdx_pfc_s cn66xx;
-	struct cvmx_l2c_rsdx_pfc_s cn68xx;
-	struct cvmx_l2c_rsdx_pfc_s cn68xxp1;
-	struct cvmx_l2c_rsdx_pfc_s cnf71xx;
-};
-
-union cvmx_l2c_spar0 {
-	uint64_t u64;
-	struct cvmx_l2c_spar0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t umsk3:8;
-		uint64_t umsk2:8;
-		uint64_t umsk1:8;
-		uint64_t umsk0:8;
-#else
-		uint64_t umsk0:8;
-		uint64_t umsk1:8;
-		uint64_t umsk2:8;
-		uint64_t umsk3:8;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_l2c_spar0_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_4_63:60;
-		uint64_t umsk0:4;
-#else
-		uint64_t umsk0:4;
-		uint64_t reserved_4_63:60;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_spar0_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_12_63:52;
-		uint64_t umsk1:4;
-		uint64_t reserved_4_7:4;
-		uint64_t umsk0:4;
-#else
-		uint64_t umsk0:4;
-		uint64_t reserved_4_7:4;
-		uint64_t umsk1:4;
-		uint64_t reserved_12_63:52;
-#endif
-	} cn31xx;
-	struct cvmx_l2c_spar0_s cn38xx;
-	struct cvmx_l2c_spar0_s cn38xxp2;
-	struct cvmx_l2c_spar0_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t umsk1:8;
-		uint64_t umsk0:8;
-#else
-		uint64_t umsk0:8;
-		uint64_t umsk1:8;
-		uint64_t reserved_16_63:48;
-#endif
-	} cn50xx;
-	struct cvmx_l2c_spar0_s cn52xx;
-	struct cvmx_l2c_spar0_s cn52xxp1;
-	struct cvmx_l2c_spar0_s cn56xx;
-	struct cvmx_l2c_spar0_s cn56xxp1;
-	struct cvmx_l2c_spar0_s cn58xx;
-	struct cvmx_l2c_spar0_s cn58xxp1;
-};
-
-union cvmx_l2c_spar1 {
-	uint64_t u64;
-	struct cvmx_l2c_spar1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t umsk7:8;
-		uint64_t umsk6:8;
-		uint64_t umsk5:8;
-		uint64_t umsk4:8;
-#else
-		uint64_t umsk4:8;
-		uint64_t umsk5:8;
-		uint64_t umsk6:8;
-		uint64_t umsk7:8;
-		uint64_t reserved_32_63:32;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_36_63:28,
+		__BITFIELD_FIELD(uint64_t cnt3rdclr:1,
+		__BITFIELD_FIELD(uint64_t cnt2rdclr:1,
+		__BITFIELD_FIELD(uint64_t cnt1rdclr:1,
+		__BITFIELD_FIELD(uint64_t cnt0rdclr:1,
+		__BITFIELD_FIELD(uint64_t cnt3ena:1,
+		__BITFIELD_FIELD(uint64_t cnt3clr:1,
+		__BITFIELD_FIELD(uint64_t cnt3sel:6,
+		__BITFIELD_FIELD(uint64_t cnt2ena:1,
+		__BITFIELD_FIELD(uint64_t cnt2clr:1,
+		__BITFIELD_FIELD(uint64_t cnt2sel:6,
+		__BITFIELD_FIELD(uint64_t cnt1ena:1,
+		__BITFIELD_FIELD(uint64_t cnt1clr:1,
+		__BITFIELD_FIELD(uint64_t cnt1sel:6,
+		__BITFIELD_FIELD(uint64_t cnt0ena:1,
+		__BITFIELD_FIELD(uint64_t cnt0clr:1,
+		__BITFIELD_FIELD(uint64_t cnt0sel:6,
+		;)))))))))))))))))
 	} s;
-	struct cvmx_l2c_spar1_s cn38xx;
-	struct cvmx_l2c_spar1_s cn38xxp2;
-	struct cvmx_l2c_spar1_s cn56xx;
-	struct cvmx_l2c_spar1_s cn56xxp1;
-	struct cvmx_l2c_spar1_s cn58xx;
-	struct cvmx_l2c_spar1_s cn58xxp1;
-};
-
-union cvmx_l2c_spar2 {
-	uint64_t u64;
-	struct cvmx_l2c_spar2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t umsk11:8;
-		uint64_t umsk10:8;
-		uint64_t umsk9:8;
-		uint64_t umsk8:8;
-#else
-		uint64_t umsk8:8;
-		uint64_t umsk9:8;
-		uint64_t umsk10:8;
-		uint64_t umsk11:8;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_l2c_spar2_s cn38xx;
-	struct cvmx_l2c_spar2_s cn38xxp2;
-	struct cvmx_l2c_spar2_s cn56xx;
-	struct cvmx_l2c_spar2_s cn56xxp1;
-	struct cvmx_l2c_spar2_s cn58xx;
-	struct cvmx_l2c_spar2_s cn58xxp1;
-};
-
-union cvmx_l2c_spar3 {
-	uint64_t u64;
-	struct cvmx_l2c_spar3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t umsk15:8;
-		uint64_t umsk14:8;
-		uint64_t umsk13:8;
-		uint64_t umsk12:8;
-#else
-		uint64_t umsk12:8;
-		uint64_t umsk13:8;
-		uint64_t umsk14:8;
-		uint64_t umsk15:8;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_l2c_spar3_s cn38xx;
-	struct cvmx_l2c_spar3_s cn38xxp2;
-	struct cvmx_l2c_spar3_s cn58xx;
-	struct cvmx_l2c_spar3_s cn58xxp1;
-};
-
-union cvmx_l2c_spar4 {
-	uint64_t u64;
-	struct cvmx_l2c_spar4_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_8_63:56;
-		uint64_t umskiob:8;
-#else
-		uint64_t umskiob:8;
-		uint64_t reserved_8_63:56;
-#endif
-	} s;
-	struct cvmx_l2c_spar4_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_4_63:60;
-		uint64_t umskiob:4;
-#else
-		uint64_t umskiob:4;
-		uint64_t reserved_4_63:60;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_spar4_cn30xx cn31xx;
-	struct cvmx_l2c_spar4_s cn38xx;
-	struct cvmx_l2c_spar4_s cn38xxp2;
-	struct cvmx_l2c_spar4_s cn50xx;
-	struct cvmx_l2c_spar4_s cn52xx;
-	struct cvmx_l2c_spar4_s cn52xxp1;
-	struct cvmx_l2c_spar4_s cn56xx;
-	struct cvmx_l2c_spar4_s cn56xxp1;
-	struct cvmx_l2c_spar4_s cn58xx;
-	struct cvmx_l2c_spar4_s cn58xxp1;
-};
-
-union cvmx_l2c_tadx_ecc0 {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_ecc0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_58_63:6;
-		uint64_t ow3ecc:10;
-		uint64_t reserved_42_47:6;
-		uint64_t ow2ecc:10;
-		uint64_t reserved_26_31:6;
-		uint64_t ow1ecc:10;
-		uint64_t reserved_10_15:6;
-		uint64_t ow0ecc:10;
-#else
-		uint64_t ow0ecc:10;
-		uint64_t reserved_10_15:6;
-		uint64_t ow1ecc:10;
-		uint64_t reserved_26_31:6;
-		uint64_t ow2ecc:10;
-		uint64_t reserved_42_47:6;
-		uint64_t ow3ecc:10;
-		uint64_t reserved_58_63:6;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_ecc0_s cn61xx;
-	struct cvmx_l2c_tadx_ecc0_s cn63xx;
-	struct cvmx_l2c_tadx_ecc0_s cn63xxp1;
-	struct cvmx_l2c_tadx_ecc0_s cn66xx;
-	struct cvmx_l2c_tadx_ecc0_s cn68xx;
-	struct cvmx_l2c_tadx_ecc0_s cn68xxp1;
-	struct cvmx_l2c_tadx_ecc0_s cnf71xx;
-};
-
-union cvmx_l2c_tadx_ecc1 {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_ecc1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_58_63:6;
-		uint64_t ow7ecc:10;
-		uint64_t reserved_42_47:6;
-		uint64_t ow6ecc:10;
-		uint64_t reserved_26_31:6;
-		uint64_t ow5ecc:10;
-		uint64_t reserved_10_15:6;
-		uint64_t ow4ecc:10;
-#else
-		uint64_t ow4ecc:10;
-		uint64_t reserved_10_15:6;
-		uint64_t ow5ecc:10;
-		uint64_t reserved_26_31:6;
-		uint64_t ow6ecc:10;
-		uint64_t reserved_42_47:6;
-		uint64_t ow7ecc:10;
-		uint64_t reserved_58_63:6;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_ecc1_s cn61xx;
-	struct cvmx_l2c_tadx_ecc1_s cn63xx;
-	struct cvmx_l2c_tadx_ecc1_s cn63xxp1;
-	struct cvmx_l2c_tadx_ecc1_s cn66xx;
-	struct cvmx_l2c_tadx_ecc1_s cn68xx;
-	struct cvmx_l2c_tadx_ecc1_s cn68xxp1;
-	struct cvmx_l2c_tadx_ecc1_s cnf71xx;
-};
-
-union cvmx_l2c_tadx_ien {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_ien_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_9_63:55;
-		uint64_t wrdislmc:1;
-		uint64_t rddislmc:1;
-		uint64_t noway:1;
-		uint64_t vbfdbe:1;
-		uint64_t vbfsbe:1;
-		uint64_t tagdbe:1;
-		uint64_t tagsbe:1;
-		uint64_t l2ddbe:1;
-		uint64_t l2dsbe:1;
-#else
-		uint64_t l2dsbe:1;
-		uint64_t l2ddbe:1;
-		uint64_t tagsbe:1;
-		uint64_t tagdbe:1;
-		uint64_t vbfsbe:1;
-		uint64_t vbfdbe:1;
-		uint64_t noway:1;
-		uint64_t rddislmc:1;
-		uint64_t wrdislmc:1;
-		uint64_t reserved_9_63:55;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_ien_s cn61xx;
-	struct cvmx_l2c_tadx_ien_s cn63xx;
-	struct cvmx_l2c_tadx_ien_cn63xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_7_63:57;
-		uint64_t noway:1;
-		uint64_t vbfdbe:1;
-		uint64_t vbfsbe:1;
-		uint64_t tagdbe:1;
-		uint64_t tagsbe:1;
-		uint64_t l2ddbe:1;
-		uint64_t l2dsbe:1;
-#else
-		uint64_t l2dsbe:1;
-		uint64_t l2ddbe:1;
-		uint64_t tagsbe:1;
-		uint64_t tagdbe:1;
-		uint64_t vbfsbe:1;
-		uint64_t vbfdbe:1;
-		uint64_t noway:1;
-		uint64_t reserved_7_63:57;
-#endif
-	} cn63xxp1;
-	struct cvmx_l2c_tadx_ien_s cn66xx;
-	struct cvmx_l2c_tadx_ien_s cn68xx;
-	struct cvmx_l2c_tadx_ien_s cn68xxp1;
-	struct cvmx_l2c_tadx_ien_s cnf71xx;
-};
-
-union cvmx_l2c_tadx_int {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_int_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_9_63:55;
-		uint64_t wrdislmc:1;
-		uint64_t rddislmc:1;
-		uint64_t noway:1;
-		uint64_t vbfdbe:1;
-		uint64_t vbfsbe:1;
-		uint64_t tagdbe:1;
-		uint64_t tagsbe:1;
-		uint64_t l2ddbe:1;
-		uint64_t l2dsbe:1;
-#else
-		uint64_t l2dsbe:1;
-		uint64_t l2ddbe:1;
-		uint64_t tagsbe:1;
-		uint64_t tagdbe:1;
-		uint64_t vbfsbe:1;
-		uint64_t vbfdbe:1;
-		uint64_t noway:1;
-		uint64_t rddislmc:1;
-		uint64_t wrdislmc:1;
-		uint64_t reserved_9_63:55;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_int_s cn61xx;
-	struct cvmx_l2c_tadx_int_s cn63xx;
-	struct cvmx_l2c_tadx_int_s cn66xx;
-	struct cvmx_l2c_tadx_int_s cn68xx;
-	struct cvmx_l2c_tadx_int_s cn68xxp1;
-	struct cvmx_l2c_tadx_int_s cnf71xx;
-};
-
-union cvmx_l2c_tadx_pfc0 {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_pfc0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_pfc0_s cn61xx;
-	struct cvmx_l2c_tadx_pfc0_s cn63xx;
-	struct cvmx_l2c_tadx_pfc0_s cn63xxp1;
-	struct cvmx_l2c_tadx_pfc0_s cn66xx;
-	struct cvmx_l2c_tadx_pfc0_s cn68xx;
-	struct cvmx_l2c_tadx_pfc0_s cn68xxp1;
-	struct cvmx_l2c_tadx_pfc0_s cnf71xx;
-};
-
-union cvmx_l2c_tadx_pfc1 {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_pfc1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_pfc1_s cn61xx;
-	struct cvmx_l2c_tadx_pfc1_s cn63xx;
-	struct cvmx_l2c_tadx_pfc1_s cn63xxp1;
-	struct cvmx_l2c_tadx_pfc1_s cn66xx;
-	struct cvmx_l2c_tadx_pfc1_s cn68xx;
-	struct cvmx_l2c_tadx_pfc1_s cn68xxp1;
-	struct cvmx_l2c_tadx_pfc1_s cnf71xx;
-};
-
-union cvmx_l2c_tadx_pfc2 {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_pfc2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_pfc2_s cn61xx;
-	struct cvmx_l2c_tadx_pfc2_s cn63xx;
-	struct cvmx_l2c_tadx_pfc2_s cn63xxp1;
-	struct cvmx_l2c_tadx_pfc2_s cn66xx;
-	struct cvmx_l2c_tadx_pfc2_s cn68xx;
-	struct cvmx_l2c_tadx_pfc2_s cn68xxp1;
-	struct cvmx_l2c_tadx_pfc2_s cnf71xx;
-};
-
-union cvmx_l2c_tadx_pfc3 {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_pfc3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_pfc3_s cn61xx;
-	struct cvmx_l2c_tadx_pfc3_s cn63xx;
-	struct cvmx_l2c_tadx_pfc3_s cn63xxp1;
-	struct cvmx_l2c_tadx_pfc3_s cn66xx;
-	struct cvmx_l2c_tadx_pfc3_s cn68xx;
-	struct cvmx_l2c_tadx_pfc3_s cn68xxp1;
-	struct cvmx_l2c_tadx_pfc3_s cnf71xx;
 };
 
 union cvmx_l2c_tadx_prf {
 	uint64_t u64;
 	struct cvmx_l2c_tadx_prf_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t cnt3sel:8;
-		uint64_t cnt2sel:8;
-		uint64_t cnt1sel:8;
-		uint64_t cnt0sel:8;
-#else
-		uint64_t cnt0sel:8;
-		uint64_t cnt1sel:8;
-		uint64_t cnt2sel:8;
-		uint64_t cnt3sel:8;
-		uint64_t reserved_32_63:32;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_32_63:32,
+		__BITFIELD_FIELD(uint64_t cnt3sel:8,
+		__BITFIELD_FIELD(uint64_t cnt2sel:8,
+		__BITFIELD_FIELD(uint64_t cnt1sel:8,
+		__BITFIELD_FIELD(uint64_t cnt0sel:8,
+		;)))))
 	} s;
-	struct cvmx_l2c_tadx_prf_s cn61xx;
-	struct cvmx_l2c_tadx_prf_s cn63xx;
-	struct cvmx_l2c_tadx_prf_s cn63xxp1;
-	struct cvmx_l2c_tadx_prf_s cn66xx;
-	struct cvmx_l2c_tadx_prf_s cn68xx;
-	struct cvmx_l2c_tadx_prf_s cn68xxp1;
-	struct cvmx_l2c_tadx_prf_s cnf71xx;
 };
 
 union cvmx_l2c_tadx_tag {
 	uint64_t u64;
 	struct cvmx_l2c_tadx_tag_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_46_63:18;
-		uint64_t ecc:6;
-		uint64_t reserved_36_39:4;
-		uint64_t tag:19;
-		uint64_t reserved_4_16:13;
-		uint64_t use:1;
-		uint64_t valid:1;
-		uint64_t dirty:1;
-		uint64_t lock:1;
-#else
-		uint64_t lock:1;
-		uint64_t dirty:1;
-		uint64_t valid:1;
-		uint64_t use:1;
-		uint64_t reserved_4_16:13;
-		uint64_t tag:19;
-		uint64_t reserved_36_39:4;
-		uint64_t ecc:6;
-		uint64_t reserved_46_63:18;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_tag_s cn61xx;
-	struct cvmx_l2c_tadx_tag_s cn63xx;
-	struct cvmx_l2c_tadx_tag_s cn63xxp1;
-	struct cvmx_l2c_tadx_tag_s cn66xx;
-	struct cvmx_l2c_tadx_tag_s cn68xx;
-	struct cvmx_l2c_tadx_tag_s cn68xxp1;
-	struct cvmx_l2c_tadx_tag_s cnf71xx;
-};
-
-union cvmx_l2c_ver_id {
-	uint64_t u64;
-	struct cvmx_l2c_ver_id_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t mask:64;
-#else
-		uint64_t mask:64;
-#endif
-	} s;
-	struct cvmx_l2c_ver_id_s cn61xx;
-	struct cvmx_l2c_ver_id_s cn63xx;
-	struct cvmx_l2c_ver_id_s cn63xxp1;
-	struct cvmx_l2c_ver_id_s cn66xx;
-	struct cvmx_l2c_ver_id_s cn68xx;
-	struct cvmx_l2c_ver_id_s cn68xxp1;
-	struct cvmx_l2c_ver_id_s cnf71xx;
-};
-
-union cvmx_l2c_ver_iob {
-	uint64_t u64;
-	struct cvmx_l2c_ver_iob_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_2_63:62;
-		uint64_t mask:2;
-#else
-		uint64_t mask:2;
-		uint64_t reserved_2_63:62;
-#endif
-	} s;
-	struct cvmx_l2c_ver_iob_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_1_63:63;
-		uint64_t mask:1;
-#else
-		uint64_t mask:1;
-		uint64_t reserved_1_63:63;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_ver_iob_cn61xx cn63xx;
-	struct cvmx_l2c_ver_iob_cn61xx cn63xxp1;
-	struct cvmx_l2c_ver_iob_cn61xx cn66xx;
-	struct cvmx_l2c_ver_iob_s cn68xx;
-	struct cvmx_l2c_ver_iob_s cn68xxp1;
-	struct cvmx_l2c_ver_iob_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_ver_msc {
-	uint64_t u64;
-	struct cvmx_l2c_ver_msc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_2_63:62;
-		uint64_t invl2:1;
-		uint64_t dwb:1;
-#else
-		uint64_t dwb:1;
-		uint64_t invl2:1;
-		uint64_t reserved_2_63:62;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_46_63:18,
+		__BITFIELD_FIELD(uint64_t ecc:6,
+		__BITFIELD_FIELD(uint64_t reserved_36_39:4,
+		__BITFIELD_FIELD(uint64_t tag:19,
+		__BITFIELD_FIELD(uint64_t reserved_4_16:13,
+		__BITFIELD_FIELD(uint64_t use:1,
+		__BITFIELD_FIELD(uint64_t valid:1,
+		__BITFIELD_FIELD(uint64_t dirty:1,
+		__BITFIELD_FIELD(uint64_t lock:1,
+		;)))))))))
 	} s;
-	struct cvmx_l2c_ver_msc_s cn61xx;
-	struct cvmx_l2c_ver_msc_s cn63xx;
-	struct cvmx_l2c_ver_msc_s cn66xx;
-	struct cvmx_l2c_ver_msc_s cn68xx;
-	struct cvmx_l2c_ver_msc_s cn68xxp1;
-	struct cvmx_l2c_ver_msc_s cnf71xx;
 };
 
-union cvmx_l2c_ver_pp {
-	uint64_t u64;
-	struct cvmx_l2c_ver_pp_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t mask:32;
-#else
-		uint64_t mask:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_l2c_ver_pp_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_4_63:60;
-		uint64_t mask:4;
-#else
-		uint64_t mask:4;
-		uint64_t reserved_4_63:60;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_ver_pp_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_6_63:58;
-		uint64_t mask:6;
-#else
-		uint64_t mask:6;
-		uint64_t reserved_6_63:58;
-#endif
-	} cn63xx;
-	struct cvmx_l2c_ver_pp_cn63xx cn63xxp1;
-	struct cvmx_l2c_ver_pp_cn66xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_10_63:54;
-		uint64_t mask:10;
-#else
-		uint64_t mask:10;
-		uint64_t reserved_10_63:54;
-#endif
-	} cn66xx;
-	struct cvmx_l2c_ver_pp_s cn68xx;
-	struct cvmx_l2c_ver_pp_s cn68xxp1;
-	struct cvmx_l2c_ver_pp_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_virtid_iobx {
-	uint64_t u64;
-	struct cvmx_l2c_virtid_iobx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t dwbid:6;
-		uint64_t reserved_6_7:2;
-		uint64_t id:6;
-#else
-		uint64_t id:6;
-		uint64_t reserved_6_7:2;
-		uint64_t dwbid:6;
-		uint64_t reserved_14_63:50;
-#endif
-	} s;
-	struct cvmx_l2c_virtid_iobx_s cn61xx;
-	struct cvmx_l2c_virtid_iobx_s cn63xx;
-	struct cvmx_l2c_virtid_iobx_s cn63xxp1;
-	struct cvmx_l2c_virtid_iobx_s cn66xx;
-	struct cvmx_l2c_virtid_iobx_s cn68xx;
-	struct cvmx_l2c_virtid_iobx_s cn68xxp1;
-	struct cvmx_l2c_virtid_iobx_s cnf71xx;
-};
-
-union cvmx_l2c_virtid_ppx {
-	uint64_t u64;
-	struct cvmx_l2c_virtid_ppx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_6_63:58;
-		uint64_t id:6;
-#else
-		uint64_t id:6;
-		uint64_t reserved_6_63:58;
-#endif
-	} s;
-	struct cvmx_l2c_virtid_ppx_s cn61xx;
-	struct cvmx_l2c_virtid_ppx_s cn63xx;
-	struct cvmx_l2c_virtid_ppx_s cn63xxp1;
-	struct cvmx_l2c_virtid_ppx_s cn66xx;
-	struct cvmx_l2c_virtid_ppx_s cn68xx;
-	struct cvmx_l2c_virtid_ppx_s cn68xxp1;
-	struct cvmx_l2c_virtid_ppx_s cnf71xx;
-};
-
-union cvmx_l2c_vrt_ctl {
-	uint64_t u64;
-	struct cvmx_l2c_vrt_ctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_9_63:55;
-		uint64_t ooberr:1;
-		uint64_t reserved_7_7:1;
-		uint64_t memsz:3;
-		uint64_t numid:3;
-		uint64_t enable:1;
-#else
-		uint64_t enable:1;
-		uint64_t numid:3;
-		uint64_t memsz:3;
-		uint64_t reserved_7_7:1;
-		uint64_t ooberr:1;
-		uint64_t reserved_9_63:55;
-#endif
-	} s;
-	struct cvmx_l2c_vrt_ctl_s cn61xx;
-	struct cvmx_l2c_vrt_ctl_s cn63xx;
-	struct cvmx_l2c_vrt_ctl_s cn63xxp1;
-	struct cvmx_l2c_vrt_ctl_s cn66xx;
-	struct cvmx_l2c_vrt_ctl_s cn68xx;
-	struct cvmx_l2c_vrt_ctl_s cn68xxp1;
-	struct cvmx_l2c_vrt_ctl_s cnf71xx;
-};
-
-union cvmx_l2c_vrt_memx {
-	uint64_t u64;
-	struct cvmx_l2c_vrt_memx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_36_63:28;
-		uint64_t parity:4;
-		uint64_t data:32;
-#else
-		uint64_t data:32;
-		uint64_t parity:4;
-		uint64_t reserved_36_63:28;
-#endif
-	} s;
-	struct cvmx_l2c_vrt_memx_s cn61xx;
-	struct cvmx_l2c_vrt_memx_s cn63xx;
-	struct cvmx_l2c_vrt_memx_s cn63xxp1;
-	struct cvmx_l2c_vrt_memx_s cn66xx;
-	struct cvmx_l2c_vrt_memx_s cn68xx;
-	struct cvmx_l2c_vrt_memx_s cn68xxp1;
-	struct cvmx_l2c_vrt_memx_s cnf71xx;
-};
-
-union cvmx_l2c_wpar_iobx {
-	uint64_t u64;
-	struct cvmx_l2c_wpar_iobx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t mask:16;
-#else
-		uint64_t mask:16;
-		uint64_t reserved_16_63:48;
-#endif
-	} s;
-	struct cvmx_l2c_wpar_iobx_s cn61xx;
-	struct cvmx_l2c_wpar_iobx_s cn63xx;
-	struct cvmx_l2c_wpar_iobx_s cn63xxp1;
-	struct cvmx_l2c_wpar_iobx_s cn66xx;
-	struct cvmx_l2c_wpar_iobx_s cn68xx;
-	struct cvmx_l2c_wpar_iobx_s cn68xxp1;
-	struct cvmx_l2c_wpar_iobx_s cnf71xx;
-};
-
-union cvmx_l2c_wpar_ppx {
-	uint64_t u64;
-	struct cvmx_l2c_wpar_ppx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t mask:16;
-#else
-		uint64_t mask:16;
-		uint64_t reserved_16_63:48;
-#endif
-	} s;
-	struct cvmx_l2c_wpar_ppx_s cn61xx;
-	struct cvmx_l2c_wpar_ppx_s cn63xx;
-	struct cvmx_l2c_wpar_ppx_s cn63xxp1;
-	struct cvmx_l2c_wpar_ppx_s cn66xx;
-	struct cvmx_l2c_wpar_ppx_s cn68xx;
-	struct cvmx_l2c_wpar_ppx_s cn68xxp1;
-	struct cvmx_l2c_wpar_ppx_s cnf71xx;
-};
-
-union cvmx_l2c_xmcx_pfc {
-	uint64_t u64;
-	struct cvmx_l2c_xmcx_pfc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_xmcx_pfc_s cn61xx;
-	struct cvmx_l2c_xmcx_pfc_s cn63xx;
-	struct cvmx_l2c_xmcx_pfc_s cn63xxp1;
-	struct cvmx_l2c_xmcx_pfc_s cn66xx;
-	struct cvmx_l2c_xmcx_pfc_s cn68xx;
-	struct cvmx_l2c_xmcx_pfc_s cn68xxp1;
-	struct cvmx_l2c_xmcx_pfc_s cnf71xx;
-};
-
-union cvmx_l2c_xmc_cmd {
+union cvmx_l2c_lckbase {
 	uint64_t u64;
-	struct cvmx_l2c_xmc_cmd_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t inuse:1;
-		uint64_t cmd:6;
-		uint64_t reserved_38_56:19;
-		uint64_t addr:38;
-#else
-		uint64_t addr:38;
-		uint64_t reserved_38_56:19;
-		uint64_t cmd:6;
-		uint64_t inuse:1;
-#endif
+	struct cvmx_l2c_lckbase_s {
+		__BITFIELD_FIELD(uint64_t reserved_31_63:33,
+		__BITFIELD_FIELD(uint64_t lck_base:27,
+		__BITFIELD_FIELD(uint64_t reserved_1_3:3,
+		__BITFIELD_FIELD(uint64_t lck_ena:1,
+		;))))
 	} s;
-	struct cvmx_l2c_xmc_cmd_s cn61xx;
-	struct cvmx_l2c_xmc_cmd_s cn63xx;
-	struct cvmx_l2c_xmc_cmd_s cn63xxp1;
-	struct cvmx_l2c_xmc_cmd_s cn66xx;
-	struct cvmx_l2c_xmc_cmd_s cn68xx;
-	struct cvmx_l2c_xmc_cmd_s cn68xxp1;
-	struct cvmx_l2c_xmc_cmd_s cnf71xx;
 };
 
-union cvmx_l2c_xmdx_pfc {
+union cvmx_l2c_lckoff {
 	uint64_t u64;
-	struct cvmx_l2c_xmdx_pfc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
+	struct cvmx_l2c_lckoff_s {
+		__BITFIELD_FIELD(uint64_t reserved_10_63:54,
+		__BITFIELD_FIELD(uint64_t lck_offset:10,
+		;))
 	} s;
-	struct cvmx_l2c_xmdx_pfc_s cn61xx;
-	struct cvmx_l2c_xmdx_pfc_s cn63xx;
-	struct cvmx_l2c_xmdx_pfc_s cn63xxp1;
-	struct cvmx_l2c_xmdx_pfc_s cn66xx;
-	struct cvmx_l2c_xmdx_pfc_s cn68xx;
-	struct cvmx_l2c_xmdx_pfc_s cn68xxp1;
-	struct cvmx_l2c_xmdx_pfc_s cnf71xx;
 };
 
 #endif
diff --git a/arch/mips/include/asm/octeon/cvmx-l2c.h b/arch/mips/include/asm/octeon/cvmx-l2c.h
index ddb429210a0e..02c4479a90c8 100644
--- a/arch/mips/include/asm/octeon/cvmx-l2c.h
+++ b/arch/mips/include/asm/octeon/cvmx-l2c.h
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2010 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -33,48 +33,39 @@
 #ifndef __CVMX_L2C_H__
 #define __CVMX_L2C_H__
 
-#define CVMX_L2_ASSOC	  cvmx_l2c_get_num_assoc()   /* Deprecated macro, use function */
-#define CVMX_L2_SET_BITS  cvmx_l2c_get_set_bits()    /* Deprecated macro, use function */
-#define CVMX_L2_SETS	  cvmx_l2c_get_num_sets()    /* Deprecated macro, use function */
+#include <uapi/asm/bitfield.h>
 
+#define CVMX_L2_ASSOC	 cvmx_l2c_get_num_assoc()	/* Deprecated macro */
+#define CVMX_L2_SET_BITS cvmx_l2c_get_set_bits()	/* Deprecated macro */
+#define CVMX_L2_SETS	 cvmx_l2c_get_num_sets()	/* Deprecated macro */
 
-#define CVMX_L2C_IDX_ADDR_SHIFT 7  /* based on 128 byte cache line size */
+/* Based on 128 byte cache line size */
+#define CVMX_L2C_IDX_ADDR_SHIFT	7
 #define CVMX_L2C_IDX_MASK	(cvmx_l2c_get_num_sets() - 1)
 
 /* Defines for index aliasing computations */
-#define CVMX_L2C_TAG_ADDR_ALIAS_SHIFT (CVMX_L2C_IDX_ADDR_SHIFT + cvmx_l2c_get_set_bits())
+#define CVMX_L2C_TAG_ADDR_ALIAS_SHIFT (CVMX_L2C_IDX_ADDR_SHIFT +	       \
+		cvmx_l2c_get_set_bits())
 #define CVMX_L2C_ALIAS_MASK (CVMX_L2C_IDX_MASK << CVMX_L2C_TAG_ADDR_ALIAS_SHIFT)
-#define CVMX_L2C_MEMBANK_SELECT_SIZE  4096
+#define CVMX_L2C_MEMBANK_SELECT_SIZE 4096
 
-/* Defines for Virtualizations, valid only from Octeon II onwards. */
-#define CVMX_L2C_VRT_MAX_VIRTID_ALLOWED ((OCTEON_IS_MODEL(OCTEON_CN63XX)) ? 64 : 0)
-#define CVMX_L2C_VRT_MAX_MEMSZ_ALLOWED ((OCTEON_IS_MODEL(OCTEON_CN63XX)) ? 32 : 0)
+/* Number of L2C Tag-and-data sections (TADs) that are connected to LMC. */
+#define CVMX_L2C_TADS  1
 
 union cvmx_l2c_tag {
 	uint64_t u64;
 	struct {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved:28;
-		uint64_t V:1;		/* Line valid */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t L:1;		/* Line locked */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t addr:32;	/* Phys mem (not all bits valid) */
-#else
-		uint64_t addr:32;	/* Phys mem (not all bits valid) */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t L:1;		/* Line locked */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t V:1;		/* Line valid */
-		uint64_t reserved:28;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved:28,
+		__BITFIELD_FIELD(uint64_t V:1,
+		__BITFIELD_FIELD(uint64_t D:1,
+		__BITFIELD_FIELD(uint64_t L:1,
+		__BITFIELD_FIELD(uint64_t U:1,
+		__BITFIELD_FIELD(uint64_t addr:32,
+		;))))))
 	} s;
 };
 
-/* Number of L2C Tag-and-data sections (TADs) that are connected to LMC. */
-#define CVMX_L2C_TADS  1
-
-  /* L2C Performance Counter events. */
+/* L2C Performance Counter events. */
 enum cvmx_l2c_event {
 	CVMX_L2C_EVENT_CYCLES		=  0,
 	CVMX_L2C_EVENT_INSTRUCTION_MISS =  1,
@@ -175,7 +166,8 @@ enum cvmx_l2c_tad_event {
  *
  * @note The routine does not clear the counter.
  */
-void cvmx_l2c_config_perf(uint32_t counter, enum cvmx_l2c_event event, uint32_t clear_on_read);
+void cvmx_l2c_config_perf(uint32_t counter, enum cvmx_l2c_event event,
+			  uint32_t clear_on_read);
 
 /**
  * Read the given L2 Cache performance counter. The counter must be configured
@@ -307,8 +299,11 @@ int cvmx_l2c_unlock_mem_region(uint64_t start, uint64_t len);
 union cvmx_l2c_tag cvmx_l2c_get_tag(uint32_t association, uint32_t index);
 
 /* Wrapper providing a deprecated old function name */
-static inline union cvmx_l2c_tag cvmx_get_l2c_tag(uint32_t association, uint32_t index) __attribute__((deprecated));
-static inline union cvmx_l2c_tag cvmx_get_l2c_tag(uint32_t association, uint32_t index)
+static inline union cvmx_l2c_tag cvmx_get_l2c_tag(uint32_t association,
+						  uint32_t index)
+						  __attribute__((deprecated));
+static inline union cvmx_l2c_tag cvmx_get_l2c_tag(uint32_t association,
+						  uint32_t index)
 {
 	return cvmx_l2c_get_tag(association, index);
 }
diff --git a/arch/mips/include/asm/octeon/cvmx-l2d-defs.h b/arch/mips/include/asm/octeon/cvmx-l2d-defs.h
deleted file mode 100644
index 11a456215638..000000000000
--- a/arch/mips/include/asm/octeon/cvmx-l2d-defs.h
+++ /dev/null
@@ -1,526 +0,0 @@
-/***********************license start***************
- * Author: Cavium Networks
- *
- * Contact: support@caviumnetworks.com
- * This file is part of the OCTEON SDK
- *
- * Copyright (c) 2003-2012 Cavium Networks
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, Version 2, as
- * published by the Free Software Foundation.
- *
- * This file is distributed in the hope that it will be useful, but
- * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
- * NONINFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this file; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- * or visit http://www.gnu.org/licenses/.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium Networks for more information
- ***********************license end**************************************/
-
-#ifndef __CVMX_L2D_DEFS_H__
-#define __CVMX_L2D_DEFS_H__
-
-#define CVMX_L2D_BST0 (CVMX_ADD_IO_SEG(0x0001180080000780ull))
-#define CVMX_L2D_BST1 (CVMX_ADD_IO_SEG(0x0001180080000788ull))
-#define CVMX_L2D_BST2 (CVMX_ADD_IO_SEG(0x0001180080000790ull))
-#define CVMX_L2D_BST3 (CVMX_ADD_IO_SEG(0x0001180080000798ull))
-#define CVMX_L2D_ERR (CVMX_ADD_IO_SEG(0x0001180080000010ull))
-#define CVMX_L2D_FADR (CVMX_ADD_IO_SEG(0x0001180080000018ull))
-#define CVMX_L2D_FSYN0 (CVMX_ADD_IO_SEG(0x0001180080000020ull))
-#define CVMX_L2D_FSYN1 (CVMX_ADD_IO_SEG(0x0001180080000028ull))
-#define CVMX_L2D_FUS0 (CVMX_ADD_IO_SEG(0x00011800800007A0ull))
-#define CVMX_L2D_FUS1 (CVMX_ADD_IO_SEG(0x00011800800007A8ull))
-#define CVMX_L2D_FUS2 (CVMX_ADD_IO_SEG(0x00011800800007B0ull))
-#define CVMX_L2D_FUS3 (CVMX_ADD_IO_SEG(0x00011800800007B8ull))
-
-union cvmx_l2d_bst0 {
-	uint64_t u64;
-	struct cvmx_l2d_bst0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_35_63:29;
-		uint64_t ftl:1;
-		uint64_t q0stat:34;
-#else
-		uint64_t q0stat:34;
-		uint64_t ftl:1;
-		uint64_t reserved_35_63:29;
-#endif
-	} s;
-	struct cvmx_l2d_bst0_s cn30xx;
-	struct cvmx_l2d_bst0_s cn31xx;
-	struct cvmx_l2d_bst0_s cn38xx;
-	struct cvmx_l2d_bst0_s cn38xxp2;
-	struct cvmx_l2d_bst0_s cn50xx;
-	struct cvmx_l2d_bst0_s cn52xx;
-	struct cvmx_l2d_bst0_s cn52xxp1;
-	struct cvmx_l2d_bst0_s cn56xx;
-	struct cvmx_l2d_bst0_s cn56xxp1;
-	struct cvmx_l2d_bst0_s cn58xx;
-	struct cvmx_l2d_bst0_s cn58xxp1;
-};
-
-union cvmx_l2d_bst1 {
-	uint64_t u64;
-	struct cvmx_l2d_bst1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_34_63:30;
-		uint64_t q1stat:34;
-#else
-		uint64_t q1stat:34;
-		uint64_t reserved_34_63:30;
-#endif
-	} s;
-	struct cvmx_l2d_bst1_s cn30xx;
-	struct cvmx_l2d_bst1_s cn31xx;
-	struct cvmx_l2d_bst1_s cn38xx;
-	struct cvmx_l2d_bst1_s cn38xxp2;
-	struct cvmx_l2d_bst1_s cn50xx;
-	struct cvmx_l2d_bst1_s cn52xx;
-	struct cvmx_l2d_bst1_s cn52xxp1;
-	struct cvmx_l2d_bst1_s cn56xx;
-	struct cvmx_l2d_bst1_s cn56xxp1;
-	struct cvmx_l2d_bst1_s cn58xx;
-	struct cvmx_l2d_bst1_s cn58xxp1;
-};
-
-union cvmx_l2d_bst2 {
-	uint64_t u64;
-	struct cvmx_l2d_bst2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_34_63:30;
-		uint64_t q2stat:34;
-#else
-		uint64_t q2stat:34;
-		uint64_t reserved_34_63:30;
-#endif
-	} s;
-	struct cvmx_l2d_bst2_s cn30xx;
-	struct cvmx_l2d_bst2_s cn31xx;
-	struct cvmx_l2d_bst2_s cn38xx;
-	struct cvmx_l2d_bst2_s cn38xxp2;
-	struct cvmx_l2d_bst2_s cn50xx;
-	struct cvmx_l2d_bst2_s cn52xx;
-	struct cvmx_l2d_bst2_s cn52xxp1;
-	struct cvmx_l2d_bst2_s cn56xx;
-	struct cvmx_l2d_bst2_s cn56xxp1;
-	struct cvmx_l2d_bst2_s cn58xx;
-	struct cvmx_l2d_bst2_s cn58xxp1;
-};
-
-union cvmx_l2d_bst3 {
-	uint64_t u64;
-	struct cvmx_l2d_bst3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_34_63:30;
-		uint64_t q3stat:34;
-#else
-		uint64_t q3stat:34;
-		uint64_t reserved_34_63:30;
-#endif
-	} s;
-	struct cvmx_l2d_bst3_s cn30xx;
-	struct cvmx_l2d_bst3_s cn31xx;
-	struct cvmx_l2d_bst3_s cn38xx;
-	struct cvmx_l2d_bst3_s cn38xxp2;
-	struct cvmx_l2d_bst3_s cn50xx;
-	struct cvmx_l2d_bst3_s cn52xx;
-	struct cvmx_l2d_bst3_s cn52xxp1;
-	struct cvmx_l2d_bst3_s cn56xx;
-	struct cvmx_l2d_bst3_s cn56xxp1;
-	struct cvmx_l2d_bst3_s cn58xx;
-	struct cvmx_l2d_bst3_s cn58xxp1;
-};
-
-union cvmx_l2d_err {
-	uint64_t u64;
-	struct cvmx_l2d_err_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_6_63:58;
-		uint64_t bmhclsel:1;
-		uint64_t ded_err:1;
-		uint64_t sec_err:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_intena:1;
-		uint64_t ecc_ena:1;
-#else
-		uint64_t ecc_ena:1;
-		uint64_t sec_intena:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_err:1;
-		uint64_t ded_err:1;
-		uint64_t bmhclsel:1;
-		uint64_t reserved_6_63:58;
-#endif
-	} s;
-	struct cvmx_l2d_err_s cn30xx;
-	struct cvmx_l2d_err_s cn31xx;
-	struct cvmx_l2d_err_s cn38xx;
-	struct cvmx_l2d_err_s cn38xxp2;
-	struct cvmx_l2d_err_s cn50xx;
-	struct cvmx_l2d_err_s cn52xx;
-	struct cvmx_l2d_err_s cn52xxp1;
-	struct cvmx_l2d_err_s cn56xx;
-	struct cvmx_l2d_err_s cn56xxp1;
-	struct cvmx_l2d_err_s cn58xx;
-	struct cvmx_l2d_err_s cn58xxp1;
-};
-
-union cvmx_l2d_fadr {
-	uint64_t u64;
-	struct cvmx_l2d_fadr_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_19_63:45;
-		uint64_t fadru:1;
-		uint64_t fowmsk:4;
-		uint64_t fset:3;
-		uint64_t fadr:11;
-#else
-		uint64_t fadr:11;
-		uint64_t fset:3;
-		uint64_t fowmsk:4;
-		uint64_t fadru:1;
-		uint64_t reserved_19_63:45;
-#endif
-	} s;
-	struct cvmx_l2d_fadr_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_18_63:46;
-		uint64_t fowmsk:4;
-		uint64_t reserved_13_13:1;
-		uint64_t fset:2;
-		uint64_t reserved_9_10:2;
-		uint64_t fadr:9;
-#else
-		uint64_t fadr:9;
-		uint64_t reserved_9_10:2;
-		uint64_t fset:2;
-		uint64_t reserved_13_13:1;
-		uint64_t fowmsk:4;
-		uint64_t reserved_18_63:46;
-#endif
-	} cn30xx;
-	struct cvmx_l2d_fadr_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_18_63:46;
-		uint64_t fowmsk:4;
-		uint64_t reserved_13_13:1;
-		uint64_t fset:2;
-		uint64_t reserved_10_10:1;
-		uint64_t fadr:10;
-#else
-		uint64_t fadr:10;
-		uint64_t reserved_10_10:1;
-		uint64_t fset:2;
-		uint64_t reserved_13_13:1;
-		uint64_t fowmsk:4;
-		uint64_t reserved_18_63:46;
-#endif
-	} cn31xx;
-	struct cvmx_l2d_fadr_cn38xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_18_63:46;
-		uint64_t fowmsk:4;
-		uint64_t fset:3;
-		uint64_t fadr:11;
-#else
-		uint64_t fadr:11;
-		uint64_t fset:3;
-		uint64_t fowmsk:4;
-		uint64_t reserved_18_63:46;
-#endif
-	} cn38xx;
-	struct cvmx_l2d_fadr_cn38xx cn38xxp2;
-	struct cvmx_l2d_fadr_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_18_63:46;
-		uint64_t fowmsk:4;
-		uint64_t fset:3;
-		uint64_t reserved_8_10:3;
-		uint64_t fadr:8;
-#else
-		uint64_t fadr:8;
-		uint64_t reserved_8_10:3;
-		uint64_t fset:3;
-		uint64_t fowmsk:4;
-		uint64_t reserved_18_63:46;
-#endif
-	} cn50xx;
-	struct cvmx_l2d_fadr_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_18_63:46;
-		uint64_t fowmsk:4;
-		uint64_t fset:3;
-		uint64_t reserved_10_10:1;
-		uint64_t fadr:10;
-#else
-		uint64_t fadr:10;
-		uint64_t reserved_10_10:1;
-		uint64_t fset:3;
-		uint64_t fowmsk:4;
-		uint64_t reserved_18_63:46;
-#endif
-	} cn52xx;
-	struct cvmx_l2d_fadr_cn52xx cn52xxp1;
-	struct cvmx_l2d_fadr_s cn56xx;
-	struct cvmx_l2d_fadr_s cn56xxp1;
-	struct cvmx_l2d_fadr_s cn58xx;
-	struct cvmx_l2d_fadr_s cn58xxp1;
-};
-
-union cvmx_l2d_fsyn0 {
-	uint64_t u64;
-	struct cvmx_l2d_fsyn0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_20_63:44;
-		uint64_t fsyn_ow1:10;
-		uint64_t fsyn_ow0:10;
-#else
-		uint64_t fsyn_ow0:10;
-		uint64_t fsyn_ow1:10;
-		uint64_t reserved_20_63:44;
-#endif
-	} s;
-	struct cvmx_l2d_fsyn0_s cn30xx;
-	struct cvmx_l2d_fsyn0_s cn31xx;
-	struct cvmx_l2d_fsyn0_s cn38xx;
-	struct cvmx_l2d_fsyn0_s cn38xxp2;
-	struct cvmx_l2d_fsyn0_s cn50xx;
-	struct cvmx_l2d_fsyn0_s cn52xx;
-	struct cvmx_l2d_fsyn0_s cn52xxp1;
-	struct cvmx_l2d_fsyn0_s cn56xx;
-	struct cvmx_l2d_fsyn0_s cn56xxp1;
-	struct cvmx_l2d_fsyn0_s cn58xx;
-	struct cvmx_l2d_fsyn0_s cn58xxp1;
-};
-
-union cvmx_l2d_fsyn1 {
-	uint64_t u64;
-	struct cvmx_l2d_fsyn1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_20_63:44;
-		uint64_t fsyn_ow3:10;
-		uint64_t fsyn_ow2:10;
-#else
-		uint64_t fsyn_ow2:10;
-		uint64_t fsyn_ow3:10;
-		uint64_t reserved_20_63:44;
-#endif
-	} s;
-	struct cvmx_l2d_fsyn1_s cn30xx;
-	struct cvmx_l2d_fsyn1_s cn31xx;
-	struct cvmx_l2d_fsyn1_s cn38xx;
-	struct cvmx_l2d_fsyn1_s cn38xxp2;
-	struct cvmx_l2d_fsyn1_s cn50xx;
-	struct cvmx_l2d_fsyn1_s cn52xx;
-	struct cvmx_l2d_fsyn1_s cn52xxp1;
-	struct cvmx_l2d_fsyn1_s cn56xx;
-	struct cvmx_l2d_fsyn1_s cn56xxp1;
-	struct cvmx_l2d_fsyn1_s cn58xx;
-	struct cvmx_l2d_fsyn1_s cn58xxp1;
-};
-
-union cvmx_l2d_fus0 {
-	uint64_t u64;
-	struct cvmx_l2d_fus0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_34_63:30;
-		uint64_t q0fus:34;
-#else
-		uint64_t q0fus:34;
-		uint64_t reserved_34_63:30;
-#endif
-	} s;
-	struct cvmx_l2d_fus0_s cn30xx;
-	struct cvmx_l2d_fus0_s cn31xx;
-	struct cvmx_l2d_fus0_s cn38xx;
-	struct cvmx_l2d_fus0_s cn38xxp2;
-	struct cvmx_l2d_fus0_s cn50xx;
-	struct cvmx_l2d_fus0_s cn52xx;
-	struct cvmx_l2d_fus0_s cn52xxp1;
-	struct cvmx_l2d_fus0_s cn56xx;
-	struct cvmx_l2d_fus0_s cn56xxp1;
-	struct cvmx_l2d_fus0_s cn58xx;
-	struct cvmx_l2d_fus0_s cn58xxp1;
-};
-
-union cvmx_l2d_fus1 {
-	uint64_t u64;
-	struct cvmx_l2d_fus1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_34_63:30;
-		uint64_t q1fus:34;
-#else
-		uint64_t q1fus:34;
-		uint64_t reserved_34_63:30;
-#endif
-	} s;
-	struct cvmx_l2d_fus1_s cn30xx;
-	struct cvmx_l2d_fus1_s cn31xx;
-	struct cvmx_l2d_fus1_s cn38xx;
-	struct cvmx_l2d_fus1_s cn38xxp2;
-	struct cvmx_l2d_fus1_s cn50xx;
-	struct cvmx_l2d_fus1_s cn52xx;
-	struct cvmx_l2d_fus1_s cn52xxp1;
-	struct cvmx_l2d_fus1_s cn56xx;
-	struct cvmx_l2d_fus1_s cn56xxp1;
-	struct cvmx_l2d_fus1_s cn58xx;
-	struct cvmx_l2d_fus1_s cn58xxp1;
-};
-
-union cvmx_l2d_fus2 {
-	uint64_t u64;
-	struct cvmx_l2d_fus2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_34_63:30;
-		uint64_t q2fus:34;
-#else
-		uint64_t q2fus:34;
-		uint64_t reserved_34_63:30;
-#endif
-	} s;
-	struct cvmx_l2d_fus2_s cn30xx;
-	struct cvmx_l2d_fus2_s cn31xx;
-	struct cvmx_l2d_fus2_s cn38xx;
-	struct cvmx_l2d_fus2_s cn38xxp2;
-	struct cvmx_l2d_fus2_s cn50xx;
-	struct cvmx_l2d_fus2_s cn52xx;
-	struct cvmx_l2d_fus2_s cn52xxp1;
-	struct cvmx_l2d_fus2_s cn56xx;
-	struct cvmx_l2d_fus2_s cn56xxp1;
-	struct cvmx_l2d_fus2_s cn58xx;
-	struct cvmx_l2d_fus2_s cn58xxp1;
-};
-
-union cvmx_l2d_fus3 {
-	uint64_t u64;
-	struct cvmx_l2d_fus3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_40_63:24;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_34_36:3;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t reserved_34_36:3;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_40_63:24;
-#endif
-	} s;
-	struct cvmx_l2d_fus3_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_35_63:29;
-		uint64_t crip_64k:1;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t crip_64k:1;
-		uint64_t reserved_35_63:29;
-#endif
-	} cn30xx;
-	struct cvmx_l2d_fus3_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_35_63:29;
-		uint64_t crip_128k:1;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t crip_128k:1;
-		uint64_t reserved_35_63:29;
-#endif
-	} cn31xx;
-	struct cvmx_l2d_fus3_cn38xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_36_63:28;
-		uint64_t crip_256k:1;
-		uint64_t crip_512k:1;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t crip_512k:1;
-		uint64_t crip_256k:1;
-		uint64_t reserved_36_63:28;
-#endif
-	} cn38xx;
-	struct cvmx_l2d_fus3_cn38xx cn38xxp2;
-	struct cvmx_l2d_fus3_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_40_63:24;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_36_36:1;
-		uint64_t crip_32k:1;
-		uint64_t crip_64k:1;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t crip_64k:1;
-		uint64_t crip_32k:1;
-		uint64_t reserved_36_36:1;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_40_63:24;
-#endif
-	} cn50xx;
-	struct cvmx_l2d_fus3_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_40_63:24;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_36_36:1;
-		uint64_t crip_128k:1;
-		uint64_t crip_256k:1;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t crip_256k:1;
-		uint64_t crip_128k:1;
-		uint64_t reserved_36_36:1;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_40_63:24;
-#endif
-	} cn52xx;
-	struct cvmx_l2d_fus3_cn52xx cn52xxp1;
-	struct cvmx_l2d_fus3_cn56xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_40_63:24;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_36_36:1;
-		uint64_t crip_512k:1;
-		uint64_t crip_1024k:1;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t crip_1024k:1;
-		uint64_t crip_512k:1;
-		uint64_t reserved_36_36:1;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_40_63:24;
-#endif
-	} cn56xx;
-	struct cvmx_l2d_fus3_cn56xx cn56xxp1;
-	struct cvmx_l2d_fus3_cn58xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_39_63:25;
-		uint64_t ema_ctl:2;
-		uint64_t reserved_36_36:1;
-		uint64_t crip_512k:1;
-		uint64_t crip_1024k:1;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t crip_1024k:1;
-		uint64_t crip_512k:1;
-		uint64_t reserved_36_36:1;
-		uint64_t ema_ctl:2;
-		uint64_t reserved_39_63:25;
-#endif
-	} cn58xx;
-	struct cvmx_l2d_fus3_cn58xx cn58xxp1;
-};
-
-#endif
diff --git a/arch/mips/include/asm/octeon/cvmx-l2t-defs.h b/arch/mips/include/asm/octeon/cvmx-l2t-defs.h
index 83ce22c080e6..fe50671fd1bb 100644
--- a/arch/mips/include/asm/octeon/cvmx-l2t-defs.h
+++ b/arch/mips/include/asm/octeon/cvmx-l2t-defs.h
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2012 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -28,210 +28,116 @@
 #ifndef __CVMX_L2T_DEFS_H__
 #define __CVMX_L2T_DEFS_H__
 
-#define CVMX_L2T_ERR (CVMX_ADD_IO_SEG(0x0001180080000008ull))
+#include <uapi/asm/bitfield.h>
+
+#define CVMX_L2T_ERR	(CVMX_ADD_IO_SEG(0x0001180080000008ull))
+
 
 union cvmx_l2t_err {
 	uint64_t u64;
 	struct cvmx_l2t_err_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_29_63:35;
-		uint64_t fadru:1;
-		uint64_t lck_intena2:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr:1;
-		uint64_t fset:3;
-		uint64_t fadr:10;
-		uint64_t fsyn:6;
-		uint64_t ded_err:1;
-		uint64_t sec_err:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_intena:1;
-		uint64_t ecc_ena:1;
-#else
-		uint64_t ecc_ena:1;
-		uint64_t sec_intena:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_err:1;
-		uint64_t ded_err:1;
-		uint64_t fsyn:6;
-		uint64_t fadr:10;
-		uint64_t fset:3;
-		uint64_t lckerr:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena2:1;
-		uint64_t fadru:1;
-		uint64_t reserved_29_63:35;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_29_63:35,
+		__BITFIELD_FIELD(uint64_t fadru:1,
+		__BITFIELD_FIELD(uint64_t lck_intena2:1,
+		__BITFIELD_FIELD(uint64_t lckerr2:1,
+		__BITFIELD_FIELD(uint64_t lck_intena:1,
+		__BITFIELD_FIELD(uint64_t lckerr:1,
+		__BITFIELD_FIELD(uint64_t fset:3,
+		__BITFIELD_FIELD(uint64_t fadr:10,
+		__BITFIELD_FIELD(uint64_t fsyn:6,
+		__BITFIELD_FIELD(uint64_t ded_err:1,
+		__BITFIELD_FIELD(uint64_t sec_err:1,
+		__BITFIELD_FIELD(uint64_t ded_intena:1,
+		__BITFIELD_FIELD(uint64_t sec_intena:1,
+		__BITFIELD_FIELD(uint64_t ecc_ena:1,
+		;))))))))))))))
 	} s;
 	struct cvmx_l2t_err_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_28_63:36;
-		uint64_t lck_intena2:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr:1;
-		uint64_t reserved_23_23:1;
-		uint64_t fset:2;
-		uint64_t reserved_19_20:2;
-		uint64_t fadr:8;
-		uint64_t fsyn:6;
-		uint64_t ded_err:1;
-		uint64_t sec_err:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_intena:1;
-		uint64_t ecc_ena:1;
-#else
-		uint64_t ecc_ena:1;
-		uint64_t sec_intena:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_err:1;
-		uint64_t ded_err:1;
-		uint64_t fsyn:6;
-		uint64_t fadr:8;
-		uint64_t reserved_19_20:2;
-		uint64_t fset:2;
-		uint64_t reserved_23_23:1;
-		uint64_t lckerr:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena2:1;
-		uint64_t reserved_28_63:36;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_28_63:36,
+		__BITFIELD_FIELD(uint64_t lck_intena2:1,
+		__BITFIELD_FIELD(uint64_t lckerr2:1,
+		__BITFIELD_FIELD(uint64_t lck_intena:1,
+		__BITFIELD_FIELD(uint64_t lckerr:1,
+		__BITFIELD_FIELD(uint64_t reserved_23_23:1,
+		__BITFIELD_FIELD(uint64_t fset:2,
+		__BITFIELD_FIELD(uint64_t reserved_19_20:2,
+		__BITFIELD_FIELD(uint64_t fadr:8,
+		__BITFIELD_FIELD(uint64_t fsyn:6,
+		__BITFIELD_FIELD(uint64_t ded_err:1,
+		__BITFIELD_FIELD(uint64_t sec_err:1,
+		__BITFIELD_FIELD(uint64_t ded_intena:1,
+		__BITFIELD_FIELD(uint64_t sec_intena:1,
+		__BITFIELD_FIELD(uint64_t ecc_ena:1,
+		;)))))))))))))))
 	} cn30xx;
 	struct cvmx_l2t_err_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_28_63:36;
-		uint64_t lck_intena2:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr:1;
-		uint64_t reserved_23_23:1;
-		uint64_t fset:2;
-		uint64_t reserved_20_20:1;
-		uint64_t fadr:9;
-		uint64_t fsyn:6;
-		uint64_t ded_err:1;
-		uint64_t sec_err:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_intena:1;
-		uint64_t ecc_ena:1;
-#else
-		uint64_t ecc_ena:1;
-		uint64_t sec_intena:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_err:1;
-		uint64_t ded_err:1;
-		uint64_t fsyn:6;
-		uint64_t fadr:9;
-		uint64_t reserved_20_20:1;
-		uint64_t fset:2;
-		uint64_t reserved_23_23:1;
-		uint64_t lckerr:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena2:1;
-		uint64_t reserved_28_63:36;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_28_63:36,
+		__BITFIELD_FIELD(uint64_t lck_intena2:1,
+		__BITFIELD_FIELD(uint64_t lckerr2:1,
+		__BITFIELD_FIELD(uint64_t lck_intena:1,
+		__BITFIELD_FIELD(uint64_t lckerr:1,
+		__BITFIELD_FIELD(uint64_t reserved_23_23:1,
+		__BITFIELD_FIELD(uint64_t fset:2,
+		__BITFIELD_FIELD(uint64_t reserved_20_20:1,
+		__BITFIELD_FIELD(uint64_t fadr:9,
+		__BITFIELD_FIELD(uint64_t fsyn:6,
+		__BITFIELD_FIELD(uint64_t ded_err:1,
+		__BITFIELD_FIELD(uint64_t sec_err:1,
+		__BITFIELD_FIELD(uint64_t ded_intena:1,
+		__BITFIELD_FIELD(uint64_t sec_intena:1,
+		__BITFIELD_FIELD(uint64_t ecc_ena:1,
+		;)))))))))))))))
 	} cn31xx;
 	struct cvmx_l2t_err_cn38xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_28_63:36;
-		uint64_t lck_intena2:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr:1;
-		uint64_t fset:3;
-		uint64_t fadr:10;
-		uint64_t fsyn:6;
-		uint64_t ded_err:1;
-		uint64_t sec_err:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_intena:1;
-		uint64_t ecc_ena:1;
-#else
-		uint64_t ecc_ena:1;
-		uint64_t sec_intena:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_err:1;
-		uint64_t ded_err:1;
-		uint64_t fsyn:6;
-		uint64_t fadr:10;
-		uint64_t fset:3;
-		uint64_t lckerr:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena2:1;
-		uint64_t reserved_28_63:36;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_28_63:36,
+		__BITFIELD_FIELD(uint64_t lck_intena2:1,
+		__BITFIELD_FIELD(uint64_t lckerr2:1,
+		__BITFIELD_FIELD(uint64_t lck_intena:1,
+		__BITFIELD_FIELD(uint64_t lckerr:1,
+		__BITFIELD_FIELD(uint64_t fset:3,
+		__BITFIELD_FIELD(uint64_t fadr:10,
+		__BITFIELD_FIELD(uint64_t fsyn:6,
+		__BITFIELD_FIELD(uint64_t ded_err:1,
+		__BITFIELD_FIELD(uint64_t sec_err:1,
+		__BITFIELD_FIELD(uint64_t ded_intena:1,
+		__BITFIELD_FIELD(uint64_t sec_intena:1,
+		__BITFIELD_FIELD(uint64_t ecc_ena:1,
+		;)))))))))))))
 	} cn38xx;
 	struct cvmx_l2t_err_cn38xx cn38xxp2;
 	struct cvmx_l2t_err_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_28_63:36;
-		uint64_t lck_intena2:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr:1;
-		uint64_t fset:3;
-		uint64_t reserved_18_20:3;
-		uint64_t fadr:7;
-		uint64_t fsyn:6;
-		uint64_t ded_err:1;
-		uint64_t sec_err:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_intena:1;
-		uint64_t ecc_ena:1;
-#else
-		uint64_t ecc_ena:1;
-		uint64_t sec_intena:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_err:1;
-		uint64_t ded_err:1;
-		uint64_t fsyn:6;
-		uint64_t fadr:7;
-		uint64_t reserved_18_20:3;
-		uint64_t fset:3;
-		uint64_t lckerr:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena2:1;
-		uint64_t reserved_28_63:36;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_28_63:36,
+		__BITFIELD_FIELD(uint64_t lck_intena2:1,
+		__BITFIELD_FIELD(uint64_t lckerr2:1,
+		__BITFIELD_FIELD(uint64_t lck_intena:1,
+		__BITFIELD_FIELD(uint64_t lckerr:1,
+		__BITFIELD_FIELD(uint64_t fset:3,
+		__BITFIELD_FIELD(uint64_t reserved_18_20:3,
+		__BITFIELD_FIELD(uint64_t fadr:7,
+		__BITFIELD_FIELD(uint64_t fsyn:6,
+		__BITFIELD_FIELD(uint64_t ded_err:1,
+		__BITFIELD_FIELD(uint64_t sec_err:1,
+		__BITFIELD_FIELD(uint64_t ded_intena:1,
+		__BITFIELD_FIELD(uint64_t sec_intena:1,
+		__BITFIELD_FIELD(uint64_t ecc_ena:1,
+		;))))))))))))))
 	} cn50xx;
 	struct cvmx_l2t_err_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_28_63:36;
-		uint64_t lck_intena2:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr:1;
-		uint64_t fset:3;
-		uint64_t reserved_20_20:1;
-		uint64_t fadr:9;
-		uint64_t fsyn:6;
-		uint64_t ded_err:1;
-		uint64_t sec_err:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_intena:1;
-		uint64_t ecc_ena:1;
-#else
-		uint64_t ecc_ena:1;
-		uint64_t sec_intena:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_err:1;
-		uint64_t ded_err:1;
-		uint64_t fsyn:6;
-		uint64_t fadr:9;
-		uint64_t reserved_20_20:1;
-		uint64_t fset:3;
-		uint64_t lckerr:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena2:1;
-		uint64_t reserved_28_63:36;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_28_63:36,
+		__BITFIELD_FIELD(uint64_t lck_intena2:1,
+		__BITFIELD_FIELD(uint64_t lckerr2:1,
+		__BITFIELD_FIELD(uint64_t lck_intena:1,
+		__BITFIELD_FIELD(uint64_t lckerr:1,
+		__BITFIELD_FIELD(uint64_t fset:3,
+		__BITFIELD_FIELD(uint64_t reserved_20_20:1,
+		__BITFIELD_FIELD(uint64_t fadr:9,
+		__BITFIELD_FIELD(uint64_t fsyn:6,
+		__BITFIELD_FIELD(uint64_t ded_err:1,
+		__BITFIELD_FIELD(uint64_t sec_err:1,
+		__BITFIELD_FIELD(uint64_t ded_intena:1,
+		__BITFIELD_FIELD(uint64_t sec_intena:1,
+		__BITFIELD_FIELD(uint64_t ecc_ena:1,
+		;))))))))))))))
 	} cn52xx;
 	struct cvmx_l2t_err_cn52xx cn52xxp1;
 	struct cvmx_l2t_err_s cn56xx;
diff --git a/arch/mips/include/asm/octeon/cvmx-pciercx-defs.h b/arch/mips/include/asm/octeon/cvmx-pciercx-defs.h
index 4bce393391e2..e2dce1acf029 100644
--- a/arch/mips/include/asm/octeon/cvmx-pciercx-defs.h
+++ b/arch/mips/include/asm/octeon/cvmx-pciercx-defs.h
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2012 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -28,3148 +28,341 @@
 #ifndef __CVMX_PCIERCX_DEFS_H__
 #define __CVMX_PCIERCX_DEFS_H__
 
-#define CVMX_PCIERCX_CFG000(block_id) (0x0000000000000000ull)
+#include <uapi/asm/bitfield.h>
+
 #define CVMX_PCIERCX_CFG001(block_id) (0x0000000000000004ull)
-#define CVMX_PCIERCX_CFG002(block_id) (0x0000000000000008ull)
-#define CVMX_PCIERCX_CFG003(block_id) (0x000000000000000Cull)
-#define CVMX_PCIERCX_CFG004(block_id) (0x0000000000000010ull)
-#define CVMX_PCIERCX_CFG005(block_id) (0x0000000000000014ull)
 #define CVMX_PCIERCX_CFG006(block_id) (0x0000000000000018ull)
-#define CVMX_PCIERCX_CFG007(block_id) (0x000000000000001Cull)
 #define CVMX_PCIERCX_CFG008(block_id) (0x0000000000000020ull)
 #define CVMX_PCIERCX_CFG009(block_id) (0x0000000000000024ull)
 #define CVMX_PCIERCX_CFG010(block_id) (0x0000000000000028ull)
 #define CVMX_PCIERCX_CFG011(block_id) (0x000000000000002Cull)
-#define CVMX_PCIERCX_CFG012(block_id) (0x0000000000000030ull)
-#define CVMX_PCIERCX_CFG013(block_id) (0x0000000000000034ull)
-#define CVMX_PCIERCX_CFG014(block_id) (0x0000000000000038ull)
-#define CVMX_PCIERCX_CFG015(block_id) (0x000000000000003Cull)
-#define CVMX_PCIERCX_CFG016(block_id) (0x0000000000000040ull)
-#define CVMX_PCIERCX_CFG017(block_id) (0x0000000000000044ull)
-#define CVMX_PCIERCX_CFG020(block_id) (0x0000000000000050ull)
-#define CVMX_PCIERCX_CFG021(block_id) (0x0000000000000054ull)
-#define CVMX_PCIERCX_CFG022(block_id) (0x0000000000000058ull)
-#define CVMX_PCIERCX_CFG023(block_id) (0x000000000000005Cull)
-#define CVMX_PCIERCX_CFG028(block_id) (0x0000000000000070ull)
-#define CVMX_PCIERCX_CFG029(block_id) (0x0000000000000074ull)
 #define CVMX_PCIERCX_CFG030(block_id) (0x0000000000000078ull)
 #define CVMX_PCIERCX_CFG031(block_id) (0x000000000000007Cull)
 #define CVMX_PCIERCX_CFG032(block_id) (0x0000000000000080ull)
-#define CVMX_PCIERCX_CFG033(block_id) (0x0000000000000084ull)
 #define CVMX_PCIERCX_CFG034(block_id) (0x0000000000000088ull)
 #define CVMX_PCIERCX_CFG035(block_id) (0x000000000000008Cull)
-#define CVMX_PCIERCX_CFG036(block_id) (0x0000000000000090ull)
-#define CVMX_PCIERCX_CFG037(block_id) (0x0000000000000094ull)
-#define CVMX_PCIERCX_CFG038(block_id) (0x0000000000000098ull)
-#define CVMX_PCIERCX_CFG039(block_id) (0x000000000000009Cull)
 #define CVMX_PCIERCX_CFG040(block_id) (0x00000000000000A0ull)
-#define CVMX_PCIERCX_CFG041(block_id) (0x00000000000000A4ull)
-#define CVMX_PCIERCX_CFG042(block_id) (0x00000000000000A8ull)
-#define CVMX_PCIERCX_CFG064(block_id) (0x0000000000000100ull)
-#define CVMX_PCIERCX_CFG065(block_id) (0x0000000000000104ull)
 #define CVMX_PCIERCX_CFG066(block_id) (0x0000000000000108ull)
-#define CVMX_PCIERCX_CFG067(block_id) (0x000000000000010Cull)
-#define CVMX_PCIERCX_CFG068(block_id) (0x0000000000000110ull)
 #define CVMX_PCIERCX_CFG069(block_id) (0x0000000000000114ull)
 #define CVMX_PCIERCX_CFG070(block_id) (0x0000000000000118ull)
-#define CVMX_PCIERCX_CFG071(block_id) (0x000000000000011Cull)
-#define CVMX_PCIERCX_CFG072(block_id) (0x0000000000000120ull)
-#define CVMX_PCIERCX_CFG073(block_id) (0x0000000000000124ull)
-#define CVMX_PCIERCX_CFG074(block_id) (0x0000000000000128ull)
 #define CVMX_PCIERCX_CFG075(block_id) (0x000000000000012Cull)
-#define CVMX_PCIERCX_CFG076(block_id) (0x0000000000000130ull)
-#define CVMX_PCIERCX_CFG077(block_id) (0x0000000000000134ull)
 #define CVMX_PCIERCX_CFG448(block_id) (0x0000000000000700ull)
-#define CVMX_PCIERCX_CFG449(block_id) (0x0000000000000704ull)
-#define CVMX_PCIERCX_CFG450(block_id) (0x0000000000000708ull)
-#define CVMX_PCIERCX_CFG451(block_id) (0x000000000000070Cull)
 #define CVMX_PCIERCX_CFG452(block_id) (0x0000000000000710ull)
-#define CVMX_PCIERCX_CFG453(block_id) (0x0000000000000714ull)
-#define CVMX_PCIERCX_CFG454(block_id) (0x0000000000000718ull)
 #define CVMX_PCIERCX_CFG455(block_id) (0x000000000000071Cull)
-#define CVMX_PCIERCX_CFG456(block_id) (0x0000000000000720ull)
-#define CVMX_PCIERCX_CFG458(block_id) (0x0000000000000728ull)
-#define CVMX_PCIERCX_CFG459(block_id) (0x000000000000072Cull)
-#define CVMX_PCIERCX_CFG460(block_id) (0x0000000000000730ull)
-#define CVMX_PCIERCX_CFG461(block_id) (0x0000000000000734ull)
-#define CVMX_PCIERCX_CFG462(block_id) (0x0000000000000738ull)
-#define CVMX_PCIERCX_CFG463(block_id) (0x000000000000073Cull)
-#define CVMX_PCIERCX_CFG464(block_id) (0x0000000000000740ull)
-#define CVMX_PCIERCX_CFG465(block_id) (0x0000000000000744ull)
-#define CVMX_PCIERCX_CFG466(block_id) (0x0000000000000748ull)
-#define CVMX_PCIERCX_CFG467(block_id) (0x000000000000074Cull)
-#define CVMX_PCIERCX_CFG468(block_id) (0x0000000000000750ull)
-#define CVMX_PCIERCX_CFG490(block_id) (0x00000000000007A8ull)
-#define CVMX_PCIERCX_CFG491(block_id) (0x00000000000007ACull)
-#define CVMX_PCIERCX_CFG492(block_id) (0x00000000000007B0ull)
 #define CVMX_PCIERCX_CFG515(block_id) (0x000000000000080Cull)
-#define CVMX_PCIERCX_CFG516(block_id) (0x0000000000000810ull)
-#define CVMX_PCIERCX_CFG517(block_id) (0x0000000000000814ull)
-
-union cvmx_pciercx_cfg000 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg000_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t devid:16;
-		uint32_t vendid:16;
-#else
-		uint32_t vendid:16;
-		uint32_t devid:16;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg000_s cn52xx;
-	struct cvmx_pciercx_cfg000_s cn52xxp1;
-	struct cvmx_pciercx_cfg000_s cn56xx;
-	struct cvmx_pciercx_cfg000_s cn56xxp1;
-	struct cvmx_pciercx_cfg000_s cn61xx;
-	struct cvmx_pciercx_cfg000_s cn63xx;
-	struct cvmx_pciercx_cfg000_s cn63xxp1;
-	struct cvmx_pciercx_cfg000_s cn66xx;
-	struct cvmx_pciercx_cfg000_s cn68xx;
-	struct cvmx_pciercx_cfg000_s cn68xxp1;
-	struct cvmx_pciercx_cfg000_s cnf71xx;
-};
 
 union cvmx_pciercx_cfg001 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg001_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dpe:1;
-		uint32_t sse:1;
-		uint32_t rma:1;
-		uint32_t rta:1;
-		uint32_t sta:1;
-		uint32_t devt:2;
-		uint32_t mdpe:1;
-		uint32_t fbb:1;
-		uint32_t reserved_22_22:1;
-		uint32_t m66:1;
-		uint32_t cl:1;
-		uint32_t i_stat:1;
-		uint32_t reserved_11_18:8;
-		uint32_t i_dis:1;
-		uint32_t fbbe:1;
-		uint32_t see:1;
-		uint32_t ids_wcc:1;
-		uint32_t per:1;
-		uint32_t vps:1;
-		uint32_t mwice:1;
-		uint32_t scse:1;
-		uint32_t me:1;
-		uint32_t msae:1;
-		uint32_t isae:1;
-#else
-		uint32_t isae:1;
-		uint32_t msae:1;
-		uint32_t me:1;
-		uint32_t scse:1;
-		uint32_t mwice:1;
-		uint32_t vps:1;
-		uint32_t per:1;
-		uint32_t ids_wcc:1;
-		uint32_t see:1;
-		uint32_t fbbe:1;
-		uint32_t i_dis:1;
-		uint32_t reserved_11_18:8;
-		uint32_t i_stat:1;
-		uint32_t cl:1;
-		uint32_t m66:1;
-		uint32_t reserved_22_22:1;
-		uint32_t fbb:1;
-		uint32_t mdpe:1;
-		uint32_t devt:2;
-		uint32_t sta:1;
-		uint32_t rta:1;
-		uint32_t rma:1;
-		uint32_t sse:1;
-		uint32_t dpe:1;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg001_s cn52xx;
-	struct cvmx_pciercx_cfg001_s cn52xxp1;
-	struct cvmx_pciercx_cfg001_s cn56xx;
-	struct cvmx_pciercx_cfg001_s cn56xxp1;
-	struct cvmx_pciercx_cfg001_s cn61xx;
-	struct cvmx_pciercx_cfg001_s cn63xx;
-	struct cvmx_pciercx_cfg001_s cn63xxp1;
-	struct cvmx_pciercx_cfg001_s cn66xx;
-	struct cvmx_pciercx_cfg001_s cn68xx;
-	struct cvmx_pciercx_cfg001_s cn68xxp1;
-	struct cvmx_pciercx_cfg001_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg002 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg002_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t bcc:8;
-		uint32_t sc:8;
-		uint32_t pi:8;
-		uint32_t rid:8;
-#else
-		uint32_t rid:8;
-		uint32_t pi:8;
-		uint32_t sc:8;
-		uint32_t bcc:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg002_s cn52xx;
-	struct cvmx_pciercx_cfg002_s cn52xxp1;
-	struct cvmx_pciercx_cfg002_s cn56xx;
-	struct cvmx_pciercx_cfg002_s cn56xxp1;
-	struct cvmx_pciercx_cfg002_s cn61xx;
-	struct cvmx_pciercx_cfg002_s cn63xx;
-	struct cvmx_pciercx_cfg002_s cn63xxp1;
-	struct cvmx_pciercx_cfg002_s cn66xx;
-	struct cvmx_pciercx_cfg002_s cn68xx;
-	struct cvmx_pciercx_cfg002_s cn68xxp1;
-	struct cvmx_pciercx_cfg002_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg003 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg003_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t bist:8;
-		uint32_t mfd:1;
-		uint32_t chf:7;
-		uint32_t lt:8;
-		uint32_t cls:8;
-#else
-		uint32_t cls:8;
-		uint32_t lt:8;
-		uint32_t chf:7;
-		uint32_t mfd:1;
-		uint32_t bist:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg003_s cn52xx;
-	struct cvmx_pciercx_cfg003_s cn52xxp1;
-	struct cvmx_pciercx_cfg003_s cn56xx;
-	struct cvmx_pciercx_cfg003_s cn56xxp1;
-	struct cvmx_pciercx_cfg003_s cn61xx;
-	struct cvmx_pciercx_cfg003_s cn63xx;
-	struct cvmx_pciercx_cfg003_s cn63xxp1;
-	struct cvmx_pciercx_cfg003_s cn66xx;
-	struct cvmx_pciercx_cfg003_s cn68xx;
-	struct cvmx_pciercx_cfg003_s cn68xxp1;
-	struct cvmx_pciercx_cfg003_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg004 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg004_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_0_31:32;
-#else
-		uint32_t reserved_0_31:32;
-#endif
+		__BITFIELD_FIELD(uint32_t dpe:1,
+		__BITFIELD_FIELD(uint32_t sse:1,
+		__BITFIELD_FIELD(uint32_t rma:1,
+		__BITFIELD_FIELD(uint32_t rta:1,
+		__BITFIELD_FIELD(uint32_t sta:1,
+		__BITFIELD_FIELD(uint32_t devt:2,
+		__BITFIELD_FIELD(uint32_t mdpe:1,
+		__BITFIELD_FIELD(uint32_t fbb:1,
+		__BITFIELD_FIELD(uint32_t reserved_22_22:1,
+		__BITFIELD_FIELD(uint32_t m66:1,
+		__BITFIELD_FIELD(uint32_t cl:1,
+		__BITFIELD_FIELD(uint32_t i_stat:1,
+		__BITFIELD_FIELD(uint32_t reserved_11_18:8,
+		__BITFIELD_FIELD(uint32_t i_dis:1,
+		__BITFIELD_FIELD(uint32_t fbbe:1,
+		__BITFIELD_FIELD(uint32_t see:1,
+		__BITFIELD_FIELD(uint32_t ids_wcc:1,
+		__BITFIELD_FIELD(uint32_t per:1,
+		__BITFIELD_FIELD(uint32_t vps:1,
+		__BITFIELD_FIELD(uint32_t mwice:1,
+		__BITFIELD_FIELD(uint32_t scse:1,
+		__BITFIELD_FIELD(uint32_t me:1,
+		__BITFIELD_FIELD(uint32_t msae:1,
+		__BITFIELD_FIELD(uint32_t isae:1,
+		;))))))))))))))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg004_s cn52xx;
-	struct cvmx_pciercx_cfg004_s cn52xxp1;
-	struct cvmx_pciercx_cfg004_s cn56xx;
-	struct cvmx_pciercx_cfg004_s cn56xxp1;
-	struct cvmx_pciercx_cfg004_s cn61xx;
-	struct cvmx_pciercx_cfg004_s cn63xx;
-	struct cvmx_pciercx_cfg004_s cn63xxp1;
-	struct cvmx_pciercx_cfg004_s cn66xx;
-	struct cvmx_pciercx_cfg004_s cn68xx;
-	struct cvmx_pciercx_cfg004_s cn68xxp1;
-	struct cvmx_pciercx_cfg004_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg005 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg005_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_0_31:32;
-#else
-		uint32_t reserved_0_31:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg005_s cn52xx;
-	struct cvmx_pciercx_cfg005_s cn52xxp1;
-	struct cvmx_pciercx_cfg005_s cn56xx;
-	struct cvmx_pciercx_cfg005_s cn56xxp1;
-	struct cvmx_pciercx_cfg005_s cn61xx;
-	struct cvmx_pciercx_cfg005_s cn63xx;
-	struct cvmx_pciercx_cfg005_s cn63xxp1;
-	struct cvmx_pciercx_cfg005_s cn66xx;
-	struct cvmx_pciercx_cfg005_s cn68xx;
-	struct cvmx_pciercx_cfg005_s cn68xxp1;
-	struct cvmx_pciercx_cfg005_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg006 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg006_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t slt:8;
-		uint32_t subbnum:8;
-		uint32_t sbnum:8;
-		uint32_t pbnum:8;
-#else
-		uint32_t pbnum:8;
-		uint32_t sbnum:8;
-		uint32_t subbnum:8;
-		uint32_t slt:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg006_s cn52xx;
-	struct cvmx_pciercx_cfg006_s cn52xxp1;
-	struct cvmx_pciercx_cfg006_s cn56xx;
-	struct cvmx_pciercx_cfg006_s cn56xxp1;
-	struct cvmx_pciercx_cfg006_s cn61xx;
-	struct cvmx_pciercx_cfg006_s cn63xx;
-	struct cvmx_pciercx_cfg006_s cn63xxp1;
-	struct cvmx_pciercx_cfg006_s cn66xx;
-	struct cvmx_pciercx_cfg006_s cn68xx;
-	struct cvmx_pciercx_cfg006_s cn68xxp1;
-	struct cvmx_pciercx_cfg006_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg007 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg007_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dpe:1;
-		uint32_t sse:1;
-		uint32_t rma:1;
-		uint32_t rta:1;
-		uint32_t sta:1;
-		uint32_t devt:2;
-		uint32_t mdpe:1;
-		uint32_t fbb:1;
-		uint32_t reserved_22_22:1;
-		uint32_t m66:1;
-		uint32_t reserved_16_20:5;
-		uint32_t lio_limi:4;
-		uint32_t reserved_9_11:3;
-		uint32_t io32b:1;
-		uint32_t lio_base:4;
-		uint32_t reserved_1_3:3;
-		uint32_t io32a:1;
-#else
-		uint32_t io32a:1;
-		uint32_t reserved_1_3:3;
-		uint32_t lio_base:4;
-		uint32_t io32b:1;
-		uint32_t reserved_9_11:3;
-		uint32_t lio_limi:4;
-		uint32_t reserved_16_20:5;
-		uint32_t m66:1;
-		uint32_t reserved_22_22:1;
-		uint32_t fbb:1;
-		uint32_t mdpe:1;
-		uint32_t devt:2;
-		uint32_t sta:1;
-		uint32_t rta:1;
-		uint32_t rma:1;
-		uint32_t sse:1;
-		uint32_t dpe:1;
-#endif
+		__BITFIELD_FIELD(uint32_t slt:8,
+		__BITFIELD_FIELD(uint32_t subbnum:8,
+		__BITFIELD_FIELD(uint32_t sbnum:8,
+		__BITFIELD_FIELD(uint32_t pbnum:8,
+		;))))
 	} s;
-	struct cvmx_pciercx_cfg007_s cn52xx;
-	struct cvmx_pciercx_cfg007_s cn52xxp1;
-	struct cvmx_pciercx_cfg007_s cn56xx;
-	struct cvmx_pciercx_cfg007_s cn56xxp1;
-	struct cvmx_pciercx_cfg007_s cn61xx;
-	struct cvmx_pciercx_cfg007_s cn63xx;
-	struct cvmx_pciercx_cfg007_s cn63xxp1;
-	struct cvmx_pciercx_cfg007_s cn66xx;
-	struct cvmx_pciercx_cfg007_s cn68xx;
-	struct cvmx_pciercx_cfg007_s cn68xxp1;
-	struct cvmx_pciercx_cfg007_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg008 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg008_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t ml_addr:12;
-		uint32_t reserved_16_19:4;
-		uint32_t mb_addr:12;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t mb_addr:12;
-		uint32_t reserved_16_19:4;
-		uint32_t ml_addr:12;
-#endif
+		__BITFIELD_FIELD(uint32_t ml_addr:12,
+		__BITFIELD_FIELD(uint32_t reserved_16_19:4,
+		__BITFIELD_FIELD(uint32_t mb_addr:12,
+		__BITFIELD_FIELD(uint32_t reserved_0_3:4,
+		;))))
 	} s;
-	struct cvmx_pciercx_cfg008_s cn52xx;
-	struct cvmx_pciercx_cfg008_s cn52xxp1;
-	struct cvmx_pciercx_cfg008_s cn56xx;
-	struct cvmx_pciercx_cfg008_s cn56xxp1;
-	struct cvmx_pciercx_cfg008_s cn61xx;
-	struct cvmx_pciercx_cfg008_s cn63xx;
-	struct cvmx_pciercx_cfg008_s cn63xxp1;
-	struct cvmx_pciercx_cfg008_s cn66xx;
-	struct cvmx_pciercx_cfg008_s cn68xx;
-	struct cvmx_pciercx_cfg008_s cn68xxp1;
-	struct cvmx_pciercx_cfg008_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg009 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg009_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t lmem_limit:12;
-		uint32_t reserved_17_19:3;
-		uint32_t mem64b:1;
-		uint32_t lmem_base:12;
-		uint32_t reserved_1_3:3;
-		uint32_t mem64a:1;
-#else
-		uint32_t mem64a:1;
-		uint32_t reserved_1_3:3;
-		uint32_t lmem_base:12;
-		uint32_t mem64b:1;
-		uint32_t reserved_17_19:3;
-		uint32_t lmem_limit:12;
-#endif
+		__BITFIELD_FIELD(uint32_t lmem_limit:12,
+		__BITFIELD_FIELD(uint32_t reserved_17_19:3,
+		__BITFIELD_FIELD(uint32_t mem64b:1,
+		__BITFIELD_FIELD(uint32_t lmem_base:12,
+		__BITFIELD_FIELD(uint32_t reserved_1_3:3,
+		__BITFIELD_FIELD(uint32_t mem64a:1,
+		;))))))
 	} s;
-	struct cvmx_pciercx_cfg009_s cn52xx;
-	struct cvmx_pciercx_cfg009_s cn52xxp1;
-	struct cvmx_pciercx_cfg009_s cn56xx;
-	struct cvmx_pciercx_cfg009_s cn56xxp1;
-	struct cvmx_pciercx_cfg009_s cn61xx;
-	struct cvmx_pciercx_cfg009_s cn63xx;
-	struct cvmx_pciercx_cfg009_s cn63xxp1;
-	struct cvmx_pciercx_cfg009_s cn66xx;
-	struct cvmx_pciercx_cfg009_s cn68xx;
-	struct cvmx_pciercx_cfg009_s cn68xxp1;
-	struct cvmx_pciercx_cfg009_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg010 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg010_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t umem_base:32;
-#else
-		uint32_t umem_base:32;
-#endif
+		uint32_t umem_base;
 	} s;
-	struct cvmx_pciercx_cfg010_s cn52xx;
-	struct cvmx_pciercx_cfg010_s cn52xxp1;
-	struct cvmx_pciercx_cfg010_s cn56xx;
-	struct cvmx_pciercx_cfg010_s cn56xxp1;
-	struct cvmx_pciercx_cfg010_s cn61xx;
-	struct cvmx_pciercx_cfg010_s cn63xx;
-	struct cvmx_pciercx_cfg010_s cn63xxp1;
-	struct cvmx_pciercx_cfg010_s cn66xx;
-	struct cvmx_pciercx_cfg010_s cn68xx;
-	struct cvmx_pciercx_cfg010_s cn68xxp1;
-	struct cvmx_pciercx_cfg010_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg011 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg011_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t umem_limit:32;
-#else
-		uint32_t umem_limit:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg011_s cn52xx;
-	struct cvmx_pciercx_cfg011_s cn52xxp1;
-	struct cvmx_pciercx_cfg011_s cn56xx;
-	struct cvmx_pciercx_cfg011_s cn56xxp1;
-	struct cvmx_pciercx_cfg011_s cn61xx;
-	struct cvmx_pciercx_cfg011_s cn63xx;
-	struct cvmx_pciercx_cfg011_s cn63xxp1;
-	struct cvmx_pciercx_cfg011_s cn66xx;
-	struct cvmx_pciercx_cfg011_s cn68xx;
-	struct cvmx_pciercx_cfg011_s cn68xxp1;
-	struct cvmx_pciercx_cfg011_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg012 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg012_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t uio_limit:16;
-		uint32_t uio_base:16;
-#else
-		uint32_t uio_base:16;
-		uint32_t uio_limit:16;
-#endif
+		uint32_t umem_limit;
 	} s;
-	struct cvmx_pciercx_cfg012_s cn52xx;
-	struct cvmx_pciercx_cfg012_s cn52xxp1;
-	struct cvmx_pciercx_cfg012_s cn56xx;
-	struct cvmx_pciercx_cfg012_s cn56xxp1;
-	struct cvmx_pciercx_cfg012_s cn61xx;
-	struct cvmx_pciercx_cfg012_s cn63xx;
-	struct cvmx_pciercx_cfg012_s cn63xxp1;
-	struct cvmx_pciercx_cfg012_s cn66xx;
-	struct cvmx_pciercx_cfg012_s cn68xx;
-	struct cvmx_pciercx_cfg012_s cn68xxp1;
-	struct cvmx_pciercx_cfg012_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg013 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg013_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_8_31:24;
-		uint32_t cp:8;
-#else
-		uint32_t cp:8;
-		uint32_t reserved_8_31:24;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg013_s cn52xx;
-	struct cvmx_pciercx_cfg013_s cn52xxp1;
-	struct cvmx_pciercx_cfg013_s cn56xx;
-	struct cvmx_pciercx_cfg013_s cn56xxp1;
-	struct cvmx_pciercx_cfg013_s cn61xx;
-	struct cvmx_pciercx_cfg013_s cn63xx;
-	struct cvmx_pciercx_cfg013_s cn63xxp1;
-	struct cvmx_pciercx_cfg013_s cn66xx;
-	struct cvmx_pciercx_cfg013_s cn68xx;
-	struct cvmx_pciercx_cfg013_s cn68xxp1;
-	struct cvmx_pciercx_cfg013_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg014 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg014_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_0_31:32;
-#else
-		uint32_t reserved_0_31:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg014_s cn52xx;
-	struct cvmx_pciercx_cfg014_s cn52xxp1;
-	struct cvmx_pciercx_cfg014_s cn56xx;
-	struct cvmx_pciercx_cfg014_s cn56xxp1;
-	struct cvmx_pciercx_cfg014_s cn61xx;
-	struct cvmx_pciercx_cfg014_s cn63xx;
-	struct cvmx_pciercx_cfg014_s cn63xxp1;
-	struct cvmx_pciercx_cfg014_s cn66xx;
-	struct cvmx_pciercx_cfg014_s cn68xx;
-	struct cvmx_pciercx_cfg014_s cn68xxp1;
-	struct cvmx_pciercx_cfg014_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg015 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg015_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_28_31:4;
-		uint32_t dtsees:1;
-		uint32_t dts:1;
-		uint32_t sdt:1;
-		uint32_t pdt:1;
-		uint32_t fbbe:1;
-		uint32_t sbrst:1;
-		uint32_t mam:1;
-		uint32_t vga16d:1;
-		uint32_t vgae:1;
-		uint32_t isae:1;
-		uint32_t see:1;
-		uint32_t pere:1;
-		uint32_t inta:8;
-		uint32_t il:8;
-#else
-		uint32_t il:8;
-		uint32_t inta:8;
-		uint32_t pere:1;
-		uint32_t see:1;
-		uint32_t isae:1;
-		uint32_t vgae:1;
-		uint32_t vga16d:1;
-		uint32_t mam:1;
-		uint32_t sbrst:1;
-		uint32_t fbbe:1;
-		uint32_t pdt:1;
-		uint32_t sdt:1;
-		uint32_t dts:1;
-		uint32_t dtsees:1;
-		uint32_t reserved_28_31:4;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg015_s cn52xx;
-	struct cvmx_pciercx_cfg015_s cn52xxp1;
-	struct cvmx_pciercx_cfg015_s cn56xx;
-	struct cvmx_pciercx_cfg015_s cn56xxp1;
-	struct cvmx_pciercx_cfg015_s cn61xx;
-	struct cvmx_pciercx_cfg015_s cn63xx;
-	struct cvmx_pciercx_cfg015_s cn63xxp1;
-	struct cvmx_pciercx_cfg015_s cn66xx;
-	struct cvmx_pciercx_cfg015_s cn68xx;
-	struct cvmx_pciercx_cfg015_s cn68xxp1;
-	struct cvmx_pciercx_cfg015_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg016 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg016_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t pmes:5;
-		uint32_t d2s:1;
-		uint32_t d1s:1;
-		uint32_t auxc:3;
-		uint32_t dsi:1;
-		uint32_t reserved_20_20:1;
-		uint32_t pme_clock:1;
-		uint32_t pmsv:3;
-		uint32_t ncp:8;
-		uint32_t pmcid:8;
-#else
-		uint32_t pmcid:8;
-		uint32_t ncp:8;
-		uint32_t pmsv:3;
-		uint32_t pme_clock:1;
-		uint32_t reserved_20_20:1;
-		uint32_t dsi:1;
-		uint32_t auxc:3;
-		uint32_t d1s:1;
-		uint32_t d2s:1;
-		uint32_t pmes:5;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg016_s cn52xx;
-	struct cvmx_pciercx_cfg016_s cn52xxp1;
-	struct cvmx_pciercx_cfg016_s cn56xx;
-	struct cvmx_pciercx_cfg016_s cn56xxp1;
-	struct cvmx_pciercx_cfg016_s cn61xx;
-	struct cvmx_pciercx_cfg016_s cn63xx;
-	struct cvmx_pciercx_cfg016_s cn63xxp1;
-	struct cvmx_pciercx_cfg016_s cn66xx;
-	struct cvmx_pciercx_cfg016_s cn68xx;
-	struct cvmx_pciercx_cfg016_s cn68xxp1;
-	struct cvmx_pciercx_cfg016_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg017 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg017_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t pmdia:8;
-		uint32_t bpccee:1;
-		uint32_t bd3h:1;
-		uint32_t reserved_16_21:6;
-		uint32_t pmess:1;
-		uint32_t pmedsia:2;
-		uint32_t pmds:4;
-		uint32_t pmeens:1;
-		uint32_t reserved_4_7:4;
-		uint32_t nsr:1;
-		uint32_t reserved_2_2:1;
-		uint32_t ps:2;
-#else
-		uint32_t ps:2;
-		uint32_t reserved_2_2:1;
-		uint32_t nsr:1;
-		uint32_t reserved_4_7:4;
-		uint32_t pmeens:1;
-		uint32_t pmds:4;
-		uint32_t pmedsia:2;
-		uint32_t pmess:1;
-		uint32_t reserved_16_21:6;
-		uint32_t bd3h:1;
-		uint32_t bpccee:1;
-		uint32_t pmdia:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg017_s cn52xx;
-	struct cvmx_pciercx_cfg017_s cn52xxp1;
-	struct cvmx_pciercx_cfg017_s cn56xx;
-	struct cvmx_pciercx_cfg017_s cn56xxp1;
-	struct cvmx_pciercx_cfg017_s cn61xx;
-	struct cvmx_pciercx_cfg017_s cn63xx;
-	struct cvmx_pciercx_cfg017_s cn63xxp1;
-	struct cvmx_pciercx_cfg017_s cn66xx;
-	struct cvmx_pciercx_cfg017_s cn68xx;
-	struct cvmx_pciercx_cfg017_s cn68xxp1;
-	struct cvmx_pciercx_cfg017_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg020 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg020_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t pvm:1;
-		uint32_t m64:1;
-		uint32_t mme:3;
-		uint32_t mmc:3;
-		uint32_t msien:1;
-		uint32_t ncp:8;
-		uint32_t msicid:8;
-#else
-		uint32_t msicid:8;
-		uint32_t ncp:8;
-		uint32_t msien:1;
-		uint32_t mmc:3;
-		uint32_t mme:3;
-		uint32_t m64:1;
-		uint32_t pvm:1;
-		uint32_t reserved_25_31:7;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg020_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_24_31:8;
-		uint32_t m64:1;
-		uint32_t mme:3;
-		uint32_t mmc:3;
-		uint32_t msien:1;
-		uint32_t ncp:8;
-		uint32_t msicid:8;
-#else
-		uint32_t msicid:8;
-		uint32_t ncp:8;
-		uint32_t msien:1;
-		uint32_t mmc:3;
-		uint32_t mme:3;
-		uint32_t m64:1;
-		uint32_t reserved_24_31:8;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg020_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg020_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg020_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg020_s cn61xx;
-	struct cvmx_pciercx_cfg020_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg020_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg020_cn52xx cn66xx;
-	struct cvmx_pciercx_cfg020_cn52xx cn68xx;
-	struct cvmx_pciercx_cfg020_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg020_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg021 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg021_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t lmsi:30;
-		uint32_t reserved_0_1:2;
-#else
-		uint32_t reserved_0_1:2;
-		uint32_t lmsi:30;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg021_s cn52xx;
-	struct cvmx_pciercx_cfg021_s cn52xxp1;
-	struct cvmx_pciercx_cfg021_s cn56xx;
-	struct cvmx_pciercx_cfg021_s cn56xxp1;
-	struct cvmx_pciercx_cfg021_s cn61xx;
-	struct cvmx_pciercx_cfg021_s cn63xx;
-	struct cvmx_pciercx_cfg021_s cn63xxp1;
-	struct cvmx_pciercx_cfg021_s cn66xx;
-	struct cvmx_pciercx_cfg021_s cn68xx;
-	struct cvmx_pciercx_cfg021_s cn68xxp1;
-	struct cvmx_pciercx_cfg021_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg022 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg022_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t umsi:32;
-#else
-		uint32_t umsi:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg022_s cn52xx;
-	struct cvmx_pciercx_cfg022_s cn52xxp1;
-	struct cvmx_pciercx_cfg022_s cn56xx;
-	struct cvmx_pciercx_cfg022_s cn56xxp1;
-	struct cvmx_pciercx_cfg022_s cn61xx;
-	struct cvmx_pciercx_cfg022_s cn63xx;
-	struct cvmx_pciercx_cfg022_s cn63xxp1;
-	struct cvmx_pciercx_cfg022_s cn66xx;
-	struct cvmx_pciercx_cfg022_s cn68xx;
-	struct cvmx_pciercx_cfg022_s cn68xxp1;
-	struct cvmx_pciercx_cfg022_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg023 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg023_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_16_31:16;
-		uint32_t msimd:16;
-#else
-		uint32_t msimd:16;
-		uint32_t reserved_16_31:16;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg023_s cn52xx;
-	struct cvmx_pciercx_cfg023_s cn52xxp1;
-	struct cvmx_pciercx_cfg023_s cn56xx;
-	struct cvmx_pciercx_cfg023_s cn56xxp1;
-	struct cvmx_pciercx_cfg023_s cn61xx;
-	struct cvmx_pciercx_cfg023_s cn63xx;
-	struct cvmx_pciercx_cfg023_s cn63xxp1;
-	struct cvmx_pciercx_cfg023_s cn66xx;
-	struct cvmx_pciercx_cfg023_s cn68xx;
-	struct cvmx_pciercx_cfg023_s cn68xxp1;
-	struct cvmx_pciercx_cfg023_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg028 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg028_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_30_31:2;
-		uint32_t imn:5;
-		uint32_t si:1;
-		uint32_t dpt:4;
-		uint32_t pciecv:4;
-		uint32_t ncp:8;
-		uint32_t pcieid:8;
-#else
-		uint32_t pcieid:8;
-		uint32_t ncp:8;
-		uint32_t pciecv:4;
-		uint32_t dpt:4;
-		uint32_t si:1;
-		uint32_t imn:5;
-		uint32_t reserved_30_31:2;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg028_s cn52xx;
-	struct cvmx_pciercx_cfg028_s cn52xxp1;
-	struct cvmx_pciercx_cfg028_s cn56xx;
-	struct cvmx_pciercx_cfg028_s cn56xxp1;
-	struct cvmx_pciercx_cfg028_s cn61xx;
-	struct cvmx_pciercx_cfg028_s cn63xx;
-	struct cvmx_pciercx_cfg028_s cn63xxp1;
-	struct cvmx_pciercx_cfg028_s cn66xx;
-	struct cvmx_pciercx_cfg028_s cn68xx;
-	struct cvmx_pciercx_cfg028_s cn68xxp1;
-	struct cvmx_pciercx_cfg028_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg029 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg029_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_28_31:4;
-		uint32_t cspls:2;
-		uint32_t csplv:8;
-		uint32_t reserved_16_17:2;
-		uint32_t rber:1;
-		uint32_t reserved_12_14:3;
-		uint32_t el1al:3;
-		uint32_t el0al:3;
-		uint32_t etfs:1;
-		uint32_t pfs:2;
-		uint32_t mpss:3;
-#else
-		uint32_t mpss:3;
-		uint32_t pfs:2;
-		uint32_t etfs:1;
-		uint32_t el0al:3;
-		uint32_t el1al:3;
-		uint32_t reserved_12_14:3;
-		uint32_t rber:1;
-		uint32_t reserved_16_17:2;
-		uint32_t csplv:8;
-		uint32_t cspls:2;
-		uint32_t reserved_28_31:4;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg029_s cn52xx;
-	struct cvmx_pciercx_cfg029_s cn52xxp1;
-	struct cvmx_pciercx_cfg029_s cn56xx;
-	struct cvmx_pciercx_cfg029_s cn56xxp1;
-	struct cvmx_pciercx_cfg029_s cn61xx;
-	struct cvmx_pciercx_cfg029_s cn63xx;
-	struct cvmx_pciercx_cfg029_s cn63xxp1;
-	struct cvmx_pciercx_cfg029_s cn66xx;
-	struct cvmx_pciercx_cfg029_s cn68xx;
-	struct cvmx_pciercx_cfg029_s cn68xxp1;
-	struct cvmx_pciercx_cfg029_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg030 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg030_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_22_31:10;
-		uint32_t tp:1;
-		uint32_t ap_d:1;
-		uint32_t ur_d:1;
-		uint32_t fe_d:1;
-		uint32_t nfe_d:1;
-		uint32_t ce_d:1;
-		uint32_t reserved_15_15:1;
-		uint32_t mrrs:3;
-		uint32_t ns_en:1;
-		uint32_t ap_en:1;
-		uint32_t pf_en:1;
-		uint32_t etf_en:1;
-		uint32_t mps:3;
-		uint32_t ro_en:1;
-		uint32_t ur_en:1;
-		uint32_t fe_en:1;
-		uint32_t nfe_en:1;
-		uint32_t ce_en:1;
-#else
-		uint32_t ce_en:1;
-		uint32_t nfe_en:1;
-		uint32_t fe_en:1;
-		uint32_t ur_en:1;
-		uint32_t ro_en:1;
-		uint32_t mps:3;
-		uint32_t etf_en:1;
-		uint32_t pf_en:1;
-		uint32_t ap_en:1;
-		uint32_t ns_en:1;
-		uint32_t mrrs:3;
-		uint32_t reserved_15_15:1;
-		uint32_t ce_d:1;
-		uint32_t nfe_d:1;
-		uint32_t fe_d:1;
-		uint32_t ur_d:1;
-		uint32_t ap_d:1;
-		uint32_t tp:1;
-		uint32_t reserved_22_31:10;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_22_31:10,
+		__BITFIELD_FIELD(uint32_t tp:1,
+		__BITFIELD_FIELD(uint32_t ap_d:1,
+		__BITFIELD_FIELD(uint32_t ur_d:1,
+		__BITFIELD_FIELD(uint32_t fe_d:1,
+		__BITFIELD_FIELD(uint32_t nfe_d:1,
+		__BITFIELD_FIELD(uint32_t ce_d:1,
+		__BITFIELD_FIELD(uint32_t reserved_15_15:1,
+		__BITFIELD_FIELD(uint32_t mrrs:3,
+		__BITFIELD_FIELD(uint32_t ns_en:1,
+		__BITFIELD_FIELD(uint32_t ap_en:1,
+		__BITFIELD_FIELD(uint32_t pf_en:1,
+		__BITFIELD_FIELD(uint32_t etf_en:1,
+		__BITFIELD_FIELD(uint32_t mps:3,
+		__BITFIELD_FIELD(uint32_t ro_en:1,
+		__BITFIELD_FIELD(uint32_t ur_en:1,
+		__BITFIELD_FIELD(uint32_t fe_en:1,
+		__BITFIELD_FIELD(uint32_t nfe_en:1,
+		__BITFIELD_FIELD(uint32_t ce_en:1,
+		;)))))))))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg030_s cn52xx;
-	struct cvmx_pciercx_cfg030_s cn52xxp1;
-	struct cvmx_pciercx_cfg030_s cn56xx;
-	struct cvmx_pciercx_cfg030_s cn56xxp1;
-	struct cvmx_pciercx_cfg030_s cn61xx;
-	struct cvmx_pciercx_cfg030_s cn63xx;
-	struct cvmx_pciercx_cfg030_s cn63xxp1;
-	struct cvmx_pciercx_cfg030_s cn66xx;
-	struct cvmx_pciercx_cfg030_s cn68xx;
-	struct cvmx_pciercx_cfg030_s cn68xxp1;
-	struct cvmx_pciercx_cfg030_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg031 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg031_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t pnum:8;
-		uint32_t reserved_23_23:1;
-		uint32_t aspm:1;
-		uint32_t lbnc:1;
-		uint32_t dllarc:1;
-		uint32_t sderc:1;
-		uint32_t cpm:1;
-		uint32_t l1el:3;
-		uint32_t l0el:3;
-		uint32_t aslpms:2;
-		uint32_t mlw:6;
-		uint32_t mls:4;
-#else
-		uint32_t mls:4;
-		uint32_t mlw:6;
-		uint32_t aslpms:2;
-		uint32_t l0el:3;
-		uint32_t l1el:3;
-		uint32_t cpm:1;
-		uint32_t sderc:1;
-		uint32_t dllarc:1;
-		uint32_t lbnc:1;
-		uint32_t aspm:1;
-		uint32_t reserved_23_23:1;
-		uint32_t pnum:8;
-#endif
+		__BITFIELD_FIELD(uint32_t pnum:8,
+		__BITFIELD_FIELD(uint32_t reserved_23_23:1,
+		__BITFIELD_FIELD(uint32_t aspm:1,
+		__BITFIELD_FIELD(uint32_t lbnc:1,
+		__BITFIELD_FIELD(uint32_t dllarc:1,
+		__BITFIELD_FIELD(uint32_t sderc:1,
+		__BITFIELD_FIELD(uint32_t cpm:1,
+		__BITFIELD_FIELD(uint32_t l1el:3,
+		__BITFIELD_FIELD(uint32_t l0el:3,
+		__BITFIELD_FIELD(uint32_t aslpms:2,
+		__BITFIELD_FIELD(uint32_t mlw:6,
+		__BITFIELD_FIELD(uint32_t mls:4,
+		;))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg031_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t pnum:8;
-		uint32_t reserved_22_23:2;
-		uint32_t lbnc:1;
-		uint32_t dllarc:1;
-		uint32_t sderc:1;
-		uint32_t cpm:1;
-		uint32_t l1el:3;
-		uint32_t l0el:3;
-		uint32_t aslpms:2;
-		uint32_t mlw:6;
-		uint32_t mls:4;
-#else
-		uint32_t mls:4;
-		uint32_t mlw:6;
-		uint32_t aslpms:2;
-		uint32_t l0el:3;
-		uint32_t l1el:3;
-		uint32_t cpm:1;
-		uint32_t sderc:1;
-		uint32_t dllarc:1;
-		uint32_t lbnc:1;
-		uint32_t reserved_22_23:2;
-		uint32_t pnum:8;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg031_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg031_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg031_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg031_s cn61xx;
-	struct cvmx_pciercx_cfg031_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg031_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg031_s cn66xx;
-	struct cvmx_pciercx_cfg031_s cn68xx;
-	struct cvmx_pciercx_cfg031_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg031_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg032 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg032_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t lab:1;
-		uint32_t lbm:1;
-		uint32_t dlla:1;
-		uint32_t scc:1;
-		uint32_t lt:1;
-		uint32_t reserved_26_26:1;
-		uint32_t nlw:6;
-		uint32_t ls:4;
-		uint32_t reserved_12_15:4;
-		uint32_t lab_int_enb:1;
-		uint32_t lbm_int_enb:1;
-		uint32_t hawd:1;
-		uint32_t ecpm:1;
-		uint32_t es:1;
-		uint32_t ccc:1;
-		uint32_t rl:1;
-		uint32_t ld:1;
-		uint32_t rcb:1;
-		uint32_t reserved_2_2:1;
-		uint32_t aslpc:2;
-#else
-		uint32_t aslpc:2;
-		uint32_t reserved_2_2:1;
-		uint32_t rcb:1;
-		uint32_t ld:1;
-		uint32_t rl:1;
-		uint32_t ccc:1;
-		uint32_t es:1;
-		uint32_t ecpm:1;
-		uint32_t hawd:1;
-		uint32_t lbm_int_enb:1;
-		uint32_t lab_int_enb:1;
-		uint32_t reserved_12_15:4;
-		uint32_t ls:4;
-		uint32_t nlw:6;
-		uint32_t reserved_26_26:1;
-		uint32_t lt:1;
-		uint32_t scc:1;
-		uint32_t dlla:1;
-		uint32_t lbm:1;
-		uint32_t lab:1;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg032_s cn52xx;
-	struct cvmx_pciercx_cfg032_s cn52xxp1;
-	struct cvmx_pciercx_cfg032_s cn56xx;
-	struct cvmx_pciercx_cfg032_s cn56xxp1;
-	struct cvmx_pciercx_cfg032_s cn61xx;
-	struct cvmx_pciercx_cfg032_s cn63xx;
-	struct cvmx_pciercx_cfg032_s cn63xxp1;
-	struct cvmx_pciercx_cfg032_s cn66xx;
-	struct cvmx_pciercx_cfg032_s cn68xx;
-	struct cvmx_pciercx_cfg032_s cn68xxp1;
-	struct cvmx_pciercx_cfg032_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg033 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg033_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t ps_num:13;
-		uint32_t nccs:1;
-		uint32_t emip:1;
-		uint32_t sp_ls:2;
-		uint32_t sp_lv:8;
-		uint32_t hp_c:1;
-		uint32_t hp_s:1;
-		uint32_t pip:1;
-		uint32_t aip:1;
-		uint32_t mrlsp:1;
-		uint32_t pcp:1;
-		uint32_t abp:1;
-#else
-		uint32_t abp:1;
-		uint32_t pcp:1;
-		uint32_t mrlsp:1;
-		uint32_t aip:1;
-		uint32_t pip:1;
-		uint32_t hp_s:1;
-		uint32_t hp_c:1;
-		uint32_t sp_lv:8;
-		uint32_t sp_ls:2;
-		uint32_t emip:1;
-		uint32_t nccs:1;
-		uint32_t ps_num:13;
-#endif
+		__BITFIELD_FIELD(uint32_t lab:1,
+		__BITFIELD_FIELD(uint32_t lbm:1,
+		__BITFIELD_FIELD(uint32_t dlla:1,
+		__BITFIELD_FIELD(uint32_t scc:1,
+		__BITFIELD_FIELD(uint32_t lt:1,
+		__BITFIELD_FIELD(uint32_t reserved_26_26:1,
+		__BITFIELD_FIELD(uint32_t nlw:6,
+		__BITFIELD_FIELD(uint32_t ls:4,
+		__BITFIELD_FIELD(uint32_t reserved_12_15:4,
+		__BITFIELD_FIELD(uint32_t lab_int_enb:1,
+		__BITFIELD_FIELD(uint32_t lbm_int_enb:1,
+		__BITFIELD_FIELD(uint32_t hawd:1,
+		__BITFIELD_FIELD(uint32_t ecpm:1,
+		__BITFIELD_FIELD(uint32_t es:1,
+		__BITFIELD_FIELD(uint32_t ccc:1,
+		__BITFIELD_FIELD(uint32_t rl:1,
+		__BITFIELD_FIELD(uint32_t ld:1,
+		__BITFIELD_FIELD(uint32_t rcb:1,
+		__BITFIELD_FIELD(uint32_t reserved_2_2:1,
+		__BITFIELD_FIELD(uint32_t aslpc:2,
+		;))))))))))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg033_s cn52xx;
-	struct cvmx_pciercx_cfg033_s cn52xxp1;
-	struct cvmx_pciercx_cfg033_s cn56xx;
-	struct cvmx_pciercx_cfg033_s cn56xxp1;
-	struct cvmx_pciercx_cfg033_s cn61xx;
-	struct cvmx_pciercx_cfg033_s cn63xx;
-	struct cvmx_pciercx_cfg033_s cn63xxp1;
-	struct cvmx_pciercx_cfg033_s cn66xx;
-	struct cvmx_pciercx_cfg033_s cn68xx;
-	struct cvmx_pciercx_cfg033_s cn68xxp1;
-	struct cvmx_pciercx_cfg033_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg034 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg034_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t dlls_c:1;
-		uint32_t emis:1;
-		uint32_t pds:1;
-		uint32_t mrlss:1;
-		uint32_t ccint_d:1;
-		uint32_t pd_c:1;
-		uint32_t mrls_c:1;
-		uint32_t pf_d:1;
-		uint32_t abp_d:1;
-		uint32_t reserved_13_15:3;
-		uint32_t dlls_en:1;
-		uint32_t emic:1;
-		uint32_t pcc:1;
-		uint32_t pic:2;
-		uint32_t aic:2;
-		uint32_t hpint_en:1;
-		uint32_t ccint_en:1;
-		uint32_t pd_en:1;
-		uint32_t mrls_en:1;
-		uint32_t pf_en:1;
-		uint32_t abp_en:1;
-#else
-		uint32_t abp_en:1;
-		uint32_t pf_en:1;
-		uint32_t mrls_en:1;
-		uint32_t pd_en:1;
-		uint32_t ccint_en:1;
-		uint32_t hpint_en:1;
-		uint32_t aic:2;
-		uint32_t pic:2;
-		uint32_t pcc:1;
-		uint32_t emic:1;
-		uint32_t dlls_en:1;
-		uint32_t reserved_13_15:3;
-		uint32_t abp_d:1;
-		uint32_t pf_d:1;
-		uint32_t mrls_c:1;
-		uint32_t pd_c:1;
-		uint32_t ccint_d:1;
-		uint32_t mrlss:1;
-		uint32_t pds:1;
-		uint32_t emis:1;
-		uint32_t dlls_c:1;
-		uint32_t reserved_25_31:7;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_25_31:7,
+		__BITFIELD_FIELD(uint32_t dlls_c:1,
+		__BITFIELD_FIELD(uint32_t emis:1,
+		__BITFIELD_FIELD(uint32_t pds:1,
+		__BITFIELD_FIELD(uint32_t mrlss:1,
+		__BITFIELD_FIELD(uint32_t ccint_d:1,
+		__BITFIELD_FIELD(uint32_t pd_c:1,
+		__BITFIELD_FIELD(uint32_t mrls_c:1,
+		__BITFIELD_FIELD(uint32_t pf_d:1,
+		__BITFIELD_FIELD(uint32_t abp_d:1,
+		__BITFIELD_FIELD(uint32_t reserved_13_15:3,
+		__BITFIELD_FIELD(uint32_t dlls_en:1,
+		__BITFIELD_FIELD(uint32_t emic:1,
+		__BITFIELD_FIELD(uint32_t pcc:1,
+		__BITFIELD_FIELD(uint32_t pic:1,
+		__BITFIELD_FIELD(uint32_t aic:1,
+		__BITFIELD_FIELD(uint32_t hpint_en:1,
+		__BITFIELD_FIELD(uint32_t ccint_en:1,
+		__BITFIELD_FIELD(uint32_t pd_en:1,
+		__BITFIELD_FIELD(uint32_t mrls_en:1,
+		__BITFIELD_FIELD(uint32_t pf_en:1,
+		__BITFIELD_FIELD(uint32_t abp_en:1,
+		;))))))))))))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg034_s cn52xx;
-	struct cvmx_pciercx_cfg034_s cn52xxp1;
-	struct cvmx_pciercx_cfg034_s cn56xx;
-	struct cvmx_pciercx_cfg034_s cn56xxp1;
-	struct cvmx_pciercx_cfg034_s cn61xx;
-	struct cvmx_pciercx_cfg034_s cn63xx;
-	struct cvmx_pciercx_cfg034_s cn63xxp1;
-	struct cvmx_pciercx_cfg034_s cn66xx;
-	struct cvmx_pciercx_cfg034_s cn68xx;
-	struct cvmx_pciercx_cfg034_s cn68xxp1;
-	struct cvmx_pciercx_cfg034_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg035 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg035_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_17_31:15;
-		uint32_t crssv:1;
-		uint32_t reserved_5_15:11;
-		uint32_t crssve:1;
-		uint32_t pmeie:1;
-		uint32_t sefee:1;
-		uint32_t senfee:1;
-		uint32_t secee:1;
-#else
-		uint32_t secee:1;
-		uint32_t senfee:1;
-		uint32_t sefee:1;
-		uint32_t pmeie:1;
-		uint32_t crssve:1;
-		uint32_t reserved_5_15:11;
-		uint32_t crssv:1;
-		uint32_t reserved_17_31:15;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg035_s cn52xx;
-	struct cvmx_pciercx_cfg035_s cn52xxp1;
-	struct cvmx_pciercx_cfg035_s cn56xx;
-	struct cvmx_pciercx_cfg035_s cn56xxp1;
-	struct cvmx_pciercx_cfg035_s cn61xx;
-	struct cvmx_pciercx_cfg035_s cn63xx;
-	struct cvmx_pciercx_cfg035_s cn63xxp1;
-	struct cvmx_pciercx_cfg035_s cn66xx;
-	struct cvmx_pciercx_cfg035_s cn68xx;
-	struct cvmx_pciercx_cfg035_s cn68xxp1;
-	struct cvmx_pciercx_cfg035_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg036 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg036_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_18_31:14;
-		uint32_t pme_pend:1;
-		uint32_t pme_stat:1;
-		uint32_t pme_rid:16;
-#else
-		uint32_t pme_rid:16;
-		uint32_t pme_stat:1;
-		uint32_t pme_pend:1;
-		uint32_t reserved_18_31:14;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg036_s cn52xx;
-	struct cvmx_pciercx_cfg036_s cn52xxp1;
-	struct cvmx_pciercx_cfg036_s cn56xx;
-	struct cvmx_pciercx_cfg036_s cn56xxp1;
-	struct cvmx_pciercx_cfg036_s cn61xx;
-	struct cvmx_pciercx_cfg036_s cn63xx;
-	struct cvmx_pciercx_cfg036_s cn63xxp1;
-	struct cvmx_pciercx_cfg036_s cn66xx;
-	struct cvmx_pciercx_cfg036_s cn68xx;
-	struct cvmx_pciercx_cfg036_s cn68xxp1;
-	struct cvmx_pciercx_cfg036_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg037 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg037_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_20_31:12;
-		uint32_t obffs:2;
-		uint32_t reserved_12_17:6;
-		uint32_t ltrs:1;
-		uint32_t noroprpr:1;
-		uint32_t atom128s:1;
-		uint32_t atom64s:1;
-		uint32_t atom32s:1;
-		uint32_t atom_ops:1;
-		uint32_t reserved_5_5:1;
-		uint32_t ctds:1;
-		uint32_t ctrs:4;
-#else
-		uint32_t ctrs:4;
-		uint32_t ctds:1;
-		uint32_t reserved_5_5:1;
-		uint32_t atom_ops:1;
-		uint32_t atom32s:1;
-		uint32_t atom64s:1;
-		uint32_t atom128s:1;
-		uint32_t noroprpr:1;
-		uint32_t ltrs:1;
-		uint32_t reserved_12_17:6;
-		uint32_t obffs:2;
-		uint32_t reserved_20_31:12;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg037_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_5_31:27;
-		uint32_t ctds:1;
-		uint32_t ctrs:4;
-#else
-		uint32_t ctrs:4;
-		uint32_t ctds:1;
-		uint32_t reserved_5_31:27;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg037_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg037_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg037_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg037_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_14_31:18;
-		uint32_t tph:2;
-		uint32_t reserved_11_11:1;
-		uint32_t noroprpr:1;
-		uint32_t atom128s:1;
-		uint32_t atom64s:1;
-		uint32_t atom32s:1;
-		uint32_t atom_ops:1;
-		uint32_t ari_fw:1;
-		uint32_t ctds:1;
-		uint32_t ctrs:4;
-#else
-		uint32_t ctrs:4;
-		uint32_t ctds:1;
-		uint32_t ari_fw:1;
-		uint32_t atom_ops:1;
-		uint32_t atom32s:1;
-		uint32_t atom64s:1;
-		uint32_t atom128s:1;
-		uint32_t noroprpr:1;
-		uint32_t reserved_11_11:1;
-		uint32_t tph:2;
-		uint32_t reserved_14_31:18;
-#endif
-	} cn61xx;
-	struct cvmx_pciercx_cfg037_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg037_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg037_cn66xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_14_31:18;
-		uint32_t tph:2;
-		uint32_t reserved_11_11:1;
-		uint32_t noroprpr:1;
-		uint32_t atom128s:1;
-		uint32_t atom64s:1;
-		uint32_t atom32s:1;
-		uint32_t atom_ops:1;
-		uint32_t ari:1;
-		uint32_t ctds:1;
-		uint32_t ctrs:4;
-#else
-		uint32_t ctrs:4;
-		uint32_t ctds:1;
-		uint32_t ari:1;
-		uint32_t atom_ops:1;
-		uint32_t atom32s:1;
-		uint32_t atom64s:1;
-		uint32_t atom128s:1;
-		uint32_t noroprpr:1;
-		uint32_t reserved_11_11:1;
-		uint32_t tph:2;
-		uint32_t reserved_14_31:18;
-#endif
-	} cn66xx;
-	struct cvmx_pciercx_cfg037_cn66xx cn68xx;
-	struct cvmx_pciercx_cfg037_cn66xx cn68xxp1;
-	struct cvmx_pciercx_cfg037_cnf71xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_20_31:12;
-		uint32_t obffs:2;
-		uint32_t reserved_14_17:4;
-		uint32_t tphs:2;
-		uint32_t ltrs:1;
-		uint32_t noroprpr:1;
-		uint32_t atom128s:1;
-		uint32_t atom64s:1;
-		uint32_t atom32s:1;
-		uint32_t atom_ops:1;
-		uint32_t ari_fw:1;
-		uint32_t ctds:1;
-		uint32_t ctrs:4;
-#else
-		uint32_t ctrs:4;
-		uint32_t ctds:1;
-		uint32_t ari_fw:1;
-		uint32_t atom_ops:1;
-		uint32_t atom32s:1;
-		uint32_t atom64s:1;
-		uint32_t atom128s:1;
-		uint32_t noroprpr:1;
-		uint32_t ltrs:1;
-		uint32_t tphs:2;
-		uint32_t reserved_14_17:4;
-		uint32_t obffs:2;
-		uint32_t reserved_20_31:12;
-#endif
-	} cnf71xx;
-};
-
-union cvmx_pciercx_cfg038 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg038_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_15_31:17;
-		uint32_t obffe:2;
-		uint32_t reserved_11_12:2;
-		uint32_t ltre:1;
-		uint32_t id0_cp:1;
-		uint32_t id0_rq:1;
-		uint32_t atom_op_eb:1;
-		uint32_t atom_op:1;
-		uint32_t ari:1;
-		uint32_t ctd:1;
-		uint32_t ctv:4;
-#else
-		uint32_t ctv:4;
-		uint32_t ctd:1;
-		uint32_t ari:1;
-		uint32_t atom_op:1;
-		uint32_t atom_op_eb:1;
-		uint32_t id0_rq:1;
-		uint32_t id0_cp:1;
-		uint32_t ltre:1;
-		uint32_t reserved_11_12:2;
-		uint32_t obffe:2;
-		uint32_t reserved_15_31:17;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_17_31:15,
+		__BITFIELD_FIELD(uint32_t crssv:1,
+		__BITFIELD_FIELD(uint32_t reserved_5_15:11,
+		__BITFIELD_FIELD(uint32_t crssve:1,
+		__BITFIELD_FIELD(uint32_t pmeie:1,
+		__BITFIELD_FIELD(uint32_t sefee:1,
+		__BITFIELD_FIELD(uint32_t senfee:1,
+		__BITFIELD_FIELD(uint32_t secee:1,
+		;))))))))
 	} s;
-	struct cvmx_pciercx_cfg038_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_5_31:27;
-		uint32_t ctd:1;
-		uint32_t ctv:4;
-#else
-		uint32_t ctv:4;
-		uint32_t ctd:1;
-		uint32_t reserved_5_31:27;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg038_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg038_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg038_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg038_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_10_31:22;
-		uint32_t id0_cp:1;
-		uint32_t id0_rq:1;
-		uint32_t atom_op_eb:1;
-		uint32_t atom_op:1;
-		uint32_t ari:1;
-		uint32_t ctd:1;
-		uint32_t ctv:4;
-#else
-		uint32_t ctv:4;
-		uint32_t ctd:1;
-		uint32_t ari:1;
-		uint32_t atom_op:1;
-		uint32_t atom_op_eb:1;
-		uint32_t id0_rq:1;
-		uint32_t id0_cp:1;
-		uint32_t reserved_10_31:22;
-#endif
-	} cn61xx;
-	struct cvmx_pciercx_cfg038_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg038_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg038_cn61xx cn66xx;
-	struct cvmx_pciercx_cfg038_cn61xx cn68xx;
-	struct cvmx_pciercx_cfg038_cn61xx cn68xxp1;
-	struct cvmx_pciercx_cfg038_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg039 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg039_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_9_31:23;
-		uint32_t cls:1;
-		uint32_t slsv:7;
-		uint32_t reserved_0_0:1;
-#else
-		uint32_t reserved_0_0:1;
-		uint32_t slsv:7;
-		uint32_t cls:1;
-		uint32_t reserved_9_31:23;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg039_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_0_31:32;
-#else
-		uint32_t reserved_0_31:32;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg039_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg039_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg039_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg039_s cn61xx;
-	struct cvmx_pciercx_cfg039_s cn63xx;
-	struct cvmx_pciercx_cfg039_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg039_s cn66xx;
-	struct cvmx_pciercx_cfg039_s cn68xx;
-	struct cvmx_pciercx_cfg039_s cn68xxp1;
-	struct cvmx_pciercx_cfg039_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg040 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg040_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_17_31:15;
-		uint32_t cdl:1;
-		uint32_t reserved_13_15:3;
-		uint32_t cde:1;
-		uint32_t csos:1;
-		uint32_t emc:1;
-		uint32_t tm:3;
-		uint32_t sde:1;
-		uint32_t hasd:1;
-		uint32_t ec:1;
-		uint32_t tls:4;
-#else
-		uint32_t tls:4;
-		uint32_t ec:1;
-		uint32_t hasd:1;
-		uint32_t sde:1;
-		uint32_t tm:3;
-		uint32_t emc:1;
-		uint32_t csos:1;
-		uint32_t cde:1;
-		uint32_t reserved_13_15:3;
-		uint32_t cdl:1;
-		uint32_t reserved_17_31:15;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg040_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_0_31:32;
-#else
-		uint32_t reserved_0_31:32;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg040_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg040_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg040_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg040_s cn61xx;
-	struct cvmx_pciercx_cfg040_s cn63xx;
-	struct cvmx_pciercx_cfg040_s cn63xxp1;
-	struct cvmx_pciercx_cfg040_s cn66xx;
-	struct cvmx_pciercx_cfg040_s cn68xx;
-	struct cvmx_pciercx_cfg040_s cn68xxp1;
-	struct cvmx_pciercx_cfg040_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg041 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg041_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_0_31:32;
-#else
-		uint32_t reserved_0_31:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg041_s cn52xx;
-	struct cvmx_pciercx_cfg041_s cn52xxp1;
-	struct cvmx_pciercx_cfg041_s cn56xx;
-	struct cvmx_pciercx_cfg041_s cn56xxp1;
-	struct cvmx_pciercx_cfg041_s cn61xx;
-	struct cvmx_pciercx_cfg041_s cn63xx;
-	struct cvmx_pciercx_cfg041_s cn63xxp1;
-	struct cvmx_pciercx_cfg041_s cn66xx;
-	struct cvmx_pciercx_cfg041_s cn68xx;
-	struct cvmx_pciercx_cfg041_s cn68xxp1;
-	struct cvmx_pciercx_cfg041_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg042 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg042_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_0_31:32;
-#else
-		uint32_t reserved_0_31:32;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_22_31:10,
+		__BITFIELD_FIELD(uint32_t ler:1,
+		__BITFIELD_FIELD(uint32_t ep3s:1,
+		__BITFIELD_FIELD(uint32_t ep2s:1,
+		__BITFIELD_FIELD(uint32_t ep1s:1,
+		__BITFIELD_FIELD(uint32_t eqc:1,
+		__BITFIELD_FIELD(uint32_t cdl:1,
+		__BITFIELD_FIELD(uint32_t cde:4,
+		__BITFIELD_FIELD(uint32_t csos:1,
+		__BITFIELD_FIELD(uint32_t emc:1,
+		__BITFIELD_FIELD(uint32_t tm:3,
+		__BITFIELD_FIELD(uint32_t sde:1,
+		__BITFIELD_FIELD(uint32_t hasd:1,
+		__BITFIELD_FIELD(uint32_t ec:1,
+		__BITFIELD_FIELD(uint32_t tls:4,
+		;)))))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg042_s cn52xx;
-	struct cvmx_pciercx_cfg042_s cn52xxp1;
-	struct cvmx_pciercx_cfg042_s cn56xx;
-	struct cvmx_pciercx_cfg042_s cn56xxp1;
-	struct cvmx_pciercx_cfg042_s cn61xx;
-	struct cvmx_pciercx_cfg042_s cn63xx;
-	struct cvmx_pciercx_cfg042_s cn63xxp1;
-	struct cvmx_pciercx_cfg042_s cn66xx;
-	struct cvmx_pciercx_cfg042_s cn68xx;
-	struct cvmx_pciercx_cfg042_s cn68xxp1;
-	struct cvmx_pciercx_cfg042_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg064 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg064_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t nco:12;
-		uint32_t cv:4;
-		uint32_t pcieec:16;
-#else
-		uint32_t pcieec:16;
-		uint32_t cv:4;
-		uint32_t nco:12;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg064_s cn52xx;
-	struct cvmx_pciercx_cfg064_s cn52xxp1;
-	struct cvmx_pciercx_cfg064_s cn56xx;
-	struct cvmx_pciercx_cfg064_s cn56xxp1;
-	struct cvmx_pciercx_cfg064_s cn61xx;
-	struct cvmx_pciercx_cfg064_s cn63xx;
-	struct cvmx_pciercx_cfg064_s cn63xxp1;
-	struct cvmx_pciercx_cfg064_s cn66xx;
-	struct cvmx_pciercx_cfg064_s cn68xx;
-	struct cvmx_pciercx_cfg064_s cn68xxp1;
-	struct cvmx_pciercx_cfg064_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg065 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg065_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t uatombs:1;
-		uint32_t reserved_23_23:1;
-		uint32_t ucies:1;
-		uint32_t reserved_21_21:1;
-		uint32_t ures:1;
-		uint32_t ecrces:1;
-		uint32_t mtlps:1;
-		uint32_t ros:1;
-		uint32_t ucs:1;
-		uint32_t cas:1;
-		uint32_t cts:1;
-		uint32_t fcpes:1;
-		uint32_t ptlps:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdes:1;
-		uint32_t dlpes:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpes:1;
-		uint32_t sdes:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlps:1;
-		uint32_t fcpes:1;
-		uint32_t cts:1;
-		uint32_t cas:1;
-		uint32_t ucs:1;
-		uint32_t ros:1;
-		uint32_t mtlps:1;
-		uint32_t ecrces:1;
-		uint32_t ures:1;
-		uint32_t reserved_21_21:1;
-		uint32_t ucies:1;
-		uint32_t reserved_23_23:1;
-		uint32_t uatombs:1;
-		uint32_t reserved_25_31:7;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg065_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_21_31:11;
-		uint32_t ures:1;
-		uint32_t ecrces:1;
-		uint32_t mtlps:1;
-		uint32_t ros:1;
-		uint32_t ucs:1;
-		uint32_t cas:1;
-		uint32_t cts:1;
-		uint32_t fcpes:1;
-		uint32_t ptlps:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdes:1;
-		uint32_t dlpes:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpes:1;
-		uint32_t sdes:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlps:1;
-		uint32_t fcpes:1;
-		uint32_t cts:1;
-		uint32_t cas:1;
-		uint32_t ucs:1;
-		uint32_t ros:1;
-		uint32_t mtlps:1;
-		uint32_t ecrces:1;
-		uint32_t ures:1;
-		uint32_t reserved_21_31:11;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg065_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg065_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg065_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg065_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t uatombs:1;
-		uint32_t reserved_21_23:3;
-		uint32_t ures:1;
-		uint32_t ecrces:1;
-		uint32_t mtlps:1;
-		uint32_t ros:1;
-		uint32_t ucs:1;
-		uint32_t cas:1;
-		uint32_t cts:1;
-		uint32_t fcpes:1;
-		uint32_t ptlps:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdes:1;
-		uint32_t dlpes:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpes:1;
-		uint32_t sdes:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlps:1;
-		uint32_t fcpes:1;
-		uint32_t cts:1;
-		uint32_t cas:1;
-		uint32_t ucs:1;
-		uint32_t ros:1;
-		uint32_t mtlps:1;
-		uint32_t ecrces:1;
-		uint32_t ures:1;
-		uint32_t reserved_21_23:3;
-		uint32_t uatombs:1;
-		uint32_t reserved_25_31:7;
-#endif
-	} cn61xx;
-	struct cvmx_pciercx_cfg065_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg065_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg065_cn61xx cn66xx;
-	struct cvmx_pciercx_cfg065_cn61xx cn68xx;
-	struct cvmx_pciercx_cfg065_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg065_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg066 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg066_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t uatombm:1;
-		uint32_t reserved_23_23:1;
-		uint32_t uciem:1;
-		uint32_t reserved_21_21:1;
-		uint32_t urem:1;
-		uint32_t ecrcem:1;
-		uint32_t mtlpm:1;
-		uint32_t rom:1;
-		uint32_t ucm:1;
-		uint32_t cam:1;
-		uint32_t ctm:1;
-		uint32_t fcpem:1;
-		uint32_t ptlpm:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdem:1;
-		uint32_t dlpem:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpem:1;
-		uint32_t sdem:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlpm:1;
-		uint32_t fcpem:1;
-		uint32_t ctm:1;
-		uint32_t cam:1;
-		uint32_t ucm:1;
-		uint32_t rom:1;
-		uint32_t mtlpm:1;
-		uint32_t ecrcem:1;
-		uint32_t urem:1;
-		uint32_t reserved_21_21:1;
-		uint32_t uciem:1;
-		uint32_t reserved_23_23:1;
-		uint32_t uatombm:1;
-		uint32_t reserved_25_31:7;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg066_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_21_31:11;
-		uint32_t urem:1;
-		uint32_t ecrcem:1;
-		uint32_t mtlpm:1;
-		uint32_t rom:1;
-		uint32_t ucm:1;
-		uint32_t cam:1;
-		uint32_t ctm:1;
-		uint32_t fcpem:1;
-		uint32_t ptlpm:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdem:1;
-		uint32_t dlpem:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpem:1;
-		uint32_t sdem:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlpm:1;
-		uint32_t fcpem:1;
-		uint32_t ctm:1;
-		uint32_t cam:1;
-		uint32_t ucm:1;
-		uint32_t rom:1;
-		uint32_t mtlpm:1;
-		uint32_t ecrcem:1;
-		uint32_t urem:1;
-		uint32_t reserved_21_31:11;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg066_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg066_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg066_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg066_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t uatombm:1;
-		uint32_t reserved_21_23:3;
-		uint32_t urem:1;
-		uint32_t ecrcem:1;
-		uint32_t mtlpm:1;
-		uint32_t rom:1;
-		uint32_t ucm:1;
-		uint32_t cam:1;
-		uint32_t ctm:1;
-		uint32_t fcpem:1;
-		uint32_t ptlpm:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdem:1;
-		uint32_t dlpem:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpem:1;
-		uint32_t sdem:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlpm:1;
-		uint32_t fcpem:1;
-		uint32_t ctm:1;
-		uint32_t cam:1;
-		uint32_t ucm:1;
-		uint32_t rom:1;
-		uint32_t mtlpm:1;
-		uint32_t ecrcem:1;
-		uint32_t urem:1;
-		uint32_t reserved_21_23:3;
-		uint32_t uatombm:1;
-		uint32_t reserved_25_31:7;
-#endif
-	} cn61xx;
-	struct cvmx_pciercx_cfg066_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg066_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg066_cn61xx cn66xx;
-	struct cvmx_pciercx_cfg066_cn61xx cn68xx;
-	struct cvmx_pciercx_cfg066_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg066_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg067 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg067_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t uatombs:1;
-		uint32_t reserved_23_23:1;
-		uint32_t ucies:1;
-		uint32_t reserved_21_21:1;
-		uint32_t ures:1;
-		uint32_t ecrces:1;
-		uint32_t mtlps:1;
-		uint32_t ros:1;
-		uint32_t ucs:1;
-		uint32_t cas:1;
-		uint32_t cts:1;
-		uint32_t fcpes:1;
-		uint32_t ptlps:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdes:1;
-		uint32_t dlpes:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpes:1;
-		uint32_t sdes:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlps:1;
-		uint32_t fcpes:1;
-		uint32_t cts:1;
-		uint32_t cas:1;
-		uint32_t ucs:1;
-		uint32_t ros:1;
-		uint32_t mtlps:1;
-		uint32_t ecrces:1;
-		uint32_t ures:1;
-		uint32_t reserved_21_21:1;
-		uint32_t ucies:1;
-		uint32_t reserved_23_23:1;
-		uint32_t uatombs:1;
-		uint32_t reserved_25_31:7;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg067_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_21_31:11;
-		uint32_t ures:1;
-		uint32_t ecrces:1;
-		uint32_t mtlps:1;
-		uint32_t ros:1;
-		uint32_t ucs:1;
-		uint32_t cas:1;
-		uint32_t cts:1;
-		uint32_t fcpes:1;
-		uint32_t ptlps:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdes:1;
-		uint32_t dlpes:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpes:1;
-		uint32_t sdes:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlps:1;
-		uint32_t fcpes:1;
-		uint32_t cts:1;
-		uint32_t cas:1;
-		uint32_t ucs:1;
-		uint32_t ros:1;
-		uint32_t mtlps:1;
-		uint32_t ecrces:1;
-		uint32_t ures:1;
-		uint32_t reserved_21_31:11;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg067_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg067_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg067_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg067_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t uatombs:1;
-		uint32_t reserved_21_23:3;
-		uint32_t ures:1;
-		uint32_t ecrces:1;
-		uint32_t mtlps:1;
-		uint32_t ros:1;
-		uint32_t ucs:1;
-		uint32_t cas:1;
-		uint32_t cts:1;
-		uint32_t fcpes:1;
-		uint32_t ptlps:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdes:1;
-		uint32_t dlpes:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpes:1;
-		uint32_t sdes:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlps:1;
-		uint32_t fcpes:1;
-		uint32_t cts:1;
-		uint32_t cas:1;
-		uint32_t ucs:1;
-		uint32_t ros:1;
-		uint32_t mtlps:1;
-		uint32_t ecrces:1;
-		uint32_t ures:1;
-		uint32_t reserved_21_23:3;
-		uint32_t uatombs:1;
-		uint32_t reserved_25_31:7;
-#endif
-	} cn61xx;
-	struct cvmx_pciercx_cfg067_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg067_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg067_cn61xx cn66xx;
-	struct cvmx_pciercx_cfg067_cn61xx cn68xx;
-	struct cvmx_pciercx_cfg067_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg067_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg068 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg068_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_15_31:17;
-		uint32_t cies:1;
-		uint32_t anfes:1;
-		uint32_t rtts:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rnrs:1;
-		uint32_t bdllps:1;
-		uint32_t btlps:1;
-		uint32_t reserved_1_5:5;
-		uint32_t res:1;
-#else
-		uint32_t res:1;
-		uint32_t reserved_1_5:5;
-		uint32_t btlps:1;
-		uint32_t bdllps:1;
-		uint32_t rnrs:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rtts:1;
-		uint32_t anfes:1;
-		uint32_t cies:1;
-		uint32_t reserved_15_31:17;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg068_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_14_31:18;
-		uint32_t anfes:1;
-		uint32_t rtts:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rnrs:1;
-		uint32_t bdllps:1;
-		uint32_t btlps:1;
-		uint32_t reserved_1_5:5;
-		uint32_t res:1;
-#else
-		uint32_t res:1;
-		uint32_t reserved_1_5:5;
-		uint32_t btlps:1;
-		uint32_t bdllps:1;
-		uint32_t rnrs:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rtts:1;
-		uint32_t anfes:1;
-		uint32_t reserved_14_31:18;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg068_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg068_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg068_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg068_cn52xx cn61xx;
-	struct cvmx_pciercx_cfg068_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg068_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg068_cn52xx cn66xx;
-	struct cvmx_pciercx_cfg068_cn52xx cn68xx;
-	struct cvmx_pciercx_cfg068_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg068_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg069 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg069_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_15_31:17;
-		uint32_t ciem:1;
-		uint32_t anfem:1;
-		uint32_t rttm:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rnrm:1;
-		uint32_t bdllpm:1;
-		uint32_t btlpm:1;
-		uint32_t reserved_1_5:5;
-		uint32_t rem:1;
-#else
-		uint32_t rem:1;
-		uint32_t reserved_1_5:5;
-		uint32_t btlpm:1;
-		uint32_t bdllpm:1;
-		uint32_t rnrm:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rttm:1;
-		uint32_t anfem:1;
-		uint32_t ciem:1;
-		uint32_t reserved_15_31:17;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg069_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_14_31:18;
-		uint32_t anfem:1;
-		uint32_t rttm:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rnrm:1;
-		uint32_t bdllpm:1;
-		uint32_t btlpm:1;
-		uint32_t reserved_1_5:5;
-		uint32_t rem:1;
-#else
-		uint32_t rem:1;
-		uint32_t reserved_1_5:5;
-		uint32_t btlpm:1;
-		uint32_t bdllpm:1;
-		uint32_t rnrm:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rttm:1;
-		uint32_t anfem:1;
-		uint32_t reserved_14_31:18;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg069_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg069_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg069_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg069_cn52xx cn61xx;
-	struct cvmx_pciercx_cfg069_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg069_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg069_cn52xx cn66xx;
-	struct cvmx_pciercx_cfg069_cn52xx cn68xx;
-	struct cvmx_pciercx_cfg069_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg069_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg070 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg070_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_9_31:23;
-		uint32_t ce:1;
-		uint32_t cc:1;
-		uint32_t ge:1;
-		uint32_t gc:1;
-		uint32_t fep:5;
-#else
-		uint32_t fep:5;
-		uint32_t gc:1;
-		uint32_t ge:1;
-		uint32_t cc:1;
-		uint32_t ce:1;
-		uint32_t reserved_9_31:23;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg070_s cn52xx;
-	struct cvmx_pciercx_cfg070_s cn52xxp1;
-	struct cvmx_pciercx_cfg070_s cn56xx;
-	struct cvmx_pciercx_cfg070_s cn56xxp1;
-	struct cvmx_pciercx_cfg070_s cn61xx;
-	struct cvmx_pciercx_cfg070_s cn63xx;
-	struct cvmx_pciercx_cfg070_s cn63xxp1;
-	struct cvmx_pciercx_cfg070_s cn66xx;
-	struct cvmx_pciercx_cfg070_s cn68xx;
-	struct cvmx_pciercx_cfg070_s cn68xxp1;
-	struct cvmx_pciercx_cfg070_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg071 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg071_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dword1:32;
-#else
-		uint32_t dword1:32;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_12_31:20,
+		__BITFIELD_FIELD(uint32_t tplp:1,
+		__BITFIELD_FIELD(uint32_t reserved_9_10:2,
+		__BITFIELD_FIELD(uint32_t ce:1,
+		__BITFIELD_FIELD(uint32_t cc:1,
+		__BITFIELD_FIELD(uint32_t ge:1,
+		__BITFIELD_FIELD(uint32_t gc:1,
+		__BITFIELD_FIELD(uint32_t fep:5,
+		;))))))))
 	} s;
-	struct cvmx_pciercx_cfg071_s cn52xx;
-	struct cvmx_pciercx_cfg071_s cn52xxp1;
-	struct cvmx_pciercx_cfg071_s cn56xx;
-	struct cvmx_pciercx_cfg071_s cn56xxp1;
-	struct cvmx_pciercx_cfg071_s cn61xx;
-	struct cvmx_pciercx_cfg071_s cn63xx;
-	struct cvmx_pciercx_cfg071_s cn63xxp1;
-	struct cvmx_pciercx_cfg071_s cn66xx;
-	struct cvmx_pciercx_cfg071_s cn68xx;
-	struct cvmx_pciercx_cfg071_s cn68xxp1;
-	struct cvmx_pciercx_cfg071_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg072 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg072_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dword2:32;
-#else
-		uint32_t dword2:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg072_s cn52xx;
-	struct cvmx_pciercx_cfg072_s cn52xxp1;
-	struct cvmx_pciercx_cfg072_s cn56xx;
-	struct cvmx_pciercx_cfg072_s cn56xxp1;
-	struct cvmx_pciercx_cfg072_s cn61xx;
-	struct cvmx_pciercx_cfg072_s cn63xx;
-	struct cvmx_pciercx_cfg072_s cn63xxp1;
-	struct cvmx_pciercx_cfg072_s cn66xx;
-	struct cvmx_pciercx_cfg072_s cn68xx;
-	struct cvmx_pciercx_cfg072_s cn68xxp1;
-	struct cvmx_pciercx_cfg072_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg073 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg073_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dword3:32;
-#else
-		uint32_t dword3:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg073_s cn52xx;
-	struct cvmx_pciercx_cfg073_s cn52xxp1;
-	struct cvmx_pciercx_cfg073_s cn56xx;
-	struct cvmx_pciercx_cfg073_s cn56xxp1;
-	struct cvmx_pciercx_cfg073_s cn61xx;
-	struct cvmx_pciercx_cfg073_s cn63xx;
-	struct cvmx_pciercx_cfg073_s cn63xxp1;
-	struct cvmx_pciercx_cfg073_s cn66xx;
-	struct cvmx_pciercx_cfg073_s cn68xx;
-	struct cvmx_pciercx_cfg073_s cn68xxp1;
-	struct cvmx_pciercx_cfg073_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg074 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg074_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dword4:32;
-#else
-		uint32_t dword4:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg074_s cn52xx;
-	struct cvmx_pciercx_cfg074_s cn52xxp1;
-	struct cvmx_pciercx_cfg074_s cn56xx;
-	struct cvmx_pciercx_cfg074_s cn56xxp1;
-	struct cvmx_pciercx_cfg074_s cn61xx;
-	struct cvmx_pciercx_cfg074_s cn63xx;
-	struct cvmx_pciercx_cfg074_s cn63xxp1;
-	struct cvmx_pciercx_cfg074_s cn66xx;
-	struct cvmx_pciercx_cfg074_s cn68xx;
-	struct cvmx_pciercx_cfg074_s cn68xxp1;
-	struct cvmx_pciercx_cfg074_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg075 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg075_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_3_31:29;
-		uint32_t fere:1;
-		uint32_t nfere:1;
-		uint32_t cere:1;
-#else
-		uint32_t cere:1;
-		uint32_t nfere:1;
-		uint32_t fere:1;
-		uint32_t reserved_3_31:29;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg075_s cn52xx;
-	struct cvmx_pciercx_cfg075_s cn52xxp1;
-	struct cvmx_pciercx_cfg075_s cn56xx;
-	struct cvmx_pciercx_cfg075_s cn56xxp1;
-	struct cvmx_pciercx_cfg075_s cn61xx;
-	struct cvmx_pciercx_cfg075_s cn63xx;
-	struct cvmx_pciercx_cfg075_s cn63xxp1;
-	struct cvmx_pciercx_cfg075_s cn66xx;
-	struct cvmx_pciercx_cfg075_s cn68xx;
-	struct cvmx_pciercx_cfg075_s cn68xxp1;
-	struct cvmx_pciercx_cfg075_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg076 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg076_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t aeimn:5;
-		uint32_t reserved_7_26:20;
-		uint32_t femr:1;
-		uint32_t nfemr:1;
-		uint32_t fuf:1;
-		uint32_t multi_efnfr:1;
-		uint32_t efnfr:1;
-		uint32_t multi_ecr:1;
-		uint32_t ecr:1;
-#else
-		uint32_t ecr:1;
-		uint32_t multi_ecr:1;
-		uint32_t efnfr:1;
-		uint32_t multi_efnfr:1;
-		uint32_t fuf:1;
-		uint32_t nfemr:1;
-		uint32_t femr:1;
-		uint32_t reserved_7_26:20;
-		uint32_t aeimn:5;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_3_31:29,
+		__BITFIELD_FIELD(uint32_t fere:1,
+		__BITFIELD_FIELD(uint32_t nfere:1,
+		__BITFIELD_FIELD(uint32_t cere:1,
+		;))))
 	} s;
-	struct cvmx_pciercx_cfg076_s cn52xx;
-	struct cvmx_pciercx_cfg076_s cn52xxp1;
-	struct cvmx_pciercx_cfg076_s cn56xx;
-	struct cvmx_pciercx_cfg076_s cn56xxp1;
-	struct cvmx_pciercx_cfg076_s cn61xx;
-	struct cvmx_pciercx_cfg076_s cn63xx;
-	struct cvmx_pciercx_cfg076_s cn63xxp1;
-	struct cvmx_pciercx_cfg076_s cn66xx;
-	struct cvmx_pciercx_cfg076_s cn68xx;
-	struct cvmx_pciercx_cfg076_s cn68xxp1;
-	struct cvmx_pciercx_cfg076_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg077 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg077_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t efnfsi:16;
-		uint32_t ecsi:16;
-#else
-		uint32_t ecsi:16;
-		uint32_t efnfsi:16;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg077_s cn52xx;
-	struct cvmx_pciercx_cfg077_s cn52xxp1;
-	struct cvmx_pciercx_cfg077_s cn56xx;
-	struct cvmx_pciercx_cfg077_s cn56xxp1;
-	struct cvmx_pciercx_cfg077_s cn61xx;
-	struct cvmx_pciercx_cfg077_s cn63xx;
-	struct cvmx_pciercx_cfg077_s cn63xxp1;
-	struct cvmx_pciercx_cfg077_s cn66xx;
-	struct cvmx_pciercx_cfg077_s cn68xx;
-	struct cvmx_pciercx_cfg077_s cn68xxp1;
-	struct cvmx_pciercx_cfg077_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg448 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg448_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t rtl:16;
-		uint32_t rtltl:16;
-#else
-		uint32_t rtltl:16;
-		uint32_t rtl:16;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg448_s cn52xx;
-	struct cvmx_pciercx_cfg448_s cn52xxp1;
-	struct cvmx_pciercx_cfg448_s cn56xx;
-	struct cvmx_pciercx_cfg448_s cn56xxp1;
-	struct cvmx_pciercx_cfg448_s cn61xx;
-	struct cvmx_pciercx_cfg448_s cn63xx;
-	struct cvmx_pciercx_cfg448_s cn63xxp1;
-	struct cvmx_pciercx_cfg448_s cn66xx;
-	struct cvmx_pciercx_cfg448_s cn68xx;
-	struct cvmx_pciercx_cfg448_s cn68xxp1;
-	struct cvmx_pciercx_cfg448_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg449 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg449_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t omr:32;
-#else
-		uint32_t omr:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg449_s cn52xx;
-	struct cvmx_pciercx_cfg449_s cn52xxp1;
-	struct cvmx_pciercx_cfg449_s cn56xx;
-	struct cvmx_pciercx_cfg449_s cn56xxp1;
-	struct cvmx_pciercx_cfg449_s cn61xx;
-	struct cvmx_pciercx_cfg449_s cn63xx;
-	struct cvmx_pciercx_cfg449_s cn63xxp1;
-	struct cvmx_pciercx_cfg449_s cn66xx;
-	struct cvmx_pciercx_cfg449_s cn68xx;
-	struct cvmx_pciercx_cfg449_s cn68xxp1;
-	struct cvmx_pciercx_cfg449_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg450 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg450_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t lpec:8;
-		uint32_t reserved_22_23:2;
-		uint32_t link_state:6;
-		uint32_t force_link:1;
-		uint32_t reserved_8_14:7;
-		uint32_t link_num:8;
-#else
-		uint32_t link_num:8;
-		uint32_t reserved_8_14:7;
-		uint32_t force_link:1;
-		uint32_t link_state:6;
-		uint32_t reserved_22_23:2;
-		uint32_t lpec:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg450_s cn52xx;
-	struct cvmx_pciercx_cfg450_s cn52xxp1;
-	struct cvmx_pciercx_cfg450_s cn56xx;
-	struct cvmx_pciercx_cfg450_s cn56xxp1;
-	struct cvmx_pciercx_cfg450_s cn61xx;
-	struct cvmx_pciercx_cfg450_s cn63xx;
-	struct cvmx_pciercx_cfg450_s cn63xxp1;
-	struct cvmx_pciercx_cfg450_s cn66xx;
-	struct cvmx_pciercx_cfg450_s cn68xx;
-	struct cvmx_pciercx_cfg450_s cn68xxp1;
-	struct cvmx_pciercx_cfg450_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg451 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg451_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_31_31:1;
-		uint32_t easpml1:1;
-		uint32_t l1el:3;
-		uint32_t l0el:3;
-		uint32_t n_fts_cc:8;
-		uint32_t n_fts:8;
-		uint32_t ack_freq:8;
-#else
-		uint32_t ack_freq:8;
-		uint32_t n_fts:8;
-		uint32_t n_fts_cc:8;
-		uint32_t l0el:3;
-		uint32_t l1el:3;
-		uint32_t easpml1:1;
-		uint32_t reserved_31_31:1;
-#endif
+		__BITFIELD_FIELD(uint32_t rtl:16,
+		__BITFIELD_FIELD(uint32_t rtltl:16,
+		;))
 	} s;
-	struct cvmx_pciercx_cfg451_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_30_31:2;
-		uint32_t l1el:3;
-		uint32_t l0el:3;
-		uint32_t n_fts_cc:8;
-		uint32_t n_fts:8;
-		uint32_t ack_freq:8;
-#else
-		uint32_t ack_freq:8;
-		uint32_t n_fts:8;
-		uint32_t n_fts_cc:8;
-		uint32_t l0el:3;
-		uint32_t l1el:3;
-		uint32_t reserved_30_31:2;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg451_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg451_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg451_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg451_s cn61xx;
-	struct cvmx_pciercx_cfg451_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg451_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg451_s cn66xx;
-	struct cvmx_pciercx_cfg451_s cn68xx;
-	struct cvmx_pciercx_cfg451_s cn68xxp1;
-	struct cvmx_pciercx_cfg451_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg452 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg452_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_26_31:6;
-		uint32_t eccrc:1;
-		uint32_t reserved_22_24:3;
-		uint32_t lme:6;
-		uint32_t reserved_8_15:8;
-		uint32_t flm:1;
-		uint32_t reserved_6_6:1;
-		uint32_t dllle:1;
-		uint32_t reserved_4_4:1;
-		uint32_t ra:1;
-		uint32_t le:1;
-		uint32_t sd:1;
-		uint32_t omr:1;
-#else
-		uint32_t omr:1;
-		uint32_t sd:1;
-		uint32_t le:1;
-		uint32_t ra:1;
-		uint32_t reserved_4_4:1;
-		uint32_t dllle:1;
-		uint32_t reserved_6_6:1;
-		uint32_t flm:1;
-		uint32_t reserved_8_15:8;
-		uint32_t lme:6;
-		uint32_t reserved_22_24:3;
-		uint32_t eccrc:1;
-		uint32_t reserved_26_31:6;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_26_31:6,
+		__BITFIELD_FIELD(uint32_t eccrc:1,
+		__BITFIELD_FIELD(uint32_t reserved_22_24:3,
+		__BITFIELD_FIELD(uint32_t lme:6,
+		__BITFIELD_FIELD(uint32_t reserved_12_15:4,
+		__BITFIELD_FIELD(uint32_t link_rate:4,
+		__BITFIELD_FIELD(uint32_t flm:1,
+		__BITFIELD_FIELD(uint32_t reserved_6_6:1,
+		__BITFIELD_FIELD(uint32_t dllle:1,
+		__BITFIELD_FIELD(uint32_t reserved_4_4:1,
+		__BITFIELD_FIELD(uint32_t ra:1,
+		__BITFIELD_FIELD(uint32_t le:1,
+		__BITFIELD_FIELD(uint32_t sd:1,
+		__BITFIELD_FIELD(uint32_t omr:1,
+		;))))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg452_s cn52xx;
-	struct cvmx_pciercx_cfg452_s cn52xxp1;
-	struct cvmx_pciercx_cfg452_s cn56xx;
-	struct cvmx_pciercx_cfg452_s cn56xxp1;
-	struct cvmx_pciercx_cfg452_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_22_31:10;
-		uint32_t lme:6;
-		uint32_t reserved_8_15:8;
-		uint32_t flm:1;
-		uint32_t reserved_6_6:1;
-		uint32_t dllle:1;
-		uint32_t reserved_4_4:1;
-		uint32_t ra:1;
-		uint32_t le:1;
-		uint32_t sd:1;
-		uint32_t omr:1;
-#else
-		uint32_t omr:1;
-		uint32_t sd:1;
-		uint32_t le:1;
-		uint32_t ra:1;
-		uint32_t reserved_4_4:1;
-		uint32_t dllle:1;
-		uint32_t reserved_6_6:1;
-		uint32_t flm:1;
-		uint32_t reserved_8_15:8;
-		uint32_t lme:6;
-		uint32_t reserved_22_31:10;
-#endif
-	} cn61xx;
-	struct cvmx_pciercx_cfg452_s cn63xx;
-	struct cvmx_pciercx_cfg452_s cn63xxp1;
-	struct cvmx_pciercx_cfg452_cn61xx cn66xx;
-	struct cvmx_pciercx_cfg452_cn61xx cn68xx;
-	struct cvmx_pciercx_cfg452_cn61xx cn68xxp1;
-	struct cvmx_pciercx_cfg452_cn61xx cnf71xx;
-};
-
-union cvmx_pciercx_cfg453 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg453_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dlld:1;
-		uint32_t reserved_26_30:5;
-		uint32_t ack_nak:1;
-		uint32_t fcd:1;
-		uint32_t ilst:24;
-#else
-		uint32_t ilst:24;
-		uint32_t fcd:1;
-		uint32_t ack_nak:1;
-		uint32_t reserved_26_30:5;
-		uint32_t dlld:1;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg453_s cn52xx;
-	struct cvmx_pciercx_cfg453_s cn52xxp1;
-	struct cvmx_pciercx_cfg453_s cn56xx;
-	struct cvmx_pciercx_cfg453_s cn56xxp1;
-	struct cvmx_pciercx_cfg453_s cn61xx;
-	struct cvmx_pciercx_cfg453_s cn63xx;
-	struct cvmx_pciercx_cfg453_s cn63xxp1;
-	struct cvmx_pciercx_cfg453_s cn66xx;
-	struct cvmx_pciercx_cfg453_s cn68xx;
-	struct cvmx_pciercx_cfg453_s cn68xxp1;
-	struct cvmx_pciercx_cfg453_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg454 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg454_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t cx_nfunc:3;
-		uint32_t tmfcwt:5;
-		uint32_t tmanlt:5;
-		uint32_t tmrt:5;
-		uint32_t reserved_11_13:3;
-		uint32_t nskps:3;
-		uint32_t reserved_0_7:8;
-#else
-		uint32_t reserved_0_7:8;
-		uint32_t nskps:3;
-		uint32_t reserved_11_13:3;
-		uint32_t tmrt:5;
-		uint32_t tmanlt:5;
-		uint32_t tmfcwt:5;
-		uint32_t cx_nfunc:3;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg454_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_29_31:3;
-		uint32_t tmfcwt:5;
-		uint32_t tmanlt:5;
-		uint32_t tmrt:5;
-		uint32_t reserved_11_13:3;
-		uint32_t nskps:3;
-		uint32_t reserved_4_7:4;
-		uint32_t ntss:4;
-#else
-		uint32_t ntss:4;
-		uint32_t reserved_4_7:4;
-		uint32_t nskps:3;
-		uint32_t reserved_11_13:3;
-		uint32_t tmrt:5;
-		uint32_t tmanlt:5;
-		uint32_t tmfcwt:5;
-		uint32_t reserved_29_31:3;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg454_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg454_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg454_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg454_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t cx_nfunc:3;
-		uint32_t tmfcwt:5;
-		uint32_t tmanlt:5;
-		uint32_t tmrt:5;
-		uint32_t reserved_8_13:6;
-		uint32_t mfuncn:8;
-#else
-		uint32_t mfuncn:8;
-		uint32_t reserved_8_13:6;
-		uint32_t tmrt:5;
-		uint32_t tmanlt:5;
-		uint32_t tmfcwt:5;
-		uint32_t cx_nfunc:3;
-#endif
-	} cn61xx;
-	struct cvmx_pciercx_cfg454_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg454_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg454_cn61xx cn66xx;
-	struct cvmx_pciercx_cfg454_cn61xx cn68xx;
-	struct cvmx_pciercx_cfg454_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg454_cn61xx cnf71xx;
 };
 
 union cvmx_pciercx_cfg455 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg455_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t m_cfg0_filt:1;
-		uint32_t m_io_filt:1;
-		uint32_t msg_ctrl:1;
-		uint32_t m_cpl_ecrc_filt:1;
-		uint32_t m_ecrc_filt:1;
-		uint32_t m_cpl_len_err:1;
-		uint32_t m_cpl_attr_err:1;
-		uint32_t m_cpl_tc_err:1;
-		uint32_t m_cpl_fun_err:1;
-		uint32_t m_cpl_rid_err:1;
-		uint32_t m_cpl_tag_err:1;
-		uint32_t m_lk_filt:1;
-		uint32_t m_cfg1_filt:1;
-		uint32_t m_bar_match:1;
-		uint32_t m_pois_filt:1;
-		uint32_t m_fun:1;
-		uint32_t dfcwt:1;
-		uint32_t reserved_11_14:4;
-		uint32_t skpiv:11;
-#else
-		uint32_t skpiv:11;
-		uint32_t reserved_11_14:4;
-		uint32_t dfcwt:1;
-		uint32_t m_fun:1;
-		uint32_t m_pois_filt:1;
-		uint32_t m_bar_match:1;
-		uint32_t m_cfg1_filt:1;
-		uint32_t m_lk_filt:1;
-		uint32_t m_cpl_tag_err:1;
-		uint32_t m_cpl_rid_err:1;
-		uint32_t m_cpl_fun_err:1;
-		uint32_t m_cpl_tc_err:1;
-		uint32_t m_cpl_attr_err:1;
-		uint32_t m_cpl_len_err:1;
-		uint32_t m_ecrc_filt:1;
-		uint32_t m_cpl_ecrc_filt:1;
-		uint32_t msg_ctrl:1;
-		uint32_t m_io_filt:1;
-		uint32_t m_cfg0_filt:1;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg455_s cn52xx;
-	struct cvmx_pciercx_cfg455_s cn52xxp1;
-	struct cvmx_pciercx_cfg455_s cn56xx;
-	struct cvmx_pciercx_cfg455_s cn56xxp1;
-	struct cvmx_pciercx_cfg455_s cn61xx;
-	struct cvmx_pciercx_cfg455_s cn63xx;
-	struct cvmx_pciercx_cfg455_s cn63xxp1;
-	struct cvmx_pciercx_cfg455_s cn66xx;
-	struct cvmx_pciercx_cfg455_s cn68xx;
-	struct cvmx_pciercx_cfg455_s cn68xxp1;
-	struct cvmx_pciercx_cfg455_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg456 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg456_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_4_31:28;
-		uint32_t m_handle_flush:1;
-		uint32_t m_dabort_4ucpl:1;
-		uint32_t m_vend1_drp:1;
-		uint32_t m_vend0_drp:1;
-#else
-		uint32_t m_vend0_drp:1;
-		uint32_t m_vend1_drp:1;
-		uint32_t m_dabort_4ucpl:1;
-		uint32_t m_handle_flush:1;
-		uint32_t reserved_4_31:28;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg456_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_2_31:30;
-		uint32_t m_vend1_drp:1;
-		uint32_t m_vend0_drp:1;
-#else
-		uint32_t m_vend0_drp:1;
-		uint32_t m_vend1_drp:1;
-		uint32_t reserved_2_31:30;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg456_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg456_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg456_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg456_s cn61xx;
-	struct cvmx_pciercx_cfg456_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg456_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg456_s cn66xx;
-	struct cvmx_pciercx_cfg456_s cn68xx;
-	struct cvmx_pciercx_cfg456_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg456_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg458 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg458_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dbg_info_l32:32;
-#else
-		uint32_t dbg_info_l32:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg458_s cn52xx;
-	struct cvmx_pciercx_cfg458_s cn52xxp1;
-	struct cvmx_pciercx_cfg458_s cn56xx;
-	struct cvmx_pciercx_cfg458_s cn56xxp1;
-	struct cvmx_pciercx_cfg458_s cn61xx;
-	struct cvmx_pciercx_cfg458_s cn63xx;
-	struct cvmx_pciercx_cfg458_s cn63xxp1;
-	struct cvmx_pciercx_cfg458_s cn66xx;
-	struct cvmx_pciercx_cfg458_s cn68xx;
-	struct cvmx_pciercx_cfg458_s cn68xxp1;
-	struct cvmx_pciercx_cfg458_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg459 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg459_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dbg_info_u32:32;
-#else
-		uint32_t dbg_info_u32:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg459_s cn52xx;
-	struct cvmx_pciercx_cfg459_s cn52xxp1;
-	struct cvmx_pciercx_cfg459_s cn56xx;
-	struct cvmx_pciercx_cfg459_s cn56xxp1;
-	struct cvmx_pciercx_cfg459_s cn61xx;
-	struct cvmx_pciercx_cfg459_s cn63xx;
-	struct cvmx_pciercx_cfg459_s cn63xxp1;
-	struct cvmx_pciercx_cfg459_s cn66xx;
-	struct cvmx_pciercx_cfg459_s cn68xx;
-	struct cvmx_pciercx_cfg459_s cn68xxp1;
-	struct cvmx_pciercx_cfg459_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg460 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg460_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_20_31:12;
-		uint32_t tphfcc:8;
-		uint32_t tpdfcc:12;
-#else
-		uint32_t tpdfcc:12;
-		uint32_t tphfcc:8;
-		uint32_t reserved_20_31:12;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg460_s cn52xx;
-	struct cvmx_pciercx_cfg460_s cn52xxp1;
-	struct cvmx_pciercx_cfg460_s cn56xx;
-	struct cvmx_pciercx_cfg460_s cn56xxp1;
-	struct cvmx_pciercx_cfg460_s cn61xx;
-	struct cvmx_pciercx_cfg460_s cn63xx;
-	struct cvmx_pciercx_cfg460_s cn63xxp1;
-	struct cvmx_pciercx_cfg460_s cn66xx;
-	struct cvmx_pciercx_cfg460_s cn68xx;
-	struct cvmx_pciercx_cfg460_s cn68xxp1;
-	struct cvmx_pciercx_cfg460_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg461 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg461_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_20_31:12;
-		uint32_t tchfcc:8;
-		uint32_t tcdfcc:12;
-#else
-		uint32_t tcdfcc:12;
-		uint32_t tchfcc:8;
-		uint32_t reserved_20_31:12;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg461_s cn52xx;
-	struct cvmx_pciercx_cfg461_s cn52xxp1;
-	struct cvmx_pciercx_cfg461_s cn56xx;
-	struct cvmx_pciercx_cfg461_s cn56xxp1;
-	struct cvmx_pciercx_cfg461_s cn61xx;
-	struct cvmx_pciercx_cfg461_s cn63xx;
-	struct cvmx_pciercx_cfg461_s cn63xxp1;
-	struct cvmx_pciercx_cfg461_s cn66xx;
-	struct cvmx_pciercx_cfg461_s cn68xx;
-	struct cvmx_pciercx_cfg461_s cn68xxp1;
-	struct cvmx_pciercx_cfg461_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg462 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg462_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_20_31:12;
-		uint32_t tchfcc:8;
-		uint32_t tcdfcc:12;
-#else
-		uint32_t tcdfcc:12;
-		uint32_t tchfcc:8;
-		uint32_t reserved_20_31:12;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg462_s cn52xx;
-	struct cvmx_pciercx_cfg462_s cn52xxp1;
-	struct cvmx_pciercx_cfg462_s cn56xx;
-	struct cvmx_pciercx_cfg462_s cn56xxp1;
-	struct cvmx_pciercx_cfg462_s cn61xx;
-	struct cvmx_pciercx_cfg462_s cn63xx;
-	struct cvmx_pciercx_cfg462_s cn63xxp1;
-	struct cvmx_pciercx_cfg462_s cn66xx;
-	struct cvmx_pciercx_cfg462_s cn68xx;
-	struct cvmx_pciercx_cfg462_s cn68xxp1;
-	struct cvmx_pciercx_cfg462_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg463 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg463_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_3_31:29;
-		uint32_t rqne:1;
-		uint32_t trbne:1;
-		uint32_t rtlpfccnr:1;
-#else
-		uint32_t rtlpfccnr:1;
-		uint32_t trbne:1;
-		uint32_t rqne:1;
-		uint32_t reserved_3_31:29;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg463_s cn52xx;
-	struct cvmx_pciercx_cfg463_s cn52xxp1;
-	struct cvmx_pciercx_cfg463_s cn56xx;
-	struct cvmx_pciercx_cfg463_s cn56xxp1;
-	struct cvmx_pciercx_cfg463_s cn61xx;
-	struct cvmx_pciercx_cfg463_s cn63xx;
-	struct cvmx_pciercx_cfg463_s cn63xxp1;
-	struct cvmx_pciercx_cfg463_s cn66xx;
-	struct cvmx_pciercx_cfg463_s cn68xx;
-	struct cvmx_pciercx_cfg463_s cn68xxp1;
-	struct cvmx_pciercx_cfg463_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg464 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg464_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t wrr_vc3:8;
-		uint32_t wrr_vc2:8;
-		uint32_t wrr_vc1:8;
-		uint32_t wrr_vc0:8;
-#else
-		uint32_t wrr_vc0:8;
-		uint32_t wrr_vc1:8;
-		uint32_t wrr_vc2:8;
-		uint32_t wrr_vc3:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg464_s cn52xx;
-	struct cvmx_pciercx_cfg464_s cn52xxp1;
-	struct cvmx_pciercx_cfg464_s cn56xx;
-	struct cvmx_pciercx_cfg464_s cn56xxp1;
-	struct cvmx_pciercx_cfg464_s cn61xx;
-	struct cvmx_pciercx_cfg464_s cn63xx;
-	struct cvmx_pciercx_cfg464_s cn63xxp1;
-	struct cvmx_pciercx_cfg464_s cn66xx;
-	struct cvmx_pciercx_cfg464_s cn68xx;
-	struct cvmx_pciercx_cfg464_s cn68xxp1;
-	struct cvmx_pciercx_cfg464_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg465 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg465_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t wrr_vc7:8;
-		uint32_t wrr_vc6:8;
-		uint32_t wrr_vc5:8;
-		uint32_t wrr_vc4:8;
-#else
-		uint32_t wrr_vc4:8;
-		uint32_t wrr_vc5:8;
-		uint32_t wrr_vc6:8;
-		uint32_t wrr_vc7:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg465_s cn52xx;
-	struct cvmx_pciercx_cfg465_s cn52xxp1;
-	struct cvmx_pciercx_cfg465_s cn56xx;
-	struct cvmx_pciercx_cfg465_s cn56xxp1;
-	struct cvmx_pciercx_cfg465_s cn61xx;
-	struct cvmx_pciercx_cfg465_s cn63xx;
-	struct cvmx_pciercx_cfg465_s cn63xxp1;
-	struct cvmx_pciercx_cfg465_s cn66xx;
-	struct cvmx_pciercx_cfg465_s cn68xx;
-	struct cvmx_pciercx_cfg465_s cn68xxp1;
-	struct cvmx_pciercx_cfg465_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg466 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg466_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t rx_queue_order:1;
-		uint32_t type_ordering:1;
-		uint32_t reserved_24_29:6;
-		uint32_t queue_mode:3;
-		uint32_t reserved_20_20:1;
-		uint32_t header_credits:8;
-		uint32_t data_credits:12;
-#else
-		uint32_t data_credits:12;
-		uint32_t header_credits:8;
-		uint32_t reserved_20_20:1;
-		uint32_t queue_mode:3;
-		uint32_t reserved_24_29:6;
-		uint32_t type_ordering:1;
-		uint32_t rx_queue_order:1;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg466_s cn52xx;
-	struct cvmx_pciercx_cfg466_s cn52xxp1;
-	struct cvmx_pciercx_cfg466_s cn56xx;
-	struct cvmx_pciercx_cfg466_s cn56xxp1;
-	struct cvmx_pciercx_cfg466_s cn61xx;
-	struct cvmx_pciercx_cfg466_s cn63xx;
-	struct cvmx_pciercx_cfg466_s cn63xxp1;
-	struct cvmx_pciercx_cfg466_s cn66xx;
-	struct cvmx_pciercx_cfg466_s cn68xx;
-	struct cvmx_pciercx_cfg466_s cn68xxp1;
-	struct cvmx_pciercx_cfg466_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg467 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg467_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_24_31:8;
-		uint32_t queue_mode:3;
-		uint32_t reserved_20_20:1;
-		uint32_t header_credits:8;
-		uint32_t data_credits:12;
-#else
-		uint32_t data_credits:12;
-		uint32_t header_credits:8;
-		uint32_t reserved_20_20:1;
-		uint32_t queue_mode:3;
-		uint32_t reserved_24_31:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg467_s cn52xx;
-	struct cvmx_pciercx_cfg467_s cn52xxp1;
-	struct cvmx_pciercx_cfg467_s cn56xx;
-	struct cvmx_pciercx_cfg467_s cn56xxp1;
-	struct cvmx_pciercx_cfg467_s cn61xx;
-	struct cvmx_pciercx_cfg467_s cn63xx;
-	struct cvmx_pciercx_cfg467_s cn63xxp1;
-	struct cvmx_pciercx_cfg467_s cn66xx;
-	struct cvmx_pciercx_cfg467_s cn68xx;
-	struct cvmx_pciercx_cfg467_s cn68xxp1;
-	struct cvmx_pciercx_cfg467_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg468 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg468_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_24_31:8;
-		uint32_t queue_mode:3;
-		uint32_t reserved_20_20:1;
-		uint32_t header_credits:8;
-		uint32_t data_credits:12;
-#else
-		uint32_t data_credits:12;
-		uint32_t header_credits:8;
-		uint32_t reserved_20_20:1;
-		uint32_t queue_mode:3;
-		uint32_t reserved_24_31:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg468_s cn52xx;
-	struct cvmx_pciercx_cfg468_s cn52xxp1;
-	struct cvmx_pciercx_cfg468_s cn56xx;
-	struct cvmx_pciercx_cfg468_s cn56xxp1;
-	struct cvmx_pciercx_cfg468_s cn61xx;
-	struct cvmx_pciercx_cfg468_s cn63xx;
-	struct cvmx_pciercx_cfg468_s cn63xxp1;
-	struct cvmx_pciercx_cfg468_s cn66xx;
-	struct cvmx_pciercx_cfg468_s cn68xx;
-	struct cvmx_pciercx_cfg468_s cn68xxp1;
-	struct cvmx_pciercx_cfg468_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg490 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg490_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_26_31:6;
-		uint32_t header_depth:10;
-		uint32_t reserved_14_15:2;
-		uint32_t data_depth:14;
-#else
-		uint32_t data_depth:14;
-		uint32_t reserved_14_15:2;
-		uint32_t header_depth:10;
-		uint32_t reserved_26_31:6;
-#endif
+		__BITFIELD_FIELD(uint32_t m_cfg0_filt:1,
+		__BITFIELD_FIELD(uint32_t m_io_filt:1,
+		__BITFIELD_FIELD(uint32_t msg_ctrl:1,
+		__BITFIELD_FIELD(uint32_t m_cpl_ecrc_filt:1,
+		__BITFIELD_FIELD(uint32_t m_ecrc_filt:1,
+		__BITFIELD_FIELD(uint32_t m_cpl_len_err:1,
+		__BITFIELD_FIELD(uint32_t m_cpl_attr_err:1,
+		__BITFIELD_FIELD(uint32_t m_cpl_tc_err:1,
+		__BITFIELD_FIELD(uint32_t m_cpl_fun_err:1,
+		__BITFIELD_FIELD(uint32_t m_cpl_rid_err:1,
+		__BITFIELD_FIELD(uint32_t m_cpl_tag_err:1,
+		__BITFIELD_FIELD(uint32_t m_lk_filt:1,
+		__BITFIELD_FIELD(uint32_t m_cfg1_filt:1,
+		__BITFIELD_FIELD(uint32_t m_bar_match:1,
+		__BITFIELD_FIELD(uint32_t m_pois_filt:1,
+		__BITFIELD_FIELD(uint32_t m_fun:1,
+		__BITFIELD_FIELD(uint32_t dfcwt:1,
+		__BITFIELD_FIELD(uint32_t reserved_11_14:4,
+		__BITFIELD_FIELD(uint32_t skpiv:11,
+		;)))))))))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg490_s cn52xx;
-	struct cvmx_pciercx_cfg490_s cn52xxp1;
-	struct cvmx_pciercx_cfg490_s cn56xx;
-	struct cvmx_pciercx_cfg490_s cn56xxp1;
-	struct cvmx_pciercx_cfg490_s cn61xx;
-	struct cvmx_pciercx_cfg490_s cn63xx;
-	struct cvmx_pciercx_cfg490_s cn63xxp1;
-	struct cvmx_pciercx_cfg490_s cn66xx;
-	struct cvmx_pciercx_cfg490_s cn68xx;
-	struct cvmx_pciercx_cfg490_s cn68xxp1;
-	struct cvmx_pciercx_cfg490_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg491 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg491_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_26_31:6;
-		uint32_t header_depth:10;
-		uint32_t reserved_14_15:2;
-		uint32_t data_depth:14;
-#else
-		uint32_t data_depth:14;
-		uint32_t reserved_14_15:2;
-		uint32_t header_depth:10;
-		uint32_t reserved_26_31:6;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg491_s cn52xx;
-	struct cvmx_pciercx_cfg491_s cn52xxp1;
-	struct cvmx_pciercx_cfg491_s cn56xx;
-	struct cvmx_pciercx_cfg491_s cn56xxp1;
-	struct cvmx_pciercx_cfg491_s cn61xx;
-	struct cvmx_pciercx_cfg491_s cn63xx;
-	struct cvmx_pciercx_cfg491_s cn63xxp1;
-	struct cvmx_pciercx_cfg491_s cn66xx;
-	struct cvmx_pciercx_cfg491_s cn68xx;
-	struct cvmx_pciercx_cfg491_s cn68xxp1;
-	struct cvmx_pciercx_cfg491_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg492 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg492_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_26_31:6;
-		uint32_t header_depth:10;
-		uint32_t reserved_14_15:2;
-		uint32_t data_depth:14;
-#else
-		uint32_t data_depth:14;
-		uint32_t reserved_14_15:2;
-		uint32_t header_depth:10;
-		uint32_t reserved_26_31:6;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg492_s cn52xx;
-	struct cvmx_pciercx_cfg492_s cn52xxp1;
-	struct cvmx_pciercx_cfg492_s cn56xx;
-	struct cvmx_pciercx_cfg492_s cn56xxp1;
-	struct cvmx_pciercx_cfg492_s cn61xx;
-	struct cvmx_pciercx_cfg492_s cn63xx;
-	struct cvmx_pciercx_cfg492_s cn63xxp1;
-	struct cvmx_pciercx_cfg492_s cn66xx;
-	struct cvmx_pciercx_cfg492_s cn68xx;
-	struct cvmx_pciercx_cfg492_s cn68xxp1;
-	struct cvmx_pciercx_cfg492_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg515 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg515_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_21_31:11;
-		uint32_t s_d_e:1;
-		uint32_t ctcrb:1;
-		uint32_t cpyts:1;
-		uint32_t dsc:1;
-		uint32_t le:9;
-		uint32_t n_fts:8;
-#else
-		uint32_t n_fts:8;
-		uint32_t le:9;
-		uint32_t dsc:1;
-		uint32_t cpyts:1;
-		uint32_t ctcrb:1;
-		uint32_t s_d_e:1;
-		uint32_t reserved_21_31:11;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg515_s cn61xx;
-	struct cvmx_pciercx_cfg515_s cn63xx;
-	struct cvmx_pciercx_cfg515_s cn63xxp1;
-	struct cvmx_pciercx_cfg515_s cn66xx;
-	struct cvmx_pciercx_cfg515_s cn68xx;
-	struct cvmx_pciercx_cfg515_s cn68xxp1;
-	struct cvmx_pciercx_cfg515_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg516 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg516_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t phy_stat:32;
-#else
-		uint32_t phy_stat:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg516_s cn52xx;
-	struct cvmx_pciercx_cfg516_s cn52xxp1;
-	struct cvmx_pciercx_cfg516_s cn56xx;
-	struct cvmx_pciercx_cfg516_s cn56xxp1;
-	struct cvmx_pciercx_cfg516_s cn61xx;
-	struct cvmx_pciercx_cfg516_s cn63xx;
-	struct cvmx_pciercx_cfg516_s cn63xxp1;
-	struct cvmx_pciercx_cfg516_s cn66xx;
-	struct cvmx_pciercx_cfg516_s cn68xx;
-	struct cvmx_pciercx_cfg516_s cn68xxp1;
-	struct cvmx_pciercx_cfg516_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg517 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg517_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t phy_ctrl:32;
-#else
-		uint32_t phy_ctrl:32;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_21_31:11,
+		__BITFIELD_FIELD(uint32_t s_d_e:1,
+		__BITFIELD_FIELD(uint32_t ctcrb:1,
+		__BITFIELD_FIELD(uint32_t cpyts:1,
+		__BITFIELD_FIELD(uint32_t dsc:1,
+		__BITFIELD_FIELD(uint32_t le:9,
+		__BITFIELD_FIELD(uint32_t n_fts:8,
+		;)))))))
 	} s;
-	struct cvmx_pciercx_cfg517_s cn52xx;
-	struct cvmx_pciercx_cfg517_s cn52xxp1;
-	struct cvmx_pciercx_cfg517_s cn56xx;
-	struct cvmx_pciercx_cfg517_s cn56xxp1;
-	struct cvmx_pciercx_cfg517_s cn61xx;
-	struct cvmx_pciercx_cfg517_s cn63xx;
-	struct cvmx_pciercx_cfg517_s cn63xxp1;
-	struct cvmx_pciercx_cfg517_s cn66xx;
-	struct cvmx_pciercx_cfg517_s cn68xx;
-	struct cvmx_pciercx_cfg517_s cn68xxp1;
-	struct cvmx_pciercx_cfg517_s cnf71xx;
 };
 
 #endif
diff --git a/arch/mips/include/asm/octeon/cvmx-sli-defs.h b/arch/mips/include/asm/octeon/cvmx-sli-defs.h
index e697c2f52a62..52cf96ea43e5 100644
--- a/arch/mips/include/asm/octeon/cvmx-sli-defs.h
+++ b/arch/mips/include/asm/octeon/cvmx-sli-defs.h
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2012 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -28,3494 +28,101 @@
 #ifndef __CVMX_SLI_DEFS_H__
 #define __CVMX_SLI_DEFS_H__
 
-#define CVMX_SLI_BIST_STATUS (0x0000000000000580ull)
-#define CVMX_SLI_CTL_PORTX(offset) (0x0000000000000050ull + ((offset) & 3) * 16)
-#define CVMX_SLI_CTL_STATUS (0x0000000000000570ull)
-#define CVMX_SLI_DATA_OUT_CNT (0x00000000000005F0ull)
-#define CVMX_SLI_DBG_DATA (0x0000000000000310ull)
-#define CVMX_SLI_DBG_SELECT (0x0000000000000300ull)
-#define CVMX_SLI_DMAX_CNT(offset) (0x0000000000000400ull + ((offset) & 1) * 16)
-#define CVMX_SLI_DMAX_INT_LEVEL(offset) (0x00000000000003E0ull + ((offset) & 1) * 16)
-#define CVMX_SLI_DMAX_TIM(offset) (0x0000000000000420ull + ((offset) & 1) * 16)
-#define CVMX_SLI_INT_ENB_CIU (0x0000000000003CD0ull)
-#define CVMX_SLI_INT_ENB_PORTX(offset) (0x0000000000000340ull + ((offset) & 1) * 16)
-#define CVMX_SLI_INT_SUM (0x0000000000000330ull)
-#define CVMX_SLI_LAST_WIN_RDATA0 (0x0000000000000600ull)
-#define CVMX_SLI_LAST_WIN_RDATA1 (0x0000000000000610ull)
-#define CVMX_SLI_LAST_WIN_RDATA2 (0x00000000000006C0ull)
-#define CVMX_SLI_LAST_WIN_RDATA3 (0x00000000000006D0ull)
-#define CVMX_SLI_MAC_CREDIT_CNT (0x0000000000003D70ull)
-#define CVMX_SLI_MAC_CREDIT_CNT2 (0x0000000000003E10ull)
-#define CVMX_SLI_MAC_NUMBER (0x0000000000003E00ull)
-#define CVMX_SLI_MEM_ACCESS_CTL (0x00000000000002F0ull)
-#define CVMX_SLI_MEM_ACCESS_SUBIDX(offset) (0x00000000000000E0ull + ((offset) & 31) * 16 - 16*12)
-#define CVMX_SLI_MSI_ENB0 (0x0000000000003C50ull)
-#define CVMX_SLI_MSI_ENB1 (0x0000000000003C60ull)
-#define CVMX_SLI_MSI_ENB2 (0x0000000000003C70ull)
-#define CVMX_SLI_MSI_ENB3 (0x0000000000003C80ull)
-#define CVMX_SLI_MSI_RCV0 (0x0000000000003C10ull)
-#define CVMX_SLI_MSI_RCV1 (0x0000000000003C20ull)
-#define CVMX_SLI_MSI_RCV2 (0x0000000000003C30ull)
-#define CVMX_SLI_MSI_RCV3 (0x0000000000003C40ull)
-#define CVMX_SLI_MSI_RD_MAP (0x0000000000003CA0ull)
-#define CVMX_SLI_MSI_W1C_ENB0 (0x0000000000003CF0ull)
-#define CVMX_SLI_MSI_W1C_ENB1 (0x0000000000003D00ull)
-#define CVMX_SLI_MSI_W1C_ENB2 (0x0000000000003D10ull)
-#define CVMX_SLI_MSI_W1C_ENB3 (0x0000000000003D20ull)
-#define CVMX_SLI_MSI_W1S_ENB0 (0x0000000000003D30ull)
-#define CVMX_SLI_MSI_W1S_ENB1 (0x0000000000003D40ull)
-#define CVMX_SLI_MSI_W1S_ENB2 (0x0000000000003D50ull)
-#define CVMX_SLI_MSI_W1S_ENB3 (0x0000000000003D60ull)
-#define CVMX_SLI_MSI_WR_MAP (0x0000000000003C90ull)
-#define CVMX_SLI_PCIE_MSI_RCV (0x0000000000003CB0ull)
-#define CVMX_SLI_PCIE_MSI_RCV_B1 (0x0000000000000650ull)
-#define CVMX_SLI_PCIE_MSI_RCV_B2 (0x0000000000000660ull)
-#define CVMX_SLI_PCIE_MSI_RCV_B3 (0x0000000000000670ull)
-#define CVMX_SLI_PKTX_CNTS(offset) (0x0000000000002400ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_INSTR_BADDR(offset) (0x0000000000002800ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_INSTR_BAOFF_DBELL(offset) (0x0000000000002C00ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_INSTR_FIFO_RSIZE(offset) (0x0000000000003000ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_INSTR_HEADER(offset) (0x0000000000003400ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_IN_BP(offset) (0x0000000000003800ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_OUT_SIZE(offset) (0x0000000000000C00ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_SLIST_BADDR(offset) (0x0000000000001400ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_SLIST_BAOFF_DBELL(offset) (0x0000000000001800ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_SLIST_FIFO_RSIZE(offset) (0x0000000000001C00ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKT_CNT_INT (0x0000000000001130ull)
-#define CVMX_SLI_PKT_CNT_INT_ENB (0x0000000000001150ull)
-#define CVMX_SLI_PKT_CTL (0x0000000000001220ull)
-#define CVMX_SLI_PKT_DATA_OUT_ES (0x00000000000010B0ull)
-#define CVMX_SLI_PKT_DATA_OUT_NS (0x00000000000010A0ull)
-#define CVMX_SLI_PKT_DATA_OUT_ROR (0x0000000000001090ull)
-#define CVMX_SLI_PKT_DPADDR (0x0000000000001080ull)
-#define CVMX_SLI_PKT_INPUT_CONTROL (0x0000000000001170ull)
-#define CVMX_SLI_PKT_INSTR_ENB (0x0000000000001000ull)
-#define CVMX_SLI_PKT_INSTR_RD_SIZE (0x00000000000011A0ull)
-#define CVMX_SLI_PKT_INSTR_SIZE (0x0000000000001020ull)
-#define CVMX_SLI_PKT_INT_LEVELS (0x0000000000001120ull)
-#define CVMX_SLI_PKT_IN_BP (0x0000000000001210ull)
-#define CVMX_SLI_PKT_IN_DONEX_CNTS(offset) (0x0000000000002000ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKT_IN_INSTR_COUNTS (0x0000000000001200ull)
-#define CVMX_SLI_PKT_IN_PCIE_PORT (0x00000000000011B0ull)
-#define CVMX_SLI_PKT_IPTR (0x0000000000001070ull)
-#define CVMX_SLI_PKT_OUTPUT_WMARK (0x0000000000001180ull)
-#define CVMX_SLI_PKT_OUT_BMODE (0x00000000000010D0ull)
-#define CVMX_SLI_PKT_OUT_BP_EN (0x0000000000001240ull)
-#define CVMX_SLI_PKT_OUT_ENB (0x0000000000001010ull)
-#define CVMX_SLI_PKT_PCIE_PORT (0x00000000000010E0ull)
-#define CVMX_SLI_PKT_PORT_IN_RST (0x00000000000011F0ull)
-#define CVMX_SLI_PKT_SLIST_ES (0x0000000000001050ull)
-#define CVMX_SLI_PKT_SLIST_NS (0x0000000000001040ull)
-#define CVMX_SLI_PKT_SLIST_ROR (0x0000000000001030ull)
-#define CVMX_SLI_PKT_TIME_INT (0x0000000000001140ull)
-#define CVMX_SLI_PKT_TIME_INT_ENB (0x0000000000001160ull)
-#define CVMX_SLI_PORTX_PKIND(offset) (0x0000000000000800ull + ((offset) & 31) * 16)
-#define CVMX_SLI_S2M_PORTX_CTL(offset) (0x0000000000003D80ull + ((offset) & 3) * 16)
-#define CVMX_SLI_SCRATCH_1 (0x00000000000003C0ull)
-#define CVMX_SLI_SCRATCH_2 (0x00000000000003D0ull)
-#define CVMX_SLI_STATE1 (0x0000000000000620ull)
-#define CVMX_SLI_STATE2 (0x0000000000000630ull)
-#define CVMX_SLI_STATE3 (0x0000000000000640ull)
-#define CVMX_SLI_TX_PIPE (0x0000000000001230ull)
-#define CVMX_SLI_WINDOW_CTL (0x00000000000002E0ull)
-#define CVMX_SLI_WIN_RD_ADDR (0x0000000000000010ull)
-#define CVMX_SLI_WIN_RD_DATA (0x0000000000000040ull)
-#define CVMX_SLI_WIN_WR_ADDR (0x0000000000000000ull)
-#define CVMX_SLI_WIN_WR_DATA (0x0000000000000020ull)
-#define CVMX_SLI_WIN_WR_MASK (0x0000000000000030ull)
+#include <uapi/asm/bitfield.h>
+
+#define CVMX_SLI_PCIE_MSI_RCV CVMX_SLI_PCIE_MSI_RCV_FUNC()
+static inline uint64_t CVMX_SLI_PCIE_MSI_RCV_FUNC(void)
+{
+	switch (cvmx_get_octeon_family()) {
+	case OCTEON_CNF71XX & OCTEON_FAMILY_MASK:
+	case OCTEON_CN61XX & OCTEON_FAMILY_MASK:
+	case OCTEON_CN63XX & OCTEON_FAMILY_MASK:
+	case OCTEON_CN66XX & OCTEON_FAMILY_MASK:
+	case OCTEON_CN68XX & OCTEON_FAMILY_MASK:
+	case OCTEON_CN70XX & OCTEON_FAMILY_MASK:
+		return 0x0000000000003CB0ull;
+	case OCTEON_CNF75XX & OCTEON_FAMILY_MASK:
+	case OCTEON_CN73XX & OCTEON_FAMILY_MASK:
+	case OCTEON_CN78XX & OCTEON_FAMILY_MASK:
+		if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X))
+			return 0x0000000000003CB0ull;
+	default:
+		return 0x0000000000023CB0ull;
+	}
+}
 
-union cvmx_sli_bist_status {
-	uint64_t u64;
-	struct cvmx_sli_bist_status_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t ncb_req:1;
-		uint64_t n2p0_c:1;
-		uint64_t n2p0_o:1;
-		uint64_t n2p1_c:1;
-		uint64_t n2p1_o:1;
-		uint64_t cpl_p0:1;
-		uint64_t cpl_p1:1;
-		uint64_t reserved_19_24:6;
-		uint64_t p2n0_c0:1;
-		uint64_t p2n0_c1:1;
-		uint64_t p2n0_n:1;
-		uint64_t p2n0_p0:1;
-		uint64_t p2n0_p1:1;
-		uint64_t p2n1_c0:1;
-		uint64_t p2n1_c1:1;
-		uint64_t p2n1_n:1;
-		uint64_t p2n1_p0:1;
-		uint64_t p2n1_p1:1;
-		uint64_t reserved_6_8:3;
-		uint64_t dsi1_1:1;
-		uint64_t dsi1_0:1;
-		uint64_t dsi0_1:1;
-		uint64_t dsi0_0:1;
-		uint64_t msi:1;
-		uint64_t ncb_cmd:1;
-#else
-		uint64_t ncb_cmd:1;
-		uint64_t msi:1;
-		uint64_t dsi0_0:1;
-		uint64_t dsi0_1:1;
-		uint64_t dsi1_0:1;
-		uint64_t dsi1_1:1;
-		uint64_t reserved_6_8:3;
-		uint64_t p2n1_p1:1;
-		uint64_t p2n1_p0:1;
-		uint64_t p2n1_n:1;
-		uint64_t p2n1_c1:1;
-		uint64_t p2n1_c0:1;
-		uint64_t p2n0_p1:1;
-		uint64_t p2n0_p0:1;
-		uint64_t p2n0_n:1;
-		uint64_t p2n0_c1:1;
-		uint64_t p2n0_c0:1;
-		uint64_t reserved_19_24:6;
-		uint64_t cpl_p1:1;
-		uint64_t cpl_p0:1;
-		uint64_t n2p1_o:1;
-		uint64_t n2p1_c:1;
-		uint64_t n2p0_o:1;
-		uint64_t n2p0_c:1;
-		uint64_t ncb_req:1;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_bist_status_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_31_63:33;
-		uint64_t n2p0_c:1;
-		uint64_t n2p0_o:1;
-		uint64_t reserved_27_28:2;
-		uint64_t cpl_p0:1;
-		uint64_t cpl_p1:1;
-		uint64_t reserved_19_24:6;
-		uint64_t p2n0_c0:1;
-		uint64_t p2n0_c1:1;
-		uint64_t p2n0_n:1;
-		uint64_t p2n0_p0:1;
-		uint64_t p2n0_p1:1;
-		uint64_t p2n1_c0:1;
-		uint64_t p2n1_c1:1;
-		uint64_t p2n1_n:1;
-		uint64_t p2n1_p0:1;
-		uint64_t p2n1_p1:1;
-		uint64_t reserved_6_8:3;
-		uint64_t dsi1_1:1;
-		uint64_t dsi1_0:1;
-		uint64_t dsi0_1:1;
-		uint64_t dsi0_0:1;
-		uint64_t msi:1;
-		uint64_t ncb_cmd:1;
-#else
-		uint64_t ncb_cmd:1;
-		uint64_t msi:1;
-		uint64_t dsi0_0:1;
-		uint64_t dsi0_1:1;
-		uint64_t dsi1_0:1;
-		uint64_t dsi1_1:1;
-		uint64_t reserved_6_8:3;
-		uint64_t p2n1_p1:1;
-		uint64_t p2n1_p0:1;
-		uint64_t p2n1_n:1;
-		uint64_t p2n1_c1:1;
-		uint64_t p2n1_c0:1;
-		uint64_t p2n0_p1:1;
-		uint64_t p2n0_p0:1;
-		uint64_t p2n0_n:1;
-		uint64_t p2n0_c1:1;
-		uint64_t p2n0_c0:1;
-		uint64_t reserved_19_24:6;
-		uint64_t cpl_p1:1;
-		uint64_t cpl_p0:1;
-		uint64_t reserved_27_28:2;
-		uint64_t n2p0_o:1;
-		uint64_t n2p0_c:1;
-		uint64_t reserved_31_63:33;
-#endif
-	} cn61xx;
-	struct cvmx_sli_bist_status_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_31_63:33;
-		uint64_t n2p0_c:1;
-		uint64_t n2p0_o:1;
-		uint64_t n2p1_c:1;
-		uint64_t n2p1_o:1;
-		uint64_t cpl_p0:1;
-		uint64_t cpl_p1:1;
-		uint64_t reserved_19_24:6;
-		uint64_t p2n0_c0:1;
-		uint64_t p2n0_c1:1;
-		uint64_t p2n0_n:1;
-		uint64_t p2n0_p0:1;
-		uint64_t p2n0_p1:1;
-		uint64_t p2n1_c0:1;
-		uint64_t p2n1_c1:1;
-		uint64_t p2n1_n:1;
-		uint64_t p2n1_p0:1;
-		uint64_t p2n1_p1:1;
-		uint64_t reserved_6_8:3;
-		uint64_t dsi1_1:1;
-		uint64_t dsi1_0:1;
-		uint64_t dsi0_1:1;
-		uint64_t dsi0_0:1;
-		uint64_t msi:1;
-		uint64_t ncb_cmd:1;
-#else
-		uint64_t ncb_cmd:1;
-		uint64_t msi:1;
-		uint64_t dsi0_0:1;
-		uint64_t dsi0_1:1;
-		uint64_t dsi1_0:1;
-		uint64_t dsi1_1:1;
-		uint64_t reserved_6_8:3;
-		uint64_t p2n1_p1:1;
-		uint64_t p2n1_p0:1;
-		uint64_t p2n1_n:1;
-		uint64_t p2n1_c1:1;
-		uint64_t p2n1_c0:1;
-		uint64_t p2n0_p1:1;
-		uint64_t p2n0_p0:1;
-		uint64_t p2n0_n:1;
-		uint64_t p2n0_c1:1;
-		uint64_t p2n0_c0:1;
-		uint64_t reserved_19_24:6;
-		uint64_t cpl_p1:1;
-		uint64_t cpl_p0:1;
-		uint64_t n2p1_o:1;
-		uint64_t n2p1_c:1;
-		uint64_t n2p0_o:1;
-		uint64_t n2p0_c:1;
-		uint64_t reserved_31_63:33;
-#endif
-	} cn63xx;
-	struct cvmx_sli_bist_status_cn63xx cn63xxp1;
-	struct cvmx_sli_bist_status_cn61xx cn66xx;
-	struct cvmx_sli_bist_status_s cn68xx;
-	struct cvmx_sli_bist_status_s cn68xxp1;
-	struct cvmx_sli_bist_status_cn61xx cnf71xx;
-};
 
 union cvmx_sli_ctl_portx {
 	uint64_t u64;
 	struct cvmx_sli_ctl_portx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_22_63:42;
-		uint64_t intd:1;
-		uint64_t intc:1;
-		uint64_t intb:1;
-		uint64_t inta:1;
-		uint64_t dis_port:1;
-		uint64_t waitl_com:1;
-		uint64_t intd_map:2;
-		uint64_t intc_map:2;
-		uint64_t intb_map:2;
-		uint64_t inta_map:2;
-		uint64_t ctlp_ro:1;
-		uint64_t reserved_6_6:1;
-		uint64_t ptlp_ro:1;
-		uint64_t reserved_1_4:4;
-		uint64_t wait_com:1;
-#else
-		uint64_t wait_com:1;
-		uint64_t reserved_1_4:4;
-		uint64_t ptlp_ro:1;
-		uint64_t reserved_6_6:1;
-		uint64_t ctlp_ro:1;
-		uint64_t inta_map:2;
-		uint64_t intb_map:2;
-		uint64_t intc_map:2;
-		uint64_t intd_map:2;
-		uint64_t waitl_com:1;
-		uint64_t dis_port:1;
-		uint64_t inta:1;
-		uint64_t intb:1;
-		uint64_t intc:1;
-		uint64_t intd:1;
-		uint64_t reserved_22_63:42;
-#endif
-	} s;
-	struct cvmx_sli_ctl_portx_s cn61xx;
-	struct cvmx_sli_ctl_portx_s cn63xx;
-	struct cvmx_sli_ctl_portx_s cn63xxp1;
-	struct cvmx_sli_ctl_portx_s cn66xx;
-	struct cvmx_sli_ctl_portx_s cn68xx;
-	struct cvmx_sli_ctl_portx_s cn68xxp1;
-	struct cvmx_sli_ctl_portx_s cnf71xx;
-};
-
-union cvmx_sli_ctl_status {
-	uint64_t u64;
-	struct cvmx_sli_ctl_status_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_20_63:44;
-		uint64_t p1_ntags:6;
-		uint64_t p0_ntags:6;
-		uint64_t chip_rev:8;
-#else
-		uint64_t chip_rev:8;
-		uint64_t p0_ntags:6;
-		uint64_t p1_ntags:6;
-		uint64_t reserved_20_63:44;
-#endif
-	} s;
-	struct cvmx_sli_ctl_status_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t p0_ntags:6;
-		uint64_t chip_rev:8;
-#else
-		uint64_t chip_rev:8;
-		uint64_t p0_ntags:6;
-		uint64_t reserved_14_63:50;
-#endif
-	} cn61xx;
-	struct cvmx_sli_ctl_status_s cn63xx;
-	struct cvmx_sli_ctl_status_s cn63xxp1;
-	struct cvmx_sli_ctl_status_cn61xx cn66xx;
-	struct cvmx_sli_ctl_status_s cn68xx;
-	struct cvmx_sli_ctl_status_s cn68xxp1;
-	struct cvmx_sli_ctl_status_cn61xx cnf71xx;
-};
-
-union cvmx_sli_data_out_cnt {
-	uint64_t u64;
-	struct cvmx_sli_data_out_cnt_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_44_63:20;
-		uint64_t p1_ucnt:16;
-		uint64_t p1_fcnt:6;
-		uint64_t p0_ucnt:16;
-		uint64_t p0_fcnt:6;
-#else
-		uint64_t p0_fcnt:6;
-		uint64_t p0_ucnt:16;
-		uint64_t p1_fcnt:6;
-		uint64_t p1_ucnt:16;
-		uint64_t reserved_44_63:20;
-#endif
-	} s;
-	struct cvmx_sli_data_out_cnt_s cn61xx;
-	struct cvmx_sli_data_out_cnt_s cn63xx;
-	struct cvmx_sli_data_out_cnt_s cn63xxp1;
-	struct cvmx_sli_data_out_cnt_s cn66xx;
-	struct cvmx_sli_data_out_cnt_s cn68xx;
-	struct cvmx_sli_data_out_cnt_s cn68xxp1;
-	struct cvmx_sli_data_out_cnt_s cnf71xx;
-};
-
-union cvmx_sli_dbg_data {
-	uint64_t u64;
-	struct cvmx_sli_dbg_data_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_18_63:46;
-		uint64_t dsel_ext:1;
-		uint64_t data:17;
-#else
-		uint64_t data:17;
-		uint64_t dsel_ext:1;
-		uint64_t reserved_18_63:46;
-#endif
-	} s;
-	struct cvmx_sli_dbg_data_s cn61xx;
-	struct cvmx_sli_dbg_data_s cn63xx;
-	struct cvmx_sli_dbg_data_s cn63xxp1;
-	struct cvmx_sli_dbg_data_s cn66xx;
-	struct cvmx_sli_dbg_data_s cn68xx;
-	struct cvmx_sli_dbg_data_s cn68xxp1;
-	struct cvmx_sli_dbg_data_s cnf71xx;
-};
-
-union cvmx_sli_dbg_select {
-	uint64_t u64;
-	struct cvmx_sli_dbg_select_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_33_63:31;
-		uint64_t adbg_sel:1;
-		uint64_t dbg_sel:32;
-#else
-		uint64_t dbg_sel:32;
-		uint64_t adbg_sel:1;
-		uint64_t reserved_33_63:31;
-#endif
-	} s;
-	struct cvmx_sli_dbg_select_s cn61xx;
-	struct cvmx_sli_dbg_select_s cn63xx;
-	struct cvmx_sli_dbg_select_s cn63xxp1;
-	struct cvmx_sli_dbg_select_s cn66xx;
-	struct cvmx_sli_dbg_select_s cn68xx;
-	struct cvmx_sli_dbg_select_s cn68xxp1;
-	struct cvmx_sli_dbg_select_s cnf71xx;
-};
-
-union cvmx_sli_dmax_cnt {
-	uint64_t u64;
-	struct cvmx_sli_dmax_cnt_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t cnt:32;
-#else
-		uint64_t cnt:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_dmax_cnt_s cn61xx;
-	struct cvmx_sli_dmax_cnt_s cn63xx;
-	struct cvmx_sli_dmax_cnt_s cn63xxp1;
-	struct cvmx_sli_dmax_cnt_s cn66xx;
-	struct cvmx_sli_dmax_cnt_s cn68xx;
-	struct cvmx_sli_dmax_cnt_s cn68xxp1;
-	struct cvmx_sli_dmax_cnt_s cnf71xx;
-};
-
-union cvmx_sli_dmax_int_level {
-	uint64_t u64;
-	struct cvmx_sli_dmax_int_level_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t time:32;
-		uint64_t cnt:32;
-#else
-		uint64_t cnt:32;
-		uint64_t time:32;
-#endif
-	} s;
-	struct cvmx_sli_dmax_int_level_s cn61xx;
-	struct cvmx_sli_dmax_int_level_s cn63xx;
-	struct cvmx_sli_dmax_int_level_s cn63xxp1;
-	struct cvmx_sli_dmax_int_level_s cn66xx;
-	struct cvmx_sli_dmax_int_level_s cn68xx;
-	struct cvmx_sli_dmax_int_level_s cn68xxp1;
-	struct cvmx_sli_dmax_int_level_s cnf71xx;
-};
-
-union cvmx_sli_dmax_tim {
-	uint64_t u64;
-	struct cvmx_sli_dmax_tim_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t tim:32;
-#else
-		uint64_t tim:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_dmax_tim_s cn61xx;
-	struct cvmx_sli_dmax_tim_s cn63xx;
-	struct cvmx_sli_dmax_tim_s cn63xxp1;
-	struct cvmx_sli_dmax_tim_s cn66xx;
-	struct cvmx_sli_dmax_tim_s cn68xx;
-	struct cvmx_sli_dmax_tim_s cn68xxp1;
-	struct cvmx_sli_dmax_tim_s cnf71xx;
-};
-
-union cvmx_sli_int_enb_ciu {
-	uint64_t u64;
-	struct cvmx_sli_int_enb_ciu_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_62_63:2;
-		uint64_t pipe_err:1;
-		uint64_t ill_pad:1;
-		uint64_t sprt3_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_28_31:4;
-		uint64_t m3_un_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_up_b0:1;
-		uint64_t reserved_18_19:2;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t reserved_18_19:2;
-		uint64_t m2_up_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_un_wi:1;
-		uint64_t reserved_28_31:4;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt3_err:1;
-		uint64_t ill_pad:1;
-		uint64_t pipe_err:1;
-		uint64_t reserved_62_63:2;
-#endif
-	} s;
-	struct cvmx_sli_int_enb_ciu_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_61_63:3;
-		uint64_t ill_pad:1;
-		uint64_t sprt3_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_28_31:4;
-		uint64_t m3_un_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_up_b0:1;
-		uint64_t reserved_18_19:2;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t reserved_18_19:2;
-		uint64_t m2_up_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_un_wi:1;
-		uint64_t reserved_28_31:4;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt3_err:1;
-		uint64_t ill_pad:1;
-		uint64_t reserved_61_63:3;
-#endif
-	} cn61xx;
-	struct cvmx_sli_int_enb_ciu_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_61_63:3;
-		uint64_t ill_pad:1;
-		uint64_t reserved_58_59:2;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_18_31:14;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t reserved_18_31:14;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t reserved_58_59:2;
-		uint64_t ill_pad:1;
-		uint64_t reserved_61_63:3;
-#endif
-	} cn63xx;
-	struct cvmx_sli_int_enb_ciu_cn63xx cn63xxp1;
-	struct cvmx_sli_int_enb_ciu_cn61xx cn66xx;
-	struct cvmx_sli_int_enb_ciu_cn68xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_62_63:2;
-		uint64_t pipe_err:1;
-		uint64_t ill_pad:1;
-		uint64_t reserved_58_59:2;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t reserved_51_51:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_18_31:14;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t reserved_18_31:14;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t reserved_51_51:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t reserved_58_59:2;
-		uint64_t ill_pad:1;
-		uint64_t pipe_err:1;
-		uint64_t reserved_62_63:2;
-#endif
-	} cn68xx;
-	struct cvmx_sli_int_enb_ciu_cn68xx cn68xxp1;
-	struct cvmx_sli_int_enb_ciu_cn61xx cnf71xx;
-};
-
-union cvmx_sli_int_enb_portx {
-	uint64_t u64;
-	struct cvmx_sli_int_enb_portx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_62_63:2;
-		uint64_t pipe_err:1;
-		uint64_t ill_pad:1;
-		uint64_t sprt3_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_28_31:4;
-		uint64_t m3_un_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_up_b0:1;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t m2_up_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_un_wi:1;
-		uint64_t reserved_28_31:4;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt3_err:1;
-		uint64_t ill_pad:1;
-		uint64_t pipe_err:1;
-		uint64_t reserved_62_63:2;
-#endif
-	} s;
-	struct cvmx_sli_int_enb_portx_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_61_63:3;
-		uint64_t ill_pad:1;
-		uint64_t sprt3_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_28_31:4;
-		uint64_t m3_un_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_up_b0:1;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t m2_up_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_un_wi:1;
-		uint64_t reserved_28_31:4;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt3_err:1;
-		uint64_t ill_pad:1;
-		uint64_t reserved_61_63:3;
-#endif
-	} cn61xx;
-	struct cvmx_sli_int_enb_portx_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_61_63:3;
-		uint64_t ill_pad:1;
-		uint64_t reserved_58_59:2;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_20_31:12;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t reserved_20_31:12;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t reserved_58_59:2;
-		uint64_t ill_pad:1;
-		uint64_t reserved_61_63:3;
-#endif
-	} cn63xx;
-	struct cvmx_sli_int_enb_portx_cn63xx cn63xxp1;
-	struct cvmx_sli_int_enb_portx_cn61xx cn66xx;
-	struct cvmx_sli_int_enb_portx_cn68xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_62_63:2;
-		uint64_t pipe_err:1;
-		uint64_t ill_pad:1;
-		uint64_t reserved_58_59:2;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t reserved_51_51:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_20_31:12;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t reserved_20_31:12;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t reserved_51_51:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t reserved_58_59:2;
-		uint64_t ill_pad:1;
-		uint64_t pipe_err:1;
-		uint64_t reserved_62_63:2;
-#endif
-	} cn68xx;
-	struct cvmx_sli_int_enb_portx_cn68xx cn68xxp1;
-	struct cvmx_sli_int_enb_portx_cn61xx cnf71xx;
-};
-
-union cvmx_sli_int_sum {
-	uint64_t u64;
-	struct cvmx_sli_int_sum_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_62_63:2;
-		uint64_t pipe_err:1;
-		uint64_t ill_pad:1;
-		uint64_t sprt3_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_28_31:4;
-		uint64_t m3_un_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_up_b0:1;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t m2_up_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_un_wi:1;
-		uint64_t reserved_28_31:4;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt3_err:1;
-		uint64_t ill_pad:1;
-		uint64_t pipe_err:1;
-		uint64_t reserved_62_63:2;
-#endif
-	} s;
-	struct cvmx_sli_int_sum_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_61_63:3;
-		uint64_t ill_pad:1;
-		uint64_t sprt3_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_28_31:4;
-		uint64_t m3_un_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_up_b0:1;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t m2_up_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_un_wi:1;
-		uint64_t reserved_28_31:4;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt3_err:1;
-		uint64_t ill_pad:1;
-		uint64_t reserved_61_63:3;
-#endif
-	} cn61xx;
-	struct cvmx_sli_int_sum_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_61_63:3;
-		uint64_t ill_pad:1;
-		uint64_t reserved_58_59:2;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_20_31:12;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t reserved_20_31:12;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t reserved_58_59:2;
-		uint64_t ill_pad:1;
-		uint64_t reserved_61_63:3;
-#endif
-	} cn63xx;
-	struct cvmx_sli_int_sum_cn63xx cn63xxp1;
-	struct cvmx_sli_int_sum_cn61xx cn66xx;
-	struct cvmx_sli_int_sum_cn68xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_62_63:2;
-		uint64_t pipe_err:1;
-		uint64_t ill_pad:1;
-		uint64_t reserved_58_59:2;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t reserved_51_51:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_20_31:12;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t reserved_20_31:12;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t reserved_51_51:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t reserved_58_59:2;
-		uint64_t ill_pad:1;
-		uint64_t pipe_err:1;
-		uint64_t reserved_62_63:2;
-#endif
-	} cn68xx;
-	struct cvmx_sli_int_sum_cn68xx cn68xxp1;
-	struct cvmx_sli_int_sum_cn61xx cnf71xx;
-};
-
-union cvmx_sli_last_win_rdata0 {
-	uint64_t u64;
-	struct cvmx_sli_last_win_rdata0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t data:64;
-#else
-		uint64_t data:64;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_22_63:42,
+		__BITFIELD_FIELD(uint64_t intd:1,
+		__BITFIELD_FIELD(uint64_t intc:1,
+		__BITFIELD_FIELD(uint64_t intb:1,
+		__BITFIELD_FIELD(uint64_t inta:1,
+		__BITFIELD_FIELD(uint64_t dis_port:1,
+		__BITFIELD_FIELD(uint64_t waitl_com:1,
+		__BITFIELD_FIELD(uint64_t intd_map:2,
+		__BITFIELD_FIELD(uint64_t intc_map:2,
+		__BITFIELD_FIELD(uint64_t intb_map:2,
+		__BITFIELD_FIELD(uint64_t inta_map:2,
+		__BITFIELD_FIELD(uint64_t ctlp_ro:1,
+		__BITFIELD_FIELD(uint64_t reserved_6_6:1,
+		__BITFIELD_FIELD(uint64_t ptlp_ro:1,
+		__BITFIELD_FIELD(uint64_t reserved_1_4:4,
+		__BITFIELD_FIELD(uint64_t wait_com:1,
+		;))))))))))))))))
 	} s;
-	struct cvmx_sli_last_win_rdata0_s cn61xx;
-	struct cvmx_sli_last_win_rdata0_s cn63xx;
-	struct cvmx_sli_last_win_rdata0_s cn63xxp1;
-	struct cvmx_sli_last_win_rdata0_s cn66xx;
-	struct cvmx_sli_last_win_rdata0_s cn68xx;
-	struct cvmx_sli_last_win_rdata0_s cn68xxp1;
-	struct cvmx_sli_last_win_rdata0_s cnf71xx;
-};
-
-union cvmx_sli_last_win_rdata1 {
-	uint64_t u64;
-	struct cvmx_sli_last_win_rdata1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t data:64;
-#else
-		uint64_t data:64;
-#endif
-	} s;
-	struct cvmx_sli_last_win_rdata1_s cn61xx;
-	struct cvmx_sli_last_win_rdata1_s cn63xx;
-	struct cvmx_sli_last_win_rdata1_s cn63xxp1;
-	struct cvmx_sli_last_win_rdata1_s cn66xx;
-	struct cvmx_sli_last_win_rdata1_s cn68xx;
-	struct cvmx_sli_last_win_rdata1_s cn68xxp1;
-	struct cvmx_sli_last_win_rdata1_s cnf71xx;
-};
-
-union cvmx_sli_last_win_rdata2 {
-	uint64_t u64;
-	struct cvmx_sli_last_win_rdata2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t data:64;
-#else
-		uint64_t data:64;
-#endif
-	} s;
-	struct cvmx_sli_last_win_rdata2_s cn61xx;
-	struct cvmx_sli_last_win_rdata2_s cn66xx;
-	struct cvmx_sli_last_win_rdata2_s cnf71xx;
-};
-
-union cvmx_sli_last_win_rdata3 {
-	uint64_t u64;
-	struct cvmx_sli_last_win_rdata3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t data:64;
-#else
-		uint64_t data:64;
-#endif
-	} s;
-	struct cvmx_sli_last_win_rdata3_s cn61xx;
-	struct cvmx_sli_last_win_rdata3_s cn66xx;
-	struct cvmx_sli_last_win_rdata3_s cnf71xx;
-};
-
-union cvmx_sli_mac_credit_cnt {
-	uint64_t u64;
-	struct cvmx_sli_mac_credit_cnt_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_54_63:10;
-		uint64_t p1_c_d:1;
-		uint64_t p1_n_d:1;
-		uint64_t p1_p_d:1;
-		uint64_t p0_c_d:1;
-		uint64_t p0_n_d:1;
-		uint64_t p0_p_d:1;
-		uint64_t p1_ccnt:8;
-		uint64_t p1_ncnt:8;
-		uint64_t p1_pcnt:8;
-		uint64_t p0_ccnt:8;
-		uint64_t p0_ncnt:8;
-		uint64_t p0_pcnt:8;
-#else
-		uint64_t p0_pcnt:8;
-		uint64_t p0_ncnt:8;
-		uint64_t p0_ccnt:8;
-		uint64_t p1_pcnt:8;
-		uint64_t p1_ncnt:8;
-		uint64_t p1_ccnt:8;
-		uint64_t p0_p_d:1;
-		uint64_t p0_n_d:1;
-		uint64_t p0_c_d:1;
-		uint64_t p1_p_d:1;
-		uint64_t p1_n_d:1;
-		uint64_t p1_c_d:1;
-		uint64_t reserved_54_63:10;
-#endif
-	} s;
-	struct cvmx_sli_mac_credit_cnt_s cn61xx;
-	struct cvmx_sli_mac_credit_cnt_s cn63xx;
-	struct cvmx_sli_mac_credit_cnt_cn63xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_48_63:16;
-		uint64_t p1_ccnt:8;
-		uint64_t p1_ncnt:8;
-		uint64_t p1_pcnt:8;
-		uint64_t p0_ccnt:8;
-		uint64_t p0_ncnt:8;
-		uint64_t p0_pcnt:8;
-#else
-		uint64_t p0_pcnt:8;
-		uint64_t p0_ncnt:8;
-		uint64_t p0_ccnt:8;
-		uint64_t p1_pcnt:8;
-		uint64_t p1_ncnt:8;
-		uint64_t p1_ccnt:8;
-		uint64_t reserved_48_63:16;
-#endif
-	} cn63xxp1;
-	struct cvmx_sli_mac_credit_cnt_s cn66xx;
-	struct cvmx_sli_mac_credit_cnt_s cn68xx;
-	struct cvmx_sli_mac_credit_cnt_s cn68xxp1;
-	struct cvmx_sli_mac_credit_cnt_s cnf71xx;
-};
-
-union cvmx_sli_mac_credit_cnt2 {
-	uint64_t u64;
-	struct cvmx_sli_mac_credit_cnt2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_54_63:10;
-		uint64_t p3_c_d:1;
-		uint64_t p3_n_d:1;
-		uint64_t p3_p_d:1;
-		uint64_t p2_c_d:1;
-		uint64_t p2_n_d:1;
-		uint64_t p2_p_d:1;
-		uint64_t p3_ccnt:8;
-		uint64_t p3_ncnt:8;
-		uint64_t p3_pcnt:8;
-		uint64_t p2_ccnt:8;
-		uint64_t p2_ncnt:8;
-		uint64_t p2_pcnt:8;
-#else
-		uint64_t p2_pcnt:8;
-		uint64_t p2_ncnt:8;
-		uint64_t p2_ccnt:8;
-		uint64_t p3_pcnt:8;
-		uint64_t p3_ncnt:8;
-		uint64_t p3_ccnt:8;
-		uint64_t p2_p_d:1;
-		uint64_t p2_n_d:1;
-		uint64_t p2_c_d:1;
-		uint64_t p3_p_d:1;
-		uint64_t p3_n_d:1;
-		uint64_t p3_c_d:1;
-		uint64_t reserved_54_63:10;
-#endif
-	} s;
-	struct cvmx_sli_mac_credit_cnt2_s cn61xx;
-	struct cvmx_sli_mac_credit_cnt2_s cn66xx;
-	struct cvmx_sli_mac_credit_cnt2_s cnf71xx;
-};
-
-union cvmx_sli_mac_number {
-	uint64_t u64;
-	struct cvmx_sli_mac_number_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_9_63:55;
-		uint64_t a_mode:1;
-		uint64_t num:8;
-#else
-		uint64_t num:8;
-		uint64_t a_mode:1;
-		uint64_t reserved_9_63:55;
-#endif
-	} s;
-	struct cvmx_sli_mac_number_s cn61xx;
-	struct cvmx_sli_mac_number_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_8_63:56;
-		uint64_t num:8;
-#else
-		uint64_t num:8;
-		uint64_t reserved_8_63:56;
-#endif
-	} cn63xx;
-	struct cvmx_sli_mac_number_s cn66xx;
-	struct cvmx_sli_mac_number_cn63xx cn68xx;
-	struct cvmx_sli_mac_number_cn63xx cn68xxp1;
-	struct cvmx_sli_mac_number_s cnf71xx;
 };
 
 union cvmx_sli_mem_access_ctl {
 	uint64_t u64;
 	struct cvmx_sli_mem_access_ctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t max_word:4;
-		uint64_t timer:10;
-#else
-		uint64_t timer:10;
-		uint64_t max_word:4;
-		uint64_t reserved_14_63:50;
-#endif
-	} s;
-	struct cvmx_sli_mem_access_ctl_s cn61xx;
-	struct cvmx_sli_mem_access_ctl_s cn63xx;
-	struct cvmx_sli_mem_access_ctl_s cn63xxp1;
-	struct cvmx_sli_mem_access_ctl_s cn66xx;
-	struct cvmx_sli_mem_access_ctl_s cn68xx;
-	struct cvmx_sli_mem_access_ctl_s cn68xxp1;
-	struct cvmx_sli_mem_access_ctl_s cnf71xx;
-};
-
-union cvmx_sli_mem_access_subidx {
-	uint64_t u64;
-	struct cvmx_sli_mem_access_subidx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_43_63:21;
-		uint64_t zero:1;
-		uint64_t port:3;
-		uint64_t nmerge:1;
-		uint64_t esr:2;
-		uint64_t esw:2;
-		uint64_t wtype:2;
-		uint64_t rtype:2;
-		uint64_t reserved_0_29:30;
-#else
-		uint64_t reserved_0_29:30;
-		uint64_t rtype:2;
-		uint64_t wtype:2;
-		uint64_t esw:2;
-		uint64_t esr:2;
-		uint64_t nmerge:1;
-		uint64_t port:3;
-		uint64_t zero:1;
-		uint64_t reserved_43_63:21;
-#endif
-	} s;
-	struct cvmx_sli_mem_access_subidx_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_43_63:21;
-		uint64_t zero:1;
-		uint64_t port:3;
-		uint64_t nmerge:1;
-		uint64_t esr:2;
-		uint64_t esw:2;
-		uint64_t wtype:2;
-		uint64_t rtype:2;
-		uint64_t ba:30;
-#else
-		uint64_t ba:30;
-		uint64_t rtype:2;
-		uint64_t wtype:2;
-		uint64_t esw:2;
-		uint64_t esr:2;
-		uint64_t nmerge:1;
-		uint64_t port:3;
-		uint64_t zero:1;
-		uint64_t reserved_43_63:21;
-#endif
-	} cn61xx;
-	struct cvmx_sli_mem_access_subidx_cn61xx cn63xx;
-	struct cvmx_sli_mem_access_subidx_cn61xx cn63xxp1;
-	struct cvmx_sli_mem_access_subidx_cn61xx cn66xx;
-	struct cvmx_sli_mem_access_subidx_cn68xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_43_63:21;
-		uint64_t zero:1;
-		uint64_t port:3;
-		uint64_t nmerge:1;
-		uint64_t esr:2;
-		uint64_t esw:2;
-		uint64_t wtype:2;
-		uint64_t rtype:2;
-		uint64_t ba:28;
-		uint64_t reserved_0_1:2;
-#else
-		uint64_t reserved_0_1:2;
-		uint64_t ba:28;
-		uint64_t rtype:2;
-		uint64_t wtype:2;
-		uint64_t esw:2;
-		uint64_t esr:2;
-		uint64_t nmerge:1;
-		uint64_t port:3;
-		uint64_t zero:1;
-		uint64_t reserved_43_63:21;
-#endif
-	} cn68xx;
-	struct cvmx_sli_mem_access_subidx_cn68xx cn68xxp1;
-	struct cvmx_sli_mem_access_subidx_cn61xx cnf71xx;
-};
-
-union cvmx_sli_msi_enb0 {
-	uint64_t u64;
-	struct cvmx_sli_msi_enb0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t enb:64;
-#else
-		uint64_t enb:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_enb0_s cn61xx;
-	struct cvmx_sli_msi_enb0_s cn63xx;
-	struct cvmx_sli_msi_enb0_s cn63xxp1;
-	struct cvmx_sli_msi_enb0_s cn66xx;
-	struct cvmx_sli_msi_enb0_s cn68xx;
-	struct cvmx_sli_msi_enb0_s cn68xxp1;
-	struct cvmx_sli_msi_enb0_s cnf71xx;
-};
-
-union cvmx_sli_msi_enb1 {
-	uint64_t u64;
-	struct cvmx_sli_msi_enb1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t enb:64;
-#else
-		uint64_t enb:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_enb1_s cn61xx;
-	struct cvmx_sli_msi_enb1_s cn63xx;
-	struct cvmx_sli_msi_enb1_s cn63xxp1;
-	struct cvmx_sli_msi_enb1_s cn66xx;
-	struct cvmx_sli_msi_enb1_s cn68xx;
-	struct cvmx_sli_msi_enb1_s cn68xxp1;
-	struct cvmx_sli_msi_enb1_s cnf71xx;
-};
-
-union cvmx_sli_msi_enb2 {
-	uint64_t u64;
-	struct cvmx_sli_msi_enb2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t enb:64;
-#else
-		uint64_t enb:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_enb2_s cn61xx;
-	struct cvmx_sli_msi_enb2_s cn63xx;
-	struct cvmx_sli_msi_enb2_s cn63xxp1;
-	struct cvmx_sli_msi_enb2_s cn66xx;
-	struct cvmx_sli_msi_enb2_s cn68xx;
-	struct cvmx_sli_msi_enb2_s cn68xxp1;
-	struct cvmx_sli_msi_enb2_s cnf71xx;
-};
-
-union cvmx_sli_msi_enb3 {
-	uint64_t u64;
-	struct cvmx_sli_msi_enb3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t enb:64;
-#else
-		uint64_t enb:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_enb3_s cn61xx;
-	struct cvmx_sli_msi_enb3_s cn63xx;
-	struct cvmx_sli_msi_enb3_s cn63xxp1;
-	struct cvmx_sli_msi_enb3_s cn66xx;
-	struct cvmx_sli_msi_enb3_s cn68xx;
-	struct cvmx_sli_msi_enb3_s cn68xxp1;
-	struct cvmx_sli_msi_enb3_s cnf71xx;
-};
-
-union cvmx_sli_msi_rcv0 {
-	uint64_t u64;
-	struct cvmx_sli_msi_rcv0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t intr:64;
-#else
-		uint64_t intr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_rcv0_s cn61xx;
-	struct cvmx_sli_msi_rcv0_s cn63xx;
-	struct cvmx_sli_msi_rcv0_s cn63xxp1;
-	struct cvmx_sli_msi_rcv0_s cn66xx;
-	struct cvmx_sli_msi_rcv0_s cn68xx;
-	struct cvmx_sli_msi_rcv0_s cn68xxp1;
-	struct cvmx_sli_msi_rcv0_s cnf71xx;
-};
-
-union cvmx_sli_msi_rcv1 {
-	uint64_t u64;
-	struct cvmx_sli_msi_rcv1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t intr:64;
-#else
-		uint64_t intr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_rcv1_s cn61xx;
-	struct cvmx_sli_msi_rcv1_s cn63xx;
-	struct cvmx_sli_msi_rcv1_s cn63xxp1;
-	struct cvmx_sli_msi_rcv1_s cn66xx;
-	struct cvmx_sli_msi_rcv1_s cn68xx;
-	struct cvmx_sli_msi_rcv1_s cn68xxp1;
-	struct cvmx_sli_msi_rcv1_s cnf71xx;
-};
-
-union cvmx_sli_msi_rcv2 {
-	uint64_t u64;
-	struct cvmx_sli_msi_rcv2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t intr:64;
-#else
-		uint64_t intr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_rcv2_s cn61xx;
-	struct cvmx_sli_msi_rcv2_s cn63xx;
-	struct cvmx_sli_msi_rcv2_s cn63xxp1;
-	struct cvmx_sli_msi_rcv2_s cn66xx;
-	struct cvmx_sli_msi_rcv2_s cn68xx;
-	struct cvmx_sli_msi_rcv2_s cn68xxp1;
-	struct cvmx_sli_msi_rcv2_s cnf71xx;
-};
-
-union cvmx_sli_msi_rcv3 {
-	uint64_t u64;
-	struct cvmx_sli_msi_rcv3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t intr:64;
-#else
-		uint64_t intr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_rcv3_s cn61xx;
-	struct cvmx_sli_msi_rcv3_s cn63xx;
-	struct cvmx_sli_msi_rcv3_s cn63xxp1;
-	struct cvmx_sli_msi_rcv3_s cn66xx;
-	struct cvmx_sli_msi_rcv3_s cn68xx;
-	struct cvmx_sli_msi_rcv3_s cn68xxp1;
-	struct cvmx_sli_msi_rcv3_s cnf71xx;
-};
-
-union cvmx_sli_msi_rd_map {
-	uint64_t u64;
-	struct cvmx_sli_msi_rd_map_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t rd_int:8;
-		uint64_t msi_int:8;
-#else
-		uint64_t msi_int:8;
-		uint64_t rd_int:8;
-		uint64_t reserved_16_63:48;
-#endif
-	} s;
-	struct cvmx_sli_msi_rd_map_s cn61xx;
-	struct cvmx_sli_msi_rd_map_s cn63xx;
-	struct cvmx_sli_msi_rd_map_s cn63xxp1;
-	struct cvmx_sli_msi_rd_map_s cn66xx;
-	struct cvmx_sli_msi_rd_map_s cn68xx;
-	struct cvmx_sli_msi_rd_map_s cn68xxp1;
-	struct cvmx_sli_msi_rd_map_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1c_enb0 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1c_enb0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t clr:64;
-#else
-		uint64_t clr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1c_enb0_s cn61xx;
-	struct cvmx_sli_msi_w1c_enb0_s cn63xx;
-	struct cvmx_sli_msi_w1c_enb0_s cn63xxp1;
-	struct cvmx_sli_msi_w1c_enb0_s cn66xx;
-	struct cvmx_sli_msi_w1c_enb0_s cn68xx;
-	struct cvmx_sli_msi_w1c_enb0_s cn68xxp1;
-	struct cvmx_sli_msi_w1c_enb0_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1c_enb1 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1c_enb1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t clr:64;
-#else
-		uint64_t clr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1c_enb1_s cn61xx;
-	struct cvmx_sli_msi_w1c_enb1_s cn63xx;
-	struct cvmx_sli_msi_w1c_enb1_s cn63xxp1;
-	struct cvmx_sli_msi_w1c_enb1_s cn66xx;
-	struct cvmx_sli_msi_w1c_enb1_s cn68xx;
-	struct cvmx_sli_msi_w1c_enb1_s cn68xxp1;
-	struct cvmx_sli_msi_w1c_enb1_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1c_enb2 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1c_enb2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t clr:64;
-#else
-		uint64_t clr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1c_enb2_s cn61xx;
-	struct cvmx_sli_msi_w1c_enb2_s cn63xx;
-	struct cvmx_sli_msi_w1c_enb2_s cn63xxp1;
-	struct cvmx_sli_msi_w1c_enb2_s cn66xx;
-	struct cvmx_sli_msi_w1c_enb2_s cn68xx;
-	struct cvmx_sli_msi_w1c_enb2_s cn68xxp1;
-	struct cvmx_sli_msi_w1c_enb2_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1c_enb3 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1c_enb3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t clr:64;
-#else
-		uint64_t clr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1c_enb3_s cn61xx;
-	struct cvmx_sli_msi_w1c_enb3_s cn63xx;
-	struct cvmx_sli_msi_w1c_enb3_s cn63xxp1;
-	struct cvmx_sli_msi_w1c_enb3_s cn66xx;
-	struct cvmx_sli_msi_w1c_enb3_s cn68xx;
-	struct cvmx_sli_msi_w1c_enb3_s cn68xxp1;
-	struct cvmx_sli_msi_w1c_enb3_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1s_enb0 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1s_enb0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t set:64;
-#else
-		uint64_t set:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1s_enb0_s cn61xx;
-	struct cvmx_sli_msi_w1s_enb0_s cn63xx;
-	struct cvmx_sli_msi_w1s_enb0_s cn63xxp1;
-	struct cvmx_sli_msi_w1s_enb0_s cn66xx;
-	struct cvmx_sli_msi_w1s_enb0_s cn68xx;
-	struct cvmx_sli_msi_w1s_enb0_s cn68xxp1;
-	struct cvmx_sli_msi_w1s_enb0_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1s_enb1 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1s_enb1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t set:64;
-#else
-		uint64_t set:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1s_enb1_s cn61xx;
-	struct cvmx_sli_msi_w1s_enb1_s cn63xx;
-	struct cvmx_sli_msi_w1s_enb1_s cn63xxp1;
-	struct cvmx_sli_msi_w1s_enb1_s cn66xx;
-	struct cvmx_sli_msi_w1s_enb1_s cn68xx;
-	struct cvmx_sli_msi_w1s_enb1_s cn68xxp1;
-	struct cvmx_sli_msi_w1s_enb1_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1s_enb2 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1s_enb2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t set:64;
-#else
-		uint64_t set:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1s_enb2_s cn61xx;
-	struct cvmx_sli_msi_w1s_enb2_s cn63xx;
-	struct cvmx_sli_msi_w1s_enb2_s cn63xxp1;
-	struct cvmx_sli_msi_w1s_enb2_s cn66xx;
-	struct cvmx_sli_msi_w1s_enb2_s cn68xx;
-	struct cvmx_sli_msi_w1s_enb2_s cn68xxp1;
-	struct cvmx_sli_msi_w1s_enb2_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1s_enb3 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1s_enb3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t set:64;
-#else
-		uint64_t set:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1s_enb3_s cn61xx;
-	struct cvmx_sli_msi_w1s_enb3_s cn63xx;
-	struct cvmx_sli_msi_w1s_enb3_s cn63xxp1;
-	struct cvmx_sli_msi_w1s_enb3_s cn66xx;
-	struct cvmx_sli_msi_w1s_enb3_s cn68xx;
-	struct cvmx_sli_msi_w1s_enb3_s cn68xxp1;
-	struct cvmx_sli_msi_w1s_enb3_s cnf71xx;
-};
-
-union cvmx_sli_msi_wr_map {
-	uint64_t u64;
-	struct cvmx_sli_msi_wr_map_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t ciu_int:8;
-		uint64_t msi_int:8;
-#else
-		uint64_t msi_int:8;
-		uint64_t ciu_int:8;
-		uint64_t reserved_16_63:48;
-#endif
-	} s;
-	struct cvmx_sli_msi_wr_map_s cn61xx;
-	struct cvmx_sli_msi_wr_map_s cn63xx;
-	struct cvmx_sli_msi_wr_map_s cn63xxp1;
-	struct cvmx_sli_msi_wr_map_s cn66xx;
-	struct cvmx_sli_msi_wr_map_s cn68xx;
-	struct cvmx_sli_msi_wr_map_s cn68xxp1;
-	struct cvmx_sli_msi_wr_map_s cnf71xx;
-};
-
-union cvmx_sli_pcie_msi_rcv {
-	uint64_t u64;
-	struct cvmx_sli_pcie_msi_rcv_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_8_63:56;
-		uint64_t intr:8;
-#else
-		uint64_t intr:8;
-		uint64_t reserved_8_63:56;
-#endif
-	} s;
-	struct cvmx_sli_pcie_msi_rcv_s cn61xx;
-	struct cvmx_sli_pcie_msi_rcv_s cn63xx;
-	struct cvmx_sli_pcie_msi_rcv_s cn63xxp1;
-	struct cvmx_sli_pcie_msi_rcv_s cn66xx;
-	struct cvmx_sli_pcie_msi_rcv_s cn68xx;
-	struct cvmx_sli_pcie_msi_rcv_s cn68xxp1;
-	struct cvmx_sli_pcie_msi_rcv_s cnf71xx;
-};
-
-union cvmx_sli_pcie_msi_rcv_b1 {
-	uint64_t u64;
-	struct cvmx_sli_pcie_msi_rcv_b1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t intr:8;
-		uint64_t reserved_0_7:8;
-#else
-		uint64_t reserved_0_7:8;
-		uint64_t intr:8;
-		uint64_t reserved_16_63:48;
-#endif
-	} s;
-	struct cvmx_sli_pcie_msi_rcv_b1_s cn61xx;
-	struct cvmx_sli_pcie_msi_rcv_b1_s cn63xx;
-	struct cvmx_sli_pcie_msi_rcv_b1_s cn63xxp1;
-	struct cvmx_sli_pcie_msi_rcv_b1_s cn66xx;
-	struct cvmx_sli_pcie_msi_rcv_b1_s cn68xx;
-	struct cvmx_sli_pcie_msi_rcv_b1_s cn68xxp1;
-	struct cvmx_sli_pcie_msi_rcv_b1_s cnf71xx;
-};
-
-union cvmx_sli_pcie_msi_rcv_b2 {
-	uint64_t u64;
-	struct cvmx_sli_pcie_msi_rcv_b2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_24_63:40;
-		uint64_t intr:8;
-		uint64_t reserved_0_15:16;
-#else
-		uint64_t reserved_0_15:16;
-		uint64_t intr:8;
-		uint64_t reserved_24_63:40;
-#endif
-	} s;
-	struct cvmx_sli_pcie_msi_rcv_b2_s cn61xx;
-	struct cvmx_sli_pcie_msi_rcv_b2_s cn63xx;
-	struct cvmx_sli_pcie_msi_rcv_b2_s cn63xxp1;
-	struct cvmx_sli_pcie_msi_rcv_b2_s cn66xx;
-	struct cvmx_sli_pcie_msi_rcv_b2_s cn68xx;
-	struct cvmx_sli_pcie_msi_rcv_b2_s cn68xxp1;
-	struct cvmx_sli_pcie_msi_rcv_b2_s cnf71xx;
-};
-
-union cvmx_sli_pcie_msi_rcv_b3 {
-	uint64_t u64;
-	struct cvmx_sli_pcie_msi_rcv_b3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t intr:8;
-		uint64_t reserved_0_23:24;
-#else
-		uint64_t reserved_0_23:24;
-		uint64_t intr:8;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pcie_msi_rcv_b3_s cn61xx;
-	struct cvmx_sli_pcie_msi_rcv_b3_s cn63xx;
-	struct cvmx_sli_pcie_msi_rcv_b3_s cn63xxp1;
-	struct cvmx_sli_pcie_msi_rcv_b3_s cn66xx;
-	struct cvmx_sli_pcie_msi_rcv_b3_s cn68xx;
-	struct cvmx_sli_pcie_msi_rcv_b3_s cn68xxp1;
-	struct cvmx_sli_pcie_msi_rcv_b3_s cnf71xx;
-};
-
-union cvmx_sli_pktx_cnts {
-	uint64_t u64;
-	struct cvmx_sli_pktx_cnts_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_54_63:10;
-		uint64_t timer:22;
-		uint64_t cnt:32;
-#else
-		uint64_t cnt:32;
-		uint64_t timer:22;
-		uint64_t reserved_54_63:10;
-#endif
-	} s;
-	struct cvmx_sli_pktx_cnts_s cn61xx;
-	struct cvmx_sli_pktx_cnts_s cn63xx;
-	struct cvmx_sli_pktx_cnts_s cn63xxp1;
-	struct cvmx_sli_pktx_cnts_s cn66xx;
-	struct cvmx_sli_pktx_cnts_s cn68xx;
-	struct cvmx_sli_pktx_cnts_s cn68xxp1;
-	struct cvmx_sli_pktx_cnts_s cnf71xx;
-};
-
-union cvmx_sli_pktx_in_bp {
-	uint64_t u64;
-	struct cvmx_sli_pktx_in_bp_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t wmark:32;
-		uint64_t cnt:32;
-#else
-		uint64_t cnt:32;
-		uint64_t wmark:32;
-#endif
-	} s;
-	struct cvmx_sli_pktx_in_bp_s cn61xx;
-	struct cvmx_sli_pktx_in_bp_s cn63xx;
-	struct cvmx_sli_pktx_in_bp_s cn63xxp1;
-	struct cvmx_sli_pktx_in_bp_s cn66xx;
-	struct cvmx_sli_pktx_in_bp_s cnf71xx;
-};
-
-union cvmx_sli_pktx_instr_baddr {
-	uint64_t u64;
-	struct cvmx_sli_pktx_instr_baddr_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t addr:61;
-		uint64_t reserved_0_2:3;
-#else
-		uint64_t reserved_0_2:3;
-		uint64_t addr:61;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_14_63:50,
+		__BITFIELD_FIELD(uint64_t max_word:4,
+		__BITFIELD_FIELD(uint64_t timer:10,
+		;)))
 	} s;
-	struct cvmx_sli_pktx_instr_baddr_s cn61xx;
-	struct cvmx_sli_pktx_instr_baddr_s cn63xx;
-	struct cvmx_sli_pktx_instr_baddr_s cn63xxp1;
-	struct cvmx_sli_pktx_instr_baddr_s cn66xx;
-	struct cvmx_sli_pktx_instr_baddr_s cn68xx;
-	struct cvmx_sli_pktx_instr_baddr_s cn68xxp1;
-	struct cvmx_sli_pktx_instr_baddr_s cnf71xx;
-};
-
-union cvmx_sli_pktx_instr_baoff_dbell {
-	uint64_t u64;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t aoff:32;
-		uint64_t dbell:32;
-#else
-		uint64_t dbell:32;
-		uint64_t aoff:32;
-#endif
-	} s;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s cn61xx;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s cn63xx;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s cn63xxp1;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s cn66xx;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s cn68xx;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s cn68xxp1;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s cnf71xx;
-};
-
-union cvmx_sli_pktx_instr_fifo_rsize {
-	uint64_t u64;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t max:9;
-		uint64_t rrp:9;
-		uint64_t wrp:9;
-		uint64_t fcnt:5;
-		uint64_t rsize:32;
-#else
-		uint64_t rsize:32;
-		uint64_t fcnt:5;
-		uint64_t wrp:9;
-		uint64_t rrp:9;
-		uint64_t max:9;
-#endif
-	} s;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s cn61xx;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s cn63xx;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s cn63xxp1;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s cn66xx;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s cn68xx;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s cn68xxp1;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s cnf71xx;
-};
-
-union cvmx_sli_pktx_instr_header {
-	uint64_t u64;
-	struct cvmx_sli_pktx_instr_header_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_44_63:20;
-		uint64_t pbp:1;
-		uint64_t reserved_38_42:5;
-		uint64_t rparmode:2;
-		uint64_t reserved_35_35:1;
-		uint64_t rskp_len:7;
-		uint64_t rngrpext:2;
-		uint64_t rnqos:1;
-		uint64_t rngrp:1;
-		uint64_t rntt:1;
-		uint64_t rntag:1;
-		uint64_t use_ihdr:1;
-		uint64_t reserved_16_20:5;
-		uint64_t par_mode:2;
-		uint64_t reserved_13_13:1;
-		uint64_t skp_len:7;
-		uint64_t ngrpext:2;
-		uint64_t nqos:1;
-		uint64_t ngrp:1;
-		uint64_t ntt:1;
-		uint64_t ntag:1;
-#else
-		uint64_t ntag:1;
-		uint64_t ntt:1;
-		uint64_t ngrp:1;
-		uint64_t nqos:1;
-		uint64_t ngrpext:2;
-		uint64_t skp_len:7;
-		uint64_t reserved_13_13:1;
-		uint64_t par_mode:2;
-		uint64_t reserved_16_20:5;
-		uint64_t use_ihdr:1;
-		uint64_t rntag:1;
-		uint64_t rntt:1;
-		uint64_t rngrp:1;
-		uint64_t rnqos:1;
-		uint64_t rngrpext:2;
-		uint64_t rskp_len:7;
-		uint64_t reserved_35_35:1;
-		uint64_t rparmode:2;
-		uint64_t reserved_38_42:5;
-		uint64_t pbp:1;
-		uint64_t reserved_44_63:20;
-#endif
-	} s;
-	struct cvmx_sli_pktx_instr_header_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_44_63:20;
-		uint64_t pbp:1;
-		uint64_t reserved_38_42:5;
-		uint64_t rparmode:2;
-		uint64_t reserved_35_35:1;
-		uint64_t rskp_len:7;
-		uint64_t reserved_26_27:2;
-		uint64_t rnqos:1;
-		uint64_t rngrp:1;
-		uint64_t rntt:1;
-		uint64_t rntag:1;
-		uint64_t use_ihdr:1;
-		uint64_t reserved_16_20:5;
-		uint64_t par_mode:2;
-		uint64_t reserved_13_13:1;
-		uint64_t skp_len:7;
-		uint64_t reserved_4_5:2;
-		uint64_t nqos:1;
-		uint64_t ngrp:1;
-		uint64_t ntt:1;
-		uint64_t ntag:1;
-#else
-		uint64_t ntag:1;
-		uint64_t ntt:1;
-		uint64_t ngrp:1;
-		uint64_t nqos:1;
-		uint64_t reserved_4_5:2;
-		uint64_t skp_len:7;
-		uint64_t reserved_13_13:1;
-		uint64_t par_mode:2;
-		uint64_t reserved_16_20:5;
-		uint64_t use_ihdr:1;
-		uint64_t rntag:1;
-		uint64_t rntt:1;
-		uint64_t rngrp:1;
-		uint64_t rnqos:1;
-		uint64_t reserved_26_27:2;
-		uint64_t rskp_len:7;
-		uint64_t reserved_35_35:1;
-		uint64_t rparmode:2;
-		uint64_t reserved_38_42:5;
-		uint64_t pbp:1;
-		uint64_t reserved_44_63:20;
-#endif
-	} cn61xx;
-	struct cvmx_sli_pktx_instr_header_cn61xx cn63xx;
-	struct cvmx_sli_pktx_instr_header_cn61xx cn63xxp1;
-	struct cvmx_sli_pktx_instr_header_cn61xx cn66xx;
-	struct cvmx_sli_pktx_instr_header_s cn68xx;
-	struct cvmx_sli_pktx_instr_header_cn61xx cn68xxp1;
-	struct cvmx_sli_pktx_instr_header_cn61xx cnf71xx;
-};
-
-union cvmx_sli_pktx_out_size {
-	uint64_t u64;
-	struct cvmx_sli_pktx_out_size_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_23_63:41;
-		uint64_t isize:7;
-		uint64_t bsize:16;
-#else
-		uint64_t bsize:16;
-		uint64_t isize:7;
-		uint64_t reserved_23_63:41;
-#endif
-	} s;
-	struct cvmx_sli_pktx_out_size_s cn61xx;
-	struct cvmx_sli_pktx_out_size_s cn63xx;
-	struct cvmx_sli_pktx_out_size_s cn63xxp1;
-	struct cvmx_sli_pktx_out_size_s cn66xx;
-	struct cvmx_sli_pktx_out_size_s cn68xx;
-	struct cvmx_sli_pktx_out_size_s cn68xxp1;
-	struct cvmx_sli_pktx_out_size_s cnf71xx;
-};
-
-union cvmx_sli_pktx_slist_baddr {
-	uint64_t u64;
-	struct cvmx_sli_pktx_slist_baddr_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t addr:60;
-		uint64_t reserved_0_3:4;
-#else
-		uint64_t reserved_0_3:4;
-		uint64_t addr:60;
-#endif
-	} s;
-	struct cvmx_sli_pktx_slist_baddr_s cn61xx;
-	struct cvmx_sli_pktx_slist_baddr_s cn63xx;
-	struct cvmx_sli_pktx_slist_baddr_s cn63xxp1;
-	struct cvmx_sli_pktx_slist_baddr_s cn66xx;
-	struct cvmx_sli_pktx_slist_baddr_s cn68xx;
-	struct cvmx_sli_pktx_slist_baddr_s cn68xxp1;
-	struct cvmx_sli_pktx_slist_baddr_s cnf71xx;
-};
-
-union cvmx_sli_pktx_slist_baoff_dbell {
-	uint64_t u64;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t aoff:32;
-		uint64_t dbell:32;
-#else
-		uint64_t dbell:32;
-		uint64_t aoff:32;
-#endif
-	} s;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s cn61xx;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s cn63xx;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s cn63xxp1;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s cn66xx;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s cn68xx;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s cn68xxp1;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s cnf71xx;
-};
-
-union cvmx_sli_pktx_slist_fifo_rsize {
-	uint64_t u64;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t rsize:32;
-#else
-		uint64_t rsize:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s cn61xx;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s cn63xx;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s cn63xxp1;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s cn66xx;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s cn68xx;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s cn68xxp1;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s cnf71xx;
-};
-
-union cvmx_sli_pkt_cnt_int {
-	uint64_t u64;
-	struct cvmx_sli_pkt_cnt_int_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t port:32;
-#else
-		uint64_t port:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_cnt_int_s cn61xx;
-	struct cvmx_sli_pkt_cnt_int_s cn63xx;
-	struct cvmx_sli_pkt_cnt_int_s cn63xxp1;
-	struct cvmx_sli_pkt_cnt_int_s cn66xx;
-	struct cvmx_sli_pkt_cnt_int_s cn68xx;
-	struct cvmx_sli_pkt_cnt_int_s cn68xxp1;
-	struct cvmx_sli_pkt_cnt_int_s cnf71xx;
-};
-
-union cvmx_sli_pkt_cnt_int_enb {
-	uint64_t u64;
-	struct cvmx_sli_pkt_cnt_int_enb_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t port:32;
-#else
-		uint64_t port:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_cnt_int_enb_s cn61xx;
-	struct cvmx_sli_pkt_cnt_int_enb_s cn63xx;
-	struct cvmx_sli_pkt_cnt_int_enb_s cn63xxp1;
-	struct cvmx_sli_pkt_cnt_int_enb_s cn66xx;
-	struct cvmx_sli_pkt_cnt_int_enb_s cn68xx;
-	struct cvmx_sli_pkt_cnt_int_enb_s cn68xxp1;
-	struct cvmx_sli_pkt_cnt_int_enb_s cnf71xx;
-};
-
-union cvmx_sli_pkt_ctl {
-	uint64_t u64;
-	struct cvmx_sli_pkt_ctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_5_63:59;
-		uint64_t ring_en:1;
-		uint64_t pkt_bp:4;
-#else
-		uint64_t pkt_bp:4;
-		uint64_t ring_en:1;
-		uint64_t reserved_5_63:59;
-#endif
-	} s;
-	struct cvmx_sli_pkt_ctl_s cn61xx;
-	struct cvmx_sli_pkt_ctl_s cn63xx;
-	struct cvmx_sli_pkt_ctl_s cn63xxp1;
-	struct cvmx_sli_pkt_ctl_s cn66xx;
-	struct cvmx_sli_pkt_ctl_s cn68xx;
-	struct cvmx_sli_pkt_ctl_s cn68xxp1;
-	struct cvmx_sli_pkt_ctl_s cnf71xx;
-};
-
-union cvmx_sli_pkt_data_out_es {
-	uint64_t u64;
-	struct cvmx_sli_pkt_data_out_es_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t es:64;
-#else
-		uint64_t es:64;
-#endif
-	} s;
-	struct cvmx_sli_pkt_data_out_es_s cn61xx;
-	struct cvmx_sli_pkt_data_out_es_s cn63xx;
-	struct cvmx_sli_pkt_data_out_es_s cn63xxp1;
-	struct cvmx_sli_pkt_data_out_es_s cn66xx;
-	struct cvmx_sli_pkt_data_out_es_s cn68xx;
-	struct cvmx_sli_pkt_data_out_es_s cn68xxp1;
-	struct cvmx_sli_pkt_data_out_es_s cnf71xx;
-};
-
-union cvmx_sli_pkt_data_out_ns {
-	uint64_t u64;
-	struct cvmx_sli_pkt_data_out_ns_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t nsr:32;
-#else
-		uint64_t nsr:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_data_out_ns_s cn61xx;
-	struct cvmx_sli_pkt_data_out_ns_s cn63xx;
-	struct cvmx_sli_pkt_data_out_ns_s cn63xxp1;
-	struct cvmx_sli_pkt_data_out_ns_s cn66xx;
-	struct cvmx_sli_pkt_data_out_ns_s cn68xx;
-	struct cvmx_sli_pkt_data_out_ns_s cn68xxp1;
-	struct cvmx_sli_pkt_data_out_ns_s cnf71xx;
-};
-
-union cvmx_sli_pkt_data_out_ror {
-	uint64_t u64;
-	struct cvmx_sli_pkt_data_out_ror_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t ror:32;
-#else
-		uint64_t ror:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_data_out_ror_s cn61xx;
-	struct cvmx_sli_pkt_data_out_ror_s cn63xx;
-	struct cvmx_sli_pkt_data_out_ror_s cn63xxp1;
-	struct cvmx_sli_pkt_data_out_ror_s cn66xx;
-	struct cvmx_sli_pkt_data_out_ror_s cn68xx;
-	struct cvmx_sli_pkt_data_out_ror_s cn68xxp1;
-	struct cvmx_sli_pkt_data_out_ror_s cnf71xx;
-};
-
-union cvmx_sli_pkt_dpaddr {
-	uint64_t u64;
-	struct cvmx_sli_pkt_dpaddr_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t dptr:32;
-#else
-		uint64_t dptr:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_dpaddr_s cn61xx;
-	struct cvmx_sli_pkt_dpaddr_s cn63xx;
-	struct cvmx_sli_pkt_dpaddr_s cn63xxp1;
-	struct cvmx_sli_pkt_dpaddr_s cn66xx;
-	struct cvmx_sli_pkt_dpaddr_s cn68xx;
-	struct cvmx_sli_pkt_dpaddr_s cn68xxp1;
-	struct cvmx_sli_pkt_dpaddr_s cnf71xx;
-};
-
-union cvmx_sli_pkt_in_bp {
-	uint64_t u64;
-	struct cvmx_sli_pkt_in_bp_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t bp:32;
-#else
-		uint64_t bp:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_in_bp_s cn61xx;
-	struct cvmx_sli_pkt_in_bp_s cn63xx;
-	struct cvmx_sli_pkt_in_bp_s cn63xxp1;
-	struct cvmx_sli_pkt_in_bp_s cn66xx;
-	struct cvmx_sli_pkt_in_bp_s cnf71xx;
-};
-
-union cvmx_sli_pkt_in_donex_cnts {
-	uint64_t u64;
-	struct cvmx_sli_pkt_in_donex_cnts_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t cnt:32;
-#else
-		uint64_t cnt:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_in_donex_cnts_s cn61xx;
-	struct cvmx_sli_pkt_in_donex_cnts_s cn63xx;
-	struct cvmx_sli_pkt_in_donex_cnts_s cn63xxp1;
-	struct cvmx_sli_pkt_in_donex_cnts_s cn66xx;
-	struct cvmx_sli_pkt_in_donex_cnts_s cn68xx;
-	struct cvmx_sli_pkt_in_donex_cnts_s cn68xxp1;
-	struct cvmx_sli_pkt_in_donex_cnts_s cnf71xx;
-};
-
-union cvmx_sli_pkt_in_instr_counts {
-	uint64_t u64;
-	struct cvmx_sli_pkt_in_instr_counts_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t wr_cnt:32;
-		uint64_t rd_cnt:32;
-#else
-		uint64_t rd_cnt:32;
-		uint64_t wr_cnt:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_in_instr_counts_s cn61xx;
-	struct cvmx_sli_pkt_in_instr_counts_s cn63xx;
-	struct cvmx_sli_pkt_in_instr_counts_s cn63xxp1;
-	struct cvmx_sli_pkt_in_instr_counts_s cn66xx;
-	struct cvmx_sli_pkt_in_instr_counts_s cn68xx;
-	struct cvmx_sli_pkt_in_instr_counts_s cn68xxp1;
-	struct cvmx_sli_pkt_in_instr_counts_s cnf71xx;
-};
-
-union cvmx_sli_pkt_in_pcie_port {
-	uint64_t u64;
-	struct cvmx_sli_pkt_in_pcie_port_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t pp:64;
-#else
-		uint64_t pp:64;
-#endif
-	} s;
-	struct cvmx_sli_pkt_in_pcie_port_s cn61xx;
-	struct cvmx_sli_pkt_in_pcie_port_s cn63xx;
-	struct cvmx_sli_pkt_in_pcie_port_s cn63xxp1;
-	struct cvmx_sli_pkt_in_pcie_port_s cn66xx;
-	struct cvmx_sli_pkt_in_pcie_port_s cn68xx;
-	struct cvmx_sli_pkt_in_pcie_port_s cn68xxp1;
-	struct cvmx_sli_pkt_in_pcie_port_s cnf71xx;
-};
-
-union cvmx_sli_pkt_input_control {
-	uint64_t u64;
-	struct cvmx_sli_pkt_input_control_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t prd_erst:1;
-		uint64_t prd_rds:7;
-		uint64_t gii_erst:1;
-		uint64_t gii_rds:7;
-		uint64_t reserved_41_47:7;
-		uint64_t prc_idle:1;
-		uint64_t reserved_24_39:16;
-		uint64_t pin_rst:1;
-		uint64_t pkt_rr:1;
-		uint64_t pbp_dhi:13;
-		uint64_t d_nsr:1;
-		uint64_t d_esr:2;
-		uint64_t d_ror:1;
-		uint64_t use_csr:1;
-		uint64_t nsr:1;
-		uint64_t esr:2;
-		uint64_t ror:1;
-#else
-		uint64_t ror:1;
-		uint64_t esr:2;
-		uint64_t nsr:1;
-		uint64_t use_csr:1;
-		uint64_t d_ror:1;
-		uint64_t d_esr:2;
-		uint64_t d_nsr:1;
-		uint64_t pbp_dhi:13;
-		uint64_t pkt_rr:1;
-		uint64_t pin_rst:1;
-		uint64_t reserved_24_39:16;
-		uint64_t prc_idle:1;
-		uint64_t reserved_41_47:7;
-		uint64_t gii_rds:7;
-		uint64_t gii_erst:1;
-		uint64_t prd_rds:7;
-		uint64_t prd_erst:1;
-#endif
-	} s;
-	struct cvmx_sli_pkt_input_control_s cn61xx;
-	struct cvmx_sli_pkt_input_control_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_23_63:41;
-		uint64_t pkt_rr:1;
-		uint64_t pbp_dhi:13;
-		uint64_t d_nsr:1;
-		uint64_t d_esr:2;
-		uint64_t d_ror:1;
-		uint64_t use_csr:1;
-		uint64_t nsr:1;
-		uint64_t esr:2;
-		uint64_t ror:1;
-#else
-		uint64_t ror:1;
-		uint64_t esr:2;
-		uint64_t nsr:1;
-		uint64_t use_csr:1;
-		uint64_t d_ror:1;
-		uint64_t d_esr:2;
-		uint64_t d_nsr:1;
-		uint64_t pbp_dhi:13;
-		uint64_t pkt_rr:1;
-		uint64_t reserved_23_63:41;
-#endif
-	} cn63xx;
-	struct cvmx_sli_pkt_input_control_cn63xx cn63xxp1;
-	struct cvmx_sli_pkt_input_control_s cn66xx;
-	struct cvmx_sli_pkt_input_control_s cn68xx;
-	struct cvmx_sli_pkt_input_control_s cn68xxp1;
-	struct cvmx_sli_pkt_input_control_s cnf71xx;
-};
-
-union cvmx_sli_pkt_instr_enb {
-	uint64_t u64;
-	struct cvmx_sli_pkt_instr_enb_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t enb:32;
-#else
-		uint64_t enb:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_instr_enb_s cn61xx;
-	struct cvmx_sli_pkt_instr_enb_s cn63xx;
-	struct cvmx_sli_pkt_instr_enb_s cn63xxp1;
-	struct cvmx_sli_pkt_instr_enb_s cn66xx;
-	struct cvmx_sli_pkt_instr_enb_s cn68xx;
-	struct cvmx_sli_pkt_instr_enb_s cn68xxp1;
-	struct cvmx_sli_pkt_instr_enb_s cnf71xx;
-};
-
-union cvmx_sli_pkt_instr_rd_size {
-	uint64_t u64;
-	struct cvmx_sli_pkt_instr_rd_size_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t rdsize:64;
-#else
-		uint64_t rdsize:64;
-#endif
-	} s;
-	struct cvmx_sli_pkt_instr_rd_size_s cn61xx;
-	struct cvmx_sli_pkt_instr_rd_size_s cn63xx;
-	struct cvmx_sli_pkt_instr_rd_size_s cn63xxp1;
-	struct cvmx_sli_pkt_instr_rd_size_s cn66xx;
-	struct cvmx_sli_pkt_instr_rd_size_s cn68xx;
-	struct cvmx_sli_pkt_instr_rd_size_s cn68xxp1;
-	struct cvmx_sli_pkt_instr_rd_size_s cnf71xx;
-};
-
-union cvmx_sli_pkt_instr_size {
-	uint64_t u64;
-	struct cvmx_sli_pkt_instr_size_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t is_64b:32;
-#else
-		uint64_t is_64b:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_instr_size_s cn61xx;
-	struct cvmx_sli_pkt_instr_size_s cn63xx;
-	struct cvmx_sli_pkt_instr_size_s cn63xxp1;
-	struct cvmx_sli_pkt_instr_size_s cn66xx;
-	struct cvmx_sli_pkt_instr_size_s cn68xx;
-	struct cvmx_sli_pkt_instr_size_s cn68xxp1;
-	struct cvmx_sli_pkt_instr_size_s cnf71xx;
-};
-
-union cvmx_sli_pkt_int_levels {
-	uint64_t u64;
-	struct cvmx_sli_pkt_int_levels_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_54_63:10;
-		uint64_t time:22;
-		uint64_t cnt:32;
-#else
-		uint64_t cnt:32;
-		uint64_t time:22;
-		uint64_t reserved_54_63:10;
-#endif
-	} s;
-	struct cvmx_sli_pkt_int_levels_s cn61xx;
-	struct cvmx_sli_pkt_int_levels_s cn63xx;
-	struct cvmx_sli_pkt_int_levels_s cn63xxp1;
-	struct cvmx_sli_pkt_int_levels_s cn66xx;
-	struct cvmx_sli_pkt_int_levels_s cn68xx;
-	struct cvmx_sli_pkt_int_levels_s cn68xxp1;
-	struct cvmx_sli_pkt_int_levels_s cnf71xx;
-};
-
-union cvmx_sli_pkt_iptr {
-	uint64_t u64;
-	struct cvmx_sli_pkt_iptr_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t iptr:32;
-#else
-		uint64_t iptr:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_iptr_s cn61xx;
-	struct cvmx_sli_pkt_iptr_s cn63xx;
-	struct cvmx_sli_pkt_iptr_s cn63xxp1;
-	struct cvmx_sli_pkt_iptr_s cn66xx;
-	struct cvmx_sli_pkt_iptr_s cn68xx;
-	struct cvmx_sli_pkt_iptr_s cn68xxp1;
-	struct cvmx_sli_pkt_iptr_s cnf71xx;
-};
-
-union cvmx_sli_pkt_out_bmode {
-	uint64_t u64;
-	struct cvmx_sli_pkt_out_bmode_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t bmode:32;
-#else
-		uint64_t bmode:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_out_bmode_s cn61xx;
-	struct cvmx_sli_pkt_out_bmode_s cn63xx;
-	struct cvmx_sli_pkt_out_bmode_s cn63xxp1;
-	struct cvmx_sli_pkt_out_bmode_s cn66xx;
-	struct cvmx_sli_pkt_out_bmode_s cn68xx;
-	struct cvmx_sli_pkt_out_bmode_s cn68xxp1;
-	struct cvmx_sli_pkt_out_bmode_s cnf71xx;
-};
-
-union cvmx_sli_pkt_out_bp_en {
-	uint64_t u64;
-	struct cvmx_sli_pkt_out_bp_en_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t bp_en:32;
-#else
-		uint64_t bp_en:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_out_bp_en_s cn68xx;
-	struct cvmx_sli_pkt_out_bp_en_s cn68xxp1;
-};
-
-union cvmx_sli_pkt_out_enb {
-	uint64_t u64;
-	struct cvmx_sli_pkt_out_enb_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t enb:32;
-#else
-		uint64_t enb:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_out_enb_s cn61xx;
-	struct cvmx_sli_pkt_out_enb_s cn63xx;
-	struct cvmx_sli_pkt_out_enb_s cn63xxp1;
-	struct cvmx_sli_pkt_out_enb_s cn66xx;
-	struct cvmx_sli_pkt_out_enb_s cn68xx;
-	struct cvmx_sli_pkt_out_enb_s cn68xxp1;
-	struct cvmx_sli_pkt_out_enb_s cnf71xx;
-};
-
-union cvmx_sli_pkt_output_wmark {
-	uint64_t u64;
-	struct cvmx_sli_pkt_output_wmark_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t wmark:32;
-#else
-		uint64_t wmark:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_output_wmark_s cn61xx;
-	struct cvmx_sli_pkt_output_wmark_s cn63xx;
-	struct cvmx_sli_pkt_output_wmark_s cn63xxp1;
-	struct cvmx_sli_pkt_output_wmark_s cn66xx;
-	struct cvmx_sli_pkt_output_wmark_s cn68xx;
-	struct cvmx_sli_pkt_output_wmark_s cn68xxp1;
-	struct cvmx_sli_pkt_output_wmark_s cnf71xx;
-};
-
-union cvmx_sli_pkt_pcie_port {
-	uint64_t u64;
-	struct cvmx_sli_pkt_pcie_port_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t pp:64;
-#else
-		uint64_t pp:64;
-#endif
-	} s;
-	struct cvmx_sli_pkt_pcie_port_s cn61xx;
-	struct cvmx_sli_pkt_pcie_port_s cn63xx;
-	struct cvmx_sli_pkt_pcie_port_s cn63xxp1;
-	struct cvmx_sli_pkt_pcie_port_s cn66xx;
-	struct cvmx_sli_pkt_pcie_port_s cn68xx;
-	struct cvmx_sli_pkt_pcie_port_s cn68xxp1;
-	struct cvmx_sli_pkt_pcie_port_s cnf71xx;
-};
-
-union cvmx_sli_pkt_port_in_rst {
-	uint64_t u64;
-	struct cvmx_sli_pkt_port_in_rst_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t in_rst:32;
-		uint64_t out_rst:32;
-#else
-		uint64_t out_rst:32;
-		uint64_t in_rst:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_port_in_rst_s cn61xx;
-	struct cvmx_sli_pkt_port_in_rst_s cn63xx;
-	struct cvmx_sli_pkt_port_in_rst_s cn63xxp1;
-	struct cvmx_sli_pkt_port_in_rst_s cn66xx;
-	struct cvmx_sli_pkt_port_in_rst_s cn68xx;
-	struct cvmx_sli_pkt_port_in_rst_s cn68xxp1;
-	struct cvmx_sli_pkt_port_in_rst_s cnf71xx;
-};
-
-union cvmx_sli_pkt_slist_es {
-	uint64_t u64;
-	struct cvmx_sli_pkt_slist_es_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t es:64;
-#else
-		uint64_t es:64;
-#endif
-	} s;
-	struct cvmx_sli_pkt_slist_es_s cn61xx;
-	struct cvmx_sli_pkt_slist_es_s cn63xx;
-	struct cvmx_sli_pkt_slist_es_s cn63xxp1;
-	struct cvmx_sli_pkt_slist_es_s cn66xx;
-	struct cvmx_sli_pkt_slist_es_s cn68xx;
-	struct cvmx_sli_pkt_slist_es_s cn68xxp1;
-	struct cvmx_sli_pkt_slist_es_s cnf71xx;
-};
-
-union cvmx_sli_pkt_slist_ns {
-	uint64_t u64;
-	struct cvmx_sli_pkt_slist_ns_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t nsr:32;
-#else
-		uint64_t nsr:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_slist_ns_s cn61xx;
-	struct cvmx_sli_pkt_slist_ns_s cn63xx;
-	struct cvmx_sli_pkt_slist_ns_s cn63xxp1;
-	struct cvmx_sli_pkt_slist_ns_s cn66xx;
-	struct cvmx_sli_pkt_slist_ns_s cn68xx;
-	struct cvmx_sli_pkt_slist_ns_s cn68xxp1;
-	struct cvmx_sli_pkt_slist_ns_s cnf71xx;
-};
-
-union cvmx_sli_pkt_slist_ror {
-	uint64_t u64;
-	struct cvmx_sli_pkt_slist_ror_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t ror:32;
-#else
-		uint64_t ror:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_slist_ror_s cn61xx;
-	struct cvmx_sli_pkt_slist_ror_s cn63xx;
-	struct cvmx_sli_pkt_slist_ror_s cn63xxp1;
-	struct cvmx_sli_pkt_slist_ror_s cn66xx;
-	struct cvmx_sli_pkt_slist_ror_s cn68xx;
-	struct cvmx_sli_pkt_slist_ror_s cn68xxp1;
-	struct cvmx_sli_pkt_slist_ror_s cnf71xx;
-};
-
-union cvmx_sli_pkt_time_int {
-	uint64_t u64;
-	struct cvmx_sli_pkt_time_int_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t port:32;
-#else
-		uint64_t port:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_time_int_s cn61xx;
-	struct cvmx_sli_pkt_time_int_s cn63xx;
-	struct cvmx_sli_pkt_time_int_s cn63xxp1;
-	struct cvmx_sli_pkt_time_int_s cn66xx;
-	struct cvmx_sli_pkt_time_int_s cn68xx;
-	struct cvmx_sli_pkt_time_int_s cn68xxp1;
-	struct cvmx_sli_pkt_time_int_s cnf71xx;
-};
-
-union cvmx_sli_pkt_time_int_enb {
-	uint64_t u64;
-	struct cvmx_sli_pkt_time_int_enb_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t port:32;
-#else
-		uint64_t port:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_time_int_enb_s cn61xx;
-	struct cvmx_sli_pkt_time_int_enb_s cn63xx;
-	struct cvmx_sli_pkt_time_int_enb_s cn63xxp1;
-	struct cvmx_sli_pkt_time_int_enb_s cn66xx;
-	struct cvmx_sli_pkt_time_int_enb_s cn68xx;
-	struct cvmx_sli_pkt_time_int_enb_s cn68xxp1;
-	struct cvmx_sli_pkt_time_int_enb_s cnf71xx;
-};
-
-union cvmx_sli_portx_pkind {
-	uint64_t u64;
-	struct cvmx_sli_portx_pkind_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_25_63:39;
-		uint64_t rpk_enb:1;
-		uint64_t reserved_22_23:2;
-		uint64_t pkindr:6;
-		uint64_t reserved_14_15:2;
-		uint64_t bpkind:6;
-		uint64_t reserved_6_7:2;
-		uint64_t pkind:6;
-#else
-		uint64_t pkind:6;
-		uint64_t reserved_6_7:2;
-		uint64_t bpkind:6;
-		uint64_t reserved_14_15:2;
-		uint64_t pkindr:6;
-		uint64_t reserved_22_23:2;
-		uint64_t rpk_enb:1;
-		uint64_t reserved_25_63:39;
-#endif
-	} s;
-	struct cvmx_sli_portx_pkind_s cn68xx;
-	struct cvmx_sli_portx_pkind_cn68xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t bpkind:6;
-		uint64_t reserved_6_7:2;
-		uint64_t pkind:6;
-#else
-		uint64_t pkind:6;
-		uint64_t reserved_6_7:2;
-		uint64_t bpkind:6;
-		uint64_t reserved_14_63:50;
-#endif
-	} cn68xxp1;
 };
 
 union cvmx_sli_s2m_portx_ctl {
 	uint64_t u64;
 	struct cvmx_sli_s2m_portx_ctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_5_63:59;
-		uint64_t wind_d:1;
-		uint64_t bar0_d:1;
-		uint64_t mrrs:3;
-#else
-		uint64_t mrrs:3;
-		uint64_t bar0_d:1;
-		uint64_t wind_d:1;
-		uint64_t reserved_5_63:59;
-#endif
-	} s;
-	struct cvmx_sli_s2m_portx_ctl_s cn61xx;
-	struct cvmx_sli_s2m_portx_ctl_s cn63xx;
-	struct cvmx_sli_s2m_portx_ctl_s cn63xxp1;
-	struct cvmx_sli_s2m_portx_ctl_s cn66xx;
-	struct cvmx_sli_s2m_portx_ctl_s cn68xx;
-	struct cvmx_sli_s2m_portx_ctl_s cn68xxp1;
-	struct cvmx_sli_s2m_portx_ctl_s cnf71xx;
-};
-
-union cvmx_sli_scratch_1 {
-	uint64_t u64;
-	struct cvmx_sli_scratch_1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t data:64;
-#else
-		uint64_t data:64;
-#endif
-	} s;
-	struct cvmx_sli_scratch_1_s cn61xx;
-	struct cvmx_sli_scratch_1_s cn63xx;
-	struct cvmx_sli_scratch_1_s cn63xxp1;
-	struct cvmx_sli_scratch_1_s cn66xx;
-	struct cvmx_sli_scratch_1_s cn68xx;
-	struct cvmx_sli_scratch_1_s cn68xxp1;
-	struct cvmx_sli_scratch_1_s cnf71xx;
-};
-
-union cvmx_sli_scratch_2 {
-	uint64_t u64;
-	struct cvmx_sli_scratch_2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t data:64;
-#else
-		uint64_t data:64;
-#endif
-	} s;
-	struct cvmx_sli_scratch_2_s cn61xx;
-	struct cvmx_sli_scratch_2_s cn63xx;
-	struct cvmx_sli_scratch_2_s cn63xxp1;
-	struct cvmx_sli_scratch_2_s cn66xx;
-	struct cvmx_sli_scratch_2_s cn68xx;
-	struct cvmx_sli_scratch_2_s cn68xxp1;
-	struct cvmx_sli_scratch_2_s cnf71xx;
-};
-
-union cvmx_sli_state1 {
-	uint64_t u64;
-	struct cvmx_sli_state1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t cpl1:12;
-		uint64_t cpl0:12;
-		uint64_t arb:1;
-		uint64_t csr:39;
-#else
-		uint64_t csr:39;
-		uint64_t arb:1;
-		uint64_t cpl0:12;
-		uint64_t cpl1:12;
-#endif
-	} s;
-	struct cvmx_sli_state1_s cn61xx;
-	struct cvmx_sli_state1_s cn63xx;
-	struct cvmx_sli_state1_s cn63xxp1;
-	struct cvmx_sli_state1_s cn66xx;
-	struct cvmx_sli_state1_s cn68xx;
-	struct cvmx_sli_state1_s cn68xxp1;
-	struct cvmx_sli_state1_s cnf71xx;
-};
-
-union cvmx_sli_state2 {
-	uint64_t u64;
-	struct cvmx_sli_state2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_56_63:8;
-		uint64_t nnp1:8;
-		uint64_t reserved_47_47:1;
-		uint64_t rac:1;
-		uint64_t csm1:15;
-		uint64_t csm0:15;
-		uint64_t nnp0:8;
-		uint64_t nnd:8;
-#else
-		uint64_t nnd:8;
-		uint64_t nnp0:8;
-		uint64_t csm0:15;
-		uint64_t csm1:15;
-		uint64_t rac:1;
-		uint64_t reserved_47_47:1;
-		uint64_t nnp1:8;
-		uint64_t reserved_56_63:8;
-#endif
-	} s;
-	struct cvmx_sli_state2_s cn61xx;
-	struct cvmx_sli_state2_s cn63xx;
-	struct cvmx_sli_state2_s cn63xxp1;
-	struct cvmx_sli_state2_s cn66xx;
-	struct cvmx_sli_state2_s cn68xx;
-	struct cvmx_sli_state2_s cn68xxp1;
-	struct cvmx_sli_state2_s cnf71xx;
-};
-
-union cvmx_sli_state3 {
-	uint64_t u64;
-	struct cvmx_sli_state3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_56_63:8;
-		uint64_t psm1:15;
-		uint64_t psm0:15;
-		uint64_t nsm1:13;
-		uint64_t nsm0:13;
-#else
-		uint64_t nsm0:13;
-		uint64_t nsm1:13;
-		uint64_t psm0:15;
-		uint64_t psm1:15;
-		uint64_t reserved_56_63:8;
-#endif
-	} s;
-	struct cvmx_sli_state3_s cn61xx;
-	struct cvmx_sli_state3_s cn63xx;
-	struct cvmx_sli_state3_s cn63xxp1;
-	struct cvmx_sli_state3_s cn66xx;
-	struct cvmx_sli_state3_s cn68xx;
-	struct cvmx_sli_state3_s cn68xxp1;
-	struct cvmx_sli_state3_s cnf71xx;
-};
-
-union cvmx_sli_tx_pipe {
-	uint64_t u64;
-	struct cvmx_sli_tx_pipe_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_24_63:40;
-		uint64_t nump:8;
-		uint64_t reserved_7_15:9;
-		uint64_t base:7;
-#else
-		uint64_t base:7;
-		uint64_t reserved_7_15:9;
-		uint64_t nump:8;
-		uint64_t reserved_24_63:40;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_5_63:59,
+		__BITFIELD_FIELD(uint64_t wind_d:1,
+		__BITFIELD_FIELD(uint64_t bar0_d:1,
+		__BITFIELD_FIELD(uint64_t mrrs:3,
+		;))))
 	} s;
-	struct cvmx_sli_tx_pipe_s cn68xx;
-	struct cvmx_sli_tx_pipe_s cn68xxp1;
 };
 
-union cvmx_sli_win_rd_addr {
-	uint64_t u64;
-	struct cvmx_sli_win_rd_addr_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_51_63:13;
-		uint64_t ld_cmd:2;
-		uint64_t iobit:1;
-		uint64_t rd_addr:48;
-#else
-		uint64_t rd_addr:48;
-		uint64_t iobit:1;
-		uint64_t ld_cmd:2;
-		uint64_t reserved_51_63:13;
-#endif
-	} s;
-	struct cvmx_sli_win_rd_addr_s cn61xx;
-	struct cvmx_sli_win_rd_addr_s cn63xx;
-	struct cvmx_sli_win_rd_addr_s cn63xxp1;
-	struct cvmx_sli_win_rd_addr_s cn66xx;
-	struct cvmx_sli_win_rd_addr_s cn68xx;
-	struct cvmx_sli_win_rd_addr_s cn68xxp1;
-	struct cvmx_sli_win_rd_addr_s cnf71xx;
-};
-
-union cvmx_sli_win_rd_data {
-	uint64_t u64;
-	struct cvmx_sli_win_rd_data_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t rd_data:64;
-#else
-		uint64_t rd_data:64;
-#endif
-	} s;
-	struct cvmx_sli_win_rd_data_s cn61xx;
-	struct cvmx_sli_win_rd_data_s cn63xx;
-	struct cvmx_sli_win_rd_data_s cn63xxp1;
-	struct cvmx_sli_win_rd_data_s cn66xx;
-	struct cvmx_sli_win_rd_data_s cn68xx;
-	struct cvmx_sli_win_rd_data_s cn68xxp1;
-	struct cvmx_sli_win_rd_data_s cnf71xx;
-};
-
-union cvmx_sli_win_wr_addr {
-	uint64_t u64;
-	struct cvmx_sli_win_wr_addr_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_49_63:15;
-		uint64_t iobit:1;
-		uint64_t wr_addr:45;
-		uint64_t reserved_0_2:3;
-#else
-		uint64_t reserved_0_2:3;
-		uint64_t wr_addr:45;
-		uint64_t iobit:1;
-		uint64_t reserved_49_63:15;
-#endif
-	} s;
-	struct cvmx_sli_win_wr_addr_s cn61xx;
-	struct cvmx_sli_win_wr_addr_s cn63xx;
-	struct cvmx_sli_win_wr_addr_s cn63xxp1;
-	struct cvmx_sli_win_wr_addr_s cn66xx;
-	struct cvmx_sli_win_wr_addr_s cn68xx;
-	struct cvmx_sli_win_wr_addr_s cn68xxp1;
-	struct cvmx_sli_win_wr_addr_s cnf71xx;
-};
-
-union cvmx_sli_win_wr_data {
-	uint64_t u64;
-	struct cvmx_sli_win_wr_data_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t wr_data:64;
-#else
-		uint64_t wr_data:64;
-#endif
-	} s;
-	struct cvmx_sli_win_wr_data_s cn61xx;
-	struct cvmx_sli_win_wr_data_s cn63xx;
-	struct cvmx_sli_win_wr_data_s cn63xxp1;
-	struct cvmx_sli_win_wr_data_s cn66xx;
-	struct cvmx_sli_win_wr_data_s cn68xx;
-	struct cvmx_sli_win_wr_data_s cn68xxp1;
-	struct cvmx_sli_win_wr_data_s cnf71xx;
-};
-
-union cvmx_sli_win_wr_mask {
-	uint64_t u64;
-	struct cvmx_sli_win_wr_mask_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_8_63:56;
-		uint64_t wr_mask:8;
-#else
-		uint64_t wr_mask:8;
-		uint64_t reserved_8_63:56;
-#endif
-	} s;
-	struct cvmx_sli_win_wr_mask_s cn61xx;
-	struct cvmx_sli_win_wr_mask_s cn63xx;
-	struct cvmx_sli_win_wr_mask_s cn63xxp1;
-	struct cvmx_sli_win_wr_mask_s cn66xx;
-	struct cvmx_sli_win_wr_mask_s cn68xx;
-	struct cvmx_sli_win_wr_mask_s cn68xxp1;
-	struct cvmx_sli_win_wr_mask_s cnf71xx;
-};
-
-union cvmx_sli_window_ctl {
+union cvmx_sli_mem_access_subidx {
 	uint64_t u64;
-	struct cvmx_sli_window_ctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t time:32;
-#else
-		uint64_t time:32;
-		uint64_t reserved_32_63:32;
-#endif
+	struct cvmx_sli_mem_access_subidx_s {
+		__BITFIELD_FIELD(uint64_t reserved_43_63:21,
+		__BITFIELD_FIELD(uint64_t zero:1,
+		__BITFIELD_FIELD(uint64_t port:3,
+		__BITFIELD_FIELD(uint64_t nmerge:1,
+		__BITFIELD_FIELD(uint64_t esr:2,
+		__BITFIELD_FIELD(uint64_t esw:2,
+		__BITFIELD_FIELD(uint64_t wtype:2,
+		__BITFIELD_FIELD(uint64_t rtype:2,
+		__BITFIELD_FIELD(uint64_t ba:30,
+		;)))))))))
 	} s;
-	struct cvmx_sli_window_ctl_s cn61xx;
-	struct cvmx_sli_window_ctl_s cn63xx;
-	struct cvmx_sli_window_ctl_s cn63xxp1;
-	struct cvmx_sli_window_ctl_s cn66xx;
-	struct cvmx_sli_window_ctl_s cn68xx;
-	struct cvmx_sli_window_ctl_s cn68xxp1;
-	struct cvmx_sli_window_ctl_s cnf71xx;
+	struct cvmx_sli_mem_access_subidx_cn68xx {
+		__BITFIELD_FIELD(uint64_t reserved_43_63:21,
+		__BITFIELD_FIELD(uint64_t zero:1,
+		__BITFIELD_FIELD(uint64_t port:3,
+		__BITFIELD_FIELD(uint64_t nmerge:1,
+		__BITFIELD_FIELD(uint64_t esr:2,
+		__BITFIELD_FIELD(uint64_t esw:2,
+		__BITFIELD_FIELD(uint64_t wtype:2,
+		__BITFIELD_FIELD(uint64_t rtype:2,
+		__BITFIELD_FIELD(uint64_t ba:28,
+		__BITFIELD_FIELD(uint64_t reserved_0_1:2,
+		;))))))))))
+	} cn68xx;
 };
 
 #endif
diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h
index 2530e8731c8a..9742202f2a32 100644
--- a/arch/mips/include/asm/octeon/cvmx.h
+++ b/arch/mips/include/asm/octeon/cvmx.h
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2008 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -62,7 +62,6 @@ enum cvmx_mips_space {
 #include <asm/octeon/cvmx-iob-defs.h>
 #include <asm/octeon/cvmx-ipd-defs.h>
 #include <asm/octeon/cvmx-l2c-defs.h>
-#include <asm/octeon/cvmx-l2d-defs.h>
 #include <asm/octeon/cvmx-l2t-defs.h>
 #include <asm/octeon/cvmx-led-defs.h>
 #include <asm/octeon/cvmx-mio-defs.h>
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index a8705f6c8180..a1bdb1ea5234 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -110,6 +110,32 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #endif
 
+#ifndef __PAGETABLE_PUD_FOLDED
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	pud_t *pud;
+
+	pud = (pud_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PUD_ORDER);
+	if (pud)
+		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
+	return pud;
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	free_pages((unsigned long)pud, PUD_ORDER);
+}
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+	set_pgd(pgd, __pgd((unsigned long)pud));
+}
+
+#define __pud_free_tlb(tlb, x, addr)	pud_free((tlb)->mm, x)
+
+#endif /* __PAGETABLE_PUD_FOLDED */
+
 #define check_pgt_cache()	do { } while (0)
 
 extern void pagetable_init(void);
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index 6f94bed571c4..74afe8c76bdd 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -19,6 +19,10 @@
 #define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
+#ifdef CONFIG_HIGHMEM
+#include <asm/highmem.h>
+#endif
+
 extern int temp_tlb_entry;
 
 /*
@@ -62,7 +66,8 @@ extern int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1,
 
 #define VMALLOC_START	  MAP_BASE
 
-#define PKMAP_BASE		(0xfe000000UL)
+#define PKMAP_END	((FIXADDR_START) & ~((LAST_PKMAP << PAGE_SHIFT)-1))
+#define PKMAP_BASE	(PKMAP_END - PAGE_SIZE * LAST_PKMAP)
 
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END	(PKMAP_BASE-2*PAGE_SIZE)
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 130a2a6c1531..67fe6dc5211c 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -20,7 +20,7 @@
 #define __ARCH_USE_5LEVEL_HACK
 #if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48)
 #include <asm-generic/pgtable-nopmd.h>
-#else
+#elif !(defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_MIPS_VA_BITS_48))
 #include <asm-generic/pgtable-nopud.h>
 #endif
 
@@ -54,9 +54,18 @@
 #define PMD_SIZE	(1UL << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
 
+# ifdef __PAGETABLE_PUD_FOLDED
+# define PGDIR_SHIFT	(PMD_SHIFT + (PAGE_SHIFT + PMD_ORDER - 3))
+# endif
+#endif
 
-#define PGDIR_SHIFT	(PMD_SHIFT + (PAGE_SHIFT + PMD_ORDER - 3))
+#ifndef __PAGETABLE_PUD_FOLDED
+#define PUD_SHIFT	(PMD_SHIFT + (PAGE_SHIFT + PMD_ORDER - 3))
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+#define PGDIR_SHIFT	(PUD_SHIFT + (PAGE_SHIFT + PUD_ORDER - 3))
 #endif
+
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
@@ -79,8 +88,13 @@
  * of virtual address space.
  */
 #ifdef CONFIG_PAGE_SIZE_4KB
-#define PGD_ORDER		1
-#define PUD_ORDER		aieeee_attempt_to_allocate_pud
+# ifdef CONFIG_MIPS_VA_BITS_48
+#  define PGD_ORDER		0
+#  define PUD_ORDER		0
+# else
+#  define PGD_ORDER		1
+#  define PUD_ORDER		aieeee_attempt_to_allocate_pud
+# endif
 #define PMD_ORDER		0
 #define PTE_ORDER		0
 #endif
@@ -118,6 +132,9 @@
 #endif
 
 #define PTRS_PER_PGD	((PAGE_SIZE << PGD_ORDER) / sizeof(pgd_t))
+#ifndef __PAGETABLE_PUD_FOLDED
+#define PTRS_PER_PUD	((PAGE_SIZE << PUD_ORDER) / sizeof(pud_t))
+#endif
 #ifndef __PAGETABLE_PMD_FOLDED
 #define PTRS_PER_PMD	((PAGE_SIZE << PMD_ORDER) / sizeof(pmd_t))
 #endif
@@ -134,7 +151,7 @@
 #define VMALLOC_START		(MAP_BASE + (2 * PAGE_SIZE))
 #define VMALLOC_END	\
 	(MAP_BASE + \
-	 min(PTRS_PER_PGD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, \
+	 min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, \
 	     (1UL << cpu_vmbits)) - (1UL << 32))
 
 #if defined(CONFIG_MODULES) && defined(KBUILD_64BIT_SYM32) && \
@@ -150,12 +167,72 @@
 #define pmd_ERROR(e) \
 	printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
 #endif
+#ifndef __PAGETABLE_PUD_FOLDED
+#define pud_ERROR(e) \
+	printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+#endif
 #define pgd_ERROR(e) \
 	printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
 
 extern pte_t invalid_pte_table[PTRS_PER_PTE];
 extern pte_t empty_bad_page_table[PTRS_PER_PTE];
 
+#ifndef __PAGETABLE_PUD_FOLDED
+/*
+ * For 4-level pagetables we defines these ourselves, for 3-level the
+ * definitions are below, for 2-level the
+ * definitions are supplied by <asm-generic/pgtable-nopmd.h>.
+ */
+typedef struct { unsigned long pud; } pud_t;
+#define pud_val(x)	((x).pud)
+#define __pud(x)	((pud_t) { (x) })
+
+extern pud_t invalid_pud_table[PTRS_PER_PUD];
+
+/*
+ * Empty pgd entries point to the invalid_pud_table.
+ */
+static inline int pgd_none(pgd_t pgd)
+{
+	return pgd_val(pgd) == (unsigned long)invalid_pud_table;
+}
+
+static inline int pgd_bad(pgd_t pgd)
+{
+	if (unlikely(pgd_val(pgd) & ~PAGE_MASK))
+		return 1;
+
+	return 0;
+}
+
+static inline int pgd_present(pgd_t pgd)
+{
+	return pgd_val(pgd) != (unsigned long)invalid_pud_table;
+}
+
+static inline void pgd_clear(pgd_t *pgdp)
+{
+	pgd_val(*pgdp) = (unsigned long)invalid_pud_table;
+}
+
+#define pud_index(address)	(((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+{
+	return pgd_val(pgd);
+}
+
+static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+{
+	return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
+}
+
+static inline void set_pgd(pgd_t *pgd, pgd_t pgdval)
+{
+	*pgd = pgdval;
+}
+
+#endif
 
 #ifndef __PAGETABLE_PMD_FOLDED
 /*
@@ -281,6 +358,7 @@ static inline pmd_t *pmd_offset(pud_t * pud, unsigned long address)
  * Initialize a new pgd / pmd table with invalid pointers.
  */
 extern void pgd_init(unsigned long page);
+extern void pud_init(unsigned long page, unsigned long pagetable);
 extern void pmd_init(unsigned long page, unsigned long pagetable);
 
 /*
diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h
index e9a9e2ade1d2..3748f4d120a5 100644
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -21,77 +21,46 @@
 #define UASM_EXPORT_SYMBOL(sym)
 #endif
 
-#define _UASM_ISA_CLASSIC	0
-#define _UASM_ISA_MICROMIPS	1
-
-#ifndef UASM_ISA
-#ifdef CONFIG_CPU_MICROMIPS
-#define UASM_ISA	_UASM_ISA_MICROMIPS
-#else
-#define UASM_ISA	_UASM_ISA_CLASSIC
-#endif
-#endif
-
-#if (UASM_ISA == _UASM_ISA_CLASSIC)
-#ifdef CONFIG_CPU_MICROMIPS
-#define ISAOPC(op)	CL_uasm_i##op
-#define ISAFUNC(x)	CL_##x
-#else
-#define ISAOPC(op)	uasm_i##op
-#define ISAFUNC(x)	x
-#endif
-#elif (UASM_ISA == _UASM_ISA_MICROMIPS)
-#ifdef CONFIG_CPU_MICROMIPS
-#define ISAOPC(op)	uasm_i##op
-#define ISAFUNC(x)	x
-#else
-#define ISAOPC(op)	MM_uasm_i##op
-#define ISAFUNC(x)	MM_##x
-#endif
-#else
-#error Unsupported micro-assembler ISA!!!
-#endif
-
 #define Ip_u1u2u3(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
 
 #define Ip_u2u1u3(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
 
 #define Ip_u3u2u1(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
 
 #define Ip_u3u1u2(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
 
 #define Ip_u1u2s3(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, signed int c)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b, signed int c)
 
 #define Ip_u2s3u1(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, signed int b, unsigned int c)
+void uasm_i##op(u32 **buf, unsigned int a, signed int b, unsigned int c)
 
 #define Ip_s3s1s2(op)							\
-void ISAOPC(op)(u32 **buf, int a, int b, int c)
+void uasm_i##op(u32 **buf, int a, int b, int c)
 
 #define Ip_u2u1s3(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, signed int c)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b, signed int c)
 
 #define Ip_u2u1msbu3(op)						\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, unsigned int c, \
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b, unsigned int c, \
 	   unsigned int d)
 
 #define Ip_u1u2(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b)
 
 #define Ip_u2u1(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b)
 
 #define Ip_u1s2(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, signed int b)
+void uasm_i##op(u32 **buf, unsigned int a, signed int b)
 
-#define Ip_u1(op) void ISAOPC(op)(u32 **buf, unsigned int a)
+#define Ip_u1(op) void uasm_i##op(u32 **buf, unsigned int a)
 
-#define Ip_0(op) void ISAOPC(op)(u32 **buf)
+#define Ip_0(op) void uasm_i##op(u32 **buf)
 
 Ip_u2u1s3(_addiu);
 Ip_u3u1u2(_addu);
@@ -138,6 +107,7 @@ Ip_u2s3u1(_lb);
 Ip_u2s3u1(_ld);
 Ip_u3u1u2(_ldx);
 Ip_u2s3u1(_lh);
+Ip_u2s3u1(_lhu);
 Ip_u2s3u1(_ll);
 Ip_u2s3u1(_lld);
 Ip_u1s2(_lui);
@@ -190,20 +160,20 @@ struct uasm_label {
 	int lab;
 };
 
-void ISAFUNC(uasm_build_label)(struct uasm_label **lab, u32 *addr,
+void uasm_build_label(struct uasm_label **lab, u32 *addr,
 			int lid);
 #ifdef CONFIG_64BIT
-int ISAFUNC(uasm_in_compat_space_p)(long addr);
+int uasm_in_compat_space_p(long addr);
 #endif
-int ISAFUNC(uasm_rel_hi)(long val);
-int ISAFUNC(uasm_rel_lo)(long val);
-void ISAFUNC(UASM_i_LA_mostly)(u32 **buf, unsigned int rs, long addr);
-void ISAFUNC(UASM_i_LA)(u32 **buf, unsigned int rs, long addr);
+int uasm_rel_hi(long val);
+int uasm_rel_lo(long val);
+void UASM_i_LA_mostly(u32 **buf, unsigned int rs, long addr);
+void UASM_i_LA(u32 **buf, unsigned int rs, long addr);
 
 #define UASM_L_LA(lb)							\
-static inline void ISAFUNC(uasm_l##lb)(struct uasm_label **lab, u32 *addr) \
+static inline void uasm_l##lb(struct uasm_label **lab, u32 *addr)	\
 {									\
-	ISAFUNC(uasm_build_label)(lab, addr, label##lb);		\
+	uasm_build_label(lab, addr, label##lb);				\
 }
 
 /* convenience macros for instructions */
@@ -255,27 +225,27 @@ static inline void uasm_i_drotr_safe(u32 **p, unsigned int a1,
 				     unsigned int a2, unsigned int a3)
 {
 	if (a3 < 32)
-		ISAOPC(_drotr)(p, a1, a2, a3);
+		uasm_i_drotr(p, a1, a2, a3);
 	else
-		ISAOPC(_drotr32)(p, a1, a2, a3 - 32);
+		uasm_i_drotr32(p, a1, a2, a3 - 32);
 }
 
 static inline void uasm_i_dsll_safe(u32 **p, unsigned int a1,
 				    unsigned int a2, unsigned int a3)
 {
 	if (a3 < 32)
-		ISAOPC(_dsll)(p, a1, a2, a3);
+		uasm_i_dsll(p, a1, a2, a3);
 	else
-		ISAOPC(_dsll32)(p, a1, a2, a3 - 32);
+		uasm_i_dsll32(p, a1, a2, a3 - 32);
 }
 
 static inline void uasm_i_dsrl_safe(u32 **p, unsigned int a1,
 				    unsigned int a2, unsigned int a3)
 {
 	if (a3 < 32)
-		ISAOPC(_dsrl)(p, a1, a2, a3);
+		uasm_i_dsrl(p, a1, a2, a3);
 	else
-		ISAOPC(_dsrl32)(p, a1, a2, a3 - 32);
+		uasm_i_dsrl32(p, a1, a2, a3 - 32);
 }
 
 /* Handle relocations. */
diff --git a/arch/mips/include/uapi/asm/Kbuild b/arch/mips/include/uapi/asm/Kbuild
index f2cf41461146..a0266feba9e6 100644
--- a/arch/mips/include/uapi/asm/Kbuild
+++ b/arch/mips/include/uapi/asm/Kbuild
@@ -2,40 +2,3 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += ipcbuf.h
-
-header-y += auxvec.h
-header-y += bitfield.h
-header-y += bitsperlong.h
-header-y += break.h
-header-y += byteorder.h
-header-y += cachectl.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += inst.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += sgidefs.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += sysmips.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index b11facd11c9d..f702a459a830 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -804,8 +804,10 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 			break;
 		}
 		/* Compact branch: BNEZC || JIALC */
-		if (insn.i_format.rs)
+		if (!insn.i_format.rs) {
+			/* JIALC: set $31/ra */
 			regs->regs[31] = epc + 4;
+		}
 		regs->cp0_epc += 8;
 		break;
 #endif
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 3382892544f0..1aba27786bd5 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -34,6 +34,7 @@
 
 /* Hardware capabilities */
 unsigned int elf_hwcap __read_mostly;
+EXPORT_SYMBOL_GPL(elf_hwcap);
 
 /*
  * Get the FPU Implementation/Revision.
@@ -1955,6 +1956,12 @@ void cpu_probe(void)
 	struct cpuinfo_mips *c = &current_cpu_data;
 	unsigned int cpu = smp_processor_id();
 
+	/*
+	 * Set a default elf platform, cpu probe may later
+	 * overwrite it with a more precise value
+	 */
+	set_elf_platform(cpu, "mips");
+
 	c->processor_id = PRID_IMP_UNKNOWN;
 	c->fpu_id	= FPIR_IMP_NONE;
 	c->cputype	= CPU_UNKNOWN;
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index 8d83fc2a96b7..38a302919e6b 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -11,6 +11,7 @@
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/compiler.h>
+#include <asm/irqflags.h>
 #include <asm/regdef.h>
 #include <asm/mipsregs.h>
 #include <asm/stackframe.h>
@@ -119,6 +120,7 @@ work_pending:
 	andi	t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS
 	beqz	t0, work_notifysig
 work_resched:
+	TRACE_IRQS_OFF
 	jal	schedule
 
 	local_irq_disable		# make sure need_resched and
@@ -155,6 +157,7 @@ syscall_exit_work:
 	beqz	t0, work_pending	# trace bit set?
 	local_irq_enable		# could let syscall_trace_leave()
 					# call schedule() instead
+	TRACE_IRQS_ON
 	move	a0, sp
 	jal	syscall_trace_leave
 	b	resume_userspace
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 30a3b75e88eb..9d9b8fbae202 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -38,20 +38,6 @@ void arch_ftrace_update_code(int command)
 
 #endif
 
-/*
- * Check if the address is in kernel space
- *
- * Clone core_kernel_text() from kernel/extable.c, but doesn't call
- * init_kernel_text() for Ftrace doesn't trace functions in init sections.
- */
-static inline int in_kernel_space(unsigned long ip)
-{
-	if (ip >= (unsigned long)_stext &&
-	    ip <= (unsigned long)_etext)
-		return 1;
-	return 0;
-}
-
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 #define JAL 0x0c000000		/* jump & link: ip --> ra, jump to target */
@@ -198,7 +184,7 @@ int ftrace_make_nop(struct module *mod,
 	 * If ip is in kernel space, no long call, otherwise, long call is
 	 * needed.
 	 */
-	new = in_kernel_space(ip) ? INSN_NOP : INSN_B_1F;
+	new = core_kernel_text(ip) ? INSN_NOP : INSN_B_1F;
 #ifdef CONFIG_64BIT
 	return ftrace_modify_code(ip, new);
 #else
@@ -218,12 +204,12 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	unsigned int new;
 	unsigned long ip = rec->ip;
 
-	new = in_kernel_space(ip) ? insn_jal_ftrace_caller : insn_la_mcount[0];
+	new = core_kernel_text(ip) ? insn_jal_ftrace_caller : insn_la_mcount[0];
 
 #ifdef CONFIG_64BIT
 	return ftrace_modify_code(ip, new);
 #else
-	return ftrace_modify_code_2r(ip, new, in_kernel_space(ip) ?
+	return ftrace_modify_code_2r(ip, new, core_kernel_text(ip) ?
 						INSN_NOP : insn_la_mcount[1]);
 #endif
 }
@@ -289,7 +275,7 @@ unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long
 	 * instruction "lui v1, hi_16bit_of_mcount"(offset is 24), but for
 	 * kernel, move after the instruction "move ra, at"(offset is 16)
 	 */
-	ip = self_ra - (in_kernel_space(self_ra) ? 16 : 24);
+	ip = self_ra - (core_kernel_text(self_ra) ? 16 : 24);
 
 	/*
 	 * search the text until finding the non-store instruction or "s{d,w}
@@ -394,7 +380,7 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
 	 * entries configured through the tracing/set_graph_function interface.
 	 */
 
-	insns = in_kernel_space(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
+	insns = core_kernel_text(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
 	trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
 
 	/* Only trace if the calling function expects to */
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index cf052204eb0a..d1bb506adc10 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -106,8 +106,8 @@ NESTED(kernel_entry, 16, sp)			# kernel entry point
 	beq		t0, t1, dtb_found
 #endif
 	li		t1, -2
-	beq		a0, t1, dtb_found
 	move		t2, a1
+	beq		a0, t1, dtb_found
 
 	li		t2, 0
 dtb_found:
diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index 550e7d03090a..ae64c8f56a8c 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -1096,10 +1096,20 @@ repeat:
 		}
 		break;
 
-	case beql_op:
-	case bnel_op:
 	case blezl_op:
 	case bgtzl_op:
+		/*
+		 * For BLEZL and BGTZL, rt field must be set to 0. If this
+		 * is not the case, this may be an encoding of a MIPS R6
+		 * instruction, so return to CPU execution if this occurs
+		 */
+		if (MIPSInst_RT(inst)) {
+			err = SIGILL;
+			break;
+		}
+		/* fall through */
+	case beql_op:
+	case bnel_op:
 		if (delay_slot(regs)) {
 			err = SIGILL;
 			break;
@@ -2329,6 +2339,8 @@ static int mipsr2_stats_clear_show(struct seq_file *s, void *unused)
 	__this_cpu_write((mipsr2bremustats).bgezl, 0);
 	__this_cpu_write((mipsr2bremustats).bltzll, 0);
 	__this_cpu_write((mipsr2bremustats).bgezll, 0);
+	__this_cpu_write((mipsr2bremustats).bltzall, 0);
+	__this_cpu_write((mipsr2bremustats).bgezall, 0);
 	__this_cpu_write((mipsr2bremustats).bltzal, 0);
 	__this_cpu_write((mipsr2bremustats).bgezal, 0);
 	__this_cpu_write((mipsr2bremustats).beql, 0);
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 313a88b2973f..f3e301f95aef 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -1597,7 +1597,6 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
 		break;
 	case CPU_P5600:
 	case CPU_P6600:
-	case CPU_I6400:
 		/* 8-bit event numbers */
 		raw_id = config & 0x1ff;
 		base_id = raw_id & 0xff;
@@ -1610,6 +1609,11 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
 		raw_event.range = P;
 #endif
 		break;
+	case CPU_I6400:
+		/* 8-bit event numbers */
+		base_id = config & 0xff;
+		raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD;
+		break;
 	case CPU_1004K:
 		if (IS_BOTH_COUNTERS_1004K_EVENT(base_id))
 			raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD;
diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
index 5f928c34c148..d99416094ba9 100644
--- a/arch/mips/kernel/pm-cps.c
+++ b/arch/mips/kernel/pm-cps.c
@@ -56,7 +56,6 @@ DECLARE_BITMAP(state_support, CPS_PM_STATE_COUNT);
  * state. Actually per-core rather than per-CPU.
  */
 static DEFINE_PER_CPU_ALIGNED(u32*, ready_count);
-static DEFINE_PER_CPU_ALIGNED(void*, ready_count_alloc);
 
 /* Indicates online CPUs coupled with the current CPU */
 static DEFINE_PER_CPU_ALIGNED(cpumask_t, online_coupled);
@@ -642,7 +641,6 @@ static int cps_pm_online_cpu(unsigned int cpu)
 {
 	enum cps_pm_state state;
 	unsigned core = cpu_data[cpu].core;
-	unsigned dlinesz = cpu_data[cpu].dcache.linesz;
 	void *entry_fn, *core_rc;
 
 	for (state = CPS_PM_NC_WAIT; state < CPS_PM_STATE_COUNT; state++) {
@@ -662,16 +660,11 @@ static int cps_pm_online_cpu(unsigned int cpu)
 	}
 
 	if (!per_cpu(ready_count, core)) {
-		core_rc = kmalloc(dlinesz * 2, GFP_KERNEL);
+		core_rc = kmalloc(sizeof(u32), GFP_KERNEL);
 		if (!core_rc) {
 			pr_err("Failed allocate core %u ready_count\n", core);
 			return -ENOMEM;
 		}
-		per_cpu(ready_count_alloc, core) = core_rc;
-
-		/* Ensure ready_count is aligned to a cacheline boundary */
-		core_rc += dlinesz - 1;
-		core_rc = (void *)((unsigned long)core_rc & ~(dlinesz - 1));
 		per_cpu(ready_count, core) = core_rc;
 	}
 
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index b68e10fc453d..5351e1f3950d 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -114,13 +114,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 /*
  * Copy architecture-specific thread state
  */
-int copy_thread(unsigned long clone_flags, unsigned long usp,
-	unsigned long kthread_arg, struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
+	unsigned long kthread_arg, struct task_struct *p, unsigned long tls)
 {
 	struct thread_info *ti = task_thread_info(p);
 	struct pt_regs *childregs, *regs = current_pt_regs();
 	unsigned long childksp;
-	p->set_child_tid = p->clear_child_tid = NULL;
 
 	childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32;
 
@@ -176,7 +175,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE);
 
 	if (clone_flags & CLONE_SETTLS)
-		ti->tp_value = regs->regs[7];
+		ti->tp_value = tls;
 
 	return 0;
 }
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index 758577861523..7b386d54fd65 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -25,12 +25,6 @@
 /* preprocessor replaces the fp in ".set fp=64" with $30 otherwise */
 #undef fp
 
-/*
- * Offset to the current process status flags, the first 32 bytes of the
- * stack are not used.
- */
-#define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS)
-
 #ifndef USE_ALTERNATE_RESUME_IMPL
 /*
  * task_struct *resume(task_struct *prev, task_struct *next,
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 795b4aaf8927..36954ddd0b9f 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -8,6 +8,7 @@
  * option) any later version.
  */
 
+#include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/irqchip/mips-gic.h>
@@ -408,7 +409,6 @@ static int cps_cpu_disable(void)
 	return 0;
 }
 
-static DECLARE_COMPLETION(cpu_death_chosen);
 static unsigned cpu_death_sibling;
 static enum {
 	CPU_DEATH_HALT,
@@ -443,7 +443,7 @@ void play_dead(void)
 	}
 
 	/* This CPU has chosen its way out */
-	complete(&cpu_death_chosen);
+	(void)cpu_report_death();
 
 	if (cpu_death == CPU_DEATH_HALT) {
 		vpe_id = cpu_vpe_id(&cpu_data[cpu]);
@@ -492,8 +492,7 @@ static void cps_cpu_die(unsigned int cpu)
 	int err;
 
 	/* Wait for the cpu to choose its way out */
-	if (!wait_for_completion_timeout(&cpu_death_chosen,
-					 msecs_to_jiffies(5000))) {
+	if (!cpu_wait_death(cpu, 5)) {
 		pr_err("CPU%u: didn't offline\n", cpu);
 		return;
 	}
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index e398cbc3d776..ed6b4df583ea 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -83,6 +83,8 @@ static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0,
 	if (tc != 0)
 		smvp_copy_vpe_config();
 
+	cpu_data[ncpu].vpe_id = tc;
+
 	return ncpu;
 }
 
@@ -114,49 +116,6 @@ static void __init smvp_tc_init(unsigned int tc, unsigned int mvpconf0)
 	write_tc_c0_tchalt(TCHALT_H);
 }
 
-static void vsmp_send_ipi_single(int cpu, unsigned int action)
-{
-	int i;
-	unsigned long flags;
-	int vpflags;
-
-#ifdef CONFIG_MIPS_GIC
-	if (gic_present) {
-		mips_smp_send_ipi_single(cpu, action);
-		return;
-	}
-#endif
-	local_irq_save(flags);
-
-	vpflags = dvpe();	/* can't access the other CPU's registers whilst MVPE enabled */
-
-	switch (action) {
-	case SMP_CALL_FUNCTION:
-		i = C_SW1;
-		break;
-
-	case SMP_RESCHEDULE_YOURSELF:
-	default:
-		i = C_SW0;
-		break;
-	}
-
-	/* 1:1 mapping of vpe and tc... */
-	settc(cpu);
-	write_vpe_c0_cause(read_vpe_c0_cause() | i);
-	evpe(vpflags);
-
-	local_irq_restore(flags);
-}
-
-static void vsmp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
-{
-	unsigned int i;
-
-	for_each_cpu(i, mask)
-		vsmp_send_ipi_single(i, action);
-}
-
 static void vsmp_init_secondary(void)
 {
 #ifdef CONFIG_MIPS_GIC
@@ -281,8 +240,8 @@ static void __init vsmp_prepare_cpus(unsigned int max_cpus)
 }
 
 struct plat_smp_ops vsmp_smp_ops = {
-	.send_ipi_single	= vsmp_send_ipi_single,
-	.send_ipi_mask		= vsmp_send_ipi_mask,
+	.send_ipi_single	= mips_smp_send_ipi_single,
+	.send_ipi_mask		= mips_smp_send_ipi_mask,
 	.init_secondary		= vsmp_init_secondary,
 	.smp_finish		= vsmp_smp_finish,
 	.boot_secondary		= vsmp_boot_secondary,
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 6e71130549ea..aba1afb64b62 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -261,16 +261,20 @@ int mips_smp_ipi_allocate(const struct cpumask *mask)
 		ipidomain = irq_find_matching_host(NULL, DOMAIN_BUS_IPI);
 
 	/*
-	 * There are systems which only use IPI domains some of the time,
-	 * depending upon configuration we don't know until runtime. An
-	 * example is Malta where we may compile in support for GIC & the
-	 * MT ASE, but run on a system which has multiple VPEs in a single
-	 * core and doesn't include a GIC. Until all IPI implementations
-	 * have been converted to use IPI domains the best we can do here
-	 * is to return & hope some other code sets up the IPIs.
+	 * There are systems which use IPI IRQ domains, but only have one
+	 * registered when some runtime condition is met. For example a Malta
+	 * kernel may include support for GIC & CPU interrupt controller IPI
+	 * IRQ domains, but if run on a system with no GIC & no MT ASE then
+	 * neither will be supported or registered.
+	 *
+	 * We only have a problem if we're actually using multiple CPUs so fail
+	 * loudly if that is the case. Otherwise simply return, skipping IPI
+	 * setup, if we're running with only a single CPU.
 	 */
-	if (!ipidomain)
+	if (!ipidomain) {
+		BUG_ON(num_present_cpus() > 1);
 		return 0;
+	}
 
 	virq = irq_reserve_ipi(ipidomain, mask);
 	BUG_ON(!virq);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 9681b5877140..38dfa27730ff 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -201,6 +201,8 @@ void show_stack(struct task_struct *task, unsigned long *sp)
 {
 	struct pt_regs regs;
 	mm_segment_t old_fs = get_fs();
+
+	regs.cp0_status = KSU_KERNEL;
 	if (sp) {
 		regs.regs[29] = (unsigned long)sp;
 		regs.regs[31] = 0;
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index 7c6336dd2638..7cd92166a0b9 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -166,7 +166,11 @@ static int _kvm_mips_host_tlb_inv(unsigned long entryhi)
 int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
 			  bool user, bool kernel)
 {
-	int idx_user, idx_kernel;
+	/*
+	 * Initialize idx_user and idx_kernel to workaround bogus
+	 * maybe-initialized warning when using GCC 6.
+	 */
+	int idx_user = 0, idx_kernel = 0;
 	unsigned long flags, old_entryhi;
 
 	local_irq_save(flags);
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index 0ddf3698b85d..33728b7af426 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c
@@ -274,47 +274,6 @@ static void ltq_hw_irq_handler(struct irq_desc *desc)
 	ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2);
 }
 
-#ifdef CONFIG_MIPS_MT_SMP
-void __init arch_init_ipiirq(int irq, struct irqaction *action)
-{
-	setup_irq(irq, action);
-	irq_set_handler(irq, handle_percpu_irq);
-}
-
-static void ltq_sw0_irqdispatch(void)
-{
-	do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ);
-}
-
-static void ltq_sw1_irqdispatch(void)
-{
-	do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ);
-}
-static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
-{
-	scheduler_ipi();
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
-{
-	generic_smp_call_function_interrupt();
-	return IRQ_HANDLED;
-}
-
-static struct irqaction irq_resched = {
-	.handler	= ipi_resched_interrupt,
-	.flags		= IRQF_PERCPU,
-	.name		= "IPI_resched"
-};
-
-static struct irqaction irq_call = {
-	.handler	= ipi_call_interrupt,
-	.flags		= IRQF_PERCPU,
-	.name		= "IPI_call"
-};
-#endif
-
 asmlinkage void plat_irq_dispatch(void)
 {
 	unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
@@ -402,17 +361,6 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
 		(MAX_IM * INT_NUM_IM_OFFSET) + MIPS_CPU_IRQ_CASCADE,
 		&irq_domain_ops, 0);
 
-#if defined(CONFIG_MIPS_MT_SMP)
-	if (cpu_has_vint) {
-		pr_info("Setting up IPI vectored interrupts\n");
-		set_vi_handler(MIPS_CPU_IPI_RESCHED_IRQ, ltq_sw0_irqdispatch);
-		set_vi_handler(MIPS_CPU_IPI_CALL_IRQ, ltq_sw1_irqdispatch);
-	}
-	arch_init_ipiirq(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ,
-		&irq_resched);
-	arch_init_ipiirq(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ, &irq_call);
-#endif
-
 #ifndef CONFIG_MIPS_MT_SMP
 	set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 |
 		IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index a298ac93edcc..f12fde10c8ad 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -439,6 +439,8 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 	union mips_instruction insn = (union mips_instruction)dec_insn.insn;
 	unsigned int fcr31;
 	unsigned int bit = 0;
+	unsigned int bit0;
+	union fpureg *fpr;
 
 	switch (insn.i_format.opcode) {
 	case spec_op:
@@ -706,14 +708,14 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 		    ((insn.i_format.rs == bc1eqz_op) ||
 		     (insn.i_format.rs == bc1nez_op))) {
 			bit = 0;
+			fpr = &current->thread.fpu.fpr[insn.i_format.rt];
+			bit0 = get_fpr32(fpr, 0) & 0x1;
 			switch (insn.i_format.rs) {
 			case bc1eqz_op:
-				if (get_fpr32(&current->thread.fpu.fpr[insn.i_format.rt], 0) & 0x1)
-				    bit = 1;
+				bit = bit0 == 0;
 				break;
 			case bc1nez_op:
-				if (!(get_fpr32(&current->thread.fpu.fpr[insn.i_format.rt], 0) & 0x1))
-				    bit = 1;
+				bit = bit0 != 0;
 				break;
 			}
 			if (bit)
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c
index 4a2d03c72959..caa62f20a888 100644
--- a/arch/mips/math-emu/dp_maddf.c
+++ b/arch/mips/math-emu/dp_maddf.c
@@ -54,7 +54,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 		return ieee754dp_nanxcpt(z);
 	case IEEE754_CLASS_DNORM:
 		DPDNORMZ;
-	/* QNAN is handled separately below */
+	/* QNAN and ZERO cases are handled separately below */
 	}
 
 	switch (CLPAIR(xc, yc)) {
@@ -210,6 +210,9 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 	}
 	assert(rm & (DP_HIDDEN_BIT << 3));
 
+	if (zc == IEEE754_CLASS_ZERO)
+		return ieee754dp_format(rs, re, rm);
+
 	/* And now the addition */
 	assert(zm & DP_HIDDEN_BIT);
 
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c
index a8cd8b4f235e..c91d5e5d9b5f 100644
--- a/arch/mips/math-emu/sp_maddf.c
+++ b/arch/mips/math-emu/sp_maddf.c
@@ -54,7 +54,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
 		return ieee754sp_nanxcpt(z);
 	case IEEE754_CLASS_DNORM:
 		SPDNORMZ;
-	/* QNAN is handled separately below */
+	/* QNAN and ZERO cases are handled separately below */
 	}
 
 	switch (CLPAIR(xc, yc)) {
@@ -203,6 +203,9 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
 	}
 	assert(rm & (SP_HIDDEN_BIT << 3));
 
+	if (zc == IEEE754_CLASS_ZERO)
+		return ieee754sp_format(rs, re, rm);
+
 	/* And now the addition */
 
 	assert(zm & SP_HIDDEN_BIT);
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index fe8df14b6169..e08598c70b3e 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -68,12 +68,25 @@ static inline struct page *dma_addr_to_page(struct device *dev,
  * systems and only the R10000 and R12000 are used in such systems, the
  * SGI IP28 Indigo² rsp. SGI IP32 aka O2.
  */
-static inline int cpu_needs_post_dma_flush(struct device *dev)
+static inline bool cpu_needs_post_dma_flush(struct device *dev)
 {
-	return !plat_device_is_coherent(dev) &&
-	       (boot_cpu_type() == CPU_R10000 ||
-		boot_cpu_type() == CPU_R12000 ||
-		boot_cpu_type() == CPU_BMIPS5000);
+	if (plat_device_is_coherent(dev))
+		return false;
+
+	switch (boot_cpu_type()) {
+	case CPU_R10000:
+	case CPU_R12000:
+	case CPU_BMIPS5000:
+		return true;
+
+	default:
+		/*
+		 * Presence of MAARs suggests that the CPU supports
+		 * speculatively prefetching data, and therefore requires
+		 * the post-DMA flush/invalidate.
+		 */
+		return cpu_has_maar;
+	}
 }
 
 static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp)
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 3bef306cdfdb..4f8f5bf46977 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -267,19 +267,19 @@ do_sigbus:
 	/* Kernel mode? Handle exceptions or die */
 	if (!user_mode(regs))
 		goto no_context;
-	else
+
 	/*
 	 * Send a sigbus, regardless of whether we were in kernel
 	 * or user mode.
 	 */
 #if 0
-		printk("do_page_fault() #3: sending SIGBUS to %s for "
-		       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
-		       tsk->comm,
-		       write ? "write access to" : "read access from",
-		       field, address,
-		       field, (unsigned long) regs->cp0_epc,
-		       field, (unsigned long) regs->regs[31]);
+	printk("do_page_fault() #3: sending SIGBUS to %s for "
+	       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
+	       tsk->comm,
+	       write ? "write access to" : "read access from",
+	       field, address,
+	       field, (unsigned long) regs->cp0_epc,
+	       field, (unsigned long) regs->regs[31]);
 #endif
 	current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
 	tsk->thread.cp0_badvaddr = address;
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 3ca20283b31e..8ce2983a7015 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -537,6 +537,9 @@ unsigned long pgd_current[NR_CPUS];
  * it in the linker script.
  */
 pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
+#ifndef __PAGETABLE_PUD_FOLDED
+pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss;
+#endif
 #ifndef __PAGETABLE_PMD_FOLDED
 pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
 EXPORT_SYMBOL_GPL(invalid_pmd_table);
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 64dd8bdd92c3..28adeabe851f 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -93,7 +93,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c
index adc6911ba748..b19a3c506b1e 100644
--- a/arch/mips/mm/pgtable-32.c
+++ b/arch/mips/mm/pgtable-32.c
@@ -51,15 +51,15 @@ void __init pagetable_init(void)
 	/*
 	 * Fixed mappings:
 	 */
-	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-	fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base);
+	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1);
+	fixrange_init(vaddr & PMD_MASK, vaddr + FIXADDR_SIZE, pgd_base);
 
 #ifdef CONFIG_HIGHMEM
 	/*
 	 * Permanent kmaps:
 	 */
 	vaddr = PKMAP_BASE;
-	fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
+	fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
 
 	pgd = swapper_pg_dir + __pgd_offset(vaddr);
 	pud = pud_offset(pgd, vaddr);
diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c
index 0ae7b28b4db5..6fd6e96fdebb 100644
--- a/arch/mips/mm/pgtable-64.c
+++ b/arch/mips/mm/pgtable-64.c
@@ -19,10 +19,12 @@ void pgd_init(unsigned long page)
 	unsigned long *p, *end;
 	unsigned long entry;
 
-#ifdef __PAGETABLE_PMD_FOLDED
-	entry = (unsigned long)invalid_pte_table;
-#else
+#if !defined(__PAGETABLE_PUD_FOLDED)
+	entry = (unsigned long)invalid_pud_table;
+#elif !defined(__PAGETABLE_PMD_FOLDED)
 	entry = (unsigned long)invalid_pmd_table;
+#else
+	entry = (unsigned long)invalid_pte_table;
 #endif
 
 	p = (unsigned long *) page;
@@ -64,6 +66,28 @@ void pmd_init(unsigned long addr, unsigned long pagetable)
 EXPORT_SYMBOL_GPL(pmd_init);
 #endif
 
+#ifndef __PAGETABLE_PUD_FOLDED
+void pud_init(unsigned long addr, unsigned long pagetable)
+{
+	unsigned long *p, *end;
+
+	p = (unsigned long *)addr;
+	end = p + PTRS_PER_PUD;
+
+	do {
+		p[0] = pagetable;
+		p[1] = pagetable;
+		p[2] = pagetable;
+		p[3] = pagetable;
+		p[4] = pagetable;
+		p += 8;
+		p[-3] = pagetable;
+		p[-2] = pagetable;
+		p[-1] = pagetable;
+	} while (p != end);
+}
+#endif
+
 pmd_t mk_pmd(struct page *page, pgprot_t prot)
 {
 	pmd_t pmd;
@@ -87,6 +111,9 @@ void __init pagetable_init(void)
 
 	/* Initialize the entire pgd.  */
 	pgd_init((unsigned long)swapper_pg_dir);
+#ifndef __PAGETABLE_PUD_FOLDED
+	pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table);
+#endif
 #ifndef __PAGETABLE_PMD_FOLDED
 	pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table);
 #endif
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 4f642e07c2b1..ed1c5297547a 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -865,6 +865,13 @@ void build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
 
 	uasm_i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3);
 	uasm_i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */
+#ifndef __PAGETABLE_PUD_FOLDED
+	uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */
+	uasm_i_ld(p, ptr, 0, ptr); /* get pud pointer */
+	uasm_i_dsrl_safe(p, tmp, tmp, PUD_SHIFT - 3); /* get pud offset in bytes */
+	uasm_i_andi(p, tmp, tmp, (PTRS_PER_PUD - 1) << 3);
+	uasm_i_daddu(p, ptr, ptr, tmp); /* add in pud offset */
+#endif
 #ifndef __PAGETABLE_PMD_FOLDED
 	uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */
 	uasm_i_ld(p, ptr, 0, ptr); /* get pmd pointer */
@@ -1184,6 +1191,21 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
 		uasm_i_ld(p, LOC_PTEP, 0, ptr); /* get pmd pointer */
 	}
 
+#ifndef __PAGETABLE_PUD_FOLDED
+	/* get pud offset in bytes */
+	uasm_i_dsrl_safe(p, scratch, tmp, PUD_SHIFT - 3);
+	uasm_i_andi(p, scratch, scratch, (PTRS_PER_PUD - 1) << 3);
+
+	if (use_lwx_insns()) {
+		UASM_i_LWX(p, ptr, scratch, ptr);
+	} else {
+		uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */
+		UASM_i_LW(p, ptr, 0, ptr);
+	}
+	/* ptr contains a pointer to PMD entry */
+	/* tmp contains the address */
+#endif
+
 #ifndef __PAGETABLE_PMD_FOLDED
 	/* get pmd offset in bytes */
 	uasm_i_dsrl_safe(p, scratch, tmp, PMD_SHIFT - 3);
diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index 763d3f1edb8a..2277499fe6ae 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -103,6 +103,7 @@ static struct insn insn_table[] = {
 	{ insn_ld,  M(ld_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_ldx, M(spec3_op, 0, 0, 0, ldx_op, lx_op), RS | RT | RD },
 	{ insn_lh,  M(lh_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
+	{ insn_lhu,  M(lhu_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 #ifndef CONFIG_CPU_MIPSR6
 	{ insn_lld,  M(lld_op, 0, 0, 0, 0, 0),	RS | RT | SIMM },
 	{ insn_ll,  M(ll_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index a82970442b8a..730363b59bac 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -61,7 +61,7 @@ enum opcode {
 	insn_sllv, insn_slt, insn_sltiu, insn_sltu, insn_sra, insn_srl,
 	insn_srlv, insn_subu, insn_sw, insn_sync, insn_syscall, insn_tlbp,
 	insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, insn_wsbh, insn_xor,
-	insn_xori, insn_yield, insn_lddir, insn_ldpte,
+	insn_xori, insn_yield, insn_lddir, insn_ldpte, insn_lhu,
 };
 
 struct insn {
@@ -297,6 +297,7 @@ I_u1(_jr)
 I_u2s3u1(_lb)
 I_u2s3u1(_ld)
 I_u2s3u1(_lh)
+I_u2s3u1(_lhu)
 I_u2s3u1(_ll)
 I_u2s3u1(_lld)
 I_u1s2(_lui)
@@ -349,7 +350,7 @@ I_u2u1u3(_lddir)
 
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
 #include <asm/octeon/octeon.h>
-void ISAFUNC(uasm_i_pref)(u32 **buf, unsigned int a, signed int b,
+void uasm_i_pref(u32 **buf, unsigned int a, signed int b,
 			    unsigned int c)
 {
 	if (CAVIUM_OCTEON_DCACHE_PREFETCH_WAR && a <= 24 && a != 5)
@@ -361,26 +362,26 @@ void ISAFUNC(uasm_i_pref)(u32 **buf, unsigned int a, signed int b,
 	else
 		build_insn(buf, insn_pref, c, a, b);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_i_pref));
+UASM_EXPORT_SYMBOL(uasm_i_pref);
 #else
 I_u2s3u1(_pref)
 #endif
 
 /* Handle labels. */
-void ISAFUNC(uasm_build_label)(struct uasm_label **lab, u32 *addr, int lid)
+void uasm_build_label(struct uasm_label **lab, u32 *addr, int lid)
 {
 	(*lab)->addr = addr;
 	(*lab)->lab = lid;
 	(*lab)++;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_build_label));
+UASM_EXPORT_SYMBOL(uasm_build_label);
 
-int ISAFUNC(uasm_in_compat_space_p)(long addr)
+int uasm_in_compat_space_p(long addr)
 {
 	/* Is this address in 32bit compat space? */
 	return addr == (int)addr;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_in_compat_space_p));
+UASM_EXPORT_SYMBOL(uasm_in_compat_space_p);
 
 static int uasm_rel_highest(long val)
 {
@@ -400,64 +401,64 @@ static int uasm_rel_higher(long val)
 #endif
 }
 
-int ISAFUNC(uasm_rel_hi)(long val)
+int uasm_rel_hi(long val)
 {
 	return ((((val + 0x8000L) >> 16) & 0xffff) ^ 0x8000) - 0x8000;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_rel_hi));
+UASM_EXPORT_SYMBOL(uasm_rel_hi);
 
-int ISAFUNC(uasm_rel_lo)(long val)
+int uasm_rel_lo(long val)
 {
 	return ((val & 0xffff) ^ 0x8000) - 0x8000;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_rel_lo));
+UASM_EXPORT_SYMBOL(uasm_rel_lo);
 
-void ISAFUNC(UASM_i_LA_mostly)(u32 **buf, unsigned int rs, long addr)
+void UASM_i_LA_mostly(u32 **buf, unsigned int rs, long addr)
 {
-	if (!ISAFUNC(uasm_in_compat_space_p)(addr)) {
-		ISAFUNC(uasm_i_lui)(buf, rs, uasm_rel_highest(addr));
+	if (!uasm_in_compat_space_p(addr)) {
+		uasm_i_lui(buf, rs, uasm_rel_highest(addr));
 		if (uasm_rel_higher(addr))
-			ISAFUNC(uasm_i_daddiu)(buf, rs, rs, uasm_rel_higher(addr));
-		if (ISAFUNC(uasm_rel_hi(addr))) {
-			ISAFUNC(uasm_i_dsll)(buf, rs, rs, 16);
-			ISAFUNC(uasm_i_daddiu)(buf, rs, rs,
-					ISAFUNC(uasm_rel_hi)(addr));
-			ISAFUNC(uasm_i_dsll)(buf, rs, rs, 16);
+			uasm_i_daddiu(buf, rs, rs, uasm_rel_higher(addr));
+		if (uasm_rel_hi(addr)) {
+			uasm_i_dsll(buf, rs, rs, 16);
+			uasm_i_daddiu(buf, rs, rs,
+					uasm_rel_hi(addr));
+			uasm_i_dsll(buf, rs, rs, 16);
 		} else
-			ISAFUNC(uasm_i_dsll32)(buf, rs, rs, 0);
+			uasm_i_dsll32(buf, rs, rs, 0);
 	} else
-		ISAFUNC(uasm_i_lui)(buf, rs, ISAFUNC(uasm_rel_hi(addr)));
+		uasm_i_lui(buf, rs, uasm_rel_hi(addr));
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(UASM_i_LA_mostly));
+UASM_EXPORT_SYMBOL(UASM_i_LA_mostly);
 
-void ISAFUNC(UASM_i_LA)(u32 **buf, unsigned int rs, long addr)
+void UASM_i_LA(u32 **buf, unsigned int rs, long addr)
 {
-	ISAFUNC(UASM_i_LA_mostly)(buf, rs, addr);
-	if (ISAFUNC(uasm_rel_lo(addr))) {
-		if (!ISAFUNC(uasm_in_compat_space_p)(addr))
-			ISAFUNC(uasm_i_daddiu)(buf, rs, rs,
-					ISAFUNC(uasm_rel_lo(addr)));
+	UASM_i_LA_mostly(buf, rs, addr);
+	if (uasm_rel_lo(addr)) {
+		if (!uasm_in_compat_space_p(addr))
+			uasm_i_daddiu(buf, rs, rs,
+					uasm_rel_lo(addr));
 		else
-			ISAFUNC(uasm_i_addiu)(buf, rs, rs,
-					ISAFUNC(uasm_rel_lo(addr)));
+			uasm_i_addiu(buf, rs, rs,
+					uasm_rel_lo(addr));
 	}
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(UASM_i_LA));
+UASM_EXPORT_SYMBOL(UASM_i_LA);
 
 /* Handle relocations. */
-void ISAFUNC(uasm_r_mips_pc16)(struct uasm_reloc **rel, u32 *addr, int lid)
+void uasm_r_mips_pc16(struct uasm_reloc **rel, u32 *addr, int lid)
 {
 	(*rel)->addr = addr;
 	(*rel)->type = R_MIPS_PC16;
 	(*rel)->lab = lid;
 	(*rel)++;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_r_mips_pc16));
+UASM_EXPORT_SYMBOL(uasm_r_mips_pc16);
 
 static inline void __resolve_relocs(struct uasm_reloc *rel,
 				    struct uasm_label *lab);
 
-void ISAFUNC(uasm_resolve_relocs)(struct uasm_reloc *rel,
+void uasm_resolve_relocs(struct uasm_reloc *rel,
 				  struct uasm_label *lab)
 {
 	struct uasm_label *l;
@@ -467,39 +468,39 @@ void ISAFUNC(uasm_resolve_relocs)(struct uasm_reloc *rel,
 			if (rel->lab == l->lab)
 				__resolve_relocs(rel, l);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_resolve_relocs));
+UASM_EXPORT_SYMBOL(uasm_resolve_relocs);
 
-void ISAFUNC(uasm_move_relocs)(struct uasm_reloc *rel, u32 *first, u32 *end,
+void uasm_move_relocs(struct uasm_reloc *rel, u32 *first, u32 *end,
 			       long off)
 {
 	for (; rel->lab != UASM_LABEL_INVALID; rel++)
 		if (rel->addr >= first && rel->addr < end)
 			rel->addr += off;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_move_relocs));
+UASM_EXPORT_SYMBOL(uasm_move_relocs);
 
-void ISAFUNC(uasm_move_labels)(struct uasm_label *lab, u32 *first, u32 *end,
+void uasm_move_labels(struct uasm_label *lab, u32 *first, u32 *end,
 			       long off)
 {
 	for (; lab->lab != UASM_LABEL_INVALID; lab++)
 		if (lab->addr >= first && lab->addr < end)
 			lab->addr += off;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_move_labels));
+UASM_EXPORT_SYMBOL(uasm_move_labels);
 
-void ISAFUNC(uasm_copy_handler)(struct uasm_reloc *rel, struct uasm_label *lab,
+void uasm_copy_handler(struct uasm_reloc *rel, struct uasm_label *lab,
 				u32 *first, u32 *end, u32 *target)
 {
 	long off = (long)(target - first);
 
 	memcpy(target, first, (end - first) * sizeof(u32));
 
-	ISAFUNC(uasm_move_relocs(rel, first, end, off));
-	ISAFUNC(uasm_move_labels(lab, first, end, off));
+	uasm_move_relocs(rel, first, end, off);
+	uasm_move_labels(lab, first, end, off);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_copy_handler));
+UASM_EXPORT_SYMBOL(uasm_copy_handler);
 
-int ISAFUNC(uasm_insn_has_bdelay)(struct uasm_reloc *rel, u32 *addr)
+int uasm_insn_has_bdelay(struct uasm_reloc *rel, u32 *addr)
 {
 	for (; rel->lab != UASM_LABEL_INVALID; rel++) {
 		if (rel->addr == addr
@@ -510,92 +511,92 @@ int ISAFUNC(uasm_insn_has_bdelay)(struct uasm_reloc *rel, u32 *addr)
 
 	return 0;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_insn_has_bdelay));
+UASM_EXPORT_SYMBOL(uasm_insn_has_bdelay);
 
 /* Convenience functions for labeled branches. */
-void ISAFUNC(uasm_il_bltz)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_bltz(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			   int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_bltz)(p, reg, 0);
+	uasm_i_bltz(p, reg, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bltz));
+UASM_EXPORT_SYMBOL(uasm_il_bltz);
 
-void ISAFUNC(uasm_il_b)(u32 **p, struct uasm_reloc **r, int lid)
+void uasm_il_b(u32 **p, struct uasm_reloc **r, int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_b)(p, 0);
+	uasm_i_b(p, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_b));
+UASM_EXPORT_SYMBOL(uasm_il_b);
 
-void ISAFUNC(uasm_il_beq)(u32 **p, struct uasm_reloc **r, unsigned int r1,
+void uasm_il_beq(u32 **p, struct uasm_reloc **r, unsigned int r1,
 			  unsigned int r2, int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_beq)(p, r1, r2, 0);
+	uasm_i_beq(p, r1, r2, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beq));
+UASM_EXPORT_SYMBOL(uasm_il_beq);
 
-void ISAFUNC(uasm_il_beqz)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_beqz(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			   int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_beqz)(p, reg, 0);
+	uasm_i_beqz(p, reg, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beqz));
+UASM_EXPORT_SYMBOL(uasm_il_beqz);
 
-void ISAFUNC(uasm_il_beqzl)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			    int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_beqzl)(p, reg, 0);
+	uasm_i_beqzl(p, reg, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beqzl));
+UASM_EXPORT_SYMBOL(uasm_il_beqzl);
 
-void ISAFUNC(uasm_il_bne)(u32 **p, struct uasm_reloc **r, unsigned int reg1,
+void uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1,
 			  unsigned int reg2, int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_bne)(p, reg1, reg2, 0);
+	uasm_i_bne(p, reg1, reg2, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bne));
+UASM_EXPORT_SYMBOL(uasm_il_bne);
 
-void ISAFUNC(uasm_il_bnez)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			   int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_bnez)(p, reg, 0);
+	uasm_i_bnez(p, reg, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bnez));
+UASM_EXPORT_SYMBOL(uasm_il_bnez);
 
-void ISAFUNC(uasm_il_bgezl)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_bgezl(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			    int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_bgezl)(p, reg, 0);
+	uasm_i_bgezl(p, reg, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bgezl));
+UASM_EXPORT_SYMBOL(uasm_il_bgezl);
 
-void ISAFUNC(uasm_il_bgez)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_bgez(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			   int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_bgez)(p, reg, 0);
+	uasm_i_bgez(p, reg, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bgez));
+UASM_EXPORT_SYMBOL(uasm_il_bgez);
 
-void ISAFUNC(uasm_il_bbit0)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_bbit0(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			    unsigned int bit, int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_bbit0)(p, reg, bit, 0);
+	uasm_i_bbit0(p, reg, bit, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bbit0));
+UASM_EXPORT_SYMBOL(uasm_il_bbit0);
 
-void ISAFUNC(uasm_il_bbit1)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_bbit1(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			    unsigned int bit, int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_bbit1)(p, reg, bit, 0);
+	uasm_i_bbit1(p, reg, bit, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bbit1));
+UASM_EXPORT_SYMBOL(uasm_il_bbit1);
diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c
index 54f56d5a96c4..b0f9b188e833 100644
--- a/arch/mips/mti-malta/malta-int.c
+++ b/arch/mips/mti-malta/malta-int.c
@@ -145,56 +145,6 @@ static irqreturn_t corehi_handler(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-#ifdef CONFIG_MIPS_MT_SMP
-
-#define MIPS_CPU_IPI_RESCHED_IRQ 0	/* SW int 0 for resched */
-#define C_RESCHED C_SW0
-#define MIPS_CPU_IPI_CALL_IRQ 1		/* SW int 1 for resched */
-#define C_CALL C_SW1
-static int cpu_ipi_resched_irq, cpu_ipi_call_irq;
-
-static void ipi_resched_dispatch(void)
-{
-	do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ);
-}
-
-static void ipi_call_dispatch(void)
-{
-	do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ);
-}
-
-static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
-{
-#ifdef CONFIG_MIPS_VPE_APSP_API_CMP
-	if (aprp_hook)
-		aprp_hook();
-#endif
-
-	scheduler_ipi();
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
-{
-	generic_smp_call_function_interrupt();
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction irq_resched = {
-	.handler	= ipi_resched_interrupt,
-	.flags		= IRQF_PERCPU,
-	.name		= "IPI_resched"
-};
-
-static struct irqaction irq_call = {
-	.handler	= ipi_call_interrupt,
-	.flags		= IRQF_PERCPU,
-	.name		= "IPI_call"
-};
-#endif /* CONFIG_MIPS_MT_SMP */
-
 static struct irqaction corehi_irqaction = {
 	.handler = corehi_handler,
 	.name = "CoreHi",
@@ -222,12 +172,6 @@ static msc_irqmap_t msc_eicirqmap[] __initdata = {
 
 static int msc_nr_eicirqs __initdata = ARRAY_SIZE(msc_eicirqmap);
 
-void __init arch_init_ipiirq(int irq, struct irqaction *action)
-{
-	setup_irq(irq, action);
-	irq_set_handler(irq, handle_percpu_irq);
-}
-
 void __init arch_init_irq(void)
 {
 	int corehi_irq;
@@ -273,30 +217,11 @@ void __init arch_init_irq(void)
 
 	if (gic_present) {
 		corehi_irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_COREHI;
+	} else if (cpu_has_veic) {
+		set_vi_handler(MSC01E_INT_COREHI, corehi_irqdispatch);
+		corehi_irq = MSC01E_INT_BASE + MSC01E_INT_COREHI;
 	} else {
-#if defined(CONFIG_MIPS_MT_SMP)
-		/* set up ipi interrupts */
-		if (cpu_has_veic) {
-			set_vi_handler (MSC01E_INT_SW0, ipi_resched_dispatch);
-			set_vi_handler (MSC01E_INT_SW1, ipi_call_dispatch);
-			cpu_ipi_resched_irq = MSC01E_INT_SW0;
-			cpu_ipi_call_irq = MSC01E_INT_SW1;
-		} else {
-			cpu_ipi_resched_irq = MIPS_CPU_IRQ_BASE +
-				MIPS_CPU_IPI_RESCHED_IRQ;
-			cpu_ipi_call_irq = MIPS_CPU_IRQ_BASE +
-				MIPS_CPU_IPI_CALL_IRQ;
-		}
-		arch_init_ipiirq(cpu_ipi_resched_irq, &irq_resched);
-		arch_init_ipiirq(cpu_ipi_call_irq, &irq_call);
-#endif
-		if (cpu_has_veic) {
-			set_vi_handler(MSC01E_INT_COREHI,
-				       corehi_irqdispatch);
-			corehi_irq = MSC01E_INT_BASE + MSC01E_INT_COREHI;
-		} else {
-			corehi_irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_COREHI;
-		}
+		corehi_irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_COREHI;
 	}
 
 	setup_irq(corehi_irq, &corehi_irqaction);
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 49a2e2226fee..44b925005dd3 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -365,6 +365,12 @@ static inline void emit_half_load(unsigned int reg, unsigned int base,
 	emit_instr(ctx, lh, reg, offset, base);
 }
 
+static inline void emit_half_load_unsigned(unsigned int reg, unsigned int base,
+					   unsigned int offset, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, lhu, reg, offset, base);
+}
+
 static inline void emit_mul(unsigned int dst, unsigned int src1,
 			    unsigned int src2, struct jit_ctx *ctx)
 {
@@ -526,7 +532,8 @@ static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
 	u32 sflags, tmp_flags;
 
 	/* Adjust the stack pointer */
-	emit_stack_offset(-align_sp(offset), ctx);
+	if (offset)
+		emit_stack_offset(-align_sp(offset), ctx);
 
 	tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
 	/* sflags is essentially a bitmap */
@@ -578,7 +585,8 @@ static void restore_bpf_jit_regs(struct jit_ctx *ctx,
 		emit_load_stack_reg(r_ra, r_sp, real_off, ctx);
 
 	/* Restore the sp and discard the scrach memory */
-	emit_stack_offset(align_sp(offset), ctx);
+	if (offset)
+		emit_stack_offset(align_sp(offset), ctx);
 }
 
 static unsigned int get_stack_depth(struct jit_ctx *ctx)
@@ -625,8 +633,14 @@ static void build_prologue(struct jit_ctx *ctx)
 	if (ctx->flags & SEEN_X)
 		emit_jit_reg_move(r_X, r_zero, ctx);
 
-	/* Do not leak kernel data to userspace */
-	if (bpf_needs_clear_a(&ctx->skf->insns[0]))
+	/*
+	 * Do not leak kernel data to userspace, we only need to clear
+	 * r_A if it is ever used.  In fact if it is never used, we
+	 * will not save/restore it, so clearing it in this case would
+	 * corrupt the state of the caller.
+	 */
+	if (bpf_needs_clear_a(&ctx->skf->insns[0]) &&
+	    (ctx->flags & SEEN_A))
 		emit_jit_reg_move(r_A, r_zero, ctx);
 }
 
@@ -1112,6 +1126,8 @@ jmp_cmp:
 			break;
 		case BPF_ANC | SKF_AD_IFINDEX:
 			/* A = skb->dev->ifindex */
+		case BPF_ANC | SKF_AD_HATYPE:
+			/* A = skb->dev->type */
 			ctx->flags |= SEEN_SKB | SEEN_A;
 			off = offsetof(struct sk_buff, dev);
 			/* Load *dev pointer */
@@ -1120,10 +1136,15 @@ jmp_cmp:
 			emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
 				   b_imm(prog->len, ctx), ctx);
 			emit_reg_move(r_ret, r_zero, ctx);
-			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
-						  ifindex) != 4);
-			off = offsetof(struct net_device, ifindex);
-			emit_load(r_A, r_s0, off, ctx);
+			if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
+				BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+				off = offsetof(struct net_device, ifindex);
+				emit_load(r_A, r_s0, off, ctx);
+			} else { /* (code == (BPF_ANC | SKF_AD_HATYPE) */
+				BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
+				off = offsetof(struct net_device, type);
+				emit_half_load_unsigned(r_A, r_s0, off, ctx);
+			}
 			break;
 		case BPF_ANC | SKF_AD_MARK:
 			ctx->flags |= SEEN_SKB | SEEN_A;
@@ -1143,7 +1164,7 @@ jmp_cmp:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
 						  vlan_tci) != 2);
 			off = offsetof(struct sk_buff, vlan_tci);
-			emit_half_load(r_s0, r_skb, off, ctx);
+			emit_half_load_unsigned(r_s0, r_skb, off, ctx);
 			if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
 				emit_andi(r_A, r_s0, (u16)~VLAN_TAG_PRESENT, ctx);
 			} else {
@@ -1170,7 +1191,7 @@ jmp_cmp:
 			BUILD_BUG_ON(offsetof(struct sk_buff,
 					      queue_mapping) > 0xff);
 			off = offsetof(struct sk_buff, queue_mapping);
-			emit_half_load(r_A, r_skb, off, ctx);
+			emit_half_load_unsigned(r_A, r_skb, off, ctx);
 			break;
 		default:
 			pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__,
diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S
index 5d2e0c8d29c0..88a2075305d1 100644
--- a/arch/mips/net/bpf_jit_asm.S
+++ b/arch/mips/net/bpf_jit_asm.S
@@ -90,18 +90,14 @@ FEXPORT(sk_load_half_positive)
 	is_offset_in_header(2, half)
 	/* Offset within header boundaries */
 	PTR_ADDU t1, $r_skb_data, offset
-	.set	reorder
-	lh	$r_A, 0(t1)
-	.set	noreorder
+	lhu	$r_A, 0(t1)
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 # if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
-	wsbh	t0, $r_A
-	seh	$r_A, t0
+	wsbh	$r_A, $r_A
 # else
-	sll	t0, $r_A, 24
-	andi	t1, $r_A, 0xff00
-	sra	t0, t0, 16
-	srl	t1, t1, 8
+	sll	t0, $r_A, 8
+	srl	t1, $r_A, 8
+	andi	t0, t0, 0xff00
 	or	$r_A, t0, t1
 # endif
 #endif
@@ -115,7 +111,7 @@ FEXPORT(sk_load_byte_positive)
 	is_offset_in_header(1, byte)
 	/* Offset within header boundaries */
 	PTR_ADDU t1, $r_skb_data, offset
-	lb	$r_A, 0(t1)
+	lbu	$r_A, 0(t1)
 	jr	$r_ra
 	 move	$r_ret, zero
 	END(sk_load_byte)
@@ -139,6 +135,11 @@ FEXPORT(sk_load_byte_positive)
  * (void *to) is returned in r_s0
  *
  */
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define DS_OFFSET(SIZE) (4 * SZREG)
+#else
+#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE))
+#endif
 #define bpf_slow_path_common(SIZE)				\
 	/* Quick check. Are we within reasonable boundaries? */ \
 	LONG_ADDIU	$r_s1, $r_skb_len, -SIZE;		\
@@ -150,7 +151,7 @@ FEXPORT(sk_load_byte_positive)
 	PTR_LA		t0, skb_copy_bits;			\
 	PTR_S		$r_ra, (5 * SZREG)($r_sp);		\
 	/* Assign low slot to a2 */				\
-	move		a2, $r_sp;				\
+	PTR_ADDIU	a2, $r_sp, DS_OFFSET(SIZE);		\
 	jalr		t0;					\
 	/* Reset our destination slot (DS but it's ok) */	\
 	 INT_S		zero, (4 * SZREG)($r_sp);		\
diff --git a/arch/mips/pci/pcie-octeon.c b/arch/mips/pci/pcie-octeon.c
index 9f672ceb089b..ad3584dbc9d7 100644
--- a/arch/mips/pci/pcie-octeon.c
+++ b/arch/mips/pci/pcie-octeon.c
@@ -679,7 +679,7 @@ static void __cvmx_increment_ba(union cvmx_sli_mem_access_subidx *pmas)
 	if (OCTEON_IS_MODEL(OCTEON_CN68XX))
 		pmas->cn68xx.ba++;
 	else
-		pmas->cn63xx.ba++;
+		pmas->s.ba++;
 }
 
 /**
@@ -1351,7 +1351,7 @@ static int __cvmx_pcie_rc_initialize_gen2(int pcie_port)
 	if (OCTEON_IS_MODEL(OCTEON_CN68XX))
 		mem_access_subid.cn68xx.ba = 0;
 	else
-		mem_access_subid.cn63xx.ba = 0;
+		mem_access_subid.s.ba = 0;
 
 	/*
 	 * Setup mem access 12-15 for port 0, 16-19 for port 1,
diff --git a/arch/mips/sibyte/bcm1480/setup.c b/arch/mips/sibyte/bcm1480/setup.c
index a05246cbf54c..2035aaec8514 100644
--- a/arch/mips/sibyte/bcm1480/setup.c
+++ b/arch/mips/sibyte/bcm1480/setup.c
@@ -36,6 +36,7 @@ unsigned int soc_pass;
 unsigned int soc_type;
 EXPORT_SYMBOL(soc_type);
 unsigned int periph_rev;
+EXPORT_SYMBOL_GPL(periph_rev);
 unsigned int zbbus_mhz;
 EXPORT_SYMBOL(zbbus_mhz);
 
diff --git a/arch/mips/sibyte/sb1250/setup.c b/arch/mips/sibyte/sb1250/setup.c
index 90e43782342b..aa7713adfa58 100644
--- a/arch/mips/sibyte/sb1250/setup.c
+++ b/arch/mips/sibyte/sb1250/setup.c
@@ -34,6 +34,7 @@ unsigned int soc_pass;
 unsigned int soc_type;
 EXPORT_SYMBOL(soc_type);
 unsigned int periph_rev;
+EXPORT_SYMBOL_GPL(periph_rev);
 unsigned int zbbus_mhz;
 EXPORT_SYMBOL(zbbus_mhz);
 
diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h
index 18e17abf7664..3ae479117b42 100644
--- a/arch/mn10300/include/asm/processor.h
+++ b/arch/mn10300/include/asm/processor.h
@@ -132,11 +132,6 @@ static inline void start_thread(struct pt_regs *regs,
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define task_pt_regs(task) ((task)->thread.uregs)
diff --git a/arch/mn10300/include/uapi/asm/Kbuild b/arch/mn10300/include/uapi/asm/Kbuild
index 040178cdb3eb..b15bf6bc0e94 100644
--- a/arch/mn10300/include/uapi/asm/Kbuild
+++ b/arch/mn10300/include/uapi/asm/Kbuild
@@ -1,34 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index c9fa42619c6a..89e8027e07fb 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -40,14 +40,6 @@
 #include "internal.h"
 
 /*
- * return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return ((unsigned long *) tsk->thread.sp)[3];
-}
-
-/*
  * power off function, if any
  */
 void (*pm_power_off)(void);
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 51a56c8b04b4..a72d5f0de692 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -6,6 +6,8 @@ config NIOS2
 	select GENERIC_CPU_DEVICES
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
+	select GENERIC_STRNCPY_FROM_USER
+	select GENERIC_STRNLEN_USER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_KGDB
 	select IRQ_DOMAIN
diff --git a/arch/nios2/Kconfig.debug b/arch/nios2/Kconfig.debug
index 2fd08cbfdddb..55105220370c 100644
--- a/arch/nios2/Kconfig.debug
+++ b/arch/nios2/Kconfig.debug
@@ -18,7 +18,6 @@ config EARLY_PRINTK
 	bool "Activate early kernel debugging"
 	default y
 	select SERIAL_CORE_CONSOLE
-	depends on SERIAL_ALTERA_JTAGUART_CONSOLE || SERIAL_ALTERA_UART_CONSOLE
 	help
 	  Enable early printk on console
 	  This is useful for kernel debugging when your machine crashes very
diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile
index e74afc12d516..8673a79dca9c 100644
--- a/arch/nios2/Makefile
+++ b/arch/nios2/Makefile
@@ -22,10 +22,15 @@ export MMU
 
 LIBGCC         := $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)
 
+KBUILD_AFLAGS += -march=r$(CONFIG_NIOS2_ARCH_REVISION)
+
 KBUILD_CFLAGS += -pipe -D__linux__ -D__ELF__
+KBUILD_CFLAGS += -march=r$(CONFIG_NIOS2_ARCH_REVISION)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_HW_MUL_SUPPORT),-mhw-mul,-mno-hw-mul)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_HW_MULX_SUPPORT),-mhw-mulx,-mno-hw-mulx)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_HW_DIV_SUPPORT),-mhw-div,-mno-hw-div)
+KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_BMX_SUPPORT),-mbmx,-mno-bmx)
+KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_CDX_SUPPORT),-mcdx,-mno-cdx)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_FPU_SUPPORT),-mcustom-fpu-cfg=60-1,)
 
 KBUILD_CFLAGS += -fno-optimize-sibling-calls
diff --git a/arch/nios2/boot/.gitignore b/arch/nios2/boot/.gitignore
new file mode 100644
index 000000000000..109279ca5a4d
--- /dev/null
+++ b/arch/nios2/boot/.gitignore
@@ -0,0 +1,2 @@
+*.dtb
+vmImage
diff --git a/arch/nios2/boot/dts/10m50_devboard.dts b/arch/nios2/boot/dts/10m50_devboard.dts
index f362b2224ee7..4bb4dc1b52e9 100644
--- a/arch/nios2/boot/dts/10m50_devboard.dts
+++ b/arch/nios2/boot/dts/10m50_devboard.dts
@@ -244,6 +244,7 @@
 	};
 
 	chosen {
-		bootargs = "debug console=ttyS0,115200";
+		bootargs = "debug earlycon console=ttyS0,115200";
+		stdout-path = &a_16550_uart_0;
 	};
 };
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 87e70f2b463f..727dbb333f60 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += bitsperlong.h
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += clkdev.h
+generic-y += cmpxchg.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h
index 52abba973dc2..55e383c173f7 100644
--- a/arch/nios2/include/asm/cacheflush.h
+++ b/arch/nios2/include/asm/cacheflush.h
@@ -46,7 +46,9 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
 extern void flush_dcache_range(unsigned long start, unsigned long end);
 extern void invalidate_dcache_range(unsigned long start, unsigned long end);
 
-#define flush_dcache_mmap_lock(mapping)		do { } while (0)
-#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+#define flush_dcache_mmap_lock(mapping) \
+	spin_lock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_unlock(mapping) \
+	spin_unlock_irq(&(mapping)->tree_lock)
 
 #endif /* _ASM_NIOS2_CACHEFLUSH_H */
diff --git a/arch/nios2/include/asm/cmpxchg.h b/arch/nios2/include/asm/cmpxchg.h
deleted file mode 100644
index a7978f14d157..000000000000
--- a/arch/nios2/include/asm/cmpxchg.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (C) 2004 Microtronix Datacom Ltd.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_NIOS2_CMPXCHG_H
-#define _ASM_NIOS2_CMPXCHG_H
-
-#include <asm-generic/cmpxchg.h>
-
-#endif /* _ASM_NIOS2_CMPXCHG_H */
diff --git a/arch/nios2/include/asm/cpuinfo.h b/arch/nios2/include/asm/cpuinfo.h
index 348bb228fec9..dbdaf96f28d4 100644
--- a/arch/nios2/include/asm/cpuinfo.h
+++ b/arch/nios2/include/asm/cpuinfo.h
@@ -29,6 +29,8 @@ struct cpuinfo {
 	bool has_div;
 	bool has_mul;
 	bool has_mulx;
+	bool has_bmx;
+	bool has_cdx;
 
 	/* CPU caches */
 	u32 icache_line_size;
diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h
index 3bbbc3d798e5..4944e2e1d8b0 100644
--- a/arch/nios2/include/asm/processor.h
+++ b/arch/nios2/include/asm/processor.h
@@ -75,9 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/* Return saved PC of a blocked thread. */
-#define thread_saved_pc(tsk)	((tsk)->thread.kregs->ea)
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 #define task_pt_regs(p) \
diff --git a/arch/nios2/include/asm/prom.h b/arch/nios2/include/asm/prom.h
deleted file mode 100644
index 75fffb42cfa5..000000000000
--- a/arch/nios2/include/asm/prom.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright Altera Corporation (C) <2015>. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ASM_NIOS2_PROM_H__
-#define __ASM_NIOS2_PROM_H__
-
-extern unsigned long __init of_early_console(void);
-
-#endif
diff --git a/arch/nios2/include/asm/setup.h b/arch/nios2/include/asm/setup.h
index dcbf8cf1a344..ac9bff248e6d 100644
--- a/arch/nios2/include/asm/setup.h
+++ b/arch/nios2/include/asm/setup.h
@@ -30,8 +30,6 @@ extern char fast_handler_end[];
 
 extern void pagetable_init(void);
 
-extern void setup_early_printk(void);
-
 #endif/* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h
index 727bd9504899..dfa3c7cb30b4 100644
--- a/arch/nios2/include/asm/uaccess.h
+++ b/arch/nios2/include/asm/uaccess.h
@@ -42,6 +42,8 @@
 
 # define __EX_TABLE_SECTION	".section __ex_table,\"a\"\n"
 
+#define user_addr_max() (uaccess_kernel() ? ~0UL : TASK_SIZE)
+
 /*
  * Zero Userspace
  */
@@ -81,8 +83,9 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n);
 #define INLINE_COPY_TO_USER
 
 extern long strncpy_from_user(char *__to, const char __user *__from,
-				long __len);
-extern long strnlen_user(const char __user *s, long n);
+			      long __len);
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *s, long n);
 
 /* Optimized macros */
 #define __get_user_asm(val, insn, addr, err)				\
diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index e0bb972a50d7..374bd123329f 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -1,5 +1,5 @@
+# UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += elf.h
-
+generic-y += setup.h
 generic-y += ucontext.h
diff --git a/arch/nios2/kernel/.gitignore b/arch/nios2/kernel/.gitignore
new file mode 100644
index 000000000000..c5f676c3c224
--- /dev/null
+++ b/arch/nios2/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/nios2/kernel/Makefile b/arch/nios2/kernel/Makefile
index 1aae25703657..06d07432b38d 100644
--- a/arch/nios2/kernel/Makefile
+++ b/arch/nios2/kernel/Makefile
@@ -20,7 +20,6 @@ obj-y	+= syscall_table.o
 obj-y	+= time.o
 obj-y	+= traps.o
 
-obj-$(CONFIG_EARLY_PRINTK)		+= early_printk.o
 obj-$(CONFIG_KGDB)			+= kgdb.o
 obj-$(CONFIG_MODULES)			+= module.o
 obj-$(CONFIG_NIOS2_ALIGNMENT_TRAP)	+= misaligned.o
diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c
index 1cccc36877bc..93207718bb22 100644
--- a/arch/nios2/kernel/cpuinfo.c
+++ b/arch/nios2/kernel/cpuinfo.c
@@ -67,6 +67,8 @@ void __init setup_cpuinfo(void)
 	cpuinfo.has_div = of_property_read_bool(cpu, "altr,has-div");
 	cpuinfo.has_mul = of_property_read_bool(cpu, "altr,has-mul");
 	cpuinfo.has_mulx = of_property_read_bool(cpu, "altr,has-mulx");
+	cpuinfo.has_bmx = of_property_read_bool(cpu, "altr,has-bmx");
+	cpuinfo.has_cdx = of_property_read_bool(cpu, "altr,has-cdx");
 	cpuinfo.mmu = of_property_read_bool(cpu, "altr,has-mmu");
 
 	if (IS_ENABLED(CONFIG_NIOS2_HW_DIV_SUPPORT) && !cpuinfo.has_div)
@@ -78,6 +80,12 @@ void __init setup_cpuinfo(void)
 	if (IS_ENABLED(CONFIG_NIOS2_HW_MULX_SUPPORT) && !cpuinfo.has_mulx)
 		err_cpu("MULX");
 
+	if (IS_ENABLED(CONFIG_NIOS2_BMX_SUPPORT) && !cpuinfo.has_bmx)
+		err_cpu("BMX");
+
+	if (IS_ENABLED(CONFIG_NIOS2_CDX_SUPPORT) && !cpuinfo.has_cdx)
+		err_cpu("CDX");
+
 	cpuinfo.tlb_num_ways = fcpu(cpu, "altr,tlb-num-ways");
 	if (!cpuinfo.tlb_num_ways)
 		panic("altr,tlb-num-ways can't be 0. Please check your hardware "
@@ -125,12 +133,14 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 	seq_printf(m,
 		   "CPU:\t\tNios II/%s\n"
+		   "REV:\t\t%i\n"
 		   "MMU:\t\t%s\n"
 		   "FPU:\t\tnone\n"
 		   "Clocking:\t%u.%02u MHz\n"
 		   "BogoMips:\t%lu.%02lu\n"
 		   "Calibration:\t%lu loops\n",
 		   cpuinfo.cpu_impl,
+		   CONFIG_NIOS2_ARCH_REVISION,
 		   cpuinfo.mmu ? "present" : "none",
 		   clockfreq / 1000000, (clockfreq / 100000) % 10,
 		   (loops_per_jiffy * HZ) / 500000,
@@ -141,10 +151,14 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		   "HW:\n"
 		   " MUL:\t\t%s\n"
 		   " MULX:\t\t%s\n"
-		   " DIV:\t\t%s\n",
+		   " DIV:\t\t%s\n"
+		   " BMX:\t\t%s\n"
+		   " CDX:\t\t%s\n",
 		   cpuinfo.has_mul ? "yes" : "no",
 		   cpuinfo.has_mulx ? "yes" : "no",
-		   cpuinfo.has_div ? "yes" : "no");
+		   cpuinfo.has_div ? "yes" : "no",
+		   cpuinfo.has_bmx ? "yes" : "no",
+		   cpuinfo.has_cdx ? "yes" : "no");
 
 	seq_printf(m,
 		   "Icache:\t\t%ukB, line length: %u\n",
diff --git a/arch/nios2/kernel/early_printk.c b/arch/nios2/kernel/early_printk.c
deleted file mode 100644
index c08e4c1486fc..000000000000
--- a/arch/nios2/kernel/early_printk.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Early printk for Nios2.
- *
- * Copyright (C) 2015, Altera Corporation
- * Copyright (C) 2010, Tobias Klauser <tklauser@distanz.ch>
- * Copyright (C) 2009, Wind River Systems Inc
- *   Implemented by fredrik.markstrom@gmail.com and ivarholmqvist@gmail.com
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <linux/console.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/io.h>
-
-#include <asm/prom.h>
-
-static unsigned long base_addr;
-
-#if defined(CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE)
-
-#define ALTERA_JTAGUART_DATA_REG		0
-#define ALTERA_JTAGUART_CONTROL_REG		4
-#define ALTERA_JTAGUART_CONTROL_WSPACE_MSK	0xFFFF0000
-#define ALTERA_JTAGUART_CONTROL_AC_MSK		0x00000400
-
-#define JUART_GET_CR() \
-	__builtin_ldwio((void *)(base_addr + ALTERA_JTAGUART_CONTROL_REG))
-#define JUART_SET_CR(v) \
-	__builtin_stwio((void *)(base_addr + ALTERA_JTAGUART_CONTROL_REG), v)
-#define JUART_SET_TX(v) \
-	__builtin_stwio((void *)(base_addr + ALTERA_JTAGUART_DATA_REG), v)
-
-static void early_console_write(struct console *con, const char *s, unsigned n)
-{
-	unsigned long status;
-
-	while (n-- && *s) {
-		while (((status = JUART_GET_CR())
-				& ALTERA_JTAGUART_CONTROL_WSPACE_MSK) == 0) {
-#if defined(CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE_BYPASS)
-			if ((status & ALTERA_JTAGUART_CONTROL_AC_MSK) == 0)
-				return;	/* no connection activity */
-#endif
-		}
-		JUART_SET_TX(*s);
-		s++;
-	}
-}
-
-#elif defined(CONFIG_SERIAL_ALTERA_UART_CONSOLE)
-
-#define ALTERA_UART_TXDATA_REG		4
-#define ALTERA_UART_STATUS_REG		8
-#define ALTERA_UART_STATUS_TRDY		0x0040
-
-#define UART_GET_SR() \
-	__builtin_ldwio((void *)(base_addr + ALTERA_UART_STATUS_REG))
-#define UART_SET_TX(v) \
-	__builtin_stwio((void *)(base_addr + ALTERA_UART_TXDATA_REG), v)
-
-static void early_console_putc(char c)
-{
-	while (!(UART_GET_SR() & ALTERA_UART_STATUS_TRDY))
-		;
-
-	UART_SET_TX(c);
-}
-
-static void early_console_write(struct console *con, const char *s, unsigned n)
-{
-	while (n-- && *s) {
-		early_console_putc(*s);
-		if (*s == '\n')
-			early_console_putc('\r');
-		s++;
-	}
-}
-
-#else
-# error Neither SERIAL_ALTERA_JTAGUART_CONSOLE nor SERIAL_ALTERA_UART_CONSOLE \
-selected
-#endif
-
-static struct console early_console_prom = {
-	.name	= "early",
-	.write	= early_console_write,
-	.flags	= CON_PRINTBUFFER | CON_BOOT,
-	.index	= -1
-};
-
-void __init setup_early_printk(void)
-{
-#if defined(CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE) ||	\
-	defined(CONFIG_SERIAL_ALTERA_UART_CONSOLE)
-	base_addr = of_early_console();
-#else
-	base_addr = 0;
-#endif
-
-	if (!base_addr)
-		return;
-
-#if defined(CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE_BYPASS)
-	/* Clear activity bit so BYPASS doesn't stall if we've used JTAG for
-	 * downloading the kernel. This might cause early data to be lost even
-	 * if the JTAG terminal is running.
-	 */
-	JUART_SET_CR(JUART_GET_CR() | ALTERA_JTAGUART_CONTROL_AC_MSK);
-#endif
-
-	early_console = &early_console_prom;
-	register_console(early_console);
-	pr_info("early_console initialized at 0x%08lx\n", base_addr);
-}
diff --git a/arch/nios2/kernel/irq.c b/arch/nios2/kernel/irq.c
index f5b74ae69b5b..6c833a9d4eab 100644
--- a/arch/nios2/kernel/irq.c
+++ b/arch/nios2/kernel/irq.c
@@ -67,7 +67,7 @@ static int irq_map(struct irq_domain *h, unsigned int virq,
 	return 0;
 }
 
-static struct irq_domain_ops irq_ops = {
+static const struct irq_domain_ops irq_ops = {
 	.map	= irq_map,
 	.xlate	= irq_domain_xlate_onecell,
 };
diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c
index 3901b80d4420..6688576b3a47 100644
--- a/arch/nios2/kernel/prom.c
+++ b/arch/nios2/kernel/prom.c
@@ -30,7 +30,6 @@
 #include <linux/of_fdt.h>
 #include <linux/io.h>
 
-#include <asm/prom.h>
 #include <asm/sections.h>
 
 void __init early_init_dt_add_memory_arch(u64 base, u64 size)
@@ -71,51 +70,3 @@ void __init early_init_devtree(void *params)
 
 	early_init_dt_scan(params);
 }
-
-#ifdef CONFIG_EARLY_PRINTK
-static int __init early_init_dt_scan_serial(unsigned long node,
-			const char *uname, int depth, void *data)
-{
-	u64 *addr64 = (u64 *) data;
-	const char *p;
-
-	/* only consider serial nodes */
-	if (strncmp(uname, "serial", 6) != 0)
-		return 0;
-
-	p = of_get_flat_dt_prop(node, "compatible", NULL);
-	if (!p)
-		return 0;
-
-	/*
-	 * We found an altera_jtaguart but it wasn't configured for console, so
-	 * skip it.
-	 */
-#ifndef CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE
-	if (strncmp(p, "altr,juart", 10) == 0)
-		return 0;
-#endif
-
-	/*
-	 * Same for altera_uart.
-	 */
-#ifndef CONFIG_SERIAL_ALTERA_UART_CONSOLE
-	if (strncmp(p, "altr,uart", 9) == 0)
-		return 0;
-#endif
-
-	*addr64 = of_flat_dt_translate_address(node);
-
-	return *addr64 == OF_BAD_ADDR ? 0 : 1;
-}
-
-unsigned long __init of_early_console(void)
-{
-	u64 base = 0;
-
-	if (of_scan_flat_dt(early_init_dt_scan_serial, &base))
-		return (u32)ioremap(base, 32);
-	else
-		return 0;
-}
-#endif /* CONFIG_EARLY_PRINTK */
diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c
index 6044d9be28b4..926a02b17b31 100644
--- a/arch/nios2/kernel/setup.c
+++ b/arch/nios2/kernel/setup.c
@@ -137,6 +137,8 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6,
 		strncpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
 #endif
 #endif
+
+	parse_early_param();
 }
 
 void __init setup_arch(char **cmdline_p)
@@ -145,10 +147,6 @@ void __init setup_arch(char **cmdline_p)
 
 	console_verbose();
 
-#ifdef CONFIG_EARLY_PRINTK
-	setup_early_printk();
-#endif
-
 	memory_start = PAGE_ALIGN((unsigned long)__pa(_end));
 	memory_end = (unsigned long) CONFIG_NIOS2_MEM_BASE + memory_size;
 
diff --git a/arch/nios2/mm/uaccess.c b/arch/nios2/mm/uaccess.c
index 804983317766..34f10af8ea40 100644
--- a/arch/nios2/mm/uaccess.c
+++ b/arch/nios2/mm/uaccess.c
@@ -128,36 +128,3 @@ asm(
 	".word 12b,13b\n"
 	".previous\n");
 EXPORT_SYMBOL(raw_copy_to_user);
-
-long strncpy_from_user(char *__to, const char __user *__from, long __len)
-{
-	int l = strnlen_user(__from, __len);
-	int is_zt = 1;
-
-	if (l > __len) {
-		is_zt = 0;
-		l = __len;
-	}
-
-	if (l == 0 || copy_from_user(__to, __from, l))
-		return -EFAULT;
-
-	if (is_zt)
-		l--;
-	return l;
-}
-
-long strnlen_user(const char __user *s, long n)
-{
-	long i;
-
-	for (i = 0; i < n; i++) {
-		char c;
-
-		if (get_user(c, s + i) == -EFAULT)
-			return 0;
-		if (c == 0)
-			return i + 1;
-	}
-	return n + 1;
-}
diff --git a/arch/nios2/platform/Kconfig.platform b/arch/nios2/platform/Kconfig.platform
index d3e5df9fb36b..74c1aaf588b8 100644
--- a/arch/nios2/platform/Kconfig.platform
+++ b/arch/nios2/platform/Kconfig.platform
@@ -52,6 +52,14 @@ config NIOS2_DTB_SOURCE
 
 comment "Nios II instructions"
 
+config NIOS2_ARCH_REVISION
+	int "Select Nios II architecture revision"
+	range 1 2
+	default 1
+	help
+	  Select between Nios II R1 and Nios II R2 . The architectures
+	  are binary incompatible. Default is R1 .
+
 config NIOS2_HW_MUL_SUPPORT
 	bool "Enable MUL instruction"
 	default n
@@ -73,6 +81,24 @@ config NIOS2_HW_DIV_SUPPORT
 	  Set to true if you configured the Nios II to include the DIV
 	  instruction.  Enables the -mhw-div compiler flag.
 
+config NIOS2_BMX_SUPPORT
+	bool "Enable BMX instructions"
+	depends on NIOS2_ARCH_REVISION = 2
+	default n
+	help
+	  Set to true if you configured the Nios II R2 to include
+	  the BMX Bit Manipulation Extension instructions. Enables
+	  the -mbmx compiler flag.
+
+config NIOS2_CDX_SUPPORT
+	bool "Enable CDX instructions"
+	depends on NIOS2_ARCH_REVISION = 2
+	default n
+	help
+	  Set to true if you configured the Nios II R2 to include
+	  the CDX Bit Manipulation Extension instructions. Enables
+	  the -mcdx compiler flag.
+
 config NIOS2_FPU_SUPPORT
 	bool "Custom floating point instr support"
 	default n
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index df8e2f7bc7dd..fdbcf0bf44a4 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -1,6 +1,3 @@
-
-header-y += ucontext.h
-
 generic-y += auxvec.h
 generic-y += barrier.h
 generic-y += bitsperlong.h
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h
index a908e6c30a00..396d8f306c21 100644
--- a/arch/openrisc/include/asm/processor.h
+++ b/arch/openrisc/include/asm/processor.h
@@ -84,11 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
 void release_thread(struct task_struct *);
 unsigned long get_wchan(struct task_struct *p);
 
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 #define init_stack      (init_thread_union.stack)
 
 #define cpu_relax()     barrier()
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index 80761eb82b5f..b15bf6bc0e94 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -1,10 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += byteorder.h
-header-y += elf.h
-header-y += kvm_para.h
-header-y += param.h
-header-y += ptrace.h
-header-y += sigcontext.h
-header-y += unistd.h
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index f8da545854f9..f9b77003f113 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -110,11 +110,6 @@ void show_regs(struct pt_regs *regs)
 	show_registers(regs);
 }
 
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return (unsigned long)user_regs(t->stack)->pc;
-}
-
 void release_thread(struct task_struct *dead_task)
 {
 }
@@ -167,8 +162,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
 
 	top_of_kernel_stack = sp;
 
-	p->set_child_tid = p->clear_child_tid = NULL;
-
 	/* Locate userspace context on stack... */
 	sp -= STACK_FRAME_OVERHEAD;	/* redzone */
 	sp -= sizeof(struct pt_regs);
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index ea4e6ae091d0..b3b66c3d6f3c 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -165,12 +165,7 @@ struct thread_struct {
 	.flags		= 0 \
 	}
 
-/*
- * Return saved PC of a blocked thread.  This is used by ps mostly.
- */
-
 struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *t);
 void show_trace(struct task_struct *task, unsigned long *stack);
 
 /*
diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild
index 348356c99514..3971c60a7e7f 100644
--- a/arch/parisc/include/uapi/asm/Kbuild
+++ b/arch/parisc/include/uapi/asm/Kbuild
@@ -2,31 +2,3 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += resource.h
-
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += pdc.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 4516a5b53f38..b64d7d21646e 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -239,11 +239,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
 	return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return t->thread.regs.kpc;
-}
-
 unsigned long
 get_wchan(struct task_struct *p)
 {
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index e5288638a1d9..378a754ca186 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -90,7 +90,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	unsigned long task_size = TASK_SIZE;
 	int do_color_align, last_mmap;
 	struct vm_unmapped_area_info info;
@@ -117,9 +117,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		else
 			addr = PAGE_ALIGN(addr);
 
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			goto found_addr;
 	}
 
@@ -143,7 +144,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			  const unsigned long len, const unsigned long pgoff,
 			  const unsigned long flags)
 {
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	int do_color_align, last_mmap;
@@ -177,9 +178,11 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			addr = COLOR_ALIGN(addr, last_mmap, pgoff);
 		else
 			addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			goto found_addr;
 	}
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d8834e8bfb05..6189238e69f8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -146,6 +146,7 @@ config PPC
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
 	select ARCH_WANT_IPC_PARSE_VERSION
+	select ARCH_WEAK_RELEASE_ACQUIRE
 	select BINFMT_ELF
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
@@ -183,7 +184,7 @@ config PPC
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_GCC_PLUGINS
-	select HAVE_GENERIC_RCU_GUP
+	select HAVE_GENERIC_GUP
 	select HAVE_HW_BREAKPOINT		if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
 	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
@@ -1198,11 +1199,6 @@ source "arch/powerpc/Kconfig.debug"
 
 source "security/Kconfig"
 
-config KEYS_COMPAT
-	bool
-	depends on COMPAT && KEYS
-	default y
-
 source "crypto/Kconfig"
 
 config PPC_LIB_RHEAP
diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink
index 3c22d64b2de9..eccfcc88afae 100644
--- a/arch/powerpc/Makefile.postlink
+++ b/arch/powerpc/Makefile.postlink
@@ -7,7 +7,7 @@
 PHONY := __archpost
 __archpost:
 
-include include/config/auto.conf
+-include include/config/auto.conf
 include scripts/Kbuild.include
 
 quiet_cmd_relocs_check = CHKREL  $@
diff --git a/arch/powerpc/boot/dts/include/dt-bindings b/arch/powerpc/boot/dts/include/dt-bindings
deleted file mode 120000
index 08c00e4972fa..000000000000
--- a/arch/powerpc/boot/dts/include/dt-bindings
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../include/dt-bindings
-\ No newline at end of file
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index b4b5e6b671ca..0c4e470571ca 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -8,7 +8,7 @@
 #define H_PTE_INDEX_SIZE  9
 #define H_PMD_INDEX_SIZE  7
 #define H_PUD_INDEX_SIZE  9
-#define H_PGD_INDEX_SIZE  12
+#define H_PGD_INDEX_SIZE  9
 
 #ifndef __ASSEMBLY__
 #define H_PTE_TABLE_SIZE	(sizeof(pte_t) << H_PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 214219dff87c..9732837aaae8 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -2,9 +2,9 @@
 #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
 
 #define H_PTE_INDEX_SIZE  8
-#define H_PMD_INDEX_SIZE  5
-#define H_PUD_INDEX_SIZE  5
-#define H_PGD_INDEX_SIZE  15
+#define H_PMD_INDEX_SIZE  10
+#define H_PUD_INDEX_SIZE  7
+#define H_PGD_INDEX_SIZE  8
 
 /*
  * 64k aligned address free up few of the lower bits of RPN for us
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index f2c562a0a427..0151af6c2a50 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -104,7 +104,7 @@
 		"1:	"PPC_TLNEI"	%4,0\n"			\
 		_EMIT_BUG_ENTRY					\
 		: : "i" (__FILE__), "i" (__LINE__),		\
-		  "i" (BUGFLAG_TAINT(TAINT_WARN)),		\
+		  "i" (BUGFLAG_WARNING|BUGFLAG_TAINT(TAINT_WARN)),\
 		  "i" (sizeof(struct bug_entry)),		\
 		  "r" (__ret_warn_on));				\
 	}							\
diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h
index 8ee4211ca0c6..14ad37865000 100644
--- a/arch/powerpc/include/asm/cpm1.h
+++ b/arch/powerpc/include/asm/cpm1.h
@@ -560,6 +560,8 @@ typedef struct risc_timer_pram {
 #define CPM_PIN_SECONDARY 2
 #define CPM_PIN_GPIO      4
 #define CPM_PIN_OPENDRAIN 8
+#define CPM_PIN_FALLEDGE  16
+#define CPM_PIN_ANYEDGE   0
 
 enum cpm_port {
 	CPM_PORTA,
diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h
index 6e834caa3720..0d1df02bf99d 100644
--- a/arch/powerpc/include/asm/cpu_has_feature.h
+++ b/arch/powerpc/include/asm/cpu_has_feature.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_POWERPC_CPUFEATURES_H
-#define __ASM_POWERPC_CPUFEATURES_H
+#ifndef __ASM_POWERPC_CPU_HAS_FEATURE_H
+#define __ASM_POWERPC_CPU_HAS_FEATURE_H
 
 #ifndef __ASSEMBLY__
 
@@ -52,4 +52,4 @@ static inline bool cpu_has_feature(unsigned long feature)
 #endif
 
 #endif /* __ASSEMBLY__ */
-#endif /* __ASM_POWERPC_CPUFEATURE_H */
+#endif /* __ASM_POWERPC_CPU_HAS_FEATURE_H */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 1f6847b107e4..d02ad93bf708 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -118,7 +118,9 @@ extern struct cpu_spec		*cur_cpu_spec;
 
 extern unsigned int __start___ftr_fixup, __stop___ftr_fixup;
 
+extern void set_cur_cpu_spec(struct cpu_spec *s);
 extern struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr);
+extern void identify_cpu_name(unsigned int pvr);
 extern void do_feature_fixups(unsigned long value, void *fixup_start,
 			      void *fixup_end);
 
@@ -212,7 +214,6 @@ enum {
 #define CPU_FTR_DAWR			LONG_ASM_CONST(0x0400000000000000)
 #define CPU_FTR_DABRX			LONG_ASM_CONST(0x0800000000000000)
 #define CPU_FTR_PMAO_BUG		LONG_ASM_CONST(0x1000000000000000)
-#define CPU_FTR_SUBCORE			LONG_ASM_CONST(0x2000000000000000)
 #define CPU_FTR_POWER9_DD1		LONG_ASM_CONST(0x4000000000000000)
 
 #ifndef __ASSEMBLY__
@@ -461,7 +462,7 @@ enum {
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
 	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
-	    CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_SUBCORE)
+	    CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
 #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
 #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
 #define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
diff --git a/arch/powerpc/include/asm/dt_cpu_ftrs.h b/arch/powerpc/include/asm/dt_cpu_ftrs.h
new file mode 100644
index 000000000000..7a34fc11bf63
--- /dev/null
+++ b/arch/powerpc/include/asm/dt_cpu_ftrs.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_POWERPC_DT_CPU_FTRS_H
+#define __ASM_POWERPC_DT_CPU_FTRS_H
+
+/*
+ *  Copyright 2017, IBM Corporation
+ *  cpufeatures is the new way to discover CPU features with /cpus/features
+ *  devicetree. This supersedes PVR based discovery ("cputable"), and older
+ *  device tree feature advertisement.
+ */
+
+#include <linux/types.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+#include <uapi/asm/cputable.h>
+
+#ifdef CONFIG_PPC_DT_CPU_FTRS
+bool dt_cpu_ftrs_init(void *fdt);
+void dt_cpu_ftrs_scan(void);
+bool dt_cpu_ftrs_in_use(void);
+#else
+static inline bool dt_cpu_ftrs_init(void *fdt) { return false; }
+static inline void dt_cpu_ftrs_scan(void) { }
+static inline bool dt_cpu_ftrs_in_use(void) { return false; }
+#endif
+
+#endif /* __ASM_POWERPC_DT_CPU_FTRS_H */
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index a83821f33ea3..8814a7249ceb 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_handler(struct pt_regs *regs);
 extern int kprobe_post_handler(struct pt_regs *regs);
+extern int is_current_kprobe_addr(unsigned long addr);
 #ifdef CONFIG_KPROBES_ON_FTRACE
 extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
 			   struct kprobe_ctlblk *kcb);
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 0593d9479f74..b148496ffe36 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -111,6 +111,8 @@ struct kvmppc_host_state {
 	struct kvm_vcpu *kvm_vcpu;
 	struct kvmppc_vcore *kvm_vcore;
 	void __iomem *xics_phys;
+	void __iomem *xive_tima_phys;
+	void __iomem *xive_tima_virt;
 	u32 saved_xirr;
 	u64 dabr;
 	u64 host_mmcr[7];	/* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 77c60826d145..9c51ac4b8f36 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -210,6 +210,12 @@ struct kvmppc_spapr_tce_table {
 /* XICS components, defined in book3s_xics.c */
 struct kvmppc_xics;
 struct kvmppc_icp;
+extern struct kvm_device_ops kvm_xics_ops;
+
+/* XIVE components, defined in book3s_xive.c */
+struct kvmppc_xive;
+struct kvmppc_xive_vcpu;
+extern struct kvm_device_ops kvm_xive_ops;
 
 struct kvmppc_passthru_irqmap;
 
@@ -298,6 +304,7 @@ struct kvm_arch {
 #endif
 #ifdef CONFIG_KVM_XICS
 	struct kvmppc_xics *xics;
+	struct kvmppc_xive *xive;
 	struct kvmppc_passthru_irqmap *pimap;
 #endif
 	struct kvmppc_ops *kvm_ops;
@@ -427,7 +434,7 @@ struct kvmppc_passthru_irqmap {
 
 #define KVMPPC_IRQ_DEFAULT	0
 #define KVMPPC_IRQ_MPIC		1
-#define KVMPPC_IRQ_XICS		2
+#define KVMPPC_IRQ_XICS		2 /* Includes a XIVE option */
 
 #define MMIO_HPTE_CACHE_SIZE	4
 
@@ -454,6 +461,21 @@ struct mmio_hpte_cache {
 
 struct openpic;
 
+/* W0 and W1 of a XIVE thread management context */
+union xive_tma_w01 {
+	struct {
+		u8	nsr;
+		u8	cppr;
+		u8	ipb;
+		u8	lsmfb;
+		u8	ack;
+		u8	inc;
+		u8	age;
+		u8	pipr;
+	};
+	__be64 w01;
+};
+
 struct kvm_vcpu_arch {
 	ulong host_stack;
 	u32 host_pid;
@@ -714,6 +736,10 @@ struct kvm_vcpu_arch {
 	struct openpic *mpic;	/* KVM_IRQ_MPIC */
 #ifdef CONFIG_KVM_XICS
 	struct kvmppc_icp *icp; /* XICS presentation controller */
+	struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
+	__be32 xive_cam_word;    /* Cooked W2 in proper endian with valid bit */
+	u32 xive_pushed;	 /* Is the VP pushed on the physical CPU ? */
+	union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
 #endif
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 76e940a3c145..e0d88c38602b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -240,6 +240,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
 extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
 extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
 extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+
 extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
 				u32 priority);
 extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
@@ -428,6 +429,14 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 	paca[cpu].kvm_hstate.xics_phys = (void __iomem *)addr;
 }
 
+static inline void kvmppc_set_xive_tima(int cpu,
+					unsigned long phys_addr,
+					void __iomem *virt_addr)
+{
+	paca[cpu].kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
+	paca[cpu].kvm_hstate.xive_tima_virt = virt_addr;
+}
+
 static inline u32 kvmppc_get_xics_latch(void)
 {
 	u32 xirr;
@@ -458,6 +467,11 @@ static inline void __init kvm_cma_reserve(void)
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {}
 
+static inline void kvmppc_set_xive_tima(int cpu,
+					unsigned long phys_addr,
+					void __iomem *virt_addr)
+{}
+
 static inline u32 kvmppc_get_xics_latch(void)
 {
 	return 0;
@@ -508,6 +522,10 @@ extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, __be32 xirr,
 					struct kvmppc_irq_map *irq_map,
 					struct kvmppc_passthru_irqmap *pimap,
 					bool *again);
+
+extern int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
+			       int level, bool line_status);
+
 extern int h_ipi_redirect;
 #else
 static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
@@ -525,6 +543,60 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 	{ return 0; }
 #endif
 
+#ifdef CONFIG_KVM_XIVE
+/*
+ * Below the first "xive" is the "eXternal Interrupt Virtualization Engine"
+ * ie. P9 new interrupt controller, while the second "xive" is the legacy
+ * "eXternal Interrupt Vector Entry" which is the configuration of an
+ * interrupt on the "xics" interrupt controller on P8 and earlier. Those
+ * two function consume or produce a legacy "XIVE" state from the
+ * new "XIVE" interrupt controller.
+ */
+extern int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
+				u32 priority);
+extern int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+				u32 *priority);
+extern int kvmppc_xive_int_on(struct kvm *kvm, u32 irq);
+extern int kvmppc_xive_int_off(struct kvm *kvm, u32 irq);
+extern void kvmppc_xive_init_module(void);
+extern void kvmppc_xive_exit_module(void);
+
+extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+				    struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu);
+extern int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+				  struct irq_desc *host_desc);
+extern int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+				  struct irq_desc *host_desc);
+extern u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
+
+extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
+			       int level, bool line_status);
+#else
+static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
+				       u32 priority) { return -1; }
+static inline int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+				       u32 *priority) { return -1; }
+static inline int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) { return -1; }
+static inline int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) { return -1; }
+static inline void kvmppc_xive_init_module(void) { }
+static inline void kvmppc_xive_exit_module(void) { }
+
+static inline int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+					   struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
+static inline void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
+static inline int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+					 struct irq_desc *host_desc) { return -ENODEV; }
+static inline int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+					 struct irq_desc *host_desc) { return -ENODEV; }
+static inline u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) { return 0; }
+static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { return -ENOENT; }
+
+static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
+				      int level, bool line_status) { return -ENODEV; }
+#endif /* CONFIG_KVM_XIVE */
+
 /*
  * Prototypes for functions called only from assembler code.
  * Having prototypes reduces sparse errors.
@@ -562,6 +634,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
 long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
                           unsigned long slb_v, unsigned int status, bool data);
 unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
+unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu);
+unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
 int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
                     unsigned long mfrr);
 int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 53885512b8d3..6c0132c7212f 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -14,6 +14,10 @@
 #include <asm-generic/module.h>
 
 
+#ifdef CC_USING_MPROFILE_KERNEL
+#define MODULE_ARCH_VERMAGIC	"mprofile-kernel"
+#endif
+
 #ifndef __powerpc64__
 /*
  * Thanks to Paul M for explaining this.
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 2a32483c7b6c..8da5d4c1cab2 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -132,7 +132,19 @@ extern long long virt_phys_offset;
 #define virt_to_pfn(kaddr)	(__pa(kaddr) >> PAGE_SHIFT)
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * On hash the vmalloc and other regions alias to the kernel region when passed
+ * through __pa(), which virt_to_pfn() uses. That means virt_addr_valid() can
+ * return true for some vmalloc addresses, which is incorrect. So explicitly
+ * check that the address is in the kernel region.
+ */
+#define virt_addr_valid(kaddr) (REGION_ID(kaddr) == KERNEL_REGION_ID && \
+				pfn_valid(virt_to_pfn(kaddr)))
+#else
 #define virt_addr_valid(kaddr)	pfn_valid(virt_to_pfn(kaddr))
+#endif
 
 /*
  * On Book-E parts we need __va to parse the device tree and we can't
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index a4b1d8d6b793..1189d04f3bd1 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -110,13 +110,18 @@ void release_thread(struct task_struct *);
 #define TASK_SIZE_128TB (0x0000800000000000UL)
 #define TASK_SIZE_512TB (0x0002000000000000UL)
 
-#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * For now 512TB is only supported with book3s and 64K linux page size.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES)
 /*
  * Max value currently used:
  */
-#define TASK_SIZE_USER64	TASK_SIZE_512TB
+#define TASK_SIZE_USER64		TASK_SIZE_512TB
+#define DEFAULT_MAP_WINDOW_USER64	TASK_SIZE_128TB
 #else
-#define TASK_SIZE_USER64	TASK_SIZE_64TB
+#define TASK_SIZE_USER64		TASK_SIZE_64TB
+#define DEFAULT_MAP_WINDOW_USER64	TASK_SIZE_64TB
 #endif
 
 /*
@@ -132,7 +137,7 @@ void release_thread(struct task_struct *);
  * space during mmap's.
  */
 #define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
-#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_128TB / 4))
+#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
 
 #define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \
 		TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
@@ -143,16 +148,15 @@ void release_thread(struct task_struct *);
  * with 128TB and conditionally enable upto 512TB
  */
 #ifdef CONFIG_PPC_BOOK3S_64
-#define DEFAULT_MAP_WINDOW	((is_32bit_task()) ? \
-				 TASK_SIZE_USER32 : TASK_SIZE_128TB)
+#define DEFAULT_MAP_WINDOW	((is_32bit_task()) ?			\
+				 TASK_SIZE_USER32 : DEFAULT_MAP_WINDOW_USER64)
 #else
 #define DEFAULT_MAP_WINDOW	TASK_SIZE
 #endif
 
 #ifdef __powerpc64__
 
-/* Limit stack to 128TB */
-#define STACK_TOP_USER64 TASK_SIZE_128TB
+#define STACK_TOP_USER64 DEFAULT_MAP_WINDOW_USER64
 #define STACK_TOP_USER32 TASK_SIZE_USER32
 
 #define STACK_TOP (is_32bit_task() ? \
@@ -374,12 +378,6 @@ struct thread_struct {
 }
 #endif
 
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-#define thread_saved_pc(tsk)    \
-        ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
-
 #define task_pt_regs(tsk)	((struct pt_regs *)(tsk)->thread.regs)
 
 unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index d4f653c9259a..7e50e47375d6 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1229,6 +1229,7 @@
 #define PVR_POWER8E	0x004B
 #define PVR_POWER8NVL	0x004C
 #define PVR_POWER8	0x004D
+#define PVR_POWER9	0x004E
 #define PVR_BE		0x0070
 #define PVR_PA6T	0x0090
 
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 8b3b46b7b0f2..329771559cbb 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -44,8 +44,22 @@ extern void __init dump_numa_cpu_topology(void);
 extern int sysfs_add_device_to_node(struct device *dev, int nid);
 extern void sysfs_remove_device_from_node(struct device *dev, int nid);
 
+static inline int early_cpu_to_node(int cpu)
+{
+	int nid;
+
+	nid = numa_cpu_lookup_table[cpu];
+
+	/*
+	 * Fall back to node 0 if nid is unset (it should be, except bugs).
+	 * This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)).
+	 */
+	return (nid < 0) ? 0 : nid;
+}
 #else
 
+static inline int early_cpu_to_node(int cpu) { return 0; }
+
 static inline void dump_numa_cpu_topology(void) {}
 
 static inline int sysfs_add_device_to_node(struct device *dev, int nid)
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 5c0d8a8cdae5..41e88d3ce36b 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -267,13 +267,7 @@ do {								\
 extern unsigned long __copy_tofrom_user(void __user *to,
 		const void __user *from, unsigned long size);
 
-#ifndef __powerpc64__
-
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
-#else /* __powerpc64__ */
-
+#ifdef __powerpc64__
 static inline unsigned long
 raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 3cdbeaeac397..c23ff4389ca2 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -94,12 +94,13 @@ struct xive_q {
  * store at 0 and some ESBs support doing a trigger via a
  * separate trigger page.
  */
-#define XIVE_ESB_GET		0x800
-#define XIVE_ESB_SET_PQ_00	0xc00
-#define XIVE_ESB_SET_PQ_01	0xd00
-#define XIVE_ESB_SET_PQ_10	0xe00
-#define XIVE_ESB_SET_PQ_11	0xf00
-#define XIVE_ESB_MASK		XIVE_ESB_SET_PQ_01
+#define XIVE_ESB_STORE_EOI	0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI	0x000 /* Load */
+#define XIVE_ESB_GET		0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00	0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01	0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10	0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11	0xf00 /* Load */
 
 #define XIVE_ESB_VAL_P		0x2
 #define XIVE_ESB_VAL_Q		0x1
@@ -136,11 +137,11 @@ extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
 				       __be32 *qpage, u32 order, bool can_escalate);
 extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
 
-extern bool __xive_irq_trigger(struct xive_irq_data *xd);
-extern bool __xive_irq_retrigger(struct xive_irq_data *xd);
-extern void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd);
-
+extern void xive_native_sync_source(u32 hw_irq);
 extern bool is_xive_irq(struct irq_chip *chip);
+extern int xive_native_enable_vp(u32 vp_id);
+extern int xive_native_disable_vp(u32 vp_id);
+extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
 
 #else
 
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index dab3717e3ea0..b15bf6bc0e94 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -1,47 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += bootx.h
-header-y += byteorder.h
-header-y += cputable.h
-header-y += eeh.h
-header-y += elf.h
-header-y += epapr_hcalls.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += nvram.h
-header-y += opal-prd.h
-header-y += param.h
-header-y += perf_event.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ps3fb.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += spu_info.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += tm.h
-header-y += types.h
-header-y += ucontext.h
-header-y += unistd.h
diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index f63c96cd3608..4d877144f377 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -46,5 +46,14 @@
 #define PPC_FEATURE2_HTM_NOSC		0x01000000
 #define PPC_FEATURE2_ARCH_3_00		0x00800000 /* ISA 3.00 */
 #define PPC_FEATURE2_HAS_IEEE128	0x00400000 /* VSX IEEE Binary Float 128-bit */
+#define PPC_FEATURE2_DARN		0x00200000 /* darn random number insn */
+#define PPC_FEATURE2_SCV		0x00100000 /* scv syscall */
+
+/*
+ * IMPORTANT!
+ * All future PPC_FEATURE definitions should be allocated in cooperation with
+ * OPAL / skiboot firmware, in accordance with the ibm,powerpc-cpu-features
+ * device tree binding.
+ */
 
 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index b9db46ae545b..e132902e1f14 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_PPC_RTAS)		+= rtas.o rtas-rtc.o $(rtaspci-y-y)
 obj-$(CONFIG_PPC_RTAS_DAEMON)	+= rtasd.o
 obj-$(CONFIG_RTAS_FLASH)	+= rtas_flash.o
 obj-$(CONFIG_RTAS_PROC)		+= rtas-proc.o
+obj-$(CONFIG_PPC_DT_CPU_FTRS)	+= dt_cpu_ftrs.o
 obj-$(CONFIG_EEH)              += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
 				  eeh_driver.o eeh_event.o eeh_sysfs.o
 obj-$(CONFIG_GENERIC_TBSYNC)	+= smp-tbsync.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 439c257dec4a..709e23425317 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -634,6 +634,8 @@ int main(void)
 	HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
 	HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
 	HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
+	HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys);
+	HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt);
 	HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
 	HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
 	HSTATE_FIELD(HSTATE_PTID, ptid);
@@ -719,6 +721,14 @@ int main(void)
 	OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6);
 #endif
 
+#ifdef CONFIG_KVM_XICS
+	DEFINE(VCPU_XIVE_SAVED_STATE, offsetof(struct kvm_vcpu,
+					       arch.xive_saved_state));
+	DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
+					    arch.xive_cam_word));
+	DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
+#endif
+
 #ifdef CONFIG_KVM_EXIT_TIMING
 	OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu);
 	OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl);
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index e79b9daa873c..6f849832a669 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -23,7 +23,9 @@
 #include <asm/mmu.h>
 #include <asm/setup.h>
 
-struct cpu_spec* cur_cpu_spec = NULL;
+static struct cpu_spec the_cpu_spec __read_mostly;
+
+struct cpu_spec* cur_cpu_spec __read_mostly = NULL;
 EXPORT_SYMBOL(cur_cpu_spec);
 
 /* The platform string corresponding to the real PVR */
@@ -122,7 +124,8 @@ extern void __restore_cpu_e6500(void);
 #define COMMON_USER_POWER9	COMMON_USER_POWER8
 #define COMMON_USER2_POWER9	(COMMON_USER2_POWER8 | \
 				 PPC_FEATURE2_ARCH_3_00 | \
-				 PPC_FEATURE2_HAS_IEEE128)
+				 PPC_FEATURE2_HAS_IEEE128 | \
+				 PPC_FEATURE2_DARN )
 
 #ifdef CONFIG_PPC_BOOK3E_64
 #define COMMON_USER_BOOKE	(COMMON_USER_PPC64 | PPC_FEATURE_BOOKE)
@@ -2179,7 +2182,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
 #endif /* CONFIG_E500 */
 };
 
-static struct cpu_spec the_cpu_spec;
+void __init set_cur_cpu_spec(struct cpu_spec *s)
+{
+	struct cpu_spec *t = &the_cpu_spec;
+
+	t = PTRRELOC(t);
+	*t = *s;
+
+	*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
+}
 
 static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
 					       struct cpu_spec *s)
@@ -2266,6 +2277,29 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
 	return NULL;
 }
 
+/*
+ * Used by cpufeatures to get the name for CPUs with a PVR table.
+ * If they don't hae a PVR table, cpufeatures gets the name from
+ * cpu device-tree node.
+ */
+void __init identify_cpu_name(unsigned int pvr)
+{
+	struct cpu_spec *s = cpu_specs;
+	struct cpu_spec *t = &the_cpu_spec;
+	int i;
+
+	s = PTRRELOC(s);
+	t = PTRRELOC(t);
+
+	for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) {
+		if ((pvr & s->pvr_mask) == s->pvr_value) {
+			t->cpu_name = s->cpu_name;
+			return;
+		}
+	}
+}
+
+
 #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
 struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS] = {
 			[0 ... NUM_CPU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
new file mode 100644
index 000000000000..4c7656dc4e04
--- /dev/null
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -0,0 +1,1069 @@
+/*
+ * Copyright 2017, Nicholas Piggin, IBM Corporation
+ * Licensed under GPLv2.
+ */
+
+#define pr_fmt(fmt) "dt-cpu-ftrs: " fmt
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <linux/libfdt.h>
+#include <linux/memblock.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/threads.h>
+
+#include <asm/cputable.h>
+#include <asm/dt_cpu_ftrs.h>
+#include <asm/mmu.h>
+#include <asm/oprofile_impl.h>
+#include <asm/prom.h>
+#include <asm/setup.h>
+
+
+/* Device-tree visible constants follow */
+#define ISA_V2_07B      2070
+#define ISA_V3_0B       3000
+
+#define USABLE_PR               (1U << 0)
+#define USABLE_OS               (1U << 1)
+#define USABLE_HV               (1U << 2)
+
+#define HV_SUPPORT_HFSCR        (1U << 0)
+#define OS_SUPPORT_FSCR         (1U << 0)
+
+/* For parsing, we define all bits set as "NONE" case */
+#define HV_SUPPORT_NONE		0xffffffffU
+#define OS_SUPPORT_NONE		0xffffffffU
+
+struct dt_cpu_feature {
+	const char *name;
+	uint32_t isa;
+	uint32_t usable_privilege;
+	uint32_t hv_support;
+	uint32_t os_support;
+	uint32_t hfscr_bit_nr;
+	uint32_t fscr_bit_nr;
+	uint32_t hwcap_bit_nr;
+	/* fdt parsing */
+	unsigned long node;
+	int enabled;
+	int disabled;
+};
+
+#define CPU_FTRS_BASE \
+	   (CPU_FTR_USE_TB | \
+	    CPU_FTR_LWSYNC | \
+	    CPU_FTR_FPU_UNAVAILABLE |\
+	    CPU_FTR_NODSISRALIGN |\
+	    CPU_FTR_NOEXECUTE |\
+	    CPU_FTR_COHERENT_ICACHE | \
+	    CPU_FTR_STCX_CHECKS_ADDRESS |\
+	    CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_DAWR | \
+	    CPU_FTR_ARCH_206 |\
+	    CPU_FTR_ARCH_207S)
+
+#define MMU_FTRS_HASH_BASE (MMU_FTRS_POWER8)
+
+#define COMMON_USER_BASE	(PPC_FEATURE_32 | PPC_FEATURE_64 | \
+				 PPC_FEATURE_ARCH_2_06 |\
+				 PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER2_BASE	(PPC_FEATURE2_ARCH_2_07 | \
+				 PPC_FEATURE2_ISEL)
+/*
+ * Set up the base CPU
+ */
+
+extern void __flush_tlb_power8(unsigned int action);
+extern void __flush_tlb_power9(unsigned int action);
+extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
+
+static int hv_mode;
+
+static struct {
+	u64	lpcr;
+	u64	hfscr;
+	u64	fscr;
+} system_registers;
+
+static void (*init_pmu_registers)(void);
+
+static void cpufeatures_flush_tlb(void)
+{
+	unsigned long rb;
+	unsigned int i, num_sets;
+
+	/*
+	 * This is a temporary measure to keep equivalent TLB flush as the
+	 * cputable based setup code.
+	 */
+	switch (PVR_VER(mfspr(SPRN_PVR))) {
+	case PVR_POWER8:
+	case PVR_POWER8E:
+	case PVR_POWER8NVL:
+		num_sets = POWER8_TLB_SETS;
+		break;
+	case PVR_POWER9:
+		num_sets = POWER9_TLB_SETS_HASH;
+		break;
+	default:
+		num_sets = 1;
+		pr_err("unknown CPU version for boot TLB flush\n");
+		break;
+	}
+
+	asm volatile("ptesync" : : : "memory");
+	rb = TLBIEL_INVAL_SET;
+	for (i = 0; i < num_sets; i++) {
+		asm volatile("tlbiel %0" : : "r" (rb));
+		rb += 1 << TLBIEL_INVAL_SET_SHIFT;
+	}
+	asm volatile("ptesync" : : : "memory");
+}
+
+static void __restore_cpu_cpufeatures(void)
+{
+	/*
+	 * LPCR is restored by the power on engine already. It can be changed
+	 * after early init e.g., by radix enable, and we have no unified API
+	 * for saving and restoring such SPRs.
+	 *
+	 * This ->restore hook should really be removed from idle and register
+	 * restore moved directly into the idle restore code, because this code
+	 * doesn't know how idle is implemented or what it needs restored here.
+	 *
+	 * The best we can do to accommodate secondary boot and idle restore
+	 * for now is "or" LPCR with existing.
+	 */
+
+	mtspr(SPRN_LPCR, system_registers.lpcr | mfspr(SPRN_LPCR));
+	if (hv_mode) {
+		mtspr(SPRN_LPID, 0);
+		mtspr(SPRN_HFSCR, system_registers.hfscr);
+	}
+	mtspr(SPRN_FSCR, system_registers.fscr);
+
+	if (init_pmu_registers)
+		init_pmu_registers();
+
+	cpufeatures_flush_tlb();
+}
+
+static char dt_cpu_name[64];
+
+static struct cpu_spec __initdata base_cpu_spec = {
+	.cpu_name		= NULL,
+	.cpu_features		= CPU_FTRS_BASE,
+	.cpu_user_features	= COMMON_USER_BASE,
+	.cpu_user_features2	= COMMON_USER2_BASE,
+	.mmu_features		= 0,
+	.icache_bsize		= 32, /* minimum block size, fixed by */
+	.dcache_bsize		= 32, /* cache info init.             */
+	.num_pmcs		= 0,
+	.pmc_type		= PPC_PMC_DEFAULT,
+	.oprofile_cpu_type	= NULL,
+	.oprofile_type		= PPC_OPROFILE_INVALID,
+	.cpu_setup		= NULL,
+	.cpu_restore		= __restore_cpu_cpufeatures,
+	.flush_tlb		= NULL,
+	.machine_check_early	= NULL,
+	.platform		= NULL,
+};
+
+static void __init cpufeatures_setup_cpu(void)
+{
+	set_cur_cpu_spec(&base_cpu_spec);
+
+	cur_cpu_spec->pvr_mask = -1;
+	cur_cpu_spec->pvr_value = mfspr(SPRN_PVR);
+
+	/* Initialize the base environment -- clear FSCR/HFSCR.  */
+	hv_mode = !!(mfmsr() & MSR_HV);
+	if (hv_mode) {
+		/* CPU_FTR_HVMODE is used early in PACA setup */
+		cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+		mtspr(SPRN_HFSCR, 0);
+	}
+	mtspr(SPRN_FSCR, 0);
+
+	/*
+	 * LPCR does not get cleared, to match behaviour with secondaries
+	 * in __restore_cpu_cpufeatures. Once the idle code is fixed, this
+	 * could clear LPCR too.
+	 */
+}
+
+static int __init feat_try_enable_unknown(struct dt_cpu_feature *f)
+{
+	if (f->hv_support == HV_SUPPORT_NONE) {
+	} else if (f->hv_support & HV_SUPPORT_HFSCR) {
+		u64 hfscr = mfspr(SPRN_HFSCR);
+		hfscr |= 1UL << f->hfscr_bit_nr;
+		mtspr(SPRN_HFSCR, hfscr);
+	} else {
+		/* Does not have a known recipe */
+		return 0;
+	}
+
+	if (f->os_support == OS_SUPPORT_NONE) {
+	} else if (f->os_support & OS_SUPPORT_FSCR) {
+		u64 fscr = mfspr(SPRN_FSCR);
+		fscr |= 1UL << f->fscr_bit_nr;
+		mtspr(SPRN_FSCR, fscr);
+	} else {
+		/* Does not have a known recipe */
+		return 0;
+	}
+
+	if ((f->usable_privilege & USABLE_PR) && (f->hwcap_bit_nr != -1)) {
+		uint32_t word = f->hwcap_bit_nr / 32;
+		uint32_t bit = f->hwcap_bit_nr % 32;
+
+		if (word == 0)
+			cur_cpu_spec->cpu_user_features |= 1U << bit;
+		else if (word == 1)
+			cur_cpu_spec->cpu_user_features2 |= 1U << bit;
+		else
+			pr_err("%s could not advertise to user (no hwcap bits)\n", f->name);
+	}
+
+	return 1;
+}
+
+static int __init feat_enable(struct dt_cpu_feature *f)
+{
+	if (f->hv_support != HV_SUPPORT_NONE) {
+		if (f->hfscr_bit_nr != -1) {
+			u64 hfscr = mfspr(SPRN_HFSCR);
+			hfscr |= 1UL << f->hfscr_bit_nr;
+			mtspr(SPRN_HFSCR, hfscr);
+		}
+	}
+
+	if (f->os_support != OS_SUPPORT_NONE) {
+		if (f->fscr_bit_nr != -1) {
+			u64 fscr = mfspr(SPRN_FSCR);
+			fscr |= 1UL << f->fscr_bit_nr;
+			mtspr(SPRN_FSCR, fscr);
+		}
+	}
+
+	if ((f->usable_privilege & USABLE_PR) && (f->hwcap_bit_nr != -1)) {
+		uint32_t word = f->hwcap_bit_nr / 32;
+		uint32_t bit = f->hwcap_bit_nr % 32;
+
+		if (word == 0)
+			cur_cpu_spec->cpu_user_features |= 1U << bit;
+		else if (word == 1)
+			cur_cpu_spec->cpu_user_features2 |= 1U << bit;
+		else
+			pr_err("CPU feature: %s could not advertise to user (no hwcap bits)\n", f->name);
+	}
+
+	return 1;
+}
+
+static int __init feat_disable(struct dt_cpu_feature *f)
+{
+	return 0;
+}
+
+static int __init feat_enable_hv(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	if (!hv_mode) {
+		pr_err("CPU feature hypervisor present in device tree but HV mode not enabled in the CPU. Ignoring.\n");
+		return 0;
+	}
+
+	mtspr(SPRN_LPID, 0);
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr &=  ~LPCR_LPES0; /* HV external interrupts */
+	mtspr(SPRN_LPCR, lpcr);
+
+	cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+
+	return 1;
+}
+
+static int __init feat_enable_le(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_TRUE_LE;
+	return 1;
+}
+
+static int __init feat_enable_smt(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_SMT;
+	return 1;
+}
+
+static int __init feat_enable_idle_nap(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	/* Set PECE wakeup modes for ISA 207 */
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr |=  LPCR_PECE0;
+	lpcr |=  LPCR_PECE1;
+	lpcr |=  LPCR_PECE2;
+	mtspr(SPRN_LPCR, lpcr);
+
+	return 1;
+}
+
+static int __init feat_enable_align_dsisr(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->cpu_features &= ~CPU_FTR_NODSISRALIGN;
+
+	return 1;
+}
+
+static int __init feat_enable_idle_stop(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	/* Set PECE wakeup modes for ISAv3.0B */
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr |=  LPCR_PECE0;
+	lpcr |=  LPCR_PECE1;
+	lpcr |=  LPCR_PECE2;
+	mtspr(SPRN_LPCR, lpcr);
+
+	return 1;
+}
+
+static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr &= ~LPCR_ISL;
+
+	/* VRMASD */
+	lpcr |= LPCR_VPM0;
+	lpcr &= ~LPCR_VPM1;
+	lpcr |= 0x10UL << LPCR_VRMASD_SH; /* L=1 LP=00 */
+	mtspr(SPRN_LPCR, lpcr);
+
+	cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+	return 1;
+}
+
+static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr &= ~LPCR_ISL;
+	mtspr(SPRN_LPCR, lpcr);
+
+	cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+	return 1;
+}
+
+
+static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_PPC_RADIX_MMU
+	cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
+	cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+	return 1;
+#endif
+	return 0;
+}
+
+static int __init feat_enable_dscr(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	feat_enable(f);
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr &= ~LPCR_DPFD;
+	lpcr |=  (4UL << LPCR_DPFD_SH);
+	mtspr(SPRN_LPCR, lpcr);
+
+	return 1;
+}
+
+static void hfscr_pmu_enable(void)
+{
+	u64 hfscr = mfspr(SPRN_HFSCR);
+	hfscr |= PPC_BIT(60);
+	mtspr(SPRN_HFSCR, hfscr);
+}
+
+static void init_pmu_power8(void)
+{
+	if (hv_mode) {
+		mtspr(SPRN_MMCRC, 0);
+		mtspr(SPRN_MMCRH, 0);
+	}
+
+	mtspr(SPRN_MMCRA, 0);
+	mtspr(SPRN_MMCR0, 0);
+	mtspr(SPRN_MMCR1, 0);
+	mtspr(SPRN_MMCR2, 0);
+	mtspr(SPRN_MMCRS, 0);
+}
+
+static int __init feat_enable_mce_power8(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->platform = "power8";
+	cur_cpu_spec->flush_tlb = __flush_tlb_power8;
+	cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p8;
+
+	return 1;
+}
+
+static int __init feat_enable_pmu_power8(struct dt_cpu_feature *f)
+{
+	hfscr_pmu_enable();
+
+	init_pmu_power8();
+	init_pmu_registers = init_pmu_power8;
+
+	cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+	if (pvr_version_is(PVR_POWER8E))
+		cur_cpu_spec->cpu_features |= CPU_FTR_PMAO_BUG;
+
+	cur_cpu_spec->num_pmcs		= 6;
+	cur_cpu_spec->pmc_type		= PPC_PMC_IBM;
+	cur_cpu_spec->oprofile_cpu_type	= "ppc64/power8";
+
+	return 1;
+}
+
+static void init_pmu_power9(void)
+{
+	if (hv_mode)
+		mtspr(SPRN_MMCRC, 0);
+
+	mtspr(SPRN_MMCRA, 0);
+	mtspr(SPRN_MMCR0, 0);
+	mtspr(SPRN_MMCR1, 0);
+	mtspr(SPRN_MMCR2, 0);
+}
+
+static int __init feat_enable_mce_power9(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->platform = "power9";
+	cur_cpu_spec->flush_tlb = __flush_tlb_power9;
+	cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p9;
+
+	return 1;
+}
+
+static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f)
+{
+	hfscr_pmu_enable();
+
+	init_pmu_power9();
+	init_pmu_registers = init_pmu_power9;
+
+	cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+
+	cur_cpu_spec->num_pmcs		= 6;
+	cur_cpu_spec->pmc_type		= PPC_PMC_IBM;
+	cur_cpu_spec->oprofile_cpu_type	= "ppc64/power9";
+
+	return 1;
+}
+
+static int __init feat_enable_tm(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	feat_enable(f);
+	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NOSC;
+	return 1;
+#endif
+	return 0;
+}
+
+static int __init feat_enable_fp(struct dt_cpu_feature *f)
+{
+	feat_enable(f);
+	cur_cpu_spec->cpu_features &= ~CPU_FTR_FPU_UNAVAILABLE;
+
+	return 1;
+}
+
+static int __init feat_enable_vector(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_ALTIVEC
+	feat_enable(f);
+	cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
+	cur_cpu_spec->cpu_features |= CPU_FTR_VMX_COPY;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
+
+	return 1;
+#endif
+	return 0;
+}
+
+static int __init feat_enable_vsx(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_VSX
+	feat_enable(f);
+	cur_cpu_spec->cpu_features |= CPU_FTR_VSX;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_VSX;
+
+	return 1;
+#endif
+	return 0;
+}
+
+static int __init feat_enable_purr(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->cpu_features |= CPU_FTR_PURR | CPU_FTR_SPURR;
+
+	return 1;
+}
+
+static int __init feat_enable_ebb(struct dt_cpu_feature *f)
+{
+	/*
+	 * PPC_FEATURE2_EBB is enabled in PMU init code because it has
+	 * historically been related to the PMU facility. This may have
+	 * to be decoupled if EBB becomes more generic. For now, follow
+	 * existing convention.
+	 */
+	f->hwcap_bit_nr = -1;
+	feat_enable(f);
+
+	return 1;
+}
+
+static int __init feat_enable_dbell(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	/* P9 has an HFSCR for privileged state */
+	feat_enable(f);
+
+	cur_cpu_spec->cpu_features |= CPU_FTR_DBELL;
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr |=  LPCR_PECEDH; /* hyp doorbell wakeup */
+	mtspr(SPRN_LPCR, lpcr);
+
+	return 1;
+}
+
+static int __init feat_enable_hvi(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	/*
+	 * POWER9 XIVE interrupts including in OPAL XICS compatibility
+	 * are always delivered as hypervisor virtualization interrupts (HVI)
+	 * rather than EE.
+	 *
+	 * However LPES0 is not set here, in the chance that an EE does get
+	 * delivered to the host somehow, the EE handler would not expect it
+	 * to be delivered in LPES0 mode (e.g., using SRR[01]). This could
+	 * happen if there is a bug in interrupt controller code, or IC is
+	 * misconfigured in systemsim.
+	 */
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr |= LPCR_HVICE;	/* enable hvi interrupts */
+	lpcr |= LPCR_HEIC;	/* disable ee interrupts when MSR_HV */
+	lpcr |= LPCR_PECE_HVEE; /* hvi can wake from stop */
+	mtspr(SPRN_LPCR, lpcr);
+
+	return 1;
+}
+
+static int __init feat_enable_large_ci(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->mmu_features |= MMU_FTR_CI_LARGE_PAGE;
+
+	return 1;
+}
+
+struct dt_cpu_feature_match {
+	const char *name;
+	int (*enable)(struct dt_cpu_feature *f);
+	u64 cpu_ftr_bit_mask;
+};
+
+static struct dt_cpu_feature_match __initdata
+		dt_cpu_feature_match_table[] = {
+	{"hypervisor", feat_enable_hv, 0},
+	{"big-endian", feat_enable, 0},
+	{"little-endian", feat_enable_le, CPU_FTR_REAL_LE},
+	{"smt", feat_enable_smt, 0},
+	{"interrupt-facilities", feat_enable, 0},
+	{"timer-facilities", feat_enable, 0},
+	{"timer-facilities-v3", feat_enable, 0},
+	{"debug-facilities", feat_enable, 0},
+	{"come-from-address-register", feat_enable, CPU_FTR_CFAR},
+	{"branch-tracing", feat_enable, 0},
+	{"floating-point", feat_enable_fp, 0},
+	{"vector", feat_enable_vector, 0},
+	{"vector-scalar", feat_enable_vsx, 0},
+	{"vector-scalar-v3", feat_enable, 0},
+	{"decimal-floating-point", feat_enable, 0},
+	{"decimal-integer", feat_enable, 0},
+	{"quadword-load-store", feat_enable, 0},
+	{"vector-crypto", feat_enable, 0},
+	{"mmu-hash", feat_enable_mmu_hash, 0},
+	{"mmu-radix", feat_enable_mmu_radix, 0},
+	{"mmu-hash-v3", feat_enable_mmu_hash_v3, 0},
+	{"virtual-page-class-key-protection", feat_enable, 0},
+	{"transactional-memory", feat_enable_tm, CPU_FTR_TM},
+	{"transactional-memory-v3", feat_enable_tm, 0},
+	{"idle-nap", feat_enable_idle_nap, 0},
+	{"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0},
+	{"idle-stop", feat_enable_idle_stop, 0},
+	{"machine-check-power8", feat_enable_mce_power8, 0},
+	{"performance-monitor-power8", feat_enable_pmu_power8, 0},
+	{"data-stream-control-register", feat_enable_dscr, CPU_FTR_DSCR},
+	{"event-based-branch", feat_enable_ebb, 0},
+	{"target-address-register", feat_enable, 0},
+	{"branch-history-rolling-buffer", feat_enable, 0},
+	{"control-register", feat_enable, CPU_FTR_CTRL},
+	{"processor-control-facility", feat_enable_dbell, CPU_FTR_DBELL},
+	{"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL},
+	{"processor-utilization-of-resources-register", feat_enable_purr, 0},
+	{"no-execute", feat_enable, 0},
+	{"strong-access-ordering", feat_enable, CPU_FTR_SAO},
+	{"cache-inhibited-large-page", feat_enable_large_ci, 0},
+	{"coprocessor-icswx", feat_enable, CPU_FTR_ICSWX},
+	{"hypervisor-virtualization-interrupt", feat_enable_hvi, 0},
+	{"program-priority-register", feat_enable, CPU_FTR_HAS_PPR},
+	{"wait", feat_enable, 0},
+	{"atomic-memory-operations", feat_enable, 0},
+	{"branch-v3", feat_enable, 0},
+	{"copy-paste", feat_enable, 0},
+	{"decimal-floating-point-v3", feat_enable, 0},
+	{"decimal-integer-v3", feat_enable, 0},
+	{"fixed-point-v3", feat_enable, 0},
+	{"floating-point-v3", feat_enable, 0},
+	{"group-start-register", feat_enable, 0},
+	{"pc-relative-addressing", feat_enable, 0},
+	{"machine-check-power9", feat_enable_mce_power9, 0},
+	{"performance-monitor-power9", feat_enable_pmu_power9, 0},
+	{"event-based-branch-v3", feat_enable, 0},
+	{"random-number-generator", feat_enable, 0},
+	{"system-call-vectored", feat_disable, 0},
+	{"trace-interrupt-v3", feat_enable, 0},
+	{"vector-v3", feat_enable, 0},
+	{"vector-binary128", feat_enable, 0},
+	{"vector-binary16", feat_enable, 0},
+	{"wait-v3", feat_enable, 0},
+};
+
+static bool __initdata using_dt_cpu_ftrs;
+static bool __initdata enable_unknown = true;
+
+static int __init dt_cpu_ftrs_parse(char *str)
+{
+	if (!str)
+		return 0;
+
+	if (!strcmp(str, "off"))
+		using_dt_cpu_ftrs = false;
+	else if (!strcmp(str, "known"))
+		enable_unknown = false;
+	else
+		return 1;
+
+	return 0;
+}
+early_param("dt_cpu_ftrs", dt_cpu_ftrs_parse);
+
+static void __init cpufeatures_setup_start(u32 isa)
+{
+	pr_info("setup for ISA %d\n", isa);
+
+	if (isa >= 3000) {
+		cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300;
+		cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00;
+	}
+}
+
+static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
+{
+	const struct dt_cpu_feature_match *m;
+	bool known = false;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dt_cpu_feature_match_table); i++) {
+		m = &dt_cpu_feature_match_table[i];
+		if (!strcmp(f->name, m->name)) {
+			known = true;
+			if (m->enable(f))
+				break;
+
+			pr_info("not enabling: %s (disabled or unsupported by kernel)\n",
+				f->name);
+			return false;
+		}
+	}
+
+	if (!known && enable_unknown) {
+		if (!feat_try_enable_unknown(f)) {
+			pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
+				f->name);
+			return false;
+		}
+	}
+
+	if (m->cpu_ftr_bit_mask)
+		cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask;
+
+	if (known)
+		pr_debug("enabling: %s\n", f->name);
+	else
+		pr_debug("enabling: %s (unknown)\n", f->name);
+
+	return true;
+}
+
+static __init void cpufeatures_cpu_quirks(void)
+{
+	int version = mfspr(SPRN_PVR);
+
+	/*
+	 * Not all quirks can be derived from the cpufeatures device tree.
+	 */
+	if ((version & 0xffffff00) == 0x004e0100)
+		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
+}
+
+static void __init cpufeatures_setup_finished(void)
+{
+	cpufeatures_cpu_quirks();
+
+	if (hv_mode && !(cur_cpu_spec->cpu_features & CPU_FTR_HVMODE)) {
+		pr_err("hypervisor not present in device tree but HV mode is enabled in the CPU. Enabling.\n");
+		cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+	}
+
+	system_registers.lpcr = mfspr(SPRN_LPCR);
+	system_registers.hfscr = mfspr(SPRN_HFSCR);
+	system_registers.fscr = mfspr(SPRN_FSCR);
+
+	cpufeatures_flush_tlb();
+
+	pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n",
+		cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
+}
+
+static int __init disabled_on_cmdline(void)
+{
+	unsigned long root, chosen;
+	const char *p;
+
+	root = of_get_flat_dt_root();
+	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+	if (chosen == -FDT_ERR_NOTFOUND)
+		return false;
+
+	p = of_get_flat_dt_prop(chosen, "bootargs", NULL);
+	if (!p)
+		return false;
+
+	if (strstr(p, "dt_cpu_ftrs=off"))
+		return true;
+
+	return false;
+}
+
+static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
+					int depth, void *data)
+{
+	if (of_flat_dt_is_compatible(node, "ibm,powerpc-cpu-features")
+	    && of_get_flat_dt_prop(node, "isa", NULL))
+		return 1;
+
+	return 0;
+}
+
+bool __init dt_cpu_ftrs_in_use(void)
+{
+	return using_dt_cpu_ftrs;
+}
+
+bool __init dt_cpu_ftrs_init(void *fdt)
+{
+	using_dt_cpu_ftrs = false;
+
+	/* Setup and verify the FDT, if it fails we just bail */
+	if (!early_init_dt_verify(fdt))
+		return false;
+
+	if (!of_scan_flat_dt(fdt_find_cpu_features, NULL))
+		return false;
+
+	if (disabled_on_cmdline())
+		return false;
+
+	cpufeatures_setup_cpu();
+
+	using_dt_cpu_ftrs = true;
+	return true;
+}
+
+static int nr_dt_cpu_features;
+static struct dt_cpu_feature *dt_cpu_features;
+
+static int __init process_cpufeatures_node(unsigned long node,
+					  const char *uname, int i)
+{
+	const __be32 *prop;
+	struct dt_cpu_feature *f;
+	int len;
+
+	f = &dt_cpu_features[i];
+	memset(f, 0, sizeof(struct dt_cpu_feature));
+
+	f->node = node;
+
+	f->name = uname;
+
+	prop = of_get_flat_dt_prop(node, "isa", &len);
+	if (!prop) {
+		pr_warn("%s: missing isa property\n", uname);
+		return 0;
+	}
+	f->isa = be32_to_cpup(prop);
+
+	prop = of_get_flat_dt_prop(node, "usable-privilege", &len);
+	if (!prop) {
+		pr_warn("%s: missing usable-privilege property", uname);
+		return 0;
+	}
+	f->usable_privilege = be32_to_cpup(prop);
+
+	prop = of_get_flat_dt_prop(node, "hv-support", &len);
+	if (prop)
+		f->hv_support = be32_to_cpup(prop);
+	else
+		f->hv_support = HV_SUPPORT_NONE;
+
+	prop = of_get_flat_dt_prop(node, "os-support", &len);
+	if (prop)
+		f->os_support = be32_to_cpup(prop);
+	else
+		f->os_support = OS_SUPPORT_NONE;
+
+	prop = of_get_flat_dt_prop(node, "hfscr-bit-nr", &len);
+	if (prop)
+		f->hfscr_bit_nr = be32_to_cpup(prop);
+	else
+		f->hfscr_bit_nr = -1;
+	prop = of_get_flat_dt_prop(node, "fscr-bit-nr", &len);
+	if (prop)
+		f->fscr_bit_nr = be32_to_cpup(prop);
+	else
+		f->fscr_bit_nr = -1;
+	prop = of_get_flat_dt_prop(node, "hwcap-bit-nr", &len);
+	if (prop)
+		f->hwcap_bit_nr = be32_to_cpup(prop);
+	else
+		f->hwcap_bit_nr = -1;
+
+	if (f->usable_privilege & USABLE_HV) {
+		if (!(mfmsr() & MSR_HV)) {
+			pr_warn("%s: HV feature passed to guest\n", uname);
+			return 0;
+		}
+
+		if (f->hv_support == HV_SUPPORT_NONE && f->hfscr_bit_nr != -1) {
+			pr_warn("%s: unwanted hfscr_bit_nr\n", uname);
+			return 0;
+		}
+
+		if (f->hv_support == HV_SUPPORT_HFSCR) {
+			if (f->hfscr_bit_nr == -1) {
+				pr_warn("%s: missing hfscr_bit_nr\n", uname);
+				return 0;
+			}
+		}
+	} else {
+		if (f->hv_support != HV_SUPPORT_NONE || f->hfscr_bit_nr != -1) {
+			pr_warn("%s: unwanted hv_support/hfscr_bit_nr\n", uname);
+			return 0;
+		}
+	}
+
+	if (f->usable_privilege & USABLE_OS) {
+		if (f->os_support == OS_SUPPORT_NONE && f->fscr_bit_nr != -1) {
+			pr_warn("%s: unwanted fscr_bit_nr\n", uname);
+			return 0;
+		}
+
+		if (f->os_support == OS_SUPPORT_FSCR) {
+			if (f->fscr_bit_nr == -1) {
+				pr_warn("%s: missing fscr_bit_nr\n", uname);
+				return 0;
+			}
+		}
+	} else {
+		if (f->os_support != OS_SUPPORT_NONE || f->fscr_bit_nr != -1) {
+			pr_warn("%s: unwanted os_support/fscr_bit_nr\n", uname);
+			return 0;
+		}
+	}
+
+	if (!(f->usable_privilege & USABLE_PR)) {
+		if (f->hwcap_bit_nr != -1) {
+			pr_warn("%s: unwanted hwcap_bit_nr\n", uname);
+			return 0;
+		}
+	}
+
+	/* Do all the independent features in the first pass */
+	if (!of_get_flat_dt_prop(node, "dependencies", &len)) {
+		if (cpufeatures_process_feature(f))
+			f->enabled = 1;
+		else
+			f->disabled = 1;
+	}
+
+	return 0;
+}
+
+static void __init cpufeatures_deps_enable(struct dt_cpu_feature *f)
+{
+	const __be32 *prop;
+	int len;
+	int nr_deps;
+	int i;
+
+	if (f->enabled || f->disabled)
+		return;
+
+	prop = of_get_flat_dt_prop(f->node, "dependencies", &len);
+	if (!prop) {
+		pr_warn("%s: missing dependencies property", f->name);
+		return;
+	}
+
+	nr_deps = len / sizeof(int);
+
+	for (i = 0; i < nr_deps; i++) {
+		unsigned long phandle = be32_to_cpu(prop[i]);
+		int j;
+
+		for (j = 0; j < nr_dt_cpu_features; j++) {
+			struct dt_cpu_feature *d = &dt_cpu_features[j];
+
+			if (of_get_flat_dt_phandle(d->node) == phandle) {
+				cpufeatures_deps_enable(d);
+				if (d->disabled) {
+					f->disabled = 1;
+					return;
+				}
+			}
+		}
+	}
+
+	if (cpufeatures_process_feature(f))
+		f->enabled = 1;
+	else
+		f->disabled = 1;
+}
+
+static int __init scan_cpufeatures_subnodes(unsigned long node,
+					  const char *uname,
+					  void *data)
+{
+	int *count = data;
+
+	process_cpufeatures_node(node, uname, *count);
+
+	(*count)++;
+
+	return 0;
+}
+
+static int __init count_cpufeatures_subnodes(unsigned long node,
+					  const char *uname,
+					  void *data)
+{
+	int *count = data;
+
+	(*count)++;
+
+	return 0;
+}
+
+static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
+					    *uname, int depth, void *data)
+{
+	const __be32 *prop;
+	int count, i;
+	u32 isa;
+
+	/* We are scanning "ibm,powerpc-cpu-features" nodes only */
+	if (!of_flat_dt_is_compatible(node, "ibm,powerpc-cpu-features"))
+		return 0;
+
+	prop = of_get_flat_dt_prop(node, "isa", NULL);
+	if (!prop)
+		/* We checked before, "can't happen" */
+		return 0;
+
+	isa = be32_to_cpup(prop);
+
+	/* Count and allocate space for cpu features */
+	of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes,
+						&nr_dt_cpu_features);
+	dt_cpu_features = __va(
+		memblock_alloc(sizeof(struct dt_cpu_feature)*
+				nr_dt_cpu_features, PAGE_SIZE));
+
+	cpufeatures_setup_start(isa);
+
+	/* Scan nodes into dt_cpu_features and enable those without deps  */
+	count = 0;
+	of_scan_flat_dt_subnodes(node, scan_cpufeatures_subnodes, &count);
+
+	/* Recursive enable remaining features with dependencies */
+	for (i = 0; i < nr_dt_cpu_features; i++) {
+		struct dt_cpu_feature *f = &dt_cpu_features[i];
+
+		cpufeatures_deps_enable(f);
+	}
+
+	prop = of_get_flat_dt_prop(node, "display-name", NULL);
+	if (prop && strlen((char *)prop) != 0) {
+		strlcpy(dt_cpu_name, (char *)prop, sizeof(dt_cpu_name));
+		cur_cpu_spec->cpu_name = dt_cpu_name;
+	}
+
+	cpufeatures_setup_finished();
+
+	memblock_free(__pa(dt_cpu_features),
+			sizeof(struct dt_cpu_feature)*nr_dt_cpu_features);
+
+	return 0;
+}
+
+void __init dt_cpu_ftrs_scan(void)
+{
+	if (!using_dt_cpu_ftrs)
+		return;
+
+	of_scan_flat_dt(dt_cpu_ftrs_scan_callback, NULL);
+}
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 45b453e4d0c8..acd8ca76233e 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -735,8 +735,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	andis.	r15,r14,(DBSR_IC|DBSR_BT)@h
 	beq+	1f
 
+#ifdef CONFIG_RELOCATABLE
+	ld	r15,PACATOC(r13)
+	ld	r14,interrupt_base_book3e@got(r15)
+	ld	r15,__end_interrupts@got(r15)
+#else
 	LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
 	LOAD_REG_IMMEDIATE(r15,__end_interrupts)
+#endif
 	cmpld	cr0,r10,r14
 	cmpld	cr1,r10,r15
 	blt+	cr0,1f
@@ -799,8 +805,14 @@ kernel_dbg_exc:
 	andis.	r15,r14,(DBSR_IC|DBSR_BT)@h
 	beq+	1f
 
+#ifdef CONFIG_RELOCATABLE
+	ld	r15,PACATOC(r13)
+	ld	r14,interrupt_base_book3e@got(r15)
+	ld	r15,__end_interrupts@got(r15)
+#else
 	LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
 	LOAD_REG_IMMEDIATE(r15,__end_interrupts)
+#endif
 	cmpld	cr0,r10,r14
 	cmpld	cr1,r10,r15
 	blt+	cr0,1f
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index a9312b52fe6f..b886795060fd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -391,9 +391,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	 */
 	BEGIN_FTR_SECTION
 	rlwinm.	r11,r12,47-31,30,31
-	beq-	4f
-	BRANCH_TO_COMMON(r10, machine_check_idle_common)
-4:
+	bne	machine_check_idle_common
 	END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 #endif
 
@@ -1413,10 +1411,8 @@ USE_TEXT_SECTION()
 	.balign	IFETCH_ALIGN_BYTES
 do_hash_page:
 #ifdef CONFIG_PPC_STD_MMU_64
-	andis.	r0,r4,0xa410		/* weird error? */
+	andis.	r0,r4,0xa450		/* weird error? */
 	bne-	handle_page_fault	/* if not, try to insert a HPTE */
-	andis.  r0,r4,DSISR_DABRMATCH@h
-	bne-    handle_dabr_fault
 	CURRENT_THREAD_INFO(r11, r1)
 	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
 	andis.	r0,r0,NMI_MASK@h	/* (i.e. an irq when soft-disabled) */
@@ -1440,11 +1436,16 @@ do_hash_page:
 
 	/* Error */
 	blt-	13f
+
+	/* Reload DSISR into r4 for the DABR check below */
+	ld      r4,_DSISR(r1)
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 /* Here we have a page fault that hash_page can't handle. */
 handle_page_fault:
-11:	ld	r4,_DAR(r1)
+11:	andis.  r0,r4,DSISR_DABRMATCH@h
+	bne-    handle_dabr_fault
+	ld	r4,_DAR(r1)
 	ld	r5,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_page_fault
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 07d4e0ad60db..4898d676dcae 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -416,7 +416,7 @@ power9_dd1_recover_paca:
 	 * which needs to be restored from the stack.
 	 */
 	li	r3, 1
-	stb	r0,PACA_NAPSTATELOST(r13)
+	stb	r3,PACA_NAPSTATELOST(r13)
 	blr
 
 /*
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 160ae0fa7d0d..01addfb0ed0a 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
 
+int is_current_kprobe_addr(unsigned long addr)
+{
+	struct kprobe *p = kprobe_running();
+	return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+}
+
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
 	return  (addr >= (unsigned long)__kprobes_text_start &&
@@ -305,16 +311,17 @@ int kprobe_handler(struct pt_regs *regs)
 			save_previous_kprobe(kcb);
 			set_current_kprobe(p, regs, kcb);
 			kprobes_inc_nmissed_count(p);
-			prepare_singlestep(p, regs);
 			kcb->kprobe_status = KPROBE_REENTER;
 			if (p->ainsn.boostable >= 0) {
 				ret = try_to_emulate(p, regs);
 
 				if (ret > 0) {
 					restore_previous_kprobe(kcb);
+					preempt_enable_no_resched();
 					return 1;
 				}
 			}
+			prepare_singlestep(p, regs);
 			return 1;
 		} else {
 			if (*addr != BREAKPOINT_INSTRUCTION) {
@@ -616,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
 #endif
 
+	/*
+	 * jprobes use jprobe_return() which skips the normal return
+	 * path of the function, and this messes up the accounting of the
+	 * function graph tracer.
+	 *
+	 * Pause function graph tracing while performing the jprobe function.
+	 */
+	pause_graph_tracing();
+
 	return 1;
 }
 NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -641,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	 * saved regs...
 	 */
 	memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+	/* It's OK to start function graph tracing again */
+	unpause_graph_tracing();
 	preempt_enable_no_resched();
 	return 1;
 }
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index d645da302bf2..2ad725ef4368 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -864,6 +864,25 @@ static void tm_reclaim_thread(struct thread_struct *thr,
 	if (!MSR_TM_SUSPENDED(mfmsr()))
 		return;
 
+	/*
+	 * If we are in a transaction and FP is off then we can't have
+	 * used FP inside that transaction. Hence the checkpointed
+	 * state is the same as the live state. We need to copy the
+	 * live state to the checkpointed state so that when the
+	 * transaction is restored, the checkpointed state is correct
+	 * and the aborted transaction sees the correct state. We use
+	 * ckpt_regs.msr here as that's what tm_reclaim will use to
+	 * determine if it's going to write the checkpointed state or
+	 * not. So either this will write the checkpointed registers,
+	 * or reclaim will. Similarly for VMX.
+	 */
+	if ((thr->ckpt_regs.msr & MSR_FP) == 0)
+		memcpy(&thr->ckfp_state, &thr->fp_state,
+		       sizeof(struct thread_fp_state));
+	if ((thr->ckpt_regs.msr & MSR_VEC) == 0)
+		memcpy(&thr->ckvr_state, &thr->vr_state,
+		       sizeof(struct thread_vr_state));
+
 	giveup_all(container_of(thr, struct task_struct, thread));
 
 	tm_reclaim(thr, thr->ckpt_regs.msr, cause);
@@ -1647,6 +1666,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 #ifdef CONFIG_VSX
 	current->thread.used_vsr = 0;
 #endif
+	current->thread.load_fp = 0;
 	memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
 	current->thread.fp_save_area = NULL;
 #ifdef CONFIG_ALTIVEC
@@ -1655,6 +1675,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 	current->thread.vr_save_area = NULL;
 	current->thread.vrsave = 0;
 	current->thread.used_vr = 0;
+	current->thread.load_vec = 0;
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_SPE
 	memset(current->thread.evr, 0, sizeof(current->thread.evr));
@@ -1666,6 +1687,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 	current->thread.tm_tfhar = 0;
 	current->thread.tm_texasr = 0;
 	current->thread.tm_tfiar = 0;
+	current->thread.load_tm = 0;
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 }
 EXPORT_SYMBOL(start_thread);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index d2f0afeae5a0..f83056297441 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -57,6 +57,7 @@
 #include <asm/fadump.h>
 #include <asm/epapr_hcalls.h>
 #include <asm/firmware.h>
+#include <asm/dt_cpu_ftrs.h>
 
 #include <mm/mmu_decl.h>
 
@@ -160,7 +161,9 @@ static struct ibm_pa_feature {
 	{ .pabyte = 0,  .pabit = 3, .cpu_features  = CPU_FTR_CTRL },
 	{ .pabyte = 0,  .pabit = 6, .cpu_features  = CPU_FTR_NOEXECUTE },
 	{ .pabyte = 1,  .pabit = 2, .mmu_features  = MMU_FTR_CI_LARGE_PAGE },
+#ifdef CONFIG_PPC_RADIX_MMU
 	{ .pabyte = 40, .pabit = 0, .mmu_features  = MMU_FTR_TYPE_RADIX },
+#endif
 	{ .pabyte = 1,  .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN },
 	{ .pabyte = 5,  .pabit = 0, .cpu_features  = CPU_FTR_REAL_LE,
 				    .cpu_user_ftrs = PPC_FEATURE_TRUE_LE },
@@ -375,23 +378,31 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 	 * A POWER6 partition in "POWER6 architected" mode
 	 * uses the 0x0f000002 PVR value; in POWER5+ mode
 	 * it uses 0x0f000001.
+	 *
+	 * If we're using device tree CPU feature discovery then we don't
+	 * support the cpu-version property, and it's the responsibility of the
+	 * firmware/hypervisor to provide the correct feature set for the
+	 * architecture level via the ibm,powerpc-cpu-features binding.
 	 */
-	prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
-	if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000)
-		identify_cpu(0, be32_to_cpup(prop));
+	if (!dt_cpu_ftrs_in_use()) {
+		prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
+		if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000)
+			identify_cpu(0, be32_to_cpup(prop));
 
-	identical_pvr_fixup(node);
+		check_cpu_feature_properties(node);
+		check_cpu_pa_features(node);
+	}
 
-	check_cpu_feature_properties(node);
-	check_cpu_pa_features(node);
+	identical_pvr_fixup(node);
 	init_mmu_slb_size(node);
 
 #ifdef CONFIG_PPC64
-	if (nthreads > 1)
-		cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
-	else
+	if (nthreads == 1)
 		cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
+	else if (!dt_cpu_ftrs_in_use())
+		cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
 #endif
+
 	return 0;
 }
 
@@ -721,6 +732,8 @@ void __init early_init_devtree(void *params)
 
 	DBG("Scanning CPUs ...\n");
 
+	dt_cpu_ftrs_scan();
+
 	/* Retrieve CPU related informations from the flat tree
 	 * (altivec support, boot CPU ID, ...)
 	 */
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 69e077180db6..857129acf960 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -261,7 +261,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	seq_printf(m, "processor\t: %lu\n", cpu_id);
 	seq_printf(m, "cpu\t\t: ");
 
-	if (cur_cpu_spec->pvr_mask)
+	if (cur_cpu_spec->pvr_mask && cur_cpu_spec->cpu_name)
 		seq_printf(m, "%s", cur_cpu_spec->cpu_name);
 	else
 		seq_printf(m, "unknown (%08x)", pvr);
@@ -928,7 +928,7 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_PPC_MM_SLICES
 #ifdef CONFIG_PPC64
-	init_mm.context.addr_limit = TASK_SIZE_128TB;
+	init_mm.context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
 #else
 #error	"context.addr_limit not initialized."
 #endif
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 0d4dcaeaafcb..4640f6d64f8b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -49,6 +49,7 @@
 #include <asm/paca.h>
 #include <asm/time.h>
 #include <asm/cputable.h>
+#include <asm/dt_cpu_ftrs.h>
 #include <asm/sections.h>
 #include <asm/btext.h>
 #include <asm/nvram.h>
@@ -274,8 +275,10 @@ void __init early_setup(unsigned long dt_ptr)
 
 	/* -------- printk is _NOT_ safe to use here ! ------- */
 
-	/* Identify CPU type */
-	identify_cpu(0, mfspr(SPRN_PVR));
+	/* Try new device tree based feature discovery ... */
+	if (!dt_cpu_ftrs_init(__va(dt_ptr)))
+		/* Otherwise use the old style CPU table */
+		identify_cpu(0, mfspr(SPRN_PVR));
 
 	/* Assume we're on cpu 0 for now. Don't write to the paca yet! */
 	initialise_paca(&boot_paca, 0);
@@ -541,6 +544,9 @@ void __init initialize_cache_info(void)
 	dcache_bsize = ppc64_caches.l1d.block_size;
 	icache_bsize = ppc64_caches.l1i.block_size;
 
+	cur_cpu_spec->dcache_bsize = dcache_bsize;
+	cur_cpu_spec->icache_bsize = icache_bsize;
+
 	DBG(" <- initialize_cache_info()\n");
 }
 
@@ -610,6 +616,24 @@ void __init exc_lvl_early_init(void)
 #endif
 
 /*
+ * Emergency stacks are used for a range of things, from asynchronous
+ * NMIs (system reset, machine check) to synchronous, process context.
+ * We set preempt_count to zero, even though that isn't necessarily correct. To
+ * get the right value we'd need to copy it from the previous thread_info, but
+ * doing that might fault causing more problems.
+ * TODO: what to do with accounting?
+ */
+static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
+{
+	ti->task = NULL;
+	ti->cpu = cpu;
+	ti->preempt_count = 0;
+	ti->local_flags = 0;
+	ti->flags = 0;
+	klp_init_thread_info(ti);
+}
+
+/*
  * Stack space used when we detect a bad kernel stack pointer, and
  * early in SMP boots before relocation is enabled. Exclusive emergency
  * stack for machine checks.
@@ -627,24 +651,31 @@ void __init emergency_stack_init(void)
 	 * Since we use these as temporary stacks during secondary CPU
 	 * bringup, we need to get at them in real mode. This means they
 	 * must also be within the RMO region.
+	 *
+	 * The IRQ stacks allocated elsewhere in this file are zeroed and
+	 * initialized in kernel/irq.c. These are initialized here in order
+	 * to have emergency stacks available as early as possible.
 	 */
 	limit = min(safe_stack_limit(), ppc64_rma_size);
 
 	for_each_possible_cpu(i) {
 		struct thread_info *ti;
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
 		/* emergency stack for NMI exception handling. */
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
 
 		/* emergency stack for machine check exception handling. */
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
 #endif
 	}
@@ -655,7 +686,7 @@ void __init emergency_stack_init(void)
 
 static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 {
-	return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
+	return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align,
 				    __pa(MAX_DMA_ADDRESS));
 }
 
@@ -666,7 +697,7 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
 
 static int pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
-	if (cpu_to_node(from) == cpu_to_node(to))
+	if (early_cpu_to_node(from) == early_cpu_to_node(to))
 		return LOCAL_DISTANCE;
 	else
 		return REMOTE_DISTANCE;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index df2a41647d8e..1069f74fca47 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -97,7 +97,7 @@ int smp_generic_cpu_bootable(unsigned int nr)
 	/* Special case - we inhibit secondary thread startup
 	 * during boot if the user requests it.
 	 */
-	if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
+	if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
 		if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
 			return 0;
 		if (smt_enabled_at_boot
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 7c933a99f5d5..c98e90b4ea7b 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
 	stdu	r1,-SWITCH_FRAME_SIZE(r1)
 
 	/* Save all gprs to pt_regs */
-	SAVE_8GPRS(0,r1)
-	SAVE_8GPRS(8,r1)
-	SAVE_8GPRS(16,r1)
-	SAVE_8GPRS(24,r1)
+	SAVE_GPR(0, r1)
+	SAVE_10GPRS(2, r1)
+	SAVE_10GPRS(12, r1)
+	SAVE_10GPRS(22, r1)
+
+	/* Save previous stack pointer (r1) */
+	addi	r8, r1, SWITCH_FRAME_SIZE
+	std	r8, GPR1(r1)
 
 	/* Load special regs for save below */
 	mfmsr   r8
@@ -95,18 +99,44 @@ ftrace_call:
 	bl	ftrace_stub
 	nop
 
-	/* Load ctr with the possibly modified NIP */
-	ld	r3, _NIP(r1)
-	mtctr	r3
+	/* Load the possibly modified NIP */
+	ld	r15, _NIP(r1)
+
 #ifdef CONFIG_LIVEPATCH
-	cmpd	r14,r3		/* has NIP been altered? */
+	cmpd	r14, r15	/* has NIP been altered? */
+#endif
+
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
+	/* NIP has not been altered, skip over further checks */
+	beq	1f
+
+	/* Check if there is an active kprobe on us */
+	subi	r3, r14, 4
+	bl	is_current_kprobe_addr
+	nop
+
+	/*
+	 * If r3 == 1, then this is a kprobe/jprobe.
+	 * else, this is livepatched function.
+	 *
+	 * The conditional branch for livepatch_handler below will use the
+	 * result of this comparison. For kprobe/jprobe, we just need to branch to
+	 * the new NIP, not call livepatch_handler. The branch below is bne, so we
+	 * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
+	 * CR0[EQ] = (r3 == 1).
+	 */
+	cmpdi	r3, 1
+1:
 #endif
 
+	/* Load CTR with the possibly modified NIP */
+	mtctr	r15
+
 	/* Restore gprs */
-	REST_8GPRS(0,r1)
-	REST_8GPRS(8,r1)
-	REST_8GPRS(16,r1)
-	REST_8GPRS(24,r1)
+	REST_GPR(0,r1)
+	REST_10GPRS(2,r1)
+	REST_10GPRS(12,r1)
+	REST_10GPRS(22,r1)
 
 	/* Restore possibly modified LR */
 	ld	r0, _LINK(r1)
@@ -119,7 +149,10 @@ ftrace_call:
 	addi r1, r1, SWITCH_FRAME_SIZE
 
 #ifdef CONFIG_LIVEPATCH
-        /* Based on the cmpd above, if the NIP was altered handle livepatch */
+        /*
+	 * Based on the cmpd or cmpdi above, if the NIP was altered and we're
+	 * not on a kprobe/jprobe, then handle livepatch.
+	 */
 	bne-	livepatch_handler
 #endif
 
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 65a471de96de..0c52cb5d43f5 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -67,7 +67,7 @@ config KVM_BOOK3S_64
 	select KVM_BOOK3S_64_HANDLER
 	select KVM
 	select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
-	select SPAPR_TCE_IOMMU if IOMMU_SUPPORT
+	select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_SERIES || PPC_POWERNV)
 	---help---
 	  Support running unmodified book3s_64 and book3s_32 guest kernels
 	  in virtual machines on book3s_64 host processors.
@@ -197,6 +197,11 @@ config KVM_XICS
 	  Specification) interrupt controller architecture used on
 	  IBM POWER (pSeries) servers.
 
+config KVM_XIVE
+	bool
+	default y
+	depends on KVM_XICS && PPC_XIVE_NATIVE && KVM_BOOK3S_HV_POSSIBLE
+
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index b87ccde2137a..381a6ec0ff3b 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -46,7 +46,7 @@ kvm-e500mc-objs := \
 	e500_emulate.o
 kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
 
-kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \
+kvm-book3s_64-builtin-objs-$(CONFIG_SPAPR_TCE_IOMMU) := \
 	book3s_64_vio_hv.o
 
 kvm-pr-y := \
@@ -74,7 +74,7 @@ kvm-hv-y += \
 	book3s_64_mmu_radix.o
 
 kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
-	book3s_hv_rm_xics.o
+	book3s_hv_rm_xics.o book3s_hv_rm_xive.o
 
 ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
@@ -89,10 +89,12 @@ endif
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
 	book3s_xics.o
 
+kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o
+kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o
+
 kvm-book3s_64-module-objs := \
 	$(common-objs-y) \
 	book3s.o \
-	book3s_64_vio.o \
 	book3s_rtas.o \
 	$(kvm-book3s_64-objs-y)
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 8c4d7e9d27d2..72d977e30952 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -35,6 +35,7 @@
 #include <asm/kvm_book3s.h>
 #include <asm/mmu_context.h>
 #include <asm/page.h>
+#include <asm/xive.h>
 
 #include "book3s.h"
 #include "trace.h"
@@ -596,11 +597,14 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
 			break;
 #ifdef CONFIG_KVM_XICS
 		case KVM_REG_PPC_ICP_STATE:
-			if (!vcpu->arch.icp) {
+			if (!vcpu->arch.icp && !vcpu->arch.xive_vcpu) {
 				r = -ENXIO;
 				break;
 			}
-			*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
+			if (xive_enabled())
+				*val = get_reg_val(id, kvmppc_xive_get_icp(vcpu));
+			else
+				*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
 			break;
 #endif /* CONFIG_KVM_XICS */
 		case KVM_REG_PPC_FSCR:
@@ -666,12 +670,14 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
 #endif /* CONFIG_VSX */
 #ifdef CONFIG_KVM_XICS
 		case KVM_REG_PPC_ICP_STATE:
-			if (!vcpu->arch.icp) {
+			if (!vcpu->arch.icp && !vcpu->arch.xive_vcpu) {
 				r = -ENXIO;
 				break;
 			}
-			r = kvmppc_xics_set_icp(vcpu,
-						set_reg_val(id, *val));
+			if (xive_enabled())
+				r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val));
+			else
+				r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
 			break;
 #endif /* CONFIG_KVM_XICS */
 		case KVM_REG_PPC_FSCR:
@@ -942,6 +948,50 @@ int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall)
 	return kvm->arch.kvm_ops->hcall_implemented(hcall);
 }
 
+#ifdef CONFIG_KVM_XICS
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+		bool line_status)
+{
+	if (xive_enabled())
+		return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level,
+					   line_status);
+	else
+		return kvmppc_xics_set_irq(kvm, irq_source_id, irq, level,
+					   line_status);
+}
+
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq_entry,
+			      struct kvm *kvm, int irq_source_id,
+			      int level, bool line_status)
+{
+	return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
+			   level, line_status);
+}
+static int kvmppc_book3s_set_irq(struct kvm_kernel_irq_routing_entry *e,
+				 struct kvm *kvm, int irq_source_id, int level,
+				 bool line_status)
+{
+	return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status);
+}
+
+int kvm_irq_map_gsi(struct kvm *kvm,
+		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
+{
+	entries->gsi = gsi;
+	entries->type = KVM_IRQ_ROUTING_IRQCHIP;
+	entries->set = kvmppc_book3s_set_irq;
+	entries->irqchip.irqchip = 0;
+	entries->irqchip.pin = gsi;
+	return 1;
+}
+
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	return pin;
+}
+
+#endif /* CONFIG_KVM_XICS */
+
 static int kvmppc_book3s_init(void)
 {
 	int r;
@@ -952,12 +1002,25 @@ static int kvmppc_book3s_init(void)
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	r = kvmppc_book3s_init_pr();
 #endif
-	return r;
 
+#ifdef CONFIG_KVM_XICS
+#ifdef CONFIG_KVM_XIVE
+	if (xive_enabled()) {
+		kvmppc_xive_init_module();
+		kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
+	} else
+#endif
+		kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
+#endif
+	return r;
 }
 
 static void kvmppc_book3s_exit(void)
 {
+#ifdef CONFIG_KVM_XICS
+	if (xive_enabled())
+		kvmppc_xive_exit_module();
+#endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	kvmppc_book3s_exit_pr();
 #endif
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index eda0a8f6fae8..3adfd2f5301c 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -301,6 +301,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 	/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
 	/* 	    liobn, ioba, tce); */
 
+	/* For radix, we might be in virtual mode, so punt */
+	if (kvm_is_radix(vcpu->kvm))
+		return H_TOO_HARD;
+
 	stt = kvmppc_find_table(vcpu->kvm, liobn);
 	if (!stt)
 		return H_TOO_HARD;
@@ -381,6 +385,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 	bool prereg = false;
 	struct kvmppc_spapr_tce_iommu_table *stit;
 
+	/* For radix, we might be in virtual mode, so punt */
+	if (kvm_is_radix(vcpu->kvm))
+		return H_TOO_HARD;
+
 	stt = kvmppc_find_table(vcpu->kvm, liobn);
 	if (!stt)
 		return H_TOO_HARD;
@@ -491,6 +499,10 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 	long i, ret;
 	struct kvmppc_spapr_tce_iommu_table *stit;
 
+	/* For radix, we might be in virtual mode, so punt */
+	if (kvm_is_radix(vcpu->kvm))
+		return H_TOO_HARD;
+
 	stt = kvmppc_find_table(vcpu->kvm, liobn);
 	if (!stt)
 		return H_TOO_HARD;
@@ -527,6 +539,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 	return H_SUCCESS;
 }
 
+/* This can be called in either virtual mode or real mode */
 long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 		      unsigned long ioba)
 {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 549dd6070dee..8d1a365b8edc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -67,6 +67,7 @@
 #include <asm/mmu.h>
 #include <asm/opal.h>
 #include <asm/xics.h>
+#include <asm/xive.h>
 
 #include "book3s.h"
 
@@ -837,6 +838,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	case H_IPOLL:
 	case H_XIRR_X:
 		if (kvmppc_xics_enabled(vcpu)) {
+			if (xive_enabled()) {
+				ret = H_NOT_AVAILABLE;
+				return RESUME_GUEST;
+			}
 			ret = kvmppc_xics_hcall(vcpu, req);
 			break;
 		}
@@ -1481,6 +1486,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
 		break;
 	case KVM_REG_PPC_TB_OFFSET:
+		/*
+		 * POWER9 DD1 has an erratum where writing TBU40 causes
+		 * the timebase to lose ticks.  So we don't let the
+		 * timebase offset be changed on P9 DD1.  (It is
+		 * initialized to zero.)
+		 */
+		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+			break;
 		/* round up to multiple of 2^24 */
 		vcpu->arch.vcore->tb_offset =
 			ALIGN(set_reg_val(id, *val), 1UL << 24);
@@ -2902,12 +2915,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
 	int r;
 	int srcu_idx;
+	unsigned long ebb_regs[3] = {};	/* shut up GCC */
+	unsigned long user_tar = 0;
+	unsigned int user_vrsave;
 
 	if (!vcpu->arch.sane) {
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		return -EINVAL;
 	}
 
+	/*
+	 * Don't allow entry with a suspended transaction, because
+	 * the guest entry/exit code will lose it.
+	 * If the guest has TM enabled, save away their TM-related SPRs
+	 * (they will get restored by the TM unavailable interrupt).
+	 */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+	    (current->thread.regs->msr & MSR_TM)) {
+		if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+			run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+			run->fail_entry.hardware_entry_failure_reason = 0;
+			return -EINVAL;
+		}
+		current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+		current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+		current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+		current->thread.regs->msr &= ~MSR_TM;
+	}
+#endif
+
 	kvmppc_core_prepare_to_enter(vcpu);
 
 	/* No need to go into the guest when all we'll do is come back out */
@@ -2929,6 +2966,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 	flush_all_to_thread(current);
 
+	/* Save userspace EBB and other register values */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		ebb_regs[0] = mfspr(SPRN_EBBHR);
+		ebb_regs[1] = mfspr(SPRN_EBBRR);
+		ebb_regs[2] = mfspr(SPRN_BESCR);
+		user_tar = mfspr(SPRN_TAR);
+	}
+	user_vrsave = mfspr(SPRN_VRSAVE);
+
 	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
 	vcpu->arch.pgdir = current->mm->pgd;
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
@@ -2947,10 +2993,24 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			r = kvmppc_book3s_hv_page_fault(run, vcpu,
 				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
 			srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
-		} else if (r == RESUME_PASSTHROUGH)
-			r = kvmppc_xics_rm_complete(vcpu, 0);
+		} else if (r == RESUME_PASSTHROUGH) {
+			if (WARN_ON(xive_enabled()))
+				r = H_SUCCESS;
+			else
+				r = kvmppc_xics_rm_complete(vcpu, 0);
+		}
 	} while (is_kvmppc_resume_guest(r));
 
+	/* Restore userspace EBB and other register values */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		mtspr(SPRN_EBBHR, ebb_regs[0]);
+		mtspr(SPRN_EBBRR, ebb_regs[1]);
+		mtspr(SPRN_BESCR, ebb_regs[2]);
+		mtspr(SPRN_TAR, user_tar);
+		mtspr(SPRN_FSCR, current->thread.fscr);
+	}
+	mtspr(SPRN_VRSAVE, user_vrsave);
+
  out:
 	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
 	atomic_dec(&vcpu->kvm->arch.vcpus_running);
@@ -3400,10 +3460,20 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	/*
 	 * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed)
 	 * Set HVICE bit to enable hypervisor virtualization interrupts.
+	 * Set HEIC to prevent OS interrupts to go to hypervisor (should
+	 * be unnecessary but better safe than sorry in case we re-enable
+	 * EE in HV mode with this LPCR still set)
 	 */
 	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 		lpcr &= ~LPCR_VPM0;
-		lpcr |= LPCR_HVICE;
+		lpcr |= LPCR_HVICE | LPCR_HEIC;
+
+		/*
+		 * If xive is enabled, we route 0x500 interrupts directly
+		 * to the guest.
+		 */
+		if (xive_enabled())
+			lpcr |= LPCR_LPES;
 	}
 
 	/*
@@ -3533,7 +3603,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 	struct kvmppc_irq_map *irq_map;
 	struct kvmppc_passthru_irqmap *pimap;
 	struct irq_chip *chip;
-	int i;
+	int i, rc = 0;
 
 	if (!kvm_irq_bypass)
 		return 1;
@@ -3558,10 +3628,10 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 	/*
 	 * For now, we only support interrupts for which the EOI operation
 	 * is an OPAL call followed by a write to XIRR, since that's
-	 * what our real-mode EOI code does.
+	 * what our real-mode EOI code does, or a XIVE interrupt
 	 */
 	chip = irq_data_get_irq_chip(&desc->irq_data);
-	if (!chip || !is_pnv_opal_msi(chip)) {
+	if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) {
 		pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
 			host_irq, guest_gsi);
 		mutex_unlock(&kvm->lock);
@@ -3603,7 +3673,12 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 	if (i == pimap->n_mapped)
 		pimap->n_mapped++;
 
-	kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
+	if (xive_enabled())
+		rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc);
+	else
+		kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
+	if (rc)
+		irq_map->r_hwirq = 0;
 
 	mutex_unlock(&kvm->lock);
 
@@ -3614,7 +3689,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 {
 	struct irq_desc *desc;
 	struct kvmppc_passthru_irqmap *pimap;
-	int i;
+	int i, rc = 0;
 
 	if (!kvm_irq_bypass)
 		return 0;
@@ -3639,9 +3714,12 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 		return -ENODEV;
 	}
 
-	kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
+	if (xive_enabled())
+		rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc);
+	else
+		kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
 
-	/* invalidate the entry */
+	/* invalidate the entry (what do do on error from the above ?) */
 	pimap->mapped[i].r_hwirq = 0;
 
 	/*
@@ -3650,7 +3728,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 	 */
  unlock:
 	mutex_unlock(&kvm->lock);
-	return 0;
+	return rc;
 }
 
 static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
@@ -3928,7 +4006,7 @@ static int kvmppc_book3s_init_hv(void)
 	 * indirectly, via OPAL.
 	 */
 #ifdef CONFIG_SMP
-	if (!get_paca()->kvm_hstate.xics_phys) {
+	if (!xive_enabled() && !local_paca->kvm_hstate.xics_phys) {
 		struct device_node *np;
 
 		np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 9c71c72e65ce..ee4c2558c305 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -32,6 +32,24 @@
 
 #define KVM_CMA_CHUNK_ORDER	18
 
+#include "book3s_xics.h"
+#include "book3s_xive.h"
+
+/*
+ * The XIVE module will populate these when it loads
+ */
+unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
+unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
+int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
+		       unsigned long mfrr);
+int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
+int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
+EXPORT_SYMBOL_GPL(__xive_vm_h_xirr);
+EXPORT_SYMBOL_GPL(__xive_vm_h_ipoll);
+EXPORT_SYMBOL_GPL(__xive_vm_h_ipi);
+EXPORT_SYMBOL_GPL(__xive_vm_h_cppr);
+EXPORT_SYMBOL_GPL(__xive_vm_h_eoi);
+
 /*
  * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
  * should be power of 2.
@@ -189,7 +207,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
 
 long kvmppc_h_random(struct kvm_vcpu *vcpu)
 {
-	if (powernv_get_random_real_mode(&vcpu->arch.gpr[4]))
+	int r;
+
+	/* Only need to do the expensive mfmsr() on radix */
+	if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR))
+		r = powernv_get_random_long(&vcpu->arch.gpr[4]);
+	else
+		r = powernv_get_random_real_mode(&vcpu->arch.gpr[4]);
+	if (r)
 		return H_SUCCESS;
 
 	return H_HARDWARE;
@@ -211,6 +236,7 @@ void kvmhv_rm_send_ipi(int cpu)
 		__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
 		return;
 	}
+
 	/* On POWER8 for IPIs to threads in the same core, use msgsnd. */
 	if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
 	    cpu_first_thread_sibling(cpu) ==
@@ -407,6 +433,9 @@ static long kvmppc_read_one_intr(bool *again)
 	u8 host_ipi;
 	int64_t rc;
 
+	if (xive_enabled())
+		return 1;
+
 	/* see if a host IPI is pending */
 	host_ipi = local_paca->kvm_hstate.host_ipi;
 	if (host_ipi)
@@ -491,3 +520,84 @@ static long kvmppc_read_one_intr(bool *again)
 
 	return kvmppc_check_passthru(xisr, xirr, again);
 }
+
+#ifdef CONFIG_KVM_XICS
+static inline bool is_rm(void)
+{
+	return !(mfmsr() & MSR_DR);
+}
+
+unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
+{
+	if (xive_enabled()) {
+		if (is_rm())
+			return xive_rm_h_xirr(vcpu);
+		if (unlikely(!__xive_vm_h_xirr))
+			return H_NOT_AVAILABLE;
+		return __xive_vm_h_xirr(vcpu);
+	} else
+		return xics_rm_h_xirr(vcpu);
+}
+
+unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.gpr[5] = get_tb();
+	if (xive_enabled()) {
+		if (is_rm())
+			return xive_rm_h_xirr(vcpu);
+		if (unlikely(!__xive_vm_h_xirr))
+			return H_NOT_AVAILABLE;
+		return __xive_vm_h_xirr(vcpu);
+	} else
+		return xics_rm_h_xirr(vcpu);
+}
+
+unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
+{
+	if (xive_enabled()) {
+		if (is_rm())
+			return xive_rm_h_ipoll(vcpu, server);
+		if (unlikely(!__xive_vm_h_ipoll))
+			return H_NOT_AVAILABLE;
+		return __xive_vm_h_ipoll(vcpu, server);
+	} else
+		return H_TOO_HARD;
+}
+
+int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+		    unsigned long mfrr)
+{
+	if (xive_enabled()) {
+		if (is_rm())
+			return xive_rm_h_ipi(vcpu, server, mfrr);
+		if (unlikely(!__xive_vm_h_ipi))
+			return H_NOT_AVAILABLE;
+		return __xive_vm_h_ipi(vcpu, server, mfrr);
+	} else
+		return xics_rm_h_ipi(vcpu, server, mfrr);
+}
+
+int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	if (xive_enabled()) {
+		if (is_rm())
+			return xive_rm_h_cppr(vcpu, cppr);
+		if (unlikely(!__xive_vm_h_cppr))
+			return H_NOT_AVAILABLE;
+		return __xive_vm_h_cppr(vcpu, cppr);
+	} else
+		return xics_rm_h_cppr(vcpu, cppr);
+}
+
+int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	if (xive_enabled()) {
+		if (is_rm())
+			return xive_rm_h_eoi(vcpu, xirr);
+		if (unlikely(!__xive_vm_h_eoi))
+			return H_NOT_AVAILABLE;
+		return __xive_vm_h_eoi(vcpu, xirr);
+	} else
+		return xics_rm_h_eoi(vcpu, xirr);
+}
+#endif /* CONFIG_KVM_XICS */
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 0fdc4a28970b..404deb512844 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -121,10 +121,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	 * Put whatever is in the decrementer into the
 	 * hypervisor decrementer.
 	 */
+BEGIN_FTR_SECTION
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	ld	r6, VCORE_KVM(r5)
+	ld	r9, KVM_HOST_LPCR(r6)
+	andis.	r9, r9, LPCR_LD@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	mfspr	r8,SPRN_DEC
 	mftb	r7
-	mtspr	SPRN_HDEC,r8
+BEGIN_FTR_SECTION
+	/* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
+	bne	32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	extsw	r8,r8
+32:	mtspr	SPRN_HDEC,r8
 	add	r8,r8,r7
 	std	r8,HSTATE_DECEXP(r13)
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index ffde4507ddfd..2a862618f072 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -484,7 +484,7 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 }
 
 
-unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
+unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
 {
 	union kvmppc_icp_state old_state, new_state;
 	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -522,8 +522,8 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
 	return check_too_hard(xics, icp);
 }
 
-int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
-		    unsigned long mfrr)
+int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+		  unsigned long mfrr)
 {
 	union kvmppc_icp_state old_state, new_state;
 	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -609,7 +609,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 	return check_too_hard(xics, this_icp);
 }
 
-int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
 {
 	union kvmppc_icp_state old_state, new_state;
 	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -729,7 +729,7 @@ static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq)
 	return check_too_hard(xics, icp);
 }
 
-int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 {
 	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
 	struct kvmppc_icp *icp = vcpu->arch.icp;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c
new file mode 100644
index 000000000000..abf5f01b6eb1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_xive.c
@@ -0,0 +1,47 @@
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+#include <asm/synch.h>
+#include <asm/cputhreads.h>
+#include <asm/pgtable.h>
+#include <asm/ppc-opcode.h>
+#include <asm/pnv-pci.h>
+#include <asm/opal.h>
+#include <asm/smp.h>
+#include <asm/asm-prototypes.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+
+#include "book3s_xive.h"
+
+/* XXX */
+#include <asm/udbg.h>
+//#define DBG(fmt...) udbg_printf(fmt)
+#define DBG(fmt...) do { } while(0)
+
+static inline void __iomem *get_tima_phys(void)
+{
+	return local_paca->kvm_hstate.xive_tima_phys;
+}
+
+#undef XIVE_RUNTIME_CHECKS
+#define X_PFX xive_rm_
+#define X_STATIC
+#define X_STAT_PFX stat_rm_
+#define __x_tima		get_tima_phys()
+#define __x_eoi_page(xd)	((void __iomem *)((xd)->eoi_page))
+#define __x_trig_page(xd)	((void __iomem *)((xd)->trig_page))
+#define __x_readb	__raw_rm_readb
+#define __x_writeb	__raw_rm_writeb
+#define __x_readw	__raw_rm_readw
+#define __x_readq	__raw_rm_readq
+#define __x_writeq	__raw_rm_writeq
+
+#include "book3s_xive_template.c"
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 7c6477d1840a..4888dd494604 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -30,6 +30,13 @@
 #include <asm/book3s/64/mmu-hash.h>
 #include <asm/tm.h>
 #include <asm/opal.h>
+#include <asm/xive-regs.h>
+
+/* Sign-extend HDEC if not on POWER9 */
+#define EXTEND_HDEC(reg)			\
+BEGIN_FTR_SECTION;				\
+	extsw	reg, reg;			\
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 
 #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
 
@@ -37,6 +44,17 @@
 #define NAPPING_CEDE	1
 #define NAPPING_NOVCPU	2
 
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS			144
+#define STACK_SLOT_TRAP		(SFS-4)
+#define STACK_SLOT_TID		(SFS-16)
+#define STACK_SLOT_PSSCR	(SFS-24)
+#define STACK_SLOT_PID		(SFS-32)
+#define STACK_SLOT_IAMR		(SFS-40)
+#define STACK_SLOT_CIABR	(SFS-48)
+#define STACK_SLOT_DAWR		(SFS-56)
+#define STACK_SLOT_DAWRX	(SFS-64)
+
 /*
  * Call kvmppc_hv_entry in real mode.
  * Must be called with interrupts hard-disabled.
@@ -213,6 +231,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+	/* HDEC may be larger than DEC for arch >= v3.00, but since the */
+	/* HDEC value came from DEC in the first place, it will fit */
 	mfspr	r3, SPRN_HDEC
 	mtspr	SPRN_DEC, r3
 	/*
@@ -294,8 +314,9 @@ kvm_novcpu_wakeup:
 
 	/* See if our timeslice has expired (HDEC is negative) */
 	mfspr	r0, SPRN_HDEC
+	EXTEND_HDEC(r0)
 	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
-	cmpwi	r0, 0
+	cmpdi	r0, 0
 	blt	kvm_novcpu_exit
 
 	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -318,10 +339,10 @@ kvm_novcpu_exit:
 	bl	kvmhv_accumulate_time
 #endif
 13:	mr	r3, r12
-	stw	r12, 112-4(r1)
+	stw	r12, STACK_SLOT_TRAP(r1)
 	bl	kvmhv_commence_exit
 	nop
-	lwz	r12, 112-4(r1)
+	lwz	r12, STACK_SLOT_TRAP(r1)
 	b	kvmhv_switch_to_host
 
 /*
@@ -389,8 +410,8 @@ kvm_secondary_got_guest:
 	lbz	r4, HSTATE_PTID(r13)
 	cmpwi	r4, 0
 	bne	63f
-	lis	r6, 0x7fff
-	ori	r6, r6, 0xffff
+	LOAD_REG_ADDR(r6, decrementer_max)
+	ld	r6, 0(r6)
 	mtspr	SPRN_HDEC, r6
 	/* and set per-LPAR registers, if doing dynamic micro-threading */
 	ld	r6, HSTATE_SPLIT_MODE(r13)
@@ -544,11 +565,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  *                                                                            *
  *****************************************************************************/
 
-/* Stack frame offsets */
-#define STACK_SLOT_TID		(112-16)
-#define STACK_SLOT_PSSCR	(112-24)
-#define STACK_SLOT_PID		(112-32)
-
 .global kvmppc_hv_entry
 kvmppc_hv_entry:
 
@@ -564,7 +580,7 @@ kvmppc_hv_entry:
 	 */
 	mflr	r0
 	std	r0, PPC_LR_STKOFF(r1)
-	stdu	r1, -112(r1)
+	stdu	r1, -SFS(r1)
 
 	/* Save R1 in the PACA */
 	std	r1, HSTATE_HOST_R1(r13)
@@ -748,10 +764,20 @@ BEGIN_FTR_SECTION
 	mfspr	r5, SPRN_TIDR
 	mfspr	r6, SPRN_PSSCR
 	mfspr	r7, SPRN_PID
+	mfspr	r8, SPRN_IAMR
 	std	r5, STACK_SLOT_TID(r1)
 	std	r6, STACK_SLOT_PSSCR(r1)
 	std	r7, STACK_SLOT_PID(r1)
+	std	r8, STACK_SLOT_IAMR(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+	mfspr	r5, SPRN_CIABR
+	mfspr	r6, SPRN_DAWR
+	mfspr	r7, SPRN_DAWRX
+	std	r5, STACK_SLOT_CIABR(r1)
+	std	r6, STACK_SLOT_DAWR(r1)
+	std	r7, STACK_SLOT_DAWRX(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 BEGIN_FTR_SECTION
 	/* Set partition DABR */
@@ -967,9 +993,27 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 
 	/* Check if HDEC expires soon */
 	mfspr	r3, SPRN_HDEC
-	cmpwi	r3, 512		/* 1 microsecond */
+	EXTEND_HDEC(r3)
+	cmpdi	r3, 512		/* 1 microsecond */
 	blt	hdec_soon
 
+#ifdef CONFIG_KVM_XICS
+	/* We are entering the guest on that thread, push VCPU to XIVE */
+	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
+	cmpldi	cr0, r10, r0
+	beq	no_xive
+	ld	r11, VCPU_XIVE_SAVED_STATE(r4)
+	li	r9, TM_QW1_OS
+	stdcix	r11,r9,r10
+	eieio
+	lwz	r11, VCPU_XIVE_CAM_WORD(r4)
+	li	r9, TM_QW1_OS + TM_WORD2
+	stwcix	r11,r9,r10
+	li	r9, 1
+	stw	r9, VCPU_XIVE_PUSHED(r4)
+no_xive:
+#endif /* CONFIG_KVM_XICS */
+
 deliver_guest_interrupt:
 	ld	r6, VCPU_CTR(r4)
 	ld	r7, VCPU_XER(r4)
@@ -1307,6 +1351,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	blt	deliver_guest_interrupt
 
 guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
+#ifdef CONFIG_KVM_XICS
+	/* We are exiting, pull the VP from the XIVE */
+	lwz	r0, VCPU_XIVE_PUSHED(r9)
+	cmpwi	cr0, r0, 0
+	beq	1f
+	li	r7, TM_SPC_PULL_OS_CTX
+	li	r6, TM_QW1_OS
+	mfmsr	r0
+	andi.	r0, r0, MSR_IR		/* in real mode? */
+	beq	2f
+	ld	r10, HSTATE_XIVE_TIMA_VIRT(r13)
+	cmpldi	cr0, r10, 0
+	beq	1f
+	/* First load to pull the context, we ignore the value */
+	lwzx	r11, r7, r10
+	eieio
+	/* Second load to recover the context state (Words 0 and 1) */
+	ldx	r11, r6, r10
+	b	3f
+2:	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
+	cmpldi	cr0, r10, 0
+	beq	1f
+	/* First load to pull the context, we ignore the value */
+	lwzcix	r11, r7, r10
+	eieio
+	/* Second load to recover the context state (Words 0 and 1) */
+	ldcix	r11, r6, r10
+3:	std	r11, VCPU_XIVE_SAVED_STATE(r9)
+	/* Fixup some of the state for the next load */
+	li	r10, 0
+	li	r0, 0xff
+	stw	r10, VCPU_XIVE_PUSHED(r9)
+	stb	r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
+	stb	r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
+1:
+#endif /* CONFIG_KVM_XICS */
 	/* Save more register state  */
 	mfdar	r6
 	mfdsisr	r7
@@ -1451,11 +1531,10 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	 * set by the guest could disrupt the host.
 	 */
 	li	r0, 0
-	mtspr	SPRN_IAMR, r0
-	mtspr	SPRN_CIABR, r0
-	mtspr	SPRN_DAWRX, r0
+	mtspr	SPRN_PSPB, r0
 	mtspr	SPRN_WORT, r0
 BEGIN_FTR_SECTION
+	mtspr	SPRN_IAMR, r0
 	mtspr	SPRN_TCSCR, r0
 	/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
 	li	r0, 1
@@ -1471,6 +1550,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	std	r6,VCPU_UAMOR(r9)
 	li	r6,0
 	mtspr	SPRN_AMR,r6
+	mtspr	SPRN_UAMOR, r6
 
 	/* Switch DSCR back to host value */
 	mfspr	r8, SPRN_DSCR
@@ -1616,12 +1696,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 	/* Restore host values of some registers */
 BEGIN_FTR_SECTION
+	ld	r5, STACK_SLOT_CIABR(r1)
+	ld	r6, STACK_SLOT_DAWR(r1)
+	ld	r7, STACK_SLOT_DAWRX(r1)
+	mtspr	SPRN_CIABR, r5
+	mtspr	SPRN_DAWR, r6
+	mtspr	SPRN_DAWRX, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
 	ld	r5, STACK_SLOT_TID(r1)
 	ld	r6, STACK_SLOT_PSSCR(r1)
 	ld	r7, STACK_SLOT_PID(r1)
+	ld	r8, STACK_SLOT_IAMR(r1)
 	mtspr	SPRN_TIDR, r5
 	mtspr	SPRN_PSSCR, r6
 	mtspr	SPRN_PID, r7
+	mtspr	SPRN_IAMR, r8
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 BEGIN_FTR_SECTION
 	PPC_INVALIDATE_ERAT
@@ -1765,8 +1855,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	li	r0, KVM_GUEST_MODE_NONE
 	stb	r0, HSTATE_IN_GUEST(r13)
 
-	ld	r0, 112+PPC_LR_STKOFF(r1)
-	addi	r1, r1, 112
+	ld	r0, SFS+PPC_LR_STKOFF(r1)
+	addi	r1, r1, SFS
 	mtlr	r0
 	blr
 
@@ -2011,7 +2101,7 @@ hcall_real_table:
 	.long	DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
 	.long	DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
 	.long	DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
-	.long	0		/* 0x70 - H_IPOLL */
+	.long	DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table
 	.long	DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
 #else
 	.long	0		/* 0x64 - H_EOI */
@@ -2181,7 +2271,11 @@ hcall_real_table:
 	.long	0		/* 0x2f0 */
 	.long	0		/* 0x2f4 */
 	.long	0		/* 0x2f8 */
-	.long	0		/* 0x2fc */
+#ifdef CONFIG_KVM_XICS
+	.long	DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table
+#else
+	.long	0		/* 0x2fc - H_XIRR_X*/
+#endif
 	.long	DOTSYM(kvmppc_h_random) - hcall_real_table
 	.globl	hcall_real_table_end
 hcall_real_table_end:
@@ -2308,12 +2402,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 	mfspr	r3, SPRN_DEC
 	mfspr	r4, SPRN_HDEC
 	mftb	r5
-	cmpw	r3, r4
+	extsw	r3, r3
+	EXTEND_HDEC(r4)
+	cmpd	r3, r4
 	ble	67f
 	mtspr	SPRN_DEC, r4
 67:
 	/* save expiry time of guest decrementer */
-	extsw	r3, r3
 	add	r3, r3, r5
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	ld	r5, HSTATE_KVM_VCORE(r13)
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index bcbeeb62dd13..8a4205fa774f 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -50,7 +50,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
 	pteg_addr = get_pteg_addr(vcpu, pte_index);
 
 	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
-	copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
+	ret = H_FUNCTION;
+	if (copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)))
+		goto done;
 	hpte = pteg;
 
 	ret = H_PTEG_FULL;
@@ -71,7 +73,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
 	hpte[0] = cpu_to_be64(kvmppc_get_gpr(vcpu, 6));
 	hpte[1] = cpu_to_be64(kvmppc_get_gpr(vcpu, 7));
 	pteg_addr += i * HPTE_SIZE;
-	copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE);
+	ret = H_FUNCTION;
+	if (copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE))
+		goto done;
 	kvmppc_set_gpr(vcpu, 4, pte_index | i);
 	ret = H_SUCCESS;
 
@@ -93,7 +97,9 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
 
 	pteg = get_pteg_addr(vcpu, pte_index);
 	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
-	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+	ret = H_FUNCTION;
+	if (copy_from_user(pte, (void __user *)pteg, sizeof(pte)))
+		goto done;
 	pte[0] = be64_to_cpu((__force __be64)pte[0]);
 	pte[1] = be64_to_cpu((__force __be64)pte[1]);
 
@@ -103,7 +109,9 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
 	    ((flags & H_ANDCOND) && (pte[0] & avpn) != 0))
 		goto done;
 
-	copy_to_user((void __user *)pteg, &v, sizeof(v));
+	ret = H_FUNCTION;
+	if (copy_to_user((void __user *)pteg, &v, sizeof(v)))
+		goto done;
 
 	rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
 	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
@@ -171,7 +179,10 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
 		}
 
 		pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
-		copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+		if (copy_from_user(pte, (void __user *)pteg, sizeof(pte))) {
+			ret = H_FUNCTION;
+			break;
+		}
 		pte[0] = be64_to_cpu((__force __be64)pte[0]);
 		pte[1] = be64_to_cpu((__force __be64)pte[1]);
 
@@ -184,7 +195,10 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
 			tsh |= H_BULK_REMOVE_NOT_FOUND;
 		} else {
 			/* Splat the pteg in (userland) hpt */
-			copy_to_user((void __user *)pteg, &v, sizeof(v));
+			if (copy_to_user((void __user *)pteg, &v, sizeof(v))) {
+				ret = H_FUNCTION;
+				break;
+			}
 
 			rb = compute_tlbie_rb(pte[0], pte[1],
 					      tsh & H_BULK_REMOVE_PTEX);
@@ -211,7 +225,9 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
 
 	pteg = get_pteg_addr(vcpu, pte_index);
 	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
-	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+	ret = H_FUNCTION;
+	if (copy_from_user(pte, (void __user *)pteg, sizeof(pte)))
+		goto done;
 	pte[0] = be64_to_cpu((__force __be64)pte[0]);
 	pte[1] = be64_to_cpu((__force __be64)pte[1]);
 
@@ -234,7 +250,9 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
 	pte[0] = (__force u64)cpu_to_be64(pte[0]);
 	pte[1] = (__force u64)cpu_to_be64(pte[1]);
-	copy_to_user((void __user *)pteg, pte, sizeof(pte));
+	ret = H_FUNCTION;
+	if (copy_to_user((void __user *)pteg, pte, sizeof(pte)))
+		goto done;
 	ret = H_SUCCESS;
 
  done:
@@ -244,36 +262,37 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
-static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
+static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu)
 {
-	unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
-	unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
-	unsigned long tce = kvmppc_get_gpr(vcpu, 6);
 	long rc;
 
-	rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce);
+	rc = kvmppc_h_logical_ci_load(vcpu);
 	if (rc == H_TOO_HARD)
 		return EMULATE_FAIL;
 	kvmppc_set_gpr(vcpu, 3, rc);
 	return EMULATE_DONE;
 }
 
-static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu)
+static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
 {
 	long rc;
 
-	rc = kvmppc_h_logical_ci_load(vcpu);
+	rc = kvmppc_h_logical_ci_store(vcpu);
 	if (rc == H_TOO_HARD)
 		return EMULATE_FAIL;
 	kvmppc_set_gpr(vcpu, 3, rc);
 	return EMULATE_DONE;
 }
 
-static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
 {
+	unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+	unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+	unsigned long tce = kvmppc_get_gpr(vcpu, 6);
 	long rc;
 
-	rc = kvmppc_h_logical_ci_store(vcpu);
+	rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce);
 	if (rc == H_TOO_HARD)
 		return EMULATE_FAIL;
 	kvmppc_set_gpr(vcpu, 3, rc);
@@ -311,6 +330,23 @@ static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
+#else /* CONFIG_SPAPR_TCE_IOMMU */
+static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
+{
+	return EMULATE_FAIL;
+}
+
+static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu)
+{
+	return EMULATE_FAIL;
+}
+
+static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
+{
+	return EMULATE_FAIL;
+}
+#endif /* CONFIG_SPAPR_TCE_IOMMU */
+
 static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 {
 	long rc = kvmppc_xics_hcall(vcpu, cmd);
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 20528701835b..2d3b2b1cc272 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -16,6 +16,7 @@
 #include <asm/kvm_ppc.h>
 #include <asm/hvcall.h>
 #include <asm/rtas.h>
+#include <asm/xive.h>
 
 #ifdef CONFIG_KVM_XICS
 static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
@@ -32,7 +33,10 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
 	server = be32_to_cpu(args->args[1]);
 	priority = be32_to_cpu(args->args[2]);
 
-	rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
+	if (xive_enabled())
+		rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority);
+	else
+		rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
 	if (rc)
 		rc = -3;
 out:
@@ -52,7 +56,10 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
 	irq = be32_to_cpu(args->args[0]);
 
 	server = priority = 0;
-	rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
+	if (xive_enabled())
+		rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority);
+	else
+		rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
 	if (rc) {
 		rc = -3;
 		goto out;
@@ -76,7 +83,10 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
 
 	irq = be32_to_cpu(args->args[0]);
 
-	rc = kvmppc_xics_int_off(vcpu->kvm, irq);
+	if (xive_enabled())
+		rc = kvmppc_xive_int_off(vcpu->kvm, irq);
+	else
+		rc = kvmppc_xics_int_off(vcpu->kvm, irq);
 	if (rc)
 		rc = -3;
 out:
@@ -95,7 +105,10 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
 
 	irq = be32_to_cpu(args->args[0]);
 
-	rc = kvmppc_xics_int_on(vcpu->kvm, irq);
+	if (xive_enabled())
+		rc = kvmppc_xive_int_on(vcpu->kvm, irq);
+	else
+		rc = kvmppc_xics_int_on(vcpu->kvm, irq);
 	if (rc)
 		rc = -3;
 out:
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 459b72cb617a..d329b2add7e2 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -1306,8 +1306,8 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
 	return 0;
 }
 
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
-		bool line_status)
+int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+			bool line_status)
 {
 	struct kvmppc_xics *xics = kvm->arch.xics;
 
@@ -1316,14 +1316,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 	return ics_deliver_irq(xics, irq, level);
 }
 
-int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq_entry,
-			      struct kvm *kvm, int irq_source_id,
-			      int level, bool line_status)
-{
-	return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
-			   level, line_status);
-}
-
 static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 	struct kvmppc_xics *xics = dev->private;
@@ -1457,29 +1449,6 @@ void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
 	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
 }
 
-static int xics_set_irq(struct kvm_kernel_irq_routing_entry *e,
-			struct kvm *kvm, int irq_source_id, int level,
-			bool line_status)
-{
-	return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status);
-}
-
-int kvm_irq_map_gsi(struct kvm *kvm,
-		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
-{
-	entries->gsi = gsi;
-	entries->type = KVM_IRQ_ROUTING_IRQCHIP;
-	entries->set = xics_set_irq;
-	entries->irqchip.irqchip = 0;
-	entries->irqchip.pin = gsi;
-	return 1;
-}
-
-int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	return pin;
-}
-
 void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq,
 			    unsigned long host_irq)
 {
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index ec5474cf70c6..453c9e518c19 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -10,6 +10,7 @@
 #ifndef _KVM_PPC_BOOK3S_XICS_H
 #define _KVM_PPC_BOOK3S_XICS_H
 
+#ifdef CONFIG_KVM_XICS
 /*
  * We use a two-level tree to store interrupt source information.
  * There are up to 1024 ICS nodes, each of which can represent
@@ -144,5 +145,11 @@ static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
 	return ics;
 }
 
+extern unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu);
+extern int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+			 unsigned long mfrr);
+extern int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
+extern int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
 
+#endif /* CONFIG_KVM_XICS */
 #endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
new file mode 100644
index 000000000000..ffe1da95033a
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -0,0 +1,1894 @@
+/*
+ * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "xive-kvm: " fmt
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/percpu.h>
+#include <linux/cpumask.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/debug.h>
+#include <asm/debugfs.h>
+#include <asm/time.h>
+#include <asm/opal.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xive.h"
+
+
+/*
+ * Virtual mode variants of the hcalls for use on radix/radix
+ * with AIL. They require the VCPU's VP to be "pushed"
+ *
+ * We still instanciate them here because we use some of the
+ * generated utility functions as well in this file.
+ */
+#define XIVE_RUNTIME_CHECKS
+#define X_PFX xive_vm_
+#define X_STATIC static
+#define X_STAT_PFX stat_vm_
+#define __x_tima		xive_tima
+#define __x_eoi_page(xd)	((void __iomem *)((xd)->eoi_mmio))
+#define __x_trig_page(xd)	((void __iomem *)((xd)->trig_mmio))
+#define __x_readb	__raw_readb
+#define __x_writeb	__raw_writeb
+#define __x_readw	__raw_readw
+#define __x_readq	__raw_readq
+#define __x_writeq	__raw_writeq
+
+#include "book3s_xive_template.c"
+
+/*
+ * We leave a gap of a couple of interrupts in the queue to
+ * account for the IPI and additional safety guard.
+ */
+#define XIVE_Q_GAP	2
+
+/*
+ * This is a simple trigger for a generic XIVE IRQ. This must
+ * only be called for interrupts that support a trigger page
+ */
+static bool xive_irq_trigger(struct xive_irq_data *xd)
+{
+	/* This should be only for MSIs */
+	if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI))
+		return false;
+
+	/* Those interrupts should always have a trigger page */
+	if (WARN_ON(!xd->trig_mmio))
+		return false;
+
+	out_be64(xd->trig_mmio, 0);
+
+	return true;
+}
+
+static irqreturn_t xive_esc_irq(int irq, void *data)
+{
+	struct kvm_vcpu *vcpu = data;
+
+	/* We use the existing H_PROD mechanism to wake up the target */
+	vcpu->arch.prodded = 1;
+	smp_mb();
+	if (vcpu->arch.ceded)
+		kvmppc_fast_vcpu_kick(vcpu);
+
+	return IRQ_HANDLED;
+}
+
+static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_q *q = &xc->queues[prio];
+	char *name = NULL;
+	int rc;
+
+	/* Already there ? */
+	if (xc->esc_virq[prio])
+		return 0;
+
+	/* Hook up the escalation interrupt */
+	xc->esc_virq[prio] = irq_create_mapping(NULL, q->esc_irq);
+	if (!xc->esc_virq[prio]) {
+		pr_err("Failed to map escalation interrupt for queue %d of VCPU %d\n",
+		       prio, xc->server_num);
+		return -EIO;
+	}
+
+	/*
+	 * Future improvement: start with them disabled
+	 * and handle DD2 and later scheme of merged escalation
+	 * interrupts
+	 */
+	name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+			 vcpu->kvm->arch.lpid, xc->server_num, prio);
+	if (!name) {
+		pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
+		       prio, xc->server_num);
+		rc = -ENOMEM;
+		goto error;
+	}
+	rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
+			 IRQF_NO_THREAD, name, vcpu);
+	if (rc) {
+		pr_err("Failed to request escalation interrupt for queue %d of VCPU %d\n",
+		       prio, xc->server_num);
+		goto error;
+	}
+	xc->esc_virq_names[prio] = name;
+	return 0;
+error:
+	irq_dispose_mapping(xc->esc_virq[prio]);
+	xc->esc_virq[prio] = 0;
+	kfree(name);
+	return rc;
+}
+
+static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = xc->xive;
+	struct xive_q *q =  &xc->queues[prio];
+	void *qpage;
+	int rc;
+
+	if (WARN_ON(q->qpage))
+		return 0;
+
+	/* Allocate the queue and retrieve infos on current node for now */
+	qpage = (__be32 *)__get_free_pages(GFP_KERNEL, xive->q_page_order);
+	if (!qpage) {
+		pr_err("Failed to allocate queue %d for VCPU %d\n",
+		       prio, xc->server_num);
+		return -ENOMEM;;
+	}
+	memset(qpage, 0, 1 << xive->q_order);
+
+	/*
+	 * Reconfigure the queue. This will set q->qpage only once the
+	 * queue is fully configured. This is a requirement for prio 0
+	 * as we will stop doing EOIs for every IPI as soon as we observe
+	 * qpage being non-NULL, and instead will only EOI when we receive
+	 * corresponding queue 0 entries
+	 */
+	rc = xive_native_configure_queue(xc->vp_id, q, prio, qpage,
+					 xive->q_order, true);
+	if (rc)
+		pr_err("Failed to configure queue %d for VCPU %d\n",
+		       prio, xc->server_num);
+	return rc;
+}
+
+/* Called with kvm_lock held */
+static int xive_check_provisioning(struct kvm *kvm, u8 prio)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvm_vcpu *vcpu;
+	int i, rc;
+
+	lockdep_assert_held(&kvm->lock);
+
+	/* Already provisioned ? */
+	if (xive->qmap & (1 << prio))
+		return 0;
+
+	pr_devel("Provisioning prio... %d\n", prio);
+
+	/* Provision each VCPU and enable escalations */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!vcpu->arch.xive_vcpu)
+			continue;
+		rc = xive_provision_queue(vcpu, prio);
+		if (rc == 0)
+			xive_attach_escalation(vcpu, prio);
+		if (rc)
+			return rc;
+	}
+
+	/* Order previous stores and mark it as provisioned */
+	mb();
+	xive->qmap |= (1 << prio);
+	return 0;
+}
+
+static void xive_inc_q_pending(struct kvm *kvm, u32 server, u8 prio)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvmppc_xive_vcpu *xc;
+	struct xive_q *q;
+
+	/* Locate target server */
+	vcpu = kvmppc_xive_find_server(kvm, server);
+	if (!vcpu) {
+		pr_warn("%s: Can't find server %d\n", __func__, server);
+		return;
+	}
+	xc = vcpu->arch.xive_vcpu;
+	if (WARN_ON(!xc))
+		return;
+
+	q = &xc->queues[prio];
+	atomic_inc(&q->pending_count);
+}
+
+static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_q *q;
+	u32 max;
+
+	if (WARN_ON(!xc))
+		return -ENXIO;
+	if (!xc->valid)
+		return -ENXIO;
+
+	q = &xc->queues[prio];
+	if (WARN_ON(!q->qpage))
+		return -ENXIO;
+
+	/* Calculate max number of interrupts in that queue. */
+	max = (q->msk + 1) - XIVE_Q_GAP;
+	return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY;
+}
+
+static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
+{
+	struct kvm_vcpu *vcpu;
+	int i, rc;
+
+	/* Locate target server */
+	vcpu = kvmppc_xive_find_server(kvm, *server);
+	if (!vcpu) {
+		pr_devel("Can't find server %d\n", *server);
+		return -EINVAL;
+	}
+
+	pr_devel("Finding irq target on 0x%x/%d...\n", *server, prio);
+
+	/* Try pick it */
+	rc = xive_try_pick_queue(vcpu, prio);
+	if (rc == 0)
+		return rc;
+
+	pr_devel(" .. failed, looking up candidate...\n");
+
+	/* Failed, pick another VCPU */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!vcpu->arch.xive_vcpu)
+			continue;
+		rc = xive_try_pick_queue(vcpu, prio);
+		if (rc == 0) {
+			*server = vcpu->arch.xive_vcpu->server_num;
+			pr_devel("  found on 0x%x/%d\n", *server, prio);
+			return rc;
+		}
+	}
+	pr_devel("  no available target !\n");
+
+	/* No available target ! */
+	return -EBUSY;
+}
+
+static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
+			     struct kvmppc_xive_src_block *sb,
+			     struct kvmppc_xive_irq_state *state)
+{
+	struct xive_irq_data *xd;
+	u32 hw_num;
+	u8 old_prio;
+	u64 val;
+
+	/*
+	 * Take the lock, set masked, try again if racing
+	 * with H_EOI
+	 */
+	for (;;) {
+		arch_spin_lock(&sb->lock);
+		old_prio = state->guest_priority;
+		state->guest_priority = MASKED;
+		mb();
+		if (!state->in_eoi)
+			break;
+		state->guest_priority = old_prio;
+		arch_spin_unlock(&sb->lock);
+	}
+
+	/* No change ? Bail */
+	if (old_prio == MASKED)
+		return old_prio;
+
+	/* Get the right irq */
+	kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+	/*
+	 * If the interrupt is marked as needing masking via
+	 * firmware, we do it here. Firmware masking however
+	 * is "lossy", it won't return the old p and q bits
+	 * and won't set the interrupt to a state where it will
+	 * record queued ones. If this is an issue we should do
+	 * lazy masking instead.
+	 *
+	 * For now, we work around this in unmask by forcing
+	 * an interrupt whenever we unmask a non-LSI via FW
+	 * (if ever).
+	 */
+	if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
+		xive_native_configure_irq(hw_num,
+					  xive->vp_base + state->act_server,
+					  MASKED, state->number);
+		/* set old_p so we can track if an H_EOI was done */
+		state->old_p = true;
+		state->old_q = false;
+	} else {
+		/* Set PQ to 10, return old P and old Q and remember them */
+		val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10);
+		state->old_p = !!(val & 2);
+		state->old_q = !!(val & 1);
+
+		/*
+		 * Synchronize hardware to sensure the queues are updated
+		 * when masking
+		 */
+		xive_native_sync_source(hw_num);
+	}
+
+	return old_prio;
+}
+
+static void xive_lock_for_unmask(struct kvmppc_xive_src_block *sb,
+				 struct kvmppc_xive_irq_state *state)
+{
+	/*
+	 * Take the lock try again if racing with H_EOI
+	 */
+	for (;;) {
+		arch_spin_lock(&sb->lock);
+		if (!state->in_eoi)
+			break;
+		arch_spin_unlock(&sb->lock);
+	}
+}
+
+static void xive_finish_unmask(struct kvmppc_xive *xive,
+			       struct kvmppc_xive_src_block *sb,
+			       struct kvmppc_xive_irq_state *state,
+			       u8 prio)
+{
+	struct xive_irq_data *xd;
+	u32 hw_num;
+
+	/* If we aren't changing a thing, move on */
+	if (state->guest_priority != MASKED)
+		goto bail;
+
+	/* Get the right irq */
+	kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+	/*
+	 * See command in xive_lock_and_mask() concerning masking
+	 * via firmware.
+	 */
+	if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
+		xive_native_configure_irq(hw_num,
+					  xive->vp_base + state->act_server,
+					  state->act_priority, state->number);
+		/* If an EOI is needed, do it here */
+		if (!state->old_p)
+			xive_vm_source_eoi(hw_num, xd);
+		/* If this is not an LSI, force a trigger */
+		if (!(xd->flags & OPAL_XIVE_IRQ_LSI))
+			xive_irq_trigger(xd);
+		goto bail;
+	}
+
+	/* Old Q set, set PQ to 11 */
+	if (state->old_q)
+		xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
+
+	/*
+	 * If not old P, then perform an "effective" EOI,
+	 * on the source. This will handle the cases where
+	 * FW EOI is needed.
+	 */
+	if (!state->old_p)
+		xive_vm_source_eoi(hw_num, xd);
+
+	/* Synchronize ordering and mark unmasked */
+	mb();
+bail:
+	state->guest_priority = prio;
+}
+
+/*
+ * Target an interrupt to a given server/prio, this will fallback
+ * to another server if necessary and perform the HW targetting
+ * updates as needed
+ *
+ * NOTE: Must be called with the state lock held
+ */
+static int xive_target_interrupt(struct kvm *kvm,
+				 struct kvmppc_xive_irq_state *state,
+				 u32 server, u8 prio)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	u32 hw_num;
+	int rc;
+
+	/*
+	 * This will return a tentative server and actual
+	 * priority. The count for that new target will have
+	 * already been incremented.
+	 */
+	rc = xive_select_target(kvm, &server, prio);
+
+	/*
+	 * We failed to find a target ? Not much we can do
+	 * at least until we support the GIQ.
+	 */
+	if (rc)
+		return rc;
+
+	/*
+	 * Increment the old queue pending count if there
+	 * was one so that the old queue count gets adjusted later
+	 * when observed to be empty.
+	 */
+	if (state->act_priority != MASKED)
+		xive_inc_q_pending(kvm,
+				   state->act_server,
+				   state->act_priority);
+	/*
+	 * Update state and HW
+	 */
+	state->act_priority = prio;
+	state->act_server = server;
+
+	/* Get the right irq */
+	kvmppc_xive_select_irq(state, &hw_num, NULL);
+
+	return xive_native_configure_irq(hw_num,
+					 xive->vp_base + server,
+					 prio, state->number);
+}
+
+/*
+ * Targetting rules: In order to avoid losing track of
+ * pending interrupts accross mask and unmask, which would
+ * allow queue overflows, we implement the following rules:
+ *
+ *  - Unless it was never enabled (or we run out of capacity)
+ *    an interrupt is always targetted at a valid server/queue
+ *    pair even when "masked" by the guest. This pair tends to
+ *    be the last one used but it can be changed under some
+ *    circumstances. That allows us to separate targetting
+ *    from masking, we only handle accounting during (re)targetting,
+ *    this also allows us to let an interrupt drain into its target
+ *    queue after masking, avoiding complex schemes to remove
+ *    interrupts out of remote processor queues.
+ *
+ *  - When masking, we set PQ to 10 and save the previous value
+ *    of P and Q.
+ *
+ *  - When unmasking, if saved Q was set, we set PQ to 11
+ *    otherwise we leave PQ to the HW state which will be either
+ *    10 if nothing happened or 11 if the interrupt fired while
+ *    masked. Effectively we are OR'ing the previous Q into the
+ *    HW Q.
+ *
+ *    Then if saved P is clear, we do an effective EOI (Q->P->Trigger)
+ *    which will unmask the interrupt and shoot a new one if Q was
+ *    set.
+ *
+ *    Otherwise (saved P is set) we leave PQ unchanged (so 10 or 11,
+ *    effectively meaning an H_EOI from the guest is still expected
+ *    for that interrupt).
+ *
+ *  - If H_EOI occurs while masked, we clear the saved P.
+ *
+ *  - When changing target, we account on the new target and
+ *    increment a separate "pending" counter on the old one.
+ *    This pending counter will be used to decrement the old
+ *    target's count when its queue has been observed empty.
+ */
+
+int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
+			 u32 priority)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u8 new_act_prio;
+	int rc = 0;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	pr_devel("set_xive ! irq 0x%x server 0x%x prio %d\n",
+		 irq, server, priority);
+
+	/* First, check provisioning of queues */
+	if (priority != MASKED)
+		rc = xive_check_provisioning(xive->kvm,
+			      xive_prio_from_guest(priority));
+	if (rc) {
+		pr_devel("  provisioning failure %d !\n", rc);
+		return rc;
+	}
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	/*
+	 * We first handle masking/unmasking since the locking
+	 * might need to be retried due to EOIs, we'll handle
+	 * targetting changes later. These functions will return
+	 * with the SB lock held.
+	 *
+	 * xive_lock_and_mask() will also set state->guest_priority
+	 * but won't otherwise change other fields of the state.
+	 *
+	 * xive_lock_for_unmask will not actually unmask, this will
+	 * be done later by xive_finish_unmask() once the targetting
+	 * has been done, so we don't try to unmask an interrupt
+	 * that hasn't yet been targetted.
+	 */
+	if (priority == MASKED)
+		xive_lock_and_mask(xive, sb, state);
+	else
+		xive_lock_for_unmask(sb, state);
+
+
+	/*
+	 * Then we handle targetting.
+	 *
+	 * First calculate a new "actual priority"
+	 */
+	new_act_prio = state->act_priority;
+	if (priority != MASKED)
+		new_act_prio = xive_prio_from_guest(priority);
+
+	pr_devel(" new_act_prio=%x act_server=%x act_prio=%x\n",
+		 new_act_prio, state->act_server, state->act_priority);
+
+	/*
+	 * Then check if we actually need to change anything,
+	 *
+	 * The condition for re-targetting the interrupt is that
+	 * we have a valid new priority (new_act_prio is not 0xff)
+	 * and either the server or the priority changed.
+	 *
+	 * Note: If act_priority was ff and the new priority is
+	 *       also ff, we don't do anything and leave the interrupt
+	 *       untargetted. An attempt of doing an int_on on an
+	 *       untargetted interrupt will fail. If that is a problem
+	 *       we could initialize interrupts with valid default
+	 */
+
+	if (new_act_prio != MASKED &&
+	    (state->act_server != server ||
+	     state->act_priority != new_act_prio))
+		rc = xive_target_interrupt(kvm, state, server, new_act_prio);
+
+	/*
+	 * Perform the final unmasking of the interrupt source
+	 * if necessary
+	 */
+	if (priority != MASKED)
+		xive_finish_unmask(xive, sb, state, priority);
+
+	/*
+	 * Finally Update saved_priority to match. Only int_on/off
+	 * set this field to a different value.
+	 */
+	state->saved_priority = priority;
+
+	arch_spin_unlock(&sb->lock);
+	return rc;
+}
+
+int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+			 u32 *priority)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+	arch_spin_lock(&sb->lock);
+	*server = state->guest_server;
+	*priority = state->guest_priority;
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+
+int kvmppc_xive_int_on(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	pr_devel("int_on(irq=0x%x)\n", irq);
+
+	/*
+	 * Check if interrupt was not targetted
+	 */
+	if (state->act_priority == MASKED) {
+		pr_devel("int_on on untargetted interrupt\n");
+		return -EINVAL;
+	}
+
+	/* If saved_priority is 0xff, do nothing */
+	if (state->saved_priority == MASKED)
+		return 0;
+
+	/*
+	 * Lock and unmask it.
+	 */
+	xive_lock_for_unmask(sb, state);
+	xive_finish_unmask(xive, sb, state, state->saved_priority);
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+
+int kvmppc_xive_int_off(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	pr_devel("int_off(irq=0x%x)\n", irq);
+
+	/*
+	 * Lock and mask
+	 */
+	state->saved_priority = xive_lock_and_mask(xive, sb, state);
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+
+static bool xive_restore_pending_irq(struct kvmppc_xive *xive, u32 irq)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u16 idx;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return false;
+	state = &sb->irq_state[idx];
+	if (!state->valid)
+		return false;
+
+	/*
+	 * Trigger the IPI. This assumes we never restore a pass-through
+	 * interrupt which should be safe enough
+	 */
+	xive_irq_trigger(&state->ipi_data);
+
+	return true;
+}
+
+u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+	if (!xc)
+		return 0;
+
+	/* Return the per-cpu state for state saving/migration */
+	return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
+	       (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+}
+
+int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	u8 cppr, mfrr;
+	u32 xisr;
+
+	if (!xc || !xive)
+		return -ENOENT;
+
+	/* Grab individual state fields. We don't use pending_pri */
+	cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
+	xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
+		KVM_REG_PPC_ICP_XISR_MASK;
+	mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
+
+	pr_devel("set_icp vcpu %d cppr=0x%x mfrr=0x%x xisr=0x%x\n",
+		 xc->server_num, cppr, mfrr, xisr);
+
+	/*
+	 * We can't update the state of a "pushed" VCPU, but that
+	 * shouldn't happen.
+	 */
+	if (WARN_ON(vcpu->arch.xive_pushed))
+		return -EIO;
+
+	/* Update VCPU HW saved state */
+	vcpu->arch.xive_saved_state.cppr = cppr;
+	xc->hw_cppr = xc->cppr = cppr;
+
+	/*
+	 * Update MFRR state. If it's not 0xff, we mark the VCPU as
+	 * having a pending MFRR change, which will re-evaluate the
+	 * target. The VCPU will thus potentially get a spurious
+	 * interrupt but that's not a big deal.
+	 */
+	xc->mfrr = mfrr;
+	if (mfrr < cppr)
+		xive_irq_trigger(&xc->vp_ipi_data);
+
+	/*
+	 * Now saved XIRR is "interesting". It means there's something in
+	 * the legacy "1 element" queue... for an IPI we simply ignore it,
+	 * as the MFRR restore will handle that. For anything else we need
+	 * to force a resend of the source.
+	 * However the source may not have been setup yet. If that's the
+	 * case, we keep that info and increment a counter in the xive to
+	 * tell subsequent xive_set_source() to go look.
+	 */
+	if (xisr > XICS_IPI && !xive_restore_pending_irq(xive, xisr)) {
+		xc->delayed_irq = xisr;
+		xive->delayed_irqs++;
+		pr_devel("  xisr restore delayed\n");
+	}
+
+	return 0;
+}
+
+int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+			   struct irq_desc *host_desc)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	struct irq_data *host_data = irq_desc_get_irq_data(host_desc);
+	unsigned int host_irq = irq_desc_get_irq(host_desc);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data);
+	u16 idx;
+	u8 prio;
+	int rc;
+
+	if (!xive)
+		return -ENODEV;
+
+	pr_devel("set_mapped girq 0x%lx host HW irq 0x%x...\n",guest_irq, hw_irq);
+
+	sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	/*
+	 * Mark the passed-through interrupt as going to a VCPU,
+	 * this will prevent further EOIs and similar operations
+	 * from the XIVE code. It will also mask the interrupt
+	 * to either PQ=10 or 11 state, the latter if the interrupt
+	 * is pending. This will allow us to unmask or retrigger it
+	 * after routing it to the guest with a simple EOI.
+	 *
+	 * The "state" argument is a "token", all it needs is to be
+	 * non-NULL to switch to passed-through or NULL for the
+	 * other way around. We may not yet have an actual VCPU
+	 * target here and we don't really care.
+	 */
+	rc = irq_set_vcpu_affinity(host_irq, state);
+	if (rc) {
+		pr_err("Failed to set VCPU affinity for irq %d\n", host_irq);
+		return rc;
+	}
+
+	/*
+	 * Mask and read state of IPI. We need to know if its P bit
+	 * is set as that means it's potentially already using a
+	 * queue entry in the target
+	 */
+	prio = xive_lock_and_mask(xive, sb, state);
+	pr_devel(" old IPI prio %02x P:%d Q:%d\n", prio,
+		 state->old_p, state->old_q);
+
+	/* Turn the IPI hard off */
+	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+
+	/* Grab info about irq */
+	state->pt_number = hw_irq;
+	state->pt_data = irq_data_get_irq_handler_data(host_data);
+
+	/*
+	 * Configure the IRQ to match the existing configuration of
+	 * the IPI if it was already targetted. Otherwise this will
+	 * mask the interrupt in a lossy way (act_priority is 0xff)
+	 * which is fine for a never started interrupt.
+	 */
+	xive_native_configure_irq(hw_irq,
+				  xive->vp_base + state->act_server,
+				  state->act_priority, state->number);
+
+	/*
+	 * We do an EOI to enable the interrupt (and retrigger if needed)
+	 * if the guest has the interrupt unmasked and the P bit was *not*
+	 * set in the IPI. If it was set, we know a slot may still be in
+	 * use in the target queue thus we have to wait for a guest
+	 * originated EOI
+	 */
+	if (prio != MASKED && !state->old_p)
+		xive_vm_source_eoi(hw_irq, state->pt_data);
+
+	/* Clear old_p/old_q as they are no longer relevant */
+	state->old_p = state->old_q = false;
+
+	/* Restore guest prio (unlocks EOI) */
+	mb();
+	state->guest_priority = prio;
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped);
+
+int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+			   struct irq_desc *host_desc)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	unsigned int host_irq = irq_desc_get_irq(host_desc);
+	u16 idx;
+	u8 prio;
+	int rc;
+
+	if (!xive)
+		return -ENODEV;
+
+	pr_devel("clr_mapped girq 0x%lx...\n", guest_irq);
+
+	sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	/*
+	 * Mask and read state of IRQ. We need to know if its P bit
+	 * is set as that means it's potentially already using a
+	 * queue entry in the target
+	 */
+	prio = xive_lock_and_mask(xive, sb, state);
+	pr_devel(" old IRQ prio %02x P:%d Q:%d\n", prio,
+		 state->old_p, state->old_q);
+
+	/*
+	 * If old_p is set, the interrupt is pending, we switch it to
+	 * PQ=11. This will force a resend in the host so the interrupt
+	 * isn't lost to whatver host driver may pick it up
+	 */
+	if (state->old_p)
+		xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_11);
+
+	/* Release the passed-through interrupt to the host */
+	rc = irq_set_vcpu_affinity(host_irq, NULL);
+	if (rc) {
+		pr_err("Failed to clr VCPU affinity for irq %d\n", host_irq);
+		return rc;
+	}
+
+	/* Forget about the IRQ */
+	state->pt_number = 0;
+	state->pt_data = NULL;
+
+	/* Reconfigure the IPI */
+	xive_native_configure_irq(state->ipi_number,
+				  xive->vp_base + state->act_server,
+				  state->act_priority, state->number);
+
+	/*
+	 * If old_p is set (we have a queue entry potentially
+	 * occupied) or the interrupt is masked, we set the IPI
+	 * to PQ=10 state. Otherwise we just re-enable it (PQ=00).
+	 */
+	if (prio == MASKED || state->old_p)
+		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_10);
+	else
+		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_00);
+
+	/* Restore guest prio (unlocks EOI) */
+	mb();
+	state->guest_priority = prio;
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped);
+
+static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	int i, j;
+
+	for (i = 0; i <= xive->max_sbid; i++) {
+		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+
+		if (!sb)
+			continue;
+		for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
+			struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
+
+			if (!state->valid)
+				continue;
+			if (state->act_priority == MASKED)
+				continue;
+			if (state->act_server != xc->server_num)
+				continue;
+
+			/* Clean it up */
+			arch_spin_lock(&sb->lock);
+			state->act_priority = MASKED;
+			xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+			xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
+			if (state->pt_number) {
+				xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
+				xive_native_configure_irq(state->pt_number, 0, MASKED, 0);
+			}
+			arch_spin_unlock(&sb->lock);
+		}
+	}
+}
+
+void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = xc->xive;
+	int i;
+
+	pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
+
+	/* Ensure no interrupt is still routed to that VP */
+	xc->valid = false;
+	kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+	/* Mask the VP IPI */
+	xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
+
+	/* Disable the VP */
+	xive_native_disable_vp(xc->vp_id);
+
+	/* Free the queues & associated interrupts */
+	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+		struct xive_q *q = &xc->queues[i];
+
+		/* Free the escalation irq */
+		if (xc->esc_virq[i]) {
+			free_irq(xc->esc_virq[i], vcpu);
+			irq_dispose_mapping(xc->esc_virq[i]);
+			kfree(xc->esc_virq_names[i]);
+		}
+		/* Free the queue */
+		xive_native_disable_queue(xc->vp_id, q, i);
+		if (q->qpage) {
+			free_pages((unsigned long)q->qpage,
+				   xive->q_page_order);
+			q->qpage = NULL;
+		}
+	}
+
+	/* Free the IPI */
+	if (xc->vp_ipi) {
+		xive_cleanup_irq_data(&xc->vp_ipi_data);
+		xive_native_free_irq(xc->vp_ipi);
+	}
+	/* Free the VP */
+	kfree(xc);
+}
+
+int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+			     struct kvm_vcpu *vcpu, u32 cpu)
+{
+	struct kvmppc_xive *xive = dev->private;
+	struct kvmppc_xive_vcpu *xc;
+	int i, r = -EBUSY;
+
+	pr_devel("connect_vcpu(cpu=%d)\n", cpu);
+
+	if (dev->ops != &kvm_xive_ops) {
+		pr_devel("Wrong ops !\n");
+		return -EPERM;
+	}
+	if (xive->kvm != vcpu->kvm)
+		return -EPERM;
+	if (vcpu->arch.irq_type)
+		return -EBUSY;
+	if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
+		pr_devel("Duplicate !\n");
+		return -EEXIST;
+	}
+	if (cpu >= KVM_MAX_VCPUS) {
+		pr_devel("Out of bounds !\n");
+		return -EINVAL;
+	}
+	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+	if (!xc)
+		return -ENOMEM;
+
+	/* We need to synchronize with queue provisioning */
+	mutex_lock(&vcpu->kvm->lock);
+	vcpu->arch.xive_vcpu = xc;
+	xc->xive = xive;
+	xc->vcpu = vcpu;
+	xc->server_num = cpu;
+	xc->vp_id = xive->vp_base + cpu;
+	xc->mfrr = 0xff;
+	xc->valid = true;
+
+	r = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
+	if (r)
+		goto bail;
+
+	/* Configure VCPU fields for use by assembly push/pull */
+	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
+	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
+
+	/* Allocate IPI */
+	xc->vp_ipi = xive_native_alloc_irq();
+	if (!xc->vp_ipi) {
+		r = -EIO;
+		goto bail;
+	}
+	pr_devel(" IPI=0x%x\n", xc->vp_ipi);
+
+	r = xive_native_populate_irq_data(xc->vp_ipi, &xc->vp_ipi_data);
+	if (r)
+		goto bail;
+
+	/*
+	 * Initialize queues. Initially we set them all for no queueing
+	 * and we enable escalation for queue 0 only which we'll use for
+	 * our mfrr change notifications. If the VCPU is hot-plugged, we
+	 * do handle provisioning however.
+	 */
+	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+		struct xive_q *q = &xc->queues[i];
+
+		/* Is queue already enabled ? Provision it */
+		if (xive->qmap & (1 << i)) {
+			r = xive_provision_queue(vcpu, i);
+			if (r == 0)
+				xive_attach_escalation(vcpu, i);
+			if (r)
+				goto bail;
+		} else {
+			r = xive_native_configure_queue(xc->vp_id,
+							q, i, NULL, 0, true);
+			if (r) {
+				pr_err("Failed to configure queue %d for VCPU %d\n",
+				       i, cpu);
+				goto bail;
+			}
+		}
+	}
+
+	/* If not done above, attach priority 0 escalation */
+	r = xive_attach_escalation(vcpu, 0);
+	if (r)
+		goto bail;
+
+	/* Enable the VP */
+	r = xive_native_enable_vp(xc->vp_id);
+	if (r)
+		goto bail;
+
+	/* Route the IPI */
+	r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
+	if (!r)
+		xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_00);
+
+bail:
+	mutex_unlock(&vcpu->kvm->lock);
+	if (r) {
+		kvmppc_xive_cleanup_vcpu(vcpu);
+		return r;
+	}
+
+	vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
+	return 0;
+}
+
+/*
+ * Scanning of queues before/after migration save
+ */
+static void xive_pre_save_set_queued(struct kvmppc_xive *xive, u32 irq)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u16 idx;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return;
+
+	state = &sb->irq_state[idx];
+
+	/* Some sanity checking */
+	if (!state->valid) {
+		pr_err("invalid irq 0x%x in cpu queue!\n", irq);
+		return;
+	}
+
+	/*
+	 * If the interrupt is in a queue it should have P set.
+	 * We warn so that gets reported. A backtrace isn't useful
+	 * so no need to use a WARN_ON.
+	 */
+	if (!state->saved_p)
+		pr_err("Interrupt 0x%x is marked in a queue but P not set !\n", irq);
+
+	/* Set flag */
+	state->in_queue = true;
+}
+
+static void xive_pre_save_mask_irq(struct kvmppc_xive *xive,
+				   struct kvmppc_xive_src_block *sb,
+				   u32 irq)
+{
+	struct kvmppc_xive_irq_state *state = &sb->irq_state[irq];
+
+	if (!state->valid)
+		return;
+
+	/* Mask and save state, this will also sync HW queues */
+	state->saved_scan_prio = xive_lock_and_mask(xive, sb, state);
+
+	/* Transfer P and Q */
+	state->saved_p = state->old_p;
+	state->saved_q = state->old_q;
+
+	/* Unlock */
+	arch_spin_unlock(&sb->lock);
+}
+
+static void xive_pre_save_unmask_irq(struct kvmppc_xive *xive,
+				     struct kvmppc_xive_src_block *sb,
+				     u32 irq)
+{
+	struct kvmppc_xive_irq_state *state = &sb->irq_state[irq];
+
+	if (!state->valid)
+		return;
+
+	/*
+	 * Lock / exclude EOI (not technically necessary if the
+	 * guest isn't running concurrently. If this becomes a
+	 * performance issue we can probably remove the lock.
+	 */
+	xive_lock_for_unmask(sb, state);
+
+	/* Restore mask/prio if it wasn't masked */
+	if (state->saved_scan_prio != MASKED)
+		xive_finish_unmask(xive, sb, state, state->saved_scan_prio);
+
+	/* Unlock */
+	arch_spin_unlock(&sb->lock);
+}
+
+static void xive_pre_save_queue(struct kvmppc_xive *xive, struct xive_q *q)
+{
+	u32 idx = q->idx;
+	u32 toggle = q->toggle;
+	u32 irq;
+
+	do {
+		irq = __xive_read_eq(q->qpage, q->msk, &idx, &toggle);
+		if (irq > XICS_IPI)
+			xive_pre_save_set_queued(xive, irq);
+	} while(irq);
+}
+
+static void xive_pre_save_scan(struct kvmppc_xive *xive)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	int i, j;
+
+	/*
+	 * See comment in xive_get_source() about how this
+	 * work. Collect a stable state for all interrupts
+	 */
+	for (i = 0; i <= xive->max_sbid; i++) {
+		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+		if (!sb)
+			continue;
+		for (j = 0;  j < KVMPPC_XICS_IRQ_PER_ICS; j++)
+			xive_pre_save_mask_irq(xive, sb, j);
+	}
+
+	/* Then scan the queues and update the "in_queue" flag */
+	kvm_for_each_vcpu(i, vcpu, xive->kvm) {
+		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+		if (!xc)
+			continue;
+		for (j = 0; j < KVMPPC_XIVE_Q_COUNT; j++) {
+			if (xc->queues[i].qpage)
+				xive_pre_save_queue(xive, &xc->queues[i]);
+		}
+	}
+
+	/* Finally restore interrupt states */
+	for (i = 0; i <= xive->max_sbid; i++) {
+		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+		if (!sb)
+			continue;
+		for (j = 0;  j < KVMPPC_XICS_IRQ_PER_ICS; j++)
+			xive_pre_save_unmask_irq(xive, sb, j);
+	}
+}
+
+static void xive_post_save_scan(struct kvmppc_xive *xive)
+{
+	u32 i, j;
+
+	/* Clear all the in_queue flags */
+	for (i = 0; i <= xive->max_sbid; i++) {
+		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+		if (!sb)
+			continue;
+		for (j = 0;  j < KVMPPC_XICS_IRQ_PER_ICS; j++)
+			sb->irq_state[j].in_queue = false;
+	}
+
+	/* Next get_source() will do a new scan */
+	xive->saved_src_count = 0;
+}
+
+/*
+ * This returns the source configuration and state to user space.
+ */
+static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u64 val, prio;
+	u16 idx;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -ENOENT;
+
+	state = &sb->irq_state[idx];
+
+	if (!state->valid)
+		return -ENOENT;
+
+	pr_devel("get_source(%ld)...\n", irq);
+
+	/*
+	 * So to properly save the state into something that looks like a
+	 * XICS migration stream we cannot treat interrupts individually.
+	 *
+	 * We need, instead, mask them all (& save their previous PQ state)
+	 * to get a stable state in the HW, then sync them to ensure that
+	 * any interrupt that had already fired hits its queue, and finally
+	 * scan all the queues to collect which interrupts are still present
+	 * in the queues, so we can set the "pending" flag on them and
+	 * they can be resent on restore.
+	 *
+	 * So we do it all when the "first" interrupt gets saved, all the
+	 * state is collected at that point, the rest of xive_get_source()
+	 * will merely collect and convert that state to the expected
+	 * userspace bit mask.
+	 */
+	if (xive->saved_src_count == 0)
+		xive_pre_save_scan(xive);
+	xive->saved_src_count++;
+
+	/* Convert saved state into something compatible with xics */
+	val = state->guest_server;
+	prio = state->saved_scan_prio;
+
+	if (prio == MASKED) {
+		val |= KVM_XICS_MASKED;
+		prio = state->saved_priority;
+	}
+	val |= prio << KVM_XICS_PRIORITY_SHIFT;
+	if (state->lsi) {
+		val |= KVM_XICS_LEVEL_SENSITIVE;
+		if (state->saved_p)
+			val |= KVM_XICS_PENDING;
+	} else {
+		if (state->saved_p)
+			val |= KVM_XICS_PRESENTED;
+
+		if (state->saved_q)
+			val |= KVM_XICS_QUEUED;
+
+		/*
+		 * We mark it pending (which will attempt a re-delivery)
+		 * if we are in a queue *or* we were masked and had
+		 * Q set which is equivalent to the XICS "masked pending"
+		 * state
+		 */
+		if (state->in_queue || (prio == MASKED && state->saved_q))
+			val |= KVM_XICS_PENDING;
+	}
+
+	/*
+	 * If that was the last interrupt saved, reset the
+	 * in_queue flags
+	 */
+	if (xive->saved_src_count == xive->src_count)
+		xive_post_save_scan(xive);
+
+	/* Copy the result to userspace */
+	if (put_user(val, ubufp))
+		return -EFAULT;
+
+	return 0;
+}
+
+static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *xive,
+							   int irq)
+{
+	struct kvm *kvm = xive->kvm;
+	struct kvmppc_xive_src_block *sb;
+	int i, bid;
+
+	bid = irq >> KVMPPC_XICS_ICS_SHIFT;
+
+	mutex_lock(&kvm->lock);
+
+	/* block already exists - somebody else got here first */
+	if (xive->src_blocks[bid])
+		goto out;
+
+	/* Create the ICS */
+	sb = kzalloc(sizeof(*sb), GFP_KERNEL);
+	if (!sb)
+		goto out;
+
+	sb->id = bid;
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i;
+		sb->irq_state[i].guest_priority = MASKED;
+		sb->irq_state[i].saved_priority = MASKED;
+		sb->irq_state[i].act_priority = MASKED;
+	}
+	smp_wmb();
+	xive->src_blocks[bid] = sb;
+
+	if (bid > xive->max_sbid)
+		xive->max_sbid = bid;
+
+out:
+	mutex_unlock(&kvm->lock);
+	return xive->src_blocks[bid];
+}
+
+static bool xive_check_delayed_irq(struct kvmppc_xive *xive, u32 irq)
+{
+	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu = NULL;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+		if (!xc)
+			continue;
+
+		if (xc->delayed_irq == irq) {
+			xc->delayed_irq = 0;
+			xive->delayed_irqs--;
+			return true;
+		}
+	}
+	return false;
+}
+
+static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 idx;
+	u64 val;
+	u8 act_prio, guest_prio;
+	u32 server;
+	int rc = 0;
+
+	if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
+		return -ENOENT;
+
+	pr_devel("set_source(irq=0x%lx)\n", irq);
+
+	/* Find the source */
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb) {
+		pr_devel("No source, creating source block...\n");
+		sb = xive_create_src_block(xive, irq);
+		if (!sb) {
+			pr_devel("Failed to create block...\n");
+			return -ENOMEM;
+		}
+	}
+	state = &sb->irq_state[idx];
+
+	/* Read user passed data */
+	if (get_user(val, ubufp)) {
+		pr_devel("fault getting user info !\n");
+		return -EFAULT;
+	}
+
+	server = val & KVM_XICS_DESTINATION_MASK;
+	guest_prio = val >> KVM_XICS_PRIORITY_SHIFT;
+
+	pr_devel("  val=0x016%llx (server=0x%x, guest_prio=%d)\n",
+		 val, server, guest_prio);
+	/*
+	 * If the source doesn't already have an IPI, allocate
+	 * one and get the corresponding data
+	 */
+	if (!state->ipi_number) {
+		state->ipi_number = xive_native_alloc_irq();
+		if (state->ipi_number == 0) {
+			pr_devel("Failed to allocate IPI !\n");
+			return -ENOMEM;
+		}
+		xive_native_populate_irq_data(state->ipi_number, &state->ipi_data);
+		pr_devel(" src_ipi=0x%x\n", state->ipi_number);
+	}
+
+	/*
+	 * We use lock_and_mask() to set us in the right masked
+	 * state. We will override that state from the saved state
+	 * further down, but this will handle the cases of interrupts
+	 * that need FW masking. We set the initial guest_priority to
+	 * 0 before calling it to ensure it actually performs the masking.
+	 */
+	state->guest_priority = 0;
+	xive_lock_and_mask(xive, sb, state);
+
+	/*
+	 * Now, we select a target if we have one. If we don't we
+	 * leave the interrupt untargetted. It means that an interrupt
+	 * can become "untargetted" accross migration if it was masked
+	 * by set_xive() but there is little we can do about it.
+	 */
+
+	/* First convert prio and mark interrupt as untargetted */
+	act_prio = xive_prio_from_guest(guest_prio);
+	state->act_priority = MASKED;
+	state->guest_server = server;
+
+	/*
+	 * We need to drop the lock due to the mutex below. Hopefully
+	 * nothing is touching that interrupt yet since it hasn't been
+	 * advertized to a running guest yet
+	 */
+	arch_spin_unlock(&sb->lock);
+
+	/* If we have a priority target the interrupt */
+	if (act_prio != MASKED) {
+		/* First, check provisioning of queues */
+		mutex_lock(&xive->kvm->lock);
+		rc = xive_check_provisioning(xive->kvm, act_prio);
+		mutex_unlock(&xive->kvm->lock);
+
+		/* Target interrupt */
+		if (rc == 0)
+			rc = xive_target_interrupt(xive->kvm, state,
+						   server, act_prio);
+		/*
+		 * If provisioning or targetting failed, leave it
+		 * alone and masked. It will remain disabled until
+		 * the guest re-targets it.
+		 */
+	}
+
+	/*
+	 * Find out if this was a delayed irq stashed in an ICP,
+	 * in which case, treat it as pending
+	 */
+	if (xive->delayed_irqs && xive_check_delayed_irq(xive, irq)) {
+		val |= KVM_XICS_PENDING;
+		pr_devel("  Found delayed ! forcing PENDING !\n");
+	}
+
+	/* Cleanup the SW state */
+	state->old_p = false;
+	state->old_q = false;
+	state->lsi = false;
+	state->asserted = false;
+
+	/* Restore LSI state */
+	if (val & KVM_XICS_LEVEL_SENSITIVE) {
+		state->lsi = true;
+		if (val & KVM_XICS_PENDING)
+			state->asserted = true;
+		pr_devel("  LSI ! Asserted=%d\n", state->asserted);
+	}
+
+	/*
+	 * Restore P and Q. If the interrupt was pending, we
+	 * force both P and Q, which will trigger a resend.
+	 *
+	 * That means that a guest that had both an interrupt
+	 * pending (queued) and Q set will restore with only
+	 * one instance of that interrupt instead of 2, but that
+	 * is perfectly fine as coalescing interrupts that haven't
+	 * been presented yet is always allowed.
+	 */
+	if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+		state->old_p = true;
+	if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
+		state->old_q = true;
+
+	pr_devel("  P=%d, Q=%d\n", state->old_p, state->old_q);
+
+	/*
+	 * If the interrupt was unmasked, update guest priority and
+	 * perform the appropriate state transition and do a
+	 * re-trigger if necessary.
+	 */
+	if (val & KVM_XICS_MASKED) {
+		pr_devel("  masked, saving prio\n");
+		state->guest_priority = MASKED;
+		state->saved_priority = guest_prio;
+	} else {
+		pr_devel("  unmasked, restoring to prio %d\n", guest_prio);
+		xive_finish_unmask(xive, sb, state, guest_prio);
+		state->saved_priority = guest_prio;
+	}
+
+	/* Increment the number of valid sources and mark this one valid */
+	if (!state->valid)
+		xive->src_count++;
+	state->valid = true;
+
+	return 0;
+}
+
+int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+			bool line_status)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+
+	/* Perform locklessly .... (we need to do some RCUisms here...) */
+	state = &sb->irq_state[idx];
+	if (!state->valid)
+		return -EINVAL;
+
+	/* We don't allow a trigger on a passed-through interrupt */
+	if (state->pt_number)
+		return -EINVAL;
+
+	if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL)
+		state->asserted = 1;
+	else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
+		state->asserted = 0;
+		return 0;
+	}
+
+	/* Trigger the IPI */
+	xive_irq_trigger(&state->ipi_data);
+
+	return 0;
+}
+
+static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct kvmppc_xive *xive = dev->private;
+
+	/* We honor the existing XICS ioctl */
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		return xive_set_source(xive, attr->attr, attr->addr);
+	}
+	return -ENXIO;
+}
+
+static int xive_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct kvmppc_xive *xive = dev->private;
+
+	/* We honor the existing XICS ioctl */
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		return xive_get_source(xive, attr->attr, attr->addr);
+	}
+	return -ENXIO;
+}
+
+static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	/* We honor the same limits as XICS, at least for now */
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
+		    attr->attr < KVMPPC_XICS_NR_IRQS)
+			return 0;
+		break;
+	}
+	return -ENXIO;
+}
+
+static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd)
+{
+	xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
+	xive_native_configure_irq(hw_num, 0, MASKED, 0);
+	xive_cleanup_irq_data(xd);
+}
+
+static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
+{
+	int i;
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
+
+		if (!state->valid)
+			continue;
+
+		kvmppc_xive_cleanup_irq(state->ipi_number, &state->ipi_data);
+		xive_native_free_irq(state->ipi_number);
+
+		/* Pass-through, cleanup too */
+		if (state->pt_number)
+			kvmppc_xive_cleanup_irq(state->pt_number, state->pt_data);
+
+		state->valid = false;
+	}
+}
+
+static void kvmppc_xive_free(struct kvm_device *dev)
+{
+	struct kvmppc_xive *xive = dev->private;
+	struct kvm *kvm = xive->kvm;
+	int i;
+
+	debugfs_remove(xive->dentry);
+
+	if (kvm)
+		kvm->arch.xive = NULL;
+
+	/* Mask and free interrupts */
+	for (i = 0; i <= xive->max_sbid; i++) {
+		if (xive->src_blocks[i])
+			kvmppc_xive_free_sources(xive->src_blocks[i]);
+		kfree(xive->src_blocks[i]);
+		xive->src_blocks[i] = NULL;
+	}
+
+	if (xive->vp_base != XIVE_INVALID_VP)
+		xive_native_free_vp_block(xive->vp_base);
+
+
+	kfree(xive);
+	kfree(dev);
+}
+
+static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
+{
+	struct kvmppc_xive *xive;
+	struct kvm *kvm = dev->kvm;
+	int ret = 0;
+
+	pr_devel("Creating xive for partition\n");
+
+	xive = kzalloc(sizeof(*xive), GFP_KERNEL);
+	if (!xive)
+		return -ENOMEM;
+
+	dev->private = xive;
+	xive->dev = dev;
+	xive->kvm = kvm;
+
+	/* Already there ? */
+	if (kvm->arch.xive)
+		ret = -EEXIST;
+	else
+		kvm->arch.xive = xive;
+
+	/* We use the default queue size set by the host */
+	xive->q_order = xive_native_default_eq_shift();
+	if (xive->q_order < PAGE_SHIFT)
+		xive->q_page_order = 0;
+	else
+		xive->q_page_order = xive->q_order - PAGE_SHIFT;
+
+	/* Allocate a bunch of VPs */
+	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
+	pr_devel("VP_Base=%x\n", xive->vp_base);
+
+	if (xive->vp_base == XIVE_INVALID_VP)
+		ret = -ENOMEM;
+
+	if (ret) {
+		kfree(xive);
+		return ret;
+	}
+
+	return 0;
+}
+
+
+static int xive_debug_show(struct seq_file *m, void *private)
+{
+	struct kvmppc_xive *xive = m->private;
+	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu;
+	u64 t_rm_h_xirr = 0;
+	u64 t_rm_h_ipoll = 0;
+	u64 t_rm_h_cppr = 0;
+	u64 t_rm_h_eoi = 0;
+	u64 t_rm_h_ipi = 0;
+	u64 t_vm_h_xirr = 0;
+	u64 t_vm_h_ipoll = 0;
+	u64 t_vm_h_cppr = 0;
+	u64 t_vm_h_eoi = 0;
+	u64 t_vm_h_ipi = 0;
+	unsigned int i;
+
+	if (!kvm)
+		return 0;
+
+	seq_printf(m, "=========\nVCPU state\n=========\n");
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+		if (!xc)
+			continue;
+
+		seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x"
+			   " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
+			   xc->server_num, xc->cppr, xc->hw_cppr,
+			   xc->mfrr, xc->pending,
+			   xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
+
+		t_rm_h_xirr += xc->stat_rm_h_xirr;
+		t_rm_h_ipoll += xc->stat_rm_h_ipoll;
+		t_rm_h_cppr += xc->stat_rm_h_cppr;
+		t_rm_h_eoi += xc->stat_rm_h_eoi;
+		t_rm_h_ipi += xc->stat_rm_h_ipi;
+		t_vm_h_xirr += xc->stat_vm_h_xirr;
+		t_vm_h_ipoll += xc->stat_vm_h_ipoll;
+		t_vm_h_cppr += xc->stat_vm_h_cppr;
+		t_vm_h_eoi += xc->stat_vm_h_eoi;
+		t_vm_h_ipi += xc->stat_vm_h_ipi;
+	}
+
+	seq_printf(m, "Hcalls totals\n");
+	seq_printf(m, " H_XIRR  R=%10lld V=%10lld\n", t_rm_h_xirr, t_vm_h_xirr);
+	seq_printf(m, " H_IPOLL R=%10lld V=%10lld\n", t_rm_h_ipoll, t_vm_h_ipoll);
+	seq_printf(m, " H_CPPR  R=%10lld V=%10lld\n", t_rm_h_cppr, t_vm_h_cppr);
+	seq_printf(m, " H_EOI   R=%10lld V=%10lld\n", t_rm_h_eoi, t_vm_h_eoi);
+	seq_printf(m, " H_IPI   R=%10lld V=%10lld\n", t_rm_h_ipi, t_vm_h_ipi);
+
+	return 0;
+}
+
+static int xive_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xive_debug_show, inode->i_private);
+}
+
+static const struct file_operations xive_debug_fops = {
+	.open = xive_debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void xive_debugfs_init(struct kvmppc_xive *xive)
+{
+	char *name;
+
+	name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
+	if (!name) {
+		pr_err("%s: no memory for name\n", __func__);
+		return;
+	}
+
+	xive->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+					   xive, &xive_debug_fops);
+
+	pr_debug("%s: created %s\n", __func__, name);
+	kfree(name);
+}
+
+static void kvmppc_xive_init(struct kvm_device *dev)
+{
+	struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;
+
+	/* Register some debug interfaces */
+	xive_debugfs_init(xive);
+}
+
+struct kvm_device_ops kvm_xive_ops = {
+	.name = "kvm-xive",
+	.create = kvmppc_xive_create,
+	.init = kvmppc_xive_init,
+	.destroy = kvmppc_xive_free,
+	.set_attr = xive_set_attr,
+	.get_attr = xive_get_attr,
+	.has_attr = xive_has_attr,
+};
+
+void kvmppc_xive_init_module(void)
+{
+	__xive_vm_h_xirr = xive_vm_h_xirr;
+	__xive_vm_h_ipoll = xive_vm_h_ipoll;
+	__xive_vm_h_ipi = xive_vm_h_ipi;
+	__xive_vm_h_cppr = xive_vm_h_cppr;
+	__xive_vm_h_eoi = xive_vm_h_eoi;
+}
+
+void kvmppc_xive_exit_module(void)
+{
+	__xive_vm_h_xirr = NULL;
+	__xive_vm_h_ipoll = NULL;
+	__xive_vm_h_ipi = NULL;
+	__xive_vm_h_cppr = NULL;
+	__xive_vm_h_eoi = NULL;
+}
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
new file mode 100644
index 000000000000..5938f7644dc1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _KVM_PPC_BOOK3S_XIVE_H
+#define _KVM_PPC_BOOK3S_XIVE_H
+
+#ifdef CONFIG_KVM_XICS
+#include "book3s_xics.h"
+
+/*
+ * State for one guest irq source.
+ *
+ * For each guest source we allocate a HW interrupt in the XIVE
+ * which we use for all SW triggers. It will be unused for
+ * pass-through but it's easier to keep around as the same
+ * guest interrupt can alternatively be emulated or pass-through
+ * if a physical device is hot unplugged and replaced with an
+ * emulated one.
+ *
+ * This state structure is very similar to the XICS one with
+ * additional XIVE specific tracking.
+ */
+struct kvmppc_xive_irq_state {
+	bool valid;			/* Interrupt entry is valid */
+
+	u32 number;			/* Guest IRQ number */
+	u32 ipi_number;			/* XIVE IPI HW number */
+	struct xive_irq_data ipi_data;	/* XIVE IPI associated data */
+	u32 pt_number;			/* XIVE Pass-through number if any */
+	struct xive_irq_data *pt_data;	/* XIVE Pass-through associated data */
+
+	/* Targetting as set by guest */
+	u32 guest_server;		/* Current guest selected target */
+	u8 guest_priority;		/* Guest set priority */
+	u8 saved_priority;		/* Saved priority when masking */
+
+	/* Actual targetting */
+	u32 act_server;			/* Actual server */
+	u8 act_priority;		/* Actual priority */
+
+	/* Various state bits */
+	bool in_eoi;			/* Synchronize with H_EOI */
+	bool old_p;			/* P bit state when masking */
+	bool old_q;			/* Q bit state when masking */
+	bool lsi;			/* level-sensitive interrupt */
+	bool asserted;			/* Only for emulated LSI: current state */
+
+	/* Saved for migration state */
+	bool in_queue;
+	bool saved_p;
+	bool saved_q;
+	u8 saved_scan_prio;
+};
+
+/* Select the "right" interrupt (IPI vs. passthrough) */
+static inline void kvmppc_xive_select_irq(struct kvmppc_xive_irq_state *state,
+					  u32 *out_hw_irq,
+					  struct xive_irq_data **out_xd)
+{
+	if (state->pt_number) {
+		if (out_hw_irq)
+			*out_hw_irq = state->pt_number;
+		if (out_xd)
+			*out_xd = state->pt_data;
+	} else {
+		if (out_hw_irq)
+			*out_hw_irq = state->ipi_number;
+		if (out_xd)
+			*out_xd = &state->ipi_data;
+	}
+}
+
+/*
+ * This corresponds to an "ICS" in XICS terminology, we use it
+ * as a mean to break up source information into multiple structures.
+ */
+struct kvmppc_xive_src_block {
+	arch_spinlock_t lock;
+	u16 id;
+	struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
+};
+
+
+struct kvmppc_xive {
+	struct kvm *kvm;
+	struct kvm_device *dev;
+	struct dentry *dentry;
+
+	/* VP block associated with the VM */
+	u32	vp_base;
+
+	/* Blocks of sources */
+	struct kvmppc_xive_src_block *src_blocks[KVMPPC_XICS_MAX_ICS_ID + 1];
+	u32	max_sbid;
+
+	/*
+	 * For state save, we lazily scan the queues on the first interrupt
+	 * being migrated. We don't have a clean way to reset that flags
+	 * so we keep track of the number of valid sources and how many of
+	 * them were migrated so we can reset when all of them have been
+	 * processed.
+	 */
+	u32	src_count;
+	u32	saved_src_count;
+
+	/*
+	 * Some irqs are delayed on restore until the source is created,
+	 * keep track here of how many of them
+	 */
+	u32	delayed_irqs;
+
+	/* Which queues (priorities) are in use by the guest */
+	u8	qmap;
+
+	/* Queue orders */
+	u32	q_order;
+	u32	q_page_order;
+
+};
+
+#define KVMPPC_XIVE_Q_COUNT	8
+
+struct kvmppc_xive_vcpu {
+	struct kvmppc_xive	*xive;
+	struct kvm_vcpu		*vcpu;
+	bool			valid;
+
+	/* Server number. This is the HW CPU ID from a guest perspective */
+	u32			server_num;
+
+	/*
+	 * HW VP corresponding to this VCPU. This is the base of the VP
+	 * block plus the server number.
+	 */
+	u32			vp_id;
+	u32			vp_chip_id;
+	u32			vp_cam;
+
+	/* IPI used for sending ... IPIs */
+	u32			vp_ipi;
+	struct xive_irq_data	vp_ipi_data;
+
+	/* Local emulation state */
+	uint8_t			cppr;	/* guest CPPR */
+	uint8_t			hw_cppr;/* Hardware CPPR */
+	uint8_t			mfrr;
+	uint8_t			pending;
+
+	/* Each VP has 8 queues though we only provision some */
+	struct xive_q		queues[KVMPPC_XIVE_Q_COUNT];
+	u32			esc_virq[KVMPPC_XIVE_Q_COUNT];
+	char			*esc_virq_names[KVMPPC_XIVE_Q_COUNT];
+
+	/* Stash a delayed irq on restore from migration (see set_icp) */
+	u32			delayed_irq;
+
+	/* Stats */
+	u64			stat_rm_h_xirr;
+	u64			stat_rm_h_ipoll;
+	u64			stat_rm_h_cppr;
+	u64			stat_rm_h_eoi;
+	u64			stat_rm_h_ipi;
+	u64			stat_vm_h_xirr;
+	u64			stat_vm_h_ipoll;
+	u64			stat_vm_h_cppr;
+	u64			stat_vm_h_eoi;
+	u64			stat_vm_h_ipi;
+};
+
+static inline struct kvm_vcpu *kvmppc_xive_find_server(struct kvm *kvm, u32 nr)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpu->arch.xive_vcpu && nr == vcpu->arch.xive_vcpu->server_num)
+			return vcpu;
+	}
+	return NULL;
+}
+
+static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmppc_xive *xive,
+		u32 irq, u16 *source)
+{
+	u32 bid = irq >> KVMPPC_XICS_ICS_SHIFT;
+	u16 src = irq & KVMPPC_XICS_SRC_MASK;
+
+	if (source)
+		*source = src;
+	if (bid > KVMPPC_XICS_MAX_ICS_ID)
+		return NULL;
+	return xive->src_blocks[bid];
+}
+
+/*
+ * Mapping between guest priorities and host priorities
+ * is as follow.
+ *
+ * Guest request for 0...6 are honored. Guest request for anything
+ * higher results in a priority of 7 being applied.
+ *
+ * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
+ * in order to match AIX expectations
+ *
+ * Similar mapping is done for CPPR values
+ */
+static inline u8 xive_prio_from_guest(u8 prio)
+{
+	if (prio == 0xff || prio < 8)
+		return prio;
+	return 7;
+}
+
+static inline u8 xive_prio_to_guest(u8 prio)
+{
+	if (prio == 0xff || prio < 7)
+		return prio;
+	return 0xb;
+}
+
+static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
+{
+	u32 cur;
+
+	if (!qpage)
+		return 0;
+	cur = be32_to_cpup(qpage + *idx);
+	if ((cur >> 31) == *toggle)
+		return 0;
+	*idx = (*idx + 1) & msk;
+	if (*idx == 0)
+		(*toggle) ^= 1;
+	return cur & 0x7fffffff;
+}
+
+extern unsigned long xive_rm_h_xirr(struct kvm_vcpu *vcpu);
+extern unsigned long xive_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
+extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+			 unsigned long mfrr);
+extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
+extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
+
+extern unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
+extern unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
+extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
+			      unsigned long mfrr);
+extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
+extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
+
+#endif /* CONFIG_KVM_XICS */
+#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
new file mode 100644
index 000000000000..4636ca6e7d38
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -0,0 +1,503 @@
+/*
+ * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+/* File to be included by other .c files */
+
+#define XGLUE(a,b) a##b
+#define GLUE(a,b) XGLUE(a,b)
+
+static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
+{
+	u8 cppr;
+	u16 ack;
+
+	/* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! */
+
+	/* Perform the acknowledge OS to register cycle. */
+	ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
+
+	/* Synchronize subsequent queue accesses */
+	mb();
+
+	/* XXX Check grouping level */
+
+	/* Anything ? */
+	if (!((ack >> 8) & TM_QW1_NSR_EO))
+		return;
+
+	/* Grab CPPR of the most favored pending interrupt */
+	cppr = ack & 0xff;
+	if (cppr < 8)
+		xc->pending |= 1 << cppr;
+
+#ifdef XIVE_RUNTIME_CHECKS
+	/* Check consistency */
+	if (cppr >= xc->hw_cppr)
+		pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
+			smp_processor_id(), cppr, xc->hw_cppr);
+#endif
+
+	/*
+	 * Update our image of the HW CPPR. We don't yet modify
+	 * xc->cppr, this will be done as we scan for interrupts
+	 * in the queues.
+	 */
+	xc->hw_cppr = cppr;
+}
+
+static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset)
+{
+	u64 val;
+
+	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
+		offset |= offset << 4;
+
+	val =__x_readq(__x_eoi_page(xd) + offset);
+#ifdef __LITTLE_ENDIAN__
+	val >>= 64-8;
+#endif
+	return (u8)val;
+}
+
+
+static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
+{
+	/* If the XIVE supports the new "store EOI facility, use it */
+	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+		__x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
+	else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
+		opal_int_eoi(hw_irq);
+	} else {
+		uint64_t eoi_val;
+
+		/*
+		 * Otherwise for EOI, we use the special MMIO that does
+		 * a clear of both P and Q and returns the old Q,
+		 * except for LSIs where we use the "EOI cycle" special
+		 * load.
+		 *
+		 * This allows us to then do a re-trigger if Q was set
+		 * rather than synthetizing an interrupt in software
+		 *
+		 * For LSIs, using the HW EOI cycle works around a problem
+		 * on P9 DD1 PHBs where the other ESB accesses don't work
+		 * properly.
+		 */
+		if (xd->flags & XIVE_IRQ_FLAG_LSI)
+			__x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
+		else {
+			eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
+
+			/* Re-trigger if needed */
+			if ((eoi_val & 1) && __x_trig_page(xd))
+				__x_writeq(0, __x_trig_page(xd));
+		}
+	}
+}
+
+enum {
+	scan_fetch,
+	scan_poll,
+	scan_eoi,
+};
+
+static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
+				       u8 pending, int scan_type)
+{
+	u32 hirq = 0;
+	u8 prio = 0xff;
+
+	/* Find highest pending priority */
+	while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
+		struct xive_q *q;
+		u32 idx, toggle;
+		__be32 *qpage;
+
+		/*
+		 * If pending is 0 this will return 0xff which is what
+		 * we want
+		 */
+		prio = ffs(pending) - 1;
+
+		/*
+		 * If the most favoured prio we found pending is less
+		 * favored (or equal) than a pending IPI, we return
+		 * the IPI instead.
+		 *
+		 * Note: If pending was 0 and mfrr is 0xff, we will
+		 * not spurriously take an IPI because mfrr cannot
+		 * then be smaller than cppr.
+		 */
+		if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
+			prio = xc->mfrr;
+			hirq = XICS_IPI;
+			break;
+		}
+
+		/* Don't scan past the guest cppr */
+		if (prio >= xc->cppr || prio > 7)
+			break;
+
+		/* Grab queue and pointers */
+		q = &xc->queues[prio];
+		idx = q->idx;
+		toggle = q->toggle;
+
+		/*
+		 * Snapshot the queue page. The test further down for EOI
+		 * must use the same "copy" that was used by __xive_read_eq
+		 * since qpage can be set concurrently and we don't want
+		 * to miss an EOI.
+		 */
+		qpage = READ_ONCE(q->qpage);
+
+skip_ipi:
+		/*
+		 * Try to fetch from the queue. Will return 0 for a
+		 * non-queueing priority (ie, qpage = 0).
+		 */
+		hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
+
+		/*
+		 * If this was a signal for an MFFR change done by
+		 * H_IPI we skip it. Additionally, if we were fetching
+		 * we EOI it now, thus re-enabling reception of a new
+		 * such signal.
+		 *
+		 * We also need to do that if prio is 0 and we had no
+		 * page for the queue. In this case, we have non-queued
+		 * IPI that needs to be EOId.
+		 *
+		 * This is safe because if we have another pending MFRR
+		 * change that wasn't observed above, the Q bit will have
+		 * been set and another occurrence of the IPI will trigger.
+		 */
+		if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
+			if (scan_type == scan_fetch)
+				GLUE(X_PFX,source_eoi)(xc->vp_ipi,
+						       &xc->vp_ipi_data);
+			/* Loop back on same queue with updated idx/toggle */
+#ifdef XIVE_RUNTIME_CHECKS
+			WARN_ON(hirq && hirq != XICS_IPI);
+#endif
+			if (hirq)
+				goto skip_ipi;
+		}
+
+		/* If fetching, update queue pointers */
+		if (scan_type == scan_fetch) {
+			q->idx = idx;
+			q->toggle = toggle;
+		}
+
+		/* Something found, stop searching */
+		if (hirq)
+			break;
+
+		/* Clear the pending bit on the now empty queue */
+		pending &= ~(1 << prio);
+
+		/*
+		 * Check if the queue count needs adjusting due to
+		 * interrupts being moved away.
+		 */
+		if (atomic_read(&q->pending_count)) {
+			int p = atomic_xchg(&q->pending_count, 0);
+			if (p) {
+#ifdef XIVE_RUNTIME_CHECKS
+				WARN_ON(p > atomic_read(&q->count));
+#endif
+				atomic_sub(p, &q->count);
+			}
+		}
+	}
+
+	/* If we are just taking a "peek", do nothing else */
+	if (scan_type == scan_poll)
+		return hirq;
+
+	/* Update the pending bits */
+	xc->pending = pending;
+
+	/*
+	 * If this is an EOI that's it, no CPPR adjustment done here,
+	 * all we needed was cleanup the stale pending bits and check
+	 * if there's anything left.
+	 */
+	if (scan_type == scan_eoi)
+		return hirq;
+
+	/*
+	 * If we found an interrupt, adjust what the guest CPPR should
+	 * be as if we had just fetched that interrupt from HW.
+	 */
+	if (hirq)
+		xc->cppr = prio;
+	/*
+	 * If it was an IPI the HW CPPR might have been lowered too much
+	 * as the HW interrupt we use for IPIs is routed to priority 0.
+	 *
+	 * We re-sync it here.
+	 */
+	if (xc->cppr != xc->hw_cppr) {
+		xc->hw_cppr = xc->cppr;
+		__x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
+	}
+
+	return hirq;
+}
+
+X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u8 old_cppr;
+	u32 hirq;
+
+	pr_devel("H_XIRR\n");
+
+	xc->GLUE(X_STAT_PFX,h_xirr)++;
+
+	/* First collect pending bits from HW */
+	GLUE(X_PFX,ack_pending)(xc);
+
+	/*
+	 * Cleanup the old-style bits if needed (they may have been
+	 * set by pull or an escalation interrupts).
+	 */
+	if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions))
+		clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
+			  &vcpu->arch.pending_exceptions);
+
+	pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
+		 xc->pending, xc->hw_cppr, xc->cppr);
+
+	/* Grab previous CPPR and reverse map it */
+	old_cppr = xive_prio_to_guest(xc->cppr);
+
+	/* Scan for actual interrupts */
+	hirq = GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_fetch);
+
+	pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
+		 hirq, xc->hw_cppr, xc->cppr);
+
+#ifdef XIVE_RUNTIME_CHECKS
+	/* That should never hit */
+	if (hirq & 0xff000000)
+		pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
+#endif
+
+	/*
+	 * XXX We could check if the interrupt is masked here and
+	 * filter it. If we chose to do so, we would need to do:
+	 *
+	 *    if (masked) {
+	 *        lock();
+	 *        if (masked) {
+	 *            old_Q = true;
+	 *            hirq = 0;
+	 *        }
+	 *        unlock();
+	 *    }
+	 */
+
+	/* Return interrupt and old CPPR in GPR4 */
+	vcpu->arch.gpr[4] = hirq | (old_cppr << 24);
+
+	return H_SUCCESS;
+}
+
+X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u8 pending = xc->pending;
+	u32 hirq;
+	u8 pipr;
+
+	pr_devel("H_IPOLL(server=%ld)\n", server);
+
+	xc->GLUE(X_STAT_PFX,h_ipoll)++;
+
+	/* Grab the target VCPU if not the current one */
+	if (xc->server_num != server) {
+		vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+		if (!vcpu)
+			return H_PARAMETER;
+		xc = vcpu->arch.xive_vcpu;
+
+		/* Scan all priorities */
+		pending = 0xff;
+	} else {
+		/* Grab pending interrupt if any */
+		pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR);
+		if (pipr < 8)
+			pending |= 1 << pipr;
+	}
+
+	hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);
+
+	/* Return interrupt and old CPPR in GPR4 */
+	vcpu->arch.gpr[4] = hirq | (xc->cppr << 24);
+
+	return H_SUCCESS;
+}
+
+static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
+{
+	u8 pending, prio;
+
+	pending = xc->pending;
+	if (xc->mfrr != 0xff) {
+		if (xc->mfrr < 8)
+			pending |= 1 << xc->mfrr;
+		else
+			pending |= 0x80;
+	}
+	if (!pending)
+		return;
+	prio = ffs(pending) - 1;
+
+	__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
+}
+
+X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u8 old_cppr;
+
+	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
+
+	xc->GLUE(X_STAT_PFX,h_cppr)++;
+
+	/* Map CPPR */
+	cppr = xive_prio_from_guest(cppr);
+
+	/* Remember old and update SW state */
+	old_cppr = xc->cppr;
+	xc->cppr = cppr;
+
+	/*
+	 * We are masking less, we need to look for pending things
+	 * to deliver and set VP pending bits accordingly to trigger
+	 * a new interrupt otherwise we might miss MFRR changes for
+	 * which we have optimized out sending an IPI signal.
+	 */
+	if (cppr > old_cppr)
+		GLUE(X_PFX,push_pending_to_hw)(xc);
+
+	/* Apply new CPPR */
+	xc->hw_cppr = cppr;
+	__x_writeb(cppr, __x_tima + TM_QW1_OS + TM_CPPR);
+
+	return H_SUCCESS;
+}
+
+X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_irq_data *xd;
+	u8 new_cppr = xirr >> 24;
+	u32 irq = xirr & 0x00ffffff, hw_num;
+	u16 src;
+	int rc = 0;
+
+	pr_devel("H_EOI(xirr=%08lx)\n", xirr);
+
+	xc->GLUE(X_STAT_PFX,h_eoi)++;
+
+	xc->cppr = xive_prio_from_guest(new_cppr);
+
+	/*
+	 * IPIs are synthetized from MFRR and thus don't need
+	 * any special EOI handling. The underlying interrupt
+	 * used to signal MFRR changes is EOId when fetched from
+	 * the queue.
+	 */
+	if (irq == XICS_IPI || irq == 0)
+		goto bail;
+
+	/* Find interrupt source */
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb) {
+		pr_devel(" source not found !\n");
+		rc = H_PARAMETER;
+		goto bail;
+	}
+	state = &sb->irq_state[src];
+	kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+	state->in_eoi = true;
+	mb();
+
+again:
+	if (state->guest_priority == MASKED) {
+		arch_spin_lock(&sb->lock);
+		if (state->guest_priority != MASKED) {
+			arch_spin_unlock(&sb->lock);
+			goto again;
+		}
+		pr_devel(" EOI on saved P...\n");
+
+		/* Clear old_p, that will cause unmask to perform an EOI */
+		state->old_p = false;
+
+		arch_spin_unlock(&sb->lock);
+	} else {
+		pr_devel(" EOI on source...\n");
+
+		/* Perform EOI on the source */
+		GLUE(X_PFX,source_eoi)(hw_num, xd);
+
+		/* If it's an emulated LSI, check level and resend */
+		if (state->lsi && state->asserted)
+			__x_writeq(0, __x_trig_page(xd));
+
+	}
+
+	mb();
+	state->in_eoi = false;
+bail:
+
+	/* Re-evaluate pending IRQs and update HW */
+	GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_eoi);
+	GLUE(X_PFX,push_pending_to_hw)(xc);
+	pr_devel(" after scan pending=%02x\n", xc->pending);
+
+	/* Apply new CPPR */
+	xc->hw_cppr = xc->cppr;
+	__x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
+
+	return rc;
+}
+
+X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
+			       unsigned long mfrr)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+	pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
+
+	xc->GLUE(X_STAT_PFX,h_ipi)++;
+
+	/* Find target */
+	vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+	if (!vcpu)
+		return H_PARAMETER;
+	xc = vcpu->arch.xive_vcpu;
+
+	/* Locklessly write over MFRR */
+	xc->mfrr = mfrr;
+
+	/* Shoot the IPI if most favored than target cppr */
+	if (mfrr < xc->cppr)
+		__x_writeq(0, __x_trig_page(&xc->vp_ipi_data));
+
+	return H_SUCCESS;
+}
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
index 5a9a10b90762..3f1be85a83bc 100644
--- a/arch/powerpc/kvm/irq.h
+++ b/arch/powerpc/kvm/irq.h
@@ -12,6 +12,7 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
 #endif
 #ifdef CONFIG_KVM_XICS
 	ret = ret || (kvm->arch.xics != NULL);
+	ret = ret || (kvm->arch.xive != NULL);
 #endif
 	smp_rmb();
 	return ret;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1ee22a910074..7f71ab5fcad1 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -38,6 +38,8 @@
 #include <asm/irqflags.h>
 #include <asm/iommu.h>
 #include <asm/switch_to.h>
+#include <asm/xive.h>
+
 #include "timing.h"
 #include "irq.h"
 #include "../mm/mmu_decl.h"
@@ -697,7 +699,10 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 		kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
 		break;
 	case KVMPPC_IRQ_XICS:
-		kvmppc_xics_free_icp(vcpu);
+		if (xive_enabled())
+			kvmppc_xive_cleanup_vcpu(vcpu);
+		else
+			kvmppc_xics_free_icp(vcpu);
 		break;
 	}
 
@@ -1522,8 +1527,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 
 		r = -EPERM;
 		dev = kvm_device_from_filp(f.file);
-		if (dev)
-			r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
+		if (dev) {
+			if (xive_enabled())
+				r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]);
+			else
+				r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
+		}
 
 		fdput(f);
 		break;
@@ -1547,7 +1556,7 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
 		return true;
 #endif
 #ifdef CONFIG_KVM_XICS
-	if (kvm->arch.xics)
+	if (kvm->arch.xics || kvm->arch.xive)
 		return true;
 #endif
 	return false;
@@ -1740,7 +1749,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
 		break;
 	}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_SPAPR_TCE_IOMMU
 	case KVM_CREATE_SPAPR_TCE_64: {
 		struct kvm_create_spapr_tce_64 create_tce_64;
 
@@ -1771,6 +1780,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
 		goto out;
 	}
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_PPC_GET_SMMU_INFO: {
 		struct kvm_ppc_smmu_info info;
 		struct kvm *kvm = filp->private_data;
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index d659345a98d6..44fe4833910f 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -16,6 +16,7 @@
  */
 #include <linux/debugfs.h>
 #include <linux/fs.h>
+#include <linux/hugetlb.h>
 #include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
@@ -391,7 +392,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 
 	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
 		addr = start + i * PMD_SIZE;
-		if (!pmd_none(*pmd))
+		if (!pmd_none(*pmd) && !pmd_huge(*pmd))
 			/* pmd exists */
 			walk_pte(st, pmd, addr);
 		else
@@ -407,7 +408,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 
 	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
 		addr = start + i * PUD_SIZE;
-		if (!pud_none(*pud))
+		if (!pud_none(*pud) && !pud_huge(*pud))
 			/* pud exists */
 			walk_pmd(st, pud, addr);
 		else
@@ -427,7 +428,7 @@ static void walk_pagetables(struct pg_state *st)
 	 */
 	for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
 		addr = KERN_VIRT_START + i * PGDIR_SIZE;
-		if (!pgd_none(*pgd))
+		if (!pgd_none(*pgd) && !pgd_huge(*pgd))
 			/* pgd exists */
 			walk_pud(st, pgd, addr);
 		else
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
index 6575b9aabef4..a12e86395025 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -68,7 +68,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (mm->task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	/*
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 9dbd2a733d6b..0ee6be4f1ba4 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -112,7 +112,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -157,7 +157,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index c6dca2ae78ef..a3edf813d455 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -99,7 +99,7 @@ static int hash__init_new_context(struct mm_struct *mm)
 	 * mm->context.addr_limit. Default to max task size so that we copy the
 	 * default values to paca which will help us to handle slb miss early.
 	 */
-	mm->context.addr_limit = TASK_SIZE_128TB;
+	mm->context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
 
 	/*
 	 * The old code would re-promote on fork, we don't do that when using
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 966b9fccfa66..45f6740dd407 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -99,7 +99,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
 	if ((mm->task_size - len) < addr)
 		return 0;
 	vma = find_vma(mm, addr);
-	return (!vma || (addr + len) <= vma->vm_start);
+	return (!vma || (addr + len) <= vm_start_gap(vma));
 }
 
 static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index cbd82fde5770..09ceea6175ba 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 			struct pt_regs *regs_user_copy)
 {
 	regs_user->regs = task_pt_regs(current);
-	regs_user->abi  = perf_reg_abi(current);
+	regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
+			 PERF_SAMPLE_REGS_ABI_NONE;
 }
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 018f8e90ac35..bb28e1a41257 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -402,7 +402,7 @@ static struct power_pmu power9_isa207_pmu = {
 	.name			= "POWER9",
 	.n_counter		= MAX_PMU_COUNTERS,
 	.add_fields		= ISA207_ADD_FIELDS,
-	.test_adder		= ISA207_TEST_ADDER,
+	.test_adder		= P9_DD1_TEST_ADDER,
 	.compute_mmcr		= isa207_compute_mmcr,
 	.config_bhrb		= power9_config_bhrb,
 	.bhrb_filter_map	= power9_bhrb_filter_map,
@@ -421,7 +421,7 @@ static struct power_pmu power9_pmu = {
 	.name			= "POWER9",
 	.n_counter		= MAX_PMU_COUNTERS,
 	.add_fields		= ISA207_ADD_FIELDS,
-	.test_adder		= P9_DD1_TEST_ADDER,
+	.test_adder		= ISA207_TEST_ADDER,
 	.compute_mmcr		= isa207_compute_mmcr,
 	.config_bhrb		= power9_config_bhrb,
 	.bhrb_filter_map	= power9_bhrb_filter_map,
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 33244e3d9375..4fd64d3f5c44 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -59,6 +59,17 @@ config PPC_OF_BOOT_TRAMPOLINE
 
 	  In case of doubt, say Y
 
+config PPC_DT_CPU_FTRS
+	bool "Device-tree based CPU feature discovery & setup"
+	depends on PPC_BOOK3S_64
+	default y
+	help
+	  This enables code to use a new device tree binding for describing CPU
+	  compatibility and features. Saying Y here will attempt to use the new
+	  binding if the firmware provides it. Currently only the skiboot
+	  firmware provides this binding.
+	  If you're not sure say Y.
+
 config UDBG_RTAS_CONSOLE
 	bool "RTAS based debug console"
 	depends on PPC_RTAS
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 96c2b8a40630..0c45cdbac4cf 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -197,7 +197,9 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
 	    (REGION_ID(ea) != USER_REGION_ID)) {
 
 		spin_unlock(&spu->register_lock);
-		ret = hash_page(ea, _PAGE_PRESENT | _PAGE_READ, 0x300, dsisr);
+		ret = hash_page(ea,
+				_PAGE_PRESENT | _PAGE_READ | _PAGE_PRIVILEGED,
+				0x300, dsisr);
 		spin_lock(&spu->register_lock);
 
 		if (!ret) {
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index e5a891ae80ee..84b7ac926ce6 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -175,6 +175,8 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 	skip = roundup(cprm->pos - total + sz, 4) - cprm->pos;
 	if (!dump_skip(cprm, skip))
 		goto Eio;
+
+	rc = 0;
 out:
 	free_page((unsigned long)buf);
 	return rc;
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index d2f19821d71d..d12ea7b9fd47 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -412,11 +412,14 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
 	 * been set for the PE, we will set EEH_PE_CFG_BLOCKED for
 	 * that PE to block its config space.
 	 *
+	 * Broadcom BCM5718 2-ports NICs (14e4:1656)
 	 * Broadcom Austin 4-ports NICs (14e4:1657)
 	 * Broadcom Shiner 4-ports 1G NICs (14e4:168a)
 	 * Broadcom Shiner 2-ports 10G NICs (14e4:168e)
 	 */
 	if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
+	     pdn->device_id == 0x1656) ||
+	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
 	     pdn->device_id == 0x1657) ||
 	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
 	     pdn->device_id == 0x168a) ||
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 067defeea691..b5d960d6db3d 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -75,7 +75,8 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
 	if (WARN_ON(!gpdev))
 		return NULL;
 
-	if (WARN_ON(!gpdev->dev.of_node))
+	/* Not all PCI devices have device-tree nodes */
+	if (!gpdev->dev.of_node)
 		return NULL;
 
 	/* Get assoicated PCI device */
@@ -448,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
 	return mmio_atsd_reg;
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -464,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	/* Invalidating the entire process doesn't use a va */
 	return mmio_launch_invalidate(npu, launch, 0);
 }
 
 static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-			unsigned long pid)
+			unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -485,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	return mmio_launch_invalidate(npu, launch, va);
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
+struct mmio_atsd_reg {
+	struct npu *npu;
+	int reg;
+};
+
+static void mmio_invalidate_wait(
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+	struct npu *npu;
+	int i, reg;
+
+	/* Wait for all invalidations to complete */
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		/* Wait for completion */
+		npu = mmio_atsd_reg[i].npu;
+		reg = mmio_atsd_reg[i].reg;
+		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+			cpu_relax();
+
+		put_mmio_atsd_reg(npu, reg);
+
+		/*
+		 * The GPU requires two flush ATSDs to ensure all entries have
+		 * been flushed. We use PID 0 as it will never be used for a
+		 * process on the GPU.
+		 */
+		if (flush)
+			mmio_invalidate_pid(npu, 0, true);
+	}
+}
+
 /*
  * Invalidate either a single address or an entire PID depending on
  * the value of va.
  */
 static void mmio_invalidate(struct npu_context *npu_context, int va,
-			unsigned long address)
+			unsigned long address, bool flush)
 {
-	int i, j, reg;
+	int i, j;
 	struct npu *npu;
 	struct pnv_phb *nphb;
 	struct pci_dev *npdev;
-	struct {
-		struct npu *npu;
-		int reg;
-	} mmio_atsd_reg[NV_MAX_NPUS];
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
 	unsigned long pid = npu_context->mm->context.id;
 
 	/*
@@ -524,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 
 			if (va)
 				mmio_atsd_reg[i].reg =
-					mmio_invalidate_va(npu, address, pid);
+					mmio_invalidate_va(npu, address, pid,
+							flush);
 			else
 				mmio_atsd_reg[i].reg =
-					mmio_invalidate_pid(npu, pid);
+					mmio_invalidate_pid(npu, pid, flush);
 
 			/*
 			 * The NPU hardware forwards the shootdown to all GPUs
@@ -543,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 	 */
 	flush_tlb_mm(npu_context->mm);
 
-	/* Wait for all invalidations to complete */
-	for (i = 0; i <= max_npu2_index; i++) {
-		if (mmio_atsd_reg[i].reg < 0)
-			continue;
-
-		/* Wait for completion */
-		npu = mmio_atsd_reg[i].npu;
-		reg = mmio_atsd_reg[i].reg;
-		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
-			cpu_relax();
-		put_mmio_atsd_reg(npu, reg);
-	}
+	mmio_invalidate_wait(mmio_atsd_reg, flush);
+	if (flush)
+		/* Wait for the flush to complete */
+		mmio_invalidate_wait(mmio_atsd_reg, false);
 }
 
 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -570,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
 	 * There should be no more translation requests for this PID, but we
 	 * need to ensure any entries for it are removed from the TLB.
 	 */
-	mmio_invalidate(npu_context, 0, 0);
+	mmio_invalidate(npu_context, 0, 0, true);
 }
 
 static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -580,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -589,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -599,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 	unsigned long address;
 
-	for (address = start; address <= end; address += PAGE_SIZE)
-		mmio_invalidate(npu_context, 1, address);
+	for (address = start; address < end; address += PAGE_SIZE)
+		mmio_invalidate(npu_context, 1, address, false);
+
+	/* Do the flush only on the final addess == end */
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -650,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 		/* No nvlink associated with this GPU device */
 		return ERR_PTR(-ENODEV);
 
-	if (!mm) {
-		/* kernel thread contexts are not supported */
+	if (!mm || mm->context.id == 0) {
+		/*
+		 * Kernel thread contexts are not supported and context id 0 is
+		 * reserved on the GPU.
+		 */
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -714,7 +751,7 @@ static void pnv_npu2_release_context(struct kref *kref)
 void pnv_npu2_destroy_context(struct npu_context *npu_context,
 			struct pci_dev *gpdev)
 {
-	struct pnv_phb *nphb, *phb;
+	struct pnv_phb *nphb;
 	struct npu *npu;
 	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
 	struct device_node *nvlink_dn;
@@ -728,13 +765,12 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
 
 	nphb = pci_bus_to_host(npdev->bus)->private_data;
 	npu = &nphb->npu;
-	phb = pci_bus_to_host(gpdev->bus)->private_data;
 	nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
 	if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
 							&nvlink_index)))
 		return;
 	npu_context->npdev[npu->index][nvlink_index] = NULL;
-	opal_npu_destroy_context(phb->opal_id, npu_context->mm->context.id,
+	opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
 				PCI_DEVID(gpdev->bus->number, gpdev->devfn));
 	kref_put(&npu_context->kref, pnv_npu2_release_context);
 }
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 7925a9d72cca..59684b4af4d1 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -967,3 +967,4 @@ EXPORT_SYMBOL_GPL(opal_leds_set_ind);
 EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
 /* Export this for KVM */
 EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
+EXPORT_SYMBOL_GPL(opal_int_eoi);
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 0babef11136f..8c6119280c13 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -407,7 +407,13 @@ static DEVICE_ATTR(subcores_per_core, 0644,
 
 static int subcore_init(void)
 {
-	if (!cpu_has_feature(CPU_FTR_SUBCORE))
+	unsigned pvr_ver;
+
+	pvr_ver = PVR_VER(mfspr(SPRN_PVR));
+
+	if (pvr_ver != PVR_POWER8 &&
+	    pvr_ver != PVR_POWER8E &&
+	    pvr_ver != PVR_POWER8NVL)
 		return 0;
 
 	/*
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index e104c71ea44a..1fb162ba9d1c 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -124,6 +124,7 @@ static struct property *dlpar_clone_drconf_property(struct device_node *dn)
 	for (i = 0; i < num_lmbs; i++) {
 		lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr);
 		lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index);
+		lmbs[i].aa_index = be32_to_cpu(lmbs[i].aa_index);
 		lmbs[i].flags = be32_to_cpu(lmbs[i].flags);
 	}
 
@@ -147,6 +148,7 @@ static void dlpar_update_drconf_property(struct device_node *dn,
 	for (i = 0; i < num_lmbs; i++) {
 		lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
 		lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
+		lmbs[i].aa_index = cpu_to_be32(lmbs[i].aa_index);
 		lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
 	}
 
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index 986cd111d4df..c651e668996b 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -377,6 +377,10 @@ static void cpm1_set_pin16(int port, int pin, int flags)
 			setbits16(&iop->odr_sor, pin);
 		else
 			clrbits16(&iop->odr_sor, pin);
+		if (flags & CPM_PIN_FALLEDGE)
+			setbits16(&iop->intr, pin);
+		else
+			clrbits16(&iop->intr, pin);
 	}
 }
 
@@ -528,6 +532,9 @@ struct cpm1_gpio16_chip {
 
 	/* shadowed data register to clear/set bits safely */
 	u16 cpdata;
+
+	/* IRQ associated with Pins when relevant */
+	int irq[16];
 };
 
 static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc)
@@ -578,6 +585,14 @@ static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value)
 	spin_unlock_irqrestore(&cpm1_gc->lock, flags);
 }
 
+static int cpm1_gpio16_to_irq(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+
+	return cpm1_gc->irq[gpio] ? : -ENXIO;
+}
+
 static int cpm1_gpio16_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
 {
 	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
@@ -618,6 +633,7 @@ int cpm1_gpiochip_add16(struct device_node *np)
 	struct cpm1_gpio16_chip *cpm1_gc;
 	struct of_mm_gpio_chip *mm_gc;
 	struct gpio_chip *gc;
+	u16 mask;
 
 	cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL);
 	if (!cpm1_gc)
@@ -625,6 +641,14 @@ int cpm1_gpiochip_add16(struct device_node *np)
 
 	spin_lock_init(&cpm1_gc->lock);
 
+	if (!of_property_read_u16(np, "fsl,cpm1-gpio-irq-mask", &mask)) {
+		int i, j;
+
+		for (i = 0, j = 0; i < 16; i++)
+			if (mask & (1 << (15 - i)))
+				cpm1_gc->irq[i] = irq_of_parse_and_map(np, j++);
+	}
+
 	mm_gc = &cpm1_gc->mm_gc;
 	gc = &mm_gc->gc;
 
@@ -634,6 +658,7 @@ int cpm1_gpiochip_add16(struct device_node *np)
 	gc->direction_output = cpm1_gpio16_dir_out;
 	gc->get = cpm1_gpio16_get;
 	gc->set = cpm1_gpio16_set;
+	gc->to_irq = cpm1_gpio16_to_irq;
 
 	return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc);
 }
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
index ef470b470b04..6afddae2fb47 100644
--- a/arch/powerpc/sysdev/simple_gpio.c
+++ b/arch/powerpc/sysdev/simple_gpio.c
@@ -75,7 +75,8 @@ static int u8_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
 
 static void u8_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
 {
-	struct u8_gpio_chip *u8_gc = gpiochip_get_data(&mm_gc->gc);
+	struct u8_gpio_chip *u8_gc =
+		container_of(mm_gc, struct u8_gpio_chip, mm_gc);
 
 	u8_gc->data = in_8(mm_gc->regs);
 }
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 6a98efb14264..8f5e3035483b 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -46,13 +46,15 @@
 #endif
 
 bool __xive_enabled;
+EXPORT_SYMBOL_GPL(__xive_enabled);
 bool xive_cmdline_disabled;
 
 /* We use only one priority for now */
 static u8 xive_irq_priority;
 
-/* TIMA */
+/* TIMA exported to KVM */
 void __iomem *xive_tima;
+EXPORT_SYMBOL_GPL(xive_tima);
 u32 xive_tima_offset;
 
 /* Backend ops */
@@ -295,7 +297,7 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
 {
 	/* If the XIVE supports the new "store EOI facility, use it */
 	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
-		out_be64(xd->eoi_mmio, 0);
+		out_be64(xd->eoi_mmio + XIVE_ESB_STORE_EOI, 0);
 	else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
 		/*
 		 * The FW told us to call it. This happens for some
@@ -345,8 +347,11 @@ static void xive_irq_eoi(struct irq_data *d)
 	DBG_VERBOSE("eoi_irq: irq=%d [0x%lx] pending=%02x\n",
 		    d->irq, irqd_to_hwirq(d), xc->pending_prio);
 
-	/* EOI the source if it hasn't been disabled */
-	if (!irqd_irq_disabled(d))
+	/*
+	 * EOI the source if it hasn't been disabled and hasn't
+	 * been passed-through to a KVM guest
+	 */
+	if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d))
 		xive_do_source_eoi(irqd_to_hwirq(d), xd);
 
 	/*
@@ -689,9 +694,14 @@ static int xive_irq_set_affinity(struct irq_data *d,
 
 	old_target = xd->target;
 
-	rc = xive_ops->configure_irq(hw_irq,
-				     get_hard_smp_processor_id(target),
-				     xive_irq_priority, d->irq);
+	/*
+	 * Only configure the irq if it's not currently passed-through to
+	 * a KVM guest
+	 */
+	if (!irqd_is_forwarded_to_vcpu(d))
+		rc = xive_ops->configure_irq(hw_irq,
+					     get_hard_smp_processor_id(target),
+					     xive_irq_priority, d->irq);
 	if (rc < 0) {
 		pr_err("Error %d reconfiguring irq %d\n", rc, d->irq);
 		return rc;
@@ -771,6 +781,123 @@ static int xive_irq_retrigger(struct irq_data *d)
 	return 1;
 }
 
+static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int rc;
+	u8 pq;
+
+	/*
+	 * We only support this on interrupts that do not require
+	 * firmware calls for masking and unmasking
+	 */
+	if (xd->flags & XIVE_IRQ_FLAG_MASK_FW)
+		return -EIO;
+
+	/*
+	 * This is called by KVM with state non-NULL for enabling
+	 * pass-through or NULL for disabling it
+	 */
+	if (state) {
+		irqd_set_forwarded_to_vcpu(d);
+
+		/* Set it to PQ=10 state to prevent further sends */
+		pq = xive_poke_esb(xd, XIVE_ESB_SET_PQ_10);
+
+		/* No target ? nothing to do */
+		if (xd->target == XIVE_INVALID_TARGET) {
+			/*
+			 * An untargetted interrupt should have been
+			 * also masked at the source
+			 */
+			WARN_ON(pq & 2);
+
+			return 0;
+		}
+
+		/*
+		 * If P was set, adjust state to PQ=11 to indicate
+		 * that a resend is needed for the interrupt to reach
+		 * the guest. Also remember the value of P.
+		 *
+		 * This also tells us that it's in flight to a host queue
+		 * or has already been fetched but hasn't been EOIed yet
+		 * by the host. This it's potentially using up a host
+		 * queue slot. This is important to know because as long
+		 * as this is the case, we must not hard-unmask it when
+		 * "returning" that interrupt to the host.
+		 *
+		 * This saved_p is cleared by the host EOI, when we know
+		 * for sure the queue slot is no longer in use.
+		 */
+		if (pq & 2) {
+			pq = xive_poke_esb(xd, XIVE_ESB_SET_PQ_11);
+			xd->saved_p = true;
+
+			/*
+			 * Sync the XIVE source HW to ensure the interrupt
+			 * has gone through the EAS before we change its
+			 * target to the guest. That should guarantee us
+			 * that we *will* eventually get an EOI for it on
+			 * the host. Otherwise there would be a small window
+			 * for P to be seen here but the interrupt going
+			 * to the guest queue.
+			 */
+			if (xive_ops->sync_source)
+				xive_ops->sync_source(hw_irq);
+		} else
+			xd->saved_p = false;
+	} else {
+		irqd_clr_forwarded_to_vcpu(d);
+
+		/* No host target ? hard mask and return */
+		if (xd->target == XIVE_INVALID_TARGET) {
+			xive_do_source_set_mask(xd, true);
+			return 0;
+		}
+
+		/*
+		 * Sync the XIVE source HW to ensure the interrupt
+		 * has gone through the EAS before we change its
+		 * target to the host.
+		 */
+		if (xive_ops->sync_source)
+			xive_ops->sync_source(hw_irq);
+
+		/*
+		 * By convention we are called with the interrupt in
+		 * a PQ=10 or PQ=11 state, ie, it won't fire and will
+		 * have latched in Q whether there's a pending HW
+		 * interrupt or not.
+		 *
+		 * First reconfigure the target.
+		 */
+		rc = xive_ops->configure_irq(hw_irq,
+					     get_hard_smp_processor_id(xd->target),
+					     xive_irq_priority, d->irq);
+		if (rc)
+			return rc;
+
+		/*
+		 * Then if saved_p is not set, effectively re-enable the
+		 * interrupt with an EOI. If it is set, we know there is
+		 * still a message in a host queue somewhere that will be
+		 * EOId eventually.
+		 *
+		 * Note: We don't check irqd_irq_disabled(). Effectively,
+		 * we *will* let the irq get through even if masked if the
+		 * HW is still firing it in order to deal with the whole
+		 * saved_p business properly. If the interrupt triggers
+		 * while masked, the generic code will re-mask it anyway.
+		 */
+		if (!xd->saved_p)
+			xive_do_source_eoi(hw_irq, xd);
+
+	}
+	return 0;
+}
+
 static struct irq_chip xive_irq_chip = {
 	.name = "XIVE-IRQ",
 	.irq_startup = xive_irq_startup,
@@ -781,12 +908,14 @@ static struct irq_chip xive_irq_chip = {
 	.irq_set_affinity = xive_irq_set_affinity,
 	.irq_set_type = xive_irq_set_type,
 	.irq_retrigger = xive_irq_retrigger,
+	.irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
 };
 
 bool is_xive_irq(struct irq_chip *chip)
 {
 	return chip == &xive_irq_chip;
 }
+EXPORT_SYMBOL_GPL(is_xive_irq);
 
 void xive_cleanup_irq_data(struct xive_irq_data *xd)
 {
@@ -801,6 +930,7 @@ void xive_cleanup_irq_data(struct xive_irq_data *xd)
 		xd->trig_mmio = NULL;
 	}
 }
+EXPORT_SYMBOL_GPL(xive_cleanup_irq_data);
 
 static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
 {
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 1a726229a427..ab9ecce61ee5 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -31,6 +31,7 @@
 #include <asm/xive.h>
 #include <asm/xive-regs.h>
 #include <asm/opal.h>
+#include <asm/kvm_ppc.h>
 
 #include "xive-internal.h"
 
@@ -95,6 +96,7 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(xive_native_populate_irq_data);
 
 int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
 {
@@ -108,6 +110,8 @@ int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
 	}
 	return rc == 0 ? 0 : -ENXIO;
 }
+EXPORT_SYMBOL_GPL(xive_native_configure_irq);
+
 
 /* This can be called multiple time to change a queue configuration */
 int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
@@ -172,6 +176,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
 fail:
 	return rc;
 }
+EXPORT_SYMBOL_GPL(xive_native_configure_queue);
 
 static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
 {
@@ -192,6 +197,7 @@ void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
 {
 	__xive_native_disable_queue(vp_id, q, prio);
 }
+EXPORT_SYMBOL_GPL(xive_native_disable_queue);
 
 static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
 {
@@ -262,6 +268,7 @@ static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
 	}
 	return 0;
 }
+#endif /* CONFIG_SMP */
 
 u32 xive_native_alloc_irq(void)
 {
@@ -277,6 +284,7 @@ u32 xive_native_alloc_irq(void)
 		return 0;
 	return rc;
 }
+EXPORT_SYMBOL_GPL(xive_native_alloc_irq);
 
 void xive_native_free_irq(u32 irq)
 {
@@ -287,7 +295,9 @@ void xive_native_free_irq(u32 irq)
 		msleep(1);
 	}
 }
+EXPORT_SYMBOL_GPL(xive_native_free_irq);
 
+#ifdef CONFIG_SMP
 static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc)
 {
 	s64 rc;
@@ -383,7 +393,7 @@ static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
 		return;
 
 	/* Enable the pool VP */
-	vp = xive_pool_vps + get_hard_smp_processor_id(cpu);
+	vp = xive_pool_vps + cpu;
 	pr_debug("CPU %d setting up pool VP 0x%x\n", cpu, vp);
 	for (;;) {
 		rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0);
@@ -428,7 +438,7 @@ static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
 	in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);
 
 	/* Disable it */
-	vp = xive_pool_vps + get_hard_smp_processor_id(cpu);
+	vp = xive_pool_vps + cpu;
 	for (;;) {
 		rc = opal_xive_set_vp_info(vp, 0, 0);
 		if (rc != OPAL_BUSY)
@@ -437,10 +447,11 @@ static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
 	}
 }
 
-static void xive_native_sync_source(u32 hw_irq)
+void xive_native_sync_source(u32 hw_irq)
 {
 	opal_xive_sync(XIVE_SYNC_EAS, hw_irq);
 }
+EXPORT_SYMBOL_GPL(xive_native_sync_source);
 
 static const struct xive_ops xive_native_ops = {
 	.populate_irq_data	= xive_native_populate_irq_data,
@@ -501,10 +512,24 @@ static bool xive_parse_provisioning(struct device_node *np)
 	return true;
 }
 
+static void xive_native_setup_pools(void)
+{
+	/* Allocate a pool big enough */
+	pr_debug("XIVE: Allocating VP block for pool size %d\n", nr_cpu_ids);
+
+	xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids);
+	if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP))
+		pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n");
+
+	pr_debug("XIVE: Pool VPs allocated at 0x%x for %d max CPUs\n",
+		 xive_pool_vps, nr_cpu_ids);
+}
+
 u32 xive_native_default_eq_shift(void)
 {
 	return xive_queue_shift;
 }
+EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
 
 bool xive_native_init(void)
 {
@@ -514,7 +539,7 @@ bool xive_native_init(void)
 	struct property *prop;
 	u8 max_prio = 7;
 	const __be32 *p;
-	u32 val;
+	u32 val, cpu;
 	s64 rc;
 
 	if (xive_cmdline_disabled)
@@ -550,7 +575,11 @@ bool xive_native_init(void)
 			break;
 	}
 
-	/* Grab size of provisioning pages */
+	/* Configure Thread Management areas for KVM */
+	for_each_possible_cpu(cpu)
+		kvmppc_set_xive_tima(cpu, r.start, tima);
+
+	/* Grab size of provisionning pages */
 	xive_parse_provisioning(np);
 
 	/* Switch the XIVE to exploitation mode */
@@ -560,6 +589,9 @@ bool xive_native_init(void)
 		return false;
 	}
 
+	/* Setup some dummy HV pool VPs */
+	xive_native_setup_pools();
+
 	/* Initialize XIVE core with our backend */
 	if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS,
 			    max_prio)) {
@@ -638,3 +670,47 @@ void xive_native_free_vp_block(u32 vp_base)
 		pr_warn("OPAL error %lld freeing VP block\n", rc);
 }
 EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
+
+int xive_native_enable_vp(u32 vp_id)
+{
+	s64 rc;
+
+	for (;;) {
+		rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(1);
+	}
+	return rc ? -EIO : 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_enable_vp);
+
+int xive_native_disable_vp(u32 vp_id)
+{
+	s64 rc;
+
+	for (;;) {
+		rc = opal_xive_set_vp_info(vp_id, 0, 0);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(1);
+	}
+	return rc ? -EIO : 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_disable_vp);
+
+int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
+{
+	__be64 vp_cam_be;
+	__be32 vp_chip_id_be;
+	s64 rc;
+
+	rc = opal_xive_get_vp_info(vp_id, NULL, &vp_cam_be, NULL, &vp_chip_id_be);
+	if (rc)
+		return -EIO;
+	*out_cam_id = be64_to_cpu(vp_cam_be) & 0xffffffffu;
+	*out_chip_id = be32_to_cpu(vp_chip_id_be);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index e161fafb495b..6967addc6a89 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -363,9 +363,6 @@ config COMPAT
 config SYSVIPC_COMPAT
 	def_bool y if COMPAT && SYSVIPC
 
-config KEYS_COMPAT
-	def_bool y if COMPAT && KEYS
-
 config SMP
 	def_bool y
 	prompt "Symmetric multi-processing support"
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index a5039fa89314..282072206df7 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -30,6 +30,7 @@ CONFIG_USER_NS=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
@@ -44,7 +45,10 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
@@ -90,6 +94,8 @@ CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
 CONFIG_XFRM_USER=m
 CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
@@ -359,6 +365,7 @@ CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
@@ -367,16 +374,19 @@ CONFIG_DEVTMPFS=y
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_OSD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_CDROM_PKTCDVD=m
-CONFIG_ATA_OVER_ETH=m
+CONFIG_BLK_DEV_RAM_DAX=y
 CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_RBD=m
 CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -442,6 +452,8 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -452,7 +464,6 @@ CONFIG_PPTP=m
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -471,6 +482,7 @@ CONFIG_DIAG288_WATCHDOG=m
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -487,12 +499,18 @@ CONFIG_XFS_POSIX_ACL=y
 CONFIG_XFS_RT=y
 CONFIG_XFS_DEBUG=y
 CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
 CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
 CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_BTRFS_DEBUG=y
 CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
 CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QUOTA_DEBUG=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
 CONFIG_AUTOFS4_FS=m
@@ -558,6 +576,7 @@ CONFIG_HEADERS_CHECK=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_RODATA_TEST=y
 CONFIG_DEBUG_OBJECTS=y
 CONFIG_DEBUG_OBJECTS_SELFTEST=y
 CONFIG_DEBUG_OBJECTS_FREE=y
@@ -580,7 +599,6 @@ CONFIG_DETECT_HUNG_TASK=y
 CONFIG_WQ_WATCHDOG=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_DEBUG_TIMEKEEPING=y
-CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_RT_MUTEXES=y
 CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
 CONFIG_PROVE_LOCKING=y
@@ -595,6 +613,7 @@ CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=300
 CONFIG_NOTIFIER_ERROR_INJECTION=m
 CONFIG_PM_NOTIFIER_ERROR_INJECT=m
+CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
 CONFIG_FAULT_INJECTION=y
 CONFIG_FAILSLAB=y
 CONFIG_FAIL_PAGE_ALLOC=y
@@ -616,13 +635,12 @@ CONFIG_HIST_TRIGGERS=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
 CONFIG_LKDTM=m
 CONFIG_TEST_LIST_SORT=y
+CONFIG_TEST_SORT=y
 CONFIG_KPROBES_SANITY_TEST=y
 CONFIG_RBTREE_TEST=y
 CONFIG_INTERVAL_TREE_TEST=m
 CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
-CONFIG_TEST_STRING_HELPERS=y
-CONFIG_TEST_KSTRTOX=y
 CONFIG_DMA_API_DEBUG=y
 CONFIG_TEST_BPF=m
 CONFIG_BUG_ON_DATA_CORRUPTION=y
@@ -630,6 +648,7 @@ CONFIG_S390_PTDUMP=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
@@ -640,7 +659,9 @@ CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_USER=m
+CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
@@ -648,6 +669,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
@@ -657,8 +679,10 @@ CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
@@ -674,6 +698,7 @@ CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
@@ -685,6 +710,7 @@ CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
 CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_ASYMMETRIC_KEY_TYPE=y
@@ -692,6 +718,7 @@ CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
 CONFIG_X509_CERTIFICATE_PARSER=m
 CONFIG_CRC7=m
 CONFIG_CRC8=m
+CONFIG_RANDOM32_SELFTEST=y
 CONFIG_CORDIC=m
 CONFIG_CMM=m
 CONFIG_APPLDATA_BASE=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index 83970b5afb2b..3c6b78189fbc 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -31,6 +31,7 @@ CONFIG_USER_NS=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
@@ -46,7 +47,10 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
@@ -88,6 +92,8 @@ CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
 CONFIG_XFRM_USER=m
 CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
@@ -356,6 +362,7 @@ CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
@@ -364,16 +371,18 @@ CONFIG_DEVTMPFS=y
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_OSD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_CDROM_PKTCDVD=m
-CONFIG_ATA_OVER_ETH=m
+CONFIG_BLK_DEV_RAM_DAX=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -439,6 +448,8 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -449,7 +460,6 @@ CONFIG_PPTP=m
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -468,6 +478,7 @@ CONFIG_DIAG288_WATCHDOG=m
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -483,11 +494,15 @@ CONFIG_XFS_QUOTA=y
 CONFIG_XFS_POSIX_ACL=y
 CONFIG_XFS_RT=y
 CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
 CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
 CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
 CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
@@ -553,7 +568,6 @@ CONFIG_UNUSED_SYMBOLS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_PANIC_ON_OOPS=y
-CONFIG_TIMER_STATS=y
 CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_LATENCYTOP=y
@@ -576,6 +590,7 @@ CONFIG_BIG_KEYS=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
@@ -599,6 +614,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
@@ -611,6 +627,7 @@ CONFIG_CRYPTO_SHA512=m
 CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
@@ -626,16 +643,19 @@ CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
 CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
 CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_CRC7=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index fbc6542aaf59..653d72bcc007 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -31,6 +31,7 @@ CONFIG_USER_NS=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
@@ -44,7 +45,10 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
@@ -86,6 +90,8 @@ CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
 CONFIG_XFRM_USER=m
 CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
@@ -354,6 +360,7 @@ CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
@@ -362,16 +369,18 @@ CONFIG_DEVTMPFS=y
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_OSD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_CDROM_PKTCDVD=m
-CONFIG_ATA_OVER_ETH=m
+CONFIG_BLK_DEV_RAM_DAX=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -437,6 +446,8 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -447,7 +458,6 @@ CONFIG_PPTP=m
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -466,6 +476,7 @@ CONFIG_DIAG288_WATCHDOG=m
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -481,11 +492,15 @@ CONFIG_XFS_QUOTA=y
 CONFIG_XFS_POSIX_ACL=y
 CONFIG_XFS_RT=y
 CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
 CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
 CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
 CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
@@ -551,7 +566,6 @@ CONFIG_UNUSED_SYMBOLS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_PANIC_ON_OOPS=y
-CONFIG_TIMER_STATS=y
 CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_LATENCYTOP=y
@@ -574,6 +588,7 @@ CONFIG_BIG_KEYS=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
@@ -597,6 +612,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
@@ -609,6 +625,7 @@ CONFIG_CRYPTO_SHA512=m
 CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
@@ -624,6 +641,7 @@ CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
@@ -635,6 +653,7 @@ CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
 CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_CRC7=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index e23d97c13735..afa46a7406ea 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -12,8 +12,10 @@ CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=2
 # CONFIG_HOTPLUG_CPU is not set
 CONFIG_HZ_100=y
+# CONFIG_ARCH_RANDOM is not set
 # CONFIG_COMPACTION is not set
 # CONFIG_MIGRATION is not set
+# CONFIG_BOUNCE is not set
 # CONFIG_CHECK_STACK is not set
 # CONFIG_CHSC_SCH is not set
 # CONFIG_SCM_BUS is not set
@@ -36,11 +38,11 @@ CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_FC_ATTRS=y
 CONFIG_ZFCP=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 # CONFIG_HVC_IUCV is not set
+# CONFIG_HW_RANDOM_S390 is not set
 CONFIG_RAW_DRIVER=y
 # CONFIG_SCLP_ASYNC is not set
 # CONFIG_HMC_DRV is not set
@@ -54,9 +56,9 @@ CONFIG_RAW_DRIVER=y
 # CONFIG_INOTIFY_USER is not set
 CONFIG_CONFIGFS_FS=y
 # CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_PANIC_ON_OOPS=y
 # CONFIG_SCHED_DEBUG is not set
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 97189dbaf34b..20244a38c886 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -28,6 +28,7 @@ CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
@@ -108,7 +109,6 @@ CONFIG_ZFCP=y
 CONFIG_SCSI_VIRTIO=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_MD_MULTIPATH=m
 CONFIG_BLK_DEV_DM=y
 CONFIG_DM_CRYPT=m
@@ -131,6 +131,7 @@ CONFIG_TUN=m
 CONFIG_VIRTIO_NET=y
 # CONFIG_NET_VENDOR_ALACRITECH is not set
 # CONFIG_NET_VENDOR_SOLARFLARE is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 CONFIG_DEVKMEM=y
@@ -162,7 +163,6 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_PAGEALLOC=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_PANIC_ON_OOPS=y
-CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_RT_MUTEXES=y
 CONFIG_PROVE_LOCKING=y
 CONFIG_LOCK_STAT=y
@@ -172,14 +172,12 @@ CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_SG=y
 CONFIG_DEBUG_NOTIFIERS=y
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
-CONFIG_RCU_TRACE=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
 CONFIG_KPROBES_SANITY_TEST=y
@@ -190,7 +188,6 @@ CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
 CONFIG_CRYPTO_CBC=y
 CONFIG_CRYPTO_CTS=m
-CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_XTS=m
@@ -230,6 +227,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_ZCRYPT=m
 CONFIG_PKEY=m
+CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 0206c8052328..df7b54ea956d 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -10,6 +10,7 @@
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
 #include <linux/time.h>
+#include <linux/refcount.h>
 #include <uapi/asm/debug.h>
 
 #define DEBUG_MAX_LEVEL            6  /* debug levels range from 0 to 6 */
@@ -31,7 +32,7 @@ struct debug_view;
 typedef struct debug_info {	
 	struct debug_info* next;
 	struct debug_info* prev;
-	atomic_t ref_count;
+	refcount_t ref_count;
 	spinlock_t lock;			
 	int level;
 	int nr_areas;
diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h
index 60323c21938b..37f617dfbede 100644
--- a/arch/s390/include/asm/dis.h
+++ b/arch/s390/include/asm/dis.h
@@ -40,6 +40,8 @@ static inline int insn_length(unsigned char code)
 	return ((((int) code + 64) >> 7) + 1) << 1;
 }
 
+struct pt_regs;
+
 void show_code(struct pt_regs *regs);
 void print_fn_code(unsigned char *code, unsigned long len);
 int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len);
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
index 67026300c88e..144809a3f4f6 100644
--- a/arch/s390/include/asm/eadm.h
+++ b/arch/s390/include/asm/eadm.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/device.h>
+#include <linux/blkdev.h>
 
 struct arqb {
 	u64 data;
@@ -105,13 +106,14 @@ struct scm_driver {
 	int (*probe) (struct scm_device *scmdev);
 	int (*remove) (struct scm_device *scmdev);
 	void (*notify) (struct scm_device *scmdev, enum scm_event event);
-	void (*handler) (struct scm_device *scmdev, void *data, int error);
+	void (*handler) (struct scm_device *scmdev, void *data,
+			blk_status_t error);
 };
 
 int scm_driver_register(struct scm_driver *scmdrv);
 void scm_driver_unregister(struct scm_driver *scmdrv);
 
 int eadm_start_aob(struct aob *aob);
-void scm_irq_handler(struct aob *aob, int error);
+void scm_irq_handler(struct aob *aob, blk_status_t error);
 
 #endif /* _ASM_S390_EADM_H */
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 1293c4066cfc..28792ef82c83 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -27,12 +27,21 @@
  * 2005-Dec	Used as a template for s390 by Mike Grundy
  *		<grundym@us.ibm.com>
  */
+#include <linux/types.h>
 #include <asm-generic/kprobes.h>
 
 #define BREAKPOINT_INSTRUCTION	0x0002
 
+#define FIXUP_PSW_NORMAL	0x08
+#define FIXUP_BRANCH_NOT_TAKEN	0x04
+#define FIXUP_RETURN_REGISTER	0x02
+#define FIXUP_NOT_REQUIRED	0x01
+
+int probe_is_prohibited_opcode(u16 *insn);
+int probe_get_fixup_type(u16 *insn);
+int probe_is_insn_relative_long(u16 *insn);
+
 #ifdef CONFIG_KPROBES
-#include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
 #include <linux/sched/task_stack.h>
@@ -56,11 +65,6 @@ typedef u16 kprobe_opcode_t;
 
 #define KPROBE_SWAP_INST	0x10
 
-#define FIXUP_PSW_NORMAL	0x08
-#define FIXUP_BRANCH_NOT_TAKEN	0x04
-#define FIXUP_RETURN_REGISTER	0x02
-#define FIXUP_NOT_REQUIRED	0x01
-
 /* Architecture specific copy of original instruction */
 struct arch_specific_insn {
 	/* copy of original instruction */
@@ -90,10 +94,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 int kprobe_exceptions_notify(struct notifier_block *self,
 	unsigned long val, void *data);
 
-int probe_is_prohibited_opcode(u16 *insn);
-int probe_get_fixup_type(u16 *insn);
-int probe_is_insn_relative_long(u16 *insn);
-
 #define flush_insn_slot(p)	do { } while (0)
 
 #endif /* CONFIG_KPROBES */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 426614a882a9..65d07ac34647 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -541,7 +541,6 @@ struct kvm_s390_float_interrupt {
 	struct mutex ais_lock;
 	u8 simm;
 	u8 nimm;
-	int ais_enabled;
 };
 
 struct kvm_hw_wp_info_arch {
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 60d395fdc864..aeac013968f2 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -221,11 +221,6 @@ extern void release_thread(struct task_struct *);
 /* Free guarded storage control block for current */
 void exit_thread_gs(void);
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 unsigned long get_wchan(struct task_struct *p);
 #define task_pt_regs(tsk) ((struct pt_regs *) \
         (task_stack_page(tsk) + THREAD_SIZE) - 1)
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index 73bff45ced55..2b498e58b914 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -109,7 +109,7 @@ struct sysinfo_2_2_2 {
 	unsigned short cpus_shared;
 	char reserved_4[3];
 	unsigned char vsne;
-	uuid_be uuid;
+	uuid_t uuid;
 	char reserved_5[160];
 	char ext_name[256];
 };
@@ -134,7 +134,7 @@ struct sysinfo_3_2_2 {
 		char reserved_1[3];
 		unsigned char evmne;
 		unsigned int reserved_2;
-		uuid_be uuid;
+		uuid_t uuid;
 	} vm[8];
 	char reserved_3[1504];
 	char ext_names[8][256];
@@ -146,7 +146,7 @@ extern int topology_max_mnest;
  * Returns the maximum nesting level supported by the cpu topology code.
  * The current maximum level is 4 which is the drawer level.
  */
-static inline int topology_mnest_limit(void)
+static inline unsigned char topology_mnest_limit(void)
 {
 	return min(topology_max_mnest, 4);
 }
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index addb09cee0f5..ca62066895e0 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -10,49 +10,3 @@ generic-y += poll.h
 generic-y += resource.h
 generic-y += sockios.h
 generic-y += termbits.h
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += chpid.h
-header-y += chsc.h
-header-y += clp.h
-header-y += cmb.h
-header-y += dasd.h
-header-y += debug.h
-header-y += errno.h
-header-y += guarded_storage.h
-header-y += hypfs.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm.h
-header-y += kvm_para.h
-header-y += kvm_perf.h
-header-y += kvm_virtio.h
-header-y += monwriter.h
-header-y += msgbuf.h
-header-y += pkey.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += qeth.h
-header-y += schid.h
-header-y += sclp_ctl.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sie.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += tape390.h
-header-y += termios.h
-header-y += types.h
-header-y += ucontext.h
-header-y += unistd.h
-header-y += virtio-ccw.h
-header-y += vtoc.h
-header-y += zcrypt.h
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 530226b6cb19..86b3e74f569e 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -277,7 +277,7 @@ debug_info_alloc(const char *name, int pages_per_area, int nr_areas,
 	memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
 	memset(rc->debugfs_entries, 0 ,DEBUG_MAX_VIEWS *
 		sizeof(struct dentry*));
-	atomic_set(&(rc->ref_count), 0);
+	refcount_set(&(rc->ref_count), 0);
 
 	return rc;
 
@@ -361,7 +361,7 @@ debug_info_create(const char *name, int pages_per_area, int nr_areas,
         debug_area_last = rc;
         rc->next = NULL;
 
-	debug_info_get(rc);
+	refcount_set(&rc->ref_count, 1);
 out:
 	return rc;
 }
@@ -416,7 +416,7 @@ static void
 debug_info_get(debug_info_t * db_info)
 {
 	if (db_info)
-		atomic_inc(&db_info->ref_count);
+		refcount_inc(&db_info->ref_count);
 }
 
 /*
@@ -431,7 +431,7 @@ debug_info_put(debug_info_t *db_info)
 
 	if (!db_info)
 		return;
-	if (atomic_dec_and_test(&db_info->ref_count)) {
+	if (refcount_dec_and_test(&db_info->ref_count)) {
 		for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
 			if (!db_info->views[i])
 				continue;
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index a5f5d3bb3dbc..6315037335ba 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -231,12 +231,17 @@ ENTRY(sie64a)
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 .Lsie_done:
 # some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions between sie64a and .Lsie_done should not cause program
-# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
+# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
+# Other instructions between sie64a and .Lsie_done should not cause program
+# interrupts. So lets use 3 nops as a landing pad for all possible rewinds.
 # See also .Lcleanup_sie
-.Lrewind_pad:
-	nop	0
+.Lrewind_pad6:
+	nopr	7
+.Lrewind_pad4:
+	nopr	7
+.Lrewind_pad2:
+	nopr	7
 	.globl sie_exit
 sie_exit:
 	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
@@ -249,7 +254,9 @@ sie_exit:
 	stg	%r14,__SF_EMPTY+16(%r15)	# set exit reason code
 	j	sie_exit
 
-	EX_TABLE(.Lrewind_pad,.Lsie_fault)
+	EX_TABLE(.Lrewind_pad6,.Lsie_fault)
+	EX_TABLE(.Lrewind_pad4,.Lsie_fault)
+	EX_TABLE(.Lrewind_pad2,.Lsie_fault)
 	EX_TABLE(sie_exit,.Lsie_fault)
 EXPORT_SYMBOL(sie64a)
 EXPORT_SYMBOL(sie_exit)
@@ -312,6 +319,7 @@ ENTRY(system_call)
 	lg	%r14,__LC_VDSO_PER_CPU
 	lmg	%r0,%r10,__PT_R0(%r11)
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
+.Lsysc_exit_timer:
 	stpt	__LC_EXIT_TIMER
 	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
 	lmg	%r11,%r15,__PT_R11(%r11)
@@ -623,6 +631,7 @@ ENTRY(io_int_handler)
 	lg	%r14,__LC_VDSO_PER_CPU
 	lmg	%r0,%r10,__PT_R0(%r11)
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
+.Lio_exit_timer:
 	stpt	__LC_EXIT_TIMER
 	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
 	lmg	%r11,%r15,__PT_R11(%r11)
@@ -1174,15 +1183,23 @@ cleanup_critical:
 	br	%r14
 
 .Lcleanup_sysc_restore:
+	# check if stpt has been executed
 	clg	%r9,BASED(.Lcleanup_sysc_restore_insn)
+	jh	0f
+	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	cghi	%r11,__LC_SAVE_AREA_ASYNC
 	je	0f
+	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
+0:	clg	%r9,BASED(.Lcleanup_sysc_restore_insn+8)
+	je	1f
 	lg	%r9,24(%r11)		# get saved pointer to pt_regs
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
 	mvc	0(64,%r11),__PT_R8(%r9)
 	lmg	%r0,%r7,__PT_R0(%r9)
-0:	lmg	%r8,%r9,__LC_RETURN_PSW
+1:	lmg	%r8,%r9,__LC_RETURN_PSW
 	br	%r14
 .Lcleanup_sysc_restore_insn:
+	.quad	.Lsysc_exit_timer
 	.quad	.Lsysc_done - 4
 
 .Lcleanup_io_tif:
@@ -1190,15 +1207,20 @@ cleanup_critical:
 	br	%r14
 
 .Lcleanup_io_restore:
+	# check if stpt has been executed
 	clg	%r9,BASED(.Lcleanup_io_restore_insn)
-	je	0f
+	jh	0f
+	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
+0:	clg	%r9,BASED(.Lcleanup_io_restore_insn+8)
+	je	1f
 	lg	%r9,24(%r11)		# get saved r11 pointer to pt_regs
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
 	mvc	0(64,%r11),__PT_R8(%r9)
 	lmg	%r0,%r7,__PT_R0(%r9)
-0:	lmg	%r8,%r9,__LC_RETURN_PSW
+1:	lmg	%r8,%r9,__LC_RETURN_PSW
 	br	%r14
 .Lcleanup_io_restore_insn:
+	.quad	.Lio_exit_timer
 	.quad	.Lio_done - 4
 
 .Lcleanup_idle:
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 27477f34cc0a..d03a6d12c4bd 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -173,6 +173,8 @@ int __init ftrace_dyn_arch_init(void)
 	return 0;
 }
 
+#ifdef CONFIG_MODULES
+
 static int __init ftrace_plt_init(void)
 {
 	unsigned int *ip;
@@ -191,6 +193,8 @@ static int __init ftrace_plt_init(void)
 }
 device_initcall(ftrace_plt_init);
 
+#endif /* CONFIG_MODULES */
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 /*
  * Hook the return address and push it in the stack of return addresses
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index e545ffe5155a..8e622bb52f7a 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -564,8 +564,6 @@ static struct kset *ipl_kset;
 
 static void __ipl_run(void *unused)
 {
-	if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
-		diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
 	diag308(DIAG308_LOAD_CLEAR, NULL);
 	if (MACHINE_IS_VM)
 		__cpcmd("IPL", NULL, 0, NULL);
@@ -1088,10 +1086,7 @@ static void __reipl_run(void *unused)
 		break;
 	case REIPL_METHOD_CCW_DIAG:
 		diag308(DIAG308_SET, reipl_block_ccw);
-		if (MACHINE_IS_LPAR)
-			diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
-		else
-			diag308(DIAG308_LOAD_CLEAR, NULL);
+		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case REIPL_METHOD_FCP_RW_DIAG:
 		diag308(DIAG308_SET, reipl_block_fcp);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 999d7154bbdc..bb32b8618bf6 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -41,31 +41,6 @@
 
 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
 
-/*
- * Return saved PC of a blocked thread. used in kernel/sched.
- * resume in entry.S does not create a new stack frame, it
- * just stores the registers %r6-%r15 to the frame given by
- * schedule. We want to return the address of the caller of
- * schedule, so we have to walk the backchain one time to
- * find the frame schedule() store its return address.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct stack_frame *sf, *low, *high;
-
-	if (!tsk || !task_stack_page(tsk))
-		return 0;
-	low = task_stack_page(tsk);
-	high = (struct stack_frame *) task_pt_regs(tsk);
-	sf = (struct stack_frame *) tsk->thread.ksp;
-	if (sf <= low || sf > high)
-		return 0;
-	sf = (struct stack_frame *) sf->back_chain;
-	if (sf <= low || sf > high)
-		return 0;
-	return sf->gprs[8];
-}
-
 extern void kernel_thread_starter(void);
 
 /*
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index eefcb54872a5..fb869b103825 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -242,7 +242,7 @@ static void print_ext_name(struct seq_file *m, int lvl,
 
 static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info)
 {
-	if (!memcmp(&info->vm[i].uuid, &NULL_UUID_BE, sizeof(uuid_be)))
+	if (uuid_is_null(&info->vm[i].uuid))
 		return;
 	seq_printf(m, "VM%02d UUID:            %pUb\n", i, &info->vm[i].uuid);
 }
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 72307f108c40..6e2c42bd1c3b 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -31,8 +31,14 @@ SECTIONS
 {
 	. = 0x00000000;
 	.text : {
-	_text = .;		/* Text and read-only data */
+		/* Text and read-only data */
 		HEAD_TEXT
+		/*
+		 * E.g. perf doesn't like symbols starting at address zero,
+		 * therefore skip the initial PSW and channel program located
+		 * at address zero and let _text start at 0x200.
+		 */
+	_text = 0x200;
 		TEXT_TEXT
 		SCHED_TEXT
 		CPUIDLE_TEXT
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 9da243d94cc3..3b297fa3aa67 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -977,11 +977,12 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 	ptr = asce.origin * 4096;
 	if (asce.r) {
 		*fake = 1;
+		ptr = 0;
 		asce.dt = ASCE_TYPE_REGION1;
 	}
 	switch (asce.dt) {
 	case ASCE_TYPE_REGION1:
-		if (vaddr.rfx01 > asce.tl && !asce.r)
+		if (vaddr.rfx01 > asce.tl && !*fake)
 			return PGM_REGION_FIRST_TRANS;
 		break;
 	case ASCE_TYPE_REGION2:
@@ -1009,8 +1010,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 		union region1_table_entry rfte;
 
 		if (*fake) {
-			/* offset in 16EB guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.rsx << 53UL);
+			ptr += (unsigned long) vaddr.rfx << 53;
 			rfte.val = ptr;
 			goto shadow_r2t;
 		}
@@ -1036,8 +1036,7 @@ shadow_r2t:
 		union region2_table_entry rste;
 
 		if (*fake) {
-			/* offset in 8PB guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.rtx << 42UL);
+			ptr += (unsigned long) vaddr.rsx << 42;
 			rste.val = ptr;
 			goto shadow_r3t;
 		}
@@ -1064,8 +1063,7 @@ shadow_r3t:
 		union region3_table_entry rtte;
 
 		if (*fake) {
-			/* offset in 4TB guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.sx << 31UL);
+			ptr += (unsigned long) vaddr.rtx << 31;
 			rtte.val = ptr;
 			goto shadow_sgt;
 		}
@@ -1101,8 +1099,7 @@ shadow_sgt:
 		union segment_table_entry ste;
 
 		if (*fake) {
-			/* offset in 2G guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.sx << 20UL);
+			ptr += (unsigned long) vaddr.sx << 20;
 			ste.val = ptr;
 			goto shadow_pgt;
 		}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index caf15c8a8948..2d120fef7d90 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2160,7 +2160,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
 	struct kvm_s390_ais_req req;
 	int ret = 0;
 
-	if (!fi->ais_enabled)
+	if (!test_kvm_facility(kvm, 72))
 		return -ENOTSUPP;
 
 	if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
@@ -2204,7 +2204,7 @@ static int kvm_s390_inject_airq(struct kvm *kvm,
 	};
 	int ret = 0;
 
-	if (!fi->ais_enabled || !adapter->suppressible)
+	if (!test_kvm_facility(kvm, 72) || !adapter->suppressible)
 		return kvm_s390_inject_vm(kvm, &s390int);
 
 	mutex_lock(&fi->ais_lock);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 689ac48361c6..f28e2e776931 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -558,7 +558,6 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		} else {
 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
 			set_kvm_facility(kvm->arch.model.fac_list, 72);
-			kvm->arch.float_int.ais_enabled = 1;
 			r = 0;
 		}
 		mutex_unlock(&kvm->lock);
@@ -1533,7 +1532,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	mutex_init(&kvm->arch.float_int.ais_lock);
 	kvm->arch.float_int.simm = 0;
 	kvm->arch.float_int.nimm = 0;
-	kvm->arch.float_int.ais_enabled = 0;
 	spin_lock_init(&kvm->arch.float_int.lock);
 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
diff --git a/arch/s390/lib/probes.c b/arch/s390/lib/probes.c
index ae90e1ae3607..1963ddbf4ab3 100644
--- a/arch/s390/lib/probes.c
+++ b/arch/s390/lib/probes.c
@@ -4,6 +4,7 @@
  *    Copyright IBM Corp. 2014
  */
 
+#include <linux/errno.h>
 #include <asm/kprobes.h>
 #include <asm/dis.h>
 
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 1e5bb2b86c42..b3bd3f23b8e8 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -337,8 +337,8 @@ long __strncpy_from_user(char *dst, const char __user *src, long size)
 		return 0;
 	done = 0;
 	do {
-		offset = (size_t)src & ~PAGE_MASK;
-		len = min(size - done, PAGE_SIZE - offset);
+		offset = (size_t)src & (L1_CACHE_BYTES - 1);
+		len = min(size - done, L1_CACHE_BYTES - offset);
 		if (copy_from_user(dst, src, len))
 			return -EFAULT;
 		len_str = strnlen(dst, len);
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index b017daed6887..b854b1da281a 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -101,7 +101,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			goto check_asce_limit;
 	}
 
@@ -151,7 +151,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)))
 			goto check_asce_limit;
 	}
 
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index e3a8d0f96652..54b3b2039af1 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -1,6 +1,3 @@
-
-header-y +=
-
 generic-y += barrier.h
 generic-y += clkdev.h
 generic-y += current.h
diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h
index d9a922d8711b..299274581968 100644
--- a/arch/score/include/asm/processor.h
+++ b/arch/score/include/asm/processor.h
@@ -13,7 +13,6 @@ struct task_struct;
  */
 extern void (*cpu_wait)(void);
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
 extern void start_thread(struct pt_regs *regs,
 			unsigned long pc, unsigned long sp);
 extern unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild
index 040178cdb3eb..b15bf6bc0e94 100644
--- a/arch/score/include/uapi/asm/Kbuild
+++ b/arch/score/include/uapi/asm/Kbuild
@@ -1,34 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c
index eb64d7a677cb..6e20241a1ed4 100644
--- a/arch/score/kernel/process.c
+++ b/arch/score/kernel/process.c
@@ -101,11 +101,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r)
 	return 1;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return task_pt_regs(tsk)->cp0_epc;
-}
-
 unsigned long get_wchan(struct task_struct *task)
 {
 	if (!task || task == current || task->state == TASK_RUNNING)
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index 336f33a419d9..280bbff12102 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -94,7 +94,8 @@ defaultimage-$(CONFIG_SH_7206_SOLUTION_ENGINE)	:= vmlinux
 defaultimage-$(CONFIG_SH_7619_SOLUTION_ENGINE)	:= vmlinux
 
 # Set some sensible Kbuild defaults
-KBUILD_IMAGE		:= $(defaultimage-y)
+boot := arch/sh/boot
+KBUILD_IMAGE		:= $(boot)/$(defaultimage-y)
 
 #
 # Choosing incompatible machines durings configuration will result in
@@ -186,8 +187,6 @@ cpuincdir-y			+= cpu-common	# Must be last
 drivers-y			+= arch/sh/drivers/
 drivers-$(CONFIG_OPROFILE)	+= arch/sh/oprofile/
 
-boot := arch/sh/boot
-
 cflags-y	+= $(foreach d, $(cpuincdir-y), -Iarch/sh/include/$(d)) \
 		   $(foreach d, $(machdir-y), -Iarch/sh/include/$(d))
 
@@ -211,7 +210,7 @@ BOOT_TARGETS = uImage uImage.bz2 uImage.gz uImage.lzma uImage.xz uImage.lzo \
 	       romImage
 PHONY += $(BOOT_TARGETS)
 
-all: $(KBUILD_IMAGE)
+all: $(notdir $(KBUILD_IMAGE))
 
 $(BOOT_TARGETS): vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
index 60613ae78513..b15bf6bc0e94 100644
--- a/arch/sh/include/uapi/asm/Kbuild
+++ b/arch/sh/include/uapi/asm/Kbuild
@@ -1,25 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += byteorder.h
-header-y += cachectl.h
-header-y += cpu-features.h
-header-y += hw_breakpoint.h
-header-y += ioctls.h
-header-y += posix_types.h
-header-y += posix_types_32.h
-header-y += posix_types_64.h
-header-y += ptrace.h
-header-y += ptrace_32.h
-header-y += ptrace_64.h
-header-y += setup.h
-header-y += sigcontext.h
-header-y += signal.h
-header-y += sockios.h
-header-y += stat.h
-header-y += swab.h
-header-y += types.h
-header-y += unistd.h
-header-y += unistd_32.h
-header-y += unistd_64.h
diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
index 08e7af0be4a7..6a1a1297baae 100644
--- a/arch/sh/mm/mmap.c
+++ b/arch/sh/mm/mmap.c
@@ -64,7 +64,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -114,7 +114,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 58243b0d21c0..5639c9fe5b55 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -192,9 +192,9 @@ config NR_CPUS
 	int "Maximum number of CPUs"
 	depends on SMP
 	range 2 32 if SPARC32
-	range 2 1024 if SPARC64
+	range 2 4096 if SPARC64
 	default 32 if SPARC32
-	default 64 if SPARC64
+	default 4096 if SPARC64
 
 source kernel/Kconfig.hz
 
@@ -295,9 +295,13 @@ config NUMA
 	depends on SPARC64 && SMP
 
 config NODES_SHIFT
-	int
-	default "4"
+	int "Maximum NUMA Nodes (as a power of 2)"
+	range 4 5 if SPARC64
+	default "5"
 	depends on NEED_MULTIPLE_NODES
+	help
+	  Specify the maximum number of NUMA Nodes available on the target
+	  system.  Increases memory reserved to accommodate various tables.
 
 # Some NUMA nodes have memory ranges that span
 # other nodes.  Even though a pfn is valid and
@@ -573,9 +577,6 @@ config SYSVIPC_COMPAT
 	depends on COMPAT && SYSVIPC
 	default y
 
-config KEYS_COMPAT
-	def_bool y if COMPAT && KEYS
-
 endmenu
 
 source "net/Kconfig"
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index dcbf985ab243..d1f837dc77a4 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -24,9 +24,11 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 static inline int prepare_hugepage_range(struct file *file,
 			unsigned long addr, unsigned long len)
 {
-	if (len & ~HPAGE_MASK)
+	struct hstate *h = hstate_file(file);
+
+	if (len & ~huge_page_mask(h))
 		return -EINVAL;
-	if (addr & ~HPAGE_MASK)
+	if (addr & ~huge_page_mask(h))
 		return -EINVAL;
 	return 0;
 }
diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h
index f7de0dbc38af..83b36a5371ff 100644
--- a/arch/sparc/include/asm/mmu_64.h
+++ b/arch/sparc/include/asm/mmu_64.h
@@ -52,7 +52,7 @@
 #define CTX_NR_MASK		TAG_CONTEXT_BITS
 #define CTX_HW_MASK		(CTX_NR_MASK | CTX_PGSZ_MASK)
 
-#define CTX_FIRST_VERSION	((_AC(1,UL) << CTX_VERSION_SHIFT) + _AC(1,UL))
+#define CTX_FIRST_VERSION	BIT(CTX_VERSION_SHIFT)
 #define CTX_VALID(__ctx)	\
 	 (!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK))
 #define CTX_HWBITS(__ctx)	((__ctx.sparc64_ctx_val) & CTX_HW_MASK)
diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h
index 22fede6eba11..2cddcda4f85f 100644
--- a/arch/sparc/include/asm/mmu_context_64.h
+++ b/arch/sparc/include/asm/mmu_context_64.h
@@ -19,13 +19,8 @@ extern spinlock_t ctx_alloc_lock;
 extern unsigned long tlb_context_cache;
 extern unsigned long mmu_context_bmap[];
 
+DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm);
 void get_new_mmu_context(struct mm_struct *mm);
-#ifdef CONFIG_SMP
-void smp_new_mmu_context_version(void);
-#else
-#define smp_new_mmu_context_version() do { } while (0)
-#endif
-
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
 void destroy_context(struct mm_struct *mm);
 
@@ -76,8 +71,9 @@ void __flush_tlb_mm(unsigned long, unsigned long);
 static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk)
 {
 	unsigned long ctx_valid, flags;
-	int cpu;
+	int cpu = smp_processor_id();
 
+	per_cpu(per_cpu_secondary_mm, cpu) = mm;
 	if (unlikely(mm == &init_mm))
 		return;
 
@@ -123,7 +119,6 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
 	 * for the first time, we must flush that context out of the
 	 * local TLB.
 	 */
-	cpu = smp_processor_id();
 	if (!ctx_valid || !cpumask_test_cpu(cpu, mm_cpumask(mm))) {
 		cpumask_set_cpu(cpu, mm_cpumask(mm));
 		__flush_tlb_mm(CTX_HWBITS(mm->context),
@@ -133,26 +128,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
 }
 
 #define deactivate_mm(tsk,mm)	do { } while (0)
-
-/* Activate a new MM instance for the current task. */
-static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm)
-{
-	unsigned long flags;
-	int cpu;
-
-	spin_lock_irqsave(&mm->context.lock, flags);
-	if (!CTX_VALID(mm->context))
-		get_new_mmu_context(mm);
-	cpu = smp_processor_id();
-	if (!cpumask_test_cpu(cpu, mm_cpumask(mm)))
-		cpumask_set_cpu(cpu, mm_cpumask(mm));
-
-	load_secondary_context(mm);
-	__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
-	tsb_context_switch(mm);
-	spin_unlock_irqrestore(&mm->context.lock, flags);
-}
-
+#define activate_mm(active_mm, mm) switch_mm(active_mm, mm, NULL)
 #endif /* !(__ASSEMBLY__) */
 
 #endif /* !(__SPARC64_MMU_CONTEXT_H) */
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index ce6f56980aef..cf190728360b 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -91,9 +91,9 @@ extern unsigned long pfn_base;
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
  */
-extern unsigned long empty_zero_page;
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
-#define ZERO_PAGE(vaddr) (virt_to_page(&empty_zero_page))
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
 
 /*
  * In general all page table modifications should use the V8 atomic
diff --git a/arch/sparc/include/asm/pil.h b/arch/sparc/include/asm/pil.h
index 266937030546..522b43db2ed3 100644
--- a/arch/sparc/include/asm/pil.h
+++ b/arch/sparc/include/asm/pil.h
@@ -20,7 +20,6 @@
 #define PIL_SMP_CALL_FUNC	1
 #define PIL_SMP_RECEIVE_SIGNAL	2
 #define PIL_SMP_CAPTURE		3
-#define PIL_SMP_CTX_NEW_VERSION	4
 #define PIL_DEVICE_IRQ		5
 #define PIL_SMP_CALL_FUNC_SNGL	6
 #define PIL_DEFERRED_PCR_WORK	7
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index dd27159819eb..b395e5620c0b 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -67,9 +67,6 @@ struct thread_struct {
 	.current_ds = KERNEL_DS, \
 }
 
-/* Return saved PC of a blocked thread. */
-unsigned long thread_saved_pc(struct task_struct *t);
-
 /* Do necessary setup to start up a newly executed thread. */
 static inline void start_thread(struct pt_regs * regs, unsigned long pc,
 				    unsigned long sp)
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index b58ee9018433..f04dc5a43062 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -89,9 +89,7 @@ struct thread_struct {
 #include <linux/types.h>
 #include <asm/fpumacro.h>
 
-/* Return saved PC of a blocked thread. */
 struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *);
 
 /* On Uniprocessor, even in RMO processes see TSO semantics */
 #ifdef CONFIG_SMP
diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 478bf6bb4598..3fae200dd251 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -16,7 +16,7 @@ extern char reboot_command[];
  */
 extern unsigned char boot_cpu_id;
 
-extern unsigned long empty_zero_page;
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
 extern int serial_console;
 static inline int con_is_present(void)
diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h
index 8174f6cdbbbb..9dca7a892978 100644
--- a/arch/sparc/include/asm/vio.h
+++ b/arch/sparc/include/asm/vio.h
@@ -327,6 +327,7 @@ struct vio_dev {
 	int			compat_len;
 
 	u64			dev_no;
+	u64			id;
 
 	unsigned long		channel_id;
 
diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild
index b5843ee09fb5..b15bf6bc0e94 100644
--- a/arch/sparc/include/uapi/asm/Kbuild
+++ b/arch/sparc/include/uapi/asm/Kbuild
@@ -1,50 +1,2 @@
 # UAPI Header export list
-# User exported sparc header files
-
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += apc.h
-header-y += asi.h
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += display7seg.h
-header-y += envctrl.h
-header-y += errno.h
-header-y += fbio.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += jsflash.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += openpromio.h
-header-y += param.h
-header-y += perfctr.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += psr.h
-header-y += psrcompat.h
-header-y += pstate.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += traps.h
-header-y += uctx.h
-header-y += unistd.h
-header-y += utrap.h
-header-y += watchdog.h
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
index b542cc7c8d94..f87265afb175 100644
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -909,7 +909,7 @@ static int register_services(struct ds_info *dp)
 		pbuf.req.handle = cp->handle;
 		pbuf.req.major = 1;
 		pbuf.req.minor = 0;
-		strcpy(pbuf.req.svc_id, cp->service_id);
+		strcpy(pbuf.id_buf, cp->service_id);
 
 		err = __ds_send(lp, &pbuf, msg_len);
 		if (err > 0)
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
index 6bcff698069b..cec54dc4ab81 100644
--- a/arch/sparc/kernel/ftrace.c
+++ b/arch/sparc/kernel/ftrace.c
@@ -130,17 +130,16 @@ unsigned long prepare_ftrace_return(unsigned long parent,
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		return parent + 8UL;
 
-	if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
-				     frame_pointer, NULL) == -EBUSY)
-		return parent + 8UL;
-
 	trace.func = self_addr;
+	trace.depth = current->curr_ret_stack + 1;
 
 	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace)) {
-		current->curr_ret_stack--;
+	if (!ftrace_graph_entry(&trace))
+		return parent + 8UL;
+
+	if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
+				     frame_pointer, NULL) == -EBUSY)
 		return parent + 8UL;
-	}
 
 	return return_hooker;
 }
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 4d0248aa0928..99dd133a029f 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -1034,17 +1034,26 @@ static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb)
 {
 #ifdef CONFIG_SMP
 	unsigned long page;
+	void *mondo, *p;
 
-	BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
+	BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > PAGE_SIZE);
+
+	/* Make sure mondo block is 64byte aligned */
+	p = kzalloc(127, GFP_KERNEL);
+	if (!p) {
+		prom_printf("SUN4V: Error, cannot allocate mondo block.\n");
+		prom_halt();
+	}
+	mondo = (void *)(((unsigned long)p + 63) & ~0x3f);
+	tb->cpu_mondo_block_pa = __pa(mondo);
 
 	page = get_zeroed_page(GFP_KERNEL);
 	if (!page) {
-		prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
+		prom_printf("SUN4V: Error, cannot allocate cpu list page.\n");
 		prom_halt();
 	}
 
-	tb->cpu_mondo_block_pa = __pa(page);
-	tb->cpu_list_pa = __pa(page + 64);
+	tb->cpu_list_pa = __pa(page);
 #endif
 }
 
diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h
index c9804551262c..6ae1e77be0bf 100644
--- a/arch/sparc/kernel/kernel.h
+++ b/arch/sparc/kernel/kernel.h
@@ -37,7 +37,6 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
 /* smp_64.c */
 void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs);
 void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs);
-void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs);
 void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs);
 void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs);
 
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index b6dac8e980f0..9245f93398c7 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -177,14 +177,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
 }
 
 /*
- * Note: sparc64 has a pretty intricated thread_saved_pc, check it out.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return task_thread_info(tsk)->kpc;
-}
-
-/*
  * Free current thread data structures etc..
  */
 void exit_thread(struct task_struct *tsk)
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 1badc493e62e..b96104da5bd6 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -400,25 +400,6 @@ core_initcall(sparc_sysrq_init);
 
 #endif
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct thread_info *ti = task_thread_info(tsk);
-	unsigned long ret = 0xdeadbeefUL;
-	
-	if (ti && ti->ksp) {
-		unsigned long *sp;
-		sp = (unsigned long *)(ti->ksp + STACK_BIAS);
-		if (((unsigned long)sp & (sizeof(long) - 1)) == 0UL &&
-		    sp[14]) {
-			unsigned long *fp;
-			fp = (unsigned long *)(sp[14] + STACK_BIAS);
-			if (((unsigned long)fp & (sizeof(long) - 1)) == 0UL)
-				ret = fp[15];
-		}
-	}
-	return ret;
-}
-
 /* Free current thread data structures etc.. */
 void exit_thread(struct task_struct *tsk)
 {
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index b3bc0ac757cc..fdf31040a7dc 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -964,37 +964,6 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
 	preempt_enable();
 }
 
-void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
-{
-	struct mm_struct *mm;
-	unsigned long flags;
-
-	clear_softint(1 << irq);
-
-	/* See if we need to allocate a new TLB context because
-	 * the version of the one we are using is now out of date.
-	 */
-	mm = current->active_mm;
-	if (unlikely(!mm || (mm == &init_mm)))
-		return;
-
-	spin_lock_irqsave(&mm->context.lock, flags);
-
-	if (unlikely(!CTX_VALID(mm->context)))
-		get_new_mmu_context(mm);
-
-	spin_unlock_irqrestore(&mm->context.lock, flags);
-
-	load_secondary_context(mm);
-	__flush_tlb_mm(CTX_HWBITS(mm->context),
-		       SECONDARY_CONTEXT);
-}
-
-void smp_new_mmu_context_version(void)
-{
-	smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
-}
-
 #ifdef CONFIG_KGDB
 void kgdb_roundup_cpus(unsigned long flags)
 {
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index ef4520efc813..043544d0cda3 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -120,7 +120,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
 
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -183,7 +183,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
index 10689cfd0ad4..07c0df924960 100644
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -455,13 +455,16 @@ __tsb_context_switch:
 	.type	copy_tsb,#function
 copy_tsb:		/* %o0=old_tsb_base, %o1=old_tsb_size
 			 * %o2=new_tsb_base, %o3=new_tsb_size
+			 * %o4=page_size_shift
 			 */
 	sethi		%uhi(TSB_PASS_BITS), %g7
 	srlx		%o3, 4, %o3
-	add		%o0, %o1, %g1	/* end of old tsb */
+	add		%o0, %o1, %o1	/* end of old tsb */
 	sllx		%g7, 32, %g7
 	sub		%o3, 1, %o3	/* %o3 == new tsb hash mask */
 
+	mov		%o4, %g1	/* page_size_shift */
+
 661:	prefetcha	[%o0] ASI_N, #one_read
 	.section	.tsb_phys_patch, "ax"
 	.word		661b
@@ -486,9 +489,9 @@ copy_tsb:		/* %o0=old_tsb_base, %o1=old_tsb_size
 	/* This can definitely be computed faster... */
 	srlx		%o0, 4, %o5	/* Build index */
 	and		%o5, 511, %o5	/* Mask index */
-	sllx		%o5, PAGE_SHIFT, %o5 /* Put into vaddr position */
+	sllx		%o5, %g1, %o5	/* Put into vaddr position */
 	or		%o4, %o5, %o4	/* Full VADDR. */
-	srlx		%o4, PAGE_SHIFT, %o4 /* Shift down to create index */
+	srlx		%o4, %g1, %o4	/* Shift down to create index */
 	and		%o4, %o3, %o4	/* Mask with new_tsb_nents-1 */
 	sllx		%o4, 4, %o4	/* Shift back up into tsb ent offset */
 	TSB_STORE(%o2 + %o4, %g2)	/* Store TAG */
@@ -496,7 +499,7 @@ copy_tsb:		/* %o0=old_tsb_base, %o1=old_tsb_size
 	TSB_STORE(%o2 + %o4, %g3)	/* Store TTE */
 
 80:	add		%o0, 16, %o0
-	cmp		%o0, %g1
+	cmp		%o0, %o1
 	bne,pt		%xcc, 90b
 	 nop
 
diff --git a/arch/sparc/kernel/ttable_64.S b/arch/sparc/kernel/ttable_64.S
index 7bd8f6556352..efe93ab4a9c0 100644
--- a/arch/sparc/kernel/ttable_64.S
+++ b/arch/sparc/kernel/ttable_64.S
@@ -50,7 +50,7 @@ tl0_resv03e:	BTRAP(0x3e) BTRAP(0x3f) BTRAP(0x40)
 tl0_irq1:	TRAP_IRQ(smp_call_function_client, 1)
 tl0_irq2:	TRAP_IRQ(smp_receive_signal_client, 2)
 tl0_irq3:	TRAP_IRQ(smp_penguin_jailcell, 3)
-tl0_irq4:	TRAP_IRQ(smp_new_mmu_context_version_client, 4)
+tl0_irq4:       BTRAP(0x44)
 #else
 tl0_irq1:	BTRAP(0x41)
 tl0_irq2:	BTRAP(0x42)
diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c
index f6bb857254fc..075d38980dee 100644
--- a/arch/sparc/kernel/vio.c
+++ b/arch/sparc/kernel/vio.c
@@ -302,13 +302,16 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp,
 	if (!id) {
 		dev_set_name(&vdev->dev, "%s", bus_id_name);
 		vdev->dev_no = ~(u64)0;
+		vdev->id = ~(u64)0;
 	} else if (!cfg_handle) {
 		dev_set_name(&vdev->dev, "%s-%llu", bus_id_name, *id);
 		vdev->dev_no = *id;
+		vdev->id = ~(u64)0;
 	} else {
 		dev_set_name(&vdev->dev, "%s-%llu-%llu", bus_id_name,
 			     *cfg_handle, *id);
 		vdev->dev_no = *cfg_handle;
+		vdev->id = *id;
 	}
 
 	vdev->dev.parent = parent;
@@ -351,27 +354,84 @@ static void vio_add(struct mdesc_handle *hp, u64 node)
 	(void) vio_create_one(hp, node, &root_vdev->dev);
 }
 
+struct vio_md_node_query {
+	const char *type;
+	u64 dev_no;
+	u64 id;
+};
+
 static int vio_md_node_match(struct device *dev, void *arg)
 {
+	struct vio_md_node_query *query = (struct vio_md_node_query *) arg;
 	struct vio_dev *vdev = to_vio_dev(dev);
 
-	if (vdev->mp == (u64) arg)
-		return 1;
+	if (vdev->dev_no != query->dev_no)
+		return 0;
+	if (vdev->id != query->id)
+		return 0;
+	if (strcmp(vdev->type, query->type))
+		return 0;
 
-	return 0;
+	return 1;
 }
 
 static void vio_remove(struct mdesc_handle *hp, u64 node)
 {
+	const char *type;
+	const u64 *id, *cfg_handle;
+	u64 a;
+	struct vio_md_node_query query;
 	struct device *dev;
 
-	dev = device_find_child(&root_vdev->dev, (void *) node,
+	type = mdesc_get_property(hp, node, "device-type", NULL);
+	if (!type) {
+		type = mdesc_get_property(hp, node, "name", NULL);
+		if (!type)
+			type = mdesc_node_name(hp, node);
+	}
+
+	query.type = type;
+
+	id = mdesc_get_property(hp, node, "id", NULL);
+	cfg_handle = NULL;
+	mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) {
+		u64 target;
+
+		target = mdesc_arc_target(hp, a);
+		cfg_handle = mdesc_get_property(hp, target,
+						"cfg-handle", NULL);
+		if (cfg_handle)
+			break;
+	}
+
+	if (!id) {
+		query.dev_no = ~(u64)0;
+		query.id = ~(u64)0;
+	} else if (!cfg_handle) {
+		query.dev_no = *id;
+		query.id = ~(u64)0;
+	} else {
+		query.dev_no = *cfg_handle;
+		query.id = *id;
+	}
+
+	dev = device_find_child(&root_vdev->dev, &query,
 				vio_md_node_match);
 	if (dev) {
 		printk(KERN_INFO "VIO: Removing device %s\n", dev_name(dev));
 
 		device_unregister(dev);
 		put_device(dev);
+	} else {
+		if (!id)
+			printk(KERN_ERR "VIO: Removed unknown %s node.\n",
+			       type);
+		else if (!cfg_handle)
+			printk(KERN_ERR "VIO: Removed unknown %s node %llu.\n",
+			       type, *id);
+		else
+			printk(KERN_ERR "VIO: Removed unknown %s node %llu-%llu.\n",
+			       type, *cfg_handle, *id);
 	}
 }
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 69912d2f8b54..07c03e72d812 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -15,6 +15,7 @@ lib-$(CONFIG_SPARC32) += copy_user.o locks.o
 lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
+lib-$(CONFIG_SPARC64) += multi3.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/multi3.S b/arch/sparc/lib/multi3.S
new file mode 100644
index 000000000000..d6b6c97fe3c7
--- /dev/null
+++ b/arch/sparc/lib/multi3.S
@@ -0,0 +1,35 @@
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
+	.align	4
+ENTRY(__multi3) /* %o0 = u, %o1 = v */
+	mov	%o1, %g1
+	srl	%o3, 0, %g4
+	mulx	%g4, %g1, %o1
+	srlx	%g1, 0x20, %g3
+	mulx	%g3, %g4, %g5
+	sllx	%g5, 0x20, %o5
+	srl	%g1, 0, %g4
+	sub	%o1, %o5, %o5
+	srlx	%o5, 0x20, %o5
+	addcc	%g5, %o5, %g5
+	srlx	%o3, 0x20, %o5
+	mulx	%g4, %o5, %g4
+	mulx	%g3, %o5, %o5
+	sethi	%hi(0x80000000), %g3
+	addcc	%g5, %g4, %g5
+	srlx	%g5, 0x20, %g5
+	add	%g3, %g3, %g3
+	movcc	%xcc, %g0, %g3
+	addcc	%o5, %g5, %o5
+	sllx	%g4, 0x20, %g4
+	add	%o1, %g4, %o1
+	add	%o5, %g3, %g2
+	mulx	%g1, %o2, %g1
+	add	%g1, %g2, %g1
+	mulx	%o0, %o3, %o0
+	retl
+	 add	%g1, %o0, %o0
+ENDPROC(__multi3)
+EXPORT_SYMBOL(__multi3)
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 7c29d38e6b99..88855e383b34 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -120,7 +120,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index c6afe98de4d9..3bd0d513bddb 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -290,7 +290,7 @@ void __init mem_init(void)
 
 
 	/* Saves us work later. */
-	memset((void *)&empty_zero_page, 0, PAGE_SIZE);
+	memset((void *)empty_zero_page, 0, PAGE_SIZE);
 
 	i = last_valid_pfn >> ((20 - PAGE_SHIFT) + 5);
 	i += 1;
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 0cda653ae007..3c40ebd50f92 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -358,7 +358,8 @@ static int __init setup_hugepagesz(char *string)
 	}
 
 	if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) {
-		pr_warn("hugepagesz=%llu not supported by MMU.\n",
+		hugetlb_bad_size();
+		pr_err("hugepagesz=%llu not supported by MMU.\n",
 			hugepage_size);
 		goto out;
 	}
@@ -706,10 +707,58 @@ EXPORT_SYMBOL(__flush_dcache_range);
 
 /* get_new_mmu_context() uses "cache + 1".  */
 DEFINE_SPINLOCK(ctx_alloc_lock);
-unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1;
+unsigned long tlb_context_cache = CTX_FIRST_VERSION;
 #define MAX_CTX_NR	(1UL << CTX_NR_BITS)
 #define CTX_BMAP_SLOTS	BITS_TO_LONGS(MAX_CTX_NR)
 DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
+DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0};
+
+static void mmu_context_wrap(void)
+{
+	unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK;
+	unsigned long new_ver, new_ctx, old_ctx;
+	struct mm_struct *mm;
+	int cpu;
+
+	bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS);
+
+	/* Reserve kernel context */
+	set_bit(0, mmu_context_bmap);
+
+	new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION;
+	if (unlikely(new_ver == 0))
+		new_ver = CTX_FIRST_VERSION;
+	tlb_context_cache = new_ver;
+
+	/*
+	 * Make sure that any new mm that are added into per_cpu_secondary_mm,
+	 * are going to go through get_new_mmu_context() path.
+	 */
+	mb();
+
+	/*
+	 * Updated versions to current on those CPUs that had valid secondary
+	 * contexts
+	 */
+	for_each_online_cpu(cpu) {
+		/*
+		 * If a new mm is stored after we took this mm from the array,
+		 * it will go into get_new_mmu_context() path, because we
+		 * already bumped the version in tlb_context_cache.
+		 */
+		mm = per_cpu(per_cpu_secondary_mm, cpu);
+
+		if (unlikely(!mm || mm == &init_mm))
+			continue;
+
+		old_ctx = mm->context.sparc64_ctx_val;
+		if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) {
+			new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver;
+			set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap);
+			mm->context.sparc64_ctx_val = new_ctx;
+		}
+	}
+}
 
 /* Caller does TLB context flushing on local CPU if necessary.
  * The caller also ensures that CTX_VALID(mm->context) is false.
@@ -725,48 +774,30 @@ void get_new_mmu_context(struct mm_struct *mm)
 {
 	unsigned long ctx, new_ctx;
 	unsigned long orig_pgsz_bits;
-	int new_version;
 
 	spin_lock(&ctx_alloc_lock);
+retry:
+	/* wrap might have happened, test again if our context became valid */
+	if (unlikely(CTX_VALID(mm->context)))
+		goto out;
 	orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
 	ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
 	new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
-	new_version = 0;
 	if (new_ctx >= (1 << CTX_NR_BITS)) {
 		new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
 		if (new_ctx >= ctx) {
-			int i;
-			new_ctx = (tlb_context_cache & CTX_VERSION_MASK) +
-				CTX_FIRST_VERSION;
-			if (new_ctx == 1)
-				new_ctx = CTX_FIRST_VERSION;
-
-			/* Don't call memset, for 16 entries that's just
-			 * plain silly...
-			 */
-			mmu_context_bmap[0] = 3;
-			mmu_context_bmap[1] = 0;
-			mmu_context_bmap[2] = 0;
-			mmu_context_bmap[3] = 0;
-			for (i = 4; i < CTX_BMAP_SLOTS; i += 4) {
-				mmu_context_bmap[i + 0] = 0;
-				mmu_context_bmap[i + 1] = 0;
-				mmu_context_bmap[i + 2] = 0;
-				mmu_context_bmap[i + 3] = 0;
-			}
-			new_version = 1;
-			goto out;
+			mmu_context_wrap();
+			goto retry;
 		}
 	}
+	if (mm->context.sparc64_ctx_val)
+		cpumask_clear(mm_cpumask(mm));
 	mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
 	new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
-out:
 	tlb_context_cache = new_ctx;
 	mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
+out:
 	spin_unlock(&ctx_alloc_lock);
-
-	if (unlikely(new_version))
-		smp_new_mmu_context_version();
 }
 
 static int numa_enabled = 1;
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index bedf08b22a47..0d4b998c7d7b 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -496,7 +496,8 @@ retry_tsb_alloc:
 		extern void copy_tsb(unsigned long old_tsb_base,
 				     unsigned long old_tsb_size,
 				     unsigned long new_tsb_base,
-				     unsigned long new_tsb_size);
+				     unsigned long new_tsb_size,
+				     unsigned long page_size_shift);
 		unsigned long old_tsb_base = (unsigned long) old_tsb;
 		unsigned long new_tsb_base = (unsigned long) new_tsb;
 
@@ -504,7 +505,9 @@ retry_tsb_alloc:
 			old_tsb_base = __pa(old_tsb_base);
 			new_tsb_base = __pa(new_tsb_base);
 		}
-		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
+		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size,
+			tsb_index == MM_TSB_BASE ?
+			PAGE_SHIFT : REAL_HPAGE_SHIFT);
 	}
 
 	mm->context.tsb_block[tsb_index].tsb = new_tsb;
diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S
index 5d2fd6cd3189..fcf4d27a38fb 100644
--- a/arch/sparc/mm/ultra.S
+++ b/arch/sparc/mm/ultra.S
@@ -971,11 +971,6 @@ xcall_capture:
 	wr		%g0, (1 << PIL_SMP_CAPTURE), %set_softint
 	retry
 
-	.globl		xcall_new_mmu_context_version
-xcall_new_mmu_context_version:
-	wr		%g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint
-	retry
-
 #ifdef CONFIG_KGDB
 	.globl		xcall_kgdb_capture
 xcall_kgdb_capture:
diff --git a/arch/tile/include/arch/Kbuild b/arch/tile/include/arch/Kbuild
deleted file mode 100644
index 3751c9fabcf2..000000000000
--- a/arch/tile/include/arch/Kbuild
+++ /dev/null
@@ -1 +0,0 @@
-# Tile arch headers
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 24c44e93804d..16f0b08c8ce9 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -1,6 +1,3 @@
-
-header-y += ../arch/
-
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += clkdev.h
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 0bc9968b97a1..f71e5206650b 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -214,13 +214,6 @@ static inline void release_thread(struct task_struct *dead_task)
 
 extern void prepare_exit_to_usermode(struct pt_regs *regs, u32 flags);
 
-
-/*
- * Return saved (kernel) PC of a blocked thread.
- * Only used in a printk() in kernel/sched/core.c, so don't work too hard.
- */
-#define thread_saved_pc(t)   ((t)->thread.pc)
-
 unsigned long get_wchan(struct task_struct *p);
 
 /* Return initial ksp value for given task. */
diff --git a/arch/tile/include/uapi/arch/Kbuild b/arch/tile/include/uapi/arch/Kbuild
deleted file mode 100644
index 97dfbecec6b6..000000000000
--- a/arch/tile/include/uapi/arch/Kbuild
+++ /dev/null
@@ -1,17 +0,0 @@
-# UAPI Header export list
-header-y += abi.h
-header-y += chip.h
-header-y += chip_tilegx.h
-header-y += chip_tilepro.h
-header-y += icache.h
-header-y += interrupts.h
-header-y += interrupts_32.h
-header-y += interrupts_64.h
-header-y += opcode.h
-header-y += opcode_tilegx.h
-header-y += opcode_tilepro.h
-header-y += sim.h
-header-y += sim_def.h
-header-y += spr_def.h
-header-y += spr_def_32.h
-header-y += spr_def_64.h
diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild
index c20db8e428bf..0c74c3c5ebfa 100644
--- a/arch/tile/include/uapi/asm/Kbuild
+++ b/arch/tile/include/uapi/asm/Kbuild
@@ -1,21 +1,4 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += cachectl.h
-header-y += hardwall.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += ptrace.h
-header-y += setup.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += stat.h
-header-y += swab.h
-header-y += ucontext.h
-header-y += unistd.h
-
 generic-y += ucontext.h
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index 1a70e6c0f259..94709ab41ed8 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -24,8 +24,7 @@
  * has an opportunity to return -EFAULT to the user if needed.
  * The 64-bit routines just return a "long long" with the value,
  * since they are only used from kernel space and don't expect to fault.
- * Support for 16-bit ops is included in the framework but we don't provide
- * any (x86_64 has an atomic_inc_short(), so we might want to some day).
+ * Support for 16-bit ops is included in the framework but we don't provide any.
  *
  * Note that the caller is advised to issue a suitable L1 or L2
  * prefetch on the address being manipulated to avoid extra stalls.
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index cb10153b5c9f..03e5cc4e76e4 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -233,7 +233,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (current->mm->get_unmapped_area == arch_get_unmapped_area)
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index ed9c5b5ff028..85f6dd204ab6 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -57,3 +57,8 @@ config HZ
 config SUBARCH
 	string
 	option env="SUBARCH"
+
+config NR_CPUS
+	int
+	range 1 1
+	default 1
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 85410279beab..b55fe9bf5d3e 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -534,7 +534,7 @@ static void ubd_handler(void)
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 			blk_end_request(
 				(*irq_req_buffer)[count]->req,
-				0,
+				BLK_STS_OK,
 				(*irq_req_buffer)[count]->length
 			);
 			kfree((*irq_req_buffer)[count]);
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 2d1e0dd5bb0b..f6d1a3f747a9 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -58,8 +58,6 @@ static inline void release_thread(struct task_struct *task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 static inline void mm_copy_segments(struct mm_struct *from_mm,
 				    struct mm_struct *new_mm)
 {
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
index 48bae81f8dca..6f6e7896e53f 100644
--- a/arch/um/kernel/initrd.c
+++ b/arch/um/kernel/initrd.c
@@ -14,7 +14,7 @@
 static char *initrd __initdata = NULL;
 static int load_initrd(char *filename, void *buf, int size);
 
-static int __init read_initrd(void)
+int __init read_initrd(void)
 {
 	void *area;
 	long long size;
@@ -46,8 +46,6 @@ static int __init read_initrd(void)
 	return 0;
 }
 
-__uml_postsetup(read_initrd);
-
 static int __init uml_initrd_setup(char *line, int *add)
 {
 	initrd = line;
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index a76295f7ede9..6b995e870d55 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -20,10 +20,8 @@
 
 static void _print_addr(void *data, unsigned long address, int reliable)
 {
-	pr_info(" [<%08lx>]", address);
-	pr_cont(" %s", reliable ? "" : "? ");
-	print_symbol("%s", address);
-	pr_cont("\n");
+	pr_info(" [<%08lx>] %s%pF\n", address, reliable ? "" : "? ",
+		(void *)address);
 }
 
 static const struct stacktrace_ops stackops = {
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 4b85acd4020c..7b5640117325 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -56,12 +56,6 @@ union thread_union cpu0_irqstack
 	__attribute__((__section__(".data..init_irqstack"))) =
 		{ INIT_THREAD_INFO(init_task) };
 
-unsigned long thread_saved_pc(struct task_struct *task)
-{
-	/* FIXME: Need to look up userspace_pid by cpu */
-	return os_process_pc(userspace_pid[0]);
-}
-
 /* Changed in setup_arch, which is called in early boot */
 static char host_info[(__NEW_UTS_LEN + 1) * 5];
 
@@ -338,11 +332,17 @@ int __init linux_main(int argc, char **argv)
 	return start_uml();
 }
 
+int __init __weak read_initrd(void)
+{
+	return 0;
+}
+
 void __init setup_arch(char **cmdline_p)
 {
 	stack_protections((unsigned long) &init_thread_info);
 	setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
 	mem_total_pages(physmem_size, iomem_size, highmem);
+	read_initrd();
 
 	paging_init();
 	strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 23025d645160..03b3c4cc7735 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -21,6 +21,7 @@
 #include <registers.h>
 #include <skas.h>
 #include <sysdep/stub.h>
+#include <linux/threads.h>
 
 int is_skas_winch(int pid, int fd, void *data)
 {
@@ -233,9 +234,6 @@ static int userspace_tramp(void *stack)
 	return 0;
 }
 
-/* Each element set once, and only accessed by a single processor anyway */
-#undef NR_CPUS
-#define NR_CPUS 1
 int userspace_pid[NR_CPUS];
 
 int start_userspace(unsigned long stub_stack)
diff --git a/arch/unicore32/Makefile b/arch/unicore32/Makefile
index b6f5c4c1eaf9..98a5ca43ae87 100644
--- a/arch/unicore32/Makefile
+++ b/arch/unicore32/Makefile
@@ -43,9 +43,9 @@ boot			:= arch/unicore32/boot
 
 # Default defconfig and target when executing plain make
 KBUILD_DEFCONFIG	:= $(ARCH)_defconfig
-KBUILD_IMAGE		:= zImage
+KBUILD_IMAGE		:= $(boot)/zImage
 
-all:	$(KBUILD_IMAGE)
+all:	zImage
 
 zImage Image uImage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
index 0514d7ad6855..13a97aa2285f 100644
--- a/arch/unicore32/include/uapi/asm/Kbuild
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -1,10 +1,4 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += byteorder.h
-header-y += kvm_para.h
-header-y += ptrace.h
-header-y += sigcontext.h
-header-y += unistd.h
-
 generic-y += kvm_para.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cd18994a9555..737212c0333e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -69,7 +69,7 @@ config X86
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
-	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
+	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select BUILDTIME_EXTABLE_SORT
@@ -360,7 +360,7 @@ config SMP
 	  Management" code will be disabled if you say Y here.
 
 	  See also <file:Documentation/x86/i386/IO-APIC.txt>,
-	  <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
+	  <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at
 	  <http://www.tldp.org/docs.html#howto>.
 
 	  If you don't know what to do here, say N.
@@ -2776,10 +2776,6 @@ config COMPAT_FOR_U64_ALIGNMENT
 config SYSVIPC_COMPAT
 	def_bool y
 	depends on SYSVIPC
-
-config KEYS_COMPAT
-	def_bool y
-	depends on KEYS
 endif
 
 endmenu
@@ -2797,6 +2793,9 @@ config X86_DMA_REMAP
 	bool
 	depends on STA2X11
 
+config HAVE_GENERIC_GUP
+	def_bool y
+
 source "net/Kconfig"
 
 source "drivers/Kconfig"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 4430dd489620..bf240b920473 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -159,7 +159,7 @@ ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	# If '-Os' is enabled, disable it and print a warning.
         ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
           undefine CONFIG_CC_OPTIMIZE_FOR_SIZE
-	  $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE.  Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.)
+          $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE.  Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.)
         endif
 
     endif
@@ -179,7 +179,8 @@ ifdef CONFIG_JUMP_LABEL
 endif
 
 ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
-	KBUILD_CFLAGS += -maccumulate-outgoing-args
+	# This compiler flag is not supported by Clang:
+	KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
 endif
 
 # Stackpointer is addressed different for 32 bit and 64 bit x86
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 44163e8c3868..2c860ad4fe06 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -94,7 +94,7 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
 quiet_cmd_check_data_rel = DATAREL $@
 define cmd_check_data_rel
 	for obj in $(filter %.o,$^); do \
-		readelf -S $$obj | grep -qF .rel.local && { \
+		${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \
 			echo "error: $$obj has data relocations!" >&2; \
 			exit 1; \
 		} || true; \
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index 73ccf63b0f48..9dc1ce6ba3c0 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -13,7 +13,7 @@ static inline char rdfs8(addr_t addr)
 	return *((char *)(fs + addr));
 }
 #include "../cmdline.c"
-static unsigned long get_cmd_line_ptr(void)
+unsigned long get_cmd_line_ptr(void)
 {
 	unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr;
 
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index cbf4b87f55b9..c3e869eaef0c 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1046,9 +1046,31 @@ struct boot_params *efi_main(struct efi_config *c,
 	memset((char *)gdt->address, 0x0, gdt->size);
 	desc = (struct desc_struct *)gdt->address;
 
-	/* The first GDT is a dummy and the second is unused. */
-	desc += 2;
+	/* The first GDT is a dummy. */
+	desc++;
+
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		/* __KERNEL32_CS */
+		desc->limit0 = 0xffff;
+		desc->base0 = 0x0000;
+		desc->base1 = 0x0000;
+		desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
+		desc->s = DESC_TYPE_CODE_DATA;
+		desc->dpl = 0;
+		desc->p = 1;
+		desc->limit = 0xf;
+		desc->avl = 0;
+		desc->l = 0;
+		desc->d = SEG_OP_SIZE_32BIT;
+		desc->g = SEG_GRANULARITY_4KB;
+		desc->base2 = 0x00;
+		desc++;
+	} else {
+		/* Second entry is unused on 32-bit */
+		desc++;
+	}
 
+	/* __KERNEL_CS */
 	desc->limit0 = 0xffff;
 	desc->base0 = 0x0000;
 	desc->base1 = 0x0000;
@@ -1058,12 +1080,18 @@ struct boot_params *efi_main(struct efi_config *c,
 	desc->p = 1;
 	desc->limit = 0xf;
 	desc->avl = 0;
-	desc->l = 0;
-	desc->d = SEG_OP_SIZE_32BIT;
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		desc->l = 1;
+		desc->d = 0;
+	} else {
+		desc->l = 0;
+		desc->d = SEG_OP_SIZE_32BIT;
+	}
 	desc->g = SEG_GRANULARITY_4KB;
 	desc->base2 = 0x00;
-
 	desc++;
+
+	/* __KERNEL_DS */
 	desc->limit0 = 0xffff;
 	desc->base0 = 0x0000;
 	desc->base1 = 0x0000;
@@ -1077,24 +1105,25 @@ struct boot_params *efi_main(struct efi_config *c,
 	desc->d = SEG_OP_SIZE_32BIT;
 	desc->g = SEG_GRANULARITY_4KB;
 	desc->base2 = 0x00;
-
-#ifdef CONFIG_X86_64
-	/* Task segment value */
 	desc++;
-	desc->limit0 = 0x0000;
-	desc->base0 = 0x0000;
-	desc->base1 = 0x0000;
-	desc->type = SEG_TYPE_TSS;
-	desc->s = 0;
-	desc->dpl = 0;
-	desc->p = 1;
-	desc->limit = 0x0;
-	desc->avl = 0;
-	desc->l = 0;
-	desc->d = 0;
-	desc->g = SEG_GRANULARITY_4KB;
-	desc->base2 = 0x00;
-#endif /* CONFIG_X86_64 */
+
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		/* Task segment value */
+		desc->limit0 = 0x0000;
+		desc->base0 = 0x0000;
+		desc->base1 = 0x0000;
+		desc->type = SEG_TYPE_TSS;
+		desc->s = 0;
+		desc->dpl = 0;
+		desc->p = 1;
+		desc->limit = 0x0;
+		desc->avl = 0;
+		desc->l = 0;
+		desc->d = 0;
+		desc->g = SEG_GRANULARITY_4KB;
+		desc->base2 = 0x00;
+		desc++;
+	}
 
 	asm volatile("cli");
 	asm volatile ("lgdt %0" : : "m" (*gdt));
diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h
index 2e59dac07f9e..d732e608e3af 100644
--- a/arch/x86/boot/compressed/error.h
+++ b/arch/x86/boot/compressed/error.h
@@ -1,7 +1,9 @@
 #ifndef BOOT_COMPRESSED_ERROR_H
 #define BOOT_COMPRESSED_ERROR_H
 
+#include <linux/compiler.h>
+
 void warn(char *m);
-void error(char *m);
+void error(char *m) __noreturn;
 
 #endif /* BOOT_COMPRESSED_ERROR_H */
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d2ae1f821e0c..fbf4c32d0b62 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -346,6 +346,48 @@ preferred_addr:
 	/* Set up the stack */
 	leaq	boot_stack_end(%rbx), %rsp
 
+#ifdef CONFIG_X86_5LEVEL
+	/* Check if 5-level paging has already enabled */
+	movq	%cr4, %rax
+	testl	$X86_CR4_LA57, %eax
+	jnz	lvl5
+
+	/*
+	 * At this point we are in long mode with 4-level paging enabled,
+	 * but we want to enable 5-level paging.
+	 *
+	 * The problem is that we cannot do it directly. Setting LA57 in
+	 * long mode would trigger #GP. So we need to switch off long mode
+	 * first.
+	 *
+	 * NOTE: This is not going to work if bootloader put us above 4G
+	 * limit.
+	 *
+	 * The first step is go into compatibility mode.
+	 */
+
+	/* Clear additional page table */
+	leaq	lvl5_pgtable(%rbx), %rdi
+	xorq	%rax, %rax
+	movq	$(PAGE_SIZE/8), %rcx
+	rep	stosq
+
+	/*
+	 * Setup current CR3 as the first and only entry in a new top level
+	 * page table.
+	 */
+	movq	%cr3, %rdi
+	leaq	0x7 (%rdi), %rax
+	movq	%rax, lvl5_pgtable(%rbx)
+
+	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+	pushq	$__KERNEL32_CS
+	leaq	compatible_mode(%rip), %rax
+	pushq	%rax
+	lretq
+lvl5:
+#endif
+
 	/* Zero EFLAGS */
 	pushq	$0
 	popfq
@@ -429,6 +471,44 @@ relocated:
 	jmp	*%rax
 
 	.code32
+#ifdef CONFIG_X86_5LEVEL
+compatible_mode:
+	/* Setup data and stack segments */
+	movl	$__KERNEL_DS, %eax
+	movl	%eax, %ds
+	movl	%eax, %ss
+
+	/* Disable paging */
+	movl	%cr0, %eax
+	btrl	$X86_CR0_PG_BIT, %eax
+	movl	%eax, %cr0
+
+	/* Point CR3 to 5-level paging */
+	leal	lvl5_pgtable(%ebx), %eax
+	movl	%eax, %cr3
+
+	/* Enable PAE and LA57 mode */
+	movl	%cr4, %eax
+	orl	$(X86_CR4_PAE | X86_CR4_LA57), %eax
+	movl	%eax, %cr4
+
+	/* Calculate address we are running at */
+	call	1f
+1:	popl	%edi
+	subl	$1b, %edi
+
+	/* Prepare stack for far return to Long Mode */
+	pushl	$__KERNEL_CS
+	leal	lvl5(%edi), %eax
+	push	%eax
+
+	/* Enable paging back */
+	movl	$(X86_CR0_PG | X86_CR0_PE), %eax
+	movl	%eax, %cr0
+
+	lret
+#endif
+
 no_longmode:
 	/* This isn't an x86-64 CPU so hang */
 1:
@@ -442,7 +522,7 @@ gdt:
 	.word	gdt_end - gdt
 	.long	gdt
 	.word	0
-	.quad	0x0000000000000000	/* NULL descriptor */
+	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
 	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
 	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
 	.quad	0x0080890000000000	/* TS descriptor */
@@ -486,3 +566,7 @@ boot_stack_end:
 	.balign 4096
 pgtable:
 	.fill BOOT_PGT_SIZE, 1, 0
+#ifdef CONFIG_X86_5LEVEL
+lvl5_pgtable:
+	.fill PAGE_SIZE, 1, 0
+#endif
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 54c24f0a43d3..91f27ab970ef 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -9,16 +9,42 @@
  * contain the entire properly aligned running kernel image.
  *
  */
+
+/*
+ * isspace() in linux/ctype.h is expected by next_args() to filter
+ * out "space/lf/tab". While boot/ctype.h conflicts with linux/ctype.h,
+ * since isdigit() is implemented in both of them. Hence disable it
+ * here.
+ */
+#define BOOT_CTYPE_H
+
+/*
+ * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h.
+ * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL
+ * which is meaningless and will cause compiling error in some cases.
+ * So do not include linux/export.h and define EXPORT_SYMBOL(sym)
+ * as empty.
+ */
+#define _LINUX_EXPORT_H
+#define EXPORT_SYMBOL(sym)
+
 #include "misc.h"
 #include "error.h"
-#include "../boot.h"
+#include "../string.h"
 
 #include <generated/compile.h>
 #include <linux/module.h>
 #include <linux/uts.h>
 #include <linux/utsname.h>
+#include <linux/ctype.h>
 #include <generated/utsrelease.h>
 
+/* Macros used by the included decompressor code below. */
+#define STATIC
+#include <linux/decompress/mm.h>
+
+extern unsigned long get_cmd_line_ptr(void);
+
 /* Simplified build-specific string for starting entropy. */
 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
 		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
@@ -62,6 +88,11 @@ struct mem_vector {
 
 static bool memmap_too_large;
 
+
+/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
+unsigned long long mem_limit = ULLONG_MAX;
+
+
 enum mem_avoid_index {
 	MEM_AVOID_ZO_RANGE = 0,
 	MEM_AVOID_INITRD,
@@ -85,49 +116,14 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
 	return true;
 }
 
-/**
- *	_memparse - Parse a string with mem suffixes into a number
- *	@ptr: Where parse begins
- *	@retptr: (output) Optional pointer to next char after parse completes
- *
- *	Parses a string into a number.  The number stored at @ptr is
- *	potentially suffixed with K, M, G, T, P, E.
- */
-static unsigned long long _memparse(const char *ptr, char **retptr)
+char *skip_spaces(const char *str)
 {
-	char *endptr;	/* Local pointer to end of parsed string */
-
-	unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
-
-	switch (*endptr) {
-	case 'E':
-	case 'e':
-		ret <<= 10;
-	case 'P':
-	case 'p':
-		ret <<= 10;
-	case 'T':
-	case 't':
-		ret <<= 10;
-	case 'G':
-	case 'g':
-		ret <<= 10;
-	case 'M':
-	case 'm':
-		ret <<= 10;
-	case 'K':
-	case 'k':
-		ret <<= 10;
-		endptr++;
-	default:
-		break;
-	}
-
-	if (retptr)
-		*retptr = endptr;
-
-	return ret;
+	while (isspace(*str))
+		++str;
+	return (char *)str;
 }
+#include "../../../../lib/ctype.c"
+#include "../../../../lib/cmdline.c"
 
 static int
 parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
@@ -142,40 +138,41 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
 		return -EINVAL;
 
 	oldp = p;
-	*size = _memparse(p, &p);
+	*size = memparse(p, &p);
 	if (p == oldp)
 		return -EINVAL;
 
 	switch (*p) {
-	case '@':
-		/* Skip this region, usable */
-		*start = 0;
-		*size = 0;
-		return 0;
 	case '#':
 	case '$':
 	case '!':
-		*start = _memparse(p + 1, &p);
+		*start = memparse(p + 1, &p);
+		return 0;
+	case '@':
+		/* memmap=nn@ss specifies usable region, should be skipped */
+		*size = 0;
+		/* Fall through */
+	default:
+		/*
+		 * If w/o offset, only size specified, memmap=nn[KMG] has the
+		 * same behaviour as mem=nn[KMG]. It limits the max address
+		 * system can use. Region above the limit should be avoided.
+		 */
+		*start = 0;
 		return 0;
 	}
 
 	return -EINVAL;
 }
 
-static void mem_avoid_memmap(void)
+static void mem_avoid_memmap(char *str)
 {
-	char arg[128];
+	static int i;
 	int rc;
-	int i;
-	char *str;
 
-	/* See if we have any memmap areas */
-	rc = cmdline_find_option("memmap", arg, sizeof(arg));
-	if (rc <= 0)
+	if (i >= MAX_MEMMAP_REGIONS)
 		return;
 
-	i = 0;
-	str = arg;
 	while (str && (i < MAX_MEMMAP_REGIONS)) {
 		int rc;
 		unsigned long long start, size;
@@ -188,9 +185,14 @@ static void mem_avoid_memmap(void)
 		if (rc < 0)
 			break;
 		str = k;
-		/* A usable region that should not be skipped */
-		if (size == 0)
+
+		if (start == 0) {
+			/* Store the specified memory limit if size > 0 */
+			if (size > 0)
+				mem_limit = size;
+
 			continue;
+		}
 
 		mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
 		mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
@@ -202,6 +204,57 @@ static void mem_avoid_memmap(void)
 		memmap_too_large = true;
 }
 
+static int handle_mem_memmap(void)
+{
+	char *args = (char *)get_cmd_line_ptr();
+	size_t len = strlen((char *)args);
+	char *tmp_cmdline;
+	char *param, *val;
+	u64 mem_size;
+
+	if (!strstr(args, "memmap=") && !strstr(args, "mem="))
+		return 0;
+
+	tmp_cmdline = malloc(len + 1);
+	if (!tmp_cmdline )
+		error("Failed to allocate space for tmp_cmdline");
+
+	memcpy(tmp_cmdline, args, len);
+	tmp_cmdline[len] = 0;
+	args = tmp_cmdline;
+
+	/* Chew leading spaces */
+	args = skip_spaces(args);
+
+	while (*args) {
+		args = next_arg(args, &param, &val);
+		/* Stop at -- */
+		if (!val && strcmp(param, "--") == 0) {
+			warn("Only '--' specified in cmdline");
+			free(tmp_cmdline);
+			return -1;
+		}
+
+		if (!strcmp(param, "memmap")) {
+			mem_avoid_memmap(val);
+		} else if (!strcmp(param, "mem")) {
+			char *p = val;
+
+			if (!strcmp(p, "nopentium"))
+				continue;
+			mem_size = memparse(p, &p);
+			if (mem_size == 0) {
+				free(tmp_cmdline);
+				return -EINVAL;
+			}
+			mem_limit = mem_size;
+		}
+	}
+
+	free(tmp_cmdline);
+	return 0;
+}
+
 /*
  * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
  * The mem_avoid array is used to store the ranges that need to be avoided
@@ -323,7 +376,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
 	/* We don't need to set a mapping for setup_data. */
 
 	/* Mark the memmap regions we need to avoid */
-	mem_avoid_memmap();
+	handle_mem_memmap();
 
 #ifdef CONFIG_X86_VERBOSE_BOOTUP
 	/* Make sure video RAM can be used. */
@@ -432,7 +485,8 @@ static void process_e820_entry(struct boot_e820_entry *entry,
 {
 	struct mem_vector region, overlap;
 	struct slot_area slot_area;
-	unsigned long start_orig;
+	unsigned long start_orig, end;
+	struct boot_e820_entry cur_entry;
 
 	/* Skip non-RAM entries. */
 	if (entry->type != E820_TYPE_RAM)
@@ -446,8 +500,15 @@ static void process_e820_entry(struct boot_e820_entry *entry,
 	if (entry->addr + entry->size < minimum)
 		return;
 
-	region.start = entry->addr;
-	region.size = entry->size;
+	/* Ignore entries above memory limit */
+	end = min(entry->size + entry->addr, mem_limit);
+	if (entry->addr >= end)
+		return;
+	cur_entry.addr = entry->addr;
+	cur_entry.size = end - entry->addr;
+
+	region.start = cur_entry.addr;
+	region.size = cur_entry.size;
 
 	/* Give up if slot area array is full. */
 	while (slot_area_index < MAX_SLOT_AREA) {
@@ -461,7 +522,7 @@ static void process_e820_entry(struct boot_e820_entry *entry,
 		region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
 
 		/* Did we raise the address above this e820 region? */
-		if (region.start > entry->addr + entry->size)
+		if (region.start > cur_entry.addr + cur_entry.size)
 			return;
 
 		/* Reduce size by any delta from the original address. */
@@ -564,9 +625,6 @@ void choose_random_location(unsigned long input,
 {
 	unsigned long random_addr, min_addr;
 
-	/* By default, keep output position unchanged. */
-	*virt_addr = *output;
-
 	if (cmdline_find_option_bool("nokaslr")) {
 		warn("KASLR disabled: 'nokaslr' on cmdline.");
 		return;
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b3c5a5f030ce..00241c815524 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -338,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 				  unsigned long output_len)
 {
 	const unsigned long kernel_total_size = VO__end - VO__text;
-	unsigned long virt_addr = (unsigned long)output;
+	unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
 
 	/* Retain x86 boot parameters pointer passed from startup_32/64. */
 	boot_params = rmode;
@@ -390,6 +390,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 #ifdef CONFIG_X86_64
 	if (heap > 0x3fffffffffffUL)
 		error("Destination address too large");
+	if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
+		error("Destination virtual address is beyond the kernel mapping area");
 #else
 	if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
 		error("Destination address too large");
@@ -397,7 +399,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 #ifndef CONFIG_RELOCATABLE
 	if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
 		error("Destination address does not match LOAD_PHYSICAL_ADDR");
-	if ((unsigned long)output != virt_addr)
+	if (virt_addr != LOAD_PHYSICAL_ADDR)
 		error("Destination virtual address changed when not relocatable");
 #endif
 
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 1c8355eadbd1..766a5211f827 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -81,8 +81,6 @@ static inline void choose_random_location(unsigned long input,
 					  unsigned long output_size,
 					  unsigned long *virt_addr)
 {
-	/* No change from existing output location. */
-	*virt_addr = *output;
 }
 #endif
 
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index 56589d0a804b..28029be47fbb 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -63,14 +63,14 @@ static void *alloc_pgt_page(void *context)
 static struct alloc_pgt_data pgt_data;
 
 /* The top level page table entry pointer. */
-static unsigned long level4p;
+static unsigned long top_level_pgt;
 
 /*
  * Mapping information structure passed to kernel_ident_mapping_init().
  * Due to relocation, pointers must be assigned at run time not build time.
  */
 static struct x86_mapping_info mapping_info = {
-	.pmd_flag       = __PAGE_KERNEL_LARGE_EXEC,
+	.page_flag       = __PAGE_KERNEL_LARGE_EXEC,
 };
 
 /* Locates and clears a region for a new top level page table. */
@@ -91,9 +91,15 @@ void initialize_identity_maps(void)
 	 * If we came here via startup_32(), cr3 will be _pgtable already
 	 * and we must append to the existing area instead of entirely
 	 * overwriting it.
+	 *
+	 * With 5-level paging, we use '_pgtable' to allocate the p4d page table,
+	 * the top-level page table is allocated separately.
+	 *
+	 * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
+	 * cases. On 4-level paging it's equal to 'top_level_pgt'.
 	 */
-	level4p = read_cr3();
-	if (level4p == (unsigned long)_pgtable) {
+	top_level_pgt = read_cr3_pa();
+	if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
 		debug_putstr("booted via startup_32()\n");
 		pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
 		pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
@@ -103,7 +109,7 @@ void initialize_identity_maps(void)
 		pgt_data.pgt_buf = _pgtable;
 		pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
 		memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
-		level4p = (unsigned long)alloc_pgt_page(&pgt_data);
+		top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
 	}
 }
 
@@ -123,7 +129,7 @@ void add_identity_map(unsigned long start, unsigned long size)
 		return;
 
 	/* Build the mapping. */
-	kernel_ident_mapping_init(&mapping_info, (pgd_t *)level4p,
+	kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt,
 				  start, end);
 }
 
@@ -134,5 +140,5 @@ void add_identity_map(unsigned long start, unsigned long size)
  */
 void finalize_identity_maps(void)
 {
-	write_cr3(level4p);
+	write_cr3(top_level_pgt);
 }
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S
index 1eb7d298b47d..15d9f74b0008 100644
--- a/arch/x86/boot/copy.S
+++ b/arch/x86/boot/copy.S
@@ -65,23 +65,3 @@ GLOBAL(copy_to_fs)
 	popw	%es
 	retl
 ENDPROC(copy_to_fs)
-
-#if 0 /* Not currently used, but can be enabled as needed */
-GLOBAL(copy_from_gs)
-	pushw	%ds
-	pushw	%gs
-	popw	%ds
-	calll	memcpy
-	popw	%ds
-	retl
-ENDPROC(copy_from_gs)
-
-GLOBAL(copy_to_gs)
-	pushw	%es
-	pushw	%gs
-	popw	%es
-	calll	memcpy
-	popw	%es
-	retl
-ENDPROC(copy_to_gs)
-#endif
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 5457b02fc050..630e3664906b 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -122,6 +122,14 @@ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas
 	return result;
 }
 
+long simple_strtol(const char *cp, char **endp, unsigned int base)
+{
+	if (*cp == '-')
+		return -simple_strtoull(cp + 1, endp, base);
+
+	return simple_strtoull(cp, endp, base);
+}
+
 /**
  * strlen - Find the length of a string
  * @s: The string to be sized
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
index 113588ddb43f..f274a50db5fa 100644
--- a/arch/x86/boot/string.h
+++ b/arch/x86/boot/string.h
@@ -22,6 +22,7 @@ extern int strcmp(const char *str1, const char *str2);
 extern int strncmp(const char *cs, const char *ct, size_t count);
 extern size_t strlen(const char *s);
 extern char *strstr(const char *s1, const char *s2);
+extern char *strchr(const char *s, int c);
 extern size_t strnlen(const char *s, size_t maxlen);
 extern unsigned int atou(const char *s);
 extern unsigned long long simple_strtoull(const char *cp, char **endp,
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 34b3fa2889d1..9e32d40d71bd 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,6 +2,8 @@
 # Arch-specific CryptoAPI modules.
 #
 
+OBJECT_FILES_NON_STANDARD := y
+
 avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
 				$(comma)4)$(comma)%ymm2,yes,no)
diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile
index 2f8756375df5..2e14acc3da25 100644
--- a/arch/x86/crypto/sha1-mb/Makefile
+++ b/arch/x86/crypto/sha1-mb/Makefile
@@ -2,6 +2,8 @@
 # Arch-specific CryptoAPI modules.
 #
 
+OBJECT_FILES_NON_STANDARD := y
+
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
                                 $(comma)4)$(comma)%ymm2,yes,no)
 ifeq ($(avx2_supported),yes)
diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile
index 41089e7c400c..45b4fca6c4a8 100644
--- a/arch/x86/crypto/sha256-mb/Makefile
+++ b/arch/x86/crypto/sha256-mb/Makefile
@@ -2,6 +2,8 @@
 # Arch-specific CryptoAPI modules.
 #
 
+OBJECT_FILES_NON_STANDARD := y
+
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
                                 $(comma)4)$(comma)%ymm2,yes,no)
 ifeq ($(avx2_supported),yes)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 50bc26949e9e..48ef7bb32c42 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,6 +252,23 @@ ENTRY(__switch_to_asm)
 END(__switch_to_asm)
 
 /*
+ * The unwinder expects the last frame on the stack to always be at the same
+ * offset from the end of the page, which allows it to validate the stack.
+ * Calling schedule_tail() directly would break that convention because its an
+ * asmlinkage function so its argument has to be pushed on the stack.  This
+ * wrapper creates a proper "end of stack" frame header before the call.
+ */
+ENTRY(schedule_tail_wrapper)
+	FRAME_BEGIN
+
+	pushl	%eax
+	call	schedule_tail
+	popl	%eax
+
+	FRAME_END
+	ret
+ENDPROC(schedule_tail_wrapper)
+/*
  * A newly forked process directly context switches into this address.
  *
  * eax: prev task we switched from
@@ -259,24 +276,15 @@ END(__switch_to_asm)
  * edi: kernel thread arg
  */
 ENTRY(ret_from_fork)
-	FRAME_BEGIN		/* help unwinder find end of stack */
-
-	/*
-	 * schedule_tail() is asmlinkage so we have to put its 'prev' argument
-	 * on the stack.
-	 */
-	pushl	%eax
-	call	schedule_tail
-	popl	%eax
+	call	schedule_tail_wrapper
 
 	testl	%ebx, %ebx
 	jnz	1f		/* kernel threads are uncommon */
 
 2:
 	/* When we fork, we trace the syscall return in the child, too. */
-	leal	FRAME_OFFSET(%esp), %eax
+	movl    %esp, %eax
 	call    syscall_return_slowpath
-	FRAME_END
 	jmp     restore_all
 
 	/* kernel thread */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 607d72c4a485..a9a8027a6c0e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -36,7 +36,6 @@
 #include <asm/smap.h>
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
-#include <asm/frame.h>
 #include <linux/err.h>
 
 .code64
@@ -266,7 +265,8 @@ return_from_SYSCALL_64:
 	 * If width of "canonical tail" ever becomes variable, this will need
 	 * to be updated to remain correct on both old and new CPUs.
 	 *
-	 * Change top 16 bits to be the sign-extension of 47th bit
+	 * Change top bits to match most significant bit (47th or 56th bit
+	 * depending on paging mode) in the address.
 	 */
 	shl	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
 	sar	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
@@ -406,19 +406,17 @@ END(__switch_to_asm)
  * r12: kernel thread arg
  */
 ENTRY(ret_from_fork)
-	FRAME_BEGIN			/* help unwinder find end of stack */
 	movq	%rax, %rdi
-	call	schedule_tail		/* rdi: 'prev' task parameter */
+	call	schedule_tail			/* rdi: 'prev' task parameter */
 
-	testq	%rbx, %rbx		/* from kernel_thread? */
-	jnz	1f			/* kernel threads are uncommon */
+	testq	%rbx, %rbx			/* from kernel_thread? */
+	jnz	1f				/* kernel threads are uncommon */
 
 2:
-	leaq	FRAME_OFFSET(%rsp),%rdi	/* pt_regs pointer */
+	movq	%rsp, %rdi
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
 	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
 	SWAPGS
-	FRAME_END
 	jmp	restore_regs_and_iret
 
 1:
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 580b60f5ac83..2de0dd73830a 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1750,6 +1750,8 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
 	return ret;
 }
 
+static struct attribute_group x86_pmu_attr_group;
+
 static int __init init_hw_perf_events(void)
 {
 	struct x86_pmu_quirk *quirk;
@@ -1813,6 +1815,14 @@ static int __init init_hw_perf_events(void)
 			x86_pmu_events_group.attrs = tmp;
 	}
 
+	if (x86_pmu.attrs) {
+		struct attribute **tmp;
+
+		tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs);
+		if (!WARN_ON(!tmp))
+			x86_pmu_attr_group.attrs = tmp;
+	}
+
 	pr_info("... version:                %d\n",     x86_pmu.version);
 	pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
 	pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
@@ -2101,8 +2111,7 @@ static int x86_pmu_event_init(struct perf_event *event)
 
 static void refresh_pce(void *ignored)
 {
-	if (current->active_mm)
-		load_mm_cr4(current->active_mm);
+	load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
 }
 
 static void x86_pmu_event_mapped(struct perf_event *event)
@@ -2255,7 +2264,7 @@ static struct pmu pmu = {
 void arch_perf_update_userpage(struct perf_event *event,
 			       struct perf_event_mmap_page *userpg, u64 now)
 {
-	struct cyc2ns_data *data;
+	struct cyc2ns_data data;
 	u64 offset;
 
 	userpg->cap_user_time = 0;
@@ -2267,17 +2276,17 @@ void arch_perf_update_userpage(struct perf_event *event,
 	if (!using_native_sched_clock() || !sched_clock_stable())
 		return;
 
-	data = cyc2ns_read_begin();
+	cyc2ns_read_begin(&data);
 
-	offset = data->cyc2ns_offset + __sched_clock_offset;
+	offset = data.cyc2ns_offset + __sched_clock_offset;
 
 	/*
 	 * Internal timekeeping for enabled/running/stopped times
 	 * is always in the local_clock domain.
 	 */
 	userpg->cap_user_time = 1;
-	userpg->time_mult = data->cyc2ns_mul;
-	userpg->time_shift = data->cyc2ns_shift;
+	userpg->time_mult = data.cyc2ns_mul;
+	userpg->time_shift = data.cyc2ns_shift;
 	userpg->time_offset = offset - now;
 
 	/*
@@ -2289,7 +2298,7 @@ void arch_perf_update_userpage(struct perf_event *event,
 		userpg->time_zero = offset;
 	}
 
-	cyc2ns_read_end(data);
+	cyc2ns_read_end();
 }
 
 void
@@ -2334,7 +2343,7 @@ static unsigned long get_segment_base(unsigned int segment)
 
 		/* IRQs are off, so this synchronizes with smp_store_release */
 		ldt = lockless_dereference(current->active_mm->context.ldt);
-		if (!ldt || idx > ldt->size)
+		if (!ldt || idx > ldt->nr_entries)
 			return 0;
 
 		desc = &ldt->entries[idx];
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a6d91d4e37a1..31acf2a98394 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -431,11 +431,11 @@ static __initconst const u64 skl_hw_cache_event_ids
  [ C(DTLB) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_INST_RETIRED.ALL_LOADS */
-		[ C(RESULT_MISS)   ] = 0x608,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
+		[ C(RESULT_MISS)   ] = 0xe08,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_INST_RETIRED.ALL_STORES */
-		[ C(RESULT_MISS)   ] = 0x649,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
+		[ C(RESULT_MISS)   ] = 0xe49,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
 	},
 	[ C(OP_PREFETCH) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0,
@@ -3160,6 +3160,19 @@ err:
 	return -ENOMEM;
 }
 
+static void flip_smm_bit(void *data)
+{
+	unsigned long set = *(unsigned long *)data;
+
+	if (set > 0) {
+		msr_set_bit(MSR_IA32_DEBUGCTLMSR,
+			    DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
+	} else {
+		msr_clear_bit(MSR_IA32_DEBUGCTLMSR,
+			      DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
+	}
+}
+
 static void intel_pmu_cpu_starting(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
@@ -3174,6 +3187,8 @@ static void intel_pmu_cpu_starting(int cpu)
 
 	cpuc->lbr_sel = NULL;
 
+	flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
+
 	if (!cpuc->shared_regs)
 		return;
 
@@ -3595,6 +3610,52 @@ static struct attribute *hsw_events_attrs[] = {
 	NULL
 };
 
+static ssize_t freeze_on_smi_show(struct device *cdev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	return sprintf(buf, "%lu\n", x86_pmu.attr_freeze_on_smi);
+}
+
+static DEFINE_MUTEX(freeze_on_smi_mutex);
+
+static ssize_t freeze_on_smi_store(struct device *cdev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	unsigned long val;
+	ssize_t ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if (val > 1)
+		return -EINVAL;
+
+	mutex_lock(&freeze_on_smi_mutex);
+
+	if (x86_pmu.attr_freeze_on_smi == val)
+		goto done;
+
+	x86_pmu.attr_freeze_on_smi = val;
+
+	get_online_cpus();
+	on_each_cpu(flip_smm_bit, &val, 1);
+	put_online_cpus();
+done:
+	mutex_unlock(&freeze_on_smi_mutex);
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(freeze_on_smi);
+
+static struct attribute *intel_pmu_attrs[] = {
+	&dev_attr_freeze_on_smi.attr,
+	NULL,
+};
+
 __init int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
@@ -3641,6 +3702,8 @@ __init int intel_pmu_init(void)
 
 	x86_pmu.max_pebs_events		= min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
 
+
+	x86_pmu.attrs			= intel_pmu_attrs;
 	/*
 	 * Quirk: v2 perfmon does not report fixed-purpose events, so
 	 * assume at least 3 events, when not running in a hypervisor:
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index f924629836a8..eb261656a320 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -18,7 +18,7 @@ enum {
 	LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_TIME,
 };
 
-static enum {
+static const enum {
 	LBR_EIP_FLAGS		= 1,
 	LBR_TSX			= 2,
 } lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
@@ -287,7 +287,7 @@ inline u64 lbr_from_signext_quirk_wr(u64 val)
 /*
  * If quirk is needed, ensure sign extension is 61 bits:
  */
-u64 lbr_from_signext_quirk_rd(u64 val)
+static u64 lbr_from_signext_quirk_rd(u64 val)
 {
 	if (static_branch_unlikely(&lbr_from_quirk_key)) {
 		/*
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 9d05c7e67f60..a45e2114a846 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -761,7 +761,7 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE,   hsw_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E,   hsw_rapl_init),
-	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,	  hsw_rapl_init),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,	  hsx_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 758c1aa5009d..44ec523287f6 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1170,7 +1170,7 @@ static int uncore_event_cpu_online(unsigned int cpu)
 		pmu = type->pmus;
 		for (i = 0; i < type->num_boxes; i++, pmu++) {
 			box = pmu->boxes[pkg];
-			if (!box && atomic_inc_return(&box->refcnt) == 1)
+			if (box && atomic_inc_return(&box->refcnt) == 1)
 				uncore_box_init(box);
 		}
 	}
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index be3d36254040..53728eea1bed 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -562,6 +562,9 @@ struct x86_pmu {
 	ssize_t		(*events_sysfs_show)(char *page, u64 config);
 	struct attribute **cpu_events;
 
+	unsigned long	attr_freeze_on_smi;
+	struct attribute **attrs;
+
 	/*
 	 * CPU Hotplug hooks
 	 */
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 7acb51c49fec..7a9df3beb89b 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -32,6 +32,7 @@
 #define _ASM_ADD	__ASM_SIZE(add)
 #define _ASM_SUB	__ASM_SIZE(sub)
 #define _ASM_XADD	__ASM_SIZE(xadd)
+#define _ASM_MUL	__ASM_SIZE(mul)
 
 #define _ASM_AX		__ASM_REG(ax)
 #define _ASM_BX		__ASM_REG(bx)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index caa5798c92f4..33380b871463 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -246,19 +246,6 @@ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
-/**
- * atomic_inc_short - increment of a short integer
- * @v: pointer to type int
- *
- * Atomically adds 1 to @v
- * Returns the new value of @u
- */
-static __always_inline short int atomic_inc_short(short int *v)
-{
-	asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
-	return *v;
-}
-
 #ifdef CONFIG_X86_32
 # include <asm/atomic64_32.h>
 #else
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 2f77bcefe6b4..d2ff779f347e 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -74,7 +74,7 @@ struct efi_scratch {
 	__kernel_fpu_begin();						\
 									\
 	if (efi_scratch.use_pgd) {					\
-		efi_scratch.prev_cr3 = read_cr3();			\
+		efi_scratch.prev_cr3 = __read_cr3();			\
 		write_cr3((unsigned long)efi_scratch.efi_pgt);		\
 		__flush_tlb_all();					\
 	}								\
diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index b8ad261d11dc..c66d19e3c23e 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -29,6 +29,7 @@ struct pt_regs;
 	} while (0)
 
 extern int fixup_exception(struct pt_regs *regs, int trapnr);
+extern int fixup_bug(struct pt_regs *regs, int trapnr);
 extern bool ex_has_fault_handler(unsigned long ip);
 extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
 
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 59405a248fc2..9b76cd331990 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -22,8 +22,8 @@ typedef struct {
 #ifdef CONFIG_SMP
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;
-	unsigned int irq_tlb_count;
 #endif
+	unsigned int irq_tlb_count;
 #ifdef CONFIG_X86_THERMAL_VECTOR
 	unsigned int irq_thermal_count;
 #endif
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 737da62bfeb0..474eb8c66fee 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -4,8 +4,9 @@
 struct x86_mapping_info {
 	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
 	void *context;			 /* context for alloc_pgt_page */
-	unsigned long pmd_flag;		 /* page flag for PMD entry */
+	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
 	unsigned long offset;		 /* ident mapping offset */
+	bool direct_gbpages;		 /* PUD level 1GB page support */
 };
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 055962615779..722d0e568863 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
 
 	bool perm_ok; /* do not check permissions if true */
 	bool ud;	/* inject an #UD if host doesn't support insn */
+	bool tf;	/* TF value before instruction (after for syscall/sysret) */
 
 	bool have_exception;
 	struct x86_exception exception;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f5bddf92faba..695605eb1dfb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -43,7 +43,7 @@
 #define KVM_PRIVATE_MEM_SLOTS 3
 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 
-#define KVM_HALT_POLL_NS_DEFAULT 400000
+#define KVM_HALT_POLL_NS_DEFAULT 200000
 
 #define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
 
@@ -1020,6 +1020,8 @@ struct kvm_x86_ops {
 	void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
 					   struct kvm_memory_slot *slot,
 					   gfn_t offset, unsigned long mask);
+	int (*write_log_dirty)(struct kvm_vcpu *vcpu);
+
 	/* pmu operations of sub-arch */
 	const struct kvm_pmu_ops *pmu_ops;
 
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4fd5195deed0..3f9a3d2a5209 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -266,6 +266,7 @@ static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *s
 #endif
 
 int mce_available(struct cpuinfo_x86 *c);
+bool mce_is_memory_error(struct mce *m);
 
 DECLARE_PER_CPU(unsigned, mce_exception_count);
 DECLARE_PER_CPU(unsigned, mce_poll_count);
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index f9813b6d8b80..79b647a7ebd0 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -37,12 +37,6 @@ typedef struct {
 #endif
 } mm_context_t;
 
-#ifdef CONFIG_SMP
 void leave_mm(int cpu);
-#else
-static inline void leave_mm(int cpu)
-{
-}
-#endif
 
 #endif /* _ASM_X86_MMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 68b329d77b3a..ecfcb6643c9b 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -47,7 +47,7 @@ struct ldt_struct {
 	 * allocations, but it's not worth trying to optimize.
 	 */
 	struct desc_struct *entries;
-	unsigned int size;
+	unsigned int nr_entries;
 };
 
 /*
@@ -87,22 +87,46 @@ static inline void load_mm_ldt(struct mm_struct *mm)
 	 */
 
 	if (unlikely(ldt))
-		set_ldt(ldt->entries, ldt->size);
+		set_ldt(ldt->entries, ldt->nr_entries);
 	else
 		clear_LDT();
 #else
 	clear_LDT();
 #endif
+}
+
+static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
+{
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+	/*
+	 * Load the LDT if either the old or new mm had an LDT.
+	 *
+	 * An mm will never go from having an LDT to not having an LDT.  Two
+	 * mms never share an LDT, so we don't gain anything by checking to
+	 * see whether the LDT changed.  There's also no guarantee that
+	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
+	 * then prev->context.ldt will also be non-NULL.
+	 *
+	 * If we really cared, we could optimize the case where prev == next
+	 * and we're exiting lazy mode.  Most of the time, if this happens,
+	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
+	 * used by legacy code and emulators where we don't need this level of
+	 * performance.
+	 *
+	 * This uses | instead of || because it generates better code.
+	 */
+	if (unlikely((unsigned long)prev->context.ldt |
+		     (unsigned long)next->context.ldt))
+		load_mm_ldt(next);
+#endif
 
 	DEBUG_LOCKS_WARN_ON(preemptible());
 }
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
-#ifdef CONFIG_SMP
 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
-#endif
 }
 
 static inline int init_new_context(struct task_struct *tsk,
@@ -220,18 +244,6 @@ static inline int vma_pkey(struct vm_area_struct *vma)
 }
 #endif
 
-static inline bool __pkru_allows_pkey(u16 pkey, bool write)
-{
-	u32 pkru = read_pkru();
-
-	if (!__pkru_allows_read(pkru, pkey))
-		return false;
-	if (write && !__pkru_allows_write(pkru, pkey))
-		return false;
-
-	return true;
-}
-
 /*
  * We only want to enforce protection keys on the current process
  * because we effectively have no access to PKRU for other
@@ -268,4 +280,23 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 	return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
+
+/*
+ * This can be used from process context to figure out what the value of
+ * CR3 is without needing to do a (slow) __read_cr3().
+ *
+ * It's intended to be used for code like KVM that sneakily changes CR3
+ * and needs to restore it.  It needs to be used very carefully.
+ */
+static inline unsigned long __get_current_cr3_fast(void)
+{
+	unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
+
+	/* For now, be very restrictive about when this can be called. */
+	VM_WARN_ON(in_nmi() || !in_atomic());
+
+	VM_BUG_ON(cr3 != __read_cr3());
+	return cr3;
+}
+
 #endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index fba100713924..d5acc27ed1cc 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -2,8 +2,7 @@
 #define _ASM_X86_MSHYPER_H
 
 #include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
+#include <linux/atomic.h>
 #include <asm/hyperv.h>
 
 /*
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 673f9ac50f6d..18b162322eff 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -137,6 +137,8 @@
 #define DEBUGCTLMSR_BTS_OFF_OS		(1UL <<  9)
 #define DEBUGCTLMSR_BTS_OFF_USR		(1UL << 10)
 #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI	(1UL << 11)
+#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT	14
+#define DEBUGCTLMSR_FREEZE_IN_SMM	(1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
 
 #define MSR_PEBS_FRONTEND		0x000003f7
 
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 55fa56fe4e45..9ccac1926587 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -61,7 +61,7 @@ static inline void write_cr2(unsigned long x)
 	PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
 }
 
-static inline unsigned long read_cr3(void)
+static inline unsigned long __read_cr3(void)
 {
 	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
 }
@@ -118,7 +118,7 @@ static inline u64 paravirt_read_msr(unsigned msr)
 static inline void paravirt_write_msr(unsigned msr,
 				      unsigned low, unsigned high)
 {
-	return PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high);
+	PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high);
 }
 
 static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
@@ -312,11 +312,9 @@ static inline void __flush_tlb_single(unsigned long addr)
 }
 
 static inline void flush_tlb_others(const struct cpumask *cpumask,
-				    struct mm_struct *mm,
-				    unsigned long start,
-				    unsigned long end)
+				    const struct flush_tlb_info *info)
 {
-	PVOP_VCALL4(pv_mmu_ops.flush_tlb_others, cpumask, mm, start, end);
+	PVOP_VCALL2(pv_mmu_ops.flush_tlb_others, cpumask, info);
 }
 
 static inline int paravirt_pgd_alloc(struct mm_struct *mm)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 7465d6fe336f..cb976bab6299 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -51,6 +51,7 @@ struct mm_struct;
 struct desc_struct;
 struct task_struct;
 struct cpumask;
+struct flush_tlb_info;
 
 /*
  * Wrapper type for pointers to code which uses the non-standard
@@ -223,9 +224,7 @@ struct pv_mmu_ops {
 	void (*flush_tlb_kernel)(void);
 	void (*flush_tlb_single)(unsigned long addr);
 	void (*flush_tlb_others)(const struct cpumask *cpus,
-				 struct mm_struct *mm,
-				 unsigned long start,
-				 unsigned long end);
+				 const struct flush_tlb_info *info);
 
 	/* Hooks for allocating and freeing a pagetable top-level */
 	int  (*pgd_alloc)(struct mm_struct *mm);
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 50d35e3185f5..c8821bab938f 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -212,4 +212,51 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
 #define __pte_to_swp_entry(pte)		((swp_entry_t){ (pte).pte_high })
 #define __swp_entry_to_pte(x)		((pte_t){ { .pte_high = (x).val } })
 
+#define gup_get_pte gup_get_pte
+/*
+ * WARNING: only to be used in the get_user_pages_fast() implementation.
+ *
+ * With get_user_pages_fast(), we walk down the pagetables without taking
+ * any locks.  For this we would like to load the pointers atomically,
+ * but that is not possible (without expensive cmpxchg8b) on PAE.  What
+ * we do have is the guarantee that a PTE will only either go from not
+ * present to present, or present to not present or both -- it will not
+ * switch to a completely different present page without a TLB flush in
+ * between; something that we are blocking by holding interrupts off.
+ *
+ * Setting ptes from not present to present goes:
+ *
+ *   ptep->pte_high = h;
+ *   smp_wmb();
+ *   ptep->pte_low = l;
+ *
+ * And present to not present goes:
+ *
+ *   ptep->pte_low = 0;
+ *   smp_wmb();
+ *   ptep->pte_high = 0;
+ *
+ * We must ensure here that the load of pte_low sees 'l' iff pte_high
+ * sees 'h'. We load pte_high *after* loading pte_low, which ensures we
+ * don't see an older value of pte_high.  *Then* we recheck pte_low,
+ * which ensures that we haven't picked up a changed pte high. We might
+ * have gotten rubbish values from pte_low and pte_high, but we are
+ * guaranteed that pte_low will not have the present bit set *unless*
+ * it is 'l'. Because get_user_pages_fast() only operates on present ptes
+ * we're safe.
+ */
+static inline pte_t gup_get_pte(pte_t *ptep)
+{
+	pte_t pte;
+
+	do {
+		pte.pte_low = ptep->pte_low;
+		smp_rmb();
+		pte.pte_high = ptep->pte_high;
+		smp_rmb();
+	} while (unlikely(pte.pte_low != ptep->pte_low));
+
+	return pte;
+}
+
 #endif /* _ASM_X86_PGTABLE_3LEVEL_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index f5af95a0c6b8..77037b6f1caa 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -244,6 +244,11 @@ static inline int pud_devmap(pud_t pud)
 	return 0;
 }
 #endif
+
+static inline int pgd_devmap(pgd_t pgd)
+{
+	return 0;
+}
 #endif
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
@@ -917,7 +922,7 @@ extern pgd_t trampoline_pgd_entry;
 static inline void __meminit init_trampoline_default(void)
 {
 	/* Default trampoline pgd value */
-	trampoline_pgd_entry = init_level4_pgt[pgd_index(__PAGE_OFFSET)];
+	trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
 }
 # ifdef CONFIG_RANDOMIZE_MEMORY
 void __meminit init_trampoline(void);
@@ -1185,6 +1190,54 @@ static inline u16 pte_flags_pkey(unsigned long pte_flags)
 #endif
 }
 
+static inline bool __pkru_allows_pkey(u16 pkey, bool write)
+{
+	u32 pkru = read_pkru();
+
+	if (!__pkru_allows_read(pkru, pkey))
+		return false;
+	if (write && !__pkru_allows_write(pkru, pkey))
+		return false;
+
+	return true;
+}
+
+/*
+ * 'pteval' can come from a PTE, PMD or PUD.  We only check
+ * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the
+ * same value on all 3 types.
+ */
+static inline bool __pte_access_permitted(unsigned long pteval, bool write)
+{
+	unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;
+
+	if (write)
+		need_pte_bits |= _PAGE_RW;
+
+	if ((pteval & need_pte_bits) != need_pte_bits)
+		return 0;
+
+	return __pkru_allows_pkey(pte_flags_pkey(pteval), write);
+}
+
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+	return __pte_access_permitted(pte_val(pte), write);
+}
+
+#define pmd_access_permitted pmd_access_permitted
+static inline bool pmd_access_permitted(pmd_t pmd, bool write)
+{
+	return __pte_access_permitted(pmd_val(pmd), write);
+}
+
+#define pud_access_permitted pud_access_permitted
+static inline bool pud_access_permitted(pud_t pud, bool write)
+{
+	return __pte_access_permitted(pud_val(pud), write);
+}
+
 #include <asm-generic/pgtable.h>
 #endif	/* __ASSEMBLY__ */
 
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 9991224f6238..2160c1fee920 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -14,15 +14,17 @@
 #include <linux/bitops.h>
 #include <linux/threads.h>
 
+extern p4d_t level4_kernel_pgt[512];
+extern p4d_t level4_ident_pgt[512];
 extern pud_t level3_kernel_pgt[512];
 extern pud_t level3_ident_pgt[512];
 extern pmd_t level2_kernel_pgt[512];
 extern pmd_t level2_fixmap_pgt[512];
 extern pmd_t level2_ident_pgt[512];
 extern pte_t level1_fixmap_pgt[512];
-extern pgd_t init_level4_pgt[];
+extern pgd_t init_top_pgt[];
 
-#define swapper_pg_dir init_level4_pgt
+#define swapper_pg_dir init_top_pgt
 
 extern void paging_init(void);
 
@@ -227,6 +229,20 @@ extern void cleanup_highmap(void);
 extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
 extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
 
-#endif /* !__ASSEMBLY__ */
+#define gup_fast_permitted gup_fast_permitted
+static inline bool gup_fast_permitted(unsigned long start, int nr_pages,
+		int write)
+{
+	unsigned long len, end;
+
+	len = (unsigned long)nr_pages << PAGE_SHIFT;
+	end = start + len;
+	if (end < start)
+		return false;
+	if (end >> __VIRTUAL_MASK_SHIFT)
+		return false;
+	return true;
+}
 
+#endif /* !__ASSEMBLY__ */
 #endif /* _ASM_X86_PGTABLE_64_H */
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index d5a22bac9988..0ff8fe71b255 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -98,7 +98,7 @@ static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
 
 		if (bytes < 8) {
 			if (!IS_ALIGNED(dest, 4) || (bytes != 4))
-				arch_wb_cache_pmem(addr, 1);
+				arch_wb_cache_pmem(addr, bytes);
 		} else {
 			if (!IS_ALIGNED(dest, 8)) {
 				dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 39fb618e2211..79aa2f98398d 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -8,4 +8,40 @@
 #else
 #define X86_VM_MASK	0 /* No VM86 support */
 #endif
+
+/*
+ * CR3's layout varies depending on several things.
+ *
+ * If CR4.PCIDE is set (64-bit only), then CR3[11:0] is the address space ID.
+ * If PAE is enabled, then CR3[11:5] is part of the PDPT address
+ * (i.e. it's 32-byte aligned, not page-aligned) and CR3[4:0] is ignored.
+ * Otherwise (non-PAE, non-PCID), CR3[3] is PWT, CR3[4] is PCD, and
+ * CR3[2:0] and CR3[11:5] are ignored.
+ *
+ * In all cases, Linux puts zeros in the low ignored bits and in PWT and PCD.
+ *
+ * CR3[63] is always read as zero.  If CR4.PCIDE is set, then CR3[63] may be
+ * written as 1 to prevent the write to CR3 from flushing the TLB.
+ *
+ * On systems with SME, one bit (in a variable position!) is stolen to indicate
+ * that the top-level paging structure is encrypted.
+ *
+ * All of the remaining bits indicate the physical address of the top-level
+ * paging structure.
+ *
+ * CR3_ADDR_MASK is the mask used by read_cr3_pa().
+ */
+#ifdef CONFIG_X86_64
+/* Mask off the address space ID bits. */
+#define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull
+#define CR3_PCID_MASK 0xFFFull
+#else
+/*
+ * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
+ * a tiny bit of code size by setting all the bits.
+ */
+#define CR3_ADDR_MASK 0xFFFFFFFFull
+#define CR3_PCID_MASK 0ull
+#endif
+
 #endif /* _ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3cada998a402..2e1696294af5 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -231,6 +231,14 @@ native_cpuid_reg(ebx)
 native_cpuid_reg(ecx)
 native_cpuid_reg(edx)
 
+/*
+ * Friendlier CR3 helpers.
+ */
+static inline unsigned long read_cr3_pa(void)
+{
+	return __read_cr3() & CR3_ADDR_MASK;
+}
+
 static inline void load_cr3(pgd_t *pgdir)
 {
 	write_cr3(__pa(pgdir));
@@ -860,8 +868,6 @@ extern unsigned long KSTK_ESP(struct task_struct *task);
 
 #endif /* CONFIG_X86_64 */
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 					       unsigned long new_sp);
 
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 12af3e35edfa..9efaabf5b54b 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -39,7 +39,7 @@ static inline void native_write_cr2(unsigned long val)
 	asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
 }
 
-static inline unsigned long native_read_cr3(void)
+static inline unsigned long __native_read_cr3(void)
 {
 	unsigned long val;
 	asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
@@ -159,9 +159,13 @@ static inline void write_cr2(unsigned long x)
 	native_write_cr2(x);
 }
 
-static inline unsigned long read_cr3(void)
+/*
+ * Careful!  CR3 contains more than just an address.  You probably want
+ * read_cr3_pa() instead.
+ */
+static inline unsigned long __read_cr3(void)
 {
-	return native_read_cr3();
+	return __native_read_cr3();
 }
 
 static inline void write_cr3(unsigned long x)
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 27e9f9d769b8..2016962103df 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -29,11 +29,9 @@ struct cyc2ns_data {
 	u32 cyc2ns_mul;
 	u32 cyc2ns_shift;
 	u64 cyc2ns_offset;
-	u32 __count;
-	/* u32 hole */
-}; /* 24 bytes -- do not grow */
+}; /* 16 bytes */
 
-extern struct cyc2ns_data *cyc2ns_read_begin(void);
-extern void cyc2ns_read_end(struct cyc2ns_data *);
+extern void cyc2ns_read_begin(struct cyc2ns_data *);
+extern void cyc2ns_read_end(void);
 
 #endif /* _ASM_X86_TIMER_H */
diff --git a/arch/x86/include/asm/tlbbatch.h b/arch/x86/include/asm/tlbbatch.h
new file mode 100644
index 000000000000..f4a6ff352a0e
--- /dev/null
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -0,0 +1,14 @@
+#ifndef _ARCH_X86_TLBBATCH_H
+#define _ARCH_X86_TLBBATCH_H
+
+#include <linux/cpumask.h>
+
+struct arch_tlbflush_unmap_batch {
+	/*
+	 * Each bit set is a CPU that potentially has a TLB entry for one of
+	 * the PFNs being flushed..
+	 */
+	struct cpumask cpumask;
+};
+
+#endif /* _ARCH_X86_TLBBATCH_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6ed9ea469b48..50ea3482e1d1 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -7,6 +7,7 @@
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
+#include <asm/smp.h>
 
 static inline void __invpcid(unsigned long pcid, unsigned long addr,
 			     unsigned long type)
@@ -65,10 +66,14 @@ static inline void invpcid_flush_all_nonglobals(void)
 #endif
 
 struct tlb_state {
-#ifdef CONFIG_SMP
-	struct mm_struct *active_mm;
+	/*
+	 * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
+	 * are on.  This means that it may not match current->active_mm,
+	 * which will contain the previous user mm when we're in lazy TLB
+	 * mode even if we've already switched back to swapper_pg_dir.
+	 */
+	struct mm_struct *loaded_mm;
 	int state;
-#endif
 
 	/*
 	 * Access to this CR4 shadow and to H/W CR4 is protected by
@@ -151,7 +156,7 @@ static inline void __native_flush_tlb(void)
 	 * back:
 	 */
 	preempt_disable();
-	native_write_cr3(native_read_cr3());
+	native_write_cr3(__native_read_cr3());
 	preempt_enable();
 }
 
@@ -220,84 +225,16 @@ static inline void __flush_tlb_one(unsigned long addr)
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_others(cpumask, mm, start, end) flushes TLBs on other cpus
+ *  - flush_tlb_others(cpumask, info) flushes TLBs on other cpus
  *
  * ..but the i386 has somewhat limited tlb flushing capabilities,
  * and page-granular flushes are available only on i486 and up.
  */
-
-#ifndef CONFIG_SMP
-
-/* "_up" is for UniProcessor.
- *
- * This is a helper for other header functions.  *Not* intended to be called
- * directly.  All global TLB flushes need to either call this, or to bump the
- * vm statistics themselves.
- */
-static inline void __flush_tlb_up(void)
-{
-	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-	__flush_tlb();
-}
-
-static inline void flush_tlb_all(void)
-{
-	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-	__flush_tlb_all();
-}
-
-static inline void local_flush_tlb(void)
-{
-	__flush_tlb_up();
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-	if (mm == current->active_mm)
-		__flush_tlb_up();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
-				  unsigned long addr)
-{
-	if (vma->vm_mm == current->active_mm)
-		__flush_tlb_one(addr);
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
-{
-	if (vma->vm_mm == current->active_mm)
-		__flush_tlb_up();
-}
-
-static inline void flush_tlb_mm_range(struct mm_struct *mm,
-	   unsigned long start, unsigned long end, unsigned long vmflag)
-{
-	if (mm == current->active_mm)
-		__flush_tlb_up();
-}
-
-static inline void native_flush_tlb_others(const struct cpumask *cpumask,
-					   struct mm_struct *mm,
-					   unsigned long start,
-					   unsigned long end)
-{
-}
-
-static inline void reset_lazy_tlbstate(void)
-{
-}
-
-static inline void flush_tlb_kernel_range(unsigned long start,
-					  unsigned long end)
-{
-	flush_tlb_all();
-}
-
-#else  /* SMP */
-
-#include <asm/smp.h>
+struct flush_tlb_info {
+	struct mm_struct *mm;
+	unsigned long start;
+	unsigned long end;
+};
 
 #define local_flush_tlb() __flush_tlb()
 
@@ -307,29 +244,32 @@ static inline void flush_tlb_kernel_range(unsigned long start,
 		flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
 
 extern void flush_tlb_all(void);
-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
 extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 				unsigned long end, unsigned long vmflag);
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
+static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
+{
+	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
+}
+
 void native_flush_tlb_others(const struct cpumask *cpumask,
-				struct mm_struct *mm,
-				unsigned long start, unsigned long end);
+			     const struct flush_tlb_info *info);
 
 #define TLBSTATE_OK	1
 #define TLBSTATE_LAZY	2
 
-static inline void reset_lazy_tlbstate(void)
+static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
+					struct mm_struct *mm)
 {
-	this_cpu_write(cpu_tlbstate.state, 0);
-	this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
+	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
 }
 
-#endif	/* SMP */
+extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
 
 #ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, mm, start, end)	\
-	native_flush_tlb_others(mask, mm, start, end)
+#define flush_tlb_others(mask, info)	\
+	native_flush_tlb_others(mask, info)
 #endif
 
 #endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 68766b276d9e..a059aac9e937 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -319,10 +319,10 @@ do {									\
 #define __get_user_asm_u64(x, ptr, retval, errret)			\
 ({									\
 	__typeof__(ptr) __ptr = (ptr);					\
-	asm volatile(ASM_STAC "\n"					\
+	asm volatile("\n"					\
 		     "1:	movl %2,%%eax\n"			\
 		     "2:	movl %3,%%edx\n"			\
-		     "3: " ASM_CLAC "\n"				\
+		     "3:\n"				\
 		     ".section .fixup,\"ax\"\n"				\
 		     "4:	mov %4,%0\n"				\
 		     "	xorl %%eax,%%eax\n"				\
@@ -331,7 +331,7 @@ do {									\
 		     ".previous\n"					\
 		     _ASM_EXTABLE(1b, 4b)				\
 		     _ASM_EXTABLE(2b, 4b)				\
-		     : "=r" (retval), "=A"(x)				\
+		     : "=r" (retval), "=&A"(x)				\
 		     : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1),	\
 		       "i" (errret), "0" (retval));			\
 })
@@ -703,14 +703,15 @@ extern struct movsl_mask {
 #define unsafe_put_user(x, ptr, err_label)					\
 do {										\
 	int __pu_err;								\
-	__put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT);		\
+	__typeof__(*(ptr)) __pu_val = (x);					\
+	__put_user_size(__pu_val, (ptr), sizeof(*(ptr)), __pu_err, -EFAULT);	\
 	if (unlikely(__pu_err)) goto err_label;					\
 } while (0)
 
 #define unsafe_get_user(x, ptr, err_label)					\
 do {										\
 	int __gu_err;								\
-	unsigned long __gu_val;							\
+	__inttype(*(ptr)) __gu_val;						\
 	__get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT);	\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;				\
 	if (unlikely(__gu_err)) goto err_label;					\
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 6686820feae9..b5a32231abd8 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_UV_UV_H
 #define _ASM_X86_UV_UV_H
 
+#include <asm/tlbflush.h>
+
 enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
 
 struct cpumask;
@@ -15,10 +17,7 @@ extern void uv_cpu_init(void);
 extern void uv_nmi_init(void);
 extern void uv_system_init(void);
 extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
-						 struct mm_struct *mm,
-						 unsigned long start,
-						 unsigned long end,
-						 unsigned int cpu);
+						 const struct flush_tlb_info *info);
 
 #else	/* X86_UV */
 
@@ -28,8 +27,8 @@ static inline int is_uv_hubless(void)	{ return 0; }
 static inline void uv_cpu_init(void)	{ }
 static inline void uv_system_init(void)	{ }
 static inline const struct cpumask *
-uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
-		    unsigned long start, unsigned long end, unsigned int cpu)
+uv_flush_tlb_others(const struct cpumask *cpumask,
+		    const struct flush_tlb_info *info)
 { return cpumask; }
 
 #endif	/* X86_UV */
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index 3dec769cadf7..83b6e9a0dce4 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -4,62 +4,3 @@ include include/uapi/asm-generic/Kbuild.asm
 genhdr-y += unistd_32.h
 genhdr-y += unistd_64.h
 genhdr-y += unistd_x32.h
-header-y += a.out.h
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += boot.h
-header-y += bootparam.h
-header-y += byteorder.h
-header-y += debugreg.h
-header-y += e820.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += hw_breakpoint.h
-header-y += hyperv.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += ist.h
-header-y += kvm.h
-header-y += kvm_para.h
-header-y += kvm_perf.h
-header-y += ldt.h
-header-y += mce.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += msr-index.h
-header-y += msr.h
-header-y += mtrr.h
-header-y += param.h
-header-y += perf_regs.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += posix_types_32.h
-header-y += posix_types_64.h
-header-y += posix_types_x32.h
-header-y += prctl.h
-header-y += processor-flags.h
-header-y += ptrace-abi.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += sigcontext32.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += svm.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += ucontext.h
-header-y += unistd.h
-header-y += vm86.h
-header-y += vmx.h
-header-y += vsyscall.h
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 432df4b1baec..f4fef5a24ebd 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -34,16 +34,10 @@
 #define HV_X64_MSR_REFERENCE_TSC		0x40000021
 
 /*
- * There is a single feature flag that signifies the presence of the MSR
- * that can be used to retrieve both the local APIC Timer frequency as
- * well as the TSC frequency.
+ * There is a single feature flag that signifies if the partition has access
+ * to MSRs with local APIC and TSC frequencies.
  */
-
-/* Local APIC timer frequency MSR (HV_X64_MSR_APIC_FREQUENCY) is available */
-#define HV_X64_MSR_APIC_FREQUENCY_AVAILABLE (1 << 11)
-
-/* TSC frequency MSR (HV_X64_MSR_TSC_FREQUENCY) is available */
-#define HV_X64_MSR_TSC_FREQUENCY_AVAILABLE (1 << 11)
+#define HV_X64_ACCESS_FREQUENCY_MSRS		(1 << 11)
 
 /*
  * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
@@ -73,6 +67,9 @@
   */
 #define HV_X64_MSR_STAT_PAGES_AVAILABLE		(1 << 8)
 
+/* Frequency MSRs available */
+#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE	(1 << 8)
+
 /* Crash MSR available */
 #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10)
 
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 567de50a4c2a..185f3d10c194 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -104,6 +104,8 @@
 #define X86_CR4_OSFXSR		_BITUL(X86_CR4_OSFXSR_BIT)
 #define X86_CR4_OSXMMEXCPT_BIT	10 /* enable unmasked SSE exceptions */
 #define X86_CR4_OSXMMEXCPT	_BITUL(X86_CR4_OSXMMEXCPT_BIT)
+#define X86_CR4_LA57_BIT	12 /* enable 5-level page tables */
+#define X86_CR4_LA57		_BITUL(X86_CR4_LA57_BIT)
 #define X86_CR4_VMXE_BIT	13 /* enable VMX virtualization */
 #define X86_CR4_VMXE		_BITUL(X86_CR4_VMXE_BIT)
 #define X86_CR4_SMXE_BIT	14 /* enable safer mode (TXT) */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4b994232cb57..a01892bdd61a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -18,6 +18,7 @@ CFLAGS_REMOVE_pvclock.o = -pg
 CFLAGS_REMOVE_kvmclock.o = -pg
 CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_early_printk.o = -pg
+CFLAGS_REMOVE_head64.o = -pg
 endif
 
 KASAN_SANITIZE_head$(BITS).o				:= n
@@ -29,6 +30,7 @@ OBJECT_FILES_NON_STANDARD_head_$(BITS).o		:= y
 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o	:= y
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o		:= y
 OBJECT_FILES_NON_STANDARD_test_nx.o			:= y
+OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o	:= y
 
 # If instrumentation of this dir is enabled, boot hangs during first second.
 # Probably could be more selective here, but note that files related to irqs,
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 26b78d86f25a..85a9e17e0dbc 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,3 +1,5 @@
+OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y
+
 obj-$(CONFIG_ACPI)		+= boot.o
 obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup_$(BITS).o
 obj-$(CONFIG_ACPI_APEI)		+= apei.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c5b8f760473c..32e14d137416 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -409,8 +409,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 		memcpy(insnbuf, replacement, a->replacementlen);
 		insnbuf_sz = a->replacementlen;
 
-		/* 0xe8 is a relative jump; fix the offset. */
-		if (*insnbuf == 0xe8 && a->replacementlen == 5) {
+		/*
+		 * 0xe8 is a relative jump; fix the offset.
+		 *
+		 * Instruction length is checked before the opcode to avoid
+		 * accessing uninitialized bytes for zero-length replacements.
+		 */
+		if (a->replacementlen == 5 && *insnbuf == 0xe8) {
 			*(s32 *)(insnbuf + 1) += replacement - instr;
 			DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
 				*(s32 *)(insnbuf + 1),
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
index ae50d3454d78..81ff48905623 100644
--- a/arch/x86/kernel/apic/htirq.c
+++ b/arch/x86/kernel/apic/htirq.c
@@ -150,7 +150,7 @@ static const struct irq_domain_ops htirq_domain_ops = {
 	.deactivate	= htirq_domain_deactivate,
 };
 
-void arch_init_htirq_domain(struct irq_domain *parent)
+void __init arch_init_htirq_domain(struct irq_domain *parent)
 {
 	if (disable_apic)
 		return;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 347bb9f65737..247880fc29f9 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1200,28 +1200,6 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
 
 static struct irq_chip ioapic_chip, ioapic_ir_chip;
 
-#ifdef CONFIG_X86_32
-static inline int IO_APIC_irq_trigger(int irq)
-{
-	int apic, idx, pin;
-
-	for_each_ioapic_pin(apic, pin) {
-		idx = find_irq_entry(apic, pin, mp_INT);
-		if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin, 0)))
-			return irq_trigger(idx);
-	}
-	/*
-         * nonexistent IRQs are edge default
-         */
-	return 0;
-}
-#else
-static inline int IO_APIC_irq_trigger(int irq)
-{
-	return 1;
-}
-#endif
-
 static void __init setup_IO_APIC_irqs(void)
 {
 	unsigned int ioapic, pin;
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index c61aec7e65f4..4c5d1882bec5 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -136,7 +136,7 @@ static struct msi_domain_info pci_msi_domain_info = {
 	.handler_name	= "edge",
 };
 
-void arch_init_msi_domain(struct irq_domain *parent)
+void __init arch_init_msi_domain(struct irq_domain *parent)
 {
 	if (disable_apic)
 		return;
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f3557a1eb562..e66d8e48e456 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -405,7 +405,7 @@ int __init arch_probe_nr_irqs(void)
 }
 
 #ifdef	CONFIG_X86_IO_APIC
-static void init_legacy_irqs(void)
+static void __init init_legacy_irqs(void)
 {
 	int i, node = cpu_to_node(0);
 	struct apic_chip_data *data;
@@ -424,7 +424,7 @@ static void init_legacy_irqs(void)
 	}
 }
 #else
-static void init_legacy_irqs(void) { }
+static inline void init_legacy_irqs(void) { }
 #endif
 
 int __init arch_early_irq_init(void)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index ee8f11800295..bb5abe8f5fd4 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -799,8 +799,9 @@ static void init_amd(struct cpuinfo_x86 *c)
 		if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
 			set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
 
-	/* AMD CPUs don't reset SS attributes on SYSRET */
-	set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+	/* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
+	if (!cpu_has(c, X86_FEATURE_XENPV))
+		set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index a70fd61095f8..6f077445647a 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -255,6 +255,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
 		break;
 
 	case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
+	case 11: /* GX1 with inverted Device ID */
 #ifdef CONFIG_PCI
 	{
 		u32 vendor, device;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index f5af0cc7eb0d..9257bd9dc664 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -856,11 +856,13 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
 	dentry = kernfs_mount(fs_type, flags, rdt_root,
 			      RDTGROUP_SUPER_MAGIC, NULL);
 	if (IS_ERR(dentry))
-		goto out_cdp;
+		goto out_destroy;
 
 	static_branch_enable(&rdt_enable_key);
 	goto out;
 
+out_destroy:
+	kernfs_remove(kn_info);
 out_cdp:
 	cdp_disable();
 out:
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5abd4bf73d6e..5cfbaeb6529a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -499,16 +499,14 @@ static int mce_usable_address(struct mce *m)
 	return 1;
 }
 
-static bool memory_error(struct mce *m)
+bool mce_is_memory_error(struct mce *m)
 {
-	struct cpuinfo_x86 *c = &boot_cpu_data;
-
-	if (c->x86_vendor == X86_VENDOR_AMD) {
+	if (m->cpuvendor == X86_VENDOR_AMD) {
 		/* ErrCodeExt[20:16] */
 		u8 xec = (m->status >> 16) & 0x1f;
 
 		return (xec == 0x0 || xec == 0x8);
-	} else if (c->x86_vendor == X86_VENDOR_INTEL) {
+	} else if (m->cpuvendor == X86_VENDOR_INTEL) {
 		/*
 		 * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
 		 *
@@ -529,6 +527,7 @@ static bool memory_error(struct mce *m)
 
 	return false;
 }
+EXPORT_SYMBOL_GPL(mce_is_memory_error);
 
 static bool cec_add_mce(struct mce *m)
 {
@@ -536,7 +535,7 @@ static bool cec_add_mce(struct mce *m)
 		return false;
 
 	/* We eat only correctable DRAM errors with usable addresses. */
-	if (memory_error(m) &&
+	if (mce_is_memory_error(m) &&
 	    !(m->status & MCI_STATUS_UC) &&
 	    mce_usable_address(m))
 		if (!cec_add_elem(m->addr >> PAGE_SHIFT))
@@ -713,7 +712,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 
 		severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
 
-		if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m))
+		if (severity == MCE_DEFERRED_SEVERITY && mce_is_memory_error(&m))
 			if (m.status & MCI_STATUS_ADDRV)
 				m.severity = severity;
 
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 7889ae492af0..21b185793c80 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -10,7 +10,7 @@
  *  Author: Peter Oruba <peter.oruba@amd.com>
  *
  *  Based on work by:
- *  Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ *  Tigran Aivazian <aivazian.tigran@gmail.com>
  *
  *  early loader:
  *  Copyright (C) 2013 Advanced Micro Devices, Inc.
@@ -251,7 +251,7 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
 #endif
 }
 
-void __load_ucode_amd(unsigned int cpuid_1_eax, struct cpio_data *ret)
+static void __load_ucode_amd(unsigned int cpuid_1_eax, struct cpio_data *ret)
 {
 	struct ucode_cpu_info *uci;
 	struct cpio_data cp;
@@ -320,7 +320,7 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax)
 }
 
 static enum ucode_state
-load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size);
+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size);
 
 int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
 {
@@ -338,8 +338,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
 	if (!desc.mc)
 		return -EINVAL;
 
-	ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax),
-				 desc.data, desc.size);
+	ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
 	if (ret != UCODE_OK)
 		return -EINVAL;
 
@@ -352,8 +351,6 @@ void reload_ucode_amd(void)
 	u32 rev, dummy;
 
 	mc = (struct microcode_amd *)amd_ucode_patch;
-	if (!mc)
-		return;
 
 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 
@@ -677,7 +674,7 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
 }
 
 static enum ucode_state
-load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
 {
 	enum ucode_state ret;
 
@@ -691,8 +688,8 @@ load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
 
 #ifdef CONFIG_X86_32
 	/* save BSP's matching patch for early load */
-	if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
-		struct ucode_patch *p = find_patch(cpu);
+	if (save) {
+		struct ucode_patch *p = find_patch(0);
 		if (p) {
 			memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
 			memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
@@ -724,11 +721,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
 {
 	char fw_name[36] = "amd-ucode/microcode_amd.bin";
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
 	enum ucode_state ret = UCODE_NFOUND;
 	const struct firmware *fw;
 
 	/* reload ucode container only on the boot cpu */
-	if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
+	if (!refresh_fw || !bsp)
 		return UCODE_OK;
 
 	if (c->x86 >= 0x15)
@@ -745,7 +743,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
 		goto fw_release;
 	}
 
-	ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size);
+	ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size);
 
  fw_release:
 	release_firmware(fw);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index b4a4cd39b358..9cb98ee103db 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -1,7 +1,7 @@
 /*
  * CPU Microcode Update Driver for Linux
  *
- * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
  *	      2006	Shaohua Li <shaohua.li@intel.com>
  *	      2013-2016	Borislav Petkov <bp@alien8.de>
  *
@@ -290,6 +290,17 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
 			return (struct cpio_data){ NULL, 0, "" };
 		if (initrd_start)
 			start = initrd_start;
+	} else {
+		/*
+		 * The picture with physical addresses is a bit different: we
+		 * need to get the *physical* address to which the ramdisk was
+		 * relocated, i.e., relocated_ramdisk (not initrd_start) and
+		 * since we're running from physical addresses, we need to access
+		 * relocated_ramdisk through its *physical* address too.
+		 */
+		u64 *rr = (u64 *)__pa_nodebug(&relocated_ramdisk);
+		if (*rr)
+			start = *rr;
 	}
 
 	return find_cpio_data(path, (void *)start, size, NULL);
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 8325d8a09ab0..59edbe9d4ccb 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -1,7 +1,7 @@
 /*
  * Intel CPU Microcode Update Driver for Linux
  *
- * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
  *		 2006 Shaohua Li <shaohua.li@intel.com>
  *
  * Intel CPU microcode early update for Linux
@@ -42,7 +42,7 @@
 static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
 
 /* Current microcode patch used in early patching on the APs. */
-struct microcode_intel *intel_ucode_patch;
+static struct microcode_intel *intel_ucode_patch;
 
 static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
 					unsigned int s2, unsigned int p2)
@@ -166,7 +166,7 @@ static struct ucode_patch *__alloc_microcode_buf(void *data, unsigned int size)
 static void save_microcode_patch(void *data, unsigned int size)
 {
 	struct microcode_header_intel *mc_hdr, *mc_saved_hdr;
-	struct ucode_patch *iter, *tmp, *p;
+	struct ucode_patch *iter, *tmp, *p = NULL;
 	bool prev_found = false;
 	unsigned int sig, pf;
 
@@ -202,6 +202,18 @@ static void save_microcode_patch(void *data, unsigned int size)
 		else
 			list_add_tail(&p->plist, &microcode_cache);
 	}
+
+	/*
+	 * Save for early loading. On 32-bit, that needs to be a physical
+	 * address as the APs are running from physical addresses, before
+	 * paging has been enabled.
+	 */
+	if (p) {
+		if (IS_ENABLED(CONFIG_X86_32))
+			intel_ucode_patch = (struct microcode_intel *)__pa_nodebug(p->data);
+		else
+			intel_ucode_patch = p->data;
+	}
 }
 
 static int microcode_sanity_check(void *mc, int print_err)
@@ -607,6 +619,14 @@ int __init save_microcode_in_initrd_intel(void)
 	struct ucode_cpu_info uci;
 	struct cpio_data cp;
 
+	/*
+	 * initrd is going away, clear patch ptr. We will scan the microcode one
+	 * last time before jettisoning and save a patch, if found. Then we will
+	 * update that pointer too, with a stable patch address to use when
+	 * resuming the cores.
+	 */
+	intel_ucode_patch = NULL;
+
 	if (!load_builtin_intel_microcode(&cp))
 		cp = find_microcode_in_initrd(ucode_path, false);
 
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 04cb8d34ccb8..70e717fccdd6 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -161,6 +161,15 @@ static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
 }
 #endif
 
+static unsigned long hv_get_tsc_khz(void)
+{
+	unsigned long freq;
+
+	rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);
+
+	return freq / 1000;
+}
+
 static void __init ms_hyperv_init_platform(void)
 {
 	int hv_host_info_eax;
@@ -193,8 +202,15 @@ static void __init ms_hyperv_init_platform(void)
 			hv_host_info_edx >> 24, hv_host_info_edx & 0xFFFFFF);
 	}
 
+	if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
+	    ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
+		x86_platform.calibrate_tsc = hv_get_tsc_khz;
+		x86_platform.calibrate_cpu = hv_get_tsc_khz;
+	}
+
 #ifdef CONFIG_X86_LOCAL_APIC
-	if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
+	if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
+	    ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
 		/*
 		 * Get the APIC frequency.
 		 */
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 8e598a1ad986..6b91e2eb8d3f 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -125,7 +125,7 @@ void __init init_espfix_bsp(void)
 	p4d_t *p4d;
 
 	/* Install the espfix pud into the kernel page directory */
-	pgd = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
+	pgd = &init_top_pgt[pgd_index(ESPFIX_BASE_ADDR)];
 	p4d = p4d_alloc(&init_mm, pgd, ESPFIX_BASE_ADDR);
 	p4d_populate(&init_mm, p4d, espfix_pud_page);
 
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index c2f8dde3255c..d5d44c452624 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -90,6 +90,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
  * Boot time FPU feature detection code:
  */
 unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
+EXPORT_SYMBOL_GPL(mxcsr_feature_mask);
 
 static void __init fpu__init_system_mxcsr(void)
 {
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 0651e974dcb3..9bef1bbeba63 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -689,8 +689,12 @@ static inline void *alloc_tramp(unsigned long size)
 {
 	return module_alloc(size);
 }
-static inline void tramp_free(void *tramp)
+static inline void tramp_free(void *tramp, int size)
 {
+	int npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	set_memory_nx((unsigned long)tramp, npages);
+	set_memory_rw((unsigned long)tramp, npages);
 	module_memfree(tramp);
 }
 #else
@@ -699,7 +703,7 @@ static inline void *alloc_tramp(unsigned long size)
 {
 	return NULL;
 }
-static inline void tramp_free(void *tramp) { }
+static inline void tramp_free(void *tramp, int size) { }
 #endif
 
 /* Defined as markers to the end of the ftrace default trampolines */
@@ -771,7 +775,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 	/* Copy ftrace_caller onto the trampoline memory */
 	ret = probe_kernel_read(trampoline, (void *)start_offset, size);
 	if (WARN_ON(ret < 0)) {
-		tramp_free(trampoline);
+		tramp_free(trampoline, *tramp_size);
 		return 0;
 	}
 
@@ -797,7 +801,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 
 	/* Are we pointing to the reference? */
 	if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0)) {
-		tramp_free(trampoline);
+		tramp_free(trampoline, *tramp_size);
 		return 0;
 	}
 
@@ -839,7 +843,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 	unsigned long offset;
 	unsigned long ip;
 	unsigned int size;
-	int ret;
+	int ret, npages;
 
 	if (ops->trampoline) {
 		/*
@@ -848,11 +852,14 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 		 */
 		if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
 			return;
+		npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT;
+		set_memory_rw(ops->trampoline, npages);
 	} else {
 		ops->trampoline = create_trampoline(ops, &size);
 		if (!ops->trampoline)
 			return;
 		ops->trampoline_size = size;
+		npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	}
 
 	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
@@ -863,6 +870,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 	/* Do a safe modify in case the trampoline is executing */
 	new = ftrace_call_replace(ip, (unsigned long)func);
 	ret = update_ftrace_func(ip, new);
+	set_memory_ro(ops->trampoline, npages);
 
 	/* The update should never fail */
 	WARN_ON(ret);
@@ -939,7 +947,7 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
 	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
 		return;
 
-	tramp_free((void *)ops->trampoline);
+	tramp_free((void *)ops->trampoline, ops->trampoline_size);
 	ops->trampoline = 0;
 }
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 43b7002f44fb..46c3c73e7f43 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -33,17 +33,120 @@
 /*
  * Manage page tables very early on.
  */
-extern pgd_t early_level4_pgt[PTRS_PER_PGD];
+extern pgd_t early_top_pgt[PTRS_PER_PGD];
 extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
-static unsigned int __initdata next_early_pgt = 2;
+static unsigned int __initdata next_early_pgt;
 pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
 
+#define __head	__section(.head.text)
+
+static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
+{
+	return ptr - (void *)_text + (void *)physaddr;
+}
+
+void __head __startup_64(unsigned long physaddr)
+{
+	unsigned long load_delta, *p;
+	pgdval_t *pgd;
+	p4dval_t *p4d;
+	pudval_t *pud;
+	pmdval_t *pmd, pmd_entry;
+	int i;
+
+	/* Is the address too large? */
+	if (physaddr >> MAX_PHYSMEM_BITS)
+		for (;;);
+
+	/*
+	 * Compute the delta between the address I am compiled to run at
+	 * and the address I am actually running at.
+	 */
+	load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);
+
+	/* Is the address not 2M aligned? */
+	if (load_delta & ~PMD_PAGE_MASK)
+		for (;;);
+
+	/* Fixup the physical addresses in the page table */
+
+	pgd = fixup_pointer(&early_top_pgt, physaddr);
+	pgd[pgd_index(__START_KERNEL_map)] += load_delta;
+
+	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		p4d = fixup_pointer(&level4_kernel_pgt, physaddr);
+		p4d[511] += load_delta;
+	}
+
+	pud = fixup_pointer(&level3_kernel_pgt, physaddr);
+	pud[510] += load_delta;
+	pud[511] += load_delta;
+
+	pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
+	pmd[506] += load_delta;
+
+	/*
+	 * Set up the identity mapping for the switchover.  These
+	 * entries should *NOT* have the global bit set!  This also
+	 * creates a bunch of nonsense entries but that is fine --
+	 * it avoids problems around wraparound.
+	 */
+
+	pud = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+	pmd = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+
+	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+
+		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
+		pgd[i + 0] = (pgdval_t)p4d + _KERNPG_TABLE;
+		pgd[i + 1] = (pgdval_t)p4d + _KERNPG_TABLE;
+
+		i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
+		p4d[i + 0] = (pgdval_t)pud + _KERNPG_TABLE;
+		p4d[i + 1] = (pgdval_t)pud + _KERNPG_TABLE;
+	} else {
+		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
+		pgd[i + 0] = (pgdval_t)pud + _KERNPG_TABLE;
+		pgd[i + 1] = (pgdval_t)pud + _KERNPG_TABLE;
+	}
+
+	i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
+	pud[i + 0] = (pudval_t)pmd + _KERNPG_TABLE;
+	pud[i + 1] = (pudval_t)pmd + _KERNPG_TABLE;
+
+	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
+	pmd_entry +=  physaddr;
+
+	for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
+		int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD;
+		pmd[idx] = pmd_entry + i * PMD_SIZE;
+	}
+
+	/*
+	 * Fixup the kernel text+data virtual addresses. Note that
+	 * we might write invalid pmds, when the kernel is relocated
+	 * cleanup_highmap() fixes this up along with the mappings
+	 * beyond _end.
+	 */
+
+	pmd = fixup_pointer(level2_kernel_pgt, physaddr);
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		if (pmd[i] & _PAGE_PRESENT)
+			pmd[i] += load_delta;
+	}
+
+	/* Fixup phys_base */
+	p = fixup_pointer(&phys_base, physaddr);
+	*p += load_delta;
+}
+
 /* Wipe all early page tables except for the kernel symbol map */
 static void __init reset_early_page_tables(void)
 {
-	memset(early_level4_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
+	memset(early_top_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
 	next_early_pgt = 0;
-	write_cr3(__pa_nodebug(early_level4_pgt));
+	write_cr3(__pa_nodebug(early_top_pgt));
 }
 
 /* Create a new PMD entry */
@@ -51,15 +154,16 @@ int __init early_make_pgtable(unsigned long address)
 {
 	unsigned long physaddr = address - __PAGE_OFFSET;
 	pgdval_t pgd, *pgd_p;
+	p4dval_t p4d, *p4d_p;
 	pudval_t pud, *pud_p;
 	pmdval_t pmd, *pmd_p;
 
 	/* Invalid address or early pgt is done ?  */
-	if (physaddr >= MAXMEM || read_cr3() != __pa_nodebug(early_level4_pgt))
+	if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt))
 		return -1;
 
 again:
-	pgd_p = &early_level4_pgt[pgd_index(address)].pgd;
+	pgd_p = &early_top_pgt[pgd_index(address)].pgd;
 	pgd = *pgd_p;
 
 	/*
@@ -67,8 +171,25 @@ again:
 	 * critical -- __PAGE_OFFSET would point us back into the dynamic
 	 * range and we might end up looping forever...
 	 */
-	if (pgd)
-		pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+		p4d_p = pgd_p;
+	else if (pgd)
+		p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+	else {
+		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
+			reset_early_page_tables();
+			goto again;
+		}
+
+		p4d_p = (p4dval_t *)early_dynamic_pgts[next_early_pgt++];
+		memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
+		*pgd_p = (pgdval_t)p4d_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+	}
+	p4d_p += p4d_index(address);
+	p4d = *p4d_p;
+
+	if (p4d)
+		pud_p = (pudval_t *)((p4d & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
 	else {
 		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
 			reset_early_page_tables();
@@ -77,7 +198,7 @@ again:
 
 		pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
 		memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
-		*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+		*p4d_p = (p4dval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
 	}
 	pud_p += pud_index(address);
 	pud = *pud_p;
@@ -156,7 +277,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 
 	clear_bss();
 
-	clear_page(init_level4_pgt);
+	clear_page(init_top_pgt);
 
 	kasan_early_init();
 
@@ -171,8 +292,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	 */
 	load_ucode_bsp();
 
-	/* set init_level4_pgt kernel high mapping*/
-	init_level4_pgt[511] = early_level4_pgt[511];
+	/* set init_top_pgt kernel high mapping*/
+	init_top_pgt[511] = early_top_pgt[511];
 
 	x86_64_start_reservations(real_mode_data);
 }
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ac9d327d2e42..6225550883df 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -37,10 +37,11 @@
  *
  */
 
+#define p4d_index(x)	(((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
 #define pud_index(x)	(((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 
-L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
-L4_START_KERNEL = pgd_index(__START_KERNEL_map)
+PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
+PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
 L3_START_KERNEL = pud_index(__START_KERNEL_map)
 
 	.text
@@ -72,101 +73,12 @@ startup_64:
 	/* Sanitize CPU configuration */
 	call verify_cpu
 
-	/*
-	 * Compute the delta between the address I am compiled to run at and the
-	 * address I am actually running at.
-	 */
-	leaq	_text(%rip), %rbp
-	subq	$_text - __START_KERNEL_map, %rbp
-
-	/* Is the address not 2M aligned? */
-	testl	$~PMD_PAGE_MASK, %ebp
-	jnz	bad_address
-
-	/*
-	 * Is the address too large?
-	 */
-	leaq	_text(%rip), %rax
-	shrq	$MAX_PHYSMEM_BITS, %rax
-	jnz	bad_address
-
-	/*
-	 * Fixup the physical addresses in the page table
-	 */
-	addq	%rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip)
-
-	addq	%rbp, level3_kernel_pgt + (510*8)(%rip)
-	addq	%rbp, level3_kernel_pgt + (511*8)(%rip)
-
-	addq	%rbp, level2_fixmap_pgt + (506*8)(%rip)
-
-	/*
-	 * Set up the identity mapping for the switchover.  These
-	 * entries should *NOT* have the global bit set!  This also
-	 * creates a bunch of nonsense entries but that is fine --
-	 * it avoids problems around wraparound.
-	 */
 	leaq	_text(%rip), %rdi
-	leaq	early_level4_pgt(%rip), %rbx
-
-	movq	%rdi, %rax
-	shrq	$PGDIR_SHIFT, %rax
-
-	leaq	(PAGE_SIZE + _KERNPG_TABLE)(%rbx), %rdx
-	movq	%rdx, 0(%rbx,%rax,8)
-	movq	%rdx, 8(%rbx,%rax,8)
-
-	addq	$PAGE_SIZE, %rdx
-	movq	%rdi, %rax
-	shrq	$PUD_SHIFT, %rax
-	andl	$(PTRS_PER_PUD-1), %eax
-	movq	%rdx, PAGE_SIZE(%rbx,%rax,8)
-	incl	%eax
-	andl	$(PTRS_PER_PUD-1), %eax
-	movq	%rdx, PAGE_SIZE(%rbx,%rax,8)
-
-	addq	$PAGE_SIZE * 2, %rbx
-	movq	%rdi, %rax
-	shrq	$PMD_SHIFT, %rdi
-	addq	$(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
-	leaq	(_end - 1)(%rip), %rcx
-	shrq	$PMD_SHIFT, %rcx
-	subq	%rdi, %rcx
-	incl	%ecx
-
-1:
-	andq	$(PTRS_PER_PMD - 1), %rdi
-	movq	%rax, (%rbx,%rdi,8)
-	incq	%rdi
-	addq	$PMD_SIZE, %rax
-	decl	%ecx
-	jnz	1b
-
-	test %rbp, %rbp
-	jz .Lskip_fixup
+	pushq	%rsi
+	call	__startup_64
+	popq	%rsi
 
-	/*
-	 * Fixup the kernel text+data virtual addresses. Note that
-	 * we might write invalid pmds, when the kernel is relocated
-	 * cleanup_highmap() fixes this up along with the mappings
-	 * beyond _end.
-	 */
-	leaq	level2_kernel_pgt(%rip), %rdi
-	leaq	PAGE_SIZE(%rdi), %r8
-	/* See if it is a valid page table entry */
-1:	testb	$_PAGE_PRESENT, 0(%rdi)
-	jz	2f
-	addq	%rbp, 0(%rdi)
-	/* Go to the next page */
-2:	addq	$8, %rdi
-	cmp	%r8, %rdi
-	jne	1b
-
-	/* Fixup phys_base */
-	addq	%rbp, phys_base(%rip)
-
-.Lskip_fixup:
-	movq	$(early_level4_pgt - __START_KERNEL_map), %rax
+	movq	$(early_top_pgt - __START_KERNEL_map), %rax
 	jmp 1f
 ENTRY(secondary_startup_64)
 	/*
@@ -186,14 +98,17 @@ ENTRY(secondary_startup_64)
 	/* Sanitize CPU configuration */
 	call verify_cpu
 
-	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
+	movq	$(init_top_pgt - __START_KERNEL_map), %rax
 1:
 
-	/* Enable PAE mode and PGE */
+	/* Enable PAE mode, PGE and LA57 */
 	movl	$(X86_CR4_PAE | X86_CR4_PGE), %ecx
+#ifdef CONFIG_X86_5LEVEL
+	orl	$X86_CR4_LA57, %ecx
+#endif
 	movq	%rcx, %cr4
 
-	/* Setup early boot stage 4 level pagetables. */
+	/* Setup early boot stage 4-/5-level pagetables. */
 	addq	phys_base(%rip), %rax
 	movq	%rax, %cr3
 
@@ -417,9 +332,13 @@ GLOBAL(name)
 	.endr
 
 	__INITDATA
-NEXT_PAGE(early_level4_pgt)
+NEXT_PAGE(early_top_pgt)
 	.fill	511,8,0
+#ifdef CONFIG_X86_5LEVEL
+	.quad	level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+#else
 	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+#endif
 
 NEXT_PAGE(early_dynamic_pgts)
 	.fill	512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -427,14 +346,14 @@ NEXT_PAGE(early_dynamic_pgts)
 	.data
 
 #ifndef CONFIG_XEN
-NEXT_PAGE(init_level4_pgt)
+NEXT_PAGE(init_top_pgt)
 	.fill	512,8,0
 #else
-NEXT_PAGE(init_level4_pgt)
+NEXT_PAGE(init_top_pgt)
 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
-	.org    init_level4_pgt + L4_PAGE_OFFSET*8, 0
+	.org    init_top_pgt + PGD_PAGE_OFFSET*8, 0
 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
-	.org    init_level4_pgt + L4_START_KERNEL*8, 0
+	.org    init_top_pgt + PGD_START_KERNEL*8, 0
 	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
 	.quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
 
@@ -448,6 +367,12 @@ NEXT_PAGE(level2_ident_pgt)
 	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
 #endif
 
+#ifdef CONFIG_X86_5LEVEL
+NEXT_PAGE(level4_kernel_pgt)
+	.fill	511,8,0
+	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+#endif
+
 NEXT_PAGE(level3_kernel_pgt)
 	.fill	L3_START_KERNEL,8,0
 	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index be22f5a2192e..4e3b8a587c88 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -418,6 +418,7 @@ struct legacy_pic default_legacy_pic = {
 };
 
 struct legacy_pic *legacy_pic = &default_legacy_pic;
+EXPORT_SYMBOL(legacy_pic);
 
 static int __init i8259A_init_ops(void)
 {
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 5b2bbfbb3712..6b877807598b 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -52,6 +52,7 @@
 #include <linux/ftrace.h>
 #include <linux/frame.h>
 #include <linux/kasan.h>
+#include <linux/moduleloader.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
@@ -417,6 +418,14 @@ static void prepare_boost(struct kprobe *p, struct insn *insn)
 	}
 }
 
+/* Recover page to RW mode before releasing it */
+void free_insn_page(void *page)
+{
+	set_memory_nx((unsigned long)page & PAGE_MASK, 1);
+	set_memory_rw((unsigned long)page & PAGE_MASK, 1);
+	module_memfree(page);
+}
+
 static int arch_copy_kprobe(struct kprobe *p)
 {
 	struct insn insn;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 901c640d152f..69ea0bc1cfa3 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -28,6 +28,7 @@
 #include <linux/kdebug.h>
 #include <linux/kallsyms.h>
 #include <linux/ftrace.h>
+#include <linux/frame.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
@@ -94,6 +95,7 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
 }
 
 asm (
+			"optprobe_template_func:\n"
 			".global optprobe_template_entry\n"
 			"optprobe_template_entry:\n"
 #ifdef CONFIG_X86_64
@@ -131,7 +133,12 @@ asm (
 			"	popf\n"
 #endif
 			".global optprobe_template_end\n"
-			"optprobe_template_end:\n");
+			"optprobe_template_end:\n"
+			".type optprobe_template_func, @function\n"
+			".size optprobe_template_func, .-optprobe_template_func\n");
+
+void optprobe_template_func(void);
+STACK_FRAME_NON_STANDARD(optprobe_template_func);
 
 #define TMPL_MOVE_IDX \
 	((long)&optprobe_template_val - (long)&optprobe_template_entry)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index da5c09789984..43e10d6fdbed 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -161,8 +161,8 @@ void kvm_async_pf_task_wait(u32 token)
 			 */
 			rcu_irq_exit();
 			native_safe_halt();
-			rcu_irq_enter();
 			local_irq_disable();
+			rcu_irq_enter();
 		}
 	}
 	if (!n.halted)
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index d4a15831ac58..a870910c8565 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -22,24 +22,25 @@
 #include <asm/syscalls.h>
 
 /* context.lock is held for us, so we don't need any locking. */
-static void flush_ldt(void *current_mm)
+static void flush_ldt(void *__mm)
 {
+	struct mm_struct *mm = __mm;
 	mm_context_t *pc;
 
-	if (current->active_mm != current_mm)
+	if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
 		return;
 
-	pc = &current->active_mm->context;
-	set_ldt(pc->ldt->entries, pc->ldt->size);
+	pc = &mm->context;
+	set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
 }
 
 /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
-static struct ldt_struct *alloc_ldt_struct(unsigned int size)
+static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
 {
 	struct ldt_struct *new_ldt;
 	unsigned int alloc_size;
 
-	if (size > LDT_ENTRIES)
+	if (num_entries > LDT_ENTRIES)
 		return NULL;
 
 	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
@@ -47,7 +48,7 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int size)
 		return NULL;
 
 	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
-	alloc_size = size * LDT_ENTRY_SIZE;
+	alloc_size = num_entries * LDT_ENTRY_SIZE;
 
 	/*
 	 * Xen is very picky: it requires a page-aligned LDT that has no
@@ -65,14 +66,14 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int size)
 		return NULL;
 	}
 
-	new_ldt->size = size;
+	new_ldt->nr_entries = num_entries;
 	return new_ldt;
 }
 
 /* After calling this, the LDT is immutable. */
 static void finalize_ldt_struct(struct ldt_struct *ldt)
 {
-	paravirt_alloc_ldt(ldt->entries, ldt->size);
+	paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
 }
 
 /* context.lock is held */
@@ -91,8 +92,8 @@ static void free_ldt_struct(struct ldt_struct *ldt)
 	if (likely(!ldt))
 		return;
 
-	paravirt_free_ldt(ldt->entries, ldt->size);
-	if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
+	paravirt_free_ldt(ldt->entries, ldt->nr_entries);
+	if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
 		vfree_atomic(ldt->entries);
 	else
 		free_page((unsigned long)ldt->entries);
@@ -122,14 +123,14 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
 		goto out_unlock;
 	}
 
-	new_ldt = alloc_ldt_struct(old_mm->context.ldt->size);
+	new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
 	if (!new_ldt) {
 		retval = -ENOMEM;
 		goto out_unlock;
 	}
 
 	memcpy(new_ldt->entries, old_mm->context.ldt->entries,
-	       new_ldt->size * LDT_ENTRY_SIZE);
+	       new_ldt->nr_entries * LDT_ENTRY_SIZE);
 	finalize_ldt_struct(new_ldt);
 
 	mm->context.ldt = new_ldt;
@@ -152,9 +153,9 @@ void destroy_context_ldt(struct mm_struct *mm)
 
 static int read_ldt(void __user *ptr, unsigned long bytecount)
 {
-	int retval;
-	unsigned long size;
 	struct mm_struct *mm = current->mm;
+	unsigned long entries_size;
+	int retval;
 
 	mutex_lock(&mm->context.lock);
 
@@ -166,18 +167,18 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
 	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
 		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
 
-	size = mm->context.ldt->size * LDT_ENTRY_SIZE;
-	if (size > bytecount)
-		size = bytecount;
+	entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE;
+	if (entries_size > bytecount)
+		entries_size = bytecount;
 
-	if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
+	if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) {
 		retval = -EFAULT;
 		goto out_unlock;
 	}
 
-	if (size != bytecount) {
+	if (entries_size != bytecount) {
 		/* Zero-fill the rest and pretend we read bytecount bytes. */
-		if (clear_user(ptr + size, bytecount - size)) {
+		if (clear_user(ptr + entries_size, bytecount - entries_size)) {
 			retval = -EFAULT;
 			goto out_unlock;
 		}
@@ -208,7 +209,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 {
 	struct mm_struct *mm = current->mm;
 	struct ldt_struct *new_ldt, *old_ldt;
-	unsigned int oldsize, newsize;
+	unsigned int old_nr_entries, new_nr_entries;
 	struct user_desc ldt_info;
 	struct desc_struct ldt;
 	int error;
@@ -247,17 +248,18 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 
 	mutex_lock(&mm->context.lock);
 
-	old_ldt = mm->context.ldt;
-	oldsize = old_ldt ? old_ldt->size : 0;
-	newsize = max(ldt_info.entry_number + 1, oldsize);
+	old_ldt       = mm->context.ldt;
+	old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
+	new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);
 
 	error = -ENOMEM;
-	new_ldt = alloc_ldt_struct(newsize);
+	new_ldt = alloc_ldt_struct(new_nr_entries);
 	if (!new_ldt)
 		goto out_unlock;
 
 	if (old_ldt)
-		memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE);
+		memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);
+
 	new_ldt->entries[ldt_info.entry_number] = ldt;
 	finalize_ldt_struct(new_ldt);
 
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index ce640428d6fe..cb0a30473c23 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -114,7 +114,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 	struct x86_mapping_info info = {
 		.alloc_pgt_page	= alloc_pgt_page,
 		.context	= image,
-		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
+		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
 	};
 	unsigned long mstart, mend;
 	pgd_t *level4p;
@@ -123,6 +123,10 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 
 	level4p = (pgd_t *)__va(start_pgtable);
 	clear_page(level4p);
+
+	if (direct_gbpages)
+		info.direct_gbpages = true;
+
 	for (i = 0; i < nr_pfn_mapped; i++) {
 		mstart = pfn_mapped[i].start << PAGE_SHIFT;
 		mend   = pfn_mapped[i].end << PAGE_SHIFT;
@@ -343,7 +347,7 @@ void machine_kexec(struct kimage *image)
 void arch_crash_save_vmcoreinfo(void)
 {
 	VMCOREINFO_NUMBER(phys_base);
-	VMCOREINFO_SYMBOL(init_level4_pgt);
+	VMCOREINFO_SYMBOL(init_top_pgt);
 
 #ifdef CONFIG_NUMA
 	VMCOREINFO_SYMBOL(node_data);
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index 6d9582ec0324..d27f8d84c4ff 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -78,7 +78,7 @@ static void __init test_nmi_ipi(struct cpumask *mask)
 
 	/* Don't wait longer than a second */
 	timeout = USEC_PER_SEC;
-	while (!cpumask_empty(mask) && timeout--)
+	while (!cpumask_empty(mask) && --timeout)
 	        udelay(1);
 
 	/* What happens if we timeout, do we still unregister?? */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 3586996fc50d..bc0a849589bb 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -391,7 +391,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
 
 	.read_cr2 = native_read_cr2,
 	.write_cr2 = native_write_cr2,
-	.read_cr3 = native_read_cr3,
+	.read_cr3 = __native_read_cr3,
 	.write_cr3 = native_write_cr3,
 
 	.flush_tlb_user = native_flush_tlb,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0bb88428cbf2..3ca198080ea9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -545,17 +545,6 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 }
 
 /*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct inactive_task_frame *frame =
-		(struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
-	return READ_ONCE_NOCHECK(frame->ret_addr);
-}
-
-/*
  * Called from fs/proc with a reference on @p to find the function
  * which called into schedule(). This needs to be done carefully
  * because the task might wake up and we might look at a stack
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ff40e74c9181..c6d6dc5f8bb2 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -78,7 +78,7 @@ void __show_regs(struct pt_regs *regs, int all)
 
 	printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
 	printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
-		smp_processor_id());
+		raw_smp_processor_id());
 
 	printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
 		regs->ax, regs->bx, regs->cx, regs->dx);
@@ -92,7 +92,7 @@ void __show_regs(struct pt_regs *regs, int all)
 
 	cr0 = read_cr0();
 	cr2 = read_cr2();
-	cr3 = read_cr3();
+	cr3 = __read_cr3();
 	cr4 = __read_cr4();
 	printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
 			cr0, cr2, cr3, cr4);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b6840bf3940b..c3169be4c596 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -104,7 +104,7 @@ void __show_regs(struct pt_regs *regs, int all)
 
 	cr0 = read_cr0();
 	cr2 = read_cr2();
-	cr3 = read_cr3();
+	cr3 = __read_cr3();
 	cr4 = __read_cr4();
 
 	printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
@@ -142,7 +142,7 @@ void release_thread(struct task_struct *dead_task)
 			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
 				dead_task->comm,
 				dead_task->mm->context.ldt->entries,
-				dead_task->mm->context.ldt->size);
+				dead_task->mm->context.ldt->nr_entries);
 			BUG();
 		}
 #endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2544700a2a87..67393fc88353 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -9,6 +9,7 @@
 #include <linux/sched.h>
 #include <linux/tboot.h>
 #include <linux/delay.h>
+#include <linux/frame.h>
 #include <acpi/reboot.h>
 #include <asm/io.h>
 #include <asm/apic.h>
@@ -123,6 +124,7 @@ void __noreturn machine_real_restart(unsigned int type)
 #ifdef CONFIG_APM_MODULE
 EXPORT_SYMBOL(machine_real_restart);
 #endif
+STACK_FRAME_NON_STANDARD(machine_real_restart);
 
 /*
  * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 603a1669a2ec..65622f07e633 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -503,7 +503,7 @@ static int __init reserve_crashkernel_low(void)
 			return 0;
 	}
 
-	low_base = memblock_find_in_range(low_size, 1ULL << 32, low_size, CRASH_ALIGN);
+	low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN);
 	if (!low_base) {
 		pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n",
 		       (unsigned long)(low_size >> 20));
@@ -980,8 +980,6 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	x86_configure_nx();
 
-	simple_udelay_calibration();
-
 	parse_early_param();
 
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -1041,6 +1039,8 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	init_hypervisor_platform();
 
+	simple_udelay_calibration();
+
 	x86_init.resources.probe_roms();
 
 	/* after parse_early_param, so could debug it */
@@ -1225,6 +1225,21 @@ void __init setup_arch(char **cmdline_p)
 
 	kasan_init();
 
+#ifdef CONFIG_X86_32
+	/* sync back kernel address range */
+	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
+			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+			KERNEL_PGD_PTRS);
+
+	/*
+	 * sync back low identity map too.  It is used for example
+	 * in the 32-bit EFI stub.
+	 */
+	clone_pgd_range(initial_page_table,
+			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+			min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
+#endif
+
 	tboot_probe();
 
 	map_vsyscall();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index bb1e8cc0bc84..10edd1e69a68 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -291,11 +291,11 @@ void __init setup_per_cpu_areas(void)
 
 #ifdef CONFIG_X86_32
 	/*
-	 * Sync back kernel address range.  We want to make sure that
-	 * all kernel mappings, including percpu mappings, are available
-	 * in the smpboot asm.  We can't reliably pick up percpu
-	 * mappings using vmalloc_fault(), because exception dispatch
-	 * needs percpu data.
+	 * Sync back kernel address range again.  We already did this in
+	 * setup_arch(), but percpu data also needs to be available in
+	 * the smpboot asm.  We can't reliably pick up percpu mappings
+	 * using vmalloc_fault(), because exception dispatch needs
+	 * percpu data.
 	 */
 	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
 			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f04479a8f74f..b474c8de7fba 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -863,7 +863,7 @@ static void announce_cpu(int cpu, int apicid)
 	if (cpu == 1)
 		printk(KERN_INFO "x86: Booting SMP configuration:\n");
 
-	if (system_state == SYSTEM_BOOTING) {
+	if (system_state < SYSTEM_RUNNING) {
 		if (node != current_node) {
 			if (current_node > (-1))
 				pr_cont("\n");
@@ -1589,7 +1589,6 @@ void native_cpu_die(unsigned int cpu)
 void play_dead_common(void)
 {
 	idle_task_exit();
-	reset_lazy_tlbstate();
 
 	/* Ack it */
 	(void)cpu_report_death();
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index f07f83b3611b..5f25cfbd952e 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -34,7 +34,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
 
 		mutex_lock(&child->mm->context.lock);
 		if (unlikely(!child->mm->context.ldt ||
-			     seg >= child->mm->context.ldt->size))
+			     seg >= child->mm->context.ldt->nr_entries))
 			addr = -1L; /* bogus selector, access would fault */
 		else {
 			desc = &child->mm->context.ldt->entries[seg];
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 207b8f2582c7..213ddf3e937d 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -144,7 +144,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (end - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -187,7 +187,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 4b1724059909..a4eb27918ceb 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -514,7 +514,7 @@ int tboot_force_iommu(void)
 	if (!tboot_enabled())
 		return 0;
 
-	if (!intel_iommu_tboot_noforce)
+	if (intel_iommu_tboot_noforce)
 		return 1;
 
 	if (no_iommu || swiotlb || dmar_disabled)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 3995d3a777d4..bf54309b85da 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -182,7 +182,7 @@ int is_valid_bugaddr(unsigned long addr)
 	return ud == INSN_UD0 || ud == INSN_UD2;
 }
 
-static int fixup_bug(struct pt_regs *regs, int trapnr)
+int fixup_bug(struct pt_regs *regs, int trapnr)
 {
 	if (trapnr != X86_TRAP_UD)
 		return 0;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 714dfba6a1e7..5270fc0c2df6 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -51,115 +51,34 @@ static u32 art_to_tsc_denominator;
 static u64 art_to_tsc_offset;
 struct clocksource *art_related_clocksource;
 
-/*
- * Use a ring-buffer like data structure, where a writer advances the head by
- * writing a new data entry and a reader advances the tail when it observes a
- * new entry.
- *
- * Writers are made to wait on readers until there's space to write a new
- * entry.
- *
- * This means that we can always use an {offset, mul} pair to compute a ns
- * value that is 'roughly' in the right direction, even if we're writing a new
- * {offset, mul} pair during the clock read.
- *
- * The down-side is that we can no longer guarantee strict monotonicity anymore
- * (assuming the TSC was that to begin with), because while we compute the
- * intersection point of the two clock slopes and make sure the time is
- * continuous at the point of switching; we can no longer guarantee a reader is
- * strictly before or after the switch point.
- *
- * It does mean a reader no longer needs to disable IRQs in order to avoid
- * CPU-Freq updates messing with his times, and similarly an NMI reader will
- * no longer run the risk of hitting half-written state.
- */
-
 struct cyc2ns {
-	struct cyc2ns_data data[2];	/*  0 + 2*24 = 48 */
-	struct cyc2ns_data *head;	/* 48 + 8    = 56 */
-	struct cyc2ns_data *tail;	/* 56 + 8    = 64 */
-}; /* exactly fits one cacheline */
-
-static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
-
-struct cyc2ns_data *cyc2ns_read_begin(void)
-{
-	struct cyc2ns_data *head;
-
-	preempt_disable();
-
-	head = this_cpu_read(cyc2ns.head);
-	/*
-	 * Ensure we observe the entry when we observe the pointer to it.
-	 * matches the wmb from cyc2ns_write_end().
-	 */
-	smp_read_barrier_depends();
-	head->__count++;
-	barrier();
+	struct cyc2ns_data data[2];	/*  0 + 2*16 = 32 */
+	seqcount_t	   seq;		/* 32 + 4    = 36 */
 
-	return head;
-}
+}; /* fits one cacheline */
 
-void cyc2ns_read_end(struct cyc2ns_data *head)
-{
-	barrier();
-	/*
-	 * If we're the outer most nested read; update the tail pointer
-	 * when we're done. This notifies possible pending writers
-	 * that we've observed the head pointer and that the other
-	 * entry is now free.
-	 */
-	if (!--head->__count) {
-		/*
-		 * x86-TSO does not reorder writes with older reads;
-		 * therefore once this write becomes visible to another
-		 * cpu, we must be finished reading the cyc2ns_data.
-		 *
-		 * matches with cyc2ns_write_begin().
-		 */
-		this_cpu_write(cyc2ns.tail, head);
-	}
-	preempt_enable();
-}
+static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
 
-/*
- * Begin writing a new @data entry for @cpu.
- *
- * Assumes some sort of write side lock; currently 'provided' by the assumption
- * that cpufreq will call its notifiers sequentially.
- */
-static struct cyc2ns_data *cyc2ns_write_begin(int cpu)
+void cyc2ns_read_begin(struct cyc2ns_data *data)
 {
-	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
-	struct cyc2ns_data *data = c2n->data;
+	int seq, idx;
 
-	if (data == c2n->head)
-		data++;
+	preempt_disable_notrace();
 
-	/* XXX send an IPI to @cpu in order to guarantee a read? */
+	do {
+		seq = this_cpu_read(cyc2ns.seq.sequence);
+		idx = seq & 1;
 
-	/*
-	 * When we observe the tail write from cyc2ns_read_end(),
-	 * the cpu must be done with that entry and its safe
-	 * to start writing to it.
-	 */
-	while (c2n->tail == data)
-		cpu_relax();
+		data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
+		data->cyc2ns_mul    = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
+		data->cyc2ns_shift  = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);
 
-	return data;
+	} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
 }
 
-static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data)
+void cyc2ns_read_end(void)
 {
-	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
-
-	/*
-	 * Ensure the @data writes are visible before we publish the
-	 * entry. Matches the data-depencency in cyc2ns_read_begin().
-	 */
-	smp_wmb();
-
-	ACCESS_ONCE(c2n->head) = data;
+	preempt_enable_notrace();
 }
 
 /*
@@ -191,7 +110,6 @@ static void cyc2ns_data_init(struct cyc2ns_data *data)
 	data->cyc2ns_mul = 0;
 	data->cyc2ns_shift = 0;
 	data->cyc2ns_offset = 0;
-	data->__count = 0;
 }
 
 static void cyc2ns_init(int cpu)
@@ -201,51 +119,29 @@ static void cyc2ns_init(int cpu)
 	cyc2ns_data_init(&c2n->data[0]);
 	cyc2ns_data_init(&c2n->data[1]);
 
-	c2n->head = c2n->data;
-	c2n->tail = c2n->data;
+	seqcount_init(&c2n->seq);
 }
 
 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 {
-	struct cyc2ns_data *data, *tail;
+	struct cyc2ns_data data;
 	unsigned long long ns;
 
-	/*
-	 * See cyc2ns_read_*() for details; replicated in order to avoid
-	 * an extra few instructions that came with the abstraction.
-	 * Notable, it allows us to only do the __count and tail update
-	 * dance when its actually needed.
-	 */
-
-	preempt_disable_notrace();
-	data = this_cpu_read(cyc2ns.head);
-	tail = this_cpu_read(cyc2ns.tail);
-
-	if (likely(data == tail)) {
-		ns = data->cyc2ns_offset;
-		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
-	} else {
-		data->__count++;
-
-		barrier();
-
-		ns = data->cyc2ns_offset;
-		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
+	cyc2ns_read_begin(&data);
 
-		barrier();
+	ns = data.cyc2ns_offset;
+	ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
 
-		if (!--data->__count)
-			this_cpu_write(cyc2ns.tail, data);
-	}
-	preempt_enable_notrace();
+	cyc2ns_read_end();
 
 	return ns;
 }
 
-static void set_cyc2ns_scale(unsigned long khz, int cpu)
+static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
 {
-	unsigned long long tsc_now, ns_now;
-	struct cyc2ns_data *data;
+	unsigned long long ns_now;
+	struct cyc2ns_data data;
+	struct cyc2ns *c2n;
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -254,9 +150,6 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu)
 	if (!khz)
 		goto done;
 
-	data = cyc2ns_write_begin(cpu);
-
-	tsc_now = rdtsc();
 	ns_now = cycles_2_ns(tsc_now);
 
 	/*
@@ -264,7 +157,7 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu)
 	 * time function is continuous; see the comment near struct
 	 * cyc2ns_data.
 	 */
-	clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz,
+	clocks_calc_mult_shift(&data.cyc2ns_mul, &data.cyc2ns_shift, khz,
 			       NSEC_PER_MSEC, 0);
 
 	/*
@@ -273,20 +166,26 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu)
 	 * conversion algorithm shifting a 32-bit value (now specifies a 64-bit
 	 * value) - refer perf_event_mmap_page documentation in perf_event.h.
 	 */
-	if (data->cyc2ns_shift == 32) {
-		data->cyc2ns_shift = 31;
-		data->cyc2ns_mul >>= 1;
+	if (data.cyc2ns_shift == 32) {
+		data.cyc2ns_shift = 31;
+		data.cyc2ns_mul >>= 1;
 	}
 
-	data->cyc2ns_offset = ns_now -
-		mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift);
+	data.cyc2ns_offset = ns_now -
+		mul_u64_u32_shr(tsc_now, data.cyc2ns_mul, data.cyc2ns_shift);
+
+	c2n = per_cpu_ptr(&cyc2ns, cpu);
 
-	cyc2ns_write_end(cpu, data);
+	raw_write_seqcount_latch(&c2n->seq);
+	c2n->data[0] = data;
+	raw_write_seqcount_latch(&c2n->seq);
+	c2n->data[1] = data;
 
 done:
-	sched_clock_idle_wakeup_event(0);
+	sched_clock_idle_wakeup_event();
 	local_irq_restore(flags);
 }
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
@@ -374,6 +273,8 @@ static int __init tsc_setup(char *str)
 		tsc_clocksource_reliable = 1;
 	if (!strncmp(str, "noirqtime", 9))
 		no_sched_irq_time = 1;
+	if (!strcmp(str, "unstable"))
+		mark_tsc_unstable("boot parameter");
 	return 1;
 }
 
@@ -986,7 +887,6 @@ void tsc_restore_sched_clock_state(void)
 }
 
 #ifdef CONFIG_CPU_FREQ
-
 /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
  * changes.
  *
@@ -1027,7 +927,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
 			mark_tsc_unstable("cpufreq changes");
 
-		set_cyc2ns_scale(tsc_khz, freq->cpu);
+		set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc());
 	}
 
 	return 0;
@@ -1127,6 +1027,15 @@ static void tsc_cs_mark_unstable(struct clocksource *cs)
 	pr_info("Marking TSC unstable due to clocksource watchdog\n");
 }
 
+static void tsc_cs_tick_stable(struct clocksource *cs)
+{
+	if (tsc_unstable)
+		return;
+
+	if (using_native_sched_clock())
+		sched_clock_tick_stable();
+}
+
 /*
  * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
  */
@@ -1140,6 +1049,7 @@ static struct clocksource clocksource_tsc = {
 	.archdata               = { .vclock_mode = VCLOCK_TSC },
 	.resume			= tsc_resume,
 	.mark_unstable		= tsc_cs_mark_unstable,
+	.tick_stable		= tsc_cs_tick_stable,
 };
 
 void mark_tsc_unstable(char *reason)
@@ -1255,6 +1165,7 @@ static void tsc_refine_calibration_work(struct work_struct *work)
 	static int hpet;
 	u64 tsc_stop, ref_stop, delta;
 	unsigned long freq;
+	int cpu;
 
 	/* Don't bother refining TSC on unstable systems */
 	if (check_tsc_unstable())
@@ -1305,6 +1216,10 @@ static void tsc_refine_calibration_work(struct work_struct *work)
 	/* Inform the TSC deadline clockevent devices about the recalibration */
 	lapic_update_tsc_freq();
 
+	/* Update the sched_clock() rate to match the clocksource one */
+	for_each_possible_cpu(cpu)
+		set_cyc2ns_scale(tsc_khz, cpu, tsc_stop);
+
 out:
 	if (boot_cpu_has(X86_FEATURE_ART))
 		art_related_clocksource = &clocksource_tsc;
@@ -1350,7 +1265,7 @@ device_initcall(init_tsc_clocksource);
 
 void __init tsc_init(void)
 {
-	u64 lpj;
+	u64 lpj, cyc;
 	int cpu;
 
 	if (!boot_cpu_has(X86_FEATURE_TSC)) {
@@ -1390,9 +1305,10 @@ void __init tsc_init(void)
 	 * speed as the bootup CPU. (cpufreq notifiers will fix this
 	 * up if their speed diverges)
 	 */
+	cyc = rdtsc();
 	for_each_possible_cpu(cpu) {
 		cyc2ns_init(cpu);
-		set_cyc2ns_scale(tsc_khz, cpu);
+		set_cyc2ns_scale(tsc_khz, cpu, cyc);
 	}
 
 	if (tsc_disabled > 0)
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 82c6d7f1fd73..b9389d72b2f7 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -104,6 +104,11 @@ static inline unsigned long *last_frame(struct unwind_state *state)
 	return (unsigned long *)task_pt_regs(state->task) - 2;
 }
 
+static bool is_last_frame(struct unwind_state *state)
+{
+	return state->bp == last_frame(state);
+}
+
 #ifdef CONFIG_X86_32
 #define GCC_REALIGN_WORDS 3
 #else
@@ -115,16 +120,15 @@ static inline unsigned long *last_aligned_frame(struct unwind_state *state)
 	return last_frame(state) - GCC_REALIGN_WORDS;
 }
 
-static bool is_last_task_frame(struct unwind_state *state)
+static bool is_last_aligned_frame(struct unwind_state *state)
 {
 	unsigned long *last_bp = last_frame(state);
 	unsigned long *aligned_bp = last_aligned_frame(state);
 
 	/*
-	 * We have to check for the last task frame at two different locations
-	 * because gcc can occasionally decide to realign the stack pointer and
-	 * change the offset of the stack frame in the prologue of a function
-	 * called by head/entry code.  Examples:
+	 * GCC can occasionally decide to realign the stack pointer and change
+	 * the offset of the stack frame in the prologue of a function called
+	 * by head/entry code.  Examples:
 	 *
 	 * <start_secondary>:
 	 *      push   %edi
@@ -141,11 +145,38 @@ static bool is_last_task_frame(struct unwind_state *state)
 	 *      push   %rbp
 	 *      mov    %rsp,%rbp
 	 *
-	 * Note that after aligning the stack, it pushes a duplicate copy of
-	 * the return address before pushing the frame pointer.
+	 * After aligning the stack, it pushes a duplicate copy of the return
+	 * address before pushing the frame pointer.
+	 */
+	return (state->bp == aligned_bp && *(aligned_bp + 1) == *(last_bp + 1));
+}
+
+static bool is_last_ftrace_frame(struct unwind_state *state)
+{
+	unsigned long *last_bp = last_frame(state);
+	unsigned long *last_ftrace_bp = last_bp - 3;
+
+	/*
+	 * When unwinding from an ftrace handler of a function called by entry
+	 * code, the stack layout of the last frame is:
+	 *
+	 *   bp
+	 *   parent ret addr
+	 *   bp
+	 *   function ret addr
+	 *   parent ret addr
+	 *   pt_regs
+	 *   -----------------
 	 */
-	return (state->bp == last_bp ||
-		(state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1)));
+	return (state->bp == last_ftrace_bp &&
+		*state->bp == *(state->bp + 2) &&
+		*(state->bp + 1) == *(state->bp + 4));
+}
+
+static bool is_last_task_frame(struct unwind_state *state)
+{
+	return is_last_frame(state) || is_last_aligned_frame(state) ||
+	       is_last_ftrace_frame(state);
 }
 
 /*
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a181ae76c71c..59ca2eea522c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -780,18 +780,20 @@ out:
 static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
 {
 	struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
-	int j, nent = vcpu->arch.cpuid_nent;
+	struct kvm_cpuid_entry2 *ej;
+	int j = i;
+	int nent = vcpu->arch.cpuid_nent;
 
 	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
 	/* when no next entry is found, the current entry[i] is reselected */
-	for (j = i + 1; ; j = (j + 1) % nent) {
-		struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
-		if (ej->function == e->function) {
-			ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
-			return j;
-		}
-	}
-	return 0; /* silence gcc, even though control never reaches here */
+	do {
+		j = (j + 1) % nent;
+		ej = &vcpu->arch.cpuid_entries[j];
+	} while (ej->function != e->function);
+
+	ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
+
+	return j;
 }
 
 /* find an entry with matching function, matching index (if needed), and that
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c25cfaf584e7..80890dee66ce 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 		ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
 	}
 
+	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
 	return X86EMUL_CONTINUE;
 }
 
@@ -4173,7 +4174,7 @@ static int check_dr_write(struct x86_emulate_ctxt *ctxt)
 
 static int check_svme(struct x86_emulate_ctxt *ctxt)
 {
-	u64 efer;
+	u64 efer = 0;
 
 	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c329d2894905..d24c8742d9b0 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1495,8 +1495,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
 
 static void cancel_hv_timer(struct kvm_lapic *apic)
 {
+	preempt_disable();
 	kvm_x86_ops->cancel_hv_timer(apic->vcpu);
 	apic->lapic_timer.hv_timer_in_use = false;
+	preempt_enable();
 }
 
 static bool start_hv_timer(struct kvm_lapic *apic)
@@ -1934,7 +1936,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 	for (i = 0; i < KVM_APIC_LVT_NUM; i++)
 		kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
 	apic_update_lvtt(apic);
-	if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
+	if (kvm_vcpu_is_reset_bsp(vcpu) &&
+	    kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
 		kvm_lapic_set_reg(apic, APIC_LVT0,
 			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
 	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 558676538fca..cb8225969255 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1498,6 +1498,21 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 		kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
 }
 
+/**
+ * kvm_arch_write_log_dirty - emulate dirty page logging
+ * @vcpu: Guest mode vcpu
+ *
+ * Emulate arch specific page modification logging for the
+ * nested hypervisor
+ */
+int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu)
+{
+	if (kvm_x86_ops->write_log_dirty)
+		return kvm_x86_ops->write_log_dirty(vcpu);
+
+	return 0;
+}
+
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 				    struct kvm_memory_slot *slot, u64 gfn)
 {
@@ -3683,12 +3698,15 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 	return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
 }
 
-static bool can_do_async_pf(struct kvm_vcpu *vcpu)
+bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
 {
 	if (unlikely(!lapic_in_kernel(vcpu) ||
 		     kvm_event_needs_reinjection(vcpu)))
 		return false;
 
+	if (is_guest_mode(vcpu))
+		return false;
+
 	return kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
@@ -3704,7 +3722,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 	if (!async)
 		return false; /* *pfn has correct page already */
 
-	if (!prefault && can_do_async_pf(vcpu)) {
+	if (!prefault && kvm_can_do_async_pf(vcpu)) {
 		trace_kvm_try_async_get_page(gva, gfn);
 		if (kvm_find_async_pf_gfn(vcpu, gfn)) {
 			trace_kvm_async_pf_doublefault(gva, gfn);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index d8ccb32f7308..330bf3a811fb 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -76,6 +76,7 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 			     bool accessed_dirty);
+bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
@@ -202,4 +203,5 @@ void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 				    struct kvm_memory_slot *slot, u64 gfn);
+int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
 #endif
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 314d2071b337..b0454c7e4cff 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -226,6 +226,10 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
 		if (level == walker->level && write_fault &&
 				!(pte & PT_GUEST_DIRTY_MASK)) {
 			trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
+#if PTTYPE == PTTYPE_EPT
+			if (kvm_arch_write_log_dirty(vcpu))
+				return -EINVAL;
+#endif
 			pte |= PT_GUEST_DIRTY_MASK;
 		}
 		if (pte == orig_pte)
@@ -279,11 +283,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
+	u64 pt_access, pte_access;
+	unsigned index, accessed_dirty, pte_pkey;
 	unsigned nested_access;
 	gpa_t pte_gpa;
 	bool have_ad;
 	int offset;
+	u64 walk_nx_mask = 0;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault  = access & PFERR_USER_MASK;
 	const int fetch_fault = access & PFERR_FETCH_MASK;
@@ -298,6 +304,7 @@ retry_walk:
 	have_ad       = PT_HAVE_ACCESSED_DIRTY(mmu);
 
 #if PTTYPE == 64
+	walk_nx_mask = 1ULL << PT64_NX_SHIFT;
 	if (walker->level == PT32E_ROOT_LEVEL) {
 		pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
 		trace_kvm_mmu_paging_element(pte, walker->level);
@@ -309,8 +316,6 @@ retry_walk:
 	walker->max_level = walker->level;
 	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
-	accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
-
 	/*
 	 * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
 	 * by the MOV to CR instruction are treated as reads and do not cause the
@@ -318,14 +323,14 @@ retry_walk:
 	 */
 	nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
 
-	pt_access = pte_access = ACC_ALL;
+	pte_access = ~0;
 	++walker->level;
 
 	do {
 		gfn_t real_gfn;
 		unsigned long host_addr;
 
-		pt_access &= pte_access;
+		pt_access = pte_access;
 		--walker->level;
 
 		index = PT_INDEX(addr, walker->level);
@@ -367,6 +372,12 @@ retry_walk:
 
 		trace_kvm_mmu_paging_element(pte, walker->level);
 
+		/*
+		 * Inverting the NX it lets us AND it like other
+		 * permission bits.
+		 */
+		pte_access = pt_access & (pte ^ walk_nx_mask);
+
 		if (unlikely(!FNAME(is_present_gpte)(pte)))
 			goto error;
 
@@ -375,14 +386,16 @@ retry_walk:
 			goto error;
 		}
 
-		accessed_dirty &= pte;
-		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
-
 		walker->ptes[walker->level - 1] = pte;
 	} while (!is_last_gpte(mmu, walker->level, pte));
 
 	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
-	errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, access);
+	accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
+
+	/* Convert to ACC_*_MASK flags for struct guest_walker.  */
+	walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
+	walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask);
+	errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
 	if (unlikely(errcode))
 		goto error;
 
@@ -399,7 +412,7 @@ retry_walk:
 	walker->gfn = real_gpa >> PAGE_SHIFT;
 
 	if (!write_fault)
-		FNAME(protect_clean_gpte)(mmu, &pte_access, pte);
+		FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte);
 	else
 		/*
 		 * On a write fault, fold the dirty bit into accessed_dirty.
@@ -417,10 +430,8 @@ retry_walk:
 			goto retry_walk;
 	}
 
-	walker->pt_access = pt_access;
-	walker->pte_access = pte_access;
 	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
-		 __func__, (u64)pte, pte_access, pt_access);
+		 __func__, (u64)pte, walker->pte_access, walker->pt_access);
 	return 1;
 
 error:
@@ -448,7 +459,7 @@ error:
 	 */
 	if (!(errcode & PFERR_RSVD_MASK)) {
 		vcpu->arch.exit_qualification &= 0x187;
-		vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;
+		vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3;
 	}
 #endif
 	walker->fault.address = addr;
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index 9d4a8504a95a..5ab4a364348e 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -294,7 +294,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 			((u64)1 << edx.split.bit_width_fixed) - 1;
 	}
 
-	pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
+	pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) |
 		(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
 	pmu->global_ctrl_mask = ~pmu->global_ctrl;
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c27ac6923a18..33460fcdeef9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -36,6 +36,7 @@
 #include <linux/slab.h>
 #include <linux/amd-iommu.h>
 #include <linux/hashtable.h>
+#include <linux/frame.h>
 
 #include <asm/apic.h>
 #include <asm/perf_event.h>
@@ -1272,7 +1273,8 @@ static void init_vmcb(struct vcpu_svm *svm)
 
 }
 
-static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, int index)
+static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
+				       unsigned int index)
 {
 	u64 *avic_physical_id_table;
 	struct kvm_arch *vm_data = &vcpu->kvm->arch;
@@ -1806,7 +1808,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
 	 * AMD's VMCB does not have an explicit unusable field, so emulate it
 	 * for cross vendor migration purposes by "not present"
 	 */
-	var->unusable = !var->present || (var->type == 0);
+	var->unusable = !var->present;
 
 	switch (seg) {
 	case VCPU_SREG_TR:
@@ -1839,6 +1841,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
 		 */
 		if (var->unusable)
 			var->db = 0;
+		/* This is symmetric with svm_set_segment() */
 		var->dpl = to_svm(vcpu)->vmcb->save.cpl;
 		break;
 	}
@@ -1979,18 +1982,14 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 	s->base = var->base;
 	s->limit = var->limit;
 	s->selector = var->selector;
-	if (var->unusable)
-		s->attrib = 0;
-	else {
-		s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
-		s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
-		s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
-		s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
-		s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
-		s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
-		s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
-		s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
-	}
+	s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
+	s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
+	s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
+	s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT;
+	s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
+	s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
+	s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
+	s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
 
 	/*
 	 * This is always accurate, except if SYSRET returned to a segment
@@ -1999,7 +1998,8 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 	 * would entail passing the CPL to userspace and back.
 	 */
 	if (seg == VCPU_SREG_SS)
-		svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
+		/* This is symmetric with svm_get_segment() */
+		svm->vmcb->save.cpl = (var->dpl & 3);
 
 	mark_dirty(svm->vmcb, VMCB_SEG);
 }
@@ -4907,6 +4907,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	mark_all_clean(svm->vmcb);
 }
+STACK_FRAME_NON_STANDARD(svm_vcpu_run);
 
 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c5fd459c4043..6dcc4873e435 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 #include <linux/tboot.h>
 #include <linux/hrtimer.h>
+#include <linux/frame.h>
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
@@ -48,6 +49,7 @@
 #include <asm/kexec.h>
 #include <asm/apic.h>
 #include <asm/irq_remapping.h>
+#include <asm/mmu_context.h>
 
 #include "trace.h"
 #include "pmu.h"
@@ -248,6 +250,7 @@ struct __packed vmcs12 {
 	u64 xss_exit_bitmap;
 	u64 guest_physical_address;
 	u64 vmcs_link_pointer;
+	u64 pml_address;
 	u64 guest_ia32_debugctl;
 	u64 guest_ia32_pat;
 	u64 guest_ia32_efer;
@@ -369,6 +372,7 @@ struct __packed vmcs12 {
 	u16 guest_ldtr_selector;
 	u16 guest_tr_selector;
 	u16 guest_intr_status;
+	u16 guest_pml_index;
 	u16 host_es_selector;
 	u16 host_cs_selector;
 	u16 host_ss_selector;
@@ -407,6 +411,7 @@ struct nested_vmx {
 	/* Has the level1 guest done vmxon? */
 	bool vmxon;
 	gpa_t vmxon_ptr;
+	bool pml_full;
 
 	/* The guest-physical address of the current VMCS L1 keeps for L2 */
 	gpa_t current_vmptr;
@@ -593,6 +598,7 @@ struct vcpu_vmx {
 		int           gs_ldt_reload_needed;
 		int           fs_reload_needed;
 		u64           msr_host_bndcfgs;
+		unsigned long vmcs_host_cr3;	/* May not match real cr3 */
 		unsigned long vmcs_host_cr4;	/* May not match real cr4 */
 	} host_state;
 	struct {
@@ -742,6 +748,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
 	FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
 	FIELD(GUEST_INTR_STATUS, guest_intr_status),
+	FIELD(GUEST_PML_INDEX, guest_pml_index),
 	FIELD(HOST_ES_SELECTOR, host_es_selector),
 	FIELD(HOST_CS_SELECTOR, host_cs_selector),
 	FIELD(HOST_SS_SELECTOR, host_ss_selector),
@@ -767,6 +774,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
 	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
 	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
+	FIELD64(PML_ADDRESS, pml_address),
 	FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
 	FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
 	FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
@@ -1314,6 +1322,11 @@ static inline bool report_flexpriority(void)
 	return flexpriority_enabled;
 }
 
+static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
+{
+	return vmx_misc_cr3_count(to_vmx(vcpu)->nested.nested_vmx_misc_low);
+}
+
 static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
 {
 	return vmcs12->cpu_based_vm_exec_control & bit;
@@ -1348,6 +1361,11 @@ static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
 		vmx_xsaves_supported();
 }
 
+static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML);
+}
+
 static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
 {
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
@@ -2410,7 +2428,7 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
 	if (!(vmcs12->exception_bitmap & (1u << nr)))
 		return 0;
 
-	nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
+	nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
 			  vmcs_read32(VM_EXIT_INTR_INFO),
 			  vmcs_readl(EXIT_QUALIFICATION));
 	return 1;
@@ -2751,8 +2769,11 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
 			VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
 			VMX_EPT_1GB_PAGE_BIT;
-	       if (enable_ept_ad_bits)
+		if (enable_ept_ad_bits) {
+			vmx->nested.nested_vmx_secondary_ctls_high |=
+				SECONDARY_EXEC_ENABLE_PML;
 		       vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
+		}
 	} else
 		vmx->nested.nested_vmx_ept_caps = 0;
 
@@ -4994,12 +5015,19 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 	u32 low32, high32;
 	unsigned long tmpl;
 	struct desc_ptr dt;
-	unsigned long cr0, cr4;
+	unsigned long cr0, cr3, cr4;
 
 	cr0 = read_cr0();
 	WARN_ON(cr0 & X86_CR0_TS);
 	vmcs_writel(HOST_CR0, cr0);  /* 22.2.3 */
-	vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
+
+	/*
+	 * Save the most likely value for this task's CR3 in the VMCS.
+	 * We can't use __get_current_cr3_fast() because we're not atomic.
+	 */
+	cr3 = __read_cr3();
+	vmcs_writel(HOST_CR3, cr3);		/* 22.2.3  FIXME: shadow tables */
+	vmx->host_state.vmcs_host_cr3 = cr3;
 
 	/* Save the most likely value for this task's CR4 in the VMCS. */
 	cr4 = cr4_read_shadow();
@@ -6486,7 +6514,7 @@ static __init int hardware_setup(void)
 		enable_ept_ad_bits = 0;
 	}
 
-	if (!cpu_has_vmx_ept_ad_bits())
+	if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
 		enable_ept_ad_bits = 0;
 
 	if (!cpu_has_vmx_unrestricted_guest())
@@ -6896,97 +6924,21 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-/*
- * This function performs the various checks including
- * - if it's 4KB aligned
- * - No bits beyond the physical address width are set
- * - Returns 0 on success or else 1
- * (Intel SDM Section 30.3)
- */
-static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
-				  gpa_t *vmpointer)
+static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
 {
 	gva_t gva;
-	gpa_t vmptr;
 	struct x86_exception e;
-	struct page *page;
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 
 	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
 			vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
 		return 1;
 
-	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
-				sizeof(vmptr), &e)) {
+	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer,
+				sizeof(*vmpointer), &e)) {
 		kvm_inject_page_fault(vcpu, &e);
 		return 1;
 	}
 
-	switch (exit_reason) {
-	case EXIT_REASON_VMON:
-		/*
-		 * SDM 3: 24.11.5
-		 * The first 4 bytes of VMXON region contain the supported
-		 * VMCS revision identifier
-		 *
-		 * Note - IA32_VMX_BASIC[48] will never be 1
-		 * for the nested case;
-		 * which replaces physical address width with 32
-		 *
-		 */
-		if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
-			nested_vmx_failInvalid(vcpu);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-
-		page = nested_get_page(vcpu, vmptr);
-		if (page == NULL) {
-			nested_vmx_failInvalid(vcpu);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-		if (*(u32 *)kmap(page) != VMCS12_REVISION) {
-			kunmap(page);
-			nested_release_page_clean(page);
-			nested_vmx_failInvalid(vcpu);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-		kunmap(page);
-		nested_release_page_clean(page);
-		vmx->nested.vmxon_ptr = vmptr;
-		break;
-	case EXIT_REASON_VMCLEAR:
-		if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
-			nested_vmx_failValid(vcpu,
-					     VMXERR_VMCLEAR_INVALID_ADDRESS);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-
-		if (vmptr == vmx->nested.vmxon_ptr) {
-			nested_vmx_failValid(vcpu,
-					     VMXERR_VMCLEAR_VMXON_POINTER);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-		break;
-	case EXIT_REASON_VMPTRLD:
-		if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
-			nested_vmx_failValid(vcpu,
-					     VMXERR_VMPTRLD_INVALID_ADDRESS);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-
-		if (vmptr == vmx->nested.vmxon_ptr) {
-			nested_vmx_failValid(vcpu,
-					     VMXERR_VMPTRLD_VMXON_POINTER);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-		break;
-	default:
-		return 1; /* shouldn't happen */
-	}
-
-	if (vmpointer)
-		*vmpointer = vmptr;
 	return 0;
 }
 
@@ -7048,6 +7000,8 @@ out_msr_bitmap:
 static int handle_vmon(struct kvm_vcpu *vcpu)
 {
 	int ret;
+	gpa_t vmptr;
+	struct page *page;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
 		| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
@@ -7077,9 +7031,37 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL))
+	if (nested_vmx_get_vmptr(vcpu, &vmptr))
 		return 1;
- 
+
+	/*
+	 * SDM 3: 24.11.5
+	 * The first 4 bytes of VMXON region contain the supported
+	 * VMCS revision identifier
+	 *
+	 * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
+	 * which replaces physical address width with 32
+	 */
+	if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+		nested_vmx_failInvalid(vcpu);
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+
+	page = nested_get_page(vcpu, vmptr);
+	if (page == NULL) {
+		nested_vmx_failInvalid(vcpu);
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+	if (*(u32 *)kmap(page) != VMCS12_REVISION) {
+		kunmap(page);
+		nested_release_page_clean(page);
+		nested_vmx_failInvalid(vcpu);
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+	kunmap(page);
+	nested_release_page_clean(page);
+
+	vmx->nested.vmxon_ptr = vmptr;
 	ret = enter_vmx_operation(vcpu);
 	if (ret)
 		return ret;
@@ -7195,9 +7177,19 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
-	if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr))
+	if (nested_vmx_get_vmptr(vcpu, &vmptr))
 		return 1;
 
+	if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+		nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+
+	if (vmptr == vmx->nested.vmxon_ptr) {
+		nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+
 	if (vmptr == vmx->nested.current_vmptr)
 		nested_release_vmcs12(vmx);
 
@@ -7527,9 +7519,19 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
-	if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMPTRLD, &vmptr))
+	if (nested_vmx_get_vmptr(vcpu, &vmptr))
 		return 1;
 
+	if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+		nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+
+	if (vmptr == vmx->nested.vmxon_ptr) {
+		nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+
 	if (vmx->nested.current_vmptr != vmptr) {
 		struct vmcs12 *new_vmcs12;
 		struct page *page;
@@ -7895,11 +7897,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 {
 	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 	int cr = exit_qualification & 15;
-	int reg = (exit_qualification >> 8) & 15;
-	unsigned long val = kvm_register_readl(vcpu, reg);
+	int reg;
+	unsigned long val;
 
 	switch ((exit_qualification >> 4) & 3) {
 	case 0: /* mov to cr */
+		reg = (exit_qualification >> 8) & 15;
+		val = kvm_register_readl(vcpu, reg);
 		switch (cr) {
 		case 0:
 			if (vmcs12->cr0_guest_host_mask &
@@ -7954,6 +7958,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 		 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
 		 * cr0. Other attempted changes are ignored, with no exit.
 		 */
+		val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
 		if (vmcs12->cr0_guest_host_mask & 0xe &
 		    (val ^ vmcs12->cr0_read_shadow))
 			return true;
@@ -8114,7 +8119,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_PREEMPTION_TIMER:
 		return false;
 	case EXIT_REASON_PML_FULL:
-		/* We don't expose PML support to L1. */
+		/* We emulate PML support to L1. */
 		return false;
 	default:
 		return true;
@@ -8657,6 +8662,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 			);
 	}
 }
+STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
 
 static bool vmx_has_high_real_mode_segbase(void)
 {
@@ -8825,7 +8831,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long debugctlmsr, cr4;
+	unsigned long debugctlmsr, cr3, cr4;
 
 	/* Don't enter VMX if guest state is invalid, let the exit handler
 	   start emulation until we arrive back to a valid state */
@@ -8847,6 +8853,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
+	cr3 = __get_current_cr3_fast();
+	if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
+		vmcs_writel(HOST_CR3, cr3);
+		vmx->host_state.vmcs_host_cr3 = cr3;
+	}
+
 	cr4 = cr4_read_shadow();
 	if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
@@ -9033,6 +9045,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx_recover_nmi_blocking(vmx);
 	vmx_complete_interrupts(vmx);
 }
+STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
 
 static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 {
@@ -9364,13 +9377,20 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
 		struct x86_exception *fault)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 exit_reason;
+	unsigned long exit_qualification = vcpu->arch.exit_qualification;
 
-	if (fault->error_code & PFERR_RSVD_MASK)
+	if (vmx->nested.pml_full) {
+		exit_reason = EXIT_REASON_PML_FULL;
+		vmx->nested.pml_full = false;
+		exit_qualification &= INTR_INFO_UNBLOCK_NMI;
+	} else if (fault->error_code & PFERR_RSVD_MASK)
 		exit_reason = EXIT_REASON_EPT_MISCONFIG;
 	else
 		exit_reason = EXIT_REASON_EPT_VIOLATION;
-	nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification);
+
+	nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
 	vmcs12->guest_physical_address = fault->address;
 }
 
@@ -9713,6 +9733,22 @@ static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
+					 struct vmcs12 *vmcs12)
+{
+	u64 address = vmcs12->pml_address;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) {
+		if (!nested_cpu_has_ept(vmcs12) ||
+		    !IS_ALIGNED(address, 4096)  ||
+		    address >> maxphyaddr)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
 				       struct vmx_msr_entry *e)
 {
@@ -9886,7 +9922,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 			  bool from_vmentry, u32 *entry_failure_code)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 exec_control;
+	u32 exec_control, vmcs12_exec_ctrl;
 
 	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
 	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -10017,8 +10053,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 				  SECONDARY_EXEC_APIC_REGISTER_VIRT);
 		if (nested_cpu_has(vmcs12,
-				CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
-			exec_control |= vmcs12->secondary_vm_exec_control;
+				   CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
+			vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
+				~SECONDARY_EXEC_ENABLE_PML;
+			exec_control |= vmcs12_exec_ctrl;
+		}
 
 		if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
 			vmcs_write64(EOI_EXIT_BITMAP0,
@@ -10248,6 +10287,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+	if (nested_vmx_check_pml_controls(vcpu, vmcs12))
+		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
 	if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
 				vmx->nested.nested_vmx_procbased_ctls_low,
 				vmx->nested.nested_vmx_procbased_ctls_high) ||
@@ -10266,6 +10308,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 				vmx->nested.nested_vmx_entry_ctls_high))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+	if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
+		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
 	if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
 	    !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
 	    !nested_cr3_valid(vcpu, vmcs12->host_cr3))
@@ -11143,6 +11188,46 @@ static void vmx_flush_log_dirty(struct kvm *kvm)
 	kvm_flush_pml_buffers(kvm);
 }
 
+static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
+{
+	struct vmcs12 *vmcs12;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	gpa_t gpa;
+	struct page *page = NULL;
+	u64 *pml_address;
+
+	if (is_guest_mode(vcpu)) {
+		WARN_ON_ONCE(vmx->nested.pml_full);
+
+		/*
+		 * Check if PML is enabled for the nested guest.
+		 * Whether eptp bit 6 is set is already checked
+		 * as part of A/D emulation.
+		 */
+		vmcs12 = get_vmcs12(vcpu);
+		if (!nested_cpu_has_pml(vmcs12))
+			return 0;
+
+		if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
+			vmx->nested.pml_full = true;
+			return 1;
+		}
+
+		gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
+
+		page = nested_get_page(vcpu, vmcs12->pml_address);
+		if (!page)
+			return 0;
+
+		pml_address = kmap(page);
+		pml_address[vmcs12->guest_pml_index--] = gpa;
+		kunmap(page);
+		nested_release_page_clean(page);
+	}
+
+	return 0;
+}
+
 static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
 					   struct kvm_memory_slot *memslot,
 					   gfn_t offset, unsigned long mask)
@@ -11502,6 +11587,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
 	.flush_log_dirty = vmx_flush_log_dirty,
 	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+	.write_log_dirty = vmx_write_pml_buffer,
 
 	.pre_block = vmx_pre_block,
 	.post_block = vmx_post_block,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 464da936c53d..0e846f0cb83b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1763,6 +1763,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 {
 	struct kvm_arch *ka = &kvm->arch;
 	struct pvclock_vcpu_time_info hv_clock;
+	u64 ret;
 
 	spin_lock(&ka->pvclock_gtod_sync_lock);
 	if (!ka->use_master_clock) {
@@ -1774,10 +1775,17 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 	hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
 	spin_unlock(&ka->pvclock_gtod_sync_lock);
 
+	/* both __this_cpu_read() and rdtsc() should be on the same cpu */
+	get_cpu();
+
 	kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
 			   &hv_clock.tsc_shift,
 			   &hv_clock.tsc_to_system_mul);
-	return __pvclock_read_cycles(&hv_clock, rdtsc());
+	ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+
+	put_cpu();
+
+	return ret;
 }
 
 static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
@@ -3288,11 +3296,14 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 	}
 }
 
+#define XSAVE_MXCSR_OFFSET 24
+
 static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 					struct kvm_xsave *guest_xsave)
 {
 	u64 xstate_bv =
 		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
+	u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
 
 	if (boot_cpu_has(X86_FEATURE_XSAVE)) {
 		/*
@@ -3300,11 +3311,13 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 		 * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
 		 * with old userspace.
 		 */
-		if (xstate_bv & ~kvm_supported_xcr0())
+		if (xstate_bv & ~kvm_supported_xcr0() ||
+			mxcsr & ~mxcsr_feature_mask)
 			return -EINVAL;
 		load_xsave(vcpu, (u8 *)guest_xsave->region);
 	} else {
-		if (xstate_bv & ~XFEATURE_MASK_FPSSE)
+		if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
+			mxcsr & ~mxcsr_feature_mask)
 			return -EINVAL;
 		memcpy(&vcpu->arch.guest_fpu.state.fxsave,
 			guest_xsave->region, sizeof(struct fxregs_state));
@@ -4818,16 +4831,20 @@ emul_write:
 
 static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 {
-	/* TODO: String I/O for in kernel device */
-	int r;
+	int r = 0, i;
 
-	if (vcpu->arch.pio.in)
-		r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
-				    vcpu->arch.pio.size, pd);
-	else
-		r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
-				     vcpu->arch.pio.port, vcpu->arch.pio.size,
-				     pd);
+	for (i = 0; i < vcpu->arch.pio.count; i++) {
+		if (vcpu->arch.pio.in)
+			r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
+					    vcpu->arch.pio.size, pd);
+		else
+			r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
+					     vcpu->arch.pio.port, vcpu->arch.pio.size,
+					     pd);
+		if (r)
+			break;
+		pd += vcpu->arch.pio.size;
+	}
 	return r;
 }
 
@@ -4865,6 +4882,8 @@ static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 	if (vcpu->arch.pio.count)
 		goto data_avail;
 
+	memset(vcpu->arch.pio_data, 0, size * count);
+
 	ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
 	if (ret) {
 data_avail:
@@ -5048,6 +5067,8 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
 
 	if (var.unusable) {
 		memset(desc, 0, sizeof(*desc));
+		if (base3)
+			*base3 = 0;
 		return false;
 	}
 
@@ -5292,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
 	ctxt->eflags = kvm_get_rflags(vcpu);
+	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
 	ctxt->eip = kvm_rip_read(vcpu);
 	ctxt->mode = (!is_protmode(vcpu))		? X86EMUL_MODE_REAL :
 		     (ctxt->eflags & X86_EFLAGS_VM)	? X86EMUL_MODE_VM86 :
@@ -5507,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
 	return dr6;
 }
 
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
 {
 	struct kvm_run *kvm_run = vcpu->run;
 
-	/*
-	 * rflags is the old, "raw" value of the flags.  The new value has
-	 * not been saved yet.
-	 *
-	 * This is correct even for TF set by the guest, because "the
-	 * processor will not generate this exception after the instruction
-	 * that sets the TF flag".
-	 */
-	if (unlikely(rflags & X86_EFLAGS_TF)) {
-		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
-			kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
-						  DR6_RTM;
-			kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
-			kvm_run->debug.arch.exception = DB_VECTOR;
-			kvm_run->exit_reason = KVM_EXIT_DEBUG;
-			*r = EMULATE_USER_EXIT;
-		} else {
-			/*
-			 * "Certain debug exceptions may clear bit 0-3.  The
-			 * remaining contents of the DR6 register are never
-			 * cleared by the processor".
-			 */
-			vcpu->arch.dr6 &= ~15;
-			vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
-			kvm_queue_exception(vcpu, DB_VECTOR);
-		}
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+		kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+		kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+		kvm_run->debug.arch.exception = DB_VECTOR;
+		kvm_run->exit_reason = KVM_EXIT_DEBUG;
+		*r = EMULATE_USER_EXIT;
+	} else {
+		/*
+		 * "Certain debug exceptions may clear bit 0-3.  The
+		 * remaining contents of the DR6 register are never
+		 * cleared by the processor".
+		 */
+		vcpu->arch.dr6 &= ~15;
+		vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+		kvm_queue_exception(vcpu, DB_VECTOR);
 	}
 }
 
@@ -5546,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	int r = EMULATE_DONE;
 
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
-	kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+
+	/*
+	 * rflags is the old, "raw" value of the flags.  The new value has
+	 * not been saved yet.
+	 *
+	 * This is correct even for TF set by the guest, because "the
+	 * processor will not generate this exception after the instruction
+	 * that sets the TF flag".
+	 */
+	if (unlikely(rflags & X86_EFLAGS_TF))
+		kvm_vcpu_do_singlestep(vcpu, &r);
 	return r == EMULATE_DONE;
 }
 EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
@@ -5705,8 +5727,9 @@ restart:
 		toggle_interruptibility(vcpu, ctxt->interruptibility);
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 		kvm_rip_write(vcpu, ctxt->eip);
-		if (r == EMULATE_DONE)
-			kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+		if (r == EMULATE_DONE &&
+		    (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+			kvm_vcpu_do_singlestep(vcpu, &r);
 		if (!ctxt->have_exception ||
 		    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
 			__kvm_set_rflags(vcpu, ctxt->eflags);
@@ -8373,10 +8396,13 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.pv.pv_unhalted)
 		return true;
 
-	if (atomic_read(&vcpu->arch.nmi_queued))
+	if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
+	    (vcpu->arch.nmi_pending &&
+	     kvm_x86_ops->nmi_allowed(vcpu)))
 		return true;
 
-	if (kvm_test_request(KVM_REQ_SMI, vcpu))
+	if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
+	    (vcpu->arch.smi_pending && !is_smm(vcpu)))
 		return true;
 
 	if (kvm_arch_interrupt_allowed(vcpu) &&
@@ -8583,8 +8609,7 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
 	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
 		return true;
 	else
-		return !kvm_event_needs_reinjection(vcpu) &&
-			kvm_x86_ops->interrupt_allowed(vcpu);
+		return kvm_can_do_async_pf(vcpu);
 }
 
 void kvm_arch_start_assignment(struct kvm *kvm)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index c5959576c315..020f75cc8cf6 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -37,7 +37,7 @@ ENTRY(copy_user_generic_unrolled)
 	movl %edx,%ecx
 	andl $63,%edx
 	shrl $6,%ecx
-	jz 17f
+	jz .L_copy_short_string
 1:	movq (%rsi),%r8
 2:	movq 1*8(%rsi),%r9
 3:	movq 2*8(%rsi),%r10
@@ -58,7 +58,8 @@ ENTRY(copy_user_generic_unrolled)
 	leaq 64(%rdi),%rdi
 	decl %ecx
 	jnz 1b
-17:	movl %edx,%ecx
+.L_copy_short_string:
+	movl %edx,%ecx
 	andl $7,%edx
 	shrl $3,%ecx
 	jz 20f
@@ -174,6 +175,8 @@ EXPORT_SYMBOL(copy_user_generic_string)
  */
 ENTRY(copy_user_enhanced_fast_string)
 	ASM_STAC
+	cmpl $64,%edx
+	jb .L_copy_short_string	/* less then 64 bytes, avoid the costly 'rep' */
 	movl %edx,%ecx
 1:	rep
 	movsb
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 7e48807b2fa1..45a53dfe1859 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -55,7 +55,7 @@ ENTRY(csum_partial_copy_generic)
 	movq  %r12, 3*8(%rsp)
 	movq  %r14, 4*8(%rsp)
 	movq  %r13, 5*8(%rsp)
-	movq  %rbp, 6*8(%rsp)
+	movq  %r15, 6*8(%rsp)
 
 	movq  %r8, (%rsp)
 	movq  %r9, 1*8(%rsp)
@@ -74,7 +74,7 @@ ENTRY(csum_partial_copy_generic)
 	/* main loop. clear in 64 byte blocks */
 	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
 	/* r11:	temp3, rdx: temp4, r12 loopcnt */
-	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
+	/* r10:	temp5, r15: temp6, r14 temp7, r13 temp8 */
 	.p2align 4
 .Lloop:
 	source
@@ -89,7 +89,7 @@ ENTRY(csum_partial_copy_generic)
 	source
 	movq  32(%rdi), %r10
 	source
-	movq  40(%rdi), %rbp
+	movq  40(%rdi), %r15
 	source
 	movq  48(%rdi), %r14
 	source
@@ -103,7 +103,7 @@ ENTRY(csum_partial_copy_generic)
 	adcq  %r11, %rax
 	adcq  %rdx, %rax
 	adcq  %r10, %rax
-	adcq  %rbp, %rax
+	adcq  %r15, %rax
 	adcq  %r14, %rax
 	adcq  %r13, %rax
 
@@ -121,7 +121,7 @@ ENTRY(csum_partial_copy_generic)
 	dest
 	movq %r10, 32(%rsi)
 	dest
-	movq %rbp, 40(%rsi)
+	movq %r15, 40(%rsi)
 	dest
 	movq %r14, 48(%rsi)
 	dest
@@ -203,7 +203,7 @@ ENTRY(csum_partial_copy_generic)
 	movq 3*8(%rsp), %r12
 	movq 4*8(%rsp), %r14
 	movq 5*8(%rsp), %r13
-	movq 6*8(%rsp), %rbp
+	movq 6*8(%rsp), %r15
 	addq $7*8, %rsp
 	ret
 
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index 5761a4f19455..ab2d1d73e9e7 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -5,6 +5,7 @@
  * kernel starts. This file is included in the compressed kernel and
  * normally linked in the regular.
  */
+#include <asm/asm.h>
 #include <asm/kaslr.h>
 #include <asm/msr.h>
 #include <asm/archrandom.h>
@@ -79,7 +80,7 @@ unsigned long kaslr_get_random_long(const char *purpose)
 	}
 
 	/* Circular multiply for better bit diffusion */
-	asm("mul %3"
+	asm(_ASM_MUL "%3"
 	    : "=a" (random), "=d" (raw)
 	    : "a" (random), "rm" (mix_const));
 	random += raw;
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index c81556409bbb..10ffa7e8519f 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -13,14 +13,14 @@
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
 	pushq %rbx
-	pushq %rbp
+	pushq %r12
 	movq	%rdi, %r10	/* Save pointer */
 	xorl	%r11d, %r11d	/* Return value */
 	movl    (%rdi), %eax
 	movl    4(%rdi), %ecx
 	movl    8(%rdi), %edx
 	movl    12(%rdi), %ebx
-	movl    20(%rdi), %ebp
+	movl    20(%rdi), %r12d
 	movl    24(%rdi), %esi
 	movl    28(%rdi), %edi
 1:	\op
@@ -29,10 +29,10 @@ ENTRY(\op\()_safe_regs)
 	movl    %ecx, 4(%r10)
 	movl    %edx, 8(%r10)
 	movl    %ebx, 12(%r10)
-	movl    %ebp, 20(%r10)
+	movl    %r12d, 20(%r10)
 	movl    %esi, 24(%r10)
 	movl    %edi, 28(%r10)
-	popq %rbp
+	popq %r12
 	popq %rbx
 	ret
 3:
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 767be7c76034..12e377184ee4 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -1009,7 +1009,7 @@ GrpTable: Grp15
 1: fxstor | RDGSBASE Ry (F3),(11B)
 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
-4: XSAVE
+4: XSAVE | ptwrite Ey (F3),(11B)
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
 7: clflush | clflushopt (66) | sfence (11B)
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index 5e044d506b7a..a179254a5122 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -27,7 +27,7 @@ static inline struct desc_struct FPU_get_ldt_descriptor(unsigned seg)
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
 	seg >>= 3;
 	mutex_lock(&current->mm->context.lock);
-	if (current->mm->context.ldt && seg < current->mm->context.ldt->size)
+	if (current->mm->context.ldt && seg < current->mm->context.ldt->nr_entries)
 		ret = current->mm->context.ldt->entries[seg];
 	mutex_unlock(&current->mm->context.lock);
 #endif
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 96d2b847e09e..0fbdcb64f9f8 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -2,7 +2,7 @@
 KCOV_INSTRUMENT_tlb.o	:= n
 
 obj-y	:=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-	    pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o
+	    pat.o pgtable.o physaddr.o setup_nx.o tlb.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index bce6990b1d81..0470826d2bdc 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -431,7 +431,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 				       bool checkwx)
 {
 #ifdef CONFIG_X86_64
-	pgd_t *start = (pgd_t *) &init_level4_pgt;
+	pgd_t *start = (pgd_t *) &init_top_pgt;
 #else
 	pgd_t *start = swapper_pg_dir;
 #endif
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 35ea061010a1..0ea8afcb929c 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -162,6 +162,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
 	if (fixup_exception(regs, trapnr))
 		return;
 
+	if (fixup_bug(regs, trapnr))
+		return;
+
 fail:
 	early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
 		     (unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8ad91a01cbc8..2a1fa10c6a98 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -346,7 +346,7 @@ static noinline int vmalloc_fault(unsigned long address)
 	 * Do _not_ use "current" here. We might be inside
 	 * an interrupt in the middle of a task switch..
 	 */
-	pgd_paddr = read_cr3();
+	pgd_paddr = read_cr3_pa();
 	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
 	if (!pmd_k)
 		return -1;
@@ -388,7 +388,7 @@ static bool low_pfn(unsigned long pfn)
 
 static void dump_pagetable(unsigned long address)
 {
-	pgd_t *base = __va(read_cr3());
+	pgd_t *base = __va(read_cr3_pa());
 	pgd_t *pgd = &base[pgd_index(address)];
 	p4d_t *p4d;
 	pud_t *pud;
@@ -451,7 +451,7 @@ static noinline int vmalloc_fault(unsigned long address)
 	 * happen within a race in page table update. In the later
 	 * case just flush:
 	 */
-	pgd = (pgd_t *)__va(read_cr3()) + pgd_index(address);
+	pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address);
 	pgd_ref = pgd_offset_k(address);
 	if (pgd_none(*pgd_ref))
 		return -1;
@@ -555,7 +555,7 @@ static int bad_address(void *p)
 
 static void dump_pagetable(unsigned long address)
 {
-	pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+	pgd_t *base = __va(read_cr3_pa());
 	pgd_t *pgd = base + pgd_index(address);
 	p4d_t *p4d;
 	pud_t *pud;
@@ -700,7 +700,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 		pgd_t *pgd;
 		pte_t *pte;
 
-		pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+		pgd = __va(read_cr3_pa());
 		pgd += pgd_index(address);
 
 		pte = lookup_address_in_pgd(pgd, address, &level);
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
deleted file mode 100644
index 456dfdfd2249..000000000000
--- a/arch/x86/mm/gup.c
+++ /dev/null
@@ -1,496 +0,0 @@
-/*
- * Lockless get_user_pages_fast for x86
- *
- * Copyright (C) 2008 Nick Piggin
- * Copyright (C) 2008 Novell Inc.
- */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmstat.h>
-#include <linux/highmem.h>
-#include <linux/swap.h>
-#include <linux/memremap.h>
-
-#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
-
-static inline pte_t gup_get_pte(pte_t *ptep)
-{
-#ifndef CONFIG_X86_PAE
-	return READ_ONCE(*ptep);
-#else
-	/*
-	 * With get_user_pages_fast, we walk down the pagetables without taking
-	 * any locks.  For this we would like to load the pointers atomically,
-	 * but that is not possible (without expensive cmpxchg8b) on PAE.  What
-	 * we do have is the guarantee that a pte will only either go from not
-	 * present to present, or present to not present or both -- it will not
-	 * switch to a completely different present page without a TLB flush in
-	 * between; something that we are blocking by holding interrupts off.
-	 *
-	 * Setting ptes from not present to present goes:
-	 * ptep->pte_high = h;
-	 * smp_wmb();
-	 * ptep->pte_low = l;
-	 *
-	 * And present to not present goes:
-	 * ptep->pte_low = 0;
-	 * smp_wmb();
-	 * ptep->pte_high = 0;
-	 *
-	 * We must ensure here that the load of pte_low sees l iff pte_high
-	 * sees h. We load pte_high *after* loading pte_low, which ensures we
-	 * don't see an older value of pte_high.  *Then* we recheck pte_low,
-	 * which ensures that we haven't picked up a changed pte high. We might
-	 * have got rubbish values from pte_low and pte_high, but we are
-	 * guaranteed that pte_low will not have the present bit set *unless*
-	 * it is 'l'. And get_user_pages_fast only operates on present ptes, so
-	 * we're safe.
-	 *
-	 * gup_get_pte should not be used or copied outside gup.c without being
-	 * very careful -- it does not atomically load the pte or anything that
-	 * is likely to be useful for you.
-	 */
-	pte_t pte;
-
-retry:
-	pte.pte_low = ptep->pte_low;
-	smp_rmb();
-	pte.pte_high = ptep->pte_high;
-	smp_rmb();
-	if (unlikely(pte.pte_low != ptep->pte_low))
-		goto retry;
-
-	return pte;
-#endif
-}
-
-static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
-{
-	while ((*nr) - nr_start) {
-		struct page *page = pages[--(*nr)];
-
-		ClearPageReferenced(page);
-		put_page(page);
-	}
-}
-
-/*
- * 'pteval' can come from a pte, pmd, pud or p4d.  We only check
- * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the
- * same value on all 4 types.
- */
-static inline int pte_allows_gup(unsigned long pteval, int write)
-{
-	unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;
-
-	if (write)
-		need_pte_bits |= _PAGE_RW;
-
-	if ((pteval & need_pte_bits) != need_pte_bits)
-		return 0;
-
-	/* Check memory protection keys permissions. */
-	if (!__pkru_allows_pkey(pte_flags_pkey(pteval), write))
-		return 0;
-
-	return 1;
-}
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
-		unsigned long end, int write, struct page **pages, int *nr)
-{
-	struct dev_pagemap *pgmap = NULL;
-	int nr_start = *nr, ret = 0;
-	pte_t *ptep, *ptem;
-
-	/*
-	 * Keep the original mapped PTE value (ptem) around since we
-	 * might increment ptep off the end of the page when finishing
-	 * our loop iteration.
-	 */
-	ptem = ptep = pte_offset_map(&pmd, addr);
-	do {
-		pte_t pte = gup_get_pte(ptep);
-		struct page *page;
-
-		/* Similar to the PMD case, NUMA hinting must take slow path */
-		if (pte_protnone(pte))
-			break;
-
-		if (!pte_allows_gup(pte_val(pte), write))
-			break;
-
-		if (pte_devmap(pte)) {
-			pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
-			if (unlikely(!pgmap)) {
-				undo_dev_pagemap(nr, nr_start, pages);
-				break;
-			}
-		} else if (pte_special(pte))
-			break;
-
-		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-		page = pte_page(pte);
-		get_page(page);
-		put_dev_pagemap(pgmap);
-		SetPageReferenced(page);
-		pages[*nr] = page;
-		(*nr)++;
-
-	} while (ptep++, addr += PAGE_SIZE, addr != end);
-	if (addr == end)
-		ret = 1;
-	pte_unmap(ptem);
-
-	return ret;
-}
-
-static inline void get_head_page_multiple(struct page *page, int nr)
-{
-	VM_BUG_ON_PAGE(page != compound_head(page), page);
-	VM_BUG_ON_PAGE(page_count(page) == 0, page);
-	page_ref_add(page, nr);
-	SetPageReferenced(page);
-}
-
-static int __gup_device_huge(unsigned long pfn, unsigned long addr,
-		unsigned long end, struct page **pages, int *nr)
-{
-	int nr_start = *nr;
-	struct dev_pagemap *pgmap = NULL;
-
-	do {
-		struct page *page = pfn_to_page(pfn);
-
-		pgmap = get_dev_pagemap(pfn, pgmap);
-		if (unlikely(!pgmap)) {
-			undo_dev_pagemap(nr, nr_start, pages);
-			return 0;
-		}
-		SetPageReferenced(page);
-		pages[*nr] = page;
-		get_page(page);
-		put_dev_pagemap(pgmap);
-		(*nr)++;
-		pfn++;
-	} while (addr += PAGE_SIZE, addr != end);
-	return 1;
-}
-
-static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
-		unsigned long end, struct page **pages, int *nr)
-{
-	unsigned long fault_pfn;
-
-	fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
-	return __gup_device_huge(fault_pfn, addr, end, pages, nr);
-}
-
-static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
-		unsigned long end, struct page **pages, int *nr)
-{
-	unsigned long fault_pfn;
-
-	fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-	return __gup_device_huge(fault_pfn, addr, end, pages, nr);
-}
-
-static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
-		unsigned long end, int write, struct page **pages, int *nr)
-{
-	struct page *head, *page;
-	int refs;
-
-	if (!pte_allows_gup(pmd_val(pmd), write))
-		return 0;
-
-	VM_BUG_ON(!pfn_valid(pmd_pfn(pmd)));
-	if (pmd_devmap(pmd))
-		return __gup_device_huge_pmd(pmd, addr, end, pages, nr);
-
-	/* hugepages are never "special" */
-	VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL);
-
-	refs = 0;
-	head = pmd_page(pmd);
-	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
-	do {
-		VM_BUG_ON_PAGE(compound_head(page) != head, page);
-		pages[*nr] = page;
-		(*nr)++;
-		page++;
-		refs++;
-	} while (addr += PAGE_SIZE, addr != end);
-	get_head_page_multiple(head, refs);
-
-	return 1;
-}
-
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
-		int write, struct page **pages, int *nr)
-{
-	unsigned long next;
-	pmd_t *pmdp;
-
-	pmdp = pmd_offset(&pud, addr);
-	do {
-		pmd_t pmd = *pmdp;
-
-		next = pmd_addr_end(addr, end);
-		if (pmd_none(pmd))
-			return 0;
-		if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
-			/*
-			 * NUMA hinting faults need to be handled in the GUP
-			 * slowpath for accounting purposes and so that they
-			 * can be serialised against THP migration.
-			 */
-			if (pmd_protnone(pmd))
-				return 0;
-			if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
-				return 0;
-		} else {
-			if (!gup_pte_range(pmd, addr, next, write, pages, nr))
-				return 0;
-		}
-	} while (pmdp++, addr = next, addr != end);
-
-	return 1;
-}
-
-static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
-		unsigned long end, int write, struct page **pages, int *nr)
-{
-	struct page *head, *page;
-	int refs;
-
-	if (!pte_allows_gup(pud_val(pud), write))
-		return 0;
-
-	VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
-	if (pud_devmap(pud))
-		return __gup_device_huge_pud(pud, addr, end, pages, nr);
-
-	/* hugepages are never "special" */
-	VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL);
-
-	refs = 0;
-	head = pud_page(pud);
-	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-	do {
-		VM_BUG_ON_PAGE(compound_head(page) != head, page);
-		pages[*nr] = page;
-		(*nr)++;
-		page++;
-		refs++;
-	} while (addr += PAGE_SIZE, addr != end);
-	get_head_page_multiple(head, refs);
-
-	return 1;
-}
-
-static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
-			int write, struct page **pages, int *nr)
-{
-	unsigned long next;
-	pud_t *pudp;
-
-	pudp = pud_offset(&p4d, addr);
-	do {
-		pud_t pud = *pudp;
-
-		next = pud_addr_end(addr, end);
-		if (pud_none(pud))
-			return 0;
-		if (unlikely(pud_large(pud))) {
-			if (!gup_huge_pud(pud, addr, next, write, pages, nr))
-				return 0;
-		} else {
-			if (!gup_pmd_range(pud, addr, next, write, pages, nr))
-				return 0;
-		}
-	} while (pudp++, addr = next, addr != end);
-
-	return 1;
-}
-
-static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
-			int write, struct page **pages, int *nr)
-{
-	unsigned long next;
-	p4d_t *p4dp;
-
-	p4dp = p4d_offset(&pgd, addr);
-	do {
-		p4d_t p4d = *p4dp;
-
-		next = p4d_addr_end(addr, end);
-		if (p4d_none(p4d))
-			return 0;
-		BUILD_BUG_ON(p4d_large(p4d));
-		if (!gup_pud_range(p4d, addr, next, write, pages, nr))
-			return 0;
-	} while (p4dp++, addr = next, addr != end);
-
-	return 1;
-}
-
-/*
- * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
- * back to the regular GUP.
- */
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			  struct page **pages)
-{
-	struct mm_struct *mm = current->mm;
-	unsigned long addr, len, end;
-	unsigned long next;
-	unsigned long flags;
-	pgd_t *pgdp;
-	int nr = 0;
-
-	start &= PAGE_MASK;
-	addr = start;
-	len = (unsigned long) nr_pages << PAGE_SHIFT;
-	end = start + len;
-	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
-					(void __user *)start, len)))
-		return 0;
-
-	/*
-	 * XXX: batch / limit 'nr', to avoid large irq off latency
-	 * needs some instrumenting to determine the common sizes used by
-	 * important workloads (eg. DB2), and whether limiting the batch size
-	 * will decrease performance.
-	 *
-	 * It seems like we're in the clear for the moment. Direct-IO is
-	 * the main guy that batches up lots of get_user_pages, and even
-	 * they are limited to 64-at-a-time which is not so many.
-	 */
-	/*
-	 * This doesn't prevent pagetable teardown, but does prevent
-	 * the pagetables and pages from being freed on x86.
-	 *
-	 * So long as we atomically load page table pointers versus teardown
-	 * (which we do on x86, with the above PAE exception), we can follow the
-	 * address down to the the page and take a ref on it.
-	 */
-	local_irq_save(flags);
-	pgdp = pgd_offset(mm, addr);
-	do {
-		pgd_t pgd = *pgdp;
-
-		next = pgd_addr_end(addr, end);
-		if (pgd_none(pgd))
-			break;
-		if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
-			break;
-	} while (pgdp++, addr = next, addr != end);
-	local_irq_restore(flags);
-
-	return nr;
-}
-
-/**
- * get_user_pages_fast() - pin user pages in memory
- * @start:	starting user address
- * @nr_pages:	number of pages from start to pin
- * @write:	whether pages will be written to
- * @pages:	array that receives pointers to the pages pinned.
- * 		Should be at least nr_pages long.
- *
- * Attempt to pin user pages in memory without taking mm->mmap_sem.
- * If not successful, it will fall back to taking the lock and
- * calling get_user_pages().
- *
- * Returns number of pages pinned. This may be fewer than the number
- * requested. If nr_pages is 0 or negative, returns 0. If no pages
- * were pinned, returns -errno.
- */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			struct page **pages)
-{
-	struct mm_struct *mm = current->mm;
-	unsigned long addr, len, end;
-	unsigned long next;
-	pgd_t *pgdp;
-	int nr = 0;
-
-	start &= PAGE_MASK;
-	addr = start;
-	len = (unsigned long) nr_pages << PAGE_SHIFT;
-
-	end = start + len;
-	if (end < start)
-		goto slow_irqon;
-
-#ifdef CONFIG_X86_64
-	if (end >> __VIRTUAL_MASK_SHIFT)
-		goto slow_irqon;
-#endif
-
-	/*
-	 * XXX: batch / limit 'nr', to avoid large irq off latency
-	 * needs some instrumenting to determine the common sizes used by
-	 * important workloads (eg. DB2), and whether limiting the batch size
-	 * will decrease performance.
-	 *
-	 * It seems like we're in the clear for the moment. Direct-IO is
-	 * the main guy that batches up lots of get_user_pages, and even
-	 * they are limited to 64-at-a-time which is not so many.
-	 */
-	/*
-	 * This doesn't prevent pagetable teardown, but does prevent
-	 * the pagetables and pages from being freed on x86.
-	 *
-	 * So long as we atomically load page table pointers versus teardown
-	 * (which we do on x86, with the above PAE exception), we can follow the
-	 * address down to the the page and take a ref on it.
-	 */
-	local_irq_disable();
-	pgdp = pgd_offset(mm, addr);
-	do {
-		pgd_t pgd = *pgdp;
-
-		next = pgd_addr_end(addr, end);
-		if (pgd_none(pgd))
-			goto slow;
-		if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
-			goto slow;
-	} while (pgdp++, addr = next, addr != end);
-	local_irq_enable();
-
-	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
-	return nr;
-
-	{
-		int ret;
-
-slow:
-		local_irq_enable();
-slow_irqon:
-		/* Try to get the remaining pages with get_user_pages */
-		start += nr << PAGE_SHIFT;
-		pages += nr;
-
-		ret = get_user_pages_unlocked(start,
-					      (end - start) >> PAGE_SHIFT,
-					      pages, write ? FOLL_WRITE : 0);
-
-		/* Have to be a bit careful with return values */
-		if (nr > 0) {
-			if (ret < 0)
-				ret = nr;
-			else
-				ret += nr;
-		}
-
-		return ret;
-	}
-}
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 302f43fd9c28..adad702b39cd 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -148,7 +148,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 04210a29dd60..adab1595f4bd 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -13,7 +13,7 @@ static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
 		if (pmd_present(*pmd))
 			continue;
 
-		set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
+		set_pmd(pmd, __pmd((addr - info->offset) | info->page_flag));
 	}
 }
 
@@ -30,6 +30,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 		if (next > end)
 			next = end;
 
+		if (info->direct_gbpages) {
+			pud_t pudval;
+
+			if (pud_present(*pud))
+				continue;
+
+			addr &= PUD_MASK;
+			pudval = __pud((addr - info->offset) | info->page_flag);
+			set_pud(pud, pudval);
+			continue;
+		}
+
 		if (pud_present(*pud)) {
 			pmd = pmd_offset(pud, 0);
 			ident_pmd_init(info, pmd, addr, next);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index cbc87ea98751..673541eb3b3f 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -161,16 +161,16 @@ static int page_size_mask;
 
 static void __init probe_page_size_mask(void)
 {
-#if !defined(CONFIG_KMEMCHECK)
 	/*
 	 * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will
 	 * use small pages.
 	 * This will simplify cpa(), which otherwise needs to support splitting
 	 * large pages into small in interrupt context, etc.
 	 */
-	if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled())
+	if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled() && !IS_ENABLED(CONFIG_KMEMCHECK))
 		page_size_mask |= 1 << PG_LEVEL_2M;
-#endif
+	else
+		direct_gbpages = 0;
 
 	/* Enable PSE if available */
 	if (boot_cpu_has(X86_FEATURE_PSE))
@@ -811,10 +811,8 @@ void __init zone_sizes_init(void)
 }
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
-#ifdef CONFIG_SMP
-	.active_mm = &init_mm,
+	.loaded_mm = &init_mm,
 	.state = 0,
-#endif
 	.cr4 = ~0UL,	/* fail hard if we screw up cr4 shadow initialization */
 };
 EXPORT_SYMBOL_GPL(cpu_tlbstate);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 41270b96403d..dae6a5e5ad4a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -92,12 +92,50 @@ __setup("noexec32=", nonx32_setup);
  * When memory was added make sure all the processes MM have
  * suitable PGD entries in the local PGD level page.
  */
+#ifdef CONFIG_X86_5LEVEL
 void sync_global_pgds(unsigned long start, unsigned long end)
 {
-	unsigned long address;
+	unsigned long addr;
+
+	for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+		const pgd_t *pgd_ref = pgd_offset_k(addr);
+		struct page *page;
+
+		/* Check for overflow */
+		if (addr < start)
+			break;
+
+		if (pgd_none(*pgd_ref))
+			continue;
+
+		spin_lock(&pgd_lock);
+		list_for_each_entry(page, &pgd_list, lru) {
+			pgd_t *pgd;
+			spinlock_t *pgt_lock;
+
+			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
+			/* the pgt_lock only for Xen */
+			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+			spin_lock(pgt_lock);
+
+			if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
+				BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+
+			if (pgd_none(*pgd))
+				set_pgd(pgd, *pgd_ref);
 
-	for (address = start; address <= end; address += PGDIR_SIZE) {
-		pgd_t *pgd_ref = pgd_offset_k(address);
+			spin_unlock(pgt_lock);
+		}
+		spin_unlock(&pgd_lock);
+	}
+}
+#else
+void sync_global_pgds(unsigned long start, unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+		pgd_t *pgd_ref = pgd_offset_k(addr);
 		const p4d_t *p4d_ref;
 		struct page *page;
 
@@ -106,7 +144,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 		 * handle synchonization on p4d level.
 		 */
 		BUILD_BUG_ON(pgd_none(*pgd_ref));
-		p4d_ref = p4d_offset(pgd_ref, address);
+		p4d_ref = p4d_offset(pgd_ref, addr);
 
 		if (p4d_none(*p4d_ref))
 			continue;
@@ -117,8 +155,8 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 			p4d_t *p4d;
 			spinlock_t *pgt_lock;
 
-			pgd = (pgd_t *)page_address(page) + pgd_index(address);
-			p4d = p4d_offset(pgd, address);
+			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
+			p4d = p4d_offset(pgd, addr);
 			/* the pgt_lock only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 			spin_lock(pgt_lock);
@@ -135,6 +173,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 		spin_unlock(&pgd_lock);
 	}
 }
+#endif
 
 /*
  * NOTE: This function is marked __ref because it calls __init function
@@ -585,6 +624,57 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 	return paddr_last;
 }
 
+static unsigned long __meminit
+phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
+	      unsigned long page_size_mask)
+{
+	unsigned long paddr_next, paddr_last = paddr_end;
+	unsigned long vaddr = (unsigned long)__va(paddr);
+	int i = p4d_index(vaddr);
+
+	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
+
+	for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
+		p4d_t *p4d;
+		pud_t *pud;
+
+		vaddr = (unsigned long)__va(paddr);
+		p4d = p4d_page + p4d_index(vaddr);
+		paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
+
+		if (paddr >= paddr_end) {
+			if (!after_bootmem &&
+			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
+					     E820_TYPE_RAM) &&
+			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
+					     E820_TYPE_RESERVED_KERN))
+				set_p4d(p4d, __p4d(0));
+			continue;
+		}
+
+		if (!p4d_none(*p4d)) {
+			pud = pud_offset(p4d, 0);
+			paddr_last = phys_pud_init(pud, paddr,
+					paddr_end,
+					page_size_mask);
+			__flush_tlb_all();
+			continue;
+		}
+
+		pud = alloc_low_page();
+		paddr_last = phys_pud_init(pud, paddr, paddr_end,
+					   page_size_mask);
+
+		spin_lock(&init_mm.page_table_lock);
+		p4d_populate(&init_mm, p4d, pud);
+		spin_unlock(&init_mm.page_table_lock);
+	}
+	__flush_tlb_all();
+
+	return paddr_last;
+}
+
 /*
  * Create page table mapping for the physical memory for specific physical
  * addresses. The virtual and physical addresses have to be aligned on PMD level
@@ -606,26 +696,26 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
 		pgd_t *pgd = pgd_offset_k(vaddr);
 		p4d_t *p4d;
-		pud_t *pud;
 
 		vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
 
-		BUILD_BUG_ON(pgd_none(*pgd));
-		p4d = p4d_offset(pgd, vaddr);
-		if (p4d_val(*p4d)) {
-			pud = (pud_t *)p4d_page_vaddr(*p4d);
-			paddr_last = phys_pud_init(pud, __pa(vaddr),
+		if (pgd_val(*pgd)) {
+			p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+			paddr_last = phys_p4d_init(p4d, __pa(vaddr),
 						   __pa(vaddr_end),
 						   page_size_mask);
 			continue;
 		}
 
-		pud = alloc_low_page();
-		paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end),
+		p4d = alloc_low_page();
+		paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
 					   page_size_mask);
 
 		spin_lock(&init_mm.page_table_lock);
-		p4d_populate(&init_mm, p4d, pud);
+		if (IS_ENABLED(CONFIG_X86_5LEVEL))
+			pgd_populate(&init_mm, pgd, p4d);
+		else
+			p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
 		spin_unlock(&init_mm.page_table_lock);
 		pgd_changed = true;
 	}
@@ -990,7 +1080,13 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
 
 		pud_base = pud_offset(p4d, 0);
 		remove_pud_table(pud_base, addr, next, direct);
-		free_pud_table(pud_base, p4d);
+		/*
+		 * For 4-level page tables we do not want to free PUDs, but in the
+		 * 5-level case we should free them. This code will have to change
+		 * to adapt for boot-time switching between 4 and 5 level page tables.
+		 */
+		if (CONFIG_PGTABLE_LEVELS == 5)
+			free_pud_table(pud_base, p4d);
 	}
 
 	if (direct)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index bbc558b88a88..4c1b5fd0c7ad 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -424,7 +424,7 @@ static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
 {
 	/* Don't assume we're using swapper_pg_dir at this point */
-	pgd_t *base = __va(read_cr3());
+	pgd_t *base = __va(read_cr3_pa());
 	pgd_t *pgd = &base[pgd_index(addr)];
 	p4d_t *p4d = p4d_offset(pgd, addr);
 	pud_t *pud = pud_offset(p4d, addr);
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 0c7d8129bed6..88215ac16b24 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -12,7 +12,7 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 
-extern pgd_t early_level4_pgt[PTRS_PER_PGD];
+extern pgd_t early_top_pgt[PTRS_PER_PGD];
 extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
 static int __init map_range(struct range *range)
@@ -109,8 +109,8 @@ void __init kasan_early_init(void)
 	for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++)
 		kasan_zero_p4d[i] = __p4d(p4d_val);
 
-	kasan_map_early_shadow(early_level4_pgt);
-	kasan_map_early_shadow(init_level4_pgt);
+	kasan_map_early_shadow(early_top_pgt);
+	kasan_map_early_shadow(init_top_pgt);
 }
 
 void __init kasan_init(void)
@@ -121,8 +121,8 @@ void __init kasan_init(void)
 	register_die_notifier(&kasan_die_notifier);
 #endif
 
-	memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt));
-	load_cr3(early_level4_pgt);
+	memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
+	load_cr3(early_top_pgt);
 	__flush_tlb_all();
 
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
@@ -148,7 +148,7 @@ void __init kasan_init(void)
 	kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
 			(void *)KASAN_SHADOW_END);
 
-	load_cr3(init_level4_pgt);
+	load_cr3(init_top_pgt);
 	__flush_tlb_all();
 
 	/*
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index aed206475aa7..af599167fe3c 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -6,12 +6,12 @@
  *
  * Entropy is generated using the KASLR early boot functions now shared in
  * the lib directory (originally written by Kees Cook). Randomization is
- * done on PGD & PUD page table levels to increase possible addresses. The
- * physical memory mapping code was adapted to support PUD level virtual
- * addresses. This implementation on the best configuration provides 30,000
- * possible virtual addresses in average for each memory region. An additional
- * low memory page is used to ensure each CPU can start with a PGD aligned
- * virtual address (for realmode).
+ * done on PGD & P4D/PUD page table levels to increase possible addresses.
+ * The physical memory mapping code was adapted to support P4D/PUD level
+ * virtual addresses. This implementation on the best configuration provides
+ * 30,000 possible virtual addresses in average for each memory region.
+ * An additional low memory page is used to ensure each CPU can start with
+ * a PGD aligned virtual address (for realmode).
  *
  * The order of each memory region is not changed. The feature looks at
  * the available space for the regions based on different configuration
@@ -70,7 +70,7 @@ static __initdata struct kaslr_memory_region {
 	unsigned long *base;
 	unsigned long size_tb;
 } kaslr_regions[] = {
-	{ &page_offset_base, 64/* Maximum */ },
+	{ &page_offset_base, 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) /* Maximum */ },
 	{ &vmalloc_base, VMALLOC_SIZE_TB },
 	{ &vmemmap_base, 1 },
 };
@@ -142,7 +142,10 @@ void __init kernel_randomize_memory(void)
 		 */
 		entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
 		prandom_bytes_state(&rand_state, &rand, sizeof(rand));
-		entropy = (rand % (entropy + 1)) & PUD_MASK;
+		if (IS_ENABLED(CONFIG_X86_5LEVEL))
+			entropy = (rand % (entropy + 1)) & P4D_MASK;
+		else
+			entropy = (rand % (entropy + 1)) & PUD_MASK;
 		vaddr += entropy;
 		*kaslr_regions[i].base = vaddr;
 
@@ -151,27 +154,21 @@ void __init kernel_randomize_memory(void)
 		 * randomization alignment.
 		 */
 		vaddr += get_padding(&kaslr_regions[i]);
-		vaddr = round_up(vaddr + 1, PUD_SIZE);
+		if (IS_ENABLED(CONFIG_X86_5LEVEL))
+			vaddr = round_up(vaddr + 1, P4D_SIZE);
+		else
+			vaddr = round_up(vaddr + 1, PUD_SIZE);
 		remain_entropy -= entropy;
 	}
 }
 
-/*
- * Create PGD aligned trampoline table to allow real mode initialization
- * of additional CPUs. Consume only 1 low memory page.
- */
-void __meminit init_trampoline(void)
+static void __meminit init_trampoline_pud(void)
 {
 	unsigned long paddr, paddr_next;
 	pgd_t *pgd;
 	pud_t *pud_page, *pud_page_tramp;
 	int i;
 
-	if (!kaslr_memory_enabled()) {
-		init_trampoline_default();
-		return;
-	}
-
 	pud_page_tramp = alloc_low_page();
 
 	paddr = 0;
@@ -192,3 +189,49 @@ void __meminit init_trampoline(void)
 	set_pgd(&trampoline_pgd_entry,
 		__pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
 }
+
+static void __meminit init_trampoline_p4d(void)
+{
+	unsigned long paddr, paddr_next;
+	pgd_t *pgd;
+	p4d_t *p4d_page, *p4d_page_tramp;
+	int i;
+
+	p4d_page_tramp = alloc_low_page();
+
+	paddr = 0;
+	pgd = pgd_offset_k((unsigned long)__va(paddr));
+	p4d_page = (p4d_t *) pgd_page_vaddr(*pgd);
+
+	for (i = p4d_index(paddr); i < PTRS_PER_P4D; i++, paddr = paddr_next) {
+		p4d_t *p4d, *p4d_tramp;
+		unsigned long vaddr = (unsigned long)__va(paddr);
+
+		p4d_tramp = p4d_page_tramp + p4d_index(paddr);
+		p4d = p4d_page + p4d_index(vaddr);
+		paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
+
+		*p4d_tramp = *p4d;
+	}
+
+	set_pgd(&trampoline_pgd_entry,
+		__pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
+}
+
+/*
+ * Create PGD aligned trampoline table to allow real mode initialization
+ * of additional CPUs. Consume only 1 low memory page.
+ */
+void __meminit init_trampoline(void)
+{
+
+	if (!kaslr_memory_enabled()) {
+		init_trampoline_default();
+		return;
+	}
+
+	if (IS_ENABLED(CONFIG_X86_5LEVEL))
+		init_trampoline_p4d();
+	else
+		init_trampoline_pud();
+}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 19ad095b41df..797295e792b2 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -74,9 +74,6 @@ static int mmap_is_legacy(void)
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
 
-	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
-		return 1;
-
 	return sysctl_legacy_va_layout;
 }
 
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 6b7ce6279133..aca6295350f3 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -100,5 +100,6 @@ void __init initmem_init(void)
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
 
+	__vmalloc_start_set = true;
 	setup_bootmem_allocator();
 }
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1dcd2be4cce4..c8520b2c62d2 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -186,7 +186,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
 	unsigned int i, level;
 	unsigned long addr;
 
-	BUG_ON(irqs_disabled());
+	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 	WARN_ON(PAGE_ALIGN(start) != start);
 
 	on_each_cpu(__cpa_flush_range, NULL, 1);
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index 38868adf07ea..f6ae6830b341 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -9,7 +9,7 @@
 #include <linux/mmiotrace.h>
 
 static unsigned long mmio_address;
-module_param(mmio_address, ulong, 0);
+module_param_hw(mmio_address, ulong, iomem, 0);
 MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
 				"(or 8 MB if read_far is non-zero).");
 
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6e7bedf69af7..014d07a80053 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -15,7 +15,7 @@
 #include <linux/debugfs.h>
 
 /*
- *	Smarter SMP flushing macros.
+ *	TLB flushing, formerly SMP-only
  *		c/o Linus Torvalds.
  *
  *	These mean you can really definitely utterly forget about
@@ -28,39 +28,28 @@
  *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
  */
 
-#ifdef CONFIG_SMP
-
-struct flush_tlb_info {
-	struct mm_struct *flush_mm;
-	unsigned long flush_start;
-	unsigned long flush_end;
-};
-
-/*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- */
 void leave_mm(int cpu)
 {
-	struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);
+	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+
+	/*
+	 * It's plausible that we're in lazy TLB mode while our mm is init_mm.
+	 * If so, our callers still expect us to flush the TLB, but there
+	 * aren't any user TLB entries in init_mm to worry about.
+	 *
+	 * This needs to happen before any other sanity checks due to
+	 * intel_idle's shenanigans.
+	 */
+	if (loaded_mm == &init_mm)
+		return;
+
 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
 		BUG();
-	if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
-		cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
-		load_cr3(swapper_pg_dir);
-		/*
-		 * This gets called in the idle path where RCU
-		 * functions differently.  Tracing normally
-		 * uses RCU, so we have to call the tracepoint
-		 * specially here.
-		 */
-		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-	}
+
+	switch_mm(NULL, &init_mm, NULL);
 }
 EXPORT_SYMBOL_GPL(leave_mm);
 
-#endif /* CONFIG_SMP */
-
 void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	       struct task_struct *tsk)
 {
@@ -75,216 +64,167 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			struct task_struct *tsk)
 {
 	unsigned cpu = smp_processor_id();
+	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
 
-	if (likely(prev != next)) {
-		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
-			/*
-			 * If our current stack is in vmalloc space and isn't
-			 * mapped in the new pgd, we'll double-fault.  Forcibly
-			 * map it.
-			 */
-			unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
-
-			pgd_t *pgd = next->pgd + stack_pgd_index;
-
-			if (unlikely(pgd_none(*pgd)))
-				set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
-		}
+	/*
+	 * NB: The scheduler will call us with prev == next when
+	 * switching from lazy TLB mode to normal mode if active_mm
+	 * isn't changing.  When this happens, there is no guarantee
+	 * that CR3 (and hence cpu_tlbstate.loaded_mm) matches next.
+	 *
+	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
+	 */
 
-#ifdef CONFIG_SMP
-		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		this_cpu_write(cpu_tlbstate.active_mm, next);
-#endif
+	this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 
-		cpumask_set_cpu(cpu, mm_cpumask(next));
+	if (real_prev == next) {
+		/*
+		 * There's nothing to do: we always keep the per-mm control
+		 * regs in sync with cpu_tlbstate.loaded_mm.  Just
+		 * sanity-check mm_cpumask.
+		 */
+		if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next))))
+			cpumask_set_cpu(cpu, mm_cpumask(next));
+		return;
+	}
 
+	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
 		/*
-		 * Re-load page tables.
-		 *
-		 * This logic has an ordering constraint:
-		 *
-		 *  CPU 0: Write to a PTE for 'next'
-		 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
-		 *  CPU 1: set bit 1 in next's mm_cpumask
-		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
-		 *
-		 * We need to prevent an outcome in which CPU 1 observes
-		 * the new PTE value and CPU 0 observes bit 1 clear in
-		 * mm_cpumask.  (If that occurs, then the IPI will never
-		 * be sent, and CPU 0's TLB will contain a stale entry.)
-		 *
-		 * The bad outcome can occur if either CPU's load is
-		 * reordered before that CPU's store, so both CPUs must
-		 * execute full barriers to prevent this from happening.
-		 *
-		 * Thus, switch_mm needs a full barrier between the
-		 * store to mm_cpumask and any operation that could load
-		 * from next->pgd.  TLB fills are special and can happen
-		 * due to instruction fetches or for no reason at all,
-		 * and neither LOCK nor MFENCE orders them.
-		 * Fortunately, load_cr3() is serializing and gives the
-		 * ordering guarantee we need.
-		 *
+		 * If our current stack is in vmalloc space and isn't
+		 * mapped in the new pgd, we'll double-fault.  Forcibly
+		 * map it.
 		 */
-		load_cr3(next->pgd);
+		unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
 
-		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+		pgd_t *pgd = next->pgd + stack_pgd_index;
 
-		/* Stop flush ipis for the previous mm */
-		cpumask_clear_cpu(cpu, mm_cpumask(prev));
+		if (unlikely(pgd_none(*pgd)))
+			set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
+	}
 
-		/* Load per-mm CR4 state */
-		load_mm_cr4(next);
+	this_cpu_write(cpu_tlbstate.loaded_mm, next);
 
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-		/*
-		 * Load the LDT, if the LDT is different.
-		 *
-		 * It's possible that prev->context.ldt doesn't match
-		 * the LDT register.  This can happen if leave_mm(prev)
-		 * was called and then modify_ldt changed
-		 * prev->context.ldt but suppressed an IPI to this CPU.
-		 * In this case, prev->context.ldt != NULL, because we
-		 * never set context.ldt to NULL while the mm still
-		 * exists.  That means that next->context.ldt !=
-		 * prev->context.ldt, because mms never share an LDT.
-		 */
-		if (unlikely(prev->context.ldt != next->context.ldt))
-			load_mm_ldt(next);
-#endif
+	WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
+	cpumask_set_cpu(cpu, mm_cpumask(next));
+
+	/*
+	 * Re-load page tables.
+	 *
+	 * This logic has an ordering constraint:
+	 *
+	 *  CPU 0: Write to a PTE for 'next'
+	 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
+	 *  CPU 1: set bit 1 in next's mm_cpumask
+	 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
+	 *
+	 * We need to prevent an outcome in which CPU 1 observes
+	 * the new PTE value and CPU 0 observes bit 1 clear in
+	 * mm_cpumask.  (If that occurs, then the IPI will never
+	 * be sent, and CPU 0's TLB will contain a stale entry.)
+	 *
+	 * The bad outcome can occur if either CPU's load is
+	 * reordered before that CPU's store, so both CPUs must
+	 * execute full barriers to prevent this from happening.
+	 *
+	 * Thus, switch_mm needs a full barrier between the
+	 * store to mm_cpumask and any operation that could load
+	 * from next->pgd.  TLB fills are special and can happen
+	 * due to instruction fetches or for no reason at all,
+	 * and neither LOCK nor MFENCE orders them.
+	 * Fortunately, load_cr3() is serializing and gives the
+	 * ordering guarantee we need.
+	 */
+	load_cr3(next->pgd);
+
+	/*
+	 * This gets called via leave_mm() in the idle path where RCU
+	 * functions differently.  Tracing normally uses RCU, so we have to
+	 * call the tracepoint specially here.
+	 */
+	trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+
+	/* Stop flush ipis for the previous mm */
+	WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
+		     real_prev != &init_mm);
+	cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+
+	/* Load per-mm CR4 and LDTR state */
+	load_mm_cr4(next);
+	switch_ldt(real_prev, next);
+}
+
+static void flush_tlb_func_common(const struct flush_tlb_info *f,
+				  bool local, enum tlb_flush_reason reason)
+{
+	/* This code cannot presently handle being reentered. */
+	VM_WARN_ON(!irqs_disabled());
+
+	if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
+		leave_mm(smp_processor_id());
+		return;
 	}
-#ifdef CONFIG_SMP
-	  else {
-		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
-
-		if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
-			/*
-			 * On established mms, the mm_cpumask is only changed
-			 * from irq context, from ptep_clear_flush() while in
-			 * lazy tlb mode, and here. Irqs are blocked during
-			 * schedule, protecting us from simultaneous changes.
-			 */
-			cpumask_set_cpu(cpu, mm_cpumask(next));
 
-			/*
-			 * We were in lazy tlb mode and leave_mm disabled
-			 * tlb flush IPI delivery. We must reload CR3
-			 * to make sure to use no freed page tables.
-			 *
-			 * As above, load_cr3() is serializing and orders TLB
-			 * fills with respect to the mm_cpumask write.
-			 */
-			load_cr3(next->pgd);
-			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-			load_mm_cr4(next);
-			load_mm_ldt(next);
+	if (f->end == TLB_FLUSH_ALL) {
+		local_flush_tlb();
+		if (local)
+			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+		trace_tlb_flush(reason, TLB_FLUSH_ALL);
+	} else {
+		unsigned long addr;
+		unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
+		addr = f->start;
+		while (addr < f->end) {
+			__flush_tlb_single(addr);
+			addr += PAGE_SIZE;
 		}
+		if (local)
+			count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
+		trace_tlb_flush(reason, nr_pages);
 	}
-#endif
 }
 
-#ifdef CONFIG_SMP
+static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
+{
+	const struct flush_tlb_info *f = info;
 
-/*
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) set cpu_tlbstate to TLBSTATE_OK
- *	Now the tlb flush NMI handler flush_tlb_func won't call leave_mm
- *	if cpu0 was in lazy tlb mode.
- * 1a2) update cpu active_mm
- *	Now cpu0 accepts tlb flushes for the new mm.
- * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask);
- *	Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask);
- *	Stop ipi delivery for the old mm. This is not synchronized with
- *	the other cpus, but flush_tlb_func ignore flush ipis for the wrong
- *	mm, and in the worst case we perform a superfluous tlb flush.
- * 1b) thread switch without mm change
- *	cpu active_mm is correct, cpu0 already handles flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- *	Atomically set the bit [other cpus will start sending flush ipis],
- *	and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- *   runs in kernel space, the cpu could load tlb entries for user space
- *   pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- */
+	flush_tlb_func_common(f, true, reason);
+}
 
-/*
- * TLB flush funcation:
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- */
-static void flush_tlb_func(void *info)
+static void flush_tlb_func_remote(void *info)
 {
-	struct flush_tlb_info *f = info;
+	const struct flush_tlb_info *f = info;
 
 	inc_irq_stat(irq_tlb_count);
 
-	if (f->flush_mm && f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
+	if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
 		return;
 
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
-		if (f->flush_end == TLB_FLUSH_ALL) {
-			local_flush_tlb();
-			trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
-		} else {
-			unsigned long addr;
-			unsigned long nr_pages =
-				(f->flush_end - f->flush_start) / PAGE_SIZE;
-			addr = f->flush_start;
-			while (addr < f->flush_end) {
-				__flush_tlb_single(addr);
-				addr += PAGE_SIZE;
-			}
-			trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
-		}
-	} else
-		leave_mm(smp_processor_id());
-
+	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
 }
 
 void native_flush_tlb_others(const struct cpumask *cpumask,
-				 struct mm_struct *mm, unsigned long start,
-				 unsigned long end)
+			     const struct flush_tlb_info *info)
 {
-	struct flush_tlb_info info;
-
-	info.flush_mm = mm;
-	info.flush_start = start;
-	info.flush_end = end;
-
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
-	if (end == TLB_FLUSH_ALL)
+	if (info->end == TLB_FLUSH_ALL)
 		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
 	else
 		trace_tlb_flush(TLB_REMOTE_SEND_IPI,
-				(end - start) >> PAGE_SHIFT);
+				(info->end - info->start) >> PAGE_SHIFT);
 
 	if (is_uv_system()) {
 		unsigned int cpu;
 
 		cpu = smp_processor_id();
-		cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu);
+		cpumask = uv_flush_tlb_others(cpumask, info);
 		if (cpumask)
-			smp_call_function_many(cpumask, flush_tlb_func,
-								&info, 1);
+			smp_call_function_many(cpumask, flush_tlb_func_remote,
+					       (void *)info, 1);
 		return;
 	}
-	smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
+	smp_call_function_many(cpumask, flush_tlb_func_remote,
+			       (void *)info, 1);
 }
 
 /*
@@ -302,85 +242,41 @@ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 				unsigned long end, unsigned long vmflag)
 {
-	unsigned long addr;
-	/* do a global flush by default */
-	unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
-
-	preempt_disable();
+	int cpu;
 
-	if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
-		base_pages_to_flush = (end - start) >> PAGE_SHIFT;
-	if (base_pages_to_flush > tlb_single_page_flush_ceiling)
-		base_pages_to_flush = TLB_FLUSH_ALL;
+	struct flush_tlb_info info = {
+		.mm = mm,
+	};
 
-	if (current->active_mm != mm) {
-		/* Synchronize with switch_mm. */
-		smp_mb();
+	cpu = get_cpu();
 
-		goto out;
-	}
-
-	if (!current->mm) {
-		leave_mm(smp_processor_id());
+	/* Synchronize with switch_mm. */
+	smp_mb();
 
-		/* Synchronize with switch_mm. */
-		smp_mb();
-
-		goto out;
-	}
-
-	/*
-	 * Both branches below are implicit full barriers (MOV to CR or
-	 * INVLPG) that synchronize with switch_mm.
-	 */
-	if (base_pages_to_flush == TLB_FLUSH_ALL) {
-		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-		local_flush_tlb();
+	/* Should we flush just the requested range? */
+	if ((end != TLB_FLUSH_ALL) &&
+	    !(vmflag & VM_HUGETLB) &&
+	    ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
+		info.start = start;
+		info.end = end;
 	} else {
-		/* flush range by one by one 'invlpg' */
-		for (addr = start; addr < end;	addr += PAGE_SIZE) {
-			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
-			__flush_tlb_single(addr);
-		}
-	}
-	trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
-out:
-	if (base_pages_to_flush == TLB_FLUSH_ALL) {
-		start = 0UL;
-		end = TLB_FLUSH_ALL;
+		info.start = 0UL;
+		info.end = TLB_FLUSH_ALL;
 	}
-	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(mm_cpumask(mm), mm, start, end);
-	preempt_enable();
-}
 
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
-{
-	struct mm_struct *mm = vma->vm_mm;
-
-	preempt_disable();
-
-	if (current->active_mm == mm) {
-		if (current->mm) {
-			/*
-			 * Implicit full barrier (INVLPG) that synchronizes
-			 * with switch_mm.
-			 */
-			__flush_tlb_one(start);
-		} else {
-			leave_mm(smp_processor_id());
-
-			/* Synchronize with switch_mm. */
-			smp_mb();
-		}
+	if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
+		VM_WARN_ON(irqs_disabled());
+		local_irq_disable();
+		flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
+		local_irq_enable();
 	}
 
-	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE);
-
-	preempt_enable();
+	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
+		flush_tlb_others(mm_cpumask(mm), &info);
+	put_cpu();
 }
 
+
 static void do_flush_tlb_all(void *info)
 {
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
@@ -401,7 +297,7 @@ static void do_kernel_range_flush(void *info)
 	unsigned long addr;
 
 	/* flush range by one by one 'invlpg' */
-	for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE)
+	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
 		__flush_tlb_single(addr);
 }
 
@@ -410,16 +306,40 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 
 	/* Balance as user space task's flush, a bit conservative */
 	if (end == TLB_FLUSH_ALL ||
-	    (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
+	    (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
 		on_each_cpu(do_flush_tlb_all, NULL, 1);
 	} else {
 		struct flush_tlb_info info;
-		info.flush_start = start;
-		info.flush_end = end;
+		info.start = start;
+		info.end = end;
 		on_each_cpu(do_kernel_range_flush, &info, 1);
 	}
 }
 
+void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+	struct flush_tlb_info info = {
+		.mm = NULL,
+		.start = 0UL,
+		.end = TLB_FLUSH_ALL,
+	};
+
+	int cpu = get_cpu();
+
+	if (cpumask_test_cpu(cpu, &batch->cpumask)) {
+		VM_WARN_ON(irqs_disabled());
+		local_irq_disable();
+		flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
+		local_irq_enable();
+	}
+
+	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
+		flush_tlb_others(&batch->cpumask, &info);
+	cpumask_clear(&batch->cpumask);
+
+	put_cpu();
+}
+
 static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
 			     size_t count, loff_t *ppos)
 {
@@ -465,5 +385,3 @@ static int __init create_tlb_single_page_flush_ceiling(void)
 	return 0;
 }
 late_initcall(create_tlb_single_page_flush_ceiling);
-
-#endif /* CONFIG_SMP */
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
index 90568c33ddb0..fefb4b619598 100644
--- a/arch/x86/net/Makefile
+++ b/arch/x86/net/Makefile
@@ -1,4 +1,6 @@
 #
 # Arch-specific network modules
 #
+OBJECT_FILES_NON_STANDARD_bpf_jit.o += y
+
 obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index f1d83b34c329..2f56e1ed61c3 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -1,4 +1,5 @@
 OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y
 
 obj-$(CONFIG_EFI) 		+= quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
 obj-$(CONFIG_EARLY_PRINTK_EFI)	+= early_printk.o
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 7e76a4d8304b..f084d8718ac4 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -828,9 +828,11 @@ static void __init kexec_enter_virtual_mode(void)
 
 	/*
 	 * We don't do virtual mode, since we don't do runtime services, on
-	 * non-native EFI
+	 * non-native EFI. With efi=old_map, we don't do runtime services in
+	 * kexec kernel because in the initial boot something else might
+	 * have been mapped at these virtual addresses.
 	 */
-	if (!efi_is_native()) {
+	if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) {
 		efi_memmap_unmap();
 		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 		return;
@@ -1012,7 +1014,6 @@ static void __init __efi_enter_virtual_mode(void)
 	 * necessary relocation fixups for the new virtual addresses.
 	 */
 	efi_runtime_update_mappings();
-	efi_dump_pagetable();
 
 	/* clean DUMMY object */
 	efi_delete_dummy_variable();
@@ -1027,6 +1028,8 @@ void __init efi_enter_virtual_mode(void)
 		kexec_enter_virtual_mode();
 	else
 		__efi_enter_virtual_mode();
+
+	efi_dump_pagetable();
 }
 
 /*
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 3481268da3d0..52f7faa1538f 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -44,7 +44,14 @@ int __init efi_alloc_page_tables(void)
 }
 
 void efi_sync_low_kernel_mappings(void) {}
-void __init efi_dump_pagetable(void) {}
+
+void __init efi_dump_pagetable(void)
+{
+#ifdef CONFIG_EFI_PGT_DUMP
+	ptdump_walk_pgd_level(NULL, swapper_pg_dir);
+#endif
+}
+
 int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 {
 	return 0;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index c488625c9712..9bf72f5bfedb 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -71,14 +71,16 @@ static void __init early_code_mapping_set_exec(int executable)
 
 pgd_t * __init efi_call_phys_prolog(void)
 {
-	unsigned long vaddress;
-	pgd_t *save_pgd;
+	unsigned long vaddr, addr_pgd, addr_p4d, addr_pud;
+	pgd_t *save_pgd, *pgd_k, *pgd_efi;
+	p4d_t *p4d, *p4d_k, *p4d_efi;
+	pud_t *pud;
 
 	int pgd;
-	int n_pgds;
+	int n_pgds, i, j;
 
 	if (!efi_enabled(EFI_OLD_MEMMAP)) {
-		save_pgd = (pgd_t *)read_cr3();
+		save_pgd = (pgd_t *)__read_cr3();
 		write_cr3((unsigned long)efi_scratch.efi_pgt);
 		goto out;
 	}
@@ -88,10 +90,49 @@ pgd_t * __init efi_call_phys_prolog(void)
 	n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
 	save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
 
+	/*
+	 * Build 1:1 identity mapping for efi=old_map usage. Note that
+	 * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while
+	 * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical
+	 * address X, the pud_index(X) != pud_index(__va(X)), we can only copy
+	 * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping.
+	 * This means here we can only reuse the PMD tables of the direct mapping.
+	 */
 	for (pgd = 0; pgd < n_pgds; pgd++) {
-		save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
-		vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
-		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
+		addr_pgd = (unsigned long)(pgd * PGDIR_SIZE);
+		vaddr = (unsigned long)__va(pgd * PGDIR_SIZE);
+		pgd_efi = pgd_offset_k(addr_pgd);
+		save_pgd[pgd] = *pgd_efi;
+
+		p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd);
+		if (!p4d) {
+			pr_err("Failed to allocate p4d table!\n");
+			goto out;
+		}
+
+		for (i = 0; i < PTRS_PER_P4D; i++) {
+			addr_p4d = addr_pgd + i * P4D_SIZE;
+			p4d_efi = p4d + p4d_index(addr_p4d);
+
+			pud = pud_alloc(&init_mm, p4d_efi, addr_p4d);
+			if (!pud) {
+				pr_err("Failed to allocate pud table!\n");
+				goto out;
+			}
+
+			for (j = 0; j < PTRS_PER_PUD; j++) {
+				addr_pud = addr_p4d + j * PUD_SIZE;
+
+				if (addr_pud > (max_pfn << PAGE_SHIFT))
+					break;
+
+				vaddr = (unsigned long)__va(addr_pud);
+
+				pgd_k = pgd_offset_k(vaddr);
+				p4d_k = p4d_offset(pgd_k, vaddr);
+				pud[j] = *pud_offset(p4d_k, vaddr);
+			}
+		}
 	}
 out:
 	__flush_tlb_all();
@@ -104,8 +145,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 	/*
 	 * After the lock is released, the original page table is restored.
 	 */
-	int pgd_idx;
+	int pgd_idx, i;
 	int nr_pgds;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 
 	if (!efi_enabled(EFI_OLD_MEMMAP)) {
 		write_cr3((unsigned long)save_pgd);
@@ -115,9 +159,28 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 
 	nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
 
-	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
+	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
+		pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
 		set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
 
+		if (!(pgd_val(*pgd) & _PAGE_PRESENT))
+			continue;
+
+		for (i = 0; i < PTRS_PER_P4D; i++) {
+			p4d = p4d_offset(pgd,
+					 pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
+
+			if (!(p4d_val(*p4d) & _PAGE_PRESENT))
+				continue;
+
+			pud = (pud_t *)p4d_page_vaddr(*p4d);
+			pud_free(&init_mm, pud);
+		}
+
+		p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+		p4d_free(&init_mm, p4d);
+	}
+
 	kfree(save_pgd);
 
 	__flush_tlb_all();
@@ -526,7 +589,10 @@ void __init efi_runtime_update_mappings(void)
 void __init efi_dump_pagetable(void)
 {
 #ifdef CONFIG_EFI_PGT_DUMP
-	ptdump_walk_pgd_level(NULL, efi_pgd);
+	if (efi_enabled(EFI_OLD_MEMMAP))
+		ptdump_walk_pgd_level(NULL, swapper_pg_dir);
+	else
+		ptdump_walk_pgd_level(NULL, efi_pgd);
 #endif
 }
 
@@ -583,7 +649,7 @@ efi_status_t efi_thunk_set_virtual_address_map(
 	efi_sync_low_kernel_mappings();
 	local_irq_save(flags);
 
-	efi_scratch.prev_cr3 = read_cr3();
+	efi_scratch.prev_cr3 = __read_cr3();
 	write_cr3((unsigned long)efi_scratch.efi_pgt);
 	__flush_tlb_all();
 
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 26615991d69c..8a99a2e96537 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -15,12 +15,66 @@
 #include <asm/e820/api.h>
 #include <asm/efi.h>
 #include <asm/uv/uv.h>
+#include <asm/cpu_device_id.h>
 
 #define EFI_MIN_RESERVE 5120
 
 #define EFI_DUMMY_GUID \
 	EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9)
 
+#define QUARK_CSH_SIGNATURE		0x5f435348	/* _CSH */
+#define QUARK_SECURITY_HEADER_SIZE	0x400
+
+/*
+ * Header prepended to the standard EFI capsule on Quark systems the are based
+ * on Intel firmware BSP.
+ * @csh_signature:	Unique identifier to sanity check signed module
+ * 			presence ("_CSH").
+ * @version:		Current version of CSH used. Should be one for Quark A0.
+ * @modulesize:		Size of the entire module including the module header
+ * 			and payload.
+ * @security_version_number_index: Index of SVN to use for validation of signed
+ * 			module.
+ * @security_version_number: Used to prevent against roll back of modules.
+ * @rsvd_module_id:	Currently unused for Clanton (Quark).
+ * @rsvd_module_vendor:	Vendor Identifier. For Intel products value is
+ * 			0x00008086.
+ * @rsvd_date:		BCD representation of build date as yyyymmdd, where
+ * 			yyyy=4 digit year, mm=1-12, dd=1-31.
+ * @headersize:		Total length of the header including including any
+ * 			padding optionally added by the signing tool.
+ * @hash_algo:		What Hash is used in the module signing.
+ * @cryp_algo:		What Crypto is used in the module signing.
+ * @keysize:		Total length of the key data including including any
+ * 			padding optionally added by the signing tool.
+ * @signaturesize:	Total length of the signature including including any
+ * 			padding optionally added by the signing tool.
+ * @rsvd_next_header:	32-bit pointer to the next Secure Boot Module in the
+ * 			chain, if there is a next header.
+ * @rsvd:		Reserved, padding structure to required size.
+ *
+ * See also QuartSecurityHeader_t in
+ * Quark_EDKII_v1.2.1.1/QuarkPlatformPkg/Include/QuarkBootRom.h
+ * from https://downloadcenter.intel.com/download/23197/Intel-Quark-SoC-X1000-Board-Support-Package-BSP
+ */
+struct quark_security_header {
+	u32 csh_signature;
+	u32 version;
+	u32 modulesize;
+	u32 security_version_number_index;
+	u32 security_version_number;
+	u32 rsvd_module_id;
+	u32 rsvd_module_vendor;
+	u32 rsvd_date;
+	u32 headersize;
+	u32 hash_algo;
+	u32 cryp_algo;
+	u32 keysize;
+	u32 signaturesize;
+	u32 rsvd_next_header;
+	u32 rsvd[2];
+};
+
 static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 };
 
 static bool efi_no_storage_paranoia;
@@ -360,6 +414,9 @@ void __init efi_free_boot_services(void)
 		free_bootmem_late(start, size);
 	}
 
+	if (!num_entries)
+		return;
+
 	new_size = efi.memmap.desc_size * num_entries;
 	new_phys = efi_memmap_alloc(num_entries);
 	if (!new_phys) {
@@ -501,3 +558,86 @@ bool efi_poweroff_required(void)
 {
 	return acpi_gbl_reduced_hardware || acpi_no_s5;
 }
+
+#ifdef CONFIG_EFI_CAPSULE_QUIRK_QUARK_CSH
+
+static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff,
+				  size_t hdr_bytes)
+{
+	struct quark_security_header *csh = *pkbuff;
+
+	/* Only process data block that is larger than the security header */
+	if (hdr_bytes < sizeof(struct quark_security_header))
+		return 0;
+
+	if (csh->csh_signature != QUARK_CSH_SIGNATURE ||
+	    csh->headersize != QUARK_SECURITY_HEADER_SIZE)
+		return 1;
+
+	/* Only process data block if EFI header is included */
+	if (hdr_bytes < QUARK_SECURITY_HEADER_SIZE +
+			sizeof(efi_capsule_header_t))
+		return 0;
+
+	pr_debug("Quark security header detected\n");
+
+	if (csh->rsvd_next_header != 0) {
+		pr_err("multiple Quark security headers not supported\n");
+		return -EINVAL;
+	}
+
+	*pkbuff += csh->headersize;
+	cap_info->total_size = csh->headersize;
+
+	/*
+	 * Update the first page pointer to skip over the CSH header.
+	 */
+	cap_info->pages[0] += csh->headersize;
+
+	return 1;
+}
+
+#define ICPU(family, model, quirk_handler) \
+	{ X86_VENDOR_INTEL, family, model, X86_FEATURE_ANY, \
+	  (unsigned long)&quirk_handler }
+
+static const struct x86_cpu_id efi_capsule_quirk_ids[] = {
+	ICPU(5, 9, qrk_capsule_setup_info),	/* Intel Quark X1000 */
+	{ }
+};
+
+int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
+			   size_t hdr_bytes)
+{
+	int (*quirk_handler)(struct capsule_info *, void **, size_t);
+	const struct x86_cpu_id *id;
+	int ret;
+
+	if (hdr_bytes < sizeof(efi_capsule_header_t))
+		return 0;
+
+	cap_info->total_size = 0;
+
+	id = x86_match_cpu(efi_capsule_quirk_ids);
+	if (id) {
+		/*
+		 * The quirk handler is supposed to return
+		 *  - a value > 0 if the setup should continue, after advancing
+		 *    kbuff as needed
+		 *  - 0 if not enough hdr_bytes are available yet
+		 *  - a negative error code otherwise
+		 */
+		quirk_handler = (typeof(quirk_handler))id->driver_data;
+		ret = quirk_handler(cap_info, &kbuff, hdr_bytes);
+		if (ret <= 0)
+			return ret;
+	}
+
+	memcpy(&cap_info->header, kbuff, sizeof(cap_info->header));
+
+	cap_info->total_size += cap_info->header.imagesize;
+
+	return __efi_capsule_setup_info(cap_info);
+}
+
+#endif
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
index c5350fd27d70..0668aaff8bfe 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -77,7 +77,7 @@ static int xo1_power_state_enter(suspend_state_t pm_state)
 
 asmlinkage __visible int xo1_do_sleep(u8 sleep_state)
 {
-	void *pgd_addr = __va(read_cr3());
+	void *pgd_addr = __va(read_cr3_pa());
 
 	/* Program wakeup mask (using dword access to CS5536_PM1_EN) */
 	outl(wakeup_mask << 16, acpi_base + CS5536_PM1_STS);
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 42e65fee5673..2983faab5b18 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -456,12 +456,13 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
  */
 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 {
-	struct cyc2ns_data *data = cyc2ns_read_begin();
+	struct cyc2ns_data data;
 	unsigned long long ns;
 
-	ns = mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
+	cyc2ns_read_begin(&data);
+	ns = mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
+	cyc2ns_read_end();
 
-	cyc2ns_read_end(data);
 	return ns;
 }
 
@@ -470,12 +471,13 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
  */
 static inline unsigned long long ns_2_cycles(unsigned long long ns)
 {
-	struct cyc2ns_data *data = cyc2ns_read_begin();
+	struct cyc2ns_data data;
 	unsigned long long cyc;
 
-	cyc = (ns << data->cyc2ns_shift) / data->cyc2ns_mul;
+	cyc2ns_read_begin(&data);
+	cyc = (ns << data.cyc2ns_shift) / data.cyc2ns_mul;
+	cyc2ns_read_end();
 
-	cyc2ns_read_end(data);
 	return cyc;
 }
 
@@ -1121,11 +1123,9 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
  * done.  The returned pointer is valid till preemption is re-enabled.
  */
 const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
-						struct mm_struct *mm,
-						unsigned long start,
-						unsigned long end,
-						unsigned int cpu)
+					  const struct flush_tlb_info *info)
 {
+	unsigned int cpu = smp_processor_id();
 	int locals = 0, remotes = 0, hubs = 0;
 	struct bau_desc *bau_desc;
 	struct cpumask *flush_mask;
@@ -1179,8 +1179,8 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 
 	record_send_statistics(stat, locals, hubs, remotes, bau_desc);
 
-	if (!end || (end - start) <= PAGE_SIZE)
-		address = start;
+	if (!info->end || (info->end - info->start) <= PAGE_SIZE)
+		address = info->start;
 	else
 		address = TLB_FLUSH_ALL;
 
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile
index a6a198c33623..05041871ac90 100644
--- a/arch/x86/power/Makefile
+++ b/arch/x86/power/Makefile
@@ -1,3 +1,5 @@
+OBJECT_FILES_NON_STANDARD_hibernate_asm_$(BITS).o := y
+
 # __restore_processor_state() restores %gs after S3 resume and so should not
 # itself be stack-protected
 nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 6b05a9219ea2..78459a6d455a 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -129,7 +129,7 @@ static void __save_processor_state(struct saved_context *ctxt)
 	 */
 	ctxt->cr0 = read_cr0();
 	ctxt->cr2 = read_cr2();
-	ctxt->cr3 = read_cr3();
+	ctxt->cr3 = __read_cr3();
 	ctxt->cr4 = __read_cr4();
 #ifdef CONFIG_X86_64
 	ctxt->cr8 = read_cr8();
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 6a61194ffd58..e3e62c8a8e70 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -104,7 +104,7 @@ static int set_up_temporary_mappings(void)
 {
 	struct x86_mapping_info info = {
 		.alloc_pgt_page	= alloc_pgt_page,
-		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
+		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
 		.offset		= __PAGE_OFFSET,
 	};
 	unsigned long mstart, mend;
@@ -150,7 +150,8 @@ static int relocate_restore_code(void)
 	memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE);
 
 	/* Make the page containing the relocated code executable */
-	pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code);
+	pgd = (pgd_t *)__va(read_cr3_pa()) +
+		pgd_index(relocated_restore_code);
 	p4d = p4d_offset(pgd, relocated_restore_code);
 	if (p4d_large(*p4d)) {
 		set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index a163a90af4aa..cd4be19c36dc 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -102,7 +102,7 @@ static void __init setup_real_mode(void)
 
 	trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
 	trampoline_pgd[0] = trampoline_pgd_entry.pgd;
-	trampoline_pgd[511] = init_level4_pgt[511].pgd;
+	trampoline_pgd[511] = init_top_pgt[511].pgd;
 #endif
 }
 
diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
index a5c9910d234f..09a085bde0d4 100644
--- a/arch/x86/um/ptrace_64.c
+++ b/arch/x86/um/ptrace_64.c
@@ -125,7 +125,7 @@ int poke_user(struct task_struct *child, long addr, long data)
 	else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
 		(addr <= offsetof(struct user, u_debugreg[7]))) {
 		addr -= offsetof(struct user, u_debugreg[0]);
-		addr = addr >> 2;
+		addr = addr >> 3;
 		if ((addr == 4) || (addr == 5))
 			return -EIO;
 		child->thread.arch.debugregs[addr] = data;
diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h
index 46a9df99f3c5..7e1d35b6ad5c 100644
--- a/arch/x86/um/shared/sysdep/kernel-offsets.h
+++ b/arch/x86/um/shared/sysdep/kernel-offsets.h
@@ -2,16 +2,9 @@
 #include <linux/sched.h>
 #include <linux/elf.h>
 #include <linux/crypto.h>
+#include <linux/kbuild.h>
 #include <asm/mman.h>
 
-#define DEFINE(sym, val) \
-	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
-#define OFFSET(sym, str, mem) \
-	DEFINE(sym, offsetof(struct str, mem));
-
 void foo(void)
 {
 #include <common-offsets.h>
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index fffb0a16f9e3..bced7a369a11 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,3 +1,6 @@
+OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_xen-pvh.o := y
+
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_spinlock.o = -pg
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index 30bb2e80cfe7..a18703be9ead 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -54,38 +54,6 @@ static efi_system_table_t efi_systab_xen __initdata = {
 	.tables		= EFI_INVALID_TABLE_ADDR  /* Initialized later. */
 };
 
-static const struct efi efi_xen __initconst = {
-	.systab                   = NULL, /* Initialized later. */
-	.runtime_version	  = 0,    /* Initialized later. */
-	.mps                      = EFI_INVALID_TABLE_ADDR,
-	.acpi                     = EFI_INVALID_TABLE_ADDR,
-	.acpi20                   = EFI_INVALID_TABLE_ADDR,
-	.smbios                   = EFI_INVALID_TABLE_ADDR,
-	.smbios3                  = EFI_INVALID_TABLE_ADDR,
-	.sal_systab               = EFI_INVALID_TABLE_ADDR,
-	.boot_info                = EFI_INVALID_TABLE_ADDR,
-	.hcdp                     = EFI_INVALID_TABLE_ADDR,
-	.uga                      = EFI_INVALID_TABLE_ADDR,
-	.uv_systab                = EFI_INVALID_TABLE_ADDR,
-	.fw_vendor                = EFI_INVALID_TABLE_ADDR,
-	.runtime                  = EFI_INVALID_TABLE_ADDR,
-	.config_table             = EFI_INVALID_TABLE_ADDR,
-	.get_time                 = xen_efi_get_time,
-	.set_time                 = xen_efi_set_time,
-	.get_wakeup_time          = xen_efi_get_wakeup_time,
-	.set_wakeup_time          = xen_efi_set_wakeup_time,
-	.get_variable             = xen_efi_get_variable,
-	.get_next_variable        = xen_efi_get_next_variable,
-	.set_variable             = xen_efi_set_variable,
-	.query_variable_info      = xen_efi_query_variable_info,
-	.update_capsule           = xen_efi_update_capsule,
-	.query_capsule_caps       = xen_efi_query_capsule_caps,
-	.get_next_high_mono_count = xen_efi_get_next_high_mono_count,
-	.reset_system             = xen_efi_reset_system,
-	.set_virtual_address_map  = NULL, /* Not used under Xen. */
-	.flags			  = 0     /* Initialized later. */
-};
-
 static efi_system_table_t __init *xen_efi_probe(void)
 {
 	struct xen_platform_op op = {
@@ -102,7 +70,18 @@ static efi_system_table_t __init *xen_efi_probe(void)
 
 	/* Here we know that Xen runs on EFI platform. */
 
-	efi = efi_xen;
+	efi.get_time                 = xen_efi_get_time;
+	efi.set_time                 = xen_efi_set_time;
+	efi.get_wakeup_time          = xen_efi_get_wakeup_time;
+	efi.set_wakeup_time          = xen_efi_set_wakeup_time;
+	efi.get_variable             = xen_efi_get_variable;
+	efi.get_next_variable        = xen_efi_get_next_variable;
+	efi.set_variable             = xen_efi_set_variable;
+	efi.query_variable_info      = xen_efi_query_variable_info;
+	efi.update_capsule           = xen_efi_update_capsule;
+	efi.query_capsule_caps       = xen_efi_query_capsule_caps;
+	efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
+	efi.reset_system             = xen_efi_reset_system;
 
 	efi_systab_xen.tables = info->cfg.addr;
 	efi_systab_xen.nr_tables = info->cfg.nent;
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index a732bc2b9dfc..f33eef4ebd12 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -142,9 +142,7 @@ static void __init xen_banner(void)
 	struct xen_extraversion extra;
 	HYPERVISOR_xen_version(XENVER_extraversion, &extra);
 
-	pr_info("Booting paravirtualized kernel %son %s\n",
-		xen_feature(XENFEAT_auto_translated_physmap) ?
-			"with PVH extensions " : "", pv_info.name);
+	pr_info("Booting paravirtualized kernel on %s\n", pv_info.name);
 	printk(KERN_INFO "Xen version: %d.%d%s%s\n",
 	       version >> 16, version & 0xffff, extra.extraversion,
 	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
@@ -277,31 +275,19 @@ static bool __init xen_check_mwait(void)
 
 static bool __init xen_check_xsave(void)
 {
-	unsigned int err, eax, edx;
+	unsigned int cx, xsave_mask;
 
-	/*
-	 * Xen 4.0 and older accidentally leaked the host XSAVE flag into guest
-	 * view, despite not being able to support guests using the
-	 * functionality. Probe for the actual availability of XSAVE by seeing
-	 * whether xgetbv executes successfully or raises #UD.
-	 */
-	asm volatile("1: .byte 0x0f,0x01,0xd0\n\t" /* xgetbv */
-		     "xor %[err], %[err]\n"
-		     "2:\n\t"
-		     ".pushsection .fixup,\"ax\"\n\t"
-		     "3: movl $1,%[err]\n\t"
-		     "jmp 2b\n\t"
-		     ".popsection\n\t"
-		     _ASM_EXTABLE(1b, 3b)
-		     : [err] "=r" (err), "=a" (eax), "=d" (edx)
-		     : "c" (0));
-
-	return err == 0;
+	cx = cpuid_ecx(1);
+
+	xsave_mask = (1 << (X86_FEATURE_XSAVE % 32)) |
+		     (1 << (X86_FEATURE_OSXSAVE % 32));
+
+	/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
+	return (cx & xsave_mask) == xsave_mask;
 }
 
 static void __init xen_init_capabilities(void)
 {
-	setup_clear_cpu_cap(X86_BUG_SYSRET_SS_ATTRS);
 	setup_force_cpu_cap(X86_FEATURE_XENPV);
 	setup_clear_cpu_cap(X86_FEATURE_DCA);
 	setup_clear_cpu_cap(X86_FEATURE_APERFMPERF);
@@ -317,10 +303,7 @@ static void __init xen_init_capabilities(void)
 	else
 		setup_clear_cpu_cap(X86_FEATURE_MWAIT);
 
-	if (xen_check_xsave()) {
-		setup_force_cpu_cap(X86_FEATURE_XSAVE);
-		setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
-	} else {
+	if (!xen_check_xsave()) {
 		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
 		setup_clear_cpu_cap(X86_FEATURE_OSXSAVE);
 	}
@@ -972,15 +955,10 @@ static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
 
 void xen_setup_shared_info(void)
 {
-	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		set_fixmap(FIX_PARAVIRT_BOOTMAP,
-			   xen_start_info->shared_info);
+	set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
 
-		HYPERVISOR_shared_info =
-			(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
-	} else
-		HYPERVISOR_shared_info =
-			(struct shared_info *)__va(xen_start_info->shared_info);
+	HYPERVISOR_shared_info =
+		(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
 
 #ifndef CONFIG_SMP
 	/* In UP this is as good a place as any to set up shared info */
@@ -988,6 +966,13 @@ void xen_setup_shared_info(void)
 #endif
 
 	xen_setup_mfn_list_list();
+
+	/*
+	 * Now that shared info is set up we can start using routines that
+	 * point to pvclock area.
+	 */
+	if (system_state == SYSTEM_BOOTING)
+		xen_init_time_ops();
 }
 
 /* This is called once we have the cpu_possible_mask */
@@ -1286,8 +1271,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	x86_init.oem.arch_setup = xen_arch_setup;
 	x86_init.oem.banner = xen_banner;
 
-	xen_init_time_ops();
-
 	/*
 	 * Set up some pagetable state before starting to set any ptes.
 	 */
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e375a5e815f..3be06f3caf3c 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -42,7 +42,7 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 }
 EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);
 
-void xen_flush_tlb_all(void)
+static void xen_flush_tlb_all(void)
 {
 	struct mmuext_op *op;
 	struct multicall_space mcs;
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 9d9ae6650aa1..1d7a7213a310 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -355,10 +355,8 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
 		pteval_t flags = val & PTE_FLAGS_MASK;
 		unsigned long mfn;
 
-		if (!xen_feature(XENFEAT_auto_translated_physmap))
-			mfn = __pfn_to_mfn(pfn);
-		else
-			mfn = pfn;
+		mfn = __pfn_to_mfn(pfn);
+
 		/*
 		 * If there's no mfn for the pfn, then just create an
 		 * empty non-present pte.  Unfortunately this loses
@@ -647,9 +645,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
 	limit--;
 	BUG_ON(limit >= FIXADDR_TOP);
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return 0;
-
 	/*
 	 * 64-bit has a great big hole in the middle of the address
 	 * space, which contains the Xen mappings.  On 32-bit these
@@ -980,37 +975,32 @@ static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 	spin_unlock(&mm->page_table_lock);
 }
 
-
-#ifdef CONFIG_SMP
-/* Another cpu may still have their %cr3 pointing at the pagetable, so
-   we need to repoint it somewhere else before we can unpin it. */
-static void drop_other_mm_ref(void *info)
+static void drop_mm_ref_this_cpu(void *info)
 {
 	struct mm_struct *mm = info;
-	struct mm_struct *active_mm;
-
-	active_mm = this_cpu_read(cpu_tlbstate.active_mm);
 
-	if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
+	if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm)
 		leave_mm(smp_processor_id());
 
-	/* If this cpu still has a stale cr3 reference, then make sure
-	   it has been flushed. */
+	/*
+	 * If this cpu still has a stale cr3 reference, then make sure
+	 * it has been flushed.
+	 */
 	if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
-		load_cr3(swapper_pg_dir);
+		xen_mc_flush();
 }
 
+#ifdef CONFIG_SMP
+/*
+ * Another cpu may still have their %cr3 pointing at the pagetable, so
+ * we need to repoint it somewhere else before we can unpin it.
+ */
 static void xen_drop_mm_ref(struct mm_struct *mm)
 {
 	cpumask_var_t mask;
 	unsigned cpu;
 
-	if (current->active_mm == mm) {
-		if (current->mm == mm)
-			load_cr3(swapper_pg_dir);
-		else
-			leave_mm(smp_processor_id());
-	}
+	drop_mm_ref_this_cpu(mm);
 
 	/* Get the "official" set of cpus referring to our pagetable. */
 	if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
@@ -1018,31 +1008,31 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 			if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
 			    && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
 				continue;
-			smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
+			smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
 		}
 		return;
 	}
 	cpumask_copy(mask, mm_cpumask(mm));
 
-	/* It's possible that a vcpu may have a stale reference to our
-	   cr3, because its in lazy mode, and it hasn't yet flushed
-	   its set of pending hypercalls yet.  In this case, we can
-	   look at its actual current cr3 value, and force it to flush
-	   if needed. */
+	/*
+	 * It's possible that a vcpu may have a stale reference to our
+	 * cr3, because its in lazy mode, and it hasn't yet flushed
+	 * its set of pending hypercalls yet.  In this case, we can
+	 * look at its actual current cr3 value, and force it to flush
+	 * if needed.
+	 */
 	for_each_online_cpu(cpu) {
 		if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
 			cpumask_set_cpu(cpu, mask);
 	}
 
-	if (!cpumask_empty(mask))
-		smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
+	smp_call_function_many(mask, drop_mm_ref_this_cpu, mm, 1);
 	free_cpumask_var(mask);
 }
 #else
 static void xen_drop_mm_ref(struct mm_struct *mm)
 {
-	if (current->active_mm == mm)
-		load_cr3(swapper_pg_dir);
+	drop_mm_ref_this_cpu(mm);
 }
 #endif
 
@@ -1289,9 +1279,6 @@ static void __init xen_pagetable_cleanhighmap(void)
 
 static void __init xen_pagetable_p2m_setup(void)
 {
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return;
-
 	xen_vmalloc_p2m_tree();
 
 #ifdef CONFIG_X86_64
@@ -1314,8 +1301,7 @@ static void __init xen_pagetable_init(void)
 	xen_build_mfn_list_list();
 
 	/* Remap memory freed due to conflicts with E820 map */
-	if (!xen_feature(XENFEAT_auto_translated_physmap))
-		xen_remap_memory();
+	xen_remap_memory();
 
 	xen_setup_shared_info();
 }
@@ -1375,8 +1361,7 @@ static void xen_flush_tlb_single(unsigned long addr)
 }
 
 static void xen_flush_tlb_others(const struct cpumask *cpus,
-				 struct mm_struct *mm, unsigned long start,
-				 unsigned long end)
+				 const struct flush_tlb_info *info)
 {
 	struct {
 		struct mmuext_op op;
@@ -1388,7 +1373,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 	} *args;
 	struct multicall_space mcs;
 
-	trace_xen_mmu_flush_tlb_others(cpus, mm, start, end);
+	trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
 
 	if (cpumask_empty(cpus))
 		return;		/* nothing to do */
@@ -1402,9 +1387,10 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 	cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
 
 	args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
-	if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
+	if (info->end != TLB_FLUSH_ALL &&
+	    (info->end - info->start) <= PAGE_SIZE) {
 		args->op.cmd = MMUEXT_INVLPG_MULTI;
-		args->op.arg1.linear_addr = start;
+		args->op.arg1.linear_addr = info->start;
 	}
 
 	MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
@@ -1479,8 +1465,8 @@ static void xen_write_cr3(unsigned long cr3)
  * At the start of the day - when Xen launches a guest, it has already
  * built pagetables for the guest. We diligently look over them
  * in xen_setup_kernel_pagetable and graft as appropriate them in the
- * init_level4_pgt and its friends. Then when we are happy we load
- * the new init_level4_pgt - and continue on.
+ * init_top_pgt and its friends. Then when we are happy we load
+ * the new init_top_pgt - and continue on.
  *
  * The generic code starts (start_kernel) and 'init_mem_mapping' sets
  * up the rest of the pagetables. When it has completed it loads the cr3.
@@ -1923,23 +1909,22 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	pt_end = pt_base + xen_start_info->nr_pt_frames;
 
 	/* Zap identity mapping */
-	init_level4_pgt[0] = __pgd(0);
-
-	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		/* Pre-constructed entries are in pfn, so convert to mfn */
-		/* L4[272] -> level3_ident_pgt
-		 * L4[511] -> level3_kernel_pgt */
-		convert_pfn_mfn(init_level4_pgt);
-
-		/* L3_i[0] -> level2_ident_pgt */
-		convert_pfn_mfn(level3_ident_pgt);
-		/* L3_k[510] -> level2_kernel_pgt
-		 * L3_k[511] -> level2_fixmap_pgt */
-		convert_pfn_mfn(level3_kernel_pgt);
-
-		/* L3_k[511][506] -> level1_fixmap_pgt */
-		convert_pfn_mfn(level2_fixmap_pgt);
-	}
+	init_top_pgt[0] = __pgd(0);
+
+	/* Pre-constructed entries are in pfn, so convert to mfn */
+	/* L4[272] -> level3_ident_pgt  */
+	/* L4[511] -> level3_kernel_pgt */
+	convert_pfn_mfn(init_top_pgt);
+
+	/* L3_i[0] -> level2_ident_pgt */
+	convert_pfn_mfn(level3_ident_pgt);
+	/* L3_k[510] -> level2_kernel_pgt */
+	/* L3_k[511] -> level2_fixmap_pgt */
+	convert_pfn_mfn(level3_kernel_pgt);
+
+	/* L3_k[511][506] -> level1_fixmap_pgt */
+	convert_pfn_mfn(level2_fixmap_pgt);
+
 	/* We get [511][511] and have Xen's version of level2_kernel_pgt */
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
@@ -1960,36 +1945,32 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	/* Copy the initial P->M table mappings if necessary. */
 	i = pgd_index(xen_start_info->mfn_list);
 	if (i && i < pgd_index(__START_KERNEL_map))
-		init_level4_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
-
-	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		/* Make pagetable pieces RO */
-		set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
-		set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
-		set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
-		set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
-		set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
-		set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
-		set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
-		set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
-
-		/* Pin down new L4 */
-		pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
-				  PFN_DOWN(__pa_symbol(init_level4_pgt)));
-
-		/* Unpin Xen-provided one */
-		pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+		init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
+
+	/* Make pagetable pieces RO */
+	set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
+
+	/* Pin down new L4 */
+	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+			  PFN_DOWN(__pa_symbol(init_top_pgt)));
+
+	/* Unpin Xen-provided one */
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
-		/*
-		 * At this stage there can be no user pgd, and no page
-		 * structure to attach it to, so make sure we just set kernel
-		 * pgd.
-		 */
-		xen_mc_batch();
-		__xen_write_cr3(true, __pa(init_level4_pgt));
-		xen_mc_issue(PARAVIRT_LAZY_CPU);
-	} else
-		native_write_cr3(__pa(init_level4_pgt));
+	/*
+	 * At this stage there can be no user pgd, and no page structure to
+	 * attach it to, so make sure we just set kernel pgd.
+	 */
+	xen_mc_batch();
+	__xen_write_cr3(true, __pa(init_top_pgt));
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
 
 	/* We can't that easily rip out L3 and L2, as the Xen pagetables are
 	 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ...  for
@@ -2025,7 +2006,8 @@ static unsigned long __init xen_read_phys_ulong(phys_addr_t addr)
 
 /*
  * Translate a virtual address to a physical one without relying on mapped
- * page tables.
+ * page tables. Don't rely on big pages being aligned in (guest) physical
+ * space!
  */
 static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
 {
@@ -2035,7 +2017,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
 	pmd_t pmd;
 	pte_t pte;
 
-	pa = read_cr3();
+	pa = read_cr3_pa();
 	pgd = native_make_pgd(xen_read_phys_ulong(pa + pgd_index(vaddr) *
 						       sizeof(pgd)));
 	if (!pgd_present(pgd))
@@ -2046,7 +2028,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
 						       sizeof(pud)));
 	if (!pud_present(pud))
 		return 0;
-	pa = pud_pfn(pud) << PAGE_SHIFT;
+	pa = pud_val(pud) & PTE_PFN_MASK;
 	if (pud_large(pud))
 		return pa + (vaddr & ~PUD_MASK);
 
@@ -2054,7 +2036,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
 						       sizeof(pmd)));
 	if (!pmd_present(pmd))
 		return 0;
-	pa = pmd_pfn(pmd) << PAGE_SHIFT;
+	pa = pmd_val(pmd) & PTE_PFN_MASK;
 	if (pmd_large(pmd))
 		return pa + (vaddr & ~PMD_MASK);
 
@@ -2115,7 +2097,7 @@ void __init xen_relocate_p2m(void)
 	pt_phys = pmd_phys + PFN_PHYS(n_pmd);
 	p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
 
-	pgd = __va(read_cr3());
+	pgd = __va(read_cr3_pa());
 	new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
 	idx_p4d = 0;
 	save_pud = n_pud;
@@ -2222,7 +2204,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
 {
 	unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
 
-	BUG_ON(read_cr3() != __pa(initial_page_table));
+	BUG_ON(read_cr3_pa() != __pa(initial_page_table));
 	BUG_ON(cr3 != __pa(swapper_pg_dir));
 
 	/*
@@ -2402,9 +2384,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
 static void __init xen_post_allocator_init(void)
 {
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return;
-
 	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
@@ -2510,9 +2489,6 @@ void __init xen_init_mmu_ops(void)
 {
 	x86_init.paging.pagetable_init = xen_pagetable_init;
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return;
-
 	pv_mmu_ops = xen_mmu_ops;
 
 	memset(dummy_mapping, 0xff, PAGE_SIZE);
@@ -2649,9 +2625,6 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
 	 * this function are redundant and can be ignored.
 	 */
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return 0;
-
 	if (unlikely(order > MAX_CONTIG_ORDER))
 		return -ENOMEM;
 
@@ -2688,9 +2661,6 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 	int success;
 	unsigned long vstart;
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return;
-
 	if (unlikely(order > MAX_CONTIG_ORDER))
 		return;
 
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 090c7eb4dca9..a1895a8e85c1 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -406,7 +406,7 @@ static void __init xen_time_init(void)
 		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
 }
 
-void __init xen_init_time_ops(void)
+void __ref xen_init_time_ops(void)
 {
 	pv_time_ops = xen_time_ops;
 
diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S
index 5e246716d58f..e1a5fbeae08d 100644
--- a/arch/x86/xen/xen-pvh.S
+++ b/arch/x86/xen/xen-pvh.S
@@ -87,7 +87,7 @@ ENTRY(pvh_start_xen)
 	wrmsr
 
 	/* Enable pre-constructed page tables. */
-	mov $_pa(init_level4_pgt), %eax
+	mov $_pa(init_top_pgt), %eax
 	mov %eax, %cr3
 	mov $(X86_CR0_PG | X86_CR0_PE), %eax
 	mov %eax, %cr0
diff --git a/arch/xtensa/include/asm/irq.h b/arch/xtensa/include/asm/irq.h
index f71f88ea7646..19707db966f1 100644
--- a/arch/xtensa/include/asm/irq.h
+++ b/arch/xtensa/include/asm/irq.h
@@ -29,7 +29,8 @@ static inline void variant_irq_disable(unsigned int irq) { }
 # define PLATFORM_NR_IRQS 0
 #endif
 #define XTENSA_NR_IRQS XCHAL_NUM_INTERRUPTS
-#define NR_IRQS (XTENSA_NR_IRQS + VARIANT_NR_IRQS + PLATFORM_NR_IRQS)
+#define NR_IRQS (XTENSA_NR_IRQS + VARIANT_NR_IRQS + PLATFORM_NR_IRQS + 1)
+#define XTENSA_PIC_LINUX_IRQ(hwirq) ((hwirq) + 1)
 
 #if VARIANT_NR_IRQS == 0
 static inline void variant_init_irq(void) { }
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 003eeee3fbc6..30ee8c608853 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -213,8 +213,6 @@ struct mm_struct;
 #define release_segments(mm)	do { } while(0)
 #define forget_segments()	do { } while (0)
 
-#define thread_saved_pc(tsk)	(task_pt_regs(tsk)->pc)
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk)		(task_pt_regs(tsk)->pc)
diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild
index 56aad54e7fb7..b15bf6bc0e94 100644
--- a/arch/xtensa/include/uapi/asm/Kbuild
+++ b/arch/xtensa/include/uapi/asm/Kbuild
@@ -1,25 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += byteorder.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += swab.h
-header-y += termbits.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index a265edd6ac37..99341028cc77 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -34,11 +34,6 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
 {
 	int irq = irq_find_mapping(NULL, hwirq);
 
-	if (hwirq >= NR_IRQS) {
-		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
-				__func__, hwirq);
-	}
-
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 	/* Debugging check for stack overflow: is there less than 1KB free? */
 	{
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 394ef08300b6..33bfa5270d95 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -593,8 +593,7 @@ c_show(struct seq_file *f, void *slot)
 		      (ccount_freq/10000) % 100,
 		      loops_per_jiffy/(500000/HZ),
 		      (loops_per_jiffy/(5000/HZ)) % 100);
-
-	seq_printf(f,"flags\t\t: "
+	seq_puts(f, "flags\t\t: "
 #if XCHAL_HAVE_NMI
 		     "nmi "
 #endif
diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c
index 06937928cb72..74afbf02d07e 100644
--- a/arch/xtensa/kernel/syscall.c
+++ b/arch/xtensa/kernel/syscall.c
@@ -88,7 +88,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		/* At this point:  (!vmm || addr < vmm->vm_end). */
 		if (TASK_SIZE - len < addr)
 			return -ENOMEM;
-		if (!vmm || addr + len <= vmm->vm_start)
+		if (!vmm || addr + len <= vm_start_gap(vmm))
 			return addr;
 		addr = vmm->vm_end;
 		if (flags & MAP_SHARED)
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index 30d9fc21e076..162c77e53ca8 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -118,7 +118,7 @@ SECTIONS
   SECTION_VECTOR (.KernelExceptionVector.text, KERNEL_VECTOR_VADDR)
   SECTION_VECTOR (.UserExceptionVector.literal, USER_VECTOR_VADDR - 4)
   SECTION_VECTOR (.UserExceptionVector.text, USER_VECTOR_VADDR)
-  SECTION_VECTOR (.DoubleExceptionVector.literal, DOUBLEEXC_VECTOR_VADDR - 48)
+  SECTION_VECTOR (.DoubleExceptionVector.literal, DOUBLEEXC_VECTOR_VADDR - 20)
   SECTION_VECTOR (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR)
 #endif
 
@@ -306,13 +306,13 @@ SECTIONS
 		  .UserExceptionVector.literal)
   SECTION_VECTOR (_DoubleExceptionVector_literal,
 		  .DoubleExceptionVector.literal,
-		  DOUBLEEXC_VECTOR_VADDR - 48,
+		  DOUBLEEXC_VECTOR_VADDR - 20,
 		  SIZEOF(.UserExceptionVector.text),
 		  .UserExceptionVector.text)
   SECTION_VECTOR (_DoubleExceptionVector_text,
 		  .DoubleExceptionVector.text,
 		  DOUBLEEXC_VECTOR_VADDR,
-		  48,
+		  20,
 		  .DoubleExceptionVector.literal)
 
   . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3;
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 02e94bb3ad3e..c45b90bb9339 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -317,8 +317,7 @@ static int __init simdisk_init(void)
 	if (simdisk_count > MAX_SIMDISK_COUNT)
 		simdisk_count = MAX_SIMDISK_COUNT;
 
-	sddev = kmalloc(simdisk_count * sizeof(struct simdisk),
-			GFP_KERNEL);
+	sddev = kmalloc_array(simdisk_count, sizeof(*sddev), GFP_KERNEL);
 	if (sddev == NULL)
 		goto out_unregister;
 
diff --git a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
index dbeea2b440a1..1fda7e20dfcb 100644
--- a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
+++ b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
@@ -24,16 +24,18 @@
 
 /* Interrupt configuration. */
 
-#define PLATFORM_NR_IRQS	10
+#define PLATFORM_NR_IRQS	0
 
 /* Default assignment of LX60 devices to external interrupts. */
 
 #ifdef CONFIG_XTENSA_MX
 #define DUART16552_INTNUM	XCHAL_EXTINT3_NUM
 #define OETH_IRQ		XCHAL_EXTINT4_NUM
+#define C67X00_IRQ		XCHAL_EXTINT8_NUM
 #else
 #define DUART16552_INTNUM	XCHAL_EXTINT0_NUM
 #define OETH_IRQ		XCHAL_EXTINT1_NUM
+#define C67X00_IRQ		XCHAL_EXTINT5_NUM
 #endif
 
 /*
@@ -63,5 +65,5 @@
 
 #define C67X00_PADDR		(XCHAL_KIO_PADDR + 0x0D0D0000)
 #define C67X00_SIZE		0x10
-#define C67X00_IRQ		5
+
 #endif /* __XTENSA_XTAVNET_HARDWARE_H */
diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c
index 779be723eb2b..42285f35d313 100644
--- a/arch/xtensa/platforms/xtfpga/setup.c
+++ b/arch/xtensa/platforms/xtfpga/setup.c
@@ -175,8 +175,8 @@ static struct resource ethoc_res[] = {
 		.flags = IORESOURCE_MEM,
 	},
 	[2] = { /* IRQ number */
-		.start = OETH_IRQ,
-		.end   = OETH_IRQ,
+		.start = XTENSA_PIC_LINUX_IRQ(OETH_IRQ),
+		.end   = XTENSA_PIC_LINUX_IRQ(OETH_IRQ),
 		.flags = IORESOURCE_IRQ,
 	},
 };
@@ -213,8 +213,8 @@ static struct resource c67x00_res[] = {
 		.flags = IORESOURCE_MEM,
 	},
 	[1] = { /* IRQ number */
-		.start = C67X00_IRQ,
-		.end   = C67X00_IRQ,
+		.start = XTENSA_PIC_LINUX_IRQ(C67X00_IRQ),
+		.end   = XTENSA_PIC_LINUX_IRQ(C67X00_IRQ),
 		.flags = IORESOURCE_IRQ,
 	},
 };
@@ -247,7 +247,7 @@ static struct resource serial_resource = {
 static struct plat_serial8250_port serial_platform_data[] = {
 	[0] = {
 		.mapbase	= DUART16552_PADDR,
-		.irq		= DUART16552_INTNUM,
+		.irq		= XTENSA_PIC_LINUX_IRQ(DUART16552_INTNUM),
 		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
 				  UPF_IOREMAP,
 		.iotype		= XCHAL_HAVE_BE ? UPIO_MEM32BE : UPIO_MEM32,
diff --git a/block/Kconfig b/block/Kconfig
index a8ad7e77db28..89cd28f8d051 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -6,7 +6,6 @@ menuconfig BLOCK
        default y
        select SBITMAP
        select SRCU
-       select DAX
        help
 	 Provide block layer support for the kernel.
 
diff --git a/block/badblocks.c b/block/badblocks.c
index 6ebcef282314..43c71166e1e2 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -533,6 +533,7 @@ ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
 	case 3:
 		if (newline != '\n')
 			return -EINVAL;
+		/* fall through */
 	case 2:
 		if (length <= 0)
 			return -EINVAL;
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index c8a32fb345cf..78b2e0db4fb2 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -52,7 +52,7 @@ BFQG_FLAG_FNS(idling)
 BFQG_FLAG_FNS(empty)
 #undef BFQG_FLAG_FNS
 
-/* This should be called with the queue_lock held. */
+/* This should be called with the scheduler lock held. */
 static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
 {
 	unsigned long long now;
@@ -67,7 +67,7 @@ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
 	bfqg_stats_clear_waiting(stats);
 }
 
-/* This should be called with the queue_lock held. */
+/* This should be called with the scheduler lock held. */
 static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
 						 struct bfq_group *curr_bfqg)
 {
@@ -81,7 +81,7 @@ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
 	bfqg_stats_mark_waiting(stats);
 }
 
-/* This should be called with the queue_lock held. */
+/* This should be called with the scheduler lock held. */
 static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
 {
 	unsigned long long now;
@@ -203,12 +203,30 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
 
 static void bfqg_get(struct bfq_group *bfqg)
 {
-	return blkg_get(bfqg_to_blkg(bfqg));
+	bfqg->ref++;
 }
 
 void bfqg_put(struct bfq_group *bfqg)
 {
-	return blkg_put(bfqg_to_blkg(bfqg));
+	bfqg->ref--;
+
+	if (bfqg->ref == 0)
+		kfree(bfqg);
+}
+
+static void bfqg_and_blkg_get(struct bfq_group *bfqg)
+{
+	/* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
+	bfqg_get(bfqg);
+
+	blkg_get(bfqg_to_blkg(bfqg));
+}
+
+void bfqg_and_blkg_put(struct bfq_group *bfqg)
+{
+	bfqg_put(bfqg);
+
+	blkg_put(bfqg_to_blkg(bfqg));
 }
 
 void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
@@ -312,7 +330,11 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
 	if (bfqq) {
 		bfqq->ioprio = bfqq->new_ioprio;
 		bfqq->ioprio_class = bfqq->new_ioprio_class;
-		bfqg_get(bfqg);
+		/*
+		 * Make sure that bfqg and its associated blkg do not
+		 * disappear before entity.
+		 */
+		bfqg_and_blkg_get(bfqg);
 	}
 	entity->parent = bfqg->my_entity; /* NULL for root group */
 	entity->sched_data = &bfqg->sched_data;
@@ -399,6 +421,8 @@ struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
 		return NULL;
 	}
 
+	/* see comments in bfq_bic_update_cgroup for why refcounting */
+	bfqg_get(bfqg);
 	return &bfqg->pd;
 }
 
@@ -426,7 +450,7 @@ void bfq_pd_free(struct blkg_policy_data *pd)
 	struct bfq_group *bfqg = pd_to_bfqg(pd);
 
 	bfqg_stats_exit(&bfqg->stats);
-	return kfree(bfqg);
+	bfqg_put(bfqg);
 }
 
 void bfq_pd_reset_stats(struct blkg_policy_data *pd)
@@ -496,9 +520,10 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
  * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
  * it on the new one.  Avoid putting the entity on the old group idle tree.
  *
- * Must be called under the queue lock; the cgroup owning @bfqg must
- * not disappear (by now this just means that we are called under
- * rcu_read_lock()).
+ * Must be called under the scheduler lock, to make sure that the blkg
+ * owning @bfqg does not disappear (see comments in
+ * bfq_bic_update_cgroup on guaranteeing the consistency of blkg
+ * objects).
  */
 void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		   struct bfq_group *bfqg)
@@ -519,16 +544,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
 	else if (entity->on_st)
 		bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
-	bfqg_put(bfqq_group(bfqq));
+	bfqg_and_blkg_put(bfqq_group(bfqq));
 
-	/*
-	 * Here we use a reference to bfqg.  We don't need a refcounter
-	 * as the cgroup reference will not be dropped, so that its
-	 * destroy() callback will not be invoked.
-	 */
 	entity->parent = bfqg->my_entity;
 	entity->sched_data = &bfqg->sched_data;
-	bfqg_get(bfqg);
+	/* pin down bfqg and its associated blkg  */
+	bfqg_and_blkg_get(bfqg);
 
 	if (bfq_bfqq_busy(bfqq)) {
 		bfq_pos_tree_add_move(bfqd, bfqq);
@@ -545,8 +566,9 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  * @bic: the bic to move.
  * @blkcg: the blk-cgroup to move to.
  *
- * Move bic to blkcg, assuming that bfqd->queue is locked; the caller
- * has to make sure that the reference to cgroup is valid across the call.
+ * Move bic to blkcg, assuming that bfqd->lock is held; which makes
+ * sure that the reference to cgroup is valid across the call (see
+ * comments in bfq_bic_update_cgroup on this issue)
  *
  * NOTE: an alternative approach might have been to store the current
  * cgroup in bfqq and getting a reference to it, reducing the lookup
@@ -604,6 +626,57 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
 		goto out;
 
 	bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
+	/*
+	 * Update blkg_path for bfq_log_* functions. We cache this
+	 * path, and update it here, for the following
+	 * reasons. Operations on blkg objects in blk-cgroup are
+	 * protected with the request_queue lock, and not with the
+	 * lock that protects the instances of this scheduler
+	 * (bfqd->lock). This exposes BFQ to the following sort of
+	 * race.
+	 *
+	 * The blkg_lookup performed in bfq_get_queue, protected
+	 * through rcu, may happen to return the address of a copy of
+	 * the original blkg. If this is the case, then the
+	 * bfqg_and_blkg_get performed in bfq_get_queue, to pin down
+	 * the blkg, is useless: it does not prevent blk-cgroup code
+	 * from destroying both the original blkg and all objects
+	 * directly or indirectly referred by the copy of the
+	 * blkg.
+	 *
+	 * On the bright side, destroy operations on a blkg invoke, as
+	 * a first step, hooks of the scheduler associated with the
+	 * blkg. And these hooks are executed with bfqd->lock held for
+	 * BFQ. As a consequence, for any blkg associated with the
+	 * request queue this instance of the scheduler is attached
+	 * to, we are guaranteed that such a blkg is not destroyed, and
+	 * that all the pointers it contains are consistent, while we
+	 * are holding bfqd->lock. A blkg_lookup performed with
+	 * bfqd->lock held then returns a fully consistent blkg, which
+	 * remains consistent until this lock is held.
+	 *
+	 * Thanks to the last fact, and to the fact that: (1) bfqg has
+	 * been obtained through a blkg_lookup in the above
+	 * assignment, and (2) bfqd->lock is being held, here we can
+	 * safely use the policy data for the involved blkg (i.e., the
+	 * field bfqg->pd) to get to the blkg associated with bfqg,
+	 * and then we can safely use any field of blkg. After we
+	 * release bfqd->lock, even just getting blkg through this
+	 * bfqg may cause dangling references to be traversed, as
+	 * bfqg->pd may not exist any more.
+	 *
+	 * In view of the above facts, here we cache, in the bfqg, any
+	 * blkg data we may need for this bic, and for its associated
+	 * bfq_queue. As of now, we need to cache only the path of the
+	 * blkg, which is used in the bfq_log_* functions.
+	 *
+	 * Finally, note that bfqg itself needs to be protected from
+	 * destruction on the blkg_free of the original blkg (which
+	 * invokes bfq_pd_free). We use an additional private
+	 * refcounter for bfqg, to let it disappear only after no
+	 * bfq_queue refers to it any longer.
+	 */
+	blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
 	bic->blkcg_serial_nr = serial_nr;
 out:
 	rcu_read_unlock();
@@ -640,8 +713,6 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
  * @bfqd: the device data structure with the root group.
  * @bfqg: the group to move from.
  * @st: the service tree with the entities.
- *
- * Needs queue_lock to be taken and reference to be valid over the call.
  */
 static void bfq_reparent_active_entities(struct bfq_data *bfqd,
 					 struct bfq_group *bfqg,
@@ -692,8 +763,7 @@ void bfq_pd_offline(struct blkg_policy_data *pd)
 		/*
 		 * The idle tree may still contain bfq_queues belonging
 		 * to exited task because they never migrated to a different
-		 * cgroup from the one being destroyed now.  No one else
-		 * can access them so it's safe to act without any lock.
+		 * cgroup from the one being destroyed now.
 		 */
 		bfq_flush_idle_tree(st);
 
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index bd8499ef157c..12bbc6b8657d 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -56,6 +56,11 @@
  * rotational or flash-based devices, and to get the job done quickly
  * for applications consisting in many I/O-bound processes.
  *
+ * NOTE: if the main or only goal, with a given device, is to achieve
+ * the maximum-possible throughput at all times, then do switch off
+ * all low-latency heuristics for that device, by setting low_latency
+ * to 0.
+ *
  * BFQ is described in [1], where also a reference to the initial, more
  * theoretical paper on BFQ can be found. The interested reader can find
  * in the latter paper full details on the main algorithm, as well as
@@ -720,8 +725,12 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
 }
 
 static void
-bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
+		      struct bfq_io_cq *bic, bool bfq_already_existing)
 {
+	unsigned int old_wr_coeff = bfqq->wr_coeff;
+	bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq);
+
 	if (bic->saved_idle_window)
 		bfq_mark_bfqq_idle_window(bfqq);
 	else
@@ -749,6 +758,14 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 
 	/* make sure weight will be updated, however we got here */
 	bfqq->entity.prio_changed = 1;
+
+	if (likely(!busy))
+		return;
+
+	if (old_wr_coeff == 1 && bfqq->wr_coeff > 1)
+		bfqd->wr_busy_queues++;
+	else if (old_wr_coeff > 1 && bfqq->wr_coeff == 1)
+		bfqd->wr_busy_queues--;
 }
 
 static int bfqq_process_refs(struct bfq_queue *bfqq)
@@ -3660,7 +3677,7 @@ void bfq_put_queue(struct bfq_queue *bfqq)
 
 	kmem_cache_free(bfq_pool, bfqq);
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
-	bfqg_put(bfqg);
+	bfqg_and_blkg_put(bfqg);
 #endif
 }
 
@@ -4285,10 +4302,16 @@ static void bfq_put_rq_priv_body(struct bfq_queue *bfqq)
 	bfq_put_queue(bfqq);
 }
 
-static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
+static void bfq_finish_request(struct request *rq)
 {
-	struct bfq_queue *bfqq = RQ_BFQQ(rq);
-	struct bfq_data *bfqd = bfqq->bfqd;
+	struct bfq_queue *bfqq;
+	struct bfq_data *bfqd;
+
+	if (!rq->elv.icq)
+		return;
+
+	bfqq = RQ_BFQQ(rq);
+	bfqd = bfqq->bfqd;
 
 	if (rq->rq_flags & RQF_STARTED)
 		bfqg_stats_update_completion(bfqq_group(bfqq),
@@ -4319,7 +4342,7 @@ static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
 		 */
 
 		if (!RB_EMPTY_NODE(&rq->rb_node))
-			bfq_remove_request(q, rq);
+			bfq_remove_request(rq->q, rq);
 		bfq_put_rq_priv_body(bfqq);
 	}
 
@@ -4389,20 +4412,21 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
 /*
  * Allocate bfq data structures associated with this request.
  */
-static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
-			      struct bio *bio)
+static void bfq_prepare_request(struct request *rq, struct bio *bio)
 {
+	struct request_queue *q = rq->q;
 	struct bfq_data *bfqd = q->elevator->elevator_data;
-	struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
+	struct bfq_io_cq *bic;
 	const int is_sync = rq_is_sync(rq);
 	struct bfq_queue *bfqq;
 	bool new_queue = false;
-	bool split = false;
+	bool bfqq_already_existing = false, split = false;
 
-	spin_lock_irq(&bfqd->lock);
+	if (!rq->elv.icq)
+		return;
+	bic = icq_to_bic(rq->elv.icq);
 
-	if (!bic)
-		goto queue_fail;
+	spin_lock_irq(&bfqd->lock);
 
 	bfq_check_ioprio_change(bic, bio);
 
@@ -4427,6 +4451,8 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
 				bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio,
 								 true, is_sync,
 								 NULL);
+			else
+				bfqq_already_existing = true;
 		}
 	}
 
@@ -4452,7 +4478,8 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
 			 * queue: restore the idle window and the
 			 * possible weight raising period.
 			 */
-			bfq_bfqq_resume_state(bfqq, bic);
+			bfq_bfqq_resume_state(bfqq, bfqd, bic,
+					      bfqq_already_existing);
 		}
 	}
 
@@ -4460,13 +4487,6 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
 		bfq_handle_burst(bfqd, bfqq);
 
 	spin_unlock_irq(&bfqd->lock);
-
-	return 0;
-
-queue_fail:
-	spin_unlock_irq(&bfqd->lock);
-
-	return 1;
 }
 
 static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
@@ -4945,8 +4965,8 @@ static struct elv_fs_entry bfq_attrs[] = {
 
 static struct elevator_type iosched_bfq_mq = {
 	.ops.mq = {
-		.get_rq_priv		= bfq_get_rq_private,
-		.put_rq_priv		= bfq_put_rq_private,
+		.prepare_request	= bfq_prepare_request,
+		.finish_request		= bfq_finish_request,
 		.exit_icq		= bfq_exit_icq,
 		.insert_requests	= bfq_insert_requests,
 		.dispatch_request	= bfq_dispatch_request,
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index ae783c06dfd9..5c3bf9861492 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -759,6 +759,12 @@ struct bfq_group {
 	/* must be the first member */
 	struct blkg_policy_data pd;
 
+	/* cached path for this blkg (see comments in bfq_bic_update_cgroup) */
+	char blkg_path[128];
+
+	/* reference counter (see comments in bfq_bic_update_cgroup) */
+	int ref;
+
 	struct bfq_entity entity;
 	struct bfq_sched_data sched_data;
 
@@ -838,7 +844,7 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
 struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
 struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
 struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
-void bfqg_put(struct bfq_group *bfqg);
+void bfqg_and_blkg_put(struct bfq_group *bfqg);
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 extern struct cftype bfq_blkcg_legacy_files[];
@@ -910,20 +916,13 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq);
 struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
 
 #define bfq_log_bfqq(bfqd, bfqq, fmt, args...)	do {			\
-	char __pbuf[128];						\
-									\
-	blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \
-	blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, (bfqq)->pid, \
+	blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, (bfqq)->pid,\
 			bfq_bfqq_sync((bfqq)) ? 'S' : 'A',		\
-			  __pbuf, ##args);				\
+			bfqq_group(bfqq)->blkg_path, ##args);		\
 } while (0)
 
-#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)	do {			\
-	char __pbuf[128];						\
-									\
-	blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf));		\
-	blk_add_trace_msg((bfqd)->queue, "%s " fmt, __pbuf, ##args);	\
-} while (0)
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)	\
+	blk_add_trace_msg((bfqd)->queue, "%s " fmt, (bfqg)->blkg_path, ##args)
 
 #else /* CONFIG_BFQ_GROUP_IOSCHED */
 
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index b4fc3e4260b7..8726ede19eef 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1114,12 +1114,21 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity,
 bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
 {
 	struct bfq_sched_data *sd = entity->sched_data;
-	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
-	int is_in_service = entity == sd->in_service_entity;
+	struct bfq_service_tree *st;
+	bool is_in_service;
 
 	if (!entity->on_st) /* entity never activated, or already inactive */
 		return false;
 
+	/*
+	 * If we get here, then entity is active, which implies that
+	 * bfq_group_set_parent has already been invoked for the group
+	 * represented by entity. Therefore, the field
+	 * entity->sched_data has been set, and we can safely use it.
+	 */
+	st = bfq_entity_service_tree(entity);
+	is_in_service = entity == sd->in_service_entity;
+
 	if (is_in_service)
 		bfq_calc_finish(entity, entity->service);
 
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 5384713d48bc..b8a3a65f7364 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -175,6 +175,9 @@ bool bio_integrity_enabled(struct bio *bio)
 	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
 		return false;
 
+	if (!bio_sectors(bio))
+		return false;
+
 	/* Already protected? */
 	if (bio_integrity(bio))
 		return false;
@@ -221,7 +224,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
  * @bio:	bio to generate/verify integrity metadata for
  * @proc_fn:	Pointer to the relevant processing function
  */
-static int bio_integrity_process(struct bio *bio,
+static blk_status_t bio_integrity_process(struct bio *bio,
 				 integrity_processing_fn *proc_fn)
 {
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
@@ -229,7 +232,7 @@ static int bio_integrity_process(struct bio *bio,
 	struct bvec_iter bviter;
 	struct bio_vec bv;
 	struct bio_integrity_payload *bip = bio_integrity(bio);
-	unsigned int ret = 0;
+	blk_status_t ret = BLK_STS_OK;
 	void *prot_buf = page_address(bip->bip_vec->bv_page) +
 		bip->bip_vec->bv_offset;
 
@@ -366,7 +369,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 	struct bio *bio = bip->bip_bio;
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 
-	bio->bi_error = bio_integrity_process(bio, bi->profile->verify_fn);
+	bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn);
 
 	/* Restore original bio completion handler */
 	bio->bi_end_io = bip->bip_end_io;
@@ -395,7 +398,7 @@ void bio_integrity_endio(struct bio *bio)
 	 * integrity metadata.  Restore original bio end_io handler
 	 * and run it.
 	 */
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		bio->bi_end_io = bip->bip_end_io;
 		bio_endio(bio);
 
diff --git a/block/bio.c b/block/bio.c
index 888e7801c638..1cfcd0df3f30 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -240,20 +240,21 @@ fallback:
 	return bvl;
 }
 
-static void __bio_free(struct bio *bio)
+void bio_uninit(struct bio *bio)
 {
 	bio_disassociate_task(bio);
 
 	if (bio_integrity(bio))
 		bio_integrity_free(bio);
 }
+EXPORT_SYMBOL(bio_uninit);
 
 static void bio_free(struct bio *bio)
 {
 	struct bio_set *bs = bio->bi_pool;
 	void *p;
 
-	__bio_free(bio);
+	bio_uninit(bio);
 
 	if (bs) {
 		bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
@@ -271,6 +272,11 @@ static void bio_free(struct bio *bio)
 	}
 }
 
+/*
+ * Users of this function have their own bio allocation. Subsequently,
+ * they must remember to pair any call to bio_init() with bio_uninit()
+ * when IO has completed, or when the bio is released.
+ */
 void bio_init(struct bio *bio, struct bio_vec *table,
 	      unsigned short max_vecs)
 {
@@ -297,7 +303,7 @@ void bio_reset(struct bio *bio)
 {
 	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
 
-	__bio_free(bio);
+	bio_uninit(bio);
 
 	memset(bio, 0, BIO_RESET_BYTES);
 	bio->bi_flags = flags;
@@ -309,8 +315,8 @@ static struct bio *__bio_chain_endio(struct bio *bio)
 {
 	struct bio *parent = bio->bi_private;
 
-	if (!parent->bi_error)
-		parent->bi_error = bio->bi_error;
+	if (!parent->bi_status)
+		parent->bi_status = bio->bi_status;
 	bio_put(bio);
 	return parent;
 }
@@ -363,6 +369,8 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
 	struct bio_list punt, nopunt;
 	struct bio *bio;
 
+	if (WARN_ON_ONCE(!bs->rescue_workqueue))
+		return;
 	/*
 	 * In order to guarantee forward progress we must punt only bios that
 	 * were allocated from this bio_set; otherwise, if there was a bio on
@@ -474,7 +482,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
 
 		if (current->bio_list &&
 		    (!bio_list_empty(&current->bio_list[0]) ||
-		     !bio_list_empty(&current->bio_list[1])))
+		     !bio_list_empty(&current->bio_list[1])) &&
+		    bs->rescue_workqueue)
 			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 
 		p = mempool_alloc(bs->bio_pool, gfp_mask);
@@ -544,7 +553,7 @@ EXPORT_SYMBOL(zero_fill_bio);
  *
  * Description:
  *   Put a reference to a &struct bio, either one you have gotten with
- *   bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
+ *   bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
  **/
 void bio_put(struct bio *bio)
 {
@@ -593,6 +602,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	bio->bi_bdev = bio_src->bi_bdev;
 	bio_set_flag(bio, BIO_CLONED);
 	bio->bi_opf = bio_src->bi_opf;
+	bio->bi_write_hint = bio_src->bi_write_hint;
 	bio->bi_iter = bio_src->bi_iter;
 	bio->bi_io_vec = bio_src->bi_io_vec;
 
@@ -676,6 +686,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
 		return NULL;
 	bio->bi_bdev		= bio_src->bi_bdev;
 	bio->bi_opf		= bio_src->bi_opf;
+	bio->bi_write_hint	= bio_src->bi_write_hint;
 	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
 	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
 
@@ -918,7 +929,7 @@ static void submit_bio_wait_endio(struct bio *bio)
 {
 	struct submit_bio_ret *ret = bio->bi_private;
 
-	ret->error = bio->bi_error;
+	ret->error = blk_status_to_errno(bio->bi_status);
 	complete(&ret->event);
 }
 
@@ -1817,8 +1828,8 @@ again:
 	}
 
 	if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
-		trace_block_bio_complete(bdev_get_queue(bio->bi_bdev),
-					 bio, bio->bi_error);
+		trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio,
+					 blk_status_to_errno(bio->bi_status));
 		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
 	}
 
@@ -1921,9 +1932,29 @@ void bioset_free(struct bio_set *bs)
 }
 EXPORT_SYMBOL(bioset_free);
 
-static struct bio_set *__bioset_create(unsigned int pool_size,
-				       unsigned int front_pad,
-				       bool create_bvec_pool)
+/**
+ * bioset_create  - Create a bio_set
+ * @pool_size:	Number of bio and bio_vecs to cache in the mempool
+ * @front_pad:	Number of bytes to allocate in front of the returned bio
+ * @flags:	Flags to modify behavior, currently %BIOSET_NEED_BVECS
+ *              and %BIOSET_NEED_RESCUER
+ *
+ * Description:
+ *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
+ *    to ask for a number of bytes to be allocated in front of the bio.
+ *    Front pad allocation is useful for embedding the bio inside
+ *    another structure, to avoid allocating extra data to go with the bio.
+ *    Note that the bio must be embedded at the END of that structure always,
+ *    or things will break badly.
+ *    If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
+ *    for allocating iovecs.  This pool is not needed e.g. for bio_clone_fast().
+ *    If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used to
+ *    dispatch queued requests when the mempool runs out of space.
+ *
+ */
+struct bio_set *bioset_create(unsigned int pool_size,
+			      unsigned int front_pad,
+			      int flags)
 {
 	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
 	struct bio_set *bs;
@@ -1948,12 +1979,15 @@ static struct bio_set *__bioset_create(unsigned int pool_size,
 	if (!bs->bio_pool)
 		goto bad;
 
-	if (create_bvec_pool) {
+	if (flags & BIOSET_NEED_BVECS) {
 		bs->bvec_pool = biovec_create_pool(pool_size);
 		if (!bs->bvec_pool)
 			goto bad;
 	}
 
+	if (!(flags & BIOSET_NEED_RESCUER))
+		return bs;
+
 	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
 	if (!bs->rescue_workqueue)
 		goto bad;
@@ -1963,41 +1997,8 @@ bad:
 	bioset_free(bs);
 	return NULL;
 }
-
-/**
- * bioset_create  - Create a bio_set
- * @pool_size:	Number of bio and bio_vecs to cache in the mempool
- * @front_pad:	Number of bytes to allocate in front of the returned bio
- *
- * Description:
- *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
- *    to ask for a number of bytes to be allocated in front of the bio.
- *    Front pad allocation is useful for embedding the bio inside
- *    another structure, to avoid allocating extra data to go with the bio.
- *    Note that the bio must be embedded at the END of that structure always,
- *    or things will break badly.
- */
-struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
-{
-	return __bioset_create(pool_size, front_pad, true);
-}
 EXPORT_SYMBOL(bioset_create);
 
-/**
- * bioset_create_nobvec  - Create a bio_set without bio_vec mempool
- * @pool_size:	Number of bio to cache in the mempool
- * @front_pad:	Number of bytes to allocate in front of the returned bio
- *
- * Description:
- *    Same functionality as bioset_create() except that mempool is not
- *    created for bio_vecs. Saving some memory for bio_clone_fast() users.
- */
-struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
-{
-	return __bioset_create(pool_size, front_pad, false);
-}
-EXPORT_SYMBOL(bioset_create_nobvec);
-
 #ifdef CONFIG_BLK_CGROUP
 
 /**
@@ -2112,7 +2113,7 @@ static int __init init_bio(void)
 	bio_integrity_init();
 	biovec_init_slabs();
 
-	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
+	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 	if (!fs_bio_set)
 		panic("bio: can't allocate bios\n");
 
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 7c2947128f58..0480892e97e5 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -74,7 +74,7 @@ static void blkg_free(struct blkcg_gq *blkg)
 			blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
 
 	if (blkg->blkcg != &blkcg_root)
-		blk_exit_rl(&blkg->rl);
+		blk_exit_rl(blkg->q, &blkg->rl);
 
 	blkg_rwstat_exit(&blkg->stat_ios);
 	blkg_rwstat_exit(&blkg->stat_bytes);
diff --git a/block/blk-core.c b/block/blk-core.c
index c580b0138a7f..af393d5a9680 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -129,11 +129,70 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL(blk_rq_init);
 
+static const struct {
+	int		errno;
+	const char	*name;
+} blk_errors[] = {
+	[BLK_STS_OK]		= { 0,		"" },
+	[BLK_STS_NOTSUPP]	= { -EOPNOTSUPP, "operation not supported" },
+	[BLK_STS_TIMEOUT]	= { -ETIMEDOUT,	"timeout" },
+	[BLK_STS_NOSPC]		= { -ENOSPC,	"critical space allocation" },
+	[BLK_STS_TRANSPORT]	= { -ENOLINK,	"recoverable transport" },
+	[BLK_STS_TARGET]	= { -EREMOTEIO,	"critical target" },
+	[BLK_STS_NEXUS]		= { -EBADE,	"critical nexus" },
+	[BLK_STS_MEDIUM]	= { -ENODATA,	"critical medium" },
+	[BLK_STS_PROTECTION]	= { -EILSEQ,	"protection" },
+	[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
+	[BLK_STS_AGAIN]		= { -EAGAIN,	"nonblocking retry" },
+
+	/* device mapper special case, should not leak out: */
+	[BLK_STS_DM_REQUEUE]	= { -EREMCHG, "dm internal retry" },
+
+	/* everything else not covered above: */
+	[BLK_STS_IOERR]		= { -EIO,	"I/O" },
+};
+
+blk_status_t errno_to_blk_status(int errno)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(blk_errors); i++) {
+		if (blk_errors[i].errno == errno)
+			return (__force blk_status_t)i;
+	}
+
+	return BLK_STS_IOERR;
+}
+EXPORT_SYMBOL_GPL(errno_to_blk_status);
+
+int blk_status_to_errno(blk_status_t status)
+{
+	int idx = (__force int)status;
+
+	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+		return -EIO;
+	return blk_errors[idx].errno;
+}
+EXPORT_SYMBOL_GPL(blk_status_to_errno);
+
+static void print_req_error(struct request *req, blk_status_t status)
+{
+	int idx = (__force int)status;
+
+	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+		return;
+
+	printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
+			   __func__, blk_errors[idx].name, req->rq_disk ?
+			   req->rq_disk->disk_name : "?",
+			   (unsigned long long)blk_rq_pos(req));
+}
+
 static void req_bio_endio(struct request *rq, struct bio *bio,
-			  unsigned int nbytes, int error)
+			  unsigned int nbytes, blk_status_t error)
 {
 	if (error)
-		bio->bi_error = error;
+		bio->bi_status = error;
 
 	if (unlikely(rq->rq_flags & RQF_QUIET))
 		bio_set_flag(bio, BIO_QUIET);
@@ -177,10 +236,13 @@ static void blk_delay_work(struct work_struct *work)
  * Description:
  *   Sometimes queueing needs to be postponed for a little while, to allow
  *   resources to come back. This function will make sure that queueing is
- *   restarted around the specified time. Queue lock must be held.
+ *   restarted around the specified time.
  */
 void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 {
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	if (likely(!blk_queue_dead(q)))
 		queue_delayed_work(kblockd_workqueue, &q->delay_work,
 				   msecs_to_jiffies(msecs));
@@ -198,6 +260,9 @@ EXPORT_SYMBOL(blk_delay_queue);
  **/
 void blk_start_queue_async(struct request_queue *q)
 {
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
 	blk_run_queue_async(q);
 }
@@ -210,11 +275,13 @@ EXPORT_SYMBOL(blk_start_queue_async);
  * Description:
  *   blk_start_queue() will clear the stop flag on the queue, and call
  *   the request_fn for the queue if it was in a stopped state when
- *   entered. Also see blk_stop_queue(). Queue lock must be held.
+ *   entered. Also see blk_stop_queue().
  **/
 void blk_start_queue(struct request_queue *q)
 {
+	lockdep_assert_held(q->queue_lock);
 	WARN_ON(!irqs_disabled());
+	WARN_ON_ONCE(q->mq_ops);
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
 	__blk_run_queue(q);
@@ -233,10 +300,13 @@ EXPORT_SYMBOL(blk_start_queue);
  *   or if it simply chooses not to queue more I/O at one point, it can
  *   call this function to prevent the request_fn from being called until
  *   the driver has signalled it's ready to go again. This happens by calling
- *   blk_start_queue() to restart queue operations. Queue lock must be held.
+ *   blk_start_queue() to restart queue operations.
  **/
 void blk_stop_queue(struct request_queue *q)
 {
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	cancel_delayed_work(&q->delay_work);
 	queue_flag_set(QUEUE_FLAG_STOPPED, q);
 }
@@ -289,6 +359,9 @@ EXPORT_SYMBOL(blk_sync_queue);
  */
 inline void __blk_run_queue_uncond(struct request_queue *q)
 {
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	if (unlikely(blk_queue_dead(q)))
 		return;
 
@@ -310,11 +383,13 @@ EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);
  * @q:	The queue to run
  *
  * Description:
- *    See @blk_run_queue. This variant must be called with the queue lock
- *    held and interrupts disabled.
+ *    See @blk_run_queue.
  */
 void __blk_run_queue(struct request_queue *q)
 {
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	if (unlikely(blk_queue_stopped(q)))
 		return;
 
@@ -328,10 +403,18 @@ EXPORT_SYMBOL(__blk_run_queue);
  *
  * Description:
  *    Tells kblockd to perform the equivalent of @blk_run_queue on behalf
- *    of us. The caller must hold the queue lock.
+ *    of us.
+ *
+ * Note:
+ *    Since it is not allowed to run q->delay_work after blk_cleanup_queue()
+ *    has canceled q->delay_work, callers must hold the queue lock to avoid
+ *    race conditions between blk_cleanup_queue() and blk_run_queue_async().
  */
 void blk_run_queue_async(struct request_queue *q)
 {
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
 		mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
 }
@@ -349,6 +432,8 @@ void blk_run_queue(struct request_queue *q)
 {
 	unsigned long flags;
 
+	WARN_ON_ONCE(q->mq_ops);
+
 	spin_lock_irqsave(q->queue_lock, flags);
 	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
@@ -377,6 +462,7 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
 	int i;
 
 	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
 
 	while (true) {
 		bool drain = false;
@@ -455,6 +541,8 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
  */
 void blk_queue_bypass_start(struct request_queue *q)
 {
+	WARN_ON_ONCE(q->mq_ops);
+
 	spin_lock_irq(q->queue_lock);
 	q->bypass_depth++;
 	queue_flag_set(QUEUE_FLAG_BYPASS, q);
@@ -481,6 +569,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_start);
  * @q: queue of interest
  *
  * Leave bypass mode and restore the normal queueing behavior.
+ *
+ * Note: although blk_queue_bypass_start() is only called for blk-sq queues,
+ * this function is called for both blk-sq and blk-mq queues.
  */
 void blk_queue_bypass_end(struct request_queue *q)
 {
@@ -648,13 +739,19 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q,
 	if (!rl->rq_pool)
 		return -ENOMEM;
 
+	if (rl != &q->root_rl)
+		WARN_ON_ONCE(!blk_get_queue(q));
+
 	return 0;
 }
 
-void blk_exit_rl(struct request_list *rl)
+void blk_exit_rl(struct request_queue *q, struct request_list *rl)
 {
-	if (rl->rq_pool)
+	if (rl->rq_pool) {
 		mempool_destroy(rl->rq_pool);
+		if (rl != &q->root_rl)
+			blk_put_queue(q);
+	}
 }
 
 struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
@@ -726,7 +823,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (q->id < 0)
 		goto fail_q;
 
-	q->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+	q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 	if (!q->bio_split)
 		goto fail_id;
 
@@ -872,6 +969,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
 
 int blk_init_allocated_queue(struct request_queue *q)
 {
+	WARN_ON_ONCE(q->mq_ops);
+
 	q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size);
 	if (!q->fq)
 		return -ENOMEM;
@@ -1009,6 +1108,8 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
 	struct request_list *rl;
 	int on_thresh, off_thresh;
 
+	WARN_ON_ONCE(q->mq_ops);
+
 	spin_lock_irq(q->queue_lock);
 	q->nr_requests = nr;
 	blk_queue_congestion_threshold(q);
@@ -1071,6 +1172,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
 	int may_queue;
 	req_flags_t rq_flags = RQF_ALLOCED;
 
+	lockdep_assert_held(q->queue_lock);
+
 	if (unlikely(blk_queue_dying(q)))
 		return ERR_PTR(-ENODEV);
 
@@ -1244,12 +1347,20 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
 	struct request_list *rl;
 	struct request *rq;
 
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	rl = blk_get_rl(q, bio);	/* transferred to @rq on success */
 retry:
 	rq = __get_request(rl, op, bio, gfp_mask);
 	if (!IS_ERR(rq))
 		return rq;
 
+	if (op & REQ_NOWAIT) {
+		blk_put_rl(rl);
+		return ERR_PTR(-EAGAIN);
+	}
+
 	if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
 		blk_put_rl(rl);
 		return rq;
@@ -1277,16 +1388,18 @@ retry:
 	goto retry;
 }
 
-static struct request *blk_old_get_request(struct request_queue *q, int rw,
-		gfp_t gfp_mask)
+static struct request *blk_old_get_request(struct request_queue *q,
+					   unsigned int op, gfp_t gfp_mask)
 {
 	struct request *rq;
 
+	WARN_ON_ONCE(q->mq_ops);
+
 	/* create ioc upfront */
 	create_io_context(gfp_mask, q->node);
 
 	spin_lock_irq(q->queue_lock);
-	rq = get_request(q, rw, NULL, gfp_mask);
+	rq = get_request(q, op, NULL, gfp_mask);
 	if (IS_ERR(rq)) {
 		spin_unlock_irq(q->queue_lock);
 		return rq;
@@ -1299,14 +1412,24 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 	return rq;
 }
 
-struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
+struct request *blk_get_request(struct request_queue *q, unsigned int op,
+				gfp_t gfp_mask)
 {
-	if (q->mq_ops)
-		return blk_mq_alloc_request(q, rw,
+	struct request *req;
+
+	if (q->mq_ops) {
+		req = blk_mq_alloc_request(q, op,
 			(gfp_mask & __GFP_DIRECT_RECLAIM) ?
 				0 : BLK_MQ_REQ_NOWAIT);
-	else
-		return blk_old_get_request(q, rw, gfp_mask);
+		if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
+			q->mq_ops->initialize_rq_fn(req);
+	} else {
+		req = blk_old_get_request(q, op, gfp_mask);
+		if (!IS_ERR(req) && q->initialize_rq_fn)
+			q->initialize_rq_fn(req);
+	}
+
+	return req;
 }
 EXPORT_SYMBOL(blk_get_request);
 
@@ -1322,6 +1445,9 @@ EXPORT_SYMBOL(blk_get_request);
  */
 void blk_requeue_request(struct request_queue *q, struct request *rq)
 {
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
@@ -1396,9 +1522,6 @@ static void blk_pm_put_request(struct request *rq)
 static inline void blk_pm_put_request(struct request *rq) {}
 #endif
 
-/*
- * queue lock must be held
- */
 void __blk_put_request(struct request_queue *q, struct request *req)
 {
 	req_flags_t rq_flags = req->rq_flags;
@@ -1411,6 +1534,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 		return;
 	}
 
+	lockdep_assert_held(q->queue_lock);
+
 	blk_pm_put_request(req);
 
 	elv_completed_request(q, req);
@@ -1640,6 +1765,7 @@ void blk_init_request_from_bio(struct request *req, struct bio *bio)
 		req->ioprio = ioc->ioprio;
 	else
 		req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+	req->write_hint = bio->bi_write_hint;
 	blk_rq_bio_prep(req->q, req, bio);
 }
 EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
@@ -1659,10 +1785,10 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	 */
 	blk_queue_bounce(q, &bio);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
 		return BLK_QC_T_NONE;
 	}
@@ -1720,7 +1846,10 @@ get_rq:
 	req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
 	if (IS_ERR(req)) {
 		__wbt_done(q->rq_wb, wb_acct);
-		bio->bi_error = PTR_ERR(req);
+		if (PTR_ERR(req) == -ENOMEM)
+			bio->bi_status = BLK_STS_RESOURCE;
+		else
+			bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
 		goto out_unlock;
 	}
@@ -1875,7 +2004,7 @@ generic_make_request_checks(struct bio *bio)
 {
 	struct request_queue *q;
 	int nr_sectors = bio_sectors(bio);
-	int err = -EIO;
+	blk_status_t status = BLK_STS_IOERR;
 	char b[BDEVNAME_SIZE];
 	struct hd_struct *part;
 
@@ -1894,6 +2023,14 @@ generic_make_request_checks(struct bio *bio)
 		goto end_io;
 	}
 
+	/*
+	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
+	 * if queue is not a request based queue.
+	 */
+
+	if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
+		goto not_supported;
+
 	part = bio->bi_bdev->bd_part;
 	if (should_fail_request(part, bio->bi_iter.bi_size) ||
 	    should_fail_request(&part_to_disk(part)->part0,
@@ -1918,7 +2055,7 @@ generic_make_request_checks(struct bio *bio)
 	    !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
 		bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
 		if (!nr_sectors) {
-			err = 0;
+			status = BLK_STS_OK;
 			goto end_io;
 		}
 	}
@@ -1970,9 +2107,9 @@ generic_make_request_checks(struct bio *bio)
 	return true;
 
 not_supported:
-	err = -EOPNOTSUPP;
+	status = BLK_STS_NOTSUPP;
 end_io:
-	bio->bi_error = err;
+	bio->bi_status = status;
 	bio_endio(bio);
 	return false;
 }
@@ -2051,7 +2188,7 @@ blk_qc_t generic_make_request(struct bio *bio)
 	do {
 		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-		if (likely(blk_queue_enter(q, false) == 0)) {
+		if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
 			struct bio_list lower, same;
 
 			/* Create a fresh bio_list for all subordinate requests */
@@ -2076,7 +2213,11 @@ blk_qc_t generic_make_request(struct bio *bio)
 			bio_list_merge(&bio_list_on_stack[0], &same);
 			bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
 		} else {
-			bio_io_error(bio);
+			if (unlikely(!blk_queue_dying(q) &&
+					(bio->bi_opf & REQ_NOWAIT)))
+				bio_wouldblock_error(bio);
+			else
+				bio_io_error(bio);
 		}
 		bio = bio_list_pop(&bio_list_on_stack[0]);
 	} while (bio);
@@ -2177,29 +2318,29 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
  * @q:  the queue to submit the request
  * @rq: the request being queued
  */
-int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 {
 	unsigned long flags;
 	int where = ELEVATOR_INSERT_BACK;
 
 	if (blk_cloned_rq_check_limits(q, rq))
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	if (rq->rq_disk &&
 	    should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	if (q->mq_ops) {
 		if (blk_queue_io_stat(q))
 			blk_account_io_start(rq, true);
 		blk_mq_sched_insert_request(rq, false, true, false, false);
-		return 0;
+		return BLK_STS_OK;
 	}
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	if (unlikely(blk_queue_dying(q))) {
 		spin_unlock_irqrestore(q->queue_lock, flags);
-		return -ENODEV;
+		return BLK_STS_IOERR;
 	}
 
 	/*
@@ -2216,7 +2357,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 		__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	return 0;
+	return BLK_STS_OK;
 }
 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
 
@@ -2232,9 +2373,6 @@ EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
  *
  * Return:
  *     The number of bytes to fail.
- *
- * Context:
- *     queue_lock must be held.
  */
 unsigned int blk_rq_err_bytes(const struct request *rq)
 {
@@ -2374,15 +2512,15 @@ void blk_account_io_start(struct request *rq, bool new_io)
  * Return:
  *     Pointer to the request at the top of @q if available.  Null
  *     otherwise.
- *
- * Context:
- *     queue_lock must be held.
  */
 struct request *blk_peek_request(struct request_queue *q)
 {
 	struct request *rq;
 	int ret;
 
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	while ((rq = __elv_next_request(q)) != NULL) {
 
 		rq = blk_pm_peek_request(q, rq);
@@ -2450,15 +2588,14 @@ struct request *blk_peek_request(struct request_queue *q)
 			rq = NULL;
 			break;
 		} else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
-			int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO;
-
 			rq->rq_flags |= RQF_QUIET;
 			/*
 			 * Mark this request as started so we don't trigger
 			 * any debug logic in the end I/O path.
 			 */
 			blk_start_request(rq);
-			__blk_end_request_all(rq, err);
+			__blk_end_request_all(rq, ret == BLKPREP_INVALID ?
+					BLK_STS_TARGET : BLK_STS_IOERR);
 		} else {
 			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
 			break;
@@ -2499,12 +2636,12 @@ void blk_dequeue_request(struct request *rq)
  *
  *     Block internal functions which don't want to start timer should
  *     call blk_dequeue_request().
- *
- * Context:
- *     queue_lock must be held.
  */
 void blk_start_request(struct request *req)
 {
+	lockdep_assert_held(req->q->queue_lock);
+	WARN_ON_ONCE(req->q->mq_ops);
+
 	blk_dequeue_request(req);
 
 	if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
@@ -2529,14 +2666,14 @@ EXPORT_SYMBOL(blk_start_request);
  * Return:
  *     Pointer to the request at the top of @q if available.  Null
  *     otherwise.
- *
- * Context:
- *     queue_lock must be held.
  */
 struct request *blk_fetch_request(struct request_queue *q)
 {
 	struct request *rq;
 
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	rq = blk_peek_request(q);
 	if (rq)
 		blk_start_request(rq);
@@ -2547,7 +2684,7 @@ EXPORT_SYMBOL(blk_fetch_request);
 /**
  * blk_update_request - Special helper function for request stacking drivers
  * @req:      the request being processed
- * @error:    %0 for success, < %0 for error
+ * @error:    block status code
  * @nr_bytes: number of bytes to complete @req
  *
  * Description:
@@ -2566,49 +2703,19 @@ EXPORT_SYMBOL(blk_fetch_request);
  *     %false - this request doesn't have any more data
  *     %true  - this request has more data
  **/
-bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
+bool blk_update_request(struct request *req, blk_status_t error,
+		unsigned int nr_bytes)
 {
 	int total_bytes;
 
-	trace_block_rq_complete(req, error, nr_bytes);
+	trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);
 
 	if (!req->bio)
 		return false;
 
-	if (error && !blk_rq_is_passthrough(req) &&
-	    !(req->rq_flags & RQF_QUIET)) {
-		char *error_type;
-
-		switch (error) {
-		case -ENOLINK:
-			error_type = "recoverable transport";
-			break;
-		case -EREMOTEIO:
-			error_type = "critical target";
-			break;
-		case -EBADE:
-			error_type = "critical nexus";
-			break;
-		case -ETIMEDOUT:
-			error_type = "timeout";
-			break;
-		case -ENOSPC:
-			error_type = "critical space allocation";
-			break;
-		case -ENODATA:
-			error_type = "critical medium";
-			break;
-		case -EIO:
-		default:
-			error_type = "I/O";
-			break;
-		}
-		printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
-				   __func__, error_type, req->rq_disk ?
-				   req->rq_disk->disk_name : "?",
-				   (unsigned long long)blk_rq_pos(req));
-
-	}
+	if (unlikely(error && !blk_rq_is_passthrough(req) &&
+		     !(req->rq_flags & RQF_QUIET)))
+		print_req_error(req, error);
 
 	blk_account_io_completion(req, nr_bytes);
 
@@ -2644,8 +2751,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		return false;
 	}
 
-	WARN_ON_ONCE(req->rq_flags & RQF_SPECIAL_PAYLOAD);
-
 	req->__data_len -= total_bytes;
 
 	/* update sector only for requests with clear definition of sector */
@@ -2658,23 +2763,25 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
 	}
 
-	/*
-	 * If total number of sectors is less than the first segment
-	 * size, something has gone terribly wrong.
-	 */
-	if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
-		blk_dump_rq_flags(req, "request botched");
-		req->__data_len = blk_rq_cur_bytes(req);
-	}
+	if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
+		/*
+		 * If total number of sectors is less than the first segment
+		 * size, something has gone terribly wrong.
+		 */
+		if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
+			blk_dump_rq_flags(req, "request botched");
+			req->__data_len = blk_rq_cur_bytes(req);
+		}
 
-	/* recalculate the number of segments */
-	blk_recalc_rq_segments(req);
+		/* recalculate the number of segments */
+		blk_recalc_rq_segments(req);
+	}
 
 	return true;
 }
 EXPORT_SYMBOL_GPL(blk_update_request);
 
-static bool blk_update_bidi_request(struct request *rq, int error,
+static bool blk_update_bidi_request(struct request *rq, blk_status_t error,
 				    unsigned int nr_bytes,
 				    unsigned int bidi_bytes)
 {
@@ -2712,13 +2819,13 @@ void blk_unprep_request(struct request *req)
 }
 EXPORT_SYMBOL_GPL(blk_unprep_request);
 
-/*
- * queue lock must be held
- */
-void blk_finish_request(struct request *req, int error)
+void blk_finish_request(struct request *req, blk_status_t error)
 {
 	struct request_queue *q = req->q;
 
+	lockdep_assert_held(req->q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	if (req->rq_flags & RQF_STATS)
 		blk_stat_add(req);
 
@@ -2752,7 +2859,7 @@ EXPORT_SYMBOL(blk_finish_request);
 /**
  * blk_end_bidi_request - Complete a bidi request
  * @rq:         the request to complete
- * @error:      %0 for success, < %0 for error
+ * @error:      block status code
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
@@ -2766,12 +2873,14 @@ EXPORT_SYMBOL(blk_finish_request);
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-static bool blk_end_bidi_request(struct request *rq, int error,
+static bool blk_end_bidi_request(struct request *rq, blk_status_t error,
 				 unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	struct request_queue *q = rq->q;
 	unsigned long flags;
 
+	WARN_ON_ONCE(q->mq_ops);
+
 	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
 		return true;
 
@@ -2785,7 +2894,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
 /**
  * __blk_end_bidi_request - Complete a bidi request with queue lock held
  * @rq:         the request to complete
- * @error:      %0 for success, < %0 for error
+ * @error:      block status code
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
@@ -2797,9 +2906,12 @@ static bool blk_end_bidi_request(struct request *rq, int error,
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-static bool __blk_end_bidi_request(struct request *rq, int error,
+static bool __blk_end_bidi_request(struct request *rq, blk_status_t error,
 				   unsigned int nr_bytes, unsigned int bidi_bytes)
 {
+	lockdep_assert_held(rq->q->queue_lock);
+	WARN_ON_ONCE(rq->q->mq_ops);
+
 	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
 		return true;
 
@@ -2811,7 +2923,7 @@ static bool __blk_end_bidi_request(struct request *rq, int error,
 /**
  * blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
+ * @error:    block status code
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -2822,8 +2934,10 @@ static bool __blk_end_bidi_request(struct request *rq, int error,
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+bool blk_end_request(struct request *rq, blk_status_t error,
+		unsigned int nr_bytes)
 {
+	WARN_ON_ONCE(rq->q->mq_ops);
 	return blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
 EXPORT_SYMBOL(blk_end_request);
@@ -2831,12 +2945,12 @@ EXPORT_SYMBOL(blk_end_request);
 /**
  * blk_end_request_all - Helper function for drives to finish the request.
  * @rq: the request to finish
- * @error: %0 for success, < %0 for error
+ * @error: block status code
  *
  * Description:
  *     Completely finish @rq.
  */
-void blk_end_request_all(struct request *rq, int error)
+void blk_end_request_all(struct request *rq, blk_status_t error)
 {
 	bool pending;
 	unsigned int bidi_bytes = 0;
@@ -2852,7 +2966,7 @@ EXPORT_SYMBOL(blk_end_request_all);
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
+ * @error:    block status code
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -2862,8 +2976,12 @@ EXPORT_SYMBOL(blk_end_request_all);
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+bool __blk_end_request(struct request *rq, blk_status_t error,
+		unsigned int nr_bytes)
 {
+	lockdep_assert_held(rq->q->queue_lock);
+	WARN_ON_ONCE(rq->q->mq_ops);
+
 	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
 EXPORT_SYMBOL(__blk_end_request);
@@ -2871,16 +2989,19 @@ EXPORT_SYMBOL(__blk_end_request);
 /**
  * __blk_end_request_all - Helper function for drives to finish the request.
  * @rq: the request to finish
- * @error: %0 for success, < %0 for error
+ * @error:    block status code
  *
  * Description:
  *     Completely finish @rq.  Must be called with queue lock held.
  */
-void __blk_end_request_all(struct request *rq, int error)
+void __blk_end_request_all(struct request *rq, blk_status_t error)
 {
 	bool pending;
 	unsigned int bidi_bytes = 0;
 
+	lockdep_assert_held(rq->q->queue_lock);
+	WARN_ON_ONCE(rq->q->mq_ops);
+
 	if (unlikely(blk_bidi_rq(rq)))
 		bidi_bytes = blk_rq_bytes(rq->next_rq);
 
@@ -2892,7 +3013,7 @@ EXPORT_SYMBOL(__blk_end_request_all);
 /**
  * __blk_end_request_cur - Helper function to finish the current request chunk.
  * @rq: the request to finish the current chunk for
- * @error: %0 for success, < %0 for error
+ * @error:    block status code
  *
  * Description:
  *     Complete the current consecutively mapped chunk from @rq.  Must
@@ -2902,7 +3023,7 @@ EXPORT_SYMBOL(__blk_end_request_all);
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  */
-bool __blk_end_request_cur(struct request *rq, int error)
+bool __blk_end_request_cur(struct request *rq, blk_status_t error)
 {
 	return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
@@ -3145,6 +3266,8 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 			    bool from_schedule)
 	__releases(q->queue_lock)
 {
+	lockdep_assert_held(q->queue_lock);
+
 	trace_block_unplug(q, depth, !from_schedule);
 
 	if (from_schedule)
@@ -3243,7 +3366,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 		 * Short-circuit if @q is dead
 		 */
 		if (unlikely(blk_queue_dying(q))) {
-			__blk_end_request_all(rq, -ENODEV);
+			__blk_end_request_all(rq, BLK_STS_IOERR);
 			continue;
 		}
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index a9451e3b8587..5c0f3dc446dc 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -16,7 +16,7 @@
  * @rq: request to complete
  * @error: end I/O status of the request
  */
-static void blk_end_sync_rq(struct request *rq, int error)
+static void blk_end_sync_rq(struct request *rq, blk_status_t error)
 {
 	struct completion *waiting = rq->end_io_data;
 
@@ -69,7 +69,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 
 	if (unlikely(blk_queue_dying(q))) {
 		rq->rq_flags |= RQF_QUIET;
-		__blk_end_request_all(rq, -ENXIO);
+		__blk_end_request_all(rq, BLK_STS_IOERR);
 		spin_unlock_irq(q->queue_lock);
 		return;
 	}
diff --git a/block/blk-flush.c b/block/blk-flush.c
index c4e0880b54bb..ed5fe322abba 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -164,7 +164,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
  */
 static bool blk_flush_complete_seq(struct request *rq,
 				   struct blk_flush_queue *fq,
-				   unsigned int seq, int error)
+				   unsigned int seq, blk_status_t error)
 {
 	struct request_queue *q = rq->q;
 	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
@@ -216,7 +216,7 @@ static bool blk_flush_complete_seq(struct request *rq,
 	return kicked | queued;
 }
 
-static void flush_end_io(struct request *flush_rq, int error)
+static void flush_end_io(struct request *flush_rq, blk_status_t error)
 {
 	struct request_queue *q = flush_rq->q;
 	struct list_head *running;
@@ -341,11 +341,13 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 	return blk_flush_queue_rq(flush_rq, false);
 }
 
-static void flush_data_end_io(struct request *rq, int error)
+static void flush_data_end_io(struct request *rq, blk_status_t error)
 {
 	struct request_queue *q = rq->q;
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
 
+	lockdep_assert_held(q->queue_lock);
+
 	/*
 	 * Updating q->in_flight[] here for making this tag usable
 	 * early. Because in blk_queue_start_tag(),
@@ -382,7 +384,7 @@ static void flush_data_end_io(struct request *rq, int error)
 		blk_run_queue_async(q);
 }
 
-static void mq_flush_data_end_io(struct request *rq, int error)
+static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
 {
 	struct request_queue *q = rq->q;
 	struct blk_mq_hw_ctx *hctx;
@@ -411,9 +413,6 @@ static void mq_flush_data_end_io(struct request *rq, int error)
  * or __blk_mq_run_hw_queue() to dispatch request.
  * @rq is being submitted.  Analyze what needs to be done and put it on the
  * right queue.
- *
- * CONTEXT:
- * spin_lock_irq(q->queue_lock) in !mq case
  */
 void blk_insert_flush(struct request *rq)
 {
@@ -422,6 +421,9 @@ void blk_insert_flush(struct request *rq)
 	unsigned int policy = blk_flush_policy(fflags, rq);
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
 
+	if (!q->mq_ops)
+		lockdep_assert_held(q->queue_lock);
+
 	/*
 	 * @policy now records what operations need to be done.  Adjust
 	 * REQ_PREFLUSH and FUA for the driver.
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 0f891a9aff4d..feb30570eaf5 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -384,9 +384,9 @@ static struct kobj_type integrity_ktype = {
 	.sysfs_ops	= &integrity_ops,
 };
 
-static int blk_integrity_nop_fn(struct blk_integrity_iter *iter)
+static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
 {
-	return 0;
+	return BLK_STS_OK;
 }
 
 static const struct blk_integrity_profile nop_profile = {
diff --git a/block/blk-map.c b/block/blk-map.c
index 3b5cb863318f..2547016aa7aa 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -16,6 +16,8 @@
  */
 int blk_rq_append_bio(struct request *rq, struct bio *bio)
 {
+	blk_queue_bounce(rq->q, &bio);
+
 	if (!rq->bio) {
 		blk_rq_bio_prep(rq->q, rq, bio);
 	} else {
@@ -72,15 +74,13 @@ static int __blk_rq_map_user_iov(struct request *rq,
 		map_data->offset += bio->bi_iter.bi_size;
 
 	orig_bio = bio;
-	blk_queue_bounce(q, &bio);
 
 	/*
 	 * We link the bounce buffer in and could have to traverse it
 	 * later so we have to get a ref to prevent it from being freed
 	 */
-	bio_get(bio);
-
 	ret = blk_rq_append_bio(rq, bio);
+	bio_get(bio);
 	if (ret) {
 		bio_endio(bio);
 		__blk_rq_unmap_user(orig_bio);
@@ -249,7 +249,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 		return ret;
 	}
 
-	blk_queue_bounce(q, &rq->bio);
 	return 0;
 }
 EXPORT_SYMBOL(blk_rq_map_kern);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 3990ae406341..99038830fb42 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -108,31 +108,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 	bool do_split = true;
 	struct bio *new = NULL;
 	const unsigned max_sectors = get_max_io_size(q, bio);
-	unsigned bvecs = 0;
 
 	bio_for_each_segment(bv, bio, iter) {
 		/*
-		 * With arbitrary bio size, the incoming bio may be very
-		 * big. We have to split the bio into small bios so that
-		 * each holds at most BIO_MAX_PAGES bvecs because
-		 * bio_clone() can fail to allocate big bvecs.
-		 *
-		 * It should have been better to apply the limit per
-		 * request queue in which bio_clone() is involved,
-		 * instead of globally. The biggest blocker is the
-		 * bio_clone() in bio bounce.
-		 *
-		 * If bio is splitted by this reason, we should have
-		 * allowed to continue bios merging, but don't do
-		 * that now for making the change simple.
-		 *
-		 * TODO: deal with bio bounce's bio_clone() gracefully
-		 * and convert the global limit into per-queue limit.
-		 */
-		if (bvecs++ >= BIO_MAX_PAGES)
-			goto split;
-
-		/*
 		 * If the queue doesn't support SG gaps and adding this
 		 * offset would create a gap, disallow it.
 		 */
@@ -202,8 +180,7 @@ split:
 	return do_split ? new : NULL;
 }
 
-void blk_queue_split(struct request_queue *q, struct bio **bio,
-		     struct bio_set *bs)
+void blk_queue_split(struct request_queue *q, struct bio **bio)
 {
 	struct bio *split, *res;
 	unsigned nsegs;
@@ -211,13 +188,13 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
 	switch (bio_op(*bio)) {
 	case REQ_OP_DISCARD:
 	case REQ_OP_SECURE_ERASE:
-		split = blk_bio_discard_split(q, *bio, bs, &nsegs);
+		split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs);
 		break;
 	case REQ_OP_WRITE_ZEROES:
-		split = blk_bio_write_zeroes_split(q, *bio, bs, &nsegs);
+		split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs);
 		break;
 	case REQ_OP_WRITE_SAME:
-		split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
+		split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs);
 		break;
 	default:
 		split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
@@ -671,6 +648,9 @@ static void blk_account_io_merge(struct request *req)
 static struct request *attempt_merge(struct request_queue *q,
 				     struct request *req, struct request *next)
 {
+	if (!q->mq_ops)
+		lockdep_assert_held(q->queue_lock);
+
 	if (!rq_mergeable(req) || !rq_mergeable(next))
 		return NULL;
 
@@ -693,6 +673,13 @@ static struct request *attempt_merge(struct request_queue *q,
 		return NULL;
 
 	/*
+	 * Don't allow merge of different write hints, or for a hint with
+	 * non-hint IO.
+	 */
+	if (req->write_hint != next->write_hint)
+		return NULL;
+
+	/*
 	 * If we are allowed to merge, then append bio list
 	 * from next to rq and release next. merge_requests_fn
 	 * will have updated segment counts, update sector
@@ -811,6 +798,13 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 	    !blk_write_same_mergeable(rq->bio, bio))
 		return false;
 
+	/*
+	 * Don't allow merge of different write hints, or for a hint with
+	 * non-hint IO.
+	 */
+	if (rq->write_hint != bio->bi_write_hint)
+		return false;
+
 	return true;
 }
 
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 8e61e8640e17..2cca4fc43f45 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -14,10 +14,15 @@
 #include "blk.h"
 #include "blk-mq.h"
 
-static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
-			      const int cpu)
+static int cpu_to_queue_index(unsigned int nr_queues, const int cpu,
+			      const struct cpumask *online_mask)
 {
-	return cpu * nr_queues / nr_cpus;
+	/*
+	 * Non online CPU will be mapped to queue index 0.
+	 */
+	if (!cpumask_test_cpu(cpu, online_mask))
+		return 0;
+	return cpu % nr_queues;
 }
 
 static int get_first_sibling(unsigned int cpu)
@@ -36,55 +41,26 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
 	unsigned int *map = set->mq_map;
 	unsigned int nr_queues = set->nr_hw_queues;
 	const struct cpumask *online_mask = cpu_online_mask;
-	unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
-	cpumask_var_t cpus;
-
-	if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
-		return -ENOMEM;
-
-	cpumask_clear(cpus);
-	nr_cpus = nr_uniq_cpus = 0;
-	for_each_cpu(i, online_mask) {
-		nr_cpus++;
-		first_sibling = get_first_sibling(i);
-		if (!cpumask_test_cpu(first_sibling, cpus))
-			nr_uniq_cpus++;
-		cpumask_set_cpu(i, cpus);
-	}
-
-	queue = 0;
-	for_each_possible_cpu(i) {
-		if (!cpumask_test_cpu(i, online_mask)) {
-			map[i] = 0;
-			continue;
-		}
+	unsigned int cpu, first_sibling;
 
+	for_each_possible_cpu(cpu) {
 		/*
-		 * Easy case - we have equal or more hardware queues. Or
-		 * there are no thread siblings to take into account. Do
-		 * 1:1 if enough, or sequential mapping if less.
+		 * First do sequential mapping between CPUs and queues.
+		 * In case we still have CPUs to map, and we have some number of
+		 * threads per cores then map sibling threads to the same queue for
+		 * performace optimizations.
 		 */
-		if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) {
-			map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue);
-			queue++;
-			continue;
+		if (cpu < nr_queues) {
+			map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+		} else {
+			first_sibling = get_first_sibling(cpu);
+			if (first_sibling == cpu)
+				map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+			else
+				map[cpu] = map[first_sibling];
 		}
-
-		/*
-		 * Less then nr_cpus queues, and we have some number of
-		 * threads per cores. Map sibling threads to the same
-		 * queue.
-		 */
-		first_sibling = get_first_sibling(i);
-		if (first_sibling == i) {
-			map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues,
-							queue);
-			queue++;
-		} else
-			map[i] = map[first_sibling];
 	}
 
-	free_cpumask_var(cpus);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(blk_mq_map_queues);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 803aed4d7221..9ebc2945f991 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -114,10 +114,12 @@ static ssize_t queue_state_write(void *data, const char __user *buf,
 		blk_mq_run_hw_queues(q, true);
 	} else if (strcmp(op, "start") == 0) {
 		blk_mq_start_stopped_hw_queues(q, true);
+	} else if (strcmp(op, "kick") == 0) {
+		blk_mq_kick_requeue_list(q);
 	} else {
 		pr_err("%s: unsupported operation '%s'\n", __func__, op);
 inval:
-		pr_err("%s: use either 'run' or 'start'\n", __func__);
+		pr_err("%s: use 'run', 'start' or 'kick'\n", __func__);
 		return -EINVAL;
 	}
 	return count;
@@ -133,6 +135,29 @@ static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
 	}
 }
 
+static int queue_write_hint_show(void *data, struct seq_file *m)
+{
+	struct request_queue *q = data;
+	int i;
+
+	for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
+		seq_printf(m, "hint%d: %llu\n", i, q->write_hints[i]);
+
+	return 0;
+}
+
+static ssize_t queue_write_hint_store(void *data, const char __user *buf,
+				      size_t count, loff_t *ppos)
+{
+	struct request_queue *q = data;
+	int i;
+
+	for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
+		q->write_hints[i] = 0;
+
+	return count;
+}
+
 static int queue_poll_stat_show(void *data, struct seq_file *m)
 {
 	struct request_queue *q = data;
@@ -267,6 +292,14 @@ static const char *const rqf_name[] = {
 };
 #undef RQF_NAME
 
+#define RQAF_NAME(name) [REQ_ATOM_##name] = #name
+static const char *const rqaf_name[] = {
+	RQAF_NAME(COMPLETE),
+	RQAF_NAME(STARTED),
+	RQAF_NAME(POLL_SLEPT),
+};
+#undef RQAF_NAME
+
 int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
 {
 	const struct blk_mq_ops *const mq_ops = rq->q->mq_ops;
@@ -283,6 +316,8 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
 	seq_puts(m, ", .rq_flags=");
 	blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name,
 		       ARRAY_SIZE(rqf_name));
+	seq_puts(m, ", .atomic_flags=");
+	blk_flags_show(m, rq->atomic_flags, rqaf_name, ARRAY_SIZE(rqaf_name));
 	seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag,
 		   rq->internal_tag);
 	if (mq_ops->show_rq)
@@ -298,6 +333,37 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
 }
 EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show);
 
+static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
+	__acquires(&q->requeue_lock)
+{
+	struct request_queue *q = m->private;
+
+	spin_lock_irq(&q->requeue_lock);
+	return seq_list_start(&q->requeue_list, *pos);
+}
+
+static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct request_queue *q = m->private;
+
+	return seq_list_next(v, &q->requeue_list, pos);
+}
+
+static void queue_requeue_list_stop(struct seq_file *m, void *v)
+	__releases(&q->requeue_lock)
+{
+	struct request_queue *q = m->private;
+
+	spin_unlock_irq(&q->requeue_lock);
+}
+
+static const struct seq_operations queue_requeue_list_seq_ops = {
+	.start	= queue_requeue_list_start,
+	.next	= queue_requeue_list_next,
+	.stop	= queue_requeue_list_stop,
+	.show	= blk_mq_debugfs_rq_show,
+};
+
 static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
 	__acquires(&hctx->lock)
 {
@@ -329,6 +395,36 @@ static const struct seq_operations hctx_dispatch_seq_ops = {
 	.show	= blk_mq_debugfs_rq_show,
 };
 
+struct show_busy_params {
+	struct seq_file		*m;
+	struct blk_mq_hw_ctx	*hctx;
+};
+
+/*
+ * Note: the state of a request may change while this function is in progress,
+ * e.g. due to a concurrent blk_mq_finish_request() call.
+ */
+static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved)
+{
+	const struct show_busy_params *params = data;
+
+	if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx &&
+	    test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
+		__blk_mq_debugfs_rq_show(params->m,
+					 list_entry_rq(&rq->queuelist));
+}
+
+static int hctx_busy_show(void *data, struct seq_file *m)
+{
+	struct blk_mq_hw_ctx *hctx = data;
+	struct show_busy_params params = { .m = m, .hctx = hctx };
+
+	blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq,
+				&params);
+
+	return 0;
+}
+
 static int hctx_ctx_map_show(void *data, struct seq_file *m)
 {
 	struct blk_mq_hw_ctx *hctx = data;
@@ -655,7 +751,9 @@ const struct file_operations blk_mq_debugfs_fops = {
 
 static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
 	{"poll_stat", 0400, queue_poll_stat_show},
+	{"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops},
 	{"state", 0600, queue_state_show, queue_state_write},
+	{"write_hints", 0600, queue_write_hint_show, queue_write_hint_store},
 	{},
 };
 
@@ -663,6 +761,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
 	{"state", 0400, hctx_state_show},
 	{"flags", 0400, hctx_flags_show},
 	{"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops},
+	{"busy", 0400, hctx_busy_show},
 	{"ctx_map", 0400, hctx_ctx_map_show},
 	{"tags", 0400, hctx_tags_show},
 	{"tags_bitmap", 0400, hctx_tags_bitmap_show},
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 1f5b692526ae..7f0dc48ffb40 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -31,11 +31,10 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
 
-static void __blk_mq_sched_assign_ioc(struct request_queue *q,
-				      struct request *rq,
-				      struct bio *bio,
-				      struct io_context *ioc)
+void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
 {
+	struct request_queue *q = rq->q;
+	struct io_context *ioc = rq_ioc(bio);
 	struct io_cq *icq;
 
 	spin_lock_irq(q->queue_lock);
@@ -47,90 +46,47 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q,
 		if (!icq)
 			return;
 	}
-
+	get_io_context(icq->ioc);
 	rq->elv.icq = icq;
-	if (!blk_mq_sched_get_rq_priv(q, rq, bio)) {
-		rq->rq_flags |= RQF_ELVPRIV;
-		get_io_context(icq->ioc);
-		return;
-	}
-
-	rq->elv.icq = NULL;
 }
 
-static void blk_mq_sched_assign_ioc(struct request_queue *q,
-				    struct request *rq, struct bio *bio)
+/*
+ * Mark a hardware queue as needing a restart. For shared queues, maintain
+ * a count of how many hardware queues are marked for restart.
+ */
+static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
 {
-	struct io_context *ioc;
+	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+		return;
+
+	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+		struct request_queue *q = hctx->queue;
 
-	ioc = rq_ioc(bio);
-	if (ioc)
-		__blk_mq_sched_assign_ioc(q, rq, bio, ioc);
+		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+			atomic_inc(&q->shared_hctx_restart);
+	} else
+		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 }
 
-struct request *blk_mq_sched_get_request(struct request_queue *q,
-					 struct bio *bio,
-					 unsigned int op,
-					 struct blk_mq_alloc_data *data)
+static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
 {
-	struct elevator_queue *e = q->elevator;
-	struct request *rq;
-
-	blk_queue_enter_live(q);
-	data->q = q;
-	if (likely(!data->ctx))
-		data->ctx = blk_mq_get_ctx(q);
-	if (likely(!data->hctx))
-		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
-
-	if (e) {
-		data->flags |= BLK_MQ_REQ_INTERNAL;
-
-		/*
-		 * Flush requests are special and go directly to the
-		 * dispatch list.
-		 */
-		if (!op_is_flush(op) && e->type->ops.mq.get_request) {
-			rq = e->type->ops.mq.get_request(q, op, data);
-			if (rq)
-				rq->rq_flags |= RQF_QUEUED;
-		} else
-			rq = __blk_mq_alloc_request(data, op);
-	} else {
-		rq = __blk_mq_alloc_request(data, op);
-	}
-
-	if (rq) {
-		if (!op_is_flush(op)) {
-			rq->elv.icq = NULL;
-			if (e && e->type->icq_cache)
-				blk_mq_sched_assign_ioc(q, rq, bio);
-		}
-		data->hctx->queued++;
-		return rq;
-	}
+	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+		return false;
 
-	blk_queue_exit(q);
-	return NULL;
-}
+	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+		struct request_queue *q = hctx->queue;
 
-void blk_mq_sched_put_request(struct request *rq)
-{
-	struct request_queue *q = rq->q;
-	struct elevator_queue *e = q->elevator;
+		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+			atomic_dec(&q->shared_hctx_restart);
+	} else
+		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 
-	if (rq->rq_flags & RQF_ELVPRIV) {
-		blk_mq_sched_put_rq_priv(rq->q, rq);
-		if (rq->elv.icq) {
-			put_io_context(rq->elv.icq->ioc);
-			rq->elv.icq = NULL;
-		}
+	if (blk_mq_hctx_has_pending(hctx)) {
+		blk_mq_run_hw_queue(hctx, true);
+		return true;
 	}
 
-	if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
-		e->type->ops.mq.put_request(rq);
-	else
-		blk_mq_finish_request(rq);
+	return false;
 }
 
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
@@ -141,7 +97,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 	bool did_work = false;
 	LIST_HEAD(rq_list);
 
-	if (unlikely(blk_mq_hctx_stopped(hctx)))
+	/* RCU or SRCU read lock is needed before checking quiesced flag */
+	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
 		return;
 
 	hctx->run++;
@@ -221,19 +178,73 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
 
+/*
+ * Reverse check our software queue for entries that we could potentially
+ * merge with. Currently includes a hand-wavy stop count of 8, to not spend
+ * too much time checking for merges.
+ */
+static bool blk_mq_attempt_merge(struct request_queue *q,
+				 struct blk_mq_ctx *ctx, struct bio *bio)
+{
+	struct request *rq;
+	int checked = 8;
+
+	lockdep_assert_held(&ctx->lock);
+
+	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
+		bool merged = false;
+
+		if (!checked--)
+			break;
+
+		if (!blk_rq_merge_ok(rq, bio))
+			continue;
+
+		switch (blk_try_merge(rq, bio)) {
+		case ELEVATOR_BACK_MERGE:
+			if (blk_mq_sched_allow_merge(q, rq, bio))
+				merged = bio_attempt_back_merge(q, rq, bio);
+			break;
+		case ELEVATOR_FRONT_MERGE:
+			if (blk_mq_sched_allow_merge(q, rq, bio))
+				merged = bio_attempt_front_merge(q, rq, bio);
+			break;
+		case ELEVATOR_DISCARD_MERGE:
+			merged = bio_attempt_discard_merge(q, rq, bio);
+			break;
+		default:
+			continue;
+		}
+
+		if (merged)
+			ctx->rq_merged++;
+		return merged;
+	}
+
+	return false;
+}
+
 bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {
 	struct elevator_queue *e = q->elevator;
+	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+	bool ret = false;
 
-	if (e->type->ops.mq.bio_merge) {
-		struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
-		struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-
+	if (e && e->type->ops.mq.bio_merge) {
 		blk_mq_put_ctx(ctx);
 		return e->type->ops.mq.bio_merge(hctx, bio);
 	}
 
-	return false;
+	if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
+		/* default per sw-queue merge */
+		spin_lock(&ctx->lock);
+		ret = blk_mq_attempt_merge(q, ctx, bio);
+		spin_unlock(&ctx->lock);
+	}
+
+	blk_mq_put_ctx(ctx);
+	return ret;
 }
 
 bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
@@ -266,18 +277,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 	return true;
 }
 
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
-		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-		if (blk_mq_hctx_has_pending(hctx)) {
-			blk_mq_run_hw_queue(hctx, true);
-			return true;
-		}
-	}
-	return false;
-}
-
 /**
  * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
  * @pos:    loop cursor.
@@ -309,6 +308,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
 	unsigned int i, j;
 
 	if (set->flags & BLK_MQ_F_TAG_SHARED) {
+		/*
+		 * If this is 0, then we know that no hardware queues
+		 * have RESTART marked. We're done.
+		 */
+		if (!atomic_read(&queue->shared_hctx_restart))
+			return;
+
 		rcu_read_lock();
 		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
 					   tag_set_list) {
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index edafb5383b7b..9267d0b7c197 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -7,8 +7,7 @@
 void blk_mq_sched_free_hctx_data(struct request_queue *q,
 				 void (*exit)(struct blk_mq_hw_ctx *));
 
-struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data);
-void blk_mq_sched_put_request(struct request *rq);
+void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio);
 
 void blk_mq_sched_request_inserted(struct request *rq);
 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
@@ -38,35 +37,12 @@ int blk_mq_sched_init(struct request_queue *q);
 static inline bool
 blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {
-	struct elevator_queue *e = q->elevator;
-
-	if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio))
+	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
 		return false;
 
 	return __blk_mq_sched_bio_merge(q, bio);
 }
 
-static inline int blk_mq_sched_get_rq_priv(struct request_queue *q,
-					   struct request *rq,
-					   struct bio *bio)
-{
-	struct elevator_queue *e = q->elevator;
-
-	if (e && e->type->ops.mq.get_rq_priv)
-		return e->type->ops.mq.get_rq_priv(q, rq, bio);
-
-	return 0;
-}
-
-static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
-					    struct request *rq)
-{
-	struct elevator_queue *e = q->elevator;
-
-	if (e && e->type->ops.mq.put_rq_priv)
-		e->type->ops.mq.put_rq_priv(q, rq);
-}
-
 static inline bool
 blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
 			 struct bio *bio)
@@ -115,15 +91,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
 	return false;
 }
 
-/*
- * Mark a hardware queue as needing a restart.
- */
-static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-}
-
 static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
 {
 	return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5d4ce7eb8dbf..ced2b000ca02 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -42,7 +42,6 @@ static LIST_HEAD(all_q_list);
 
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
-static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync);
 
 static int blk_mq_poll_stats_bkt(const struct request *rq)
 {
@@ -154,13 +153,28 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
 
+/*
+ * FIXME: replace the scsi_internal_device_*block_nowait() calls in the
+ * mpt3sas driver such that this function can be removed.
+ */
+void blk_mq_quiesce_queue_nowait(struct request_queue *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
+
 /**
- * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
+ * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
  * @q: request queue.
  *
  * Note: this function does not prevent that the struct request end_io()
- * callback function is invoked. Additionally, it is not prevented that
- * new queue_rq() calls occur unless the queue has been stopped first.
+ * callback function is invoked. Once this function is returned, we make
+ * sure no dispatch can happen until the queue is unquiesced via
+ * blk_mq_unquiesce_queue().
  */
 void blk_mq_quiesce_queue(struct request_queue *q)
 {
@@ -168,11 +182,11 @@ void blk_mq_quiesce_queue(struct request_queue *q)
 	unsigned int i;
 	bool rcu = false;
 
-	__blk_mq_stop_hw_queues(q, true);
+	blk_mq_quiesce_queue_nowait(q);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (hctx->flags & BLK_MQ_F_BLOCKING)
-			synchronize_srcu(&hctx->queue_rq_srcu);
+			synchronize_srcu(hctx->queue_rq_srcu);
 		else
 			rcu = true;
 	}
@@ -181,6 +195,26 @@ void blk_mq_quiesce_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
 
+/*
+ * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
+ * @q: request queue.
+ *
+ * This function recovers queue into the state before quiescing
+ * which is done by blk_mq_quiesce_queue.
+ */
+void blk_mq_unquiesce_queue(struct request_queue *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	/* dispatch requests which are inserted during quiescing */
+	blk_mq_run_hw_queues(q, true);
+}
+EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
+
 void blk_mq_wake_waiters(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
@@ -204,15 +238,33 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_can_queue);
 
-void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
-			struct request *rq, unsigned int op)
+static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
+		unsigned int tag, unsigned int op)
 {
+	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
+	struct request *rq = tags->static_rqs[tag];
+
+	rq->rq_flags = 0;
+
+	if (data->flags & BLK_MQ_REQ_INTERNAL) {
+		rq->tag = -1;
+		rq->internal_tag = tag;
+	} else {
+		if (blk_mq_tag_busy(data->hctx)) {
+			rq->rq_flags = RQF_MQ_INFLIGHT;
+			atomic_inc(&data->hctx->nr_active);
+		}
+		rq->tag = tag;
+		rq->internal_tag = -1;
+		data->hctx->tags->rqs[rq->tag] = rq;
+	}
+
 	INIT_LIST_HEAD(&rq->queuelist);
 	/* csd/requeue_work/fifo_time is initialized before use */
-	rq->q = q;
-	rq->mq_ctx = ctx;
+	rq->q = data->q;
+	rq->mq_ctx = data->ctx;
 	rq->cmd_flags = op;
-	if (blk_queue_io_stat(q))
+	if (blk_queue_io_stat(data->q))
 		rq->rq_flags |= RQF_IO_STAT;
 	/* do not touch atomic flags, it needs atomic ops against the timer */
 	rq->cpu = -1;
@@ -241,44 +293,60 @@ void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 	rq->end_io_data = NULL;
 	rq->next_rq = NULL;
 
-	ctx->rq_dispatched[op_is_sync(op)]++;
+	data->ctx->rq_dispatched[op_is_sync(op)]++;
+	return rq;
 }
-EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init);
 
-struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
-				       unsigned int op)
+static struct request *blk_mq_get_request(struct request_queue *q,
+		struct bio *bio, unsigned int op,
+		struct blk_mq_alloc_data *data)
 {
+	struct elevator_queue *e = q->elevator;
 	struct request *rq;
 	unsigned int tag;
 
-	tag = blk_mq_get_tag(data);
-	if (tag != BLK_MQ_TAG_FAIL) {
-		struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
+	blk_queue_enter_live(q);
+	data->q = q;
+	if (likely(!data->ctx))
+		data->ctx = blk_mq_get_ctx(q);
+	if (likely(!data->hctx))
+		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
+	if (op & REQ_NOWAIT)
+		data->flags |= BLK_MQ_REQ_NOWAIT;
 
-		rq = tags->static_rqs[tag];
+	if (e) {
+		data->flags |= BLK_MQ_REQ_INTERNAL;
 
-		if (data->flags & BLK_MQ_REQ_INTERNAL) {
-			rq->tag = -1;
-			rq->internal_tag = tag;
-		} else {
-			if (blk_mq_tag_busy(data->hctx)) {
-				rq->rq_flags = RQF_MQ_INFLIGHT;
-				atomic_inc(&data->hctx->nr_active);
-			}
-			rq->tag = tag;
-			rq->internal_tag = -1;
-			data->hctx->tags->rqs[rq->tag] = rq;
-		}
+		/*
+		 * Flush requests are special and go directly to the
+		 * dispatch list.
+		 */
+		if (!op_is_flush(op) && e->type->ops.mq.limit_depth)
+			e->type->ops.mq.limit_depth(op, data);
+	}
 
-		blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
-		return rq;
+	tag = blk_mq_get_tag(data);
+	if (tag == BLK_MQ_TAG_FAIL) {
+		blk_queue_exit(q);
+		return NULL;
 	}
 
-	return NULL;
+	rq = blk_mq_rq_ctx_init(data, tag, op);
+	if (!op_is_flush(op)) {
+		rq->elv.icq = NULL;
+		if (e && e->type->ops.mq.prepare_request) {
+			if (e->type->icq_cache && rq_ioc(bio))
+				blk_mq_sched_assign_ioc(rq, bio);
+
+			e->type->ops.mq.prepare_request(rq, bio);
+			rq->rq_flags |= RQF_ELVPRIV;
+		}
+	}
+	data->hctx->queued++;
+	return rq;
 }
-EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
 
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
 		unsigned int flags)
 {
 	struct blk_mq_alloc_data alloc_data = { .flags = flags };
@@ -289,7 +357,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 	if (ret)
 		return ERR_PTR(ret);
 
-	rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
+	rq = blk_mq_get_request(q, NULL, op, &alloc_data);
 
 	blk_mq_put_ctx(alloc_data.ctx);
 	blk_queue_exit(q);
@@ -304,8 +372,8 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 }
 EXPORT_SYMBOL(blk_mq_alloc_request);
 
-struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
-		unsigned int flags, unsigned int hctx_idx)
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+		unsigned int op, unsigned int flags, unsigned int hctx_idx)
 {
 	struct blk_mq_alloc_data alloc_data = { .flags = flags };
 	struct request *rq;
@@ -340,7 +408,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
 	cpu = cpumask_first(alloc_data.hctx->cpumask);
 	alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
 
-	rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
+	rq = blk_mq_get_request(q, NULL, op, &alloc_data);
 
 	blk_queue_exit(q);
 
@@ -351,17 +419,28 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
 
-void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
-			     struct request *rq)
+void blk_mq_free_request(struct request *rq)
 {
-	const int sched_tag = rq->internal_tag;
 	struct request_queue *q = rq->q;
+	struct elevator_queue *e = q->elevator;
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+	const int sched_tag = rq->internal_tag;
 
+	if (rq->rq_flags & RQF_ELVPRIV) {
+		if (e && e->type->ops.mq.finish_request)
+			e->type->ops.mq.finish_request(rq);
+		if (rq->elv.icq) {
+			put_io_context(rq->elv.icq->ioc);
+			rq->elv.icq = NULL;
+		}
+	}
+
+	ctx->rq_completed[rq_is_sync(rq)]++;
 	if (rq->rq_flags & RQF_MQ_INFLIGHT)
 		atomic_dec(&hctx->nr_active);
 
 	wbt_done(q->rq_wb, &rq->issue_stat);
-	rq->rq_flags = 0;
 
 	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
 	clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
@@ -372,29 +451,9 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 	blk_mq_sched_restart(hctx);
 	blk_queue_exit(q);
 }
-
-static void blk_mq_finish_hctx_request(struct blk_mq_hw_ctx *hctx,
-				     struct request *rq)
-{
-	struct blk_mq_ctx *ctx = rq->mq_ctx;
-
-	ctx->rq_completed[rq_is_sync(rq)]++;
-	__blk_mq_finish_request(hctx, ctx, rq);
-}
-
-void blk_mq_finish_request(struct request *rq)
-{
-	blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
-}
-EXPORT_SYMBOL_GPL(blk_mq_finish_request);
-
-void blk_mq_free_request(struct request *rq)
-{
-	blk_mq_sched_put_request(rq);
-}
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
-inline void __blk_mq_end_request(struct request *rq, int error)
+inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 {
 	blk_account_io_done(rq);
 
@@ -409,7 +468,7 @@ inline void __blk_mq_end_request(struct request *rq, int error)
 }
 EXPORT_SYMBOL(__blk_mq_end_request);
 
-void blk_mq_end_request(struct request *rq, int error)
+void blk_mq_end_request(struct request *rq, blk_status_t error)
 {
 	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
 		BUG();
@@ -628,25 +687,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
 
-void blk_mq_abort_requeue_list(struct request_queue *q)
-{
-	unsigned long flags;
-	LIST_HEAD(rq_list);
-
-	spin_lock_irqsave(&q->requeue_lock, flags);
-	list_splice_init(&q->requeue_list, &rq_list);
-	spin_unlock_irqrestore(&q->requeue_lock, flags);
-
-	while (!list_empty(&rq_list)) {
-		struct request *rq;
-
-		rq = list_first_entry(&rq_list, struct request, queuelist);
-		list_del_init(&rq->queuelist);
-		blk_mq_end_request(rq, -EIO);
-	}
-}
-EXPORT_SYMBOL(blk_mq_abort_requeue_list);
-
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 {
 	if (tag < tags->nr_tags) {
@@ -772,50 +812,6 @@ static void blk_mq_timeout_work(struct work_struct *work)
 	blk_queue_exit(q);
 }
 
-/*
- * Reverse check our software queue for entries that we could potentially
- * merge with. Currently includes a hand-wavy stop count of 8, to not spend
- * too much time checking for merges.
- */
-static bool blk_mq_attempt_merge(struct request_queue *q,
-				 struct blk_mq_ctx *ctx, struct bio *bio)
-{
-	struct request *rq;
-	int checked = 8;
-
-	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
-		bool merged = false;
-
-		if (!checked--)
-			break;
-
-		if (!blk_rq_merge_ok(rq, bio))
-			continue;
-
-		switch (blk_try_merge(rq, bio)) {
-		case ELEVATOR_BACK_MERGE:
-			if (blk_mq_sched_allow_merge(q, rq, bio))
-				merged = bio_attempt_back_merge(q, rq, bio);
-			break;
-		case ELEVATOR_FRONT_MERGE:
-			if (blk_mq_sched_allow_merge(q, rq, bio))
-				merged = bio_attempt_front_merge(q, rq, bio);
-			break;
-		case ELEVATOR_DISCARD_MERGE:
-			merged = bio_attempt_discard_merge(q, rq, bio);
-			break;
-		default:
-			continue;
-		}
-
-		if (merged)
-			ctx->rq_merged++;
-		return merged;
-	}
-
-	return false;
-}
-
 struct flush_busy_ctx_data {
 	struct blk_mq_hw_ctx *hctx;
 	struct list_head *list;
@@ -945,14 +941,14 @@ static bool reorder_tags_to_front(struct list_head *list)
 	return first != NULL;
 }
 
-static int blk_mq_dispatch_wake(wait_queue_t *wait, unsigned mode, int flags,
+static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
 				void *key)
 {
 	struct blk_mq_hw_ctx *hctx;
 
 	hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
 
-	list_del(&wait->task_list);
+	list_del(&wait->entry);
 	clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state);
 	blk_mq_run_hw_queue(hctx, true);
 	return 1;
@@ -987,7 +983,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 {
 	struct blk_mq_hw_ctx *hctx;
 	struct request *rq;
-	int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK;
+	int errors, queued;
 
 	if (list_empty(list))
 		return false;
@@ -998,6 +994,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 	errors = queued = 0;
 	do {
 		struct blk_mq_queue_data bd;
+		blk_status_t ret;
 
 		rq = list_first_entry(list, struct request, queuelist);
 		if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
@@ -1038,25 +1035,20 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 		}
 
 		ret = q->mq_ops->queue_rq(hctx, &bd);
-		switch (ret) {
-		case BLK_MQ_RQ_QUEUE_OK:
-			queued++;
-			break;
-		case BLK_MQ_RQ_QUEUE_BUSY:
+		if (ret == BLK_STS_RESOURCE) {
 			blk_mq_put_driver_tag_hctx(hctx, rq);
 			list_add(&rq->queuelist, list);
 			__blk_mq_requeue_request(rq);
 			break;
-		default:
-			pr_err("blk-mq: bad return on queue: %d\n", ret);
-		case BLK_MQ_RQ_QUEUE_ERROR:
+		}
+
+		if (unlikely(ret != BLK_STS_OK)) {
 			errors++;
-			blk_mq_end_request(rq, -EIO);
-			break;
+			blk_mq_end_request(rq, BLK_STS_IOERR);
+			continue;
 		}
 
-		if (ret == BLK_MQ_RQ_QUEUE_BUSY)
-			break;
+		queued++;
 	} while (!list_empty(list));
 
 	hctx->dispatched[queued_to_index(queued)]++;
@@ -1094,7 +1086,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 		 * - blk_mq_run_hw_queue() checks whether or not a queue has
 		 *   been stopped before rerunning a queue.
 		 * - Some but not all block drivers stop a queue before
-		 *   returning BLK_MQ_RQ_QUEUE_BUSY. Two exceptions are scsi-mq
+		 *   returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
 		 *   and dm-rq.
 		 */
 		if (!blk_mq_sched_needs_restart(hctx) &&
@@ -1119,9 +1111,9 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	} else {
 		might_sleep();
 
-		srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+		srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
 		blk_mq_sched_dispatch_requests(hctx);
-		srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+		srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
 	}
 }
 
@@ -1153,8 +1145,10 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
 static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
 					unsigned long msecs)
 {
-	if (unlikely(blk_mq_hctx_stopped(hctx) ||
-		     !blk_mq_hw_queue_mapped(hctx)))
+	if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
+		return;
+
+	if (unlikely(blk_mq_hctx_stopped(hctx)))
 		return;
 
 	if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
@@ -1220,34 +1214,39 @@ bool blk_mq_queue_stopped(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_mq_queue_stopped);
 
-static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync)
+/*
+ * This function is often used for pausing .queue_rq() by driver when
+ * there isn't enough resource or some conditions aren't satisfied, and
+ * BLK_MQ_RQ_QUEUE_BUSY is usually returned.
+ *
+ * We do not guarantee that dispatch can be drained or blocked
+ * after blk_mq_stop_hw_queue() returns. Please use
+ * blk_mq_quiesce_queue() for that requirement.
+ */
+void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
-	if (sync)
-		cancel_delayed_work_sync(&hctx->run_work);
-	else
-		cancel_delayed_work(&hctx->run_work);
+	cancel_delayed_work(&hctx->run_work);
 
 	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
 }
-
-void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
-{
-	__blk_mq_stop_hw_queue(hctx, false);
-}
 EXPORT_SYMBOL(blk_mq_stop_hw_queue);
 
-void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
+/*
+ * This function is often used for pausing .queue_rq() by driver when
+ * there isn't enough resource or some conditions aren't satisfied, and
+ * BLK_MQ_RQ_QUEUE_BUSY is usually returned.
+ *
+ * We do not guarantee that dispatch can be drained or blocked
+ * after blk_mq_stop_hw_queues() returns. Please use
+ * blk_mq_quiesce_queue() for that requirement.
+ */
+void blk_mq_stop_hw_queues(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i)
-		__blk_mq_stop_hw_queue(hctx, sync);
-}
-
-void blk_mq_stop_hw_queues(struct request_queue *q)
-{
-	__blk_mq_stop_hw_queues(q, false);
+		blk_mq_stop_hw_queue(hctx);
 }
 EXPORT_SYMBOL(blk_mq_stop_hw_queues);
 
@@ -1314,7 +1313,7 @@ static void blk_mq_run_work_fn(struct work_struct *work)
 
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 {
-	if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
+	if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
 		return;
 
 	/*
@@ -1336,6 +1335,8 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 
+	lockdep_assert_held(&ctx->lock);
+
 	trace_block_rq_insert(hctx->queue, rq);
 
 	if (at_head)
@@ -1349,6 +1350,8 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 
+	lockdep_assert_held(&ctx->lock);
+
 	__blk_mq_insert_req_list(hctx, rq, at_head);
 	blk_mq_hctx_mark_pending(hctx, ctx);
 }
@@ -1446,30 +1449,13 @@ static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx)
 		!blk_queue_nomerges(hctx->queue);
 }
 
-static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
-					 struct blk_mq_ctx *ctx,
-					 struct request *rq, struct bio *bio)
+static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx,
+				   struct blk_mq_ctx *ctx,
+				   struct request *rq)
 {
-	if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) {
-		blk_mq_bio_to_request(rq, bio);
-		spin_lock(&ctx->lock);
-insert_rq:
-		__blk_mq_insert_request(hctx, rq, false);
-		spin_unlock(&ctx->lock);
-		return false;
-	} else {
-		struct request_queue *q = hctx->queue;
-
-		spin_lock(&ctx->lock);
-		if (!blk_mq_attempt_merge(q, ctx, bio)) {
-			blk_mq_bio_to_request(rq, bio);
-			goto insert_rq;
-		}
-
-		spin_unlock(&ctx->lock);
-		__blk_mq_finish_request(hctx, ctx, rq);
-		return true;
-	}
+	spin_lock(&ctx->lock);
+	__blk_mq_insert_request(hctx, rq, false);
+	spin_unlock(&ctx->lock);
 }
 
 static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
@@ -1480,22 +1466,29 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
 	return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
 }
 
-static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
-				      bool may_sleep)
+static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+					struct request *rq,
+					blk_qc_t *cookie, bool may_sleep)
 {
 	struct request_queue *q = rq->q;
 	struct blk_mq_queue_data bd = {
 		.rq = rq,
 		.last = true,
 	};
-	struct blk_mq_hw_ctx *hctx;
 	blk_qc_t new_cookie;
-	int ret;
+	blk_status_t ret;
+	bool run_queue = true;
+
+	/* RCU or SRCU read lock is needed before checking quiesced flag */
+	if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
+		run_queue = false;
+		goto insert;
+	}
 
 	if (q->elevator)
 		goto insert;
 
-	if (!blk_mq_get_driver_tag(rq, &hctx, false))
+	if (!blk_mq_get_driver_tag(rq, NULL, false))
 		goto insert;
 
 	new_cookie = request_to_qc_t(hctx, rq);
@@ -1506,20 +1499,21 @@ static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
 	 * would have done
 	 */
 	ret = q->mq_ops->queue_rq(hctx, &bd);
-	if (ret == BLK_MQ_RQ_QUEUE_OK) {
+	switch (ret) {
+	case BLK_STS_OK:
 		*cookie = new_cookie;
 		return;
-	}
-
-	if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
+	case BLK_STS_RESOURCE:
+		__blk_mq_requeue_request(rq);
+		goto insert;
+	default:
 		*cookie = BLK_QC_T_NONE;
-		blk_mq_end_request(rq, -EIO);
+		blk_mq_end_request(rq, ret);
 		return;
 	}
 
-	__blk_mq_requeue_request(rq);
 insert:
-	blk_mq_sched_insert_request(rq, false, true, false, may_sleep);
+	blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep);
 }
 
 static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
@@ -1527,16 +1521,16 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 {
 	if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
 		rcu_read_lock();
-		__blk_mq_try_issue_directly(rq, cookie, false);
+		__blk_mq_try_issue_directly(hctx, rq, cookie, false);
 		rcu_read_unlock();
 	} else {
 		unsigned int srcu_idx;
 
 		might_sleep();
 
-		srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
-		__blk_mq_try_issue_directly(rq, cookie, true);
-		srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+		srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
+		__blk_mq_try_issue_directly(hctx, rq, cookie, true);
+		srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
 	}
 }
 
@@ -1554,13 +1548,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_bounce(q, &bio);
 
+	blk_queue_split(q, &bio);
+
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
 		bio_io_error(bio);
 		return BLK_QC_T_NONE;
 	}
 
-	blk_queue_split(q, &bio, q->bio_split);
-
 	if (!is_flush_fua && !blk_queue_nomerges(q) &&
 	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
 		return BLK_QC_T_NONE;
@@ -1572,9 +1566,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	trace_block_getrq(q, bio, bio->bi_opf);
 
-	rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
+	rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
 	if (unlikely(!rq)) {
 		__wbt_done(q->rq_wb, wb_acct);
+		if (bio->bi_opf & REQ_NOWAIT)
+			bio_wouldblock_error(bio);
 		return BLK_QC_T_NONE;
 	}
 
@@ -1638,9 +1634,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 		blk_mq_put_ctx(data.ctx);
 
-		if (same_queue_rq)
+		if (same_queue_rq) {
+			data.hctx = blk_mq_map_queue(q,
+					same_queue_rq->mq_ctx->cpu);
 			blk_mq_try_issue_directly(data.hctx, same_queue_rq,
 					&cookie);
+		}
 	} else if (q->nr_hw_queues > 1 && is_sync) {
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
@@ -1649,11 +1648,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_sched_insert_request(rq, false, true, true, true);
-	} else if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
+	} else {
 		blk_mq_put_ctx(data.ctx);
+		blk_mq_bio_to_request(rq, bio);
+		blk_mq_queue_io(data.hctx, data.ctx, rq);
 		blk_mq_run_hw_queue(data.hctx, true);
-	} else
-		blk_mq_put_ctx(data.ctx);
+	}
 
 	return cookie;
 }
@@ -1876,7 +1876,7 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 		set->ops->exit_hctx(hctx, hctx_idx);
 
 	if (hctx->flags & BLK_MQ_F_BLOCKING)
-		cleanup_srcu_struct(&hctx->queue_rq_srcu);
+		cleanup_srcu_struct(hctx->queue_rq_srcu);
 
 	blk_mq_remove_cpuhp(hctx);
 	blk_free_flush_queue(hctx->fq);
@@ -1910,7 +1910,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	spin_lock_init(&hctx->lock);
 	INIT_LIST_HEAD(&hctx->dispatch);
 	hctx->queue = q;
-	hctx->queue_num = hctx_idx;
 	hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
 
 	cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
@@ -1949,7 +1948,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 		goto free_fq;
 
 	if (hctx->flags & BLK_MQ_F_BLOCKING)
-		init_srcu_struct(&hctx->queue_rq_srcu);
+		init_srcu_struct(hctx->queue_rq_srcu);
 
 	blk_mq_debugfs_register_hctx(q, hctx);
 
@@ -2113,20 +2112,30 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 	}
 }
 
+/*
+ * Caller needs to ensure that we're either frozen/quiesced, or that
+ * the queue isn't live yet.
+ */
 static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		if (shared)
+		if (shared) {
+			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+				atomic_inc(&q->shared_hctx_restart);
 			hctx->flags |= BLK_MQ_F_TAG_SHARED;
-		else
+		} else {
+			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+				atomic_dec(&q->shared_hctx_restart);
 			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+		}
 	}
 }
 
-static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
+					bool shared)
 {
 	struct request_queue *q;
 
@@ -2224,6 +2233,20 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_init_queue);
 
+static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
+{
+	int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
+
+	BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, queue_rq_srcu),
+			   __alignof__(struct blk_mq_hw_ctx)) !=
+		     sizeof(struct blk_mq_hw_ctx));
+
+	if (tag_set->flags & BLK_MQ_F_BLOCKING)
+		hw_ctx_size += sizeof(struct srcu_struct);
+
+	return hw_ctx_size;
+}
+
 static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 						struct request_queue *q)
 {
@@ -2238,7 +2261,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 			continue;
 
 		node = blk_mq_hw_queue_to_node(q->mq_map, i);
-		hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
+		hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set),
 					GFP_KERNEL, node);
 		if (!hctxs[i])
 			break;
@@ -2341,15 +2364,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
-	mutex_lock(&all_q_mutex);
 	get_online_cpus();
+	mutex_lock(&all_q_mutex);
 
 	list_add_tail(&q->all_q_node, &all_q_list);
 	blk_mq_add_queue_tag_set(set, q);
 	blk_mq_map_swqueue(q, cpu_online_mask);
 
-	put_online_cpus();
 	mutex_unlock(&all_q_mutex);
+	put_online_cpus();
 
 	if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
 		int ret;
@@ -2660,7 +2683,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 	return ret;
 }
 
-void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
+static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
+							int nr_hw_queues)
 {
 	struct request_queue *q;
 
@@ -2684,6 +2708,13 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_unfreeze_queue(q);
 }
+
+void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
+{
+	mutex_lock(&set->tag_list_lock);
+	__blk_mq_update_nr_hw_queues(set, nr_hw_queues);
+	mutex_unlock(&set->tag_list_lock);
+}
 EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
 
 /* Enable polling stats and return whether they were already enabled. */
diff --git a/block/blk-mq.h b/block/blk-mq.h
index cc67b48e3551..1a06fdf9fd4d 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -128,17 +128,6 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data
 	return data->hctx->tags;
 }
 
-/*
- * Internal helpers for request allocation/init/free
- */
-void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
-			struct request *rq, unsigned int op);
-void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
-				struct request *rq);
-void blk_mq_finish_request(struct request *rq);
-struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
-					unsigned int op);
-
 static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
 {
 	return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 4fa81ed383ca..be1f115b538b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -172,11 +172,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	q->nr_batching = BLK_BATCH_REQ;
 
 	blk_set_default_limits(&q->limits);
-
-	/*
-	 * by default assume old behaviour and bounce for any highmem page
-	 */
-	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 }
 EXPORT_SYMBOL(blk_queue_make_request);
 
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 6c2f40940439..c52356d90fe3 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -96,13 +96,16 @@ void blk_stat_add(struct request *rq)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
-		if (blk_stat_is_active(cb)) {
-			bucket = cb->bucket_fn(rq);
-			if (bucket < 0)
-				continue;
-			stat = &this_cpu_ptr(cb->cpu_stat)[bucket];
-			__blk_stat_add(stat, value);
-		}
+		if (!blk_stat_is_active(cb))
+			continue;
+
+		bucket = cb->bucket_fn(rq);
+		if (bucket < 0)
+			continue;
+
+		stat = &get_cpu_ptr(cb->cpu_stat)[bucket];
+		__blk_stat_add(stat, value);
+		put_cpu_ptr(cb->cpu_stat);
 	}
 	rcu_read_unlock();
 }
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 504fee940052..27aceab1cc31 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -777,24 +777,25 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
 }
 
 /**
- * blk_release_queue: - release a &struct request_queue when it is no longer needed
- * @kobj:    the kobj belonging to the request queue to be released
+ * __blk_release_queue - release a request queue when it is no longer needed
+ * @work: pointer to the release_work member of the request queue to be released
  *
  * Description:
- *     blk_release_queue is the pair to blk_init_queue() or
- *     blk_queue_make_request().  It should be called when a request queue is
- *     being released; typically when a block device is being de-registered.
- *     Currently, its primary task it to free all the &struct request
- *     structures that were allocated to the queue and the queue itself.
+ *     blk_release_queue is the counterpart of blk_init_queue(). It should be
+ *     called when a request queue is being released; typically when a block
+ *     device is being de-registered. Its primary task it to free the queue
+ *     itself.
  *
- * Note:
+ * Notes:
  *     The low level driver must have finished any outstanding requests first
  *     via blk_cleanup_queue().
- **/
-static void blk_release_queue(struct kobject *kobj)
+ *
+ *     Although blk_release_queue() may be called with preemption disabled,
+ *     __blk_release_queue() may sleep.
+ */
+static void __blk_release_queue(struct work_struct *work)
 {
-	struct request_queue *q =
-		container_of(kobj, struct request_queue, kobj);
+	struct request_queue *q = container_of(work, typeof(*q), release_work);
 
 	if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
 		blk_stat_remove_callback(q, q->poll_cb);
@@ -809,7 +810,7 @@ static void blk_release_queue(struct kobject *kobj)
 
 	blk_free_queue_stats(q->stats);
 
-	blk_exit_rl(&q->root_rl);
+	blk_exit_rl(q, &q->root_rl);
 
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
@@ -834,6 +835,15 @@ static void blk_release_queue(struct kobject *kobj)
 	call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
 
+static void blk_release_queue(struct kobject *kobj)
+{
+	struct request_queue *q =
+		container_of(kobj, struct request_queue, kobj);
+
+	INIT_WORK(&q->release_work, __blk_release_queue);
+	schedule_work(&q->release_work);
+}
+
 static const struct sysfs_ops queue_sysfs_ops = {
 	.show	= queue_attr_show,
 	.store	= queue_attr_store,
@@ -887,10 +897,10 @@ int blk_register_queue(struct gendisk *disk)
 		goto unlock;
 	}
 
-	if (q->mq_ops)
+	if (q->mq_ops) {
 		__blk_mq_register_dev(dev, q);
-
-	blk_mq_debugfs_register(q);
+		blk_mq_debugfs_register(q);
+	}
 
 	kobject_uevent(&q->kobj, KOBJ_ADD);
 
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 07cc329fa4b0..2290f65b9d73 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -258,15 +258,14 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
  *    all transfers have been done for a request. It's important to call
  *    this function before end_that_request_last(), as that will put the
  *    request back on the free list thus corrupting the internal tag list.
- *
- *  Notes:
- *   queue lock must be held.
  **/
 void blk_queue_end_tag(struct request_queue *q, struct request *rq)
 {
 	struct blk_queue_tag *bqt = q->queue_tags;
 	unsigned tag = rq->tag; /* negative tags invalid */
 
+	lockdep_assert_held(q->queue_lock);
+
 	BUG_ON(tag >= bqt->real_max_depth);
 
 	list_del_init(&rq->queuelist);
@@ -307,9 +306,6 @@ EXPORT_SYMBOL(blk_queue_end_tag);
  *    calling this function.  The request will also be removed from
  *    the request queue, so it's the drivers responsibility to readd
  *    it if it should need to be restarted for some reason.
- *
- *  Notes:
- *   queue lock must be held.
  **/
 int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 {
@@ -317,6 +313,8 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	unsigned max_depth;
 	int tag;
 
+	lockdep_assert_held(q->queue_lock);
+
 	if (unlikely((rq->rq_flags & RQF_QUEUED))) {
 		printk(KERN_ERR
 		       "%s: request %p for device [%s] already tagged %d",
@@ -389,14 +387,13 @@ EXPORT_SYMBOL(blk_queue_start_tag);
  *   Hardware conditions may dictate a need to stop all pending requests.
  *   In this case, we will safely clear the block side of the tag queue and
  *   readd all requests to the request queue in the right order.
- *
- *  Notes:
- *   queue lock must be held.
  **/
 void blk_queue_invalidate_tags(struct request_queue *q)
 {
 	struct list_head *tmp, *n;
 
+	lockdep_assert_held(q->queue_lock);
+
 	list_for_each_safe(tmp, n, &q->tag_busy_list)
 		blk_requeue_request(q, list_entry_rq(tmp));
 }
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index b78db2e5fdff..a7285bf2831c 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -22,11 +22,18 @@ static int throtl_quantum = 32;
 #define DFL_THROTL_SLICE_HD (HZ / 10)
 #define DFL_THROTL_SLICE_SSD (HZ / 50)
 #define MAX_THROTL_SLICE (HZ)
-#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */
-#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */
 #define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
-/* default latency target is 0, eg, guarantee IO latency by default */
-#define DFL_LATENCY_TARGET (0)
+#define MIN_THROTL_BPS (320 * 1024)
+#define MIN_THROTL_IOPS (10)
+#define DFL_LATENCY_TARGET (-1L)
+#define DFL_IDLE_THRESHOLD (0)
+#define DFL_HD_BASELINE_LATENCY (4000L) /* 4ms */
+#define LATENCY_FILTERED_SSD (0)
+/*
+ * For HD, very small latency comes from sequential IO. Such IO is helpless to
+ * help determine if its IO is impacted by others, hence we ignore the IO
+ */
+#define LATENCY_FILTERED_HD (1000L) /* 1ms */
 
 #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
 
@@ -157,6 +164,7 @@ struct throtl_grp {
 	unsigned long last_check_time;
 
 	unsigned long latency_target; /* us */
+	unsigned long latency_target_conf; /* us */
 	/* When did we start a new slice */
 	unsigned long slice_start[2];
 	unsigned long slice_end[2];
@@ -165,6 +173,7 @@ struct throtl_grp {
 	unsigned long checked_last_finish_time; /* ns / 1024 */
 	unsigned long avg_idletime; /* ns / 1024 */
 	unsigned long idletime_threshold; /* us */
+	unsigned long idletime_threshold_conf; /* us */
 
 	unsigned int bio_cnt; /* total bios */
 	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
@@ -201,8 +210,6 @@ struct throtl_data
 	unsigned int limit_index;
 	bool limit_valid[LIMIT_CNT];
 
-	unsigned long dft_idletime_threshold; /* us */
-
 	unsigned long low_upgrade_time;
 	unsigned long low_downgrade_time;
 
@@ -212,6 +219,7 @@ struct throtl_data
 	struct avg_latency_bucket avg_buckets[LATENCY_BUCKET_SIZE];
 	struct latency_bucket __percpu *latency_buckets;
 	unsigned long last_calculate_time;
+	unsigned long filtered_latency;
 
 	bool track_bio_latency;
 };
@@ -294,8 +302,14 @@ static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw)
 
 	td = tg->td;
 	ret = tg->bps[rw][td->limit_index];
-	if (ret == 0 && td->limit_index == LIMIT_LOW)
-		return tg->bps[rw][LIMIT_MAX];
+	if (ret == 0 && td->limit_index == LIMIT_LOW) {
+		/* intermediate node or iops isn't 0 */
+		if (!list_empty(&blkg->blkcg->css.children) ||
+		    tg->iops[rw][td->limit_index])
+			return U64_MAX;
+		else
+			return MIN_THROTL_BPS;
+	}
 
 	if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] &&
 	    tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) {
@@ -315,10 +329,17 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
 
 	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
 		return UINT_MAX;
+
 	td = tg->td;
 	ret = tg->iops[rw][td->limit_index];
-	if (ret == 0 && tg->td->limit_index == LIMIT_LOW)
-		return tg->iops[rw][LIMIT_MAX];
+	if (ret == 0 && tg->td->limit_index == LIMIT_LOW) {
+		/* intermediate node or bps isn't 0 */
+		if (!list_empty(&blkg->blkcg->css.children) ||
+		    tg->bps[rw][td->limit_index])
+			return UINT_MAX;
+		else
+			return MIN_THROTL_IOPS;
+	}
 
 	if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] &&
 	    tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) {
@@ -482,6 +503,9 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
 	/* LIMIT_LOW will have default value 0 */
 
 	tg->latency_target = DFL_LATENCY_TARGET;
+	tg->latency_target_conf = DFL_LATENCY_TARGET;
+	tg->idletime_threshold = DFL_IDLE_THRESHOLD;
+	tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;
 
 	return &tg->pd;
 }
@@ -510,8 +534,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
 	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
 		sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
 	tg->td = td;
-
-	tg->idletime_threshold = td->dft_idletime_threshold;
 }
 
 /*
@@ -684,7 +706,7 @@ static void throtl_dequeue_tg(struct throtl_grp *tg)
 static void throtl_schedule_pending_timer(struct throtl_service_queue *sq,
 					  unsigned long expires)
 {
-	unsigned long max_expire = jiffies + 8 * sq_to_tg(sq)->td->throtl_slice;
+	unsigned long max_expire = jiffies + 8 * sq_to_td(sq)->throtl_slice;
 
 	/*
 	 * Since we are adjusting the throttle limit dynamically, the sleep
@@ -1349,7 +1371,7 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v)
 	return 0;
 }
 
-static void tg_conf_updated(struct throtl_grp *tg)
+static void tg_conf_updated(struct throtl_grp *tg, bool global)
 {
 	struct throtl_service_queue *sq = &tg->service_queue;
 	struct cgroup_subsys_state *pos_css;
@@ -1367,8 +1389,26 @@ static void tg_conf_updated(struct throtl_grp *tg)
 	 * restrictions in the whole hierarchy and allows them to bypass
 	 * blk-throttle.
 	 */
-	blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg))
-		tg_update_has_rules(blkg_to_tg(blkg));
+	blkg_for_each_descendant_pre(blkg, pos_css,
+			global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) {
+		struct throtl_grp *this_tg = blkg_to_tg(blkg);
+		struct throtl_grp *parent_tg;
+
+		tg_update_has_rules(this_tg);
+		/* ignore root/second level */
+		if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent ||
+		    !blkg->parent->parent)
+			continue;
+		parent_tg = blkg_to_tg(blkg->parent);
+		/*
+		 * make sure all children has lower idle time threshold and
+		 * higher latency target
+		 */
+		this_tg->idletime_threshold = min(this_tg->idletime_threshold,
+				parent_tg->idletime_threshold);
+		this_tg->latency_target = max(this_tg->latency_target,
+				parent_tg->latency_target);
+	}
 
 	/*
 	 * We're already holding queue_lock and know @tg is valid.  Let's
@@ -1413,7 +1453,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
 	else
 		*(unsigned int *)((void *)tg + of_cft(of)->private) = v;
 
-	tg_conf_updated(tg);
+	tg_conf_updated(tg, false);
 	ret = 0;
 out_finish:
 	blkg_conf_finish(&ctx);
@@ -1497,34 +1537,34 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
 	    tg->iops_conf[READ][off] == iops_dft &&
 	    tg->iops_conf[WRITE][off] == iops_dft &&
 	    (off != LIMIT_LOW ||
-	     (tg->idletime_threshold == tg->td->dft_idletime_threshold &&
-	      tg->latency_target == DFL_LATENCY_TARGET)))
+	     (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD &&
+	      tg->latency_target_conf == DFL_LATENCY_TARGET)))
 		return 0;
 
-	if (tg->bps_conf[READ][off] != bps_dft)
+	if (tg->bps_conf[READ][off] != U64_MAX)
 		snprintf(bufs[0], sizeof(bufs[0]), "%llu",
 			tg->bps_conf[READ][off]);
-	if (tg->bps_conf[WRITE][off] != bps_dft)
+	if (tg->bps_conf[WRITE][off] != U64_MAX)
 		snprintf(bufs[1], sizeof(bufs[1]), "%llu",
 			tg->bps_conf[WRITE][off]);
-	if (tg->iops_conf[READ][off] != iops_dft)
+	if (tg->iops_conf[READ][off] != UINT_MAX)
 		snprintf(bufs[2], sizeof(bufs[2]), "%u",
 			tg->iops_conf[READ][off]);
-	if (tg->iops_conf[WRITE][off] != iops_dft)
+	if (tg->iops_conf[WRITE][off] != UINT_MAX)
 		snprintf(bufs[3], sizeof(bufs[3]), "%u",
 			tg->iops_conf[WRITE][off]);
 	if (off == LIMIT_LOW) {
-		if (tg->idletime_threshold == ULONG_MAX)
+		if (tg->idletime_threshold_conf == ULONG_MAX)
 			strcpy(idle_time, " idle=max");
 		else
 			snprintf(idle_time, sizeof(idle_time), " idle=%lu",
-				tg->idletime_threshold);
+				tg->idletime_threshold_conf);
 
-		if (tg->latency_target == ULONG_MAX)
+		if (tg->latency_target_conf == ULONG_MAX)
 			strcpy(latency_time, " latency=max");
 		else
 			snprintf(latency_time, sizeof(latency_time),
-				" latency=%lu", tg->latency_target);
+				" latency=%lu", tg->latency_target_conf);
 	}
 
 	seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n",
@@ -1563,8 +1603,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 	v[2] = tg->iops_conf[READ][index];
 	v[3] = tg->iops_conf[WRITE][index];
 
-	idle_time = tg->idletime_threshold;
-	latency_time = tg->latency_target;
+	idle_time = tg->idletime_threshold_conf;
+	latency_time = tg->latency_target_conf;
 	while (true) {
 		char tok[27];	/* wiops=18446744073709551616 */
 		char *p;
@@ -1623,17 +1663,33 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 		tg->iops_conf[READ][LIMIT_MAX]);
 	tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],
 		tg->iops_conf[WRITE][LIMIT_MAX]);
+	tg->idletime_threshold_conf = idle_time;
+	tg->latency_target_conf = latency_time;
+
+	/* force user to configure all settings for low limit  */
+	if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] ||
+	      tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) ||
+	    tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD ||
+	    tg->latency_target_conf == DFL_LATENCY_TARGET) {
+		tg->bps[READ][LIMIT_LOW] = 0;
+		tg->bps[WRITE][LIMIT_LOW] = 0;
+		tg->iops[READ][LIMIT_LOW] = 0;
+		tg->iops[WRITE][LIMIT_LOW] = 0;
+		tg->idletime_threshold = DFL_IDLE_THRESHOLD;
+		tg->latency_target = DFL_LATENCY_TARGET;
+	} else if (index == LIMIT_LOW) {
+		tg->idletime_threshold = tg->idletime_threshold_conf;
+		tg->latency_target = tg->latency_target_conf;
+	}
 
-	if (index == LIMIT_LOW) {
-		blk_throtl_update_limit_valid(tg->td);
-		if (tg->td->limit_valid[LIMIT_LOW])
+	blk_throtl_update_limit_valid(tg->td);
+	if (tg->td->limit_valid[LIMIT_LOW]) {
+		if (index == LIMIT_LOW)
 			tg->td->limit_index = LIMIT_LOW;
-		tg->idletime_threshold = (idle_time == ULONG_MAX) ?
-			ULONG_MAX : idle_time;
-		tg->latency_target = (latency_time == ULONG_MAX) ?
-			ULONG_MAX : latency_time;
-	}
-	tg_conf_updated(tg);
+	} else
+		tg->td->limit_index = LIMIT_MAX;
+	tg_conf_updated(tg, index == LIMIT_LOW &&
+		tg->td->limit_valid[LIMIT_LOW]);
 	ret = 0;
 out_finish:
 	blkg_conf_finish(&ctx);
@@ -1722,17 +1778,25 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg)
 	/*
 	 * cgroup is idle if:
 	 * - single idle is too long, longer than a fixed value (in case user
-	 *   configure a too big threshold) or 4 times of slice
+	 *   configure a too big threshold) or 4 times of idletime threshold
 	 * - average think time is more than threshold
 	 * - IO latency is largely below threshold
 	 */
-	unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice);
-
-	time = min_t(unsigned long, MAX_IDLE_TIME, time);
-	return (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
-	       tg->avg_idletime > tg->idletime_threshold ||
-	       (tg->latency_target && tg->bio_cnt &&
+	unsigned long time;
+	bool ret;
+
+	time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);
+	ret = tg->latency_target == DFL_LATENCY_TARGET ||
+	      tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
+	      (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
+	      tg->avg_idletime > tg->idletime_threshold ||
+	      (tg->latency_target && tg->bio_cnt &&
 		tg->bad_bio_cnt * 5 < tg->bio_cnt);
+	throtl_log(&tg->service_queue,
+		"avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d",
+		tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt,
+		tg->bio_cnt, ret, tg->td->scale);
+	return ret;
 }
 
 static bool throtl_tg_can_upgrade(struct throtl_grp *tg)
@@ -1828,6 +1892,7 @@ static void throtl_upgrade_state(struct throtl_data *td)
 	struct cgroup_subsys_state *pos_css;
 	struct blkcg_gq *blkg;
 
+	throtl_log(&td->service_queue, "upgrade to max");
 	td->limit_index = LIMIT_MAX;
 	td->low_upgrade_time = jiffies;
 	td->scale = 0;
@@ -1850,6 +1915,7 @@ static void throtl_downgrade_state(struct throtl_data *td, int new)
 {
 	td->scale /= 2;
 
+	throtl_log(&td->service_queue, "downgrade, scale %d", td->scale);
 	if (td->scale) {
 		td->low_upgrade_time = jiffies - td->scale * td->throtl_slice;
 		return;
@@ -2023,6 +2089,11 @@ static void throtl_update_latency_buckets(struct throtl_data *td)
 		td->avg_buckets[i].valid = true;
 		last_latency = td->avg_buckets[i].latency;
 	}
+
+	for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
+		throtl_log(&td->service_queue,
+			"Latency bucket %d: latency=%ld, valid=%d", i,
+			td->avg_buckets[i].latency, td->avg_buckets[i].valid);
 }
 #else
 static inline void throtl_update_latency_buckets(struct throtl_data *td)
@@ -2218,7 +2289,7 @@ void blk_throtl_bio_endio(struct bio *bio)
 		throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat),
 			bio_op(bio), lat);
 
-	if (tg->latency_target) {
+	if (tg->latency_target && lat >= tg->td->filtered_latency) {
 		int bucket;
 		unsigned int threshold;
 
@@ -2354,18 +2425,19 @@ void blk_throtl_exit(struct request_queue *q)
 void blk_throtl_register_queue(struct request_queue *q)
 {
 	struct throtl_data *td;
-	struct cgroup_subsys_state *pos_css;
-	struct blkcg_gq *blkg;
+	int i;
 
 	td = q->td;
 	BUG_ON(!td);
 
 	if (blk_queue_nonrot(q)) {
 		td->throtl_slice = DFL_THROTL_SLICE_SSD;
-		td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_SSD;
+		td->filtered_latency = LATENCY_FILTERED_SSD;
 	} else {
 		td->throtl_slice = DFL_THROTL_SLICE_HD;
-		td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_HD;
+		td->filtered_latency = LATENCY_FILTERED_HD;
+		for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
+			td->avg_buckets[i].latency = DFL_HD_BASELINE_LATENCY;
 	}
 #ifndef CONFIG_BLK_DEV_THROTTLING_LOW
 	/* if no low limit, use previous default */
@@ -2375,18 +2447,6 @@ void blk_throtl_register_queue(struct request_queue *q)
 	td->track_bio_latency = !q->mq_ops && !q->request_fn;
 	if (!td->track_bio_latency)
 		blk_stat_enable_accounting(q);
-
-	/*
-	 * some tg are created before queue is fully initialized, eg, nonrot
-	 * isn't initialized yet
-	 */
-	rcu_read_lock();
-	blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
-		struct throtl_grp *tg = blkg_to_tg(blkg);
-
-		tg->idletime_threshold = td->dft_idletime_threshold;
-	}
-	rcu_read_unlock();
 }
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index cbff183f3d9f..17ec83bb0900 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -189,13 +189,15 @@ unsigned long blk_rq_timeout(unsigned long timeout)
  * Notes:
  *    Each request has its own timer, and as it is added to the queue, we
  *    set up the timer. When the request completes, we cancel the timer.
- *    Queue lock must be held for the non-mq case, mq case doesn't care.
  */
 void blk_add_timer(struct request *req)
 {
 	struct request_queue *q = req->q;
 	unsigned long expiry;
 
+	if (!q->mq_ops)
+		lockdep_assert_held(q->queue_lock);
+
 	/* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
 	if (!q->mq_ops && !q->rq_timed_out_fn)
 		return;
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 17676f4d7fd1..6a9a0f03a67b 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -503,7 +503,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 }
 
 static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw,
-			     wait_queue_t *wait, unsigned long rw)
+			     wait_queue_entry_t *wait, unsigned long rw)
 {
 	/*
 	 * inc it here even if disabled, since we'll dec it at completion.
@@ -520,7 +520,7 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw,
 	 * in line to be woken up, wait for our turn.
 	 */
 	if (waitqueue_active(&rqw->wait) &&
-	    rqw->wait.task_list.next != &wait->task_list)
+	    rqw->wait.head.next != &wait->entry)
 		return false;
 
 	return atomic_inc_below(&rqw->inflight, get_limit(rwb, rw));
diff --git a/block/blk.h b/block/blk.h
index 2ed70228e44f..01ebb8185f6b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -59,7 +59,7 @@ void blk_free_flush_queue(struct blk_flush_queue *q);
 
 int blk_init_rl(struct request_list *rl, struct request_queue *q,
 		gfp_t gfp_mask);
-void blk_exit_rl(struct request_list *rl);
+void blk_exit_rl(struct request_queue *q, struct request_list *rl);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 			struct bio *bio);
 void blk_queue_bypass_start(struct request_queue *q);
@@ -143,6 +143,8 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 	struct request *rq;
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
 
+	WARN_ON_ONCE(q->mq_ops);
+
 	while (1) {
 		if (!list_empty(&q->queue_head)) {
 			rq = list_entry_rq(q->queue_head.next);
@@ -334,4 +336,17 @@ static inline void blk_throtl_bio_endio(struct bio *bio) { }
 static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
 #endif
 
+#ifdef CONFIG_BOUNCE
+extern int init_emergency_isa_pool(void);
+extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
+#else
+static inline int init_emergency_isa_pool(void)
+{
+	return 0;
+}
+static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
+{
+}
+#endif /* CONFIG_BOUNCE */
+
 #endif /* BLK_INTERNAL_H */
diff --git a/block/bounce.c b/block/bounce.c
index 1cb5dd3a5da1..5793c2dc1a15 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -22,10 +22,12 @@
 #include <asm/tlbflush.h>
 
 #include <trace/events/block.h>
+#include "blk.h"
 
 #define POOL_SIZE	64
 #define ISA_POOL_SIZE	16
 
+static struct bio_set *bounce_bio_set, *bounce_bio_split;
 static mempool_t *page_pool, *isa_page_pool;
 
 #if defined(CONFIG_HIGHMEM) || defined(CONFIG_NEED_BOUNCE_POOL)
@@ -40,6 +42,14 @@ static __init int init_emergency_pool(void)
 	BUG_ON(!page_pool);
 	pr_info("pool size: %d pages\n", POOL_SIZE);
 
+	bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+	BUG_ON(!bounce_bio_set);
+	if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE))
+		BUG_ON(1);
+
+	bounce_bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
+	BUG_ON(!bounce_bio_split);
+
 	return 0;
 }
 
@@ -143,7 +153,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool)
 		mempool_free(bvec->bv_page, pool);
 	}
 
-	bio_orig->bi_error = bio->bi_error;
+	bio_orig->bi_status = bio->bi_status;
 	bio_endio(bio_orig);
 	bio_put(bio);
 }
@@ -163,7 +173,7 @@ static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
 {
 	struct bio *bio_orig = bio->bi_private;
 
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		copy_to_high_bio_irq(bio_orig, bio);
 
 	bounce_end_io(bio, pool);
@@ -186,20 +196,31 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	int rw = bio_data_dir(*bio_orig);
 	struct bio_vec *to, from;
 	struct bvec_iter iter;
-	unsigned i;
-
-	bio_for_each_segment(from, *bio_orig, iter)
-		if (page_to_pfn(from.bv_page) > queue_bounce_pfn(q))
-			goto bounce;
+	unsigned i = 0;
+	bool bounce = false;
+	int sectors = 0;
+
+	bio_for_each_segment(from, *bio_orig, iter) {
+		if (i++ < BIO_MAX_PAGES)
+			sectors += from.bv_len >> 9;
+		if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn)
+			bounce = true;
+	}
+	if (!bounce)
+		return;
 
-	return;
-bounce:
-	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set);
+	if (sectors < bio_sectors(*bio_orig)) {
+		bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
+		bio_chain(bio, *bio_orig);
+		generic_make_request(*bio_orig);
+		*bio_orig = bio;
+	}
+	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
 
 	bio_for_each_segment_all(to, bio, i) {
 		struct page *page = to->bv_page;
 
-		if (page_to_pfn(page) <= queue_bounce_pfn(q))
+		if (page_to_pfn(page) <= q->limits.bounce_pfn)
 			continue;
 
 		to->bv_page = mempool_alloc(pool, q->bounce_gfp);
@@ -251,7 +272,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 	 * don't waste time iterating over bio segments
 	 */
 	if (!(q->bounce_gfp & GFP_DMA)) {
-		if (queue_bounce_pfn(q) >= blk_max_pfn)
+		if (q->limits.bounce_pfn >= blk_max_pfn)
 			return;
 		pool = page_pool;
 	} else {
@@ -264,5 +285,3 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 	 */
 	__blk_queue_bounce(q, bio_orig, pool);
 }
-
-EXPORT_SYMBOL(blk_queue_bounce);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 0a23dbba2d30..c4513b23f57a 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -37,7 +37,7 @@ static void bsg_destroy_job(struct kref *kref)
 	struct bsg_job *job = container_of(kref, struct bsg_job, kref);
 	struct request *rq = job->req;
 
-	blk_end_request_all(rq, scsi_req(rq)->result);
+	blk_end_request_all(rq, BLK_STS_OK);
 
 	put_device(job->dev);	/* release reference for the request */
 
@@ -202,7 +202,7 @@ static void bsg_request_fn(struct request_queue *q)
 		ret = bsg_create_job(dev, req);
 		if (ret) {
 			scsi_req(req)->result = ret;
-			blk_end_request_all(req, ret);
+			blk_end_request_all(req, BLK_STS_OK);
 			spin_lock_irq(q->queue_lock);
 			continue;
 		}
@@ -246,6 +246,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
 	q->bsg_job_size = dd_job_size;
 	q->bsg_job_fn = job_fn;
 	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
 	blk_queue_softirq_done(q, bsg_softirq_done);
 	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 
diff --git a/block/bsg.c b/block/bsg.c
index 6fd08544d77e..37663b664666 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -236,7 +236,6 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
 	rq = blk_get_request(q, op, GFP_KERNEL);
 	if (IS_ERR(rq))
 		return rq;
-	scsi_req_init(rq);
 
 	ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
 	if (ret)
@@ -294,14 +293,14 @@ out:
  * async completion call-back from the block layer, when scsi/ide/whatever
  * calls end_that_request_last() on a request
  */
-static void bsg_rq_end_io(struct request *rq, int uptodate)
+static void bsg_rq_end_io(struct request *rq, blk_status_t status)
 {
 	struct bsg_command *bc = rq->end_io_data;
 	struct bsg_device *bd = bc->bd;
 	unsigned long flags;
 
-	dprintk("%s: finished rq %p bc %p, bio %p stat %d\n",
-		bd->name, rq, bc, bc->bio, uptodate);
+	dprintk("%s: finished rq %p bc %p, bio %p\n",
+		bd->name, rq, bc, bc->bio);
 
 	bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration);
 
@@ -750,6 +749,12 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
 #ifdef BSG_DEBUG
 	unsigned char buf[32];
 #endif
+
+	if (!blk_queue_scsi_passthrough(rq)) {
+		WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
+		return ERR_PTR(-EINVAL);
+	}
+
 	if (!blk_get_queue(rq))
 		return ERR_PTR(-ENXIO);
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index da69b079725f..3d5c28945719 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -38,9 +38,13 @@ static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */
 static const int cfq_hist_divisor = 4;
 
 /*
- * offset from end of service tree
+ * offset from end of queue service tree for idle class
  */
 #define CFQ_IDLE_DELAY		(NSEC_PER_SEC / 5)
+/* offset from end of group service tree under time slice mode */
+#define CFQ_SLICE_MODE_GROUP_DELAY (NSEC_PER_SEC / 5)
+/* offset from end of group service under IOPS mode */
+#define CFQ_IOPS_MODE_GROUP_DELAY (HZ / 5)
 
 /*
  * below this threshold, we consider thinktime immediate
@@ -978,15 +982,6 @@ static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
 	return min_vdisktime;
 }
 
-static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
-{
-	s64 delta = (s64)(vdisktime - min_vdisktime);
-	if (delta < 0)
-		min_vdisktime = vdisktime;
-
-	return min_vdisktime;
-}
-
 static void update_min_vdisktime(struct cfq_rb_root *st)
 {
 	struct cfq_group *cfqg;
@@ -1362,6 +1357,14 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
 	cfqg->vfraction = max_t(unsigned, vfr, 1);
 }
 
+static inline u64 cfq_get_cfqg_vdisktime_delay(struct cfq_data *cfqd)
+{
+	if (!iops_mode(cfqd))
+		return CFQ_SLICE_MODE_GROUP_DELAY;
+	else
+		return CFQ_IOPS_MODE_GROUP_DELAY;
+}
+
 static void
 cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
@@ -1381,7 +1384,8 @@ cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	n = rb_last(&st->rb);
 	if (n) {
 		__cfqg = rb_entry_cfqg(n);
-		cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
+		cfqg->vdisktime = __cfqg->vdisktime +
+			cfq_get_cfqg_vdisktime_delay(cfqd);
 	} else
 		cfqg->vdisktime = st->min_vdisktime;
 	cfq_group_service_tree_add(st, cfqg);
diff --git a/block/elevator.c b/block/elevator.c
index ab726a5c0bf6..4bb2f0c93fa6 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -681,6 +681,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 		 */
 		if (elv_attempt_insert_merge(q, rq))
 			break;
+		/* fall through */
 	case ELEVATOR_INSERT_SORT:
 		BUG_ON(blk_rq_is_passthrough(rq));
 		rq->rq_flags |= RQF_SORTED;
@@ -1062,10 +1063,8 @@ static int __elevator_change(struct request_queue *q, const char *name)
 
 	strlcpy(elevator_name, name, sizeof(elevator_name));
 	e = elevator_get(strstrip(elevator_name), true);
-	if (!e) {
-		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
+	if (!e)
 		return -EINVAL;
-	}
 
 	if (q->elevator &&
 	    !strcmp(elevator_name, q->elevator->type->elevator_name)) {
@@ -1105,7 +1104,6 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
 	if (!ret)
 		return count;
 
-	printk(KERN_ERR "elevator: switch to %s failed\n", name);
 	return ret;
 }
 
diff --git a/block/genhd.c b/block/genhd.c
index d252d29fe837..7f520fa25d16 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -36,7 +36,7 @@ struct kobject *block_depr;
 static DEFINE_SPINLOCK(ext_devt_lock);
 static DEFINE_IDR(ext_devt_idr);
 
-static struct device_type disk_type;
+static const struct device_type disk_type;
 
 static void disk_check_events(struct disk_events *ev,
 			      unsigned int *clearing_ptr);
@@ -1183,7 +1183,7 @@ static char *block_devnode(struct device *dev, umode_t *mode,
 	return NULL;
 }
 
-static struct device_type disk_type = {
+static const struct device_type disk_type = {
 	.name		= "disk",
 	.groups		= disk_attr_groups,
 	.release	= disk_release,
diff --git a/block/ioprio.c b/block/ioprio.c
index 4b120c9cf7e8..6f5d0b6625e3 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -75,7 +75,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
 		case IOPRIO_CLASS_RT:
 			if (!capable(CAP_SYS_ADMIN))
 				return -EPERM;
-			/* fall through, rt has prio field too */
+			/* fall through */
+			/* rt has prio field too */
 		case IOPRIO_CLASS_BE:
 			if (data >= IOPRIO_BE_NR || data < 0)
 				return -EINVAL;
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b9faabc75fdb..f58cab82105b 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -99,7 +99,7 @@ struct kyber_hctx_data {
 	struct list_head rqs[KYBER_NUM_DOMAINS];
 	unsigned int cur_domain;
 	unsigned int batching;
-	wait_queue_t domain_wait[KYBER_NUM_DOMAINS];
+	wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
 	atomic_t wait_index[KYBER_NUM_DOMAINS];
 };
 
@@ -385,7 +385,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 
 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
 		INIT_LIST_HEAD(&khd->rqs[i]);
-		INIT_LIST_HEAD(&khd->domain_wait[i].task_list);
+		INIT_LIST_HEAD(&khd->domain_wait[i].entry);
 		atomic_set(&khd->wait_index[i], 0);
 	}
 
@@ -426,33 +426,29 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd,
 	}
 }
 
-static struct request *kyber_get_request(struct request_queue *q,
-					 unsigned int op,
-					 struct blk_mq_alloc_data *data)
+static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
 {
-	struct kyber_queue_data *kqd = q->elevator->elevator_data;
-	struct request *rq;
-
 	/*
 	 * We use the scheduler tags as per-hardware queue queueing tokens.
 	 * Async requests can be limited at this stage.
 	 */
-	if (!op_is_sync(op))
+	if (!op_is_sync(op)) {
+		struct kyber_queue_data *kqd = data->q->elevator->elevator_data;
+
 		data->shallow_depth = kqd->async_depth;
+	}
+}
 
-	rq = __blk_mq_alloc_request(data, op);
-	if (rq)
-		rq_set_domain_token(rq, -1);
-	return rq;
+static void kyber_prepare_request(struct request *rq, struct bio *bio)
+{
+	rq_set_domain_token(rq, -1);
 }
 
-static void kyber_put_request(struct request *rq)
+static void kyber_finish_request(struct request *rq)
 {
-	struct request_queue *q = rq->q;
-	struct kyber_queue_data *kqd = q->elevator->elevator_data;
+	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
 
 	rq_clear_domain_token(kqd, rq);
-	blk_mq_finish_request(rq);
 }
 
 static void kyber_completed_request(struct request *rq)
@@ -507,12 +503,12 @@ static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd,
 	}
 }
 
-static int kyber_domain_wake(wait_queue_t *wait, unsigned mode, int flags,
+static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
 			     void *key)
 {
 	struct blk_mq_hw_ctx *hctx = READ_ONCE(wait->private);
 
-	list_del_init(&wait->task_list);
+	list_del_init(&wait->entry);
 	blk_mq_run_hw_queue(hctx, true);
 	return 1;
 }
@@ -523,7 +519,7 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
 {
 	unsigned int sched_domain = khd->cur_domain;
 	struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain];
-	wait_queue_t *wait = &khd->domain_wait[sched_domain];
+	wait_queue_entry_t *wait = &khd->domain_wait[sched_domain];
 	struct sbq_wait_state *ws;
 	int nr;
 
@@ -536,7 +532,7 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
 	 * run when one becomes available. Note that this is serialized on
 	 * khd->lock, but we still need to be careful about the waker.
 	 */
-	if (list_empty_careful(&wait->task_list)) {
+	if (list_empty_careful(&wait->entry)) {
 		init_waitqueue_func_entry(wait, kyber_domain_wake);
 		wait->private = hctx;
 		ws = sbq_wait_ptr(domain_tokens,
@@ -734,9 +730,9 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m)	\
 {									\
 	struct blk_mq_hw_ctx *hctx = data;				\
 	struct kyber_hctx_data *khd = hctx->sched_data;			\
-	wait_queue_t *wait = &khd->domain_wait[domain];			\
+	wait_queue_entry_t *wait = &khd->domain_wait[domain];		\
 									\
-	seq_printf(m, "%d\n", !list_empty_careful(&wait->task_list));	\
+	seq_printf(m, "%d\n", !list_empty_careful(&wait->entry));	\
 	return 0;							\
 }
 KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
@@ -815,8 +811,9 @@ static struct elevator_type kyber_sched = {
 		.exit_sched = kyber_exit_sched,
 		.init_hctx = kyber_init_hctx,
 		.exit_hctx = kyber_exit_hctx,
-		.get_request = kyber_get_request,
-		.put_request = kyber_put_request,
+		.limit_depth = kyber_limit_depth,
+		.prepare_request = kyber_prepare_request,
+		.finish_request = kyber_finish_request,
 		.completed_request = kyber_completed_request,
 		.dispatch_request = kyber_dispatch_request,
 		.has_work = kyber_has_work,
diff --git a/block/partition-generic.c b/block/partition-generic.c
index ff07b9143ca4..c5ec8246e25e 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -320,8 +320,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 
 	if (info) {
 		struct partition_meta_info *pinfo = alloc_part_info(disk);
-		if (!pinfo)
+		if (!pinfo) {
+			err = -ENOMEM;
 			goto out_free_stats;
+		}
 		memcpy(pinfo, info, sizeof(*info));
 		p->info = pinfo;
 	}
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index edcea70674c9..2a365c756648 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -115,7 +115,7 @@ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
 		ldm_error("PRIVHEAD disk size doesn't match real disk size");
 		return false;
 	}
-	if (uuid_be_to_bin(data + 0x0030, (uuid_be *)ph->disk_id)) {
+	if (uuid_parse(data + 0x0030, &ph->disk_id)) {
 		ldm_error("PRIVHEAD contains an invalid GUID.");
 		return false;
 	}
@@ -234,7 +234,7 @@ static bool ldm_compare_privheads (const struct privhead *ph1,
 		(ph1->logical_disk_size  == ph2->logical_disk_size)	&&
 		(ph1->config_start       == ph2->config_start)		&&
 		(ph1->config_size        == ph2->config_size)		&&
-		!memcmp (ph1->disk_id, ph2->disk_id, GUID_SIZE));
+		uuid_equal(&ph1->disk_id, &ph2->disk_id));
 }
 
 /**
@@ -557,7 +557,7 @@ static struct vblk * ldm_get_disk_objid (const struct ldmdb *ldb)
 
 	list_for_each (item, &ldb->v_disk) {
 		struct vblk *v = list_entry (item, struct vblk, list);
-		if (!memcmp (v->vblk.disk.disk_id, ldb->ph.disk_id, GUID_SIZE))
+		if (uuid_equal(&v->vblk.disk.disk_id, &ldb->ph.disk_id))
 			return v;
 	}
 
@@ -892,7 +892,7 @@ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb)
 	disk = &vb->vblk.disk;
 	ldm_get_vstr (buffer + 0x18 + r_diskid, disk->alt_name,
 		sizeof (disk->alt_name));
-	if (uuid_be_to_bin(buffer + 0x19 + r_name, (uuid_be *)disk->disk_id))
+	if (uuid_parse(buffer + 0x19 + r_name, &disk->disk_id))
 		return false;
 
 	return true;
@@ -927,7 +927,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
 		return false;
 
 	disk = &vb->vblk.disk;
-	memcpy (disk->disk_id, buffer + 0x18 + r_name, GUID_SIZE);
+	uuid_copy(&disk->disk_id, (uuid_t *)(buffer + 0x18 + r_name));
 	return true;
 }
 
diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h
index 374242c0971a..f4c6055df956 100644
--- a/block/partitions/ldm.h
+++ b/block/partitions/ldm.h
@@ -112,8 +112,6 @@ struct frag {				/* VBLK Fragment handling */
 
 /* In memory LDM database structures. */
 
-#define GUID_SIZE		16
-
 struct privhead {			/* Offsets and sizes are in sectors. */
 	u16	ver_major;
 	u16	ver_minor;
@@ -121,7 +119,7 @@ struct privhead {			/* Offsets and sizes are in sectors. */
 	u64	logical_disk_size;
 	u64	config_start;
 	u64	config_size;
-	u8	disk_id[GUID_SIZE];
+	uuid_t	disk_id;
 };
 
 struct tocblock {			/* We have exactly two bitmaps. */
@@ -154,7 +152,7 @@ struct vblk_dgrp {			/* VBLK Disk Group */
 };
 
 struct vblk_disk {			/* VBLK Disk */
-	u8	disk_id[GUID_SIZE];
+	uuid_t	disk_id;
 	u8	alt_name[128];
 };
 
diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c
index 93e7c1b32edd..5610cd537da7 100644
--- a/block/partitions/msdos.c
+++ b/block/partitions/msdos.c
@@ -300,6 +300,8 @@ static void parse_bsd(struct parsed_partitions *state,
 			continue;
 		bsd_start = le32_to_cpu(p->p_offset);
 		bsd_size = le32_to_cpu(p->p_size);
+		if (memcmp(flavour, "bsd\0", 4) == 0)
+			bsd_start += offset;
 		if (offset == bsd_start && size == bsd_size)
 			/* full parent partition, we have it already */
 			continue;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 4a294a5f7fab..7440de44dd85 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -326,7 +326,6 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 	req = scsi_req(rq);
-	scsi_req_init(rq);
 
 	if (hdr->cmd_len > BLK_MAX_CDB) {
 		req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
@@ -456,7 +455,6 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 		goto error_free_buffer;
 	}
 	req = scsi_req(rq);
-	scsi_req_init(rq);
 
 	cmdlen = COMMAND_SIZE(opcode);
 
@@ -542,7 +540,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
 	rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
-	scsi_req_init(rq);
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
 	scsi_req(rq)->cmd[0] = cmd;
 	scsi_req(rq)->cmd[4] = data;
@@ -744,10 +741,14 @@ int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
 }
 EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
 
-void scsi_req_init(struct request *rq)
+/**
+ * scsi_req_init - initialize certain fields of a scsi_request structure
+ * @req: Pointer to a scsi_request structure.
+ * Initializes .__cmd[], .cmd, .cmd_len and .sense_len but no other members
+ * of struct scsi_request.
+ */
+void scsi_req_init(struct scsi_request *req)
 {
-	struct scsi_request *req = scsi_req(rq);
-
 	memset(req->__cmd, 0, sizeof(req->__cmd));
 	req->cmd = req->__cmd;
 	req->cmd_len = BLK_MAX_CDB;
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 680c6d636298..3416dadf7b15 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -46,8 +46,8 @@ static __be16 t10_pi_ip_fn(void *data, unsigned int len)
  * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
  * tag.
  */
-static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
-			   unsigned int type)
+static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
+		csum_fn *fn, unsigned int type)
 {
 	unsigned int i;
 
@@ -67,11 +67,11 @@ static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
 		iter->seed++;
 	}
 
-	return 0;
+	return BLK_STS_OK;
 }
 
-static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
-				unsigned int type)
+static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
+		csum_fn *fn, unsigned int type)
 {
 	unsigned int i;
 
@@ -91,7 +91,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
 				       "(rcvd %u)\n", iter->disk_name,
 				       (unsigned long long)
 				       iter->seed, be32_to_cpu(pi->ref_tag));
-				return -EILSEQ;
+				return BLK_STS_PROTECTION;
 			}
 			break;
 		case 3:
@@ -108,7 +108,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
 			       "(rcvd %04x, want %04x)\n", iter->disk_name,
 			       (unsigned long long)iter->seed,
 			       be16_to_cpu(pi->guard_tag), be16_to_cpu(csum));
-			return -EILSEQ;
+			return BLK_STS_PROTECTION;
 		}
 
 next:
@@ -117,45 +117,45 @@ next:
 		iter->seed++;
 	}
 
-	return 0;
+	return BLK_STS_OK;
 }
 
-static int t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
 {
 	return t10_pi_generate(iter, t10_pi_crc_fn, 1);
 }
 
-static int t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
 {
 	return t10_pi_generate(iter, t10_pi_ip_fn, 1);
 }
 
-static int t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
 {
 	return t10_pi_verify(iter, t10_pi_crc_fn, 1);
 }
 
-static int t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
 {
 	return t10_pi_verify(iter, t10_pi_ip_fn, 1);
 }
 
-static int t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
 {
 	return t10_pi_generate(iter, t10_pi_crc_fn, 3);
 }
 
-static int t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
 {
 	return t10_pi_generate(iter, t10_pi_ip_fn, 3);
 }
 
-static int t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
 {
 	return t10_pi_verify(iter, t10_pi_crc_fn, 3);
 }
 
-static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
 {
 	return t10_pi_verify(iter, t10_pi_ip_fn, 3);
 }
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index d3a989e718f5..3cd6e12cfc46 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -141,7 +141,7 @@ int public_key_verify_signature(const struct public_key *pkey,
 	 * signature and returns that to us.
 	 */
 	ret = crypto_akcipher_verify(req);
-	if (ret == -EINPROGRESS) {
+	if ((ret == -EINPROGRESS) || (ret == -EBUSY)) {
 		wait_for_completion(&compl.completion);
 		ret = compl.err;
 	}
diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c
index 672a94c2c3ff..d178650fd524 100644
--- a/crypto/asymmetric_keys/verify_pefile.c
+++ b/crypto/asymmetric_keys/verify_pefile.c
@@ -381,7 +381,7 @@ static int pefile_digest_pe(const void *pebuf, unsigned int pelen,
 	}
 
 error:
-	kfree(desc);
+	kzfree(desc);
 error_no_desc:
 	crypto_free_shash(tfm);
 	kleave(" = %d", ret);
@@ -450,6 +450,6 @@ int verify_pefile_signature(const void *pebuf, unsigned pelen,
 	ret = pefile_digest_pe(pebuf, pelen, &ctx);
 
 error:
-	kfree(ctx.digest);
+	kzfree(ctx.digest);
 	return ret;
 }
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index c80765b211cf..dd03fead1ca3 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -102,6 +102,7 @@ struct x509_certificate *x509_cert_parse(const void *data, size_t datalen)
 		}
 	}
 
+	ret = -ENOMEM;
 	cert->pub->key = kmemdup(ctx->key, ctx->key_size, GFP_KERNEL);
 	if (!cert->pub->key)
 		goto error_decode;
diff --git a/crypto/drbg.c b/crypto/drbg.c
index fa749f470135..cdb27ac4b226 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -1767,9 +1767,8 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg,
 			break;
 		case -EINPROGRESS:
 		case -EBUSY:
-			ret = wait_for_completion_interruptible(
-				&drbg->ctr_completion);
-			if (!ret && !drbg->ctr_async_err) {
+			wait_for_completion(&drbg->ctr_completion);
+			if (!drbg->ctr_async_err) {
 				reinit_completion(&drbg->ctr_completion);
 				break;
 			}
diff --git a/crypto/gcm.c b/crypto/gcm.c
index b7ad808be3d4..3841b5eafa7e 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -152,10 +152,8 @@ static int crypto_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 
 	err = crypto_skcipher_encrypt(&data->req);
 	if (err == -EINPROGRESS || err == -EBUSY) {
-		err = wait_for_completion_interruptible(
-			&data->result.completion);
-		if (!err)
-			err = data->result.err;
+		wait_for_completion(&data->result.completion);
+		err = data->result.err;
 	}
 
 	if (err)
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 014af741fc6a..4faa0fd53b0c 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -764,6 +764,44 @@ static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm)
 	return 0;
 }
 
+static int skcipher_setkey_unaligned(struct crypto_skcipher *tfm,
+				     const u8 *key, unsigned int keylen)
+{
+	unsigned long alignmask = crypto_skcipher_alignmask(tfm);
+	struct skcipher_alg *cipher = crypto_skcipher_alg(tfm);
+	u8 *buffer, *alignbuffer;
+	unsigned long absize;
+	int ret;
+
+	absize = keylen + alignmask;
+	buffer = kmalloc(absize, GFP_ATOMIC);
+	if (!buffer)
+		return -ENOMEM;
+
+	alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
+	memcpy(alignbuffer, key, keylen);
+	ret = cipher->setkey(tfm, alignbuffer, keylen);
+	kzfree(buffer);
+	return ret;
+}
+
+static int skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keylen)
+{
+	struct skcipher_alg *cipher = crypto_skcipher_alg(tfm);
+	unsigned long alignmask = crypto_skcipher_alignmask(tfm);
+
+	if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) {
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	if ((unsigned long)key & alignmask)
+		return skcipher_setkey_unaligned(tfm, key, keylen);
+
+	return cipher->setkey(tfm, key, keylen);
+}
+
 static void crypto_skcipher_exit_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm);
@@ -784,7 +822,7 @@ static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm)
 	    tfm->__crt_alg->cra_type == &crypto_givcipher_type)
 		return crypto_init_skcipher_ops_ablkcipher(tfm);
 
-	skcipher->setkey = alg->setkey;
+	skcipher->setkey = skcipher_setkey;
 	skcipher->encrypt = alg->encrypt;
 	skcipher->decrypt = alg->decrypt;
 	skcipher->ivsize = alg->ivsize;
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 117ca14ccf85..ba2901e76769 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -204,4 +204,6 @@ source "drivers/fpga/Kconfig"
 
 source "drivers/fsi/Kconfig"
 
+source "drivers/tee/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index edba1edc6654..cfabd141dba2 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -180,3 +180,4 @@ obj-$(CONFIG_ANDROID)		+= android/
 obj-$(CONFIG_NVMEM)		+= nvmem/
 obj-$(CONFIG_FPGA)		+= fpga/
 obj-$(CONFIG_FSI)		+= fsi/
+obj-$(CONFIG_TEE)		+= tee/
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index d78065cc9324..b1aacfc62b1f 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -50,6 +50,7 @@ acpi-$(CONFIG_ACPI_REDUCED_HARDWARE_ONLY) += evged.o
 acpi-y				+= sysfs.o
 acpi-y				+= property.o
 acpi-$(CONFIG_X86)		+= acpi_cmos_rtc.o
+acpi-$(CONFIG_X86)		+= x86/utils.o
 acpi-$(CONFIG_DEBUG_FS)		+= debugfs.o
 acpi-$(CONFIG_ACPI_NUMA)	+= numa.o
 acpi-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o
diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index 17a1eb14847a..fc6c416f8724 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c
@@ -106,6 +106,16 @@ static const struct apd_device_desc vulcan_spi_desc = {
 	.setup = acpi_apd_setup,
 	.fixed_clk_rate = 133000000,
 };
+
+static const struct apd_device_desc hip07_i2c_desc = {
+	.setup = acpi_apd_setup,
+	.fixed_clk_rate = 200000000,
+};
+
+static const struct apd_device_desc hip08_i2c_desc = {
+	.setup = acpi_apd_setup,
+	.fixed_clk_rate = 250000000,
+};
 #endif
 
 #else
@@ -170,6 +180,8 @@ static const struct acpi_device_id acpi_apd_device_ids[] = {
 	{ "APMC0D0F", APD_ADDR(xgene_i2c_desc) },
 	{ "BRCM900D", APD_ADDR(vulcan_spi_desc) },
 	{ "CAV900D",  APD_ADDR(vulcan_spi_desc) },
+	{ "HISI0A21", APD_ADDR(hip07_i2c_desc) },
+	{ "HISI0A22", APD_ADDR(hip08_i2c_desc) },
 #endif
 	{ }
 };
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 502ea4dc2080..560fdae8cc59 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -141,9 +141,9 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	int	cpu = mce->extcpu;
 	struct acpi_hest_generic_status *estatus, *tmp;
 	struct acpi_hest_generic_data *gdata;
-	const uuid_le *fru_id = &NULL_UUID_LE;
+	const guid_t *fru_id = &guid_null;
 	char *fru_text = "";
-	uuid_le *sec_type;
+	guid_t *sec_type;
 	static u32 err_seq;
 
 	estatus = extlog_elog_entry_check(cpu, bank);
@@ -165,11 +165,11 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	err_seq++;
 	gdata = (struct acpi_hest_generic_data *)(tmp + 1);
 	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
-		fru_id = (uuid_le *)gdata->fru_id;
+		fru_id = (guid_t *)gdata->fru_id;
 	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
 		fru_text = gdata->fru_text;
-	sec_type = (uuid_le *)gdata->section_type;
-	if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
+	sec_type = (guid_t *)gdata->section_type;
+	if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
 		struct cper_sec_mem_err *mem = (void *)(gdata + 1);
 		if (gdata->error_data_length >= sizeof(*mem))
 			trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
@@ -182,17 +182,17 @@ out:
 
 static bool __init extlog_get_l1addr(void)
 {
-	u8 uuid[16];
+	guid_t guid;
 	acpi_handle handle;
 	union acpi_object *obj;
 
-	acpi_str_to_uuid(extlog_dsm_uuid, uuid);
-
+	if (guid_parse(extlog_dsm_uuid, &guid))
+		return false;
 	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
 		return false;
-	if (!acpi_check_dsm(handle, uuid, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR))
+	if (!acpi_check_dsm(handle, &guid, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR))
 		return false;
-	obj = acpi_evaluate_dsm_typed(handle, uuid, EXTLOG_DSM_REV,
+	obj = acpi_evaluate_dsm_typed(handle, &guid, EXTLOG_DSM_REV,
 				      EXTLOG_FN_ADDR, NULL, ACPI_TYPE_INTEGER);
 	if (!obj) {
 		return false;
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 5edfd9c49044..10347e3d73ad 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -143,6 +143,22 @@ static void lpss_deassert_reset(struct lpss_private_data *pdata)
 	writel(val, pdata->mmio_base + offset);
 }
 
+/*
+ * BYT PWM used for backlight control by the i915 driver on systems without
+ * the Crystal Cove PMIC.
+ */
+static struct pwm_lookup byt_pwm_lookup[] = {
+	PWM_LOOKUP_WITH_MODULE("80860F09:00", 0, "0000:00:02.0",
+			       "pwm_backlight", 0, PWM_POLARITY_NORMAL,
+			       "pwm-lpss-platform"),
+};
+
+static void byt_pwm_setup(struct lpss_private_data *pdata)
+{
+	if (!acpi_dev_present("INT33FD", NULL, -1))
+		pwm_add_table(byt_pwm_lookup, ARRAY_SIZE(byt_pwm_lookup));
+}
+
 #define LPSS_I2C_ENABLE			0x6c
 
 static void byt_i2c_setup(struct lpss_private_data *pdata)
@@ -200,6 +216,7 @@ static const struct lpss_device_desc lpt_sdio_dev_desc = {
 
 static const struct lpss_device_desc byt_pwm_dev_desc = {
 	.flags = LPSS_SAVE_CTX,
+	.setup = byt_pwm_setup,
 };
 
 static const struct lpss_device_desc bsw_pwm_dev_desc = {
diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile
index 32d93edbc479..dea65306b687 100644
--- a/drivers/acpi/acpica/Makefile
+++ b/drivers/acpi/acpica/Makefile
@@ -2,7 +2,7 @@
 # Makefile for ACPICA Core interpreter
 #
 
-ccflags-y			:= -Os -DBUILDING_ACPICA
+ccflags-y			:= -Os -D_LINUX -DBUILDING_ACPICA
 ccflags-$(CONFIG_ACPI_DEBUG)	+= -DACPI_DEBUG_OUTPUT
 
 # use acpi.o to put all files here into acpi.o modparam namespace
diff --git a/drivers/acpi/acpica/acconvert.h b/drivers/acpi/acpica/acconvert.h
new file mode 100644
index 000000000000..c84223b60b35
--- /dev/null
+++ b/drivers/acpi/acpica/acconvert.h
@@ -0,0 +1,144 @@
+/******************************************************************************
+ *
+ * Module Name: acapps - common include for ACPI applications/tools
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2017, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#ifndef _ACCONVERT
+#define _ACCONVERT
+
+/* Definitions for comment state */
+
+#define ASL_COMMENT_STANDARD    1
+#define ASLCOMMENT_INLINE       2
+#define ASL_COMMENT_OPEN_PAREN  3
+#define ASL_COMMENT_CLOSE_PAREN 4
+#define ASL_COMMENT_CLOSE_BRACE 5
+
+/* Definitions for comment print function*/
+
+#define AML_COMMENT_STANDARD    1
+#define AMLCOMMENT_INLINE       2
+#define AML_COMMENT_END_NODE    3
+#define AML_NAMECOMMENT         4
+#define AML_COMMENT_CLOSE_BRACE 5
+#define AML_COMMENT_ENDBLK      6
+#define AML_COMMENT_INCLUDE     7
+
+#ifdef ACPI_ASL_COMPILER
+/*
+ * cvcompiler
+ */
+void
+cv_process_comment(struct asl_comment_state current_state,
+		   char *string_buffer, int c1);
+
+void
+cv_process_comment_type2(struct asl_comment_state current_state,
+			 char *string_buffer);
+
+u32 cv_calculate_comment_lengths(union acpi_parse_object *op);
+
+void cv_process_comment_state(char input);
+
+char *cv_append_inline_comment(char *inline_comment, char *to_add);
+
+void cv_add_to_comment_list(char *to_add);
+
+void cv_place_comment(u8 type, char *comment_string);
+
+u32 cv_parse_op_block_type(union acpi_parse_object *op);
+
+struct acpi_comment_node *cv_comment_node_calloc(void);
+
+void cg_write_aml_def_block_comment(union acpi_parse_object *op);
+
+void
+cg_write_one_aml_comment(union acpi_parse_object *op,
+			 char *comment_to_print, u8 input_option);
+
+void cg_write_aml_comment(union acpi_parse_object *op);
+
+/*
+ * cvparser
+ */
+void
+cv_init_file_tree(struct acpi_table_header *table,
+		  u8 *aml_start, u32 aml_length);
+
+void cv_clear_op_comments(union acpi_parse_object *op);
+
+struct acpi_file_node *cv_filename_exists(char *filename,
+					  struct acpi_file_node *head);
+
+void cv_label_file_node(union acpi_parse_object *op);
+
+void
+cv_capture_list_comments(struct acpi_parse_state *parser_state,
+			 struct acpi_comment_node *list_head,
+			 struct acpi_comment_node *list_tail);
+
+void cv_capture_comments_only(struct acpi_parse_state *parser_state);
+
+void cv_capture_comments(struct acpi_walk_state *walk_state);
+
+void cv_transfer_comments(union acpi_parse_object *op);
+
+/*
+ * cvdisasm
+ */
+void cv_switch_files(u32 level, union acpi_parse_object *op);
+
+u8 cv_file_has_switched(union acpi_parse_object *op);
+
+void cv_close_paren_write_comment(union acpi_parse_object *op, u32 level);
+
+void cv_close_brace_write_comment(union acpi_parse_object *op, u32 level);
+
+void
+cv_print_one_comment_list(struct acpi_comment_node *comment_list, u32 level);
+
+void
+cv_print_one_comment_type(union acpi_parse_object *op,
+			  u8 comment_type, char *end_str, u32 level);
+
+#endif
+
+#endif				/* _ACCONVERT */
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 1d955fe216c4..abe8c316908c 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -370,6 +370,59 @@ ACPI_GLOBAL(const char, *acpi_gbl_pld_shape_list[]);
 
 #endif
 
+/*
+ * Meant for the -ca option.
+ */
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_inline_comment, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_end_node_comment, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_open_brace_comment, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_close_brace_comment, NULL);
+
+ACPI_INIT_GLOBAL(char *, acpi_gbl_root_filename, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_filename, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_parent_filename, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_include_filename, NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_last_list_head, NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_def_blk_comment_list_head,
+		 NULL);
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_def_blk_comment_list_tail,
+		 NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_reg_comment_list_head,
+		 NULL);
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_reg_comment_list_tail,
+		 NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_inc_comment_list_head,
+		 NULL);
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_inc_comment_list_tail,
+		 NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_end_blk_comment_list_head,
+		 NULL);
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_end_blk_comment_list_tail,
+		 NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_addr_node,
+		 *acpi_gbl_comment_addr_list_head, NULL);
+
+ACPI_INIT_GLOBAL(union acpi_parse_object, *acpi_gbl_current_scope, NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_file_node, *acpi_gbl_file_tree_root, NULL);
+
+ACPI_GLOBAL(acpi_cache_t *, acpi_gbl_reg_comment_cache);
+ACPI_GLOBAL(acpi_cache_t *, acpi_gbl_comment_addr_cache);
+ACPI_GLOBAL(acpi_cache_t *, acpi_gbl_file_cache);
+
+ACPI_INIT_GLOBAL(u8, gbl_capture_comments, FALSE);
+
+ACPI_INIT_GLOBAL(u8, acpi_gbl_debug_asl_conversion, FALSE);
+ACPI_INIT_GLOBAL(ACPI_FILE, acpi_gbl_conv_debug_file, NULL);
+
+ACPI_GLOBAL(char, acpi_gbl_table_sig[4]);
+
 /*****************************************************************************
  *
  * Application globals
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 8fd495e8fdce..f9b3f7fef462 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -53,7 +53,7 @@ typedef u32 acpi_mutex_handle;
 
 /* Total number of aml opcodes defined */
 
-#define AML_NUM_OPCODES                 0x82
+#define AML_NUM_OPCODES                 0x83
 
 /* Forward declarations */
 
@@ -754,21 +754,52 @@ union acpi_parse_value {
 #define ACPI_DISASM_ONLY_MEMBERS(a)
 #endif
 
+#if defined(ACPI_ASL_COMPILER)
+#define ACPI_CONVERTER_ONLY_MEMBERS(a)  a;
+#else
+#define ACPI_CONVERTER_ONLY_MEMBERS(a)
+#endif
+
 #define ACPI_PARSE_COMMON \
-	union acpi_parse_object         *parent;        /* Parent op */\
-	u8                              descriptor_type; /* To differentiate various internal objs */\
-	u8                              flags;          /* Type of Op */\
-	u16                             aml_opcode;     /* AML opcode */\
-	u8                              *aml;           /* Address of declaration in AML */\
-	union acpi_parse_object         *next;          /* Next op */\
-	struct acpi_namespace_node      *node;          /* For use by interpreter */\
-	union acpi_parse_value          value;          /* Value or args associated with the opcode */\
-	u8                              arg_list_length; /* Number of elements in the arg list */\
-	ACPI_DISASM_ONLY_MEMBERS (\
-	u16                             disasm_flags;   /* Used during AML disassembly */\
-	u8                              disasm_opcode;  /* Subtype used for disassembly */\
-	char                            *operator_symbol;/* Used for C-style operator name strings */\
-	char                            aml_op_name[16])	/* Op name (debug only) */
+	union acpi_parse_object         *parent;            /* Parent op */\
+	u8                              descriptor_type;    /* To differentiate various internal objs */\
+	u8                              flags;              /* Type of Op */\
+	u16                             aml_opcode;         /* AML opcode */\
+	u8                              *aml;               /* Address of declaration in AML */\
+	union acpi_parse_object         *next;              /* Next op */\
+	struct acpi_namespace_node      *node;              /* For use by interpreter */\
+	union acpi_parse_value          value;              /* Value or args associated with the opcode */\
+	u8                              arg_list_length;    /* Number of elements in the arg list */\
+	 ACPI_DISASM_ONLY_MEMBERS (\
+	u16                             disasm_flags;       /* Used during AML disassembly */\
+	u8                              disasm_opcode;      /* Subtype used for disassembly */\
+	char                            *operator_symbol;   /* Used for C-style operator name strings */\
+	char                            aml_op_name[16])    /* Op name (debug only) */\
+	 ACPI_CONVERTER_ONLY_MEMBERS (\
+	char                            *inline_comment;    /* Inline comment */\
+	char                            *end_node_comment;  /* End of node comment */\
+	char                            *name_comment;      /* Comment associated with the first parameter of the name node */\
+	char                            *close_brace_comment; /* Comments that come after } on the same as } */\
+	struct acpi_comment_node        *comment_list;      /* comments that appears before this node */\
+	struct acpi_comment_node        *end_blk_comment;   /* comments that at the end of a block but before ) or } */\
+	char                            *cv_filename;       /* Filename associated with this node. Used for ASL/ASL+ converter */\
+	char                            *cv_parent_filename)	/* Parent filename associated with this node. Used for ASL/ASL+ converter */
+
+/* categories of comments */
+
+typedef enum {
+	STANDARD_COMMENT = 1,
+	INLINE_COMMENT,
+	ENDNODE_COMMENT,
+	OPENBRACE_COMMENT,
+	CLOSE_BRACE_COMMENT,
+	STD_DEFBLK_COMMENT,
+	END_DEFBLK_COMMENT,
+	FILENAME_COMMENT,
+	PARENTFILENAME_COMMENT,
+	ENDBLK_COMMENT,
+	INCLUDE_COMMENT
+} asl_comment_types;
 
 /* Internal opcodes for disasm_opcode field above */
 
@@ -784,9 +815,38 @@ union acpi_parse_value {
 #define ACPI_DASM_LNOT_SUFFIX           0x09	/* End  of a Lnot_equal (etc.) pair of opcodes */
 #define ACPI_DASM_HID_STRING            0x0A	/* String is a _HID or _CID */
 #define ACPI_DASM_IGNORE_SINGLE         0x0B	/* Ignore the opcode but not it's children */
-#define ACPI_DASM_SWITCH_PREDICATE      0x0C	/* Object is a predicate for a Switch or Case block */
-#define ACPI_DASM_CASE                  0x0D	/* If/Else is a Case in a Switch/Case block */
-#define ACPI_DASM_DEFAULT               0x0E	/* Else is a Default in a Switch/Case block */
+#define ACPI_DASM_SWITCH                0x0C	/* While is a Switch */
+#define ACPI_DASM_SWITCH_PREDICATE      0x0D	/* Object is a predicate for a Switch or Case block */
+#define ACPI_DASM_CASE                  0x0E	/* If/Else is a Case in a Switch/Case block */
+#define ACPI_DASM_DEFAULT               0x0F	/* Else is a Default in a Switch/Case block */
+
+/*
+ * List struct used in the -ca option
+ */
+struct acpi_comment_node {
+	char *comment;
+	struct acpi_comment_node *next;
+};
+
+struct acpi_comment_addr_node {
+	u8 *addr;
+	struct acpi_comment_addr_node *next;
+};
+
+/*
+ * File node - used for "Include" operator file stack and
+ * depdendency tree for the -ca option
+ */
+struct acpi_file_node {
+	void *file;
+	char *filename;
+	char *file_start;	/* Points to AML and indicates when the AML for this particular file starts. */
+	char *file_end;		/* Points to AML and indicates when the AML for this particular file ends. */
+	struct acpi_file_node *next;
+	struct acpi_file_node *parent;
+	u8 include_written;
+	struct acpi_comment_node *include_comment;
+};
 
 /*
  * Generic operation (for example:  If, While, Store)
@@ -813,6 +873,8 @@ struct acpi_parse_obj_asl {
 	ACPI_PARSE_COMMON union acpi_parse_object *child;
 	union acpi_parse_object *parent_method;
 	char *filename;
+	u8 file_changed;
+	char *parent_filename;
 	char *external_name;
 	char *namepath;
 	char name_seg[4];
@@ -842,6 +904,14 @@ union acpi_parse_object {
 	struct acpi_parse_obj_asl asl;
 };
 
+struct asl_comment_state {
+	u8 comment_type;
+	u32 spaces_before;
+	union acpi_parse_object *latest_parse_node;
+	union acpi_parse_object *parsing_paren_brace_node;
+	u8 capture_comments;
+};
+
 /*
  * Parse state - one state per parser invocation and each control
  * method.
diff --git a/drivers/acpi/acpica/acmacros.h b/drivers/acpi/acpica/acmacros.h
index c3337514e0ed..c7f0c96cc00f 100644
--- a/drivers/acpi/acpica/acmacros.h
+++ b/drivers/acpi/acpica/acmacros.h
@@ -493,4 +493,39 @@
 
 #define ACPI_IS_OCTAL_DIGIT(d)              (((char)(d) >= '0') && ((char)(d) <= '7'))
 
+/*
+ * Macors used for the ASL-/ASL+ converter utility
+ */
+#ifdef ACPI_ASL_COMPILER
+
+#define ASL_CV_LABEL_FILENODE(a)         cv_label_file_node(a);
+#define ASL_CV_CAPTURE_COMMENTS_ONLY(a)   cv_capture_comments_only (a);
+#define ASL_CV_CAPTURE_COMMENTS(a)       cv_capture_comments (a);
+#define ASL_CV_TRANSFER_COMMENTS(a)      cv_transfer_comments (a);
+#define ASL_CV_CLOSE_PAREN(a,b)          cv_close_paren_write_comment(a,b);
+#define ASL_CV_CLOSE_BRACE(a,b)          cv_close_brace_write_comment(a,b);
+#define ASL_CV_SWITCH_FILES(a,b)         cv_switch_files(a,b);
+#define ASL_CV_CLEAR_OP_COMMENTS(a)       cv_clear_op_comments(a);
+#define ASL_CV_PRINT_ONE_COMMENT(a,b,c,d) cv_print_one_comment_type (a,b,c,d);
+#define ASL_CV_PRINT_ONE_COMMENT_LIST(a,b) cv_print_one_comment_list (a,b);
+#define ASL_CV_FILE_HAS_SWITCHED(a)       cv_file_has_switched(a)
+#define ASL_CV_INIT_FILETREE(a,b,c)      cv_init_file_tree(a,b,c);
+
+#else
+
+#define ASL_CV_LABEL_FILENODE(a)
+#define ASL_CV_CAPTURE_COMMENTS_ONLY(a)
+#define ASL_CV_CAPTURE_COMMENTS(a)
+#define ASL_CV_TRANSFER_COMMENTS(a)
+#define ASL_CV_CLOSE_PAREN(a,b)          acpi_os_printf (")");
+#define ASL_CV_CLOSE_BRACE(a,b)          acpi_os_printf ("}");
+#define ASL_CV_SWITCH_FILES(a,b)
+#define ASL_CV_CLEAR_OP_COMMENTS(a)
+#define ASL_CV_PRINT_ONE_COMMENT(a,b,c,d)
+#define ASL_CV_PRINT_ONE_COMMENT_LIST(a,b)
+#define ASL_CV_FILE_HAS_SWITCHED(a)       0
+#define ASL_CV_INIT_FILETREE(a,b,c)
+
+#endif
+
 #endif				/* ACMACROS_H */
diff --git a/drivers/acpi/acpica/acopcode.h b/drivers/acpi/acpica/acopcode.h
index e758f098ff4b..a5d9af758c52 100644
--- a/drivers/acpi/acpica/acopcode.h
+++ b/drivers/acpi/acpica/acopcode.h
@@ -90,6 +90,7 @@
 #define ARGP_BUFFER_OP                  ARGP_LIST3 (ARGP_PKGLENGTH,  ARGP_TERMARG,       ARGP_BYTELIST)
 #define ARGP_BYTE_OP                    ARGP_LIST1 (ARGP_BYTEDATA)
 #define ARGP_BYTELIST_OP                ARGP_LIST1 (ARGP_NAMESTRING)
+#define ARGP_COMMENT_OP                 ARGP_LIST2 (ARGP_BYTEDATA,   ARGP_COMMENT)
 #define ARGP_CONCAT_OP                  ARGP_LIST3 (ARGP_TERMARG,    ARGP_TERMARG,       ARGP_TARGET)
 #define ARGP_CONCAT_RES_OP              ARGP_LIST3 (ARGP_TERMARG,    ARGP_TERMARG,       ARGP_TARGET)
 #define ARGP_COND_REF_OF_OP             ARGP_LIST2 (ARGP_SIMPLENAME, ARGP_TARGET)
@@ -223,6 +224,7 @@
 #define ARGI_BUFFER_OP                  ARGI_LIST1 (ARGI_INTEGER)
 #define ARGI_BYTE_OP                    ARGI_INVALID_OPCODE
 #define ARGI_BYTELIST_OP                ARGI_INVALID_OPCODE
+#define ARGI_COMMENT_OP                 ARGI_INVALID_OPCODE
 #define ARGI_CONCAT_OP                  ARGI_LIST3 (ARGI_ANYTYPE,    ARGI_ANYTYPE,       ARGI_TARGETREF)
 #define ARGI_CONCAT_RES_OP              ARGI_LIST3 (ARGI_BUFFER,     ARGI_BUFFER,        ARGI_TARGETREF)
 #define ARGI_COND_REF_OF_OP             ARGI_LIST2 (ARGI_OBJECT_REF, ARGI_TARGETREF)
diff --git a/drivers/acpi/acpica/amlcode.h b/drivers/acpi/acpica/amlcode.h
index b536fd471292..176f7e9b4d0e 100644
--- a/drivers/acpi/acpica/amlcode.h
+++ b/drivers/acpi/acpica/amlcode.h
@@ -48,11 +48,8 @@
 
 /* primary opcodes */
 
-#define AML_NULL_CHAR               (u16) 0x00
-
 #define AML_ZERO_OP                 (u16) 0x00
 #define AML_ONE_OP                  (u16) 0x01
-#define AML_UNASSIGNED              (u16) 0x02
 #define AML_ALIAS_OP                (u16) 0x06
 #define AML_NAME_OP                 (u16) 0x08
 #define AML_BYTE_OP                 (u16) 0x0a
@@ -63,17 +60,15 @@
 #define AML_SCOPE_OP                (u16) 0x10
 #define AML_BUFFER_OP               (u16) 0x11
 #define AML_PACKAGE_OP              (u16) 0x12
-#define AML_VAR_PACKAGE_OP          (u16) 0x13	/* ACPI 2.0 */
+#define AML_VARIABLE_PACKAGE_OP     (u16) 0x13	/* ACPI 2.0 */
 #define AML_METHOD_OP               (u16) 0x14
 #define AML_EXTERNAL_OP             (u16) 0x15	/* ACPI 6.0 */
 #define AML_DUAL_NAME_PREFIX        (u16) 0x2e
-#define AML_MULTI_NAME_PREFIX_OP    (u16) 0x2f
-#define AML_NAME_CHAR_SUBSEQ        (u16) 0x30
-#define AML_NAME_CHAR_FIRST         (u16) 0x41
-#define AML_EXTENDED_OP_PREFIX      (u16) 0x5b
+#define AML_MULTI_NAME_PREFIX       (u16) 0x2f
+#define AML_EXTENDED_PREFIX         (u16) 0x5b
 #define AML_ROOT_PREFIX             (u16) 0x5c
 #define AML_PARENT_PREFIX           (u16) 0x5e
-#define AML_LOCAL_OP                (u16) 0x60
+#define AML_FIRST_LOCAL_OP          (u16) 0x60	/* Used for Local op # calculations */
 #define AML_LOCAL0                  (u16) 0x60
 #define AML_LOCAL1                  (u16) 0x61
 #define AML_LOCAL2                  (u16) 0x62
@@ -82,7 +77,7 @@
 #define AML_LOCAL5                  (u16) 0x65
 #define AML_LOCAL6                  (u16) 0x66
 #define AML_LOCAL7                  (u16) 0x67
-#define AML_ARG_OP                  (u16) 0x68
+#define AML_FIRST_ARG_OP            (u16) 0x68	/* Used for Arg op # calculations */
 #define AML_ARG0                    (u16) 0x68
 #define AML_ARG1                    (u16) 0x69
 #define AML_ARG2                    (u16) 0x6a
@@ -93,7 +88,7 @@
 #define AML_STORE_OP                (u16) 0x70
 #define AML_REF_OF_OP               (u16) 0x71
 #define AML_ADD_OP                  (u16) 0x72
-#define AML_CONCAT_OP               (u16) 0x73
+#define AML_CONCATENATE_OP          (u16) 0x73
 #define AML_SUBTRACT_OP             (u16) 0x74
 #define AML_INCREMENT_OP            (u16) 0x75
 #define AML_DECREMENT_OP            (u16) 0x76
@@ -110,7 +105,7 @@
 #define AML_FIND_SET_LEFT_BIT_OP    (u16) 0x81
 #define AML_FIND_SET_RIGHT_BIT_OP   (u16) 0x82
 #define AML_DEREF_OF_OP             (u16) 0x83
-#define AML_CONCAT_RES_OP           (u16) 0x84	/* ACPI 2.0 */
+#define AML_CONCATENATE_TEMPLATE_OP (u16) 0x84	/* ACPI 2.0 */
 #define AML_MOD_OP                  (u16) 0x85	/* ACPI 2.0 */
 #define AML_NOTIFY_OP               (u16) 0x86
 #define AML_SIZE_OF_OP              (u16) 0x87
@@ -122,18 +117,18 @@
 #define AML_CREATE_BIT_FIELD_OP     (u16) 0x8d
 #define AML_OBJECT_TYPE_OP          (u16) 0x8e
 #define AML_CREATE_QWORD_FIELD_OP   (u16) 0x8f	/* ACPI 2.0 */
-#define AML_LAND_OP                 (u16) 0x90
-#define AML_LOR_OP                  (u16) 0x91
-#define AML_LNOT_OP                 (u16) 0x92
-#define AML_LEQUAL_OP               (u16) 0x93
-#define AML_LGREATER_OP             (u16) 0x94
-#define AML_LLESS_OP                (u16) 0x95
+#define AML_LOGICAL_AND_OP          (u16) 0x90
+#define AML_LOGICAL_OR_OP           (u16) 0x91
+#define AML_LOGICAL_NOT_OP          (u16) 0x92
+#define AML_LOGICAL_EQUAL_OP        (u16) 0x93
+#define AML_LOGICAL_GREATER_OP      (u16) 0x94
+#define AML_LOGICAL_LESS_OP         (u16) 0x95
 #define AML_TO_BUFFER_OP            (u16) 0x96	/* ACPI 2.0 */
-#define AML_TO_DECSTRING_OP         (u16) 0x97	/* ACPI 2.0 */
-#define AML_TO_HEXSTRING_OP         (u16) 0x98	/* ACPI 2.0 */
+#define AML_TO_DECIMAL_STRING_OP    (u16) 0x97	/* ACPI 2.0 */
+#define AML_TO_HEX_STRING_OP        (u16) 0x98	/* ACPI 2.0 */
 #define AML_TO_INTEGER_OP           (u16) 0x99	/* ACPI 2.0 */
 #define AML_TO_STRING_OP            (u16) 0x9c	/* ACPI 2.0 */
-#define AML_COPY_OP                 (u16) 0x9d	/* ACPI 2.0 */
+#define AML_COPY_OBJECT_OP          (u16) 0x9d	/* ACPI 2.0 */
 #define AML_MID_OP                  (u16) 0x9e	/* ACPI 2.0 */
 #define AML_CONTINUE_OP             (u16) 0x9f	/* ACPI 2.0 */
 #define AML_IF_OP                   (u16) 0xa0
@@ -142,18 +137,27 @@
 #define AML_NOOP_OP                 (u16) 0xa3
 #define AML_RETURN_OP               (u16) 0xa4
 #define AML_BREAK_OP                (u16) 0xa5
-#define AML_BREAK_POINT_OP          (u16) 0xcc
+#define AML_COMMENT_OP              (u16) 0xa9
+#define AML_BREAKPOINT_OP          (u16) 0xcc
 #define AML_ONES_OP                 (u16) 0xff
 
-/* prefixed opcodes */
+/*
+ * Combination opcodes (actually two one-byte opcodes)
+ * Used by the disassembler and iASL compiler
+ */
+#define AML_LOGICAL_GREATER_EQUAL_OP (u16) 0x9295	/* LNot (LLess) */
+#define AML_LOGICAL_LESS_EQUAL_OP    (u16) 0x9294	/* LNot (LGreater) */
+#define AML_LOGICAL_NOT_EQUAL_OP     (u16) 0x9293	/* LNot (LEqual) */
+
+/* Prefixed (2-byte) opcodes (with AML_EXTENDED_PREFIX) */
 
-#define AML_EXTENDED_OPCODE         (u16) 0x5b00	/* prefix for 2-byte opcodes */
+#define AML_EXTENDED_OPCODE         (u16) 0x5b00	/* Prefix for 2-byte opcodes */
 
 #define AML_MUTEX_OP                (u16) 0x5b01
 #define AML_EVENT_OP                (u16) 0x5b02
-#define AML_SHIFT_RIGHT_BIT_OP      (u16) 0x5b10
-#define AML_SHIFT_LEFT_BIT_OP       (u16) 0x5b11
-#define AML_COND_REF_OF_OP          (u16) 0x5b12
+#define AML_SHIFT_RIGHT_BIT_OP      (u16) 0x5b10	/* Obsolete, not in ACPI spec */
+#define AML_SHIFT_LEFT_BIT_OP       (u16) 0x5b11	/* Obsolete, not in ACPI spec */
+#define AML_CONDITIONAL_REF_OF_OP   (u16) 0x5b12
 #define AML_CREATE_FIELD_OP         (u16) 0x5b13
 #define AML_LOAD_TABLE_OP           (u16) 0x5b1f	/* ACPI 2.0 */
 #define AML_LOAD_OP                 (u16) 0x5b20
@@ -175,21 +179,13 @@
 #define AML_FIELD_OP                (u16) 0x5b81
 #define AML_DEVICE_OP               (u16) 0x5b82
 #define AML_PROCESSOR_OP            (u16) 0x5b83
-#define AML_POWER_RES_OP            (u16) 0x5b84
+#define AML_POWER_RESOURCE_OP       (u16) 0x5b84
 #define AML_THERMAL_ZONE_OP         (u16) 0x5b85
 #define AML_INDEX_FIELD_OP          (u16) 0x5b86
 #define AML_BANK_FIELD_OP           (u16) 0x5b87
 #define AML_DATA_REGION_OP          (u16) 0x5b88	/* ACPI 2.0 */
 
 /*
- * Combination opcodes (actually two one-byte opcodes)
- * Used by the disassembler and iASL compiler
- */
-#define AML_LGREATEREQUAL_OP        (u16) 0x9295
-#define AML_LLESSEQUAL_OP           (u16) 0x9294
-#define AML_LNOTEQUAL_OP            (u16) 0x9293
-
-/*
  * Opcodes for "Field" operators
  */
 #define AML_FIELD_OFFSET_OP         (u8) 0x00
@@ -241,6 +237,7 @@
 #define ARGP_SIMPLENAME             0x12	/* name_string | local_term | arg_term */
 #define ARGP_NAME_OR_REF            0x13	/* For object_type only */
 #define ARGP_MAX                    0x13
+#define ARGP_COMMENT                0x14
 
 /*
  * Resolved argument types for the AML Interpreter
@@ -308,24 +305,19 @@
 #define ARGI_INVALID_OPCODE         0xFFFFFFFF
 
 /*
- * hash offsets
- */
-#define AML_EXTOP_HASH_OFFSET       22
-#define AML_LNOT_HASH_OFFSET        19
-
-/*
- * opcode groups and types
+ * Some of the flags and types below are of the form:
+ *
+ * AML_FLAGS_EXEC_#A_#T,#R, or
+ * AML_TYPE_EXEC_#A_#T,#R where:
+ *
+ *      #A is the number of required arguments
+ *      #T is the number of target operands
+ *      #R indicates whether there is a return value
  */
-#define OPGRP_NAMED                 0x01
-#define OPGRP_FIELD                 0x02
-#define OPGRP_BYTELIST              0x04
 
 /*
- * Opcode information
+ * Opcode information flags
  */
-
-/* Opcode flags */
-
 #define AML_LOGICAL                 0x0001
 #define AML_LOGICAL_NUMERIC         0x0002
 #define AML_MATH                    0x0004
@@ -342,7 +334,7 @@
 #define AML_CONSTANT                0x2000
 #define AML_NO_OPERAND_RESOLVE      0x4000
 
-/* Convenient flag groupings */
+/* Convenient flag groupings of the flags above */
 
 #define AML_FLAGS_EXEC_0A_0T_1R                                     AML_HAS_RETVAL
 #define AML_FLAGS_EXEC_1A_0T_0R     AML_HAS_ARGS	/* Monadic1  */
@@ -359,7 +351,7 @@
 
 /*
  * The opcode Type is used in a dispatch table, do not change
- * without updating the table.
+ * or add anything new without updating the table.
  */
 #define AML_TYPE_EXEC_0A_0T_1R      0x00
 #define AML_TYPE_EXEC_1A_0T_0R      0x01	/* Monadic1  */
@@ -385,7 +377,7 @@
 
 #define AML_TYPE_METHOD_CALL        0x10
 
-/* Misc */
+/* Miscellaneous types */
 
 #define AML_TYPE_CREATE_FIELD       0x11
 #define AML_TYPE_CREATE_OBJECT      0x12
@@ -395,7 +387,6 @@
 #define AML_TYPE_NAMED_SIMPLE       0x16
 #define AML_TYPE_NAMED_COMPLEX      0x17
 #define AML_TYPE_RETURN             0x18
-
 #define AML_TYPE_UNDEFINED          0x19
 #define AML_TYPE_BOGUS              0x1A
 
diff --git a/drivers/acpi/acpica/dbmethod.c b/drivers/acpi/acpica/dbmethod.c
index 15c8237b8a80..df62c9245efc 100644
--- a/drivers/acpi/acpica/dbmethod.c
+++ b/drivers/acpi/acpica/dbmethod.c
@@ -422,6 +422,7 @@ acpi_db_walk_for_execute(acpi_handle obj_handle,
 
 	status = acpi_get_object_info(obj_handle, &obj_info);
 	if (ACPI_FAILURE(status)) {
+		ACPI_FREE(pathname);
 		return (status);
 	}
 
diff --git a/drivers/acpi/acpica/dbxface.c b/drivers/acpi/acpica/dbxface.c
index 205b8e0eded5..8f665d94b8b5 100644
--- a/drivers/acpi/acpica/dbxface.c
+++ b/drivers/acpi/acpica/dbxface.c
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "amlcode.h"
 #include "acdebug.h"
+#include "acinterp.h"
 
 #define _COMPONENT          ACPI_CA_DEBUGGER
 ACPI_MODULE_NAME("dbxface")
@@ -125,7 +126,7 @@ error_exit:
  *
  * RETURN:      Status
  *
- * DESCRIPTION: Called for AML_BREAK_POINT_OP
+ * DESCRIPTION: Called for AML_BREAKPOINT_OP
  *
  ******************************************************************************/
 
@@ -368,7 +369,9 @@ acpi_db_single_step(struct acpi_walk_state *walk_state,
 		walk_state->method_breakpoint = 1;	/* Must be non-zero! */
 	}
 
+	acpi_ex_exit_interpreter();
 	status = acpi_db_start_command(walk_state, op);
+	acpi_ex_enter_interpreter();
 
 	/* User commands complete, continue execution of the interrupted method */
 
diff --git a/drivers/acpi/acpica/dscontrol.c b/drivers/acpi/acpica/dscontrol.c
index d31b49feaa79..f470e81b0499 100644
--- a/drivers/acpi/acpica/dscontrol.c
+++ b/drivers/acpi/acpica/dscontrol.c
@@ -347,7 +347,7 @@ acpi_ds_exec_end_control_op(struct acpi_walk_state *walk_state,
 
 		break;
 
-	case AML_BREAK_POINT_OP:
+	case AML_BREAKPOINT_OP:
 
 		acpi_db_signal_break_point(walk_state);
 
diff --git a/drivers/acpi/acpica/dsmthdat.c b/drivers/acpi/acpica/dsmthdat.c
index adcc72cd53a7..27a7de95f7b0 100644
--- a/drivers/acpi/acpica/dsmthdat.c
+++ b/drivers/acpi/acpica/dsmthdat.c
@@ -672,7 +672,8 @@ acpi_ds_store_object_to_local(u8 type,
  *
  * FUNCTION:    acpi_ds_method_data_get_type
  *
- * PARAMETERS:  opcode              - Either AML_LOCAL_OP or AML_ARG_OP
+ * PARAMETERS:  opcode              - Either AML_FIRST LOCAL_OP or
+ *                                    AML_FIRST_ARG_OP
  *              index               - Which Local or Arg whose type to get
  *              walk_state          - Current walk state object
  *
diff --git a/drivers/acpi/acpica/dsobject.c b/drivers/acpi/acpica/dsobject.c
index 8deaa16493a0..7df3152ed856 100644
--- a/drivers/acpi/acpica/dsobject.c
+++ b/drivers/acpi/acpica/dsobject.c
@@ -114,7 +114,7 @@ acpi_ds_build_internal_object(struct acpi_walk_state *walk_state,
 				    ((op->common.parent->common.aml_opcode ==
 				      AML_PACKAGE_OP)
 				     || (op->common.parent->common.aml_opcode ==
-					 AML_VAR_PACKAGE_OP))) {
+					 AML_VARIABLE_PACKAGE_OP))) {
 					/*
 					 * We didn't find the target and we are populating elements
 					 * of a package - ignore if slack enabled. Some ASL code
@@ -144,7 +144,7 @@ acpi_ds_build_internal_object(struct acpi_walk_state *walk_state,
 
 		if ((op->common.parent->common.aml_opcode == AML_PACKAGE_OP) ||
 		    (op->common.parent->common.aml_opcode ==
-		     AML_VAR_PACKAGE_OP)) {
+		     AML_VARIABLE_PACKAGE_OP)) {
 			/*
 			 * Attempt to resolve the node to a value before we insert it into
 			 * the package. If this is a reference to a common data type,
@@ -398,7 +398,7 @@ acpi_ds_build_internal_package_obj(struct acpi_walk_state *walk_state,
 
 	parent = op->common.parent;
 	while ((parent->common.aml_opcode == AML_PACKAGE_OP) ||
-	       (parent->common.aml_opcode == AML_VAR_PACKAGE_OP)) {
+	       (parent->common.aml_opcode == AML_VARIABLE_PACKAGE_OP)) {
 		parent = parent->common.parent;
 	}
 
@@ -769,10 +769,10 @@ acpi_ds_init_object_from_op(struct acpi_walk_state *walk_state,
 		switch (op_info->type) {
 		case AML_TYPE_LOCAL_VARIABLE:
 
-			/* Local ID (0-7) is (AML opcode - base AML_LOCAL_OP) */
+			/* Local ID (0-7) is (AML opcode - base AML_FIRST_LOCAL_OP) */
 
 			obj_desc->reference.value =
-			    ((u32)opcode) - AML_LOCAL_OP;
+			    ((u32)opcode) - AML_FIRST_LOCAL_OP;
 			obj_desc->reference.class = ACPI_REFCLASS_LOCAL;
 
 #ifndef ACPI_NO_METHOD_EXECUTION
@@ -790,9 +790,10 @@ acpi_ds_init_object_from_op(struct acpi_walk_state *walk_state,
 
 		case AML_TYPE_METHOD_ARGUMENT:
 
-			/* Arg ID (0-6) is (AML opcode - base AML_ARG_OP) */
+			/* Arg ID (0-6) is (AML opcode - base AML_FIRST_ARG_OP) */
 
-			obj_desc->reference.value = ((u32)opcode) - AML_ARG_OP;
+			obj_desc->reference.value =
+			    ((u32)opcode) - AML_FIRST_ARG_OP;
 			obj_desc->reference.class = ACPI_REFCLASS_ARG;
 
 #ifndef ACPI_NO_METHOD_EXECUTION
diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c
index 148523205d41..9a8f8a992b3e 100644
--- a/drivers/acpi/acpica/dsopcode.c
+++ b/drivers/acpi/acpica/dsopcode.c
@@ -639,7 +639,7 @@ acpi_ds_eval_data_object_operands(struct acpi_walk_state *walk_state,
 		break;
 
 	case AML_PACKAGE_OP:
-	case AML_VAR_PACKAGE_OP:
+	case AML_VARIABLE_PACKAGE_OP:
 
 		status =
 		    acpi_ds_build_internal_package_obj(walk_state, op, length,
@@ -660,7 +660,7 @@ acpi_ds_eval_data_object_operands(struct acpi_walk_state *walk_state,
 		if ((!op->common.parent) ||
 		    ((op->common.parent->common.aml_opcode != AML_PACKAGE_OP) &&
 		     (op->common.parent->common.aml_opcode !=
-		      AML_VAR_PACKAGE_OP)
+		      AML_VARIABLE_PACKAGE_OP)
 		     && (op->common.parent->common.aml_opcode !=
 			 AML_NAME_OP))) {
 			walk_state->result_obj = obj_desc;
diff --git a/drivers/acpi/acpica/dsutils.c b/drivers/acpi/acpica/dsutils.c
index 049fbab4e5a6..406edec20de7 100644
--- a/drivers/acpi/acpica/dsutils.c
+++ b/drivers/acpi/acpica/dsutils.c
@@ -275,10 +275,10 @@ acpi_ds_is_result_used(union acpi_parse_object * op,
 		if ((op->common.parent->common.aml_opcode == AML_REGION_OP) ||
 		    (op->common.parent->common.aml_opcode == AML_DATA_REGION_OP)
 		    || (op->common.parent->common.aml_opcode == AML_PACKAGE_OP)
-		    || (op->common.parent->common.aml_opcode ==
-			AML_VAR_PACKAGE_OP)
 		    || (op->common.parent->common.aml_opcode == AML_BUFFER_OP)
 		    || (op->common.parent->common.aml_opcode ==
+			AML_VARIABLE_PACKAGE_OP)
+		    || (op->common.parent->common.aml_opcode ==
 			AML_INT_EVAL_SUBTREE_OP)
 		    || (op->common.parent->common.aml_opcode ==
 			AML_BANK_FIELD_OP)) {
@@ -551,7 +551,7 @@ acpi_ds_create_operand(struct acpi_walk_state *walk_state,
 			 */
 			if (status == AE_NOT_FOUND) {
 				if (parent_op->common.aml_opcode ==
-				    AML_COND_REF_OF_OP) {
+				    AML_CONDITIONAL_REF_OF_OP) {
 					/*
 					 * For the Conditional Reference op, it's OK if
 					 * the name is not found;  We just need a way to
@@ -806,7 +806,7 @@ acpi_status acpi_ds_evaluate_name_path(struct acpi_walk_state *walk_state)
 	}
 
 	if ((op->common.parent->common.aml_opcode == AML_PACKAGE_OP) ||
-	    (op->common.parent->common.aml_opcode == AML_VAR_PACKAGE_OP) ||
+	    (op->common.parent->common.aml_opcode == AML_VARIABLE_PACKAGE_OP) ||
 	    (op->common.parent->common.aml_opcode == AML_REF_OF_OP)) {
 
 		/* TBD: Should we specify this feature as a bit of op_info->Flags of these opcodes? */
diff --git a/drivers/acpi/acpica/dswexec.c b/drivers/acpi/acpica/dswexec.c
index 78f8e6a4f72f..a2ff8ad70d58 100644
--- a/drivers/acpi/acpica/dswexec.c
+++ b/drivers/acpi/acpica/dswexec.c
@@ -497,7 +497,7 @@ acpi_status acpi_ds_exec_end_op(struct acpi_walk_state *walk_state)
 			if ((op->asl.parent) &&
 			    ((op->asl.parent->asl.aml_opcode == AML_PACKAGE_OP)
 			     || (op->asl.parent->asl.aml_opcode ==
-				 AML_VAR_PACKAGE_OP))) {
+				 AML_VARIABLE_PACKAGE_OP))) {
 				ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH,
 						  "Method Reference in a Package, Op=%p\n",
 						  op));
diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c
index 44d4553dfbdd..8d510c7e20c8 100644
--- a/drivers/acpi/acpica/dswload2.c
+++ b/drivers/acpi/acpica/dswload2.c
@@ -528,7 +528,7 @@ acpi_status acpi_ds_load2_end_op(struct acpi_walk_state *walk_state)
 			status = acpi_ex_create_processor(walk_state);
 			break;
 
-		case AML_POWER_RES_OP:
+		case AML_POWER_RESOURCE_OP:
 
 			status = acpi_ex_create_power_resource(walk_state);
 			break;
diff --git a/drivers/acpi/acpica/exmisc.c b/drivers/acpi/acpica/exmisc.c
index 1a6f59079ea5..f222a80ca38e 100644
--- a/drivers/acpi/acpica/exmisc.c
+++ b/drivers/acpi/acpica/exmisc.c
@@ -249,14 +249,14 @@ acpi_ex_do_logical_numeric_op(u16 opcode,
 	ACPI_FUNCTION_TRACE(ex_do_logical_numeric_op);
 
 	switch (opcode) {
-	case AML_LAND_OP:	/* LAnd (Integer0, Integer1) */
+	case AML_LOGICAL_AND_OP:	/* LAnd (Integer0, Integer1) */
 
 		if (integer0 && integer1) {
 			local_result = TRUE;
 		}
 		break;
 
-	case AML_LOR_OP:	/* LOr (Integer0, Integer1) */
+	case AML_LOGICAL_OR_OP:	/* LOr (Integer0, Integer1) */
 
 		if (integer0 || integer1) {
 			local_result = TRUE;
@@ -365,21 +365,21 @@ acpi_ex_do_logical_op(u16 opcode,
 		integer1 = local_operand1->integer.value;
 
 		switch (opcode) {
-		case AML_LEQUAL_OP:	/* LEqual (Operand0, Operand1) */
+		case AML_LOGICAL_EQUAL_OP:	/* LEqual (Operand0, Operand1) */
 
 			if (integer0 == integer1) {
 				local_result = TRUE;
 			}
 			break;
 
-		case AML_LGREATER_OP:	/* LGreater (Operand0, Operand1) */
+		case AML_LOGICAL_GREATER_OP:	/* LGreater (Operand0, Operand1) */
 
 			if (integer0 > integer1) {
 				local_result = TRUE;
 			}
 			break;
 
-		case AML_LLESS_OP:	/* LLess (Operand0, Operand1) */
+		case AML_LOGICAL_LESS_OP:	/* LLess (Operand0, Operand1) */
 
 			if (integer0 < integer1) {
 				local_result = TRUE;
@@ -408,7 +408,7 @@ acpi_ex_do_logical_op(u16 opcode,
 				 (length0 > length1) ? length1 : length0);
 
 		switch (opcode) {
-		case AML_LEQUAL_OP:	/* LEqual (Operand0, Operand1) */
+		case AML_LOGICAL_EQUAL_OP:	/* LEqual (Operand0, Operand1) */
 
 			/* Length and all bytes must be equal */
 
@@ -420,7 +420,7 @@ acpi_ex_do_logical_op(u16 opcode,
 			}
 			break;
 
-		case AML_LGREATER_OP:	/* LGreater (Operand0, Operand1) */
+		case AML_LOGICAL_GREATER_OP:	/* LGreater (Operand0, Operand1) */
 
 			if (compare > 0) {
 				local_result = TRUE;
@@ -437,7 +437,7 @@ acpi_ex_do_logical_op(u16 opcode,
 			}
 			break;
 
-		case AML_LLESS_OP:	/* LLess (Operand0, Operand1) */
+		case AML_LOGICAL_LESS_OP:	/* LLess (Operand0, Operand1) */
 
 			if (compare > 0) {
 				goto cleanup;	/* FALSE */
diff --git a/drivers/acpi/acpica/exnames.c b/drivers/acpi/acpica/exnames.c
index ee7b62a86661..caa5ed1f65ec 100644
--- a/drivers/acpi/acpica/exnames.c
+++ b/drivers/acpi/acpica/exnames.c
@@ -122,7 +122,7 @@ static char *acpi_ex_allocate_name_string(u32 prefix_count, u32 num_name_segs)
 
 		/* Set up multi prefixes   */
 
-		*temp_ptr++ = AML_MULTI_NAME_PREFIX_OP;
+		*temp_ptr++ = AML_MULTI_NAME_PREFIX;
 		*temp_ptr++ = (char)num_name_segs;
 	} else if (2 == num_name_segs) {
 
@@ -342,7 +342,7 @@ acpi_ex_get_name_string(acpi_object_type data_type,
 			}
 			break;
 
-		case AML_MULTI_NAME_PREFIX_OP:
+		case AML_MULTI_NAME_PREFIX:
 
 			ACPI_DEBUG_PRINT((ACPI_DB_LOAD,
 					  "MultiNamePrefix at %p\n",
diff --git a/drivers/acpi/acpica/exoparg1.c b/drivers/acpi/acpica/exoparg1.c
index af73fcde7e5c..e327349675cd 100644
--- a/drivers/acpi/acpica/exoparg1.c
+++ b/drivers/acpi/acpica/exoparg1.c
@@ -274,7 +274,7 @@ acpi_status acpi_ex_opcode_1A_1T_1R(struct acpi_walk_state *walk_state)
 	case AML_FIND_SET_RIGHT_BIT_OP:
 	case AML_FROM_BCD_OP:
 	case AML_TO_BCD_OP:
-	case AML_COND_REF_OF_OP:
+	case AML_CONDITIONAL_REF_OF_OP:
 
 		/* Create a return object of type Integer for these opcodes */
 
@@ -405,7 +405,7 @@ acpi_status acpi_ex_opcode_1A_1T_1R(struct acpi_walk_state *walk_state)
 			}
 			break;
 
-		case AML_COND_REF_OF_OP:	/* cond_ref_of (source_object, Result) */
+		case AML_CONDITIONAL_REF_OF_OP:	/* cond_ref_of (source_object, Result) */
 			/*
 			 * This op is a little strange because the internal return value is
 			 * different than the return value stored in the result descriptor
@@ -475,14 +475,14 @@ acpi_status acpi_ex_opcode_1A_1T_1R(struct acpi_walk_state *walk_state)
 		/*
 		 * ACPI 2.0 Opcodes
 		 */
-	case AML_COPY_OP:	/* Copy (Source, Target) */
+	case AML_COPY_OBJECT_OP:	/* copy_object (Source, Target) */
 
 		status =
 		    acpi_ut_copy_iobject_to_iobject(operand[0], &return_desc,
 						    walk_state);
 		break;
 
-	case AML_TO_DECSTRING_OP:	/* to_decimal_string (Data, Result) */
+	case AML_TO_DECIMAL_STRING_OP:	/* to_decimal_string (Data, Result) */
 
 		status =
 		    acpi_ex_convert_to_string(operand[0], &return_desc,
@@ -495,7 +495,7 @@ acpi_status acpi_ex_opcode_1A_1T_1R(struct acpi_walk_state *walk_state)
 		}
 		break;
 
-	case AML_TO_HEXSTRING_OP:	/* to_hex_string (Data, Result) */
+	case AML_TO_HEX_STRING_OP:	/* to_hex_string (Data, Result) */
 
 		status =
 		    acpi_ex_convert_to_string(operand[0], &return_desc,
@@ -603,7 +603,7 @@ acpi_status acpi_ex_opcode_1A_0T_1R(struct acpi_walk_state *walk_state)
 	/* Examine the AML opcode */
 
 	switch (walk_state->opcode) {
-	case AML_LNOT_OP:	/* LNot (Operand) */
+	case AML_LOGICAL_NOT_OP:	/* LNot (Operand) */
 
 		return_desc = acpi_ut_create_integer_object((u64) 0);
 		if (!return_desc) {
@@ -652,9 +652,8 @@ acpi_status acpi_ex_opcode_1A_0T_1R(struct acpi_walk_state *walk_state)
 		 * NOTE:  We use LNOT_OP here in order to force resolution of the
 		 * reference operand to an actual integer.
 		 */
-		status =
-		    acpi_ex_resolve_operands(AML_LNOT_OP, &temp_desc,
-					     walk_state);
+		status = acpi_ex_resolve_operands(AML_LOGICAL_NOT_OP,
+						  &temp_desc, walk_state);
 		if (ACPI_FAILURE(status)) {
 			ACPI_EXCEPTION((AE_INFO, status,
 					"While resolving operands for [%s]",
diff --git a/drivers/acpi/acpica/exoparg2.c b/drivers/acpi/acpica/exoparg2.c
index 44ecba50c0da..eecb3bff7fd7 100644
--- a/drivers/acpi/acpica/exoparg2.c
+++ b/drivers/acpi/acpica/exoparg2.c
@@ -298,7 +298,7 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state)
 					NULL, &return_desc->integer.value);
 		break;
 
-	case AML_CONCAT_OP:	/* Concatenate (Data1, Data2, Result) */
+	case AML_CONCATENATE_OP:	/* Concatenate (Data1, Data2, Result) */
 
 		status =
 		    acpi_ex_do_concatenate(operand[0], operand[1], &return_desc,
@@ -343,7 +343,7 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state)
 		       operand[0]->buffer.pointer, length);
 		break;
 
-	case AML_CONCAT_RES_OP:
+	case AML_CONCATENATE_TEMPLATE_OP:
 
 		/* concatenate_res_template (Buffer, Buffer, Result) (ACPI 2.0) */
 
diff --git a/drivers/acpi/acpica/exoparg6.c b/drivers/acpi/acpica/exoparg6.c
index 31e4df97cbe1..688032b58a21 100644
--- a/drivers/acpi/acpica/exoparg6.c
+++ b/drivers/acpi/acpica/exoparg6.c
@@ -124,8 +124,8 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:     (M == P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LEQUAL_OP, match_obj, package_obj,
-					  &logical_result);
+		    acpi_ex_do_logical_op(AML_LOGICAL_EQUAL_OP, match_obj,
+					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
 		}
@@ -137,8 +137,8 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:                  (M >= P[i]) (M not_less than P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LLESS_OP, match_obj, package_obj,
-					  &logical_result);
+		    acpi_ex_do_logical_op(AML_LOGICAL_LESS_OP, match_obj,
+					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
 		}
@@ -151,7 +151,7 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:         (M > P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LGREATER_OP, match_obj,
+		    acpi_ex_do_logical_op(AML_LOGICAL_GREATER_OP, match_obj,
 					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
@@ -164,7 +164,7 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:                     (M <= P[i]) (M not_greater than P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LGREATER_OP, match_obj,
+		    acpi_ex_do_logical_op(AML_LOGICAL_GREATER_OP, match_obj,
 					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
@@ -178,8 +178,8 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:            (M < P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LLESS_OP, match_obj, package_obj,
-					  &logical_result);
+		    acpi_ex_do_logical_op(AML_LOGICAL_LESS_OP, match_obj,
+					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
 		}
diff --git a/drivers/acpi/acpica/exresolv.c b/drivers/acpi/acpica/exresolv.c
index 7fecefc2e1b4..aa8c6fd74cc3 100644
--- a/drivers/acpi/acpica/exresolv.c
+++ b/drivers/acpi/acpica/exresolv.c
@@ -196,7 +196,8 @@ acpi_ex_resolve_object_to_value(union acpi_operand_object **stack_ptr,
 
 				if ((walk_state->opcode ==
 				     AML_INT_METHODCALL_OP)
-				    || (walk_state->opcode == AML_COPY_OP)) {
+				    || (walk_state->opcode ==
+					AML_COPY_OBJECT_OP)) {
 					break;
 				}
 
diff --git a/drivers/acpi/acpica/exstore.c b/drivers/acpi/acpica/exstore.c
index a2f8001aeb86..bdd43cde8f36 100644
--- a/drivers/acpi/acpica/exstore.c
+++ b/drivers/acpi/acpica/exstore.c
@@ -416,7 +416,7 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc,
 
 	/* Only limited target types possible for everything except copy_object */
 
-	if (walk_state->opcode != AML_COPY_OP) {
+	if (walk_state->opcode != AML_COPY_OBJECT_OP) {
 		/*
 		 * Only copy_object allows all object types to be overwritten. For
 		 * target_ref(s), there are restrictions on the object types that
@@ -499,7 +499,8 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc,
 	case ACPI_TYPE_STRING:
 	case ACPI_TYPE_BUFFER:
 
-		if ((walk_state->opcode == AML_COPY_OP) || !implicit_conversion) {
+		if ((walk_state->opcode == AML_COPY_OBJECT_OP) ||
+		    !implicit_conversion) {
 			/*
 			 * However, copy_object and Stores to arg_x do not perform
 			 * an implicit conversion, as per the ACPI specification.
diff --git a/drivers/acpi/acpica/exstoren.c b/drivers/acpi/acpica/exstoren.c
index 85db4716a043..56f59cf5da29 100644
--- a/drivers/acpi/acpica/exstoren.c
+++ b/drivers/acpi/acpica/exstoren.c
@@ -107,7 +107,7 @@ acpi_ex_resolve_object(union acpi_operand_object **source_desc_ptr,
 
 		/* For copy_object, no further validation necessary */
 
-		if (walk_state->opcode == AML_COPY_OP) {
+		if (walk_state->opcode == AML_COPY_OBJECT_OP) {
 			break;
 		}
 
diff --git a/drivers/acpi/acpica/hwvalid.c b/drivers/acpi/acpica/hwvalid.c
index 531620abed80..3094cec4eab4 100644
--- a/drivers/acpi/acpica/hwvalid.c
+++ b/drivers/acpi/acpica/hwvalid.c
@@ -102,7 +102,7 @@ static const struct acpi_port_info acpi_protected_ports[] = {
 	{"PCI", 0x0CF8, 0x0CFF, ACPI_OSI_WIN_XP}
 };
 
-#define ACPI_PORT_INFO_ENTRIES  ACPI_ARRAY_LENGTH (acpi_protected_ports)
+#define ACPI_PORT_INFO_ENTRIES      ACPI_ARRAY_LENGTH (acpi_protected_ports)
 
 /******************************************************************************
  *
@@ -128,7 +128,7 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
 	acpi_io_address last_address;
 	const struct acpi_port_info *port_info;
 
-	ACPI_FUNCTION_TRACE(hw_validate_io_request);
+	ACPI_FUNCTION_NAME(hw_validate_io_request);
 
 	/* Supported widths are 8/16/32 */
 
@@ -153,13 +153,13 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
 		ACPI_ERROR((AE_INFO,
 			    "Illegal I/O port address/length above 64K: %8.8X%8.8X/0x%X",
 			    ACPI_FORMAT_UINT64(address), byte_width));
-		return_ACPI_STATUS(AE_LIMIT);
+		return (AE_LIMIT);
 	}
 
 	/* Exit if requested address is not within the protected port table */
 
 	if (address > acpi_protected_ports[ACPI_PORT_INFO_ENTRIES - 1].end) {
-		return_ACPI_STATUS(AE_OK);
+		return (AE_OK);
 	}
 
 	/* Check request against the list of protected I/O ports */
@@ -167,7 +167,7 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
 	for (i = 0; i < ACPI_PORT_INFO_ENTRIES; i++, port_info++) {
 		/*
 		 * Check if the requested address range will write to a reserved
-		 * port. Four cases to consider:
+		 * port. There are four cases to consider:
 		 *
 		 * 1) Address range is contained completely in the port address range
 		 * 2) Address range overlaps port range at the port range start
@@ -198,7 +198,7 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
 		}
 	}
 
-	return_ACPI_STATUS(AE_OK);
+	return (AE_OK);
 }
 
 /******************************************************************************
@@ -206,7 +206,7 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
  * FUNCTION:    acpi_hw_read_port
  *
  * PARAMETERS:  Address             Address of I/O port/register to read
- *              Value               Where value is placed
+ *              Value               Where value (data) is returned
  *              Width               Number of bits
  *
  * RETURN:      Status and value read from port
@@ -244,7 +244,7 @@ acpi_status acpi_hw_read_port(acpi_io_address address, u32 *value, u32 width)
 	/*
 	 * There has been a protection violation within the request. Fall
 	 * back to byte granularity port I/O and ignore the failing bytes.
-	 * This provides Windows compatibility.
+	 * This provides compatibility with other ACPI implementations.
 	 */
 	for (i = 0, *value = 0; i < width; i += 8) {
 
@@ -307,7 +307,7 @@ acpi_status acpi_hw_write_port(acpi_io_address address, u32 value, u32 width)
 	/*
 	 * There has been a protection violation within the request. Fall
 	 * back to byte granularity port I/O and ignore the failing bytes.
-	 * This provides Windows compatibility.
+	 * This provides compatibility with other ACPI implementations.
 	 */
 	for (i = 0; i < width; i += 8) {
 
diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c
index 498bb8f70e6b..fb265b5737de 100644
--- a/drivers/acpi/acpica/nsaccess.c
+++ b/drivers/acpi/acpica/nsaccess.c
@@ -485,7 +485,7 @@ acpi_ns_lookup(union acpi_generic_state *scope_info,
 					  flags));
 			break;
 
-		case AML_MULTI_NAME_PREFIX_OP:
+		case AML_MULTI_NAME_PREFIX:
 
 			/* More than one name_seg, search rules do not apply */
 
diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c
index 38316266521e..418ef2ac82ab 100644
--- a/drivers/acpi/acpica/nsrepair.c
+++ b/drivers/acpi/acpica/nsrepair.c
@@ -290,22 +290,12 @@ object_repaired:
 	/* Object was successfully repaired */
 
 	if (package_index != ACPI_NOT_PACKAGE_ELEMENT) {
-		/*
-		 * The original object is a package element. We need to
-		 * decrement the reference count of the original object,
-		 * for removing it from the package.
-		 *
-		 * However, if the original object was just wrapped with a
-		 * package object as part of the repair, we don't need to
-		 * change the reference count.
-		 */
+
+		/* Update reference count of new object */
+
 		if (!(info->return_flags & ACPI_OBJECT_WRAPPED)) {
 			new_object->common.reference_count =
 			    return_object->common.reference_count;
-
-			if (return_object->common.reference_count > 1) {
-				return_object->common.reference_count--;
-			}
 		}
 
 		ACPI_DEBUG_PRINT((ACPI_DB_REPAIR,
diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c
index 352265498e90..06037e044694 100644
--- a/drivers/acpi/acpica/nsrepair2.c
+++ b/drivers/acpi/acpica/nsrepair2.c
@@ -403,16 +403,12 @@ acpi_ns_repair_CID(struct acpi_evaluate_info *info,
 			return (status);
 		}
 
-		/* Take care with reference counts */
-
 		if (original_element != *element_ptr) {
 
-			/* Element was replaced */
+			/* Update reference count of new object */
 
 			(*element_ptr)->common.reference_count =
 			    original_ref_count;
-
-			acpi_ut_remove_reference(original_element);
 		}
 
 		element_ptr++;
diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index 661676714f7b..2fe87d0dd9d5 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -252,7 +252,7 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info)
 			internal_name[1] = AML_DUAL_NAME_PREFIX;
 			result = &internal_name[2];
 		} else {
-			internal_name[1] = AML_MULTI_NAME_PREFIX_OP;
+			internal_name[1] = AML_MULTI_NAME_PREFIX;
 			internal_name[2] = (char)num_segments;
 			result = &internal_name[3];
 		}
@@ -274,7 +274,7 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info)
 			internal_name[i] = AML_DUAL_NAME_PREFIX;
 			result = &internal_name[(acpi_size)i + 1];
 		} else {
-			internal_name[i] = AML_MULTI_NAME_PREFIX_OP;
+			internal_name[i] = AML_MULTI_NAME_PREFIX;
 			internal_name[(acpi_size)i + 1] = (char)num_segments;
 			result = &internal_name[(acpi_size)i + 2];
 		}
@@ -450,7 +450,7 @@ acpi_ns_externalize_name(u32 internal_name_length,
 	 */
 	if (prefix_length < internal_name_length) {
 		switch (internal_name[prefix_length]) {
-		case AML_MULTI_NAME_PREFIX_OP:
+		case AML_MULTI_NAME_PREFIX:
 
 			/* <count> 4-byte names */
 
@@ -594,25 +594,20 @@ struct acpi_namespace_node *acpi_ns_validate_handle(acpi_handle handle)
 void acpi_ns_terminate(void)
 {
 	acpi_status status;
+	union acpi_operand_object *prev;
+	union acpi_operand_object *next;
 
 	ACPI_FUNCTION_TRACE(ns_terminate);
 
-#ifdef ACPI_EXEC_APP
-	{
-		union acpi_operand_object *prev;
-		union acpi_operand_object *next;
+	/* Delete any module-level code blocks */
 
-		/* Delete any module-level code blocks */
-
-		next = acpi_gbl_module_code_list;
-		while (next) {
-			prev = next;
-			next = next->method.mutex;
-			prev->method.mutex = NULL;	/* Clear the Mutex (cheated) field */
-			acpi_ut_remove_reference(prev);
-		}
+	next = acpi_gbl_module_code_list;
+	while (next) {
+		prev = next;
+		next = next->method.mutex;
+		prev->method.mutex = NULL;	/* Clear the Mutex (cheated) field */
+		acpi_ut_remove_reference(prev);
 	}
-#endif
 
 	/*
 	 * Free the entire namespace -- all nodes and all objects
diff --git a/drivers/acpi/acpica/psargs.c b/drivers/acpi/acpica/psargs.c
index 05b62ad44c3e..eb9dfaca555f 100644
--- a/drivers/acpi/acpica/psargs.c
+++ b/drivers/acpi/acpica/psargs.c
@@ -47,6 +47,7 @@
 #include "amlcode.h"
 #include "acnamesp.h"
 #include "acdispat.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("psargs")
@@ -186,7 +187,7 @@ char *acpi_ps_get_next_namestring(struct acpi_parse_state *parser_state)
 		end += 1 + (2 * ACPI_NAME_SIZE);
 		break;
 
-	case AML_MULTI_NAME_PREFIX_OP:
+	case AML_MULTI_NAME_PREFIX:
 
 		/* Multiple name segments, 4 chars each, count in next byte */
 
@@ -339,7 +340,7 @@ acpi_ps_get_next_namepath(struct acpi_walk_state *walk_state,
 		/* 2) not_found during a cond_ref_of(x) is ok by definition */
 
 		else if (walk_state->op->common.aml_opcode ==
-			 AML_COND_REF_OF_OP) {
+			 AML_CONDITIONAL_REF_OF_OP) {
 			status = AE_OK;
 		}
 
@@ -352,7 +353,7 @@ acpi_ps_get_next_namepath(struct acpi_walk_state *walk_state,
 			 ((arg->common.parent->common.aml_opcode ==
 			   AML_PACKAGE_OP)
 			  || (arg->common.parent->common.aml_opcode ==
-			      AML_VAR_PACKAGE_OP))) {
+			      AML_VARIABLE_PACKAGE_OP))) {
 			status = AE_OK;
 		}
 	}
@@ -502,6 +503,7 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 
 	ACPI_FUNCTION_TRACE(ps_get_next_field);
 
+	ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 	aml = parser_state->aml;
 
 	/* Determine field type */
@@ -546,6 +548,7 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 
 	/* Decode the field type */
 
+	ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 	switch (opcode) {
 	case AML_INT_NAMEDFIELD_OP:
 
@@ -555,6 +558,22 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 		acpi_ps_set_name(field, name);
 		parser_state->aml += ACPI_NAME_SIZE;
 
+		ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
+
+#ifdef ACPI_ASL_COMPILER
+		/*
+		 * Because the package length isn't represented as a parse tree object,
+		 * take comments surrounding this and add to the previously created
+		 * parse node.
+		 */
+		if (field->common.inline_comment) {
+			field->common.name_comment =
+			    field->common.inline_comment;
+		}
+		field->common.inline_comment = acpi_gbl_current_inline_comment;
+		acpi_gbl_current_inline_comment = NULL;
+#endif
+
 		/* Get the length which is encoded as a package length */
 
 		field->common.value.size =
@@ -609,11 +628,13 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 		if (ACPI_GET8(parser_state->aml) == AML_BUFFER_OP) {
 			parser_state->aml++;
 
+			ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 			pkg_end = parser_state->aml;
 			pkg_length =
 			    acpi_ps_get_next_package_length(parser_state);
 			pkg_end += pkg_length;
 
+			ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 			if (parser_state->aml < pkg_end) {
 
 				/* Non-empty list */
@@ -630,6 +651,7 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 				opcode = ACPI_GET8(parser_state->aml);
 				parser_state->aml++;
 
+				ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 				switch (opcode) {
 				case AML_BYTE_OP:	/* AML_BYTEDATA_ARG */
 
@@ -660,6 +682,7 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 
 				/* Fill in bytelist data */
 
+				ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 				arg->named.value.size = buffer_length;
 				arg->named.data = parser_state->aml;
 			}
diff --git a/drivers/acpi/acpica/psloop.c b/drivers/acpi/acpica/psloop.c
index 14d689606d2f..b4224005783c 100644
--- a/drivers/acpi/acpica/psloop.c
+++ b/drivers/acpi/acpica/psloop.c
@@ -55,6 +55,7 @@
 #include "acparser.h"
 #include "acdispat.h"
 #include "amlcode.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("psloop")
@@ -132,6 +133,21 @@ acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 		       !walk_state->arg_count) {
 			walk_state->aml = walk_state->parser_state.aml;
 
+			switch (op->common.aml_opcode) {
+			case AML_METHOD_OP:
+			case AML_BUFFER_OP:
+			case AML_PACKAGE_OP:
+			case AML_VARIABLE_PACKAGE_OP:
+			case AML_WHILE_OP:
+
+				break;
+
+			default:
+
+				ASL_CV_CAPTURE_COMMENTS(walk_state);
+				break;
+			}
+
 			status =
 			    acpi_ps_get_next_arg(walk_state,
 						 &(walk_state->parser_state),
@@ -254,7 +270,7 @@ acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 
 		case AML_BUFFER_OP:
 		case AML_PACKAGE_OP:
-		case AML_VAR_PACKAGE_OP:
+		case AML_VARIABLE_PACKAGE_OP:
 
 			if ((op->common.parent) &&
 			    (op->common.parent->common.aml_opcode ==
@@ -480,6 +496,8 @@ acpi_status acpi_ps_parse_loop(struct acpi_walk_state *walk_state)
 	/* Iterative parsing loop, while there is more AML to process: */
 
 	while ((parser_state->aml < parser_state->aml_end) || (op)) {
+		ASL_CV_CAPTURE_COMMENTS(walk_state);
+
 		aml_op_start = parser_state->aml;
 		if (!op) {
 			status =
@@ -516,6 +534,20 @@ acpi_status acpi_ps_parse_loop(struct acpi_walk_state *walk_state)
 		 */
 		walk_state->arg_count = 0;
 
+		switch (op->common.aml_opcode) {
+		case AML_BYTE_OP:
+		case AML_WORD_OP:
+		case AML_DWORD_OP:
+		case AML_QWORD_OP:
+
+			break;
+
+		default:
+
+			ASL_CV_CAPTURE_COMMENTS(walk_state);
+			break;
+		}
+
 		/* Are there any arguments that must be processed? */
 
 		if (walk_state->arg_types) {
diff --git a/drivers/acpi/acpica/psobject.c b/drivers/acpi/acpica/psobject.c
index 5c4aff0f4f26..5bcb61831706 100644
--- a/drivers/acpi/acpica/psobject.c
+++ b/drivers/acpi/acpica/psobject.c
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "acparser.h"
 #include "amlcode.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("psobject")
@@ -190,6 +191,7 @@ acpi_ps_build_named_op(struct acpi_walk_state *walk_state,
 	 */
 	while (GET_CURRENT_ARG_TYPE(walk_state->arg_types) &&
 	       (GET_CURRENT_ARG_TYPE(walk_state->arg_types) != ARGP_NAME)) {
+		ASL_CV_CAPTURE_COMMENTS(walk_state);
 		status =
 		    acpi_ps_get_next_arg(walk_state,
 					 &(walk_state->parser_state),
@@ -203,6 +205,18 @@ acpi_ps_build_named_op(struct acpi_walk_state *walk_state,
 		INCREMENT_ARG_LIST(walk_state->arg_types);
 	}
 
+	/* are there any inline comments associated with the name_seg?? If so, save this. */
+
+	ASL_CV_CAPTURE_COMMENTS(walk_state);
+
+#ifdef ACPI_ASL_COMPILER
+	if (acpi_gbl_current_inline_comment != NULL) {
+		unnamed_op->common.name_comment =
+		    acpi_gbl_current_inline_comment;
+		acpi_gbl_current_inline_comment = NULL;
+	}
+#endif
+
 	/*
 	 * Make sure that we found a NAME and didn't run out of arguments
 	 */
@@ -243,6 +257,30 @@ acpi_ps_build_named_op(struct acpi_walk_state *walk_state,
 
 	acpi_ps_append_arg(*op, unnamed_op->common.value.arg);
 
+#ifdef ACPI_ASL_COMPILER
+
+	/* save any comments that might be associated with unnamed_op. */
+
+	(*op)->common.inline_comment = unnamed_op->common.inline_comment;
+	(*op)->common.end_node_comment = unnamed_op->common.end_node_comment;
+	(*op)->common.close_brace_comment =
+	    unnamed_op->common.close_brace_comment;
+	(*op)->common.name_comment = unnamed_op->common.name_comment;
+	(*op)->common.comment_list = unnamed_op->common.comment_list;
+	(*op)->common.end_blk_comment = unnamed_op->common.end_blk_comment;
+	(*op)->common.cv_filename = unnamed_op->common.cv_filename;
+	(*op)->common.cv_parent_filename =
+	    unnamed_op->common.cv_parent_filename;
+	(*op)->named.aml = unnamed_op->common.aml;
+
+	unnamed_op->common.inline_comment = NULL;
+	unnamed_op->common.end_node_comment = NULL;
+	unnamed_op->common.close_brace_comment = NULL;
+	unnamed_op->common.name_comment = NULL;
+	unnamed_op->common.comment_list = NULL;
+	unnamed_op->common.end_blk_comment = NULL;
+#endif
+
 	if ((*op)->common.aml_opcode == AML_REGION_OP ||
 	    (*op)->common.aml_opcode == AML_DATA_REGION_OP) {
 		/*
diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index 451b672915f1..c343a0d5a3d2 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -69,7 +69,7 @@ ACPI_MODULE_NAME("psopcode")
 	AML_DEVICE_OP
 	AML_THERMAL_ZONE_OP
 	AML_METHOD_OP
-	AML_POWER_RES_OP
+	AML_POWER_RESOURCE_OP
 	AML_PROCESSOR_OP
 	AML_FIELD_OP
 	AML_INDEX_FIELD_OP
@@ -95,7 +95,7 @@ ACPI_MODULE_NAME("psopcode")
 	AML_DEVICE_OP
 	AML_THERMAL_ZONE_OP
 	AML_METHOD_OP
-	AML_POWER_RES_OP
+	AML_POWER_RESOURCE_OP
 	AML_PROCESSOR_OP
 	AML_FIELD_OP
 	AML_INDEX_FIELD_OP
@@ -113,7 +113,7 @@ ACPI_MODULE_NAME("psopcode")
 	AML_DEVICE_OP
 	AML_THERMAL_ZONE_OP
 	AML_METHOD_OP
-	AML_POWER_RES_OP
+	AML_POWER_RESOURCE_OP
 	AML_PROCESSOR_OP
 	AML_NAME_OP
 	AML_ALIAS_OP
@@ -136,7 +136,7 @@ ACPI_MODULE_NAME("psopcode")
 	AML_DEVICE_OP
 	AML_THERMAL_ZONE_OP
 	AML_METHOD_OP
-	AML_POWER_RES_OP
+	AML_POWER_RESOURCE_OP
 	AML_PROCESSOR_OP
 	AML_NAME_OP
 	AML_ALIAS_OP
@@ -149,7 +149,7 @@ ACPI_MODULE_NAME("psopcode")
   must be deferred until needed
 
 	AML_METHOD_OP
-	AML_VAR_PACKAGE_OP
+	AML_VARIABLE_PACKAGE_OP
 	AML_CREATE_FIELD_OP
 	AML_CREATE_BIT_FIELD_OP
 	AML_CREATE_BYTE_FIELD_OP
@@ -652,7 +652,10 @@ const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES] = {
 
 	/* 81 */ ACPI_OP("External", ARGP_EXTERNAL_OP, ARGI_EXTERNAL_OP,
 			 ACPI_TYPE_ANY, AML_CLASS_EXECUTE, /* ? */
-			 AML_TYPE_EXEC_3A_0T_0R, AML_FLAGS_EXEC_3A_0T_0R)
+			 AML_TYPE_EXEC_3A_0T_0R, AML_FLAGS_EXEC_3A_0T_0R),
+/* 82 */ ACPI_OP("Comment", ARGP_COMMENT_OP, ARGI_COMMENT_OP,
+			 ACPI_TYPE_STRING, AML_CLASS_ARGUMENT,
+			 AML_TYPE_LITERAL, AML_CONSTANT)
 
 /*! [End] no source code translation !*/
 };
diff --git a/drivers/acpi/acpica/psopinfo.c b/drivers/acpi/acpica/psopinfo.c
index 89f95b7f26e9..eff22950232b 100644
--- a/drivers/acpi/acpica/psopinfo.c
+++ b/drivers/acpi/acpica/psopinfo.c
@@ -226,7 +226,7 @@ const u8 acpi_gbl_short_op_index[256] = {
 /* 0x90 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x73, 0x74,
 /* 0x98 */ 0x75, 0x76, _UNK, _UNK, 0x77, 0x78, 0x79, 0x7A,
 /* 0xA0 */ 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x60, 0x61,
-/* 0xA8 */ 0x62, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
+/* 0xA8 */ 0x62, 0x82, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
 /* 0xB0 */ _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
 /* 0xB8 */ _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
 /* 0xC0 */ _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
diff --git a/drivers/acpi/acpica/psparse.c b/drivers/acpi/acpica/psparse.c
index a813bbbd5a8b..8116a670de39 100644
--- a/drivers/acpi/acpica/psparse.c
+++ b/drivers/acpi/acpica/psparse.c
@@ -105,7 +105,7 @@ u16 acpi_ps_peek_opcode(struct acpi_parse_state * parser_state)
 	aml = parser_state->aml;
 	opcode = (u16) ACPI_GET8(aml);
 
-	if (opcode == AML_EXTENDED_OP_PREFIX) {
+	if (opcode == AML_EXTENDED_PREFIX) {
 
 		/* Extended opcode, get the second opcode byte */
 
@@ -210,7 +210,7 @@ acpi_ps_complete_this_op(struct acpi_walk_state *walk_state,
 			    || (op->common.parent->common.aml_opcode ==
 				AML_BANK_FIELD_OP)
 			    || (op->common.parent->common.aml_opcode ==
-				AML_VAR_PACKAGE_OP)) {
+				AML_VARIABLE_PACKAGE_OP)) {
 				replacement_op =
 				    acpi_ps_alloc_op(AML_INT_RETURN_VALUE_OP,
 						     op->common.aml);
@@ -225,7 +225,7 @@ acpi_ps_complete_this_op(struct acpi_walk_state *walk_state,
 				if ((op->common.aml_opcode == AML_BUFFER_OP)
 				    || (op->common.aml_opcode == AML_PACKAGE_OP)
 				    || (op->common.aml_opcode ==
-					AML_VAR_PACKAGE_OP)) {
+					AML_VARIABLE_PACKAGE_OP)) {
 					replacement_op =
 					    acpi_ps_alloc_op(op->common.
 							     aml_opcode,
diff --git a/drivers/acpi/acpica/pstree.c b/drivers/acpi/acpica/pstree.c
index 9677fff8fd47..c06d6e2fc7a5 100644
--- a/drivers/acpi/acpica/pstree.c
+++ b/drivers/acpi/acpica/pstree.c
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "acparser.h"
 #include "amlcode.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("pstree")
@@ -216,6 +217,7 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 
 	next = acpi_ps_get_arg(op, 0);
 	if (next) {
+		ASL_CV_LABEL_FILENODE(next);
 		return (next);
 	}
 
@@ -223,6 +225,7 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 
 	next = op->common.next;
 	if (next) {
+		ASL_CV_LABEL_FILENODE(next);
 		return (next);
 	}
 
@@ -233,6 +236,8 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 	while (parent) {
 		arg = acpi_ps_get_arg(parent, 0);
 		while (arg && (arg != origin) && (arg != op)) {
+
+			ASL_CV_LABEL_FILENODE(arg);
 			arg = arg->common.next;
 		}
 
@@ -247,6 +252,7 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 
 			/* Found sibling of parent */
 
+			ASL_CV_LABEL_FILENODE(parent->common.next);
 			return (parent->common.next);
 		}
 
@@ -254,6 +260,7 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 		parent = parent->common.parent;
 	}
 
+	ASL_CV_LABEL_FILENODE(next);
 	return (next);
 }
 
@@ -296,7 +303,7 @@ union acpi_parse_object *acpi_ps_get_child(union acpi_parse_object *op)
 		child = acpi_ps_get_arg(op, 1);
 		break;
 
-	case AML_POWER_RES_OP:
+	case AML_POWER_RESOURCE_OP:
 	case AML_INDEX_FIELD_OP:
 
 		child = acpi_ps_get_arg(op, 2);
diff --git a/drivers/acpi/acpica/psutils.c b/drivers/acpi/acpica/psutils.c
index 2fa38bb76a55..02642760cb93 100644
--- a/drivers/acpi/acpica/psutils.c
+++ b/drivers/acpi/acpica/psutils.c
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "acparser.h"
 #include "amlcode.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("psutils")
@@ -152,6 +153,15 @@ union acpi_parse_object *acpi_ps_alloc_op(u16 opcode, u8 *aml)
 		acpi_ps_init_op(op, opcode);
 		op->common.aml = aml;
 		op->common.flags = flags;
+		ASL_CV_CLEAR_OP_COMMENTS(op);
+
+		if (opcode == AML_SCOPE_OP) {
+			acpi_gbl_current_scope = op;
+		}
+	}
+
+	if (gbl_capture_comments) {
+		ASL_CV_TRANSFER_COMMENTS(op);
 	}
 
 	return (op);
@@ -174,6 +184,7 @@ void acpi_ps_free_op(union acpi_parse_object *op)
 {
 	ACPI_FUNCTION_NAME(ps_free_op);
 
+	ASL_CV_CLEAR_OP_COMMENTS(op);
 	if (op->common.aml_opcode == AML_INT_RETURN_VALUE_OP) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS,
 				  "Free retval op: %p\n", op));
diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index 5a968a78652b..0d2e98920069 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -416,13 +416,18 @@ acpi_tb_get_table(struct acpi_table_desc *table_desc,
 		}
 	}
 
-	table_desc->validation_count++;
-	if (table_desc->validation_count == 0) {
-		ACPI_ERROR((AE_INFO,
-			    "Table %p, Validation count is zero after increment\n",
-			    table_desc));
-		table_desc->validation_count--;
-		return_ACPI_STATUS(AE_LIMIT);
+	if (table_desc->validation_count < ACPI_MAX_TABLE_VALIDATIONS) {
+		table_desc->validation_count++;
+
+		/*
+		 * Detect validation_count overflows to ensure that the warning
+		 * message will only be printed once.
+		 */
+		if (table_desc->validation_count >= ACPI_MAX_TABLE_VALIDATIONS) {
+			ACPI_WARNING((AE_INFO,
+				      "Table %p, Validation count overflows\n",
+				      table_desc));
+		}
 	}
 
 	*out_table = table_desc->pointer;
@@ -449,13 +454,20 @@ void acpi_tb_put_table(struct acpi_table_desc *table_desc)
 
 	ACPI_FUNCTION_TRACE(acpi_tb_put_table);
 
-	if (table_desc->validation_count == 0) {
-		ACPI_WARNING((AE_INFO,
-			      "Table %p, Validation count is zero before decrement\n",
-			      table_desc));
-		return_VOID;
+	if (table_desc->validation_count < ACPI_MAX_TABLE_VALIDATIONS) {
+		table_desc->validation_count--;
+
+		/*
+		 * Detect validation_count underflows to ensure that the warning
+		 * message will only be printed once.
+		 */
+		if (table_desc->validation_count >= ACPI_MAX_TABLE_VALIDATIONS) {
+			ACPI_WARNING((AE_INFO,
+				      "Table %p, Validation count underflows\n",
+				      table_desc));
+			return_VOID;
+		}
 	}
-	table_desc->validation_count--;
 
 	if (table_desc->validation_count == 0) {
 
diff --git a/drivers/acpi/acpica/utalloc.c b/drivers/acpi/acpica/utalloc.c
index a3401bd29413..5594a359dbf1 100644
--- a/drivers/acpi/acpica/utalloc.c
+++ b/drivers/acpi/acpica/utalloc.c
@@ -142,6 +142,45 @@ acpi_status acpi_ut_create_caches(void)
 	if (ACPI_FAILURE(status)) {
 		return (status);
 	}
+#ifdef ACPI_ASL_COMPILER
+	/*
+	 * For use with the ASL-/ASL+ option. This cache keeps track of regular
+	 * 0xA9 0x01 comments.
+	 */
+	status =
+	    acpi_os_create_cache("Acpi-Comment",
+				 sizeof(struct acpi_comment_node),
+				 ACPI_MAX_COMMENT_CACHE_DEPTH,
+				 &acpi_gbl_reg_comment_cache);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+
+	/*
+	 * This cache keeps track of the starting addresses of where the comments
+	 * lie. This helps prevent duplication of comments.
+	 */
+	status =
+	    acpi_os_create_cache("Acpi-Comment-Addr",
+				 sizeof(struct acpi_comment_addr_node),
+				 ACPI_MAX_COMMENT_CACHE_DEPTH,
+				 &acpi_gbl_comment_addr_cache);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+
+	/*
+	 * This cache will be used for nodes that represent files.
+	 */
+	status =
+	    acpi_os_create_cache("Acpi-File", sizeof(struct acpi_file_node),
+				 ACPI_MAX_COMMENT_CACHE_DEPTH,
+				 &acpi_gbl_file_cache);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+#endif
+
 #ifdef ACPI_DBG_TRACK_ALLOCATIONS
 
 	/* Memory allocation lists */
@@ -201,6 +240,17 @@ acpi_status acpi_ut_delete_caches(void)
 	(void)acpi_os_delete_cache(acpi_gbl_ps_node_ext_cache);
 	acpi_gbl_ps_node_ext_cache = NULL;
 
+#ifdef ACPI_ASL_COMPILER
+	(void)acpi_os_delete_cache(acpi_gbl_reg_comment_cache);
+	acpi_gbl_reg_comment_cache = NULL;
+
+	(void)acpi_os_delete_cache(acpi_gbl_comment_addr_cache);
+	acpi_gbl_comment_addr_cache = NULL;
+
+	(void)acpi_os_delete_cache(acpi_gbl_file_cache);
+	acpi_gbl_file_cache = NULL;
+#endif
+
 #ifdef ACPI_DBG_TRACK_ALLOCATIONS
 
 	/* Debug only - display leftover memory allocation, if any */
diff --git a/drivers/acpi/acpica/utcache.c b/drivers/acpi/acpica/utcache.c
index 11c7f72f2d56..531493306dee 100644
--- a/drivers/acpi/acpica/utcache.c
+++ b/drivers/acpi/acpica/utcache.c
@@ -71,7 +71,7 @@ acpi_os_create_cache(char *cache_name,
 
 	ACPI_FUNCTION_ENTRY();
 
-	if (!cache_name || !return_cache || (object_size < 16)) {
+	if (!cache_name || !return_cache || !object_size) {
 		return (AE_BAD_PARAMETER);
 	}
 
diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c
index bd5ea3101eb7..615a885e2ca3 100644
--- a/drivers/acpi/acpica/utdebug.c
+++ b/drivers/acpi/acpica/utdebug.c
@@ -627,4 +627,5 @@ acpi_trace_point(acpi_trace_event_type type, u8 begin, u8 *aml, char *pathname)
 }
 
 ACPI_EXPORT_SYMBOL(acpi_trace_point)
+
 #endif
diff --git a/drivers/acpi/acpica/utxferror.c b/drivers/acpi/acpica/utxferror.c
index a16bd9eac653..950a1e500bfa 100644
--- a/drivers/acpi/acpica/utxferror.c
+++ b/drivers/acpi/acpica/utxferror.c
@@ -91,7 +91,7 @@ ACPI_EXPORT_SYMBOL(acpi_error)
  *
  * PARAMETERS:  module_name         - Caller's module name (for error output)
  *              line_number         - Caller's line number (for error output)
- *              status              - Status to be formatted
+ *              status              - Status value to be decoded/formatted
  *              format              - Printf format string + additional args
  *
  * RETURN:      None
@@ -132,8 +132,8 @@ ACPI_EXPORT_SYMBOL(acpi_exception)
  *
  * FUNCTION:    acpi_warning
  *
- * PARAMETERS:  module_name         - Caller's module name (for error output)
- *              line_number         - Caller's line number (for error output)
+ * PARAMETERS:  module_name         - Caller's module name (for warning output)
+ *              line_number         - Caller's line number (for warning output)
  *              format              - Printf format string + additional args
  *
  * RETURN:      None
@@ -163,17 +163,13 @@ ACPI_EXPORT_SYMBOL(acpi_warning)
  *
  * FUNCTION:    acpi_info
  *
- * PARAMETERS:  module_name         - Caller's module name (for error output)
- *              line_number         - Caller's line number (for error output)
- *              format              - Printf format string + additional args
+ * PARAMETERS:  format              - Printf format string + additional args
  *
  * RETURN:      None
  *
  * DESCRIPTION: Print generic "ACPI:" information message. There is no
  *              module/line/version info in order to keep the message simple.
  *
- * TBD: module_name and line_number args are not needed, should be removed.
- *
  ******************************************************************************/
 void ACPI_INTERNAL_VAR_XFACE acpi_info(const char *format, ...)
 {
@@ -229,8 +225,8 @@ ACPI_EXPORT_SYMBOL(acpi_bios_error)
  *
  * FUNCTION:    acpi_bios_warning
  *
- * PARAMETERS:  module_name         - Caller's module name (for error output)
- *              line_number         - Caller's line number (for error output)
+ * PARAMETERS:  module_name         - Caller's module name (for warning output)
+ *              line_number         - Caller's line number (for warning output)
  *              format              - Printf format string + additional args
  *
  * RETURN:      None
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index d0855c09f32f..980515e029fa 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -431,12 +431,13 @@ static void ghes_do_proc(struct ghes *ghes,
 {
 	int sev, sec_sev;
 	struct acpi_hest_generic_data *gdata;
+	guid_t *sec_type;
 
 	sev = ghes_severity(estatus->error_severity);
 	apei_estatus_for_each_section(estatus, gdata) {
+		sec_type = (guid_t *)gdata->section_type;
 		sec_sev = ghes_severity(gdata->error_severity);
-		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
-				 CPER_SEC_PLATFORM_MEM)) {
+		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
 			struct cper_sec_mem_err *mem_err;
 			mem_err = (struct cper_sec_mem_err *)(gdata+1);
 			ghes_edac_report_mem_error(ghes, sev, mem_err);
@@ -445,8 +446,7 @@ static void ghes_do_proc(struct ghes *ghes,
 			ghes_handle_memory_failure(gdata, sev);
 		}
 #ifdef CONFIG_ACPI_APEI_PCIEAER
-		else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
-				      CPER_SEC_PCIE)) {
+		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
 			struct cper_sec_pcie *pcie_err;
 			pcie_err = (struct cper_sec_pcie *)(gdata+1);
 			if (sev == GHES_SEV_RECOVERABLE &&
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index c5fecf97ee2f..797b28dc7b34 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -666,14 +666,6 @@ static const struct iommu_ops *iort_iommu_xlate(struct device *dev,
 	int ret = -ENODEV;
 	struct fwnode_handle *iort_fwnode;
 
-	/*
-	 * If we already translated the fwspec there
-	 * is nothing left to do, return the iommu_ops.
-	 */
-	ops = iort_fwspec_iommu_ops(dev->iommu_fwspec);
-	if (ops)
-		return ops;
-
 	if (node) {
 		iort_fwnode = iort_get_fwnode(node);
 		if (!iort_fwnode)
@@ -735,6 +727,14 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
 	u32 streamid = 0;
 	int err;
 
+	/*
+	 * If we already translated the fwspec there
+	 * is nothing left to do, return the iommu_ops.
+	 */
+	ops = iort_fwspec_iommu_ops(dev->iommu_fwspec);
+	if (ops)
+		return ops;
+
 	if (dev_is_pci(dev)) {
 		struct pci_bus *bus = to_pci_dev(dev)->bus;
 		u32 rid;
@@ -782,6 +782,12 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
 	if (err)
 		ops = ERR_PTR(err);
 
+	/* Ignore all other errors apart from EPROBE_DEFER */
+	if (IS_ERR(ops) && (PTR_ERR(ops) != -EPROBE_DEFER)) {
+		dev_dbg(dev, "Adding to IOMMU failed: %ld\n", PTR_ERR(ops));
+		ops = NULL;
+	}
+
 	return ops;
 }
 
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 34fbe027e73a..5a6fbe0fcaf2 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -114,6 +114,11 @@ int acpi_bus_get_status(struct acpi_device *device)
 	acpi_status status;
 	unsigned long long sta;
 
+	if (acpi_device_always_present(device)) {
+		acpi_set_device_status(device, ACPI_STA_DEFAULT);
+		return 0;
+	}
+
 	status = acpi_bus_get_status_handle(device->handle, &sta);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
@@ -191,42 +196,19 @@ static void acpi_print_osc_error(acpi_handle handle,
 	pr_debug("\n");
 }
 
-acpi_status acpi_str_to_uuid(char *str, u8 *uuid)
-{
-	int i;
-	static int opc_map_to_uuid[16] = {6, 4, 2, 0, 11, 9, 16, 14, 19, 21,
-		24, 26, 28, 30, 32, 34};
-
-	if (strlen(str) != 36)
-		return AE_BAD_PARAMETER;
-	for (i = 0; i < 36; i++) {
-		if (i == 8 || i == 13 || i == 18 || i == 23) {
-			if (str[i] != '-')
-				return AE_BAD_PARAMETER;
-		} else if (!isxdigit(str[i]))
-			return AE_BAD_PARAMETER;
-	}
-	for (i = 0; i < 16; i++) {
-		uuid[i] = hex_to_bin(str[opc_map_to_uuid[i]]) << 4;
-		uuid[i] |= hex_to_bin(str[opc_map_to_uuid[i] + 1]);
-	}
-	return AE_OK;
-}
-EXPORT_SYMBOL_GPL(acpi_str_to_uuid);
-
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context)
 {
 	acpi_status status;
 	struct acpi_object_list input;
 	union acpi_object in_params[4];
 	union acpi_object *out_obj;
-	u8 uuid[16];
+	guid_t guid;
 	u32 errors;
 	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
 
 	if (!context)
 		return AE_ERROR;
-	if (ACPI_FAILURE(acpi_str_to_uuid(context->uuid_str, uuid)))
+	if (guid_parse(context->uuid_str, &guid))
 		return AE_ERROR;
 	context->ret.length = ACPI_ALLOCATE_BUFFER;
 	context->ret.pointer = NULL;
@@ -236,7 +218,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context)
 	input.pointer = in_params;
 	in_params[0].type 		= ACPI_TYPE_BUFFER;
 	in_params[0].buffer.length 	= 16;
-	in_params[0].buffer.pointer	= uuid;
+	in_params[0].buffer.pointer	= (u8 *)&guid;
 	in_params[1].type 		= ACPI_TYPE_INTEGER;
 	in_params[1].integer.value 	= context->rev;
 	in_params[2].type 		= ACPI_TYPE_INTEGER;
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index 668137e4a069..e19f530f1083 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -57,6 +57,7 @@
 
 #define ACPI_BUTTON_LID_INIT_IGNORE	0x00
 #define ACPI_BUTTON_LID_INIT_OPEN	0x01
+#define ACPI_BUTTON_LID_INIT_METHOD	0x02
 
 #define _COMPONENT		ACPI_BUTTON_COMPONENT
 ACPI_MODULE_NAME("button");
@@ -112,7 +113,7 @@ struct acpi_button {
 
 static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier);
 static struct acpi_device *lid_device;
-static u8 lid_init_state = ACPI_BUTTON_LID_INIT_OPEN;
+static u8 lid_init_state = ACPI_BUTTON_LID_INIT_METHOD;
 
 static unsigned long lid_report_interval __read_mostly = 500;
 module_param(lid_report_interval, ulong, 0644);
@@ -376,6 +377,9 @@ static void acpi_lid_initialize_state(struct acpi_device *device)
 	case ACPI_BUTTON_LID_INIT_OPEN:
 		(void)acpi_lid_notify_state(device, 1);
 		break;
+	case ACPI_BUTTON_LID_INIT_METHOD:
+		(void)acpi_lid_update_state(device);
+		break;
 	case ACPI_BUTTON_LID_INIT_IGNORE:
 	default:
 		break;
@@ -559,6 +563,9 @@ static int param_set_lid_init_state(const char *val, struct kernel_param *kp)
 	if (!strncmp(val, "open", sizeof("open") - 1)) {
 		lid_init_state = ACPI_BUTTON_LID_INIT_OPEN;
 		pr_info("Notify initial lid state as open\n");
+	} else if (!strncmp(val, "method", sizeof("method") - 1)) {
+		lid_init_state = ACPI_BUTTON_LID_INIT_METHOD;
+		pr_info("Notify initial lid state with _LID return value\n");
 	} else if (!strncmp(val, "ignore", sizeof("ignore") - 1)) {
 		lid_init_state = ACPI_BUTTON_LID_INIT_IGNORE;
 		pr_info("Do not notify initial lid state\n");
@@ -572,6 +579,8 @@ static int param_get_lid_init_state(char *buffer, struct kernel_param *kp)
 	switch (lid_init_state) {
 	case ACPI_BUTTON_LID_INIT_OPEN:
 		return sprintf(buffer, "open");
+	case ACPI_BUTTON_LID_INIT_METHOD:
+		return sprintf(buffer, "method");
 	case ACPI_BUTTON_LID_INIT_IGNORE:
 		return sprintf(buffer, "ignore");
 	default:
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 656acb5d7166..097eff0b963d 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -74,11 +74,11 @@ struct nfit_table_prev {
 	struct list_head flushes;
 };
 
-static u8 nfit_uuid[NFIT_UUID_MAX][16];
+static guid_t nfit_uuid[NFIT_UUID_MAX];
 
-const u8 *to_nfit_uuid(enum nfit_uuids id)
+const guid_t *to_nfit_uuid(enum nfit_uuids id)
 {
-	return nfit_uuid[id];
+	return &nfit_uuid[id];
 }
 EXPORT_SYMBOL(to_nfit_uuid);
 
@@ -222,7 +222,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 	u32 offset, fw_status = 0;
 	acpi_handle handle;
 	unsigned int func;
-	const u8 *uuid;
+	const guid_t *guid;
 	int rc, i;
 
 	func = cmd;
@@ -245,7 +245,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		cmd_mask = nvdimm_cmd_mask(nvdimm);
 		dsm_mask = nfit_mem->dsm_mask;
 		desc = nd_cmd_dimm_desc(cmd);
-		uuid = to_nfit_uuid(nfit_mem->family);
+		guid = to_nfit_uuid(nfit_mem->family);
 		handle = adev->handle;
 	} else {
 		struct acpi_device *adev = to_acpi_dev(acpi_desc);
@@ -254,7 +254,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		cmd_mask = nd_desc->cmd_mask;
 		dsm_mask = cmd_mask;
 		desc = nd_cmd_bus_desc(cmd);
-		uuid = to_nfit_uuid(NFIT_DEV_BUS);
+		guid = to_nfit_uuid(NFIT_DEV_BUS);
 		handle = adev->handle;
 		dimm_name = "bus";
 	}
@@ -289,7 +289,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 			in_buf.buffer.pointer,
 			min_t(u32, 256, in_buf.buffer.length), true);
 
-	out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
+	out_obj = acpi_evaluate_dsm(handle, guid, 1, func, &in_obj);
 	if (!out_obj) {
 		dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
 				cmd_name);
@@ -409,7 +409,7 @@ int nfit_spa_type(struct acpi_nfit_system_address *spa)
 	int i;
 
 	for (i = 0; i < NFIT_UUID_MAX; i++)
-		if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
+		if (guid_equal(to_nfit_uuid(i), (guid_t *)&spa->range_guid))
 			return i;
 	return -1;
 }
@@ -1415,7 +1415,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 	struct acpi_device *adev, *adev_dimm;
 	struct device *dev = acpi_desc->dev;
 	unsigned long dsm_mask;
-	const u8 *uuid;
+	const guid_t *guid;
 	int i;
 	int family = -1;
 
@@ -1444,7 +1444,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 	/*
 	 * Until standardization materializes we need to consider 4
 	 * different command sets.  Note, that checking for function0 (bit0)
-	 * tells us if any commands are reachable through this uuid.
+	 * tells us if any commands are reachable through this GUID.
 	 */
 	for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
 		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
@@ -1474,9 +1474,9 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 		return 0;
 	}
 
-	uuid = to_nfit_uuid(nfit_mem->family);
+	guid = to_nfit_uuid(nfit_mem->family);
 	for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
-		if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
+		if (acpi_check_dsm(adev_dimm->handle, guid, 1, 1ULL << i))
 			set_bit(i, &nfit_mem->dsm_mask);
 
 	return 0;
@@ -1611,7 +1611,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-	const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
+	const guid_t *guid = to_nfit_uuid(NFIT_DEV_BUS);
 	struct acpi_device *adev;
 	int i;
 
@@ -1621,7 +1621,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
 		return;
 
 	for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
-		if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
+		if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i))
 			set_bit(i, &nd_desc->cmd_mask);
 }
 
@@ -3051,19 +3051,19 @@ static __init int nfit_init(void)
 	BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
 	BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
 
-	acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
-	acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
-	acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
-	acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
-	acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
-	acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
-	acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
-	acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
-	acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
-	acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
-	acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
-	acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
-	acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
+	guid_parse(UUID_VOLATILE_MEMORY, &nfit_uuid[NFIT_SPA_VOLATILE]);
+	guid_parse(UUID_PERSISTENT_MEMORY, &nfit_uuid[NFIT_SPA_PM]);
+	guid_parse(UUID_CONTROL_REGION, &nfit_uuid[NFIT_SPA_DCR]);
+	guid_parse(UUID_DATA_REGION, &nfit_uuid[NFIT_SPA_BDW]);
+	guid_parse(UUID_VOLATILE_VIRTUAL_DISK, &nfit_uuid[NFIT_SPA_VDISK]);
+	guid_parse(UUID_VOLATILE_VIRTUAL_CD, &nfit_uuid[NFIT_SPA_VCD]);
+	guid_parse(UUID_PERSISTENT_VIRTUAL_DISK, &nfit_uuid[NFIT_SPA_PDISK]);
+	guid_parse(UUID_PERSISTENT_VIRTUAL_CD, &nfit_uuid[NFIT_SPA_PCD]);
+	guid_parse(UUID_NFIT_BUS, &nfit_uuid[NFIT_DEV_BUS]);
+	guid_parse(UUID_NFIT_DIMM, &nfit_uuid[NFIT_DEV_DIMM]);
+	guid_parse(UUID_NFIT_DIMM_N_HPE1, &nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
+	guid_parse(UUID_NFIT_DIMM_N_HPE2, &nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
+	guid_parse(UUID_NFIT_DIMM_N_MSFT, &nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
 
 	nfit_wq = create_singlethread_workqueue("nfit");
 	if (!nfit_wq)
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index 3ba1c3472cf9..fd86bec98dea 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -26,7 +26,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
 	struct nfit_spa *nfit_spa;
 
 	/* We only care about memory errors */
-	if (!(mce->status & MCACOD))
+	if (!mce_is_memory_error(mce))
 		return NOTIFY_DONE;
 
 	/*
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 58fb7d68e04a..29bdd959517f 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -18,7 +18,6 @@
 #include <linux/libnvdimm.h>
 #include <linux/ndctl.h>
 #include <linux/types.h>
-#include <linux/uuid.h>
 #include <linux/acpi.h>
 #include <acpi/acuuid.h>
 
@@ -237,7 +236,7 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
 	return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
 }
 
-const u8 *to_nfit_uuid(enum nfit_uuids id);
+const guid_t *to_nfit_uuid(enum nfit_uuids id);
 int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
 void acpi_nfit_shutdown(void *data);
 void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event);
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 919be0aa2578..240544253ccd 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -523,7 +523,7 @@ static int acpi_pci_root_add(struct acpi_device *device,
 	struct acpi_pci_root *root;
 	acpi_handle handle = device->handle;
 	int no_aspm = 0;
-	bool hotadd = system_state != SYSTEM_BOOTING;
+	bool hotadd = system_state == SYSTEM_RUNNING;
 
 	root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL);
 	if (!root)
diff --git a/drivers/acpi/pmic/intel_pmic_xpower.c b/drivers/acpi/pmic/intel_pmic_xpower.c
index 55f51115f016..1a76c784cd4c 100644
--- a/drivers/acpi/pmic/intel_pmic_xpower.c
+++ b/drivers/acpi/pmic/intel_pmic_xpower.c
@@ -27,97 +27,97 @@ static struct pmic_table power_table[] = {
 		.address = 0x00,
 		.reg = 0x13,
 		.bit = 0x05,
-	},
+	}, /* ALD1 */
 	{
 		.address = 0x04,
 		.reg = 0x13,
 		.bit = 0x06,
-	},
+	}, /* ALD2 */
 	{
 		.address = 0x08,
 		.reg = 0x13,
 		.bit = 0x07,
-	},
+	}, /* ALD3 */
 	{
 		.address = 0x0c,
 		.reg = 0x12,
 		.bit = 0x03,
-	},
+	}, /* DLD1 */
 	{
 		.address = 0x10,
 		.reg = 0x12,
 		.bit = 0x04,
-	},
+	}, /* DLD2 */
 	{
 		.address = 0x14,
 		.reg = 0x12,
 		.bit = 0x05,
-	},
+	}, /* DLD3 */
 	{
 		.address = 0x18,
 		.reg = 0x12,
 		.bit = 0x06,
-	},
+	}, /* DLD4 */
 	{
 		.address = 0x1c,
 		.reg = 0x12,
 		.bit = 0x00,
-	},
+	}, /* ELD1 */
 	{
 		.address = 0x20,
 		.reg = 0x12,
 		.bit = 0x01,
-	},
+	}, /* ELD2 */
 	{
 		.address = 0x24,
 		.reg = 0x12,
 		.bit = 0x02,
-	},
+	}, /* ELD3 */
 	{
 		.address = 0x28,
 		.reg = 0x13,
 		.bit = 0x02,
-	},
+	}, /* FLD1 */
 	{
 		.address = 0x2c,
 		.reg = 0x13,
 		.bit = 0x03,
-	},
+	}, /* FLD2 */
 	{
 		.address = 0x30,
 		.reg = 0x13,
 		.bit = 0x04,
-	},
+	}, /* FLD3 */
 	{
-		.address = 0x38,
+		.address = 0x34,
 		.reg = 0x10,
 		.bit = 0x03,
-	},
+	}, /* BUC1 */
 	{
-		.address = 0x3c,
+		.address = 0x38,
 		.reg = 0x10,
 		.bit = 0x06,
-	},
+	}, /* BUC2 */
 	{
-		.address = 0x40,
+		.address = 0x3c,
 		.reg = 0x10,
 		.bit = 0x05,
-	},
+	}, /* BUC3 */
 	{
-		.address = 0x44,
+		.address = 0x40,
 		.reg = 0x10,
 		.bit = 0x04,
-	},
+	}, /* BUC4 */
 	{
-		.address = 0x48,
+		.address = 0x44,
 		.reg = 0x10,
 		.bit = 0x01,
-	},
+	}, /* BUC5 */
 	{
-		.address = 0x4c,
+		.address = 0x48,
 		.reg = 0x10,
 		.bit = 0x00
-	},
+	}, /* BUC6 */
 };
 
 /* TMP0 - TMP5 are the same, all from GPADC */
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 1c2b846c5776..3a6c9b741b23 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -864,6 +864,16 @@ void acpi_resume_power_resources(void)
 
 		mutex_unlock(&resource->resource_lock);
 	}
+
+	mutex_unlock(&power_resource_list_lock);
+}
+
+void acpi_turn_off_unused_power_resources(void)
+{
+	struct acpi_power_resource *resource;
+
+	mutex_lock(&power_resource_list_lock);
+
 	list_for_each_entry_reverse(resource, &acpi_power_resource_list, list_node) {
 		int result, state;
 
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index e39ec7b7cb67..d53162997f32 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1371,8 +1371,8 @@ int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr)
 	iort_set_dma_mask(dev);
 
 	iommu = iort_iommu_configure(dev);
-	if (IS_ERR(iommu))
-		return PTR_ERR(iommu);
+	if (IS_ERR(iommu) && PTR_ERR(iommu) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
 
 	size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
 	/*
@@ -1428,6 +1428,37 @@ static void acpi_init_coherency(struct acpi_device *adev)
 	adev->flags.coherent_dma = cca;
 }
 
+static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data)
+{
+	bool *is_spi_i2c_slave_p = data;
+
+	if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
+		return 1;
+
+	/*
+	 * devices that are connected to UART still need to be enumerated to
+	 * platform bus
+	 */
+	if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART)
+		*is_spi_i2c_slave_p = true;
+
+	 /* no need to do more checking */
+	return -1;
+}
+
+static bool acpi_is_spi_i2c_slave(struct acpi_device *device)
+{
+	struct list_head resource_list;
+	bool is_spi_i2c_slave = false;
+
+	INIT_LIST_HEAD(&resource_list);
+	acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
+			       &is_spi_i2c_slave);
+	acpi_dev_free_resource_list(&resource_list);
+
+	return is_spi_i2c_slave;
+}
+
 void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
 			     int type, unsigned long long sta)
 {
@@ -1443,6 +1474,7 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
 	acpi_bus_get_flags(device);
 	device->flags.match_driver = false;
 	device->flags.initialized = true;
+	device->flags.spi_i2c_slave = acpi_is_spi_i2c_slave(device);
 	acpi_device_clear_enumerated(device);
 	device_initialize(&device->dev);
 	dev_set_uevent_suppress(&device->dev, true);
@@ -1727,38 +1759,13 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used,
 	return AE_OK;
 }
 
-static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data)
-{
-	bool *is_spi_i2c_slave_p = data;
-
-	if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
-		return 1;
-
-	/*
-	 * devices that are connected to UART still need to be enumerated to
-	 * platform bus
-	 */
-	if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART)
-		*is_spi_i2c_slave_p = true;
-
-	 /* no need to do more checking */
-	return -1;
-}
-
 static void acpi_default_enumeration(struct acpi_device *device)
 {
-	struct list_head resource_list;
-	bool is_spi_i2c_slave = false;
-
 	/*
 	 * Do not enumerate SPI/I2C slaves as they will be enumerated by their
 	 * respective parents.
 	 */
-	INIT_LIST_HEAD(&resource_list);
-	acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
-			       &is_spi_i2c_slave);
-	acpi_dev_free_resource_list(&resource_list);
-	if (!is_spi_i2c_slave) {
+	if (!device->flags.spi_i2c_slave) {
 		acpi_create_platform_device(device, NULL);
 		acpi_device_set_enumerated(device);
 	} else {
@@ -1854,7 +1861,7 @@ static void acpi_bus_attach(struct acpi_device *device)
 		return;
 
 	device->flags.match_driver = true;
-	if (ret > 0) {
+	if (ret > 0 && !device->flags.spi_i2c_slave) {
 		acpi_device_set_enumerated(device);
 		goto ok;
 	}
@@ -1863,10 +1870,10 @@ static void acpi_bus_attach(struct acpi_device *device)
 	if (ret < 0)
 		return;
 
-	if (device->pnp.type.platform_id)
-		acpi_default_enumeration(device);
-	else
+	if (!device->pnp.type.platform_id && !device->flags.spi_i2c_slave)
 		acpi_device_set_enumerated(device);
+	else
+		acpi_default_enumeration(device);
 
  ok:
 	list_for_each_entry(child, &device->children, node)
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index a4327af676fe..097d630ab886 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -474,6 +474,7 @@ static void acpi_pm_start(u32 acpi_state)
  */
 static void acpi_pm_end(void)
 {
+	acpi_turn_off_unused_power_resources();
 	acpi_scan_lock_release();
 	/*
 	 * This is necessary in case acpi_pm_finish() is not called during a
diff --git a/drivers/acpi/sleep.h b/drivers/acpi/sleep.h
index a9cc34e663f9..a82ff74faf7a 100644
--- a/drivers/acpi/sleep.h
+++ b/drivers/acpi/sleep.h
@@ -6,6 +6,7 @@ extern struct list_head acpi_wakeup_device_list;
 extern struct mutex acpi_device_lock;
 
 extern void acpi_resume_power_resources(void);
+extern void acpi_turn_off_unused_power_resources(void);
 
 static inline acpi_status acpi_set_waking_vector(u32 wakeup_address)
 {
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 1b5ee1e0e5a3..e414fabf7315 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -333,14 +333,17 @@ static ssize_t acpi_table_show(struct file *filp, struct kobject *kobj,
 	    container_of(bin_attr, struct acpi_table_attr, attr);
 	struct acpi_table_header *table_header = NULL;
 	acpi_status status;
+	ssize_t rc;
 
 	status = acpi_get_table(table_attr->name, table_attr->instance,
 				&table_header);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
-	return memory_read_from_buffer(buf, count, &offset,
-				       table_header, table_header->length);
+	rc = memory_read_from_buffer(buf, count, &offset, table_header,
+			table_header->length);
+	acpi_put_table(table_header);
+	return rc;
 }
 
 static int acpi_table_attr_init(struct kobject *tables_obj,
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 27d0dcfcf47d..b9d956c916f5 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -613,19 +613,19 @@ acpi_status acpi_evaluate_lck(acpi_handle handle, int lock)
 /**
  * acpi_evaluate_dsm - evaluate device's _DSM method
  * @handle: ACPI device handle
- * @uuid: UUID of requested functions, should be 16 bytes
+ * @guid: GUID of requested functions, should be 16 bytes
  * @rev: revision number of requested function
  * @func: requested function number
  * @argv4: the function specific parameter
  *
- * Evaluate device's _DSM method with specified UUID, revision id and
+ * Evaluate device's _DSM method with specified GUID, revision id and
  * function number. Caller needs to free the returned object.
  *
  * Though ACPI defines the fourth parameter for _DSM should be a package,
  * some old BIOSes do expect a buffer or an integer etc.
  */
 union acpi_object *
-acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
+acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func,
 		  union acpi_object *argv4)
 {
 	acpi_status ret;
@@ -638,7 +638,7 @@ acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
 
 	params[0].type = ACPI_TYPE_BUFFER;
 	params[0].buffer.length = 16;
-	params[0].buffer.pointer = (char *)uuid;
+	params[0].buffer.pointer = (u8 *)guid;
 	params[1].type = ACPI_TYPE_INTEGER;
 	params[1].integer.value = rev;
 	params[2].type = ACPI_TYPE_INTEGER;
@@ -666,7 +666,7 @@ EXPORT_SYMBOL(acpi_evaluate_dsm);
 /**
  * acpi_check_dsm - check if _DSM method supports requested functions.
  * @handle: ACPI device handle
- * @uuid: UUID of requested functions, should be 16 bytes at least
+ * @guid: GUID of requested functions, should be 16 bytes at least
  * @rev: revision number of requested functions
  * @funcs: bitmap of requested functions
  *
@@ -674,7 +674,7 @@ EXPORT_SYMBOL(acpi_evaluate_dsm);
  * functions. Currently only support 64 functions at maximum, should be
  * enough for now.
  */
-bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
+bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs)
 {
 	int i;
 	u64 mask = 0;
@@ -683,7 +683,7 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 	if (funcs == 0)
 		return false;
 
-	obj = acpi_evaluate_dsm(handle, uuid, rev, 0, NULL);
+	obj = acpi_evaluate_dsm(handle, guid, rev, 0, NULL);
 	if (!obj)
 		return false;
 
@@ -697,7 +697,7 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 
 	/*
 	 * Bit 0 indicates whether there's support for any functions other than
-	 * function 0 for the specified UUID and revision.
+	 * function 0 for the specified GUID and revision.
 	 */
 	if ((mask & 0x1) && (mask & funcs) == funcs)
 		return true;
diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
new file mode 100644
index 000000000000..bd86b809c848
--- /dev/null
+++ b/drivers/acpi/x86/utils.c
@@ -0,0 +1,90 @@
+/*
+ * X86 ACPI Utility Functions
+ *
+ * Copyright (C) 2017 Hans de Goede <hdegoede@redhat.com>
+ *
+ * Based on various non upstream patches to support the CHT Whiskey Cove PMIC:
+ * Copyright (C) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include "../internal.h"
+
+/*
+ * Some ACPI devices are hidden (status == 0x0) in recent BIOS-es because
+ * some recent Windows drivers bind to one device but poke at multiple
+ * devices at the same time, so the others get hidden.
+ * We work around this by always reporting ACPI_STA_DEFAULT for these
+ * devices. Note this MUST only be done for devices where this is safe.
+ *
+ * This forcing of devices to be present is limited to specific CPU (SoC)
+ * models both to avoid potentially causing trouble on other models and
+ * because some HIDs are re-used on different SoCs for completely
+ * different devices.
+ */
+struct always_present_id {
+	struct acpi_device_id hid[2];
+	struct x86_cpu_id cpu_ids[2];
+	const char *uid;
+};
+
+#define ICPU(model)	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, }
+
+#define ENTRY(hid, uid, cpu_models) {					\
+	{ { hid, }, {} },						\
+	{ cpu_models, {} },						\
+	uid,								\
+}
+
+static const struct always_present_id always_present_ids[] = {
+	/*
+	 * Bay / Cherry Trail PWM directly poked by GPU driver in win10,
+	 * but Linux uses a separate PWM driver, harmless if not used.
+	 */
+	ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT1)),
+	ENTRY("80862288", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT)),
+	/*
+	 * The INT0002 device is necessary to clear wakeup interrupt sources
+	 * on Cherry Trail devices, without it we get nobody cared IRQ msgs.
+	 */
+	ENTRY("INT0002", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT)),
+};
+
+bool acpi_device_always_present(struct acpi_device *adev)
+{
+	u32 *status = (u32 *)&adev->status;
+	u32 old_status = *status;
+	bool ret = false;
+	unsigned int i;
+
+	/* acpi_match_device_ids checks status, so set it to default */
+	*status = ACPI_STA_DEFAULT;
+	for (i = 0; i < ARRAY_SIZE(always_present_ids); i++) {
+		if (acpi_match_device_ids(adev, always_present_ids[i].hid))
+			continue;
+
+		if (!adev->pnp.unique_id ||
+		    strcmp(adev->pnp.unique_id, always_present_ids[i].uid))
+			continue;
+
+		if (!x86_match_cpu(always_present_ids[i].cpu_ids))
+			continue;
+
+		if (old_status != ACPI_STA_DEFAULT) /* Log only once */
+			dev_info(&adev->dev,
+				 "Device [%s] is in always present list\n",
+				 adev->pnp.bus_id);
+
+		ret = true;
+		break;
+	}
+	*status = old_status;
+
+	return ret;
+}
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 2fc52407306c..c69954023c2e 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1364,6 +1364,40 @@ static inline void ahci_gtf_filter_workaround(struct ata_host *host)
 {}
 #endif
 
+/*
+ * On the Acer Aspire Switch Alpha 12, sometimes all SATA ports are detected
+ * as DUMMY, or detected but eventually get a "link down" and never get up
+ * again. When this happens, CAP.NP may hold a value of 0x00 or 0x01, and the
+ * port_map may hold a value of 0x00.
+ *
+ * Overriding CAP.NP to 0x02 and the port_map to 0x7 will reveal all 3 ports
+ * and can significantly reduce the occurrence of the problem.
+ *
+ * https://bugzilla.kernel.org/show_bug.cgi?id=189471
+ */
+static void acer_sa5_271_workaround(struct ahci_host_priv *hpriv,
+				    struct pci_dev *pdev)
+{
+	static const struct dmi_system_id sysids[] = {
+		{
+			.ident = "Acer Switch Alpha 12",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+				DMI_MATCH(DMI_PRODUCT_NAME, "Switch SA5-271")
+			},
+		},
+		{ }
+	};
+
+	if (dmi_check_system(sysids)) {
+		dev_info(&pdev->dev, "enabling Acer Switch Alpha 12 workaround\n");
+		if ((hpriv->saved_cap & 0xC734FF00) == 0xC734FF00) {
+			hpriv->port_map = 0x7;
+			hpriv->cap = 0xC734FF02;
+		}
+	}
+}
+
 #ifdef CONFIG_ARM64
 /*
  * Due to ERRATA#22536, ThunderX needs to handle HOST_IRQ_STAT differently.
@@ -1636,6 +1670,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			 "online status unreliable, applying workaround\n");
 	}
 
+
+	/* Acer SA5-271 workaround modifies private_data */
+	acer_sa5_271_workaround(hpriv, pdev);
+
 	/* CAP.NP sometimes indicate the index of the last enabled
 	 * port, at other times, that of the last possible port, so
 	 * determining the maximum port number requires looking at
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index aaa761b9081c..cd2eab6aa92e 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -514,8 +514,9 @@ int ahci_platform_init_host(struct platform_device *pdev,
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq <= 0) {
-		dev_err(dev, "no irq\n");
-		return -EINVAL;
+		if (irq != -EPROBE_DEFER)
+			dev_err(dev, "no irq\n");
+		return irq;
 	}
 
 	hpriv->irq = irq;
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 2d83b8c75965..e157a0e44419 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -6800,7 +6800,7 @@ static int __init ata_parse_force_one(char **cur,
 	}
 
 	force_ent->port = simple_strtoul(id, &endp, 10);
-	if (p == endp || *endp != '\0') {
+	if (id == endp || *endp != '\0') {
 		*reason = "invalid port/link";
 		return -EINVAL;
 	}
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index b66bcda88320..3b2246dded74 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -4067,7 +4067,6 @@ static int mv_platform_probe(struct platform_device *pdev)
 	struct ata_host *host;
 	struct mv_host_priv *hpriv;
 	struct resource *res;
-	void __iomem *mmio;
 	int n_ports = 0, irq = 0;
 	int rc;
 	int port;
@@ -4086,9 +4085,8 @@ static int mv_platform_probe(struct platform_device *pdev)
 	 * Get the register base first
 	 */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	mmio = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(mmio))
-		return PTR_ERR(mmio);
+	if (res == NULL)
+		return -EINVAL;
 
 	/* allocate host */
 	if (pdev->dev.of_node) {
@@ -4132,7 +4130,12 @@ static int mv_platform_probe(struct platform_device *pdev)
 	hpriv->board_idx = chip_soc;
 
 	host->iomap = NULL;
-	hpriv->base = mmio - SATAHC0_REG_BASE;
+	hpriv->base = devm_ioremap(&pdev->dev, res->start,
+				   resource_size(res));
+	if (!hpriv->base)
+		return -ENOMEM;
+
+	hpriv->base -= SATAHC0_REG_BASE;
 
 	hpriv->clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(hpriv->clk))
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index 5d38245a7a73..b7939a2c1fab 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -890,7 +890,10 @@ static int sata_rcar_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "failed to get access to sata clock\n");
 		return PTR_ERR(priv->clk);
 	}
-	clk_prepare_enable(priv->clk);
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret)
+		return ret;
 
 	host = ata_host_alloc(&pdev->dev, 1);
 	if (!host) {
@@ -970,8 +973,11 @@ static int sata_rcar_resume(struct device *dev)
 	struct ata_host *host = dev_get_drvdata(dev);
 	struct sata_rcar_priv *priv = host->private_data;
 	void __iomem *base = priv->base;
+	int ret;
 
-	clk_prepare_enable(priv->clk);
+	ret = clk_prepare_enable(priv->clk);
+	if (ret)
+		return ret;
 
 	/* ack and mask */
 	iowrite32(0, base + SATAINTSTAT_REG);
@@ -988,8 +994,11 @@ static int sata_rcar_restore(struct device *dev)
 {
 	struct ata_host *host = dev_get_drvdata(dev);
 	struct sata_rcar_priv *priv = host->private_data;
+	int ret;
 
-	clk_prepare_enable(priv->clk);
+	ret = clk_prepare_enable(priv->clk);
+	if (ret)
+		return ret;
 
 	sata_rcar_setup_port(host);
 
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 5548f9686016..0440d95c9b5b 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -377,7 +377,7 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
 	if (!pfn_valid_within(pfn))
 		return -1;
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-	if (system_state == SYSTEM_BOOTING)
+	if (system_state < SYSTEM_RUNNING)
 		return early_pfn_to_nid(pfn);
 #endif
 	page = pfn_to_page(pfn);
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 136854970489..c313b600d356 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -525,12 +525,6 @@ static void wakeup_source_activate(struct wakeup_source *ws)
 			"unregistered wakeup source\n"))
 		return;
 
-	/*
-	 * active wakeup source should bring the system
-	 * out of PM_SUSPEND_FREEZE state
-	 */
-	freeze_wake();
-
 	ws->active = true;
 	ws->active_count++;
 	ws->last_time = ktime_get();
@@ -546,8 +540,9 @@ static void wakeup_source_activate(struct wakeup_source *ws)
 /**
  * wakeup_source_report_event - Report wakeup event using the given source.
  * @ws: Wakeup source to report the event for.
+ * @hard: If set, abort suspends in progress and wake up from suspend-to-idle.
  */
-static void wakeup_source_report_event(struct wakeup_source *ws)
+static void wakeup_source_report_event(struct wakeup_source *ws, bool hard)
 {
 	ws->event_count++;
 	/* This is racy, but the counter is approximate anyway. */
@@ -556,6 +551,9 @@ static void wakeup_source_report_event(struct wakeup_source *ws)
 
 	if (!ws->active)
 		wakeup_source_activate(ws);
+
+	if (hard)
+		pm_system_wakeup();
 }
 
 /**
@@ -573,7 +571,7 @@ void __pm_stay_awake(struct wakeup_source *ws)
 
 	spin_lock_irqsave(&ws->lock, flags);
 
-	wakeup_source_report_event(ws);
+	wakeup_source_report_event(ws, false);
 	del_timer(&ws->timer);
 	ws->timer_expires = 0;
 
@@ -739,9 +737,10 @@ static void pm_wakeup_timer_fn(unsigned long data)
 }
 
 /**
- * __pm_wakeup_event - Notify the PM core of a wakeup event.
+ * pm_wakeup_ws_event - Notify the PM core of a wakeup event.
  * @ws: Wakeup source object associated with the event source.
  * @msec: Anticipated event processing time (in milliseconds).
+ * @hard: If set, abort suspends in progress and wake up from suspend-to-idle.
  *
  * Notify the PM core of a wakeup event whose source is @ws that will take
  * approximately @msec milliseconds to be processed by the kernel.  If @ws is
@@ -750,7 +749,7 @@ static void pm_wakeup_timer_fn(unsigned long data)
  *
  * It is safe to call this function from interrupt context.
  */
-void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
+void pm_wakeup_ws_event(struct wakeup_source *ws, unsigned int msec, bool hard)
 {
 	unsigned long flags;
 	unsigned long expires;
@@ -760,7 +759,7 @@ void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
 
 	spin_lock_irqsave(&ws->lock, flags);
 
-	wakeup_source_report_event(ws);
+	wakeup_source_report_event(ws, hard);
 
 	if (!msec) {
 		wakeup_source_deactivate(ws);
@@ -779,17 +778,17 @@ void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
  unlock:
 	spin_unlock_irqrestore(&ws->lock, flags);
 }
-EXPORT_SYMBOL_GPL(__pm_wakeup_event);
-
+EXPORT_SYMBOL_GPL(pm_wakeup_ws_event);
 
 /**
  * pm_wakeup_event - Notify the PM core of a wakeup event.
  * @dev: Device the wakeup event is related to.
  * @msec: Anticipated event processing time (in milliseconds).
+ * @hard: If set, abort suspends in progress and wake up from suspend-to-idle.
  *
- * Call __pm_wakeup_event() for the @dev's wakeup source object.
+ * Call pm_wakeup_ws_event() for the @dev's wakeup source object.
  */
-void pm_wakeup_event(struct device *dev, unsigned int msec)
+void pm_wakeup_dev_event(struct device *dev, unsigned int msec, bool hard)
 {
 	unsigned long flags;
 
@@ -797,10 +796,10 @@ void pm_wakeup_event(struct device *dev, unsigned int msec)
 		return;
 
 	spin_lock_irqsave(&dev->power.lock, flags);
-	__pm_wakeup_event(dev->power.wakeup, msec);
+	pm_wakeup_ws_event(dev->power.wakeup, msec, hard);
 	spin_unlock_irqrestore(&dev->power.lock, flags);
 }
-EXPORT_SYMBOL_GPL(pm_wakeup_event);
+EXPORT_SYMBOL_GPL(pm_wakeup_dev_event);
 
 void pm_print_active_wakeup_sources(void)
 {
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 26a51be77227..245a879b036e 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3464,7 +3464,7 @@ static inline bool DAC960_ProcessCompletedRequest(DAC960_Command_T *Command,
 						 bool SuccessfulIO)
 {
 	struct request *Request = Command->Request;
-	int Error = SuccessfulIO ? 0 : -EIO;
+	blk_status_t Error = SuccessfulIO ? BLK_STS_OK : BLK_STS_IOERR;
 
 	pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist,
 		Command->SegmentCount, Command->DmaDirection);
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index a328f673adfe..49908c74bfcb 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1378,7 +1378,7 @@ static void redo_fd_request(void)
 	struct amiga_floppy_struct *floppy;
 	char *data;
 	unsigned long flags;
-	int err;
+	blk_status_t err;
 
 next_req:
 	rq = set_next_request();
@@ -1392,7 +1392,7 @@ next_req:
 
 next_segment:
 	/* Here someone could investigate to be more efficient */
-	for (cnt = 0, err = 0; cnt < blk_rq_cur_sectors(rq); cnt++) {
+	for (cnt = 0, err = BLK_STS_OK; cnt < blk_rq_cur_sectors(rq); cnt++) {
 #ifdef DEBUG
 		printk("fd: sector %ld + %d requested for %s\n",
 		       blk_rq_pos(rq), cnt,
@@ -1400,7 +1400,7 @@ next_segment:
 #endif
 		block = blk_rq_pos(rq) + cnt;
 		if ((int)block > floppy->blocks) {
-			err = -EIO;
+			err = BLK_STS_IOERR;
 			break;
 		}
 
@@ -1413,7 +1413,7 @@ next_segment:
 #endif
 
 		if (get_track(drive, track) == -1) {
-			err = -EIO;
+			err = BLK_STS_IOERR;
 			break;
 		}
 
@@ -1424,7 +1424,7 @@ next_segment:
 
 			/* keep the drive spinning while writes are scheduled */
 			if (!fd_motor_on(drive)) {
-				err = -EIO;
+				err = BLK_STS_IOERR;
 				break;
 			}
 			/*
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 027b876370bc..6797e6c23c8a 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -388,6 +388,7 @@ aoeblk_gdalloc(void *vp)
 			d->aoemajor, d->aoeminor);
 		goto err_mempool;
 	}
+	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
 	spin_lock_irqsave(&d->lock, flags);
 	WARN_ON(!(d->flags & DEVFL_GD_NOW));
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 3c606c09fd5a..dc43254e05a4 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1070,8 +1070,8 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
 		d->ip.rq = NULL;
 	do {
 		bio = rq->bio;
-		bok = !fastfail && !bio->bi_error;
-	} while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));
+		bok = !fastfail && !bio->bi_status;
+	} while (__blk_end_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
 
 	/* cf. http://lkml.org/lkml/2006/10/31/28 */
 	if (!fastfail)
@@ -1131,7 +1131,7 @@ ktiocomplete(struct frame *f)
 			ahout->cmdstat, ahin->cmdstat,
 			d->aoemajor, d->aoeminor);
 noskb:		if (buf)
-			buf->bio->bi_error = -EIO;
+			buf->bio->bi_status = BLK_STS_IOERR;
 		goto out;
 	}
 
@@ -1144,7 +1144,7 @@ noskb:		if (buf)
 				"aoe: runt data size in read from",
 				(long) d->aoemajor, d->aoeminor,
 			       skb->len, n);
-			buf->bio->bi_error = -EIO;
+			buf->bio->bi_status = BLK_STS_IOERR;
 			break;
 		}
 		if (n > f->iter.bi_size) {
@@ -1152,7 +1152,7 @@ noskb:		if (buf)
 				"aoe: too-large data size in read from",
 				(long) d->aoemajor, d->aoeminor,
 				n, f->iter.bi_size);
-			buf->bio->bi_error = -EIO;
+			buf->bio->bi_status = BLK_STS_IOERR;
 			break;
 		}
 		bvcpy(skb, f->buf->bio, f->iter, n);
@@ -1654,7 +1654,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
 	if (buf == NULL)
 		return;
 	buf->iter.bi_size = 0;
-	buf->bio->bi_error = -EIO;
+	buf->bio->bi_status = BLK_STS_IOERR;
 	if (buf->nframesout == 0)
 		aoe_end_buf(d, buf);
 }
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index ffd1947500c6..b28fefb90391 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -170,7 +170,7 @@ aoe_failip(struct aoedev *d)
 	if (rq == NULL)
 		return;
 	while ((bio = d->ip.nxbio)) {
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		d->ip.nxbio = bio->bi_next;
 		n = (unsigned long) rq->special;
 		rq->special = (void *) --n;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index fa69ecd52cb5..92da886180aa 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -378,7 +378,7 @@ static DEFINE_TIMER(readtrack_timer, fd_readtrack_check, 0, 0);
 static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0);
 static DEFINE_TIMER(fd_timer, check_change, 0, 0);
 	
-static void fd_end_request_cur(int err)
+static void fd_end_request_cur(blk_status_t err)
 {
 	if (!__blk_end_request_cur(fd_request, err))
 		fd_request = NULL;
@@ -620,7 +620,7 @@ static void fd_error( void )
 	fd_request->error_count++;
 	if (fd_request->error_count >= MAX_ERRORS) {
 		printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
-		fd_end_request_cur(-EIO);
+		fd_end_request_cur(BLK_STS_IOERR);
 	}
 	else if (fd_request->error_count == RECALIBRATE_ERRORS) {
 		printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
@@ -739,7 +739,7 @@ static void do_fd_action( int drive )
 		    }
 		    else {
 			/* all sectors finished */
-			fd_end_request_cur(0);
+			fd_end_request_cur(BLK_STS_OK);
 			redo_fd_request();
 			return;
 		    }
@@ -1144,7 +1144,7 @@ static void fd_rwsec_done1(int status)
 	}
 	else {
 		/* all sectors finished */
-		fd_end_request_cur(0);
+		fd_end_request_cur(BLK_STS_OK);
 		redo_fd_request();
 	}
 	return;
@@ -1445,7 +1445,7 @@ repeat:
 	if (!UD.connected) {
 		/* drive not connected */
 		printk(KERN_ERR "Unknown Device: fd%d\n", drive );
-		fd_end_request_cur(-EIO);
+		fd_end_request_cur(BLK_STS_IOERR);
 		goto repeat;
 	}
 		
@@ -1461,12 +1461,12 @@ repeat:
 		/* user supplied disk type */
 		if (--type >= NUM_DISK_MINORS) {
 			printk(KERN_WARNING "fd%d: invalid disk format", drive );
-			fd_end_request_cur(-EIO);
+			fd_end_request_cur(BLK_STS_IOERR);
 			goto repeat;
 		}
 		if (minor2disktype[type].drive_types > DriveType)  {
 			printk(KERN_WARNING "fd%d: unsupported disk format", drive );
-			fd_end_request_cur(-EIO);
+			fd_end_request_cur(BLK_STS_IOERR);
 			goto repeat;
 		}
 		type = minor2disktype[type].index;
@@ -1476,7 +1476,7 @@ repeat:
 	}
 	
 	if (blk_rq_pos(fd_request) + 1 > UDT->blocks) {
-		fd_end_request_cur(-EIO);
+		fd_end_request_cur(BLK_STS_IOERR);
 		goto repeat;
 	}
 
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 57b574f2f66a..6112e99bedf7 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -418,7 +418,6 @@ static struct brd_device *brd_alloc(int i)
 
 	blk_queue_make_request(brd->brd_queue, brd_make_request);
 	blk_queue_max_hw_sectors(brd->brd_queue, 1024);
-	blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
 
 	/* This is so fdisk will align partitions on 4k, because of
 	 * direct_access API needing 4k alignment, returning a PFN
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index cd375503f7b0..02a611993bb4 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1864,7 +1864,8 @@ static void cciss_softirq_done(struct request *rq)
 	/* set the residual count for pc requests */
 	if (blk_rq_is_passthrough(rq))
 		scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
-	blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0);
+	blk_end_request_all(rq, scsi_req(rq)->result ?
+			BLK_STS_IOERR : BLK_STS_OK);
 
 	spin_lock_irqsave(&h->lock, flags);
 	cmd_free(h, c);
@@ -1956,6 +1957,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
 	disk->queue->cmd_size = sizeof(struct scsi_request);
 	disk->queue->request_fn = do_cciss_request;
 	disk->queue->queue_lock = &h->lock;
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
 	if (blk_init_allocated_queue(disk->queue) < 0)
 		goto cleanup_queue;
 
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 8d7bcfa49c12..e02c45cd3c5a 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -178,7 +178,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
 	else
 		submit_bio(bio);
 	wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		err = device->md_io.error;
 
  out:
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index a804a4107fbc..809fd245c3dc 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -959,16 +959,16 @@ static void drbd_bm_endio(struct bio *bio)
 	    !bm_test_page_unchanged(b->bm_pages[idx]))
 		drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		/* ctx error will hold the completed-last non-zero error code,
 		 * in case error codes differ. */
-		ctx->error = bio->bi_error;
+		ctx->error = blk_status_to_errno(bio->bi_status);
 		bm_set_page_io_err(b->bm_pages[idx]);
 		/* Not identical to on disk version of it.
 		 * Is BM_PAGE_IO_ERROR enough? */
 		if (__ratelimit(&drbd_ratelimit_state))
 			drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
-					bio->bi_error, idx);
+					bio->bi_status, idx);
 	} else {
 		bm_clear_page_io_err(b->bm_pages[idx]);
 		dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d5da45bb03a6..d17b6e6393c7 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1441,6 +1441,9 @@ extern struct bio_set *drbd_md_io_bio_set;
 /* to allocate from that set */
 extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
 
+/* And a bio_set for cloning */
+extern struct bio_set *drbd_io_bio_set;
+
 extern struct mutex resources_mutex;
 
 extern int conn_lowest_minor(struct drbd_connection *connection);
@@ -1627,7 +1630,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
 	__release(local);
 	if (!bio->bi_bdev) {
 		drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
-		bio->bi_error = -ENODEV;
+		bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
 		return;
 	}
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 84455c365f57..5fb99e06ebe4 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -128,6 +128,7 @@ mempool_t *drbd_request_mempool;
 mempool_t *drbd_ee_mempool;
 mempool_t *drbd_md_io_page_pool;
 struct bio_set *drbd_md_io_bio_set;
+struct bio_set *drbd_io_bio_set;
 
 /* I do not use a standard mempool, because:
    1) I want to hand out the pre-allocated objects first.
@@ -2098,6 +2099,8 @@ static void drbd_destroy_mempools(void)
 
 	/* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
 
+	if (drbd_io_bio_set)
+		bioset_free(drbd_io_bio_set);
 	if (drbd_md_io_bio_set)
 		bioset_free(drbd_md_io_bio_set);
 	if (drbd_md_io_page_pool)
@@ -2115,6 +2118,7 @@ static void drbd_destroy_mempools(void)
 	if (drbd_al_ext_cache)
 		kmem_cache_destroy(drbd_al_ext_cache);
 
+	drbd_io_bio_set      = NULL;
 	drbd_md_io_bio_set   = NULL;
 	drbd_md_io_page_pool = NULL;
 	drbd_ee_mempool      = NULL;
@@ -2142,6 +2146,7 @@ static int drbd_create_mempools(void)
 	drbd_pp_pool         = NULL;
 	drbd_md_io_page_pool = NULL;
 	drbd_md_io_bio_set   = NULL;
+	drbd_io_bio_set      = NULL;
 
 	/* caches */
 	drbd_request_cache = kmem_cache_create(
@@ -2165,7 +2170,13 @@ static int drbd_create_mempools(void)
 		goto Enomem;
 
 	/* mempools */
-	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
+	drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER);
+	if (drbd_io_bio_set == NULL)
+		goto Enomem;
+
+	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
+					   BIOSET_NEED_BVECS |
+					   BIOSET_NEED_RESCUER);
 	if (drbd_md_io_bio_set == NULL)
 		goto Enomem;
 
@@ -2839,7 +2850,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
 	/* Setting the max_hw_sectors to an odd value of 8kibyte here
 	   This triggers a max_bio_size message upon first attach or connect */
 	blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
-	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 	q->queue_lock = &resource->req_lock;
 
 	device->md_io.page = alloc_page(GFP_KERNEL);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 02255a0d68b9..ad0fcb43e45c 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2294,7 +2294,7 @@ _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_
 static enum drbd_ret_code
 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
 {
-	static enum drbd_ret_code rv;
+	enum drbd_ret_code rv;
 	struct drbd_peer_device *peer_device;
 	int i;
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 1b0a2be24f39..c7e95e6380fb 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1229,9 +1229,9 @@ void one_flush_endio(struct bio *bio)
 	struct drbd_device *device = octx->device;
 	struct issue_flush_context *ctx = octx->ctx;
 
-	if (bio->bi_error) {
-		ctx->error = bio->bi_error;
-		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error);
+	if (bio->bi_status) {
+		ctx->error = blk_status_to_errno(bio->bi_status);
+		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
 	}
 	kfree(octx);
 	bio_put(bio);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index b5730e17b455..f6e865b2d543 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -203,7 +203,7 @@ void start_new_tl_epoch(struct drbd_connection *connection)
 void complete_master_bio(struct drbd_device *device,
 		struct bio_and_error *m)
 {
-	m->bio->bi_error = m->error;
+	m->bio->bi_status = errno_to_blk_status(m->error);
 	bio_endio(m->bio);
 	dec_ap_bio(device);
 }
@@ -315,24 +315,32 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
 }
 
 /* still holds resource->req_lock */
-static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
+static void drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
 {
 	struct drbd_device *device = req->device;
 	D_ASSERT(device, m || (req->rq_state & RQ_POSTPONED));
 
+	if (!put)
+		return;
+
 	if (!atomic_sub_and_test(put, &req->completion_ref))
-		return 0;
+		return;
 
 	drbd_req_complete(req, m);
 
+	/* local completion may still come in later,
+	 * we need to keep the req object around. */
+	if (req->rq_state & RQ_LOCAL_ABORTED)
+		return;
+
 	if (req->rq_state & RQ_POSTPONED) {
 		/* don't destroy the req object just yet,
 		 * but queue it for retry */
 		drbd_restart_request(req);
-		return 0;
+		return;
 	}
 
-	return 1;
+	kref_put(&req->kref, drbd_req_destroy);
 }
 
 static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
@@ -519,12 +527,8 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 	if (req->i.waiting)
 		wake_up(&device->misc_wait);
 
-	if (c_put) {
-		if (drbd_req_put_completion_ref(req, m, c_put))
-			kref_put(&req->kref, drbd_req_destroy);
-	} else {
-		kref_put(&req->kref, drbd_req_destroy);
-	}
+	drbd_req_put_completion_ref(req, m, c_put);
+	kref_put(&req->kref, drbd_req_destroy);
 }
 
 static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req)
@@ -1153,7 +1157,7 @@ static void drbd_process_discard_req(struct drbd_request *req)
 
 	if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9,
 			GFP_NOIO, 0))
-		req->private_bio->bi_error = -EIO;
+		req->private_bio->bi_status = BLK_STS_IOERR;
 	bio_endio(req->private_bio);
 }
 
@@ -1221,7 +1225,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
 		/* only pass the error to the upper layers.
 		 * if user cannot handle io errors, that's not our business. */
 		drbd_err(device, "could not kmalloc() req\n");
-		bio->bi_error = -ENOMEM;
+		bio->bi_status = BLK_STS_RESOURCE;
 		bio_endio(bio);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1366,8 +1370,7 @@ nodata:
 	}
 
 out:
-	if (drbd_req_put_completion_ref(req, &m, 1))
-		kref_put(&req->kref, drbd_req_destroy);
+	drbd_req_put_completion_ref(req, &m, 1);
 	spin_unlock_irq(&resource->req_lock);
 
 	/* Even though above is a kref_put(), this is safe.
@@ -1557,7 +1560,7 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
 	struct drbd_device *device = (struct drbd_device *) q->queuedata;
 	unsigned long start_jif;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	start_jif = jiffies;
 
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index eb49e7f2da91..9e1866ab238f 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -263,7 +263,7 @@ enum drbd_req_state_bits {
 static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
 {
 	struct bio *bio;
-	bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
+	bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set);
 
 	req->private_bio = bio;
 
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 1afcb4e02d8d..1d8726a8df34 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -63,7 +63,7 @@ void drbd_md_endio(struct bio *bio)
 	struct drbd_device *device;
 
 	device = bio->bi_private;
-	device->md_io.error = bio->bi_error;
+	device->md_io.error = blk_status_to_errno(bio->bi_status);
 
 	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
 	 * to timeout on the lower level device, and eventually detach from it.
@@ -177,13 +177,13 @@ void drbd_peer_request_endio(struct bio *bio)
 	bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
 			  bio_op(bio) == REQ_OP_DISCARD;
 
-	if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
+	if (bio->bi_status && __ratelimit(&drbd_ratelimit_state))
 		drbd_warn(device, "%s: error=%d s=%llus\n",
 				is_write ? (is_discard ? "discard" : "write")
-					: "read", bio->bi_error,
+					: "read", bio->bi_status,
 				(unsigned long long)peer_req->i.sector);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		set_bit(__EE_WAS_ERROR, &peer_req->flags);
 
 	bio_put(bio); /* no need for the bio anymore */
@@ -243,16 +243,16 @@ void drbd_request_endio(struct bio *bio)
 		if (__ratelimit(&drbd_ratelimit_state))
 			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
 
-		if (!bio->bi_error)
+		if (!bio->bi_status)
 			drbd_panic_after_delayed_completion_of_aborted_request(device);
 	}
 
 	/* to avoid recursion in __req_mod */
-	if (unlikely(bio->bi_error)) {
+	if (unlikely(bio->bi_status)) {
 		switch (bio_op(bio)) {
 		case REQ_OP_WRITE_ZEROES:
 		case REQ_OP_DISCARD:
-			if (bio->bi_error == -EOPNOTSUPP)
+			if (bio->bi_status == BLK_STS_NOTSUPP)
 				what = DISCARD_COMPLETED_NOTSUPP;
 			else
 				what = DISCARD_COMPLETED_WITH_ERROR;
@@ -272,7 +272,7 @@ void drbd_request_endio(struct bio *bio)
 	}
 
 	bio_put(req->private_bio);
-	req->private_bio = ERR_PTR(bio->bi_error);
+	req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));
 
 	/* not req_mod(), we need irqsave here! */
 	spin_lock_irqsave(&device->resource->req_lock, flags);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 60d4c7653178..ce823647a9c4 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2202,7 +2202,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
  * =============================
  */
 
-static void floppy_end_request(struct request *req, int error)
+static void floppy_end_request(struct request *req, blk_status_t error)
 {
 	unsigned int nr_sectors = current_count_sectors;
 	unsigned int drive = (unsigned long)req->rq_disk->private_data;
@@ -2263,7 +2263,7 @@ static void request_done(int uptodate)
 			DRWE->last_error_generation = DRS->generation;
 		}
 		spin_lock_irqsave(q->queue_lock, flags);
-		floppy_end_request(req, -EIO);
+		floppy_end_request(req, BLK_STS_IOERR);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 	}
 }
@@ -3780,9 +3780,9 @@ static void floppy_rb0_cb(struct bio *bio)
 	struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
 	int drive = cbdata->drive;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		pr_info("floppy: error %d while reading block 0\n",
-			bio->bi_error);
+			bio->bi_status);
 		set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
 	}
 	complete(&cbdata->complete);
@@ -4203,6 +4203,7 @@ static int __init do_floppy_init(void)
 			goto out_put_disk;
 		}
 
+		blk_queue_bounce_limit(disks[drive]->queue, BLK_BOUNCE_HIGH);
 		blk_queue_max_hw_sectors(disks[drive]->queue, 64);
 		disks[drive]->major = FLOPPY_MAJOR;
 		disks[drive]->first_minor = TOMINOR(drive);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 28d932906f24..0de11444e317 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -221,7 +221,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
 }
 
 static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
+figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
+		 loff_t logical_blocksize)
 {
 	loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
 	sector_t x = (sector_t)size;
@@ -233,6 +234,12 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
 		lo->lo_offset = offset;
 	if (lo->lo_sizelimit != sizelimit)
 		lo->lo_sizelimit = sizelimit;
+	if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
+		lo->lo_logical_blocksize = logical_blocksize;
+		blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
+		blk_queue_logical_block_size(lo->lo_queue,
+					     lo->lo_logical_blocksize);
+	}
 	set_capacity(lo->lo_disk, x);
 	bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
 	/* let user-space know about the new size */
@@ -457,7 +464,7 @@ static void lo_complete_rq(struct request *rq)
 		zero_fill_bio(bio);
 	}
 
-	blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0);
+	blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
 }
 
 static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
@@ -608,6 +615,9 @@ static int loop_switch(struct loop_device *lo, struct file *file)
  */
 static int loop_flush(struct loop_device *lo)
 {
+	/* loop not yet configured, no running thread, nothing to flush */
+	if (lo->lo_state != Lo_bound)
+		return 0;
 	return loop_switch(lo, NULL);
 }
 
@@ -810,6 +820,7 @@ static void loop_config_discard(struct loop_device *lo)
 	struct file *file = lo->lo_backing_file;
 	struct inode *inode = file->f_mapping->host;
 	struct request_queue *q = lo->lo_queue;
+	int lo_bits = 9;
 
 	/*
 	 * We use punch hole to reclaim the free space used by the
@@ -829,8 +840,11 @@ static void loop_config_discard(struct loop_device *lo)
 
 	q->limits.discard_granularity = inode->i_sb->s_blocksize;
 	q->limits.discard_alignment = 0;
-	blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
-	blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
+	if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
+		lo_bits = blksize_bits(lo->lo_logical_blocksize);
+
+	blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits);
+	blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits);
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
 }
 
@@ -840,10 +854,16 @@ static void loop_unprepare_queue(struct loop_device *lo)
 	kthread_stop(lo->worker_task);
 }
 
+static int loop_kthread_worker_fn(void *worker_ptr)
+{
+	current->flags |= PF_LESS_THROTTLE;
+	return kthread_worker_fn(worker_ptr);
+}
+
 static int loop_prepare_queue(struct loop_device *lo)
 {
 	kthread_init_worker(&lo->worker);
-	lo->worker_task = kthread_run(kthread_worker_fn,
+	lo->worker_task = kthread_run(loop_kthread_worker_fn,
 			&lo->worker, "loop%d", lo->lo_number);
 	if (IS_ERR(lo->worker_task))
 		return -ENOMEM;
@@ -918,6 +938,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 
 	lo->use_dio = false;
 	lo->lo_blocksize = lo_blocksize;
+	lo->lo_logical_blocksize = 512;
 	lo->lo_device = bdev;
 	lo->lo_flags = lo_flags;
 	lo->lo_backing_file = file;
@@ -1083,6 +1104,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	int err;
 	struct loop_func_table *xfer;
 	kuid_t uid = current_uid();
+	int lo_flags = lo->lo_flags;
 
 	if (lo->lo_encrypt_key_size &&
 	    !uid_eq(lo->lo_key_owner, uid) &&
@@ -1115,12 +1137,30 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	if (err)
 		goto exit;
 
+	if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
+		if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE))
+			lo->lo_logical_blocksize = 512;
+		lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
+		if (LO_INFO_BLOCKSIZE(info) != 512 &&
+		    LO_INFO_BLOCKSIZE(info) != 1024 &&
+		    LO_INFO_BLOCKSIZE(info) != 2048 &&
+		    LO_INFO_BLOCKSIZE(info) != 4096)
+			return -EINVAL;
+		if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize)
+			return -EINVAL;
+	}
+
 	if (lo->lo_offset != info->lo_offset ||
-	    lo->lo_sizelimit != info->lo_sizelimit)
-		if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
+	    lo->lo_sizelimit != info->lo_sizelimit ||
+	    lo->lo_flags != lo_flags ||
+	    ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
+	     lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) {
+		if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
+				     LO_INFO_BLOCKSIZE(info))) {
 			err = -EFBIG;
 			goto exit;
 		}
+	}
 
 	loop_config_discard(lo);
 
@@ -1303,12 +1343,13 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
 	return err;
 }
 
-static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
+static int loop_set_capacity(struct loop_device *lo)
 {
 	if (unlikely(lo->lo_state != Lo_bound))
 		return -ENXIO;
 
-	return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
+	return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
+				lo->lo_logical_blocksize);
 }
 
 static int loop_set_dio(struct loop_device *lo, unsigned long arg)
@@ -1366,7 +1407,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 	case LOOP_SET_CAPACITY:
 		err = -EPERM;
 		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
-			err = loop_set_capacity(lo, bdev);
+			err = loop_set_capacity(lo);
 		break;
 	case LOOP_SET_DIRECT_IO:
 		err = -EPERM;
@@ -1642,7 +1683,7 @@ int loop_unregister_transfer(int number)
 EXPORT_SYMBOL(loop_register_transfer);
 EXPORT_SYMBOL(loop_unregister_transfer);
 
-static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
 	struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -1651,7 +1692,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_mq_start_request(bd->rq);
 
 	if (lo->lo_state != Lo_bound)
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 
 	switch (req_op(cmd->rq)) {
 	case REQ_OP_FLUSH:
@@ -1666,7 +1707,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	kthread_queue_work(&lo->worker, &cmd->work);
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static void loop_handle_cmd(struct loop_cmd *cmd)
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index fecd3f97ef8c..2c096b9a17b8 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -49,6 +49,7 @@ struct loop_device {
 	struct file *	lo_backing_file;
 	struct block_device *lo_device;
 	unsigned	lo_blocksize;
+	unsigned	lo_logical_blocksize;
 	void		*key_data; 
 
 	gfp_t		old_gfp_mask;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3a779a4f5653..61b046f256ca 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -532,7 +532,7 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
 static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
 						struct smart_attr *attrib);
 
-static void mtip_complete_command(struct mtip_cmd *cmd, int status)
+static void mtip_complete_command(struct mtip_cmd *cmd, blk_status_t status)
 {
 	struct request *req = blk_mq_rq_from_pdu(cmd);
 
@@ -568,7 +568,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
 	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
 		cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
 		dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
-		mtip_complete_command(cmd, -EIO);
+		mtip_complete_command(cmd, BLK_STS_IOERR);
 		return;
 	}
 
@@ -667,7 +667,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
 					tag,
 					fail_reason != NULL ?
 						fail_reason : "unknown");
-					mtip_complete_command(cmd, -ENODATA);
+					mtip_complete_command(cmd, BLK_STS_MEDIUM);
 					continue;
 				}
 			}
@@ -690,7 +690,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
 			dev_warn(&port->dd->pdev->dev,
 				"retiring tag %d\n", tag);
 
-			mtip_complete_command(cmd, -EIO);
+			mtip_complete_command(cmd, BLK_STS_IOERR);
 		}
 	}
 	print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
@@ -1063,23 +1063,10 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 	/* insert request and run queue */
 	blk_execute_rq(rq->q, NULL, rq, true);
 
-	rv = int_cmd->status;
-	if (rv < 0) {
-		if (rv == -ERESTARTSYS) { /* interrupted */
-			dev_err(&dd->pdev->dev,
-				"Internal command [%02X] was interrupted after %u ms\n",
-				fis->command,
-				jiffies_to_msecs(jiffies - start));
-			rv = -EINTR;
-			goto exec_ic_exit;
-		} else if (rv == 0) /* timeout */
-			dev_err(&dd->pdev->dev,
-				"Internal command did not complete [%02X] within timeout of  %lu ms\n",
-				fis->command, timeout);
-		else
-			dev_err(&dd->pdev->dev,
-				"Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n",
-				fis->command, rv, timeout);
+	if (int_cmd->status) {
+		dev_err(&dd->pdev->dev, "Internal command [%02X] failed %d\n",
+				fis->command, int_cmd->status);
+		rv = -EIO;
 
 		if (mtip_check_surprise_removal(dd->pdev) ||
 			test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
@@ -2753,7 +2740,7 @@ static void mtip_abort_cmd(struct request *req, void *data,
 	dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
 
 	clear_bit(req->tag, dd->port->cmds_to_issue);
-	cmd->status = -EIO;
+	cmd->status = BLK_STS_IOERR;
 	mtip_softirq_done_fn(req);
 }
 
@@ -3597,7 +3584,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
 		int err;
 
 		err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
-		blk_mq_end_request(rq, err);
+		blk_mq_end_request(rq, err ? BLK_STS_IOERR : BLK_STS_OK);
 		return 0;
 	}
 
@@ -3633,8 +3620,8 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
-static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
-				   struct request *rq)
+static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
+		struct request *rq)
 {
 	struct driver_data *dd = hctx->queue->queuedata;
 	struct mtip_int_cmd *icmd = rq->special;
@@ -3642,7 +3629,7 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
 	struct mtip_cmd_sg *command_sg;
 
 	if (mtip_commands_active(dd->port))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 
 	/* Populate the SG list */
 	cmd->command_header->opts =
@@ -3666,10 +3653,10 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(rq);
 	mtip_issue_non_ncq_command(dd->port, rq->tag);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return 0;
 }
 
-static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct request *rq = bd->rq;
@@ -3681,15 +3668,14 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
 		return mtip_issue_reserved_cmd(hctx, rq);
 
 	if (unlikely(mtip_check_unal_depth(hctx, rq)))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 
 	blk_mq_start_request(rq);
 
 	ret = mtip_submit_request(hctx, rq);
 	if (likely(!ret))
-		return BLK_MQ_RQ_QUEUE_OK;
-
-	return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_OK;
+	return BLK_STS_IOERR;
 }
 
 static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq,
@@ -3730,7 +3716,7 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
 	if (reserved) {
 		struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 
-		cmd->status = -ETIME;
+		cmd->status = BLK_STS_TIMEOUT;
 		return BLK_EH_HANDLED;
 	}
 
@@ -3961,7 +3947,7 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
 {
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
-	cmd->status = -ENODEV;
+	cmd->status = BLK_STS_IOERR;
 	blk_mq_complete_request(rq);
 }
 
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 37b8e3e0bb78..e8286af50e16 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -342,7 +342,7 @@ struct mtip_cmd {
 	int retries; /* The number of retries left for this command. */
 
 	int direction; /* Data transfer direction */
-	int status;
+	blk_status_t status;
 };
 
 /* Structure used to describe a port. */
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 9a7bb2c29447..977ec960dd2f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -116,7 +116,7 @@ struct nbd_cmd {
 	int index;
 	int cookie;
 	struct completion send_complete;
-	int status;
+	blk_status_t status;
 };
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -286,7 +286,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 	struct nbd_config *config;
 
 	if (!refcount_inc_not_zero(&nbd->config_refs)) {
-		cmd->status = -EIO;
+		cmd->status = BLK_STS_TIMEOUT;
 		return BLK_EH_HANDLED;
 	}
 
@@ -331,7 +331,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 				    "Connection timed out\n");
 	}
 	set_bit(NBD_TIMEDOUT, &config->runtime_flags);
-	cmd->status = -EIO;
+	cmd->status = BLK_STS_IOERR;
 	sock_shutdown(nbd);
 	nbd_config_put(nbd);
 
@@ -400,6 +400,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 	unsigned long size = blk_rq_bytes(req);
 	struct bio *bio;
 	u32 type;
+	u32 nbd_cmd_flags = 0;
 	u32 tag = blk_mq_unique_tag(req);
 	int sent = nsock->sent, skip = 0;
 
@@ -429,6 +430,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 		return -EIO;
 	}
 
+	if (req->cmd_flags & REQ_FUA)
+		nbd_cmd_flags |= NBD_CMD_FLAG_FUA;
+
 	/* We did a partial send previously, and we at least sent the whole
 	 * request struct, so just go and send the rest of the pages in the
 	 * request.
@@ -442,7 +446,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 	}
 	cmd->index = index;
 	cmd->cookie = nsock->cookie;
-	request.type = htonl(type);
+	request.type = htonl(type | nbd_cmd_flags);
 	if (type != NBD_CMD_FLUSH) {
 		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
 		request.len = htonl(size);
@@ -465,7 +469,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 				nsock->pending = req;
 				nsock->sent = sent;
 			}
-			return BLK_MQ_RQ_QUEUE_BUSY;
+			return BLK_STS_RESOURCE;
 		}
 		dev_err_ratelimited(disk_to_dev(nbd->disk),
 			"Send control failed (result %d)\n", result);
@@ -506,7 +510,7 @@ send_pages:
 					 */
 					nsock->pending = req;
 					nsock->sent = sent;
-					return BLK_MQ_RQ_QUEUE_BUSY;
+					return BLK_STS_RESOURCE;
 				}
 				dev_err(disk_to_dev(nbd->disk),
 					"Send data failed (result %d)\n",
@@ -574,7 +578,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 	if (ntohl(reply.error)) {
 		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
 			ntohl(reply.error));
-		cmd->status = -EIO;
+		cmd->status = BLK_STS_IOERR;
 		return cmd;
 	}
 
@@ -599,7 +603,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 				 */
 				if (nbd_disconnected(config) ||
 				    config->num_connections <= 1) {
-					cmd->status = -EIO;
+					cmd->status = BLK_STS_IOERR;
 					return cmd;
 				}
 				return ERR_PTR(-EIO);
@@ -651,7 +655,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
 	if (!blk_mq_request_started(req))
 		return;
 	cmd = blk_mq_rq_to_pdu(req);
-	cmd->status = -EIO;
+	cmd->status = BLK_STS_IOERR;
 	blk_mq_complete_request(req);
 }
 
@@ -740,7 +744,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
 		nbd_config_put(nbd);
 		return -EINVAL;
 	}
-	cmd->status = 0;
+	cmd->status = BLK_STS_OK;
 again:
 	nsock = config->socks[index];
 	mutex_lock(&nsock->tx_lock);
@@ -794,7 +798,7 @@ out:
 	return ret;
 }
 
-static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 			const struct blk_mq_queue_data *bd)
 {
 	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -818,13 +822,9 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 	 * appropriate.
 	 */
 	ret = nbd_handle_cmd(cmd, hctx->queue_num);
-	if (ret < 0)
-		ret = BLK_MQ_RQ_QUEUE_ERROR;
-	if (!ret)
-		ret = BLK_MQ_RQ_QUEUE_OK;
 	complete(&cmd->send_complete);
 
-	return ret;
+	return ret < 0 ? BLK_STS_IOERR : BLK_STS_OK;
 }
 
 static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
@@ -910,6 +910,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
 			continue;
 		}
 		sk_set_memalloc(sock->sk);
+		sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
 		atomic_inc(&config->recv_threads);
 		refcount_inc(&nbd->config_refs);
 		old = nsock->sock;
@@ -937,14 +938,6 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
 	return -ENOSPC;
 }
 
-/* Reset all properties of an NBD device */
-static void nbd_reset(struct nbd_device *nbd)
-{
-	nbd->config = NULL;
-	nbd->tag_set.timeout = 0;
-	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
-}
-
 static void nbd_bdev_reset(struct block_device *bdev)
 {
 	if (bdev->bd_openers > 1)
@@ -965,8 +958,12 @@ static void nbd_parse_flags(struct nbd_device *nbd)
 		set_disk_ro(nbd->disk, false);
 	if (config->flags & NBD_FLAG_SEND_TRIM)
 		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
-	if (config->flags & NBD_FLAG_SEND_FLUSH)
-		blk_queue_write_cache(nbd->disk->queue, true, false);
+	if (config->flags & NBD_FLAG_SEND_FLUSH) {
+		if (config->flags & NBD_FLAG_SEND_FUA)
+			blk_queue_write_cache(nbd->disk->queue, true, true);
+		else
+			blk_queue_write_cache(nbd->disk->queue, true, false);
+	}
 	else
 		blk_queue_write_cache(nbd->disk->queue, false, false);
 }
@@ -1029,7 +1026,11 @@ static void nbd_config_put(struct nbd_device *nbd)
 			}
 			kfree(config->socks);
 		}
-		nbd_reset(nbd);
+		kfree(nbd->config);
+		nbd->config = NULL;
+
+		nbd->tag_set.timeout = 0;
+		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 
 		mutex_unlock(&nbd->config_lock);
 		nbd_put(nbd);
@@ -1075,6 +1076,7 @@ static int nbd_start_device(struct nbd_device *nbd)
 			return -ENOMEM;
 		}
 		sk_set_memalloc(config->socks[i]->sock->sk);
+		config->socks[i]->sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
 		atomic_inc(&config->recv_threads);
 		refcount_inc(&nbd->config_refs);
 		INIT_WORK(&args->work, recv_work);
@@ -1309,6 +1311,8 @@ static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
 		seq_puts(s, "NBD_FLAG_READ_ONLY\n");
 	if (flags & NBD_FLAG_SEND_FLUSH)
 		seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
+	if (flags & NBD_FLAG_SEND_FUA)
+		seq_puts(s, "NBD_FLAG_SEND_FUA\n");
 	if (flags & NBD_FLAG_SEND_TRIM)
 		seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
 
@@ -1483,7 +1487,6 @@ static int nbd_dev_add(int index)
 	disk->fops = &nbd_fops;
 	disk->private_data = nbd;
 	sprintf(disk->disk_name, "nbd%d", index);
-	nbd_reset(nbd);
 	add_disk(disk);
 	nbd_total_devices++;
 	return index;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index d946e1eeac8e..71f4422eba81 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -35,7 +35,8 @@ struct nullb {
 	struct request_queue *q;
 	struct gendisk *disk;
 	struct nvm_dev *ndev;
-	struct blk_mq_tag_set tag_set;
+	struct blk_mq_tag_set *tag_set;
+	struct blk_mq_tag_set __tag_set;
 	struct hrtimer timer;
 	unsigned int queue_depth;
 	spinlock_t lock;
@@ -50,6 +51,7 @@ static struct mutex lock;
 static int null_major;
 static int nullb_indexes;
 static struct kmem_cache *ppa_cache;
+static struct blk_mq_tag_set tag_set;
 
 enum {
 	NULL_IRQ_NONE		= 0,
@@ -109,7 +111,7 @@ static int bs = 512;
 module_param(bs, int, S_IRUGO);
 MODULE_PARM_DESC(bs, "Block size (in bytes)");
 
-static int nr_devices = 2;
+static int nr_devices = 1;
 module_param(nr_devices, int, S_IRUGO);
 MODULE_PARM_DESC(nr_devices, "Number of devices to register");
 
@@ -121,6 +123,10 @@ static bool blocking;
 module_param(blocking, bool, S_IRUGO);
 MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
 
+static bool shared_tags;
+module_param(shared_tags, bool, S_IRUGO);
+MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
+
 static int irqmode = NULL_IRQ_SOFTIRQ;
 
 static int null_set_irqmode(const char *str, const struct kernel_param *kp)
@@ -229,11 +235,11 @@ static void end_cmd(struct nullb_cmd *cmd)
 
 	switch (queue_mode)  {
 	case NULL_Q_MQ:
-		blk_mq_end_request(cmd->rq, 0);
+		blk_mq_end_request(cmd->rq, BLK_STS_OK);
 		return;
 	case NULL_Q_RQ:
 		INIT_LIST_HEAD(&cmd->rq->queuelist);
-		blk_end_request_all(cmd->rq, 0);
+		blk_end_request_all(cmd->rq, BLK_STS_OK);
 		break;
 	case NULL_Q_BIO:
 		bio_endio(cmd->bio);
@@ -356,7 +362,7 @@ static void null_request_fn(struct request_queue *q)
 	}
 }
 
-static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -373,34 +379,11 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_mq_start_request(bd->rq);
 
 	null_handle_cmd(cmd);
-	return BLK_MQ_RQ_QUEUE_OK;
-}
-
-static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
-{
-	BUG_ON(!nullb);
-	BUG_ON(!nq);
-
-	init_waitqueue_head(&nq->wait);
-	nq->queue_depth = nullb->queue_depth;
-}
-
-static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
-			  unsigned int index)
-{
-	struct nullb *nullb = data;
-	struct nullb_queue *nq = &nullb->queues[index];
-
-	hctx->driver_data = nq;
-	null_init_queue(nullb, nq);
-	nullb->nr_queues++;
-
-	return 0;
+	return BLK_STS_OK;
 }
 
 static const struct blk_mq_ops null_mq_ops = {
 	.queue_rq       = null_queue_rq,
-	.init_hctx	= null_init_hctx,
 	.complete	= null_softirq_done_fn,
 };
 
@@ -422,11 +405,12 @@ static void cleanup_queues(struct nullb *nullb)
 
 #ifdef CONFIG_NVM
 
-static void null_lnvm_end_io(struct request *rq, int error)
+static void null_lnvm_end_io(struct request *rq, blk_status_t status)
 {
 	struct nvm_rq *rqd = rq->end_io_data;
 
-	rqd->error = error;
+	/* XXX: lighnvm core seems to expect NVM_RSP_* values here.. */
+	rqd->error = status ? -EIO : 0;
 	nvm_end_io(rqd);
 
 	blk_put_request(rq);
@@ -591,8 +575,8 @@ static void null_del_dev(struct nullb *nullb)
 	else
 		del_gendisk(nullb->disk);
 	blk_cleanup_queue(nullb->q);
-	if (queue_mode == NULL_Q_MQ)
-		blk_mq_free_tag_set(&nullb->tag_set);
+	if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+		blk_mq_free_tag_set(nullb->tag_set);
 	if (!use_lightnvm)
 		put_disk(nullb->disk);
 	cleanup_queues(nullb);
@@ -614,6 +598,32 @@ static const struct block_device_operations null_fops = {
 	.release =	null_release,
 };
 
+static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
+{
+	BUG_ON(!nullb);
+	BUG_ON(!nq);
+
+	init_waitqueue_head(&nq->wait);
+	nq->queue_depth = nullb->queue_depth;
+}
+
+static void null_init_queues(struct nullb *nullb)
+{
+	struct request_queue *q = nullb->q;
+	struct blk_mq_hw_ctx *hctx;
+	struct nullb_queue *nq;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (!hctx->nr_ctx || !hctx->tags)
+			continue;
+		nq = &nullb->queues[i];
+		hctx->driver_data = nq;
+		null_init_queue(nullb, nq);
+		nullb->nr_queues++;
+	}
+}
+
 static int setup_commands(struct nullb_queue *nq)
 {
 	struct nullb_cmd *cmd;
@@ -694,6 +704,22 @@ static int null_gendisk_register(struct nullb *nullb)
 	return 0;
 }
 
+static int null_init_tag_set(struct blk_mq_tag_set *set)
+{
+	set->ops = &null_mq_ops;
+	set->nr_hw_queues = submit_queues;
+	set->queue_depth = hw_queue_depth;
+	set->numa_node = home_node;
+	set->cmd_size	= sizeof(struct nullb_cmd);
+	set->flags = BLK_MQ_F_SHOULD_MERGE;
+	set->driver_data = NULL;
+
+	if (blocking)
+		set->flags |= BLK_MQ_F_BLOCKING;
+
+	return blk_mq_alloc_tag_set(set);
+}
+
 static int null_add_dev(void)
 {
 	struct nullb *nullb;
@@ -715,26 +741,23 @@ static int null_add_dev(void)
 		goto out_free_nullb;
 
 	if (queue_mode == NULL_Q_MQ) {
-		nullb->tag_set.ops = &null_mq_ops;
-		nullb->tag_set.nr_hw_queues = submit_queues;
-		nullb->tag_set.queue_depth = hw_queue_depth;
-		nullb->tag_set.numa_node = home_node;
-		nullb->tag_set.cmd_size	= sizeof(struct nullb_cmd);
-		nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
-		nullb->tag_set.driver_data = nullb;
-
-		if (blocking)
-			nullb->tag_set.flags |= BLK_MQ_F_BLOCKING;
-
-		rv = blk_mq_alloc_tag_set(&nullb->tag_set);
+		if (shared_tags) {
+			nullb->tag_set = &tag_set;
+			rv = 0;
+		} else {
+			nullb->tag_set = &nullb->__tag_set;
+			rv = null_init_tag_set(nullb->tag_set);
+		}
+
 		if (rv)
 			goto out_cleanup_queues;
 
-		nullb->q = blk_mq_init_queue(&nullb->tag_set);
+		nullb->q = blk_mq_init_queue(nullb->tag_set);
 		if (IS_ERR(nullb->q)) {
 			rv = -ENOMEM;
 			goto out_cleanup_tags;
 		}
+		null_init_queues(nullb);
 	} else if (queue_mode == NULL_Q_BIO) {
 		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
 		if (!nullb->q) {
@@ -787,8 +810,8 @@ static int null_add_dev(void)
 out_cleanup_blk_queue:
 	blk_cleanup_queue(nullb->q);
 out_cleanup_tags:
-	if (queue_mode == NULL_Q_MQ)
-		blk_mq_free_tag_set(&nullb->tag_set);
+	if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+		blk_mq_free_tag_set(nullb->tag_set);
 out_cleanup_queues:
 	cleanup_queues(nullb);
 out_free_nullb:
@@ -821,6 +844,9 @@ static int __init null_init(void)
 		queue_mode = NULL_Q_MQ;
 	}
 
+	if (queue_mode == NULL_Q_MQ && shared_tags)
+		null_init_tag_set(&tag_set);
+
 	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
 		if (submit_queues < nr_online_nodes) {
 			pr_warn("null_blk: submit_queues param is set to %u.",
@@ -881,6 +907,9 @@ static void __exit null_exit(void)
 	}
 	mutex_unlock(&lock);
 
+	if (queue_mode == NULL_Q_MQ && shared_tags)
+		blk_mq_free_tag_set(&tag_set);
+
 	kmem_cache_destroy(ppa_cache);
 }
 
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index b1267ef34d5a..7b8c6368beb7 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -305,6 +305,7 @@ static void pcd_init_units(void)
 			put_disk(disk);
 			continue;
 		}
+		blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
 		cd->disk = disk;
 		cd->pi = &cd->pia;
 		cd->present = 0;
@@ -783,7 +784,7 @@ static void pcd_request(void)
 			ps_set_intr(do_pcd_read, NULL, 0, nice);
 			return;
 		} else {
-			__blk_end_request_all(pcd_req, -EIO);
+			__blk_end_request_all(pcd_req, BLK_STS_IOERR);
 			pcd_req = NULL;
 		}
 	}
@@ -794,7 +795,7 @@ static void do_pcd_request(struct request_queue *q)
 	pcd_request();
 }
 
-static inline void next_request(int err)
+static inline void next_request(blk_status_t err)
 {
 	unsigned long saved_flags;
 
@@ -837,7 +838,7 @@ static void pcd_start(void)
 
 	if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) {
 		pcd_bufblk = -1;
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
 
@@ -871,7 +872,7 @@ static void do_pcd_read_drq(void)
 			return;
 		}
 		pcd_bufblk = -1;
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
 
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 7d2402f90978..27a44b97393a 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -438,7 +438,7 @@ static void run_fsm(void)
 				phase = NULL;
 				spin_lock_irqsave(&pd_lock, saved_flags);
 				if (!__blk_end_request_cur(pd_req,
-						res == Ok ? 0 : -EIO)) {
+						res == Ok ? 0 : BLK_STS_IOERR)) {
 					if (!set_next_request())
 						stop = 1;
 				}
@@ -863,6 +863,7 @@ static void pd_probe_drive(struct pd_unit *disk)
 		return;
 	}
 	blk_queue_max_hw_sectors(p->queue, cluster);
+	blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH);
 
 	if (disk->drive == -1) {
 		for (disk->drive = 0; disk->drive <= 1; disk->drive++)
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index f24ca7315ddc..eef7a91f667d 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -293,6 +293,7 @@ static void __init pf_init_units(void)
 			return;
 		}
 		blk_queue_max_segments(disk->queue, cluster);
+		blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
 		pf->disk = disk;
 		pf->pi = &pf->pia;
 		pf->media_status = PF_NM;
@@ -801,7 +802,7 @@ static int set_next_request(void)
 	return pf_req != NULL;
 }
 
-static void pf_end_request(int err)
+static void pf_end_request(blk_status_t err)
 {
 	if (pf_req && !__blk_end_request_cur(pf_req, err))
 		pf_req = NULL;
@@ -821,7 +822,7 @@ repeat:
 	pf_count = blk_rq_cur_sectors(pf_req);
 
 	if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
-		pf_end_request(-EIO);
+		pf_end_request(BLK_STS_IOERR);
 		goto repeat;
 	}
 
@@ -836,7 +837,7 @@ repeat:
 		pi_do_claimed(pf_current->pi, do_pf_write);
 	else {
 		pf_busy = 0;
-		pf_end_request(-EIO);
+		pf_end_request(BLK_STS_IOERR);
 		goto repeat;
 	}
 }
@@ -868,7 +869,7 @@ static int pf_next_buf(void)
 	return 0;
 }
 
-static inline void next_request(int err)
+static inline void next_request(blk_status_t err)
 {
 	unsigned long saved_flags;
 
@@ -896,7 +897,7 @@ static void do_pf_read_start(void)
 			pi_do_claimed(pf_current->pi, do_pf_read_start);
 			return;
 		}
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
 	pf_mask = STAT_DRQ;
@@ -915,7 +916,7 @@ static void do_pf_read_drq(void)
 				pi_do_claimed(pf_current->pi, do_pf_read_start);
 				return;
 			}
-			next_request(-EIO);
+			next_request(BLK_STS_IOERR);
 			return;
 		}
 		pi_read_block(pf_current->pi, pf_buf, 512);
@@ -942,7 +943,7 @@ static void do_pf_write_start(void)
 			pi_do_claimed(pf_current->pi, do_pf_write_start);
 			return;
 		}
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
 
@@ -955,7 +956,7 @@ static void do_pf_write_start(void)
 				pi_do_claimed(pf_current->pi, do_pf_write_start);
 				return;
 			}
-			next_request(-EIO);
+			next_request(BLK_STS_IOERR);
 			return;
 		}
 		pi_write_block(pf_current->pi, pf_buf, 512);
@@ -975,7 +976,7 @@ static void do_pf_write_done(void)
 			pi_do_claimed(pf_current->pi, do_pf_write_start);
 			return;
 		}
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
 	pi_disconnect(pf_current->pi);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 205b865ebeb9..467beca397a2 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -98,6 +98,7 @@ static int write_congestion_on  = PKT_WRITE_CONGESTION_ON;
 static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
 static struct mutex ctl_mutex;	/* Serialize open/close/setup/teardown */
 static mempool_t *psd_pool;
+static struct bio_set *pkt_bio_set;
 
 static struct class	*class_pktcdvd = NULL;    /* /sys/class/pktcdvd */
 static struct dentry	*pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
@@ -707,7 +708,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 			     REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
-	scsi_req_init(rq);
 
 	if (cgc->buflen) {
 		ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
@@ -952,9 +952,9 @@ static void pkt_end_io_read(struct bio *bio)
 
 	pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
 		bio, (unsigned long long)pkt->sector,
-		(unsigned long long)bio->bi_iter.bi_sector, bio->bi_error);
+		(unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		atomic_inc(&pkt->io_errors);
 	if (atomic_dec_and_test(&pkt->io_wait)) {
 		atomic_inc(&pkt->run_sm);
@@ -969,7 +969,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
 	struct pktcdvd_device *pd = pkt->pd;
 	BUG_ON(!pd);
 
-	pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_error);
+	pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status);
 
 	pd->stats.pkt_ended++;
 
@@ -1305,16 +1305,16 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 	pkt_queue_bio(pd, pkt->w_bio);
 }
 
-static void pkt_finish_packet(struct packet_data *pkt, int error)
+static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status)
 {
 	struct bio *bio;
 
-	if (error)
+	if (status)
 		pkt->cache_valid = 0;
 
 	/* Finish all bios corresponding to this packet */
 	while ((bio = bio_list_pop(&pkt->orig_bios))) {
-		bio->bi_error = error;
+		bio->bi_status = status;
 		bio_endio(bio);
 	}
 }
@@ -1349,7 +1349,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
 			if (atomic_read(&pkt->io_wait) > 0)
 				return;
 
-			if (!pkt->w_bio->bi_error) {
+			if (!pkt->w_bio->bi_status) {
 				pkt_set_state(pkt, PACKET_FINISHED_STATE);
 			} else {
 				pkt_set_state(pkt, PACKET_RECOVERY_STATE);
@@ -1366,7 +1366,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
 			break;
 
 		case PACKET_FINISHED_STATE:
-			pkt_finish_packet(pkt, pkt->w_bio->bi_error);
+			pkt_finish_packet(pkt, pkt->w_bio->bi_status);
 			return;
 
 		default:
@@ -2301,7 +2301,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
 	struct packet_stacked_data *psd = bio->bi_private;
 	struct pktcdvd_device *pd = psd->pd;
 
-	psd->bio->bi_error = bio->bi_error;
+	psd->bio->bi_status = bio->bi_status;
 	bio_put(bio);
 	bio_endio(psd->bio);
 	mempool_free(psd, psd_pool);
@@ -2310,7 +2310,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
 
 static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
 {
-	struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
+	struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, pkt_bio_set);
 	struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
 
 	psd->pd = pd;
@@ -2412,9 +2412,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
 	char b[BDEVNAME_SIZE];
 	struct bio *split;
 
-	blk_queue_bounce(q, &bio);
-
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	pd = q->queuedata;
 	if (!pd) {
@@ -2455,7 +2453,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
 
 			split = bio_split(bio, last_zone -
 					  bio->bi_iter.bi_sector,
-					  GFP_NOIO, fs_bio_set);
+					  GFP_NOIO, pkt_bio_set);
 			bio_chain(split, bio);
 		} else {
 			split = bio;
@@ -2583,6 +2581,11 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
 	bdev = bdget(dev);
 	if (!bdev)
 		return -ENOMEM;
+	if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) {
+		WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
+		bdput(bdev);
+		return -EINVAL;
+	}
 	ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
 	if (ret)
 		return ret;
@@ -2919,6 +2922,11 @@ static int __init pkt_init(void)
 					sizeof(struct packet_stacked_data));
 	if (!psd_pool)
 		return -ENOMEM;
+	pkt_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
+	if (!pkt_bio_set) {
+		mempool_destroy(psd_pool);
+		return -ENOMEM;
+	}
 
 	ret = register_blkdev(pktdev_major, DRIVER_NAME);
 	if (ret < 0) {
@@ -2951,6 +2959,7 @@ out:
 	unregister_blkdev(pktdev_major, DRIVER_NAME);
 out2:
 	mempool_destroy(psd_pool);
+	bioset_free(pkt_bio_set);
 	return ret;
 }
 
@@ -2964,6 +2973,7 @@ static void __exit pkt_exit(void)
 
 	unregister_blkdev(pktdev_major, DRIVER_NAME);
 	mempool_destroy(psd_pool);
+	bioset_free(pkt_bio_set);
 }
 
 MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index a809e3e9feb8..075662f2cf46 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -158,7 +158,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
 	if (res) {
 		dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
 			__LINE__, op, res);
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 		return 0;
 	}
 
@@ -180,7 +180,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
 	if (res) {
 		dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
 			__func__, __LINE__, res);
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 		return 0;
 	}
 
@@ -208,7 +208,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
 			break;
 		default:
 			blk_dump_rq_flags(req, DEVICE_NAME " bad request");
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 		}
 	}
 }
@@ -231,7 +231,8 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
 	struct ps3_storage_device *dev = data;
 	struct ps3disk_private *priv;
 	struct request *req;
-	int res, read, error;
+	int res, read;
+	blk_status_t error;
 	u64 tag, status;
 	const char *op;
 
@@ -269,7 +270,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
 	if (status) {
 		dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__,
 			__LINE__, op, status);
-		error = -EIO;
+		error = BLK_STS_IOERR;
 	} else {
 		dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__,
 			__LINE__, op);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 456b4fe21559..e0e81cacd781 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -428,7 +428,7 @@ static void ps3vram_cache_cleanup(struct ps3_system_bus_device *dev)
 	kfree(priv->cache.tags);
 }
 
-static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
+static blk_status_t ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
 			size_t len, size_t *retlen, u_char *buf)
 {
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
@@ -438,7 +438,7 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
 		(unsigned int)from, len);
 
 	if (from >= priv->size)
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	if (len > priv->size - from)
 		len = priv->size - from;
@@ -472,14 +472,14 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
 	return 0;
 }
 
-static int ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
+static blk_status_t ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
 			 size_t len, size_t *retlen, const u_char *buf)
 {
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
 	unsigned int cached, count;
 
 	if (to >= priv->size)
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	if (len > priv->size - to)
 		len = priv->size - to;
@@ -554,7 +554,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
 	int write = bio_data_dir(bio) == WRITE;
 	const char *op = write ? "write" : "read";
 	loff_t offset = bio->bi_iter.bi_sector << 9;
-	int error = 0;
+	blk_status_t error = 0;
 	struct bio_vec bvec;
 	struct bvec_iter iter;
 	struct bio *next;
@@ -578,7 +578,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
 
 		if (retlen != len) {
 			dev_err(&dev->core, "Short %s\n", op);
-			error = -EIO;
+			error = BLK_STS_IOERR;
 			goto out;
 		}
 
@@ -593,7 +593,7 @@ out:
 	next = bio_list_peek(&priv->list);
 	spin_unlock_irq(&priv->lock);
 
-	bio->bi_error = error;
+	bio->bi_status = error;
 	bio_endio(bio);
 	return next;
 }
@@ -606,7 +606,7 @@ static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
 
 	dev_dbg(&dev->core, "%s\n", __func__);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	spin_lock_irq(&priv->lock);
 	busy = !bio_list_empty(&priv->list);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 26812c1ed0cf..b008b6a98098 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -387,6 +387,7 @@ struct rbd_device {
 
 	struct rw_semaphore	lock_rwsem;
 	enum rbd_lock_state	lock_state;
+	char			lock_cookie[32];
 	struct rbd_client_id	owner_cid;
 	struct work_struct	acquired_lock_work;
 	struct work_struct	released_lock_work;
@@ -441,6 +442,8 @@ static DEFINE_SPINLOCK(rbd_client_list_lock);
 static struct kmem_cache	*rbd_img_request_cache;
 static struct kmem_cache	*rbd_obj_request_cache;
 
+static struct bio_set		*rbd_bio_clone;
+
 static int rbd_major;
 static DEFINE_IDA(rbd_dev_id_ida);
 
@@ -477,13 +480,6 @@ static int minor_to_rbd_dev_id(int minor)
 	return minor >> RBD_SINGLE_MAJOR_PART_SHIFT;
 }
 
-static bool rbd_is_lock_supported(struct rbd_device *rbd_dev)
-{
-	return (rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK) &&
-	       rbd_dev->spec->snap_id == CEPH_NOSNAP &&
-	       !rbd_dev->mapping.read_only;
-}
-
 static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev)
 {
 	return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED ||
@@ -731,7 +727,7 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
 	kref_init(&rbdc->kref);
 	INIT_LIST_HEAD(&rbdc->node);
 
-	rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0);
+	rbdc->client = ceph_create_client(ceph_opts, rbdc);
 	if (IS_ERR(rbdc->client))
 		goto out_rbdc;
 	ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */
@@ -804,6 +800,7 @@ enum {
 	Opt_read_only,
 	Opt_read_write,
 	Opt_lock_on_read,
+	Opt_exclusive,
 	Opt_err
 };
 
@@ -816,6 +813,7 @@ static match_table_t rbd_opts_tokens = {
 	{Opt_read_write, "read_write"},
 	{Opt_read_write, "rw"},		/* Alternate spelling */
 	{Opt_lock_on_read, "lock_on_read"},
+	{Opt_exclusive, "exclusive"},
 	{Opt_err, NULL}
 };
 
@@ -823,11 +821,13 @@ struct rbd_options {
 	int	queue_depth;
 	bool	read_only;
 	bool	lock_on_read;
+	bool	exclusive;
 };
 
 #define RBD_QUEUE_DEPTH_DEFAULT	BLKDEV_MAX_RQ
 #define RBD_READ_ONLY_DEFAULT	false
 #define RBD_LOCK_ON_READ_DEFAULT false
+#define RBD_EXCLUSIVE_DEFAULT	false
 
 static int parse_rbd_opts_token(char *c, void *private)
 {
@@ -866,6 +866,9 @@ static int parse_rbd_opts_token(char *c, void *private)
 	case Opt_lock_on_read:
 		rbd_opts->lock_on_read = true;
 		break;
+	case Opt_exclusive:
+		rbd_opts->exclusive = true;
+		break;
 	default:
 		/* libceph prints "bad option" msg */
 		return -EINVAL;
@@ -1362,7 +1365,7 @@ static struct bio *bio_clone_range(struct bio *bio_src,
 {
 	struct bio *bio;
 
-	bio = bio_clone(bio_src, gfpmask);
+	bio = bio_clone_fast(bio_src, gfpmask, rbd_bio_clone);
 	if (!bio)
 		return NULL;	/* ENOMEM */
 
@@ -2292,11 +2295,13 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
 		rbd_assert(img_request->obj_request != NULL);
 		more = obj_request->which < img_request->obj_request_count - 1;
 	} else {
+		blk_status_t status = errno_to_blk_status(result);
+
 		rbd_assert(img_request->rq != NULL);
 
-		more = blk_update_request(img_request->rq, result, xferred);
+		more = blk_update_request(img_request->rq, status, xferred);
 		if (!more)
-			__blk_mq_end_request(img_request->rq, result);
+			__blk_mq_end_request(img_request->rq, status);
 	}
 
 	return more;
@@ -3079,7 +3084,8 @@ static int rbd_lock(struct rbd_device *rbd_dev)
 	char cookie[32];
 	int ret;
 
-	WARN_ON(__rbd_is_lock_owner(rbd_dev));
+	WARN_ON(__rbd_is_lock_owner(rbd_dev) ||
+		rbd_dev->lock_cookie[0] != '\0');
 
 	format_lock_cookie(rbd_dev, cookie);
 	ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
@@ -3089,6 +3095,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
 		return ret;
 
 	rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
+	strcpy(rbd_dev->lock_cookie, cookie);
 	rbd_set_owner_cid(rbd_dev, &cid);
 	queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
 	return 0;
@@ -3097,27 +3104,24 @@ static int rbd_lock(struct rbd_device *rbd_dev)
 /*
  * lock_rwsem must be held for write
  */
-static int rbd_unlock(struct rbd_device *rbd_dev)
+static void rbd_unlock(struct rbd_device *rbd_dev)
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
-	char cookie[32];
 	int ret;
 
-	WARN_ON(!__rbd_is_lock_owner(rbd_dev));
-
-	rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
+	WARN_ON(!__rbd_is_lock_owner(rbd_dev) ||
+		rbd_dev->lock_cookie[0] == '\0');
 
-	format_lock_cookie(rbd_dev, cookie);
 	ret = ceph_cls_unlock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
-			      RBD_LOCK_NAME, cookie);
-	if (ret && ret != -ENOENT) {
-		rbd_warn(rbd_dev, "cls_unlock failed: %d", ret);
-		return ret;
-	}
+			      RBD_LOCK_NAME, rbd_dev->lock_cookie);
+	if (ret && ret != -ENOENT)
+		rbd_warn(rbd_dev, "failed to unlock: %d", ret);
 
+	/* treat errors as the image is unlocked */
+	rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
+	rbd_dev->lock_cookie[0] = '\0';
 	rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
 	queue_work(rbd_dev->task_wq, &rbd_dev->released_lock_work);
-	return 0;
 }
 
 static int __rbd_notify_op_lock(struct rbd_device *rbd_dev,
@@ -3447,6 +3451,18 @@ again:
 	ret = rbd_request_lock(rbd_dev);
 	if (ret == -ETIMEDOUT) {
 		goto again; /* treat this as a dead client */
+	} else if (ret == -EROFS) {
+		rbd_warn(rbd_dev, "peer will not release lock");
+		/*
+		 * If this is rbd_add_acquire_lock(), we want to fail
+		 * immediately -- reuse BLACKLISTED flag.  Otherwise we
+		 * want to block.
+		 */
+		if (!(rbd_dev->disk->flags & GENHD_FL_UP)) {
+			set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags);
+			/* wake "rbd map --exclusive" process */
+			wake_requests(rbd_dev, false);
+		}
 	} else if (ret < 0) {
 		rbd_warn(rbd_dev, "error requesting lock: %d", ret);
 		mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
@@ -3490,16 +3506,15 @@ static bool rbd_release_lock(struct rbd_device *rbd_dev)
 	if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
 		return false;
 
-	if (!rbd_unlock(rbd_dev))
-		/*
-		 * Give others a chance to grab the lock - we would re-acquire
-		 * almost immediately if we got new IO during ceph_osdc_sync()
-		 * otherwise.  We need to ack our own notifications, so this
-		 * lock_dwork will be requeued from rbd_wait_state_locked()
-		 * after wake_requests() in rbd_handle_released_lock().
-		 */
-		cancel_delayed_work(&rbd_dev->lock_dwork);
-
+	rbd_unlock(rbd_dev);
+	/*
+	 * Give others a chance to grab the lock - we would re-acquire
+	 * almost immediately if we got new IO during ceph_osdc_sync()
+	 * otherwise.  We need to ack our own notifications, so this
+	 * lock_dwork will be requeued from rbd_wait_state_locked()
+	 * after wake_requests() in rbd_handle_released_lock().
+	 */
+	cancel_delayed_work(&rbd_dev->lock_dwork);
 	return true;
 }
 
@@ -3580,12 +3595,16 @@ static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v,
 	up_read(&rbd_dev->lock_rwsem);
 }
 
-static bool rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
-				    void **p)
+/*
+ * Returns result for ResponseMessage to be encoded (<= 0), or 1 if no
+ * ResponseMessage is needed.
+ */
+static int rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
+				   void **p)
 {
 	struct rbd_client_id my_cid = rbd_get_cid(rbd_dev);
 	struct rbd_client_id cid = { 0 };
-	bool need_to_send;
+	int result = 1;
 
 	if (struct_v >= 2) {
 		cid.gid = ceph_decode_64(p);
@@ -3595,19 +3614,36 @@ static bool rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
 	dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
 	     cid.handle);
 	if (rbd_cid_equal(&cid, &my_cid))
-		return false;
+		return result;
 
 	down_read(&rbd_dev->lock_rwsem);
-	need_to_send = __rbd_is_lock_owner(rbd_dev);
-	if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) {
-		if (!rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid)) {
-			dout("%s rbd_dev %p queueing unlock_work\n", __func__,
-			     rbd_dev);
-			queue_work(rbd_dev->task_wq, &rbd_dev->unlock_work);
+	if (__rbd_is_lock_owner(rbd_dev)) {
+		if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED &&
+		    rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid))
+			goto out_unlock;
+
+		/*
+		 * encode ResponseMessage(0) so the peer can detect
+		 * a missing owner
+		 */
+		result = 0;
+
+		if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) {
+			if (!rbd_dev->opts->exclusive) {
+				dout("%s rbd_dev %p queueing unlock_work\n",
+				     __func__, rbd_dev);
+				queue_work(rbd_dev->task_wq,
+					   &rbd_dev->unlock_work);
+			} else {
+				/* refuse to release the lock */
+				result = -EROFS;
+			}
 		}
 	}
+
+out_unlock:
 	up_read(&rbd_dev->lock_rwsem);
-	return need_to_send;
+	return result;
 }
 
 static void __rbd_acknowledge_notify(struct rbd_device *rbd_dev,
@@ -3690,13 +3726,10 @@ static void rbd_watch_cb(void *arg, u64 notify_id, u64 cookie,
 		rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
 		break;
 	case RBD_NOTIFY_OP_REQUEST_LOCK:
-		if (rbd_handle_request_lock(rbd_dev, struct_v, &p))
-			/*
-			 * send ResponseMessage(0) back so the client
-			 * can detect a missing owner
-			 */
+		ret = rbd_handle_request_lock(rbd_dev, struct_v, &p);
+		if (ret <= 0)
 			rbd_acknowledge_notify_result(rbd_dev, notify_id,
-						      cookie, 0);
+						      cookie, ret);
 		else
 			rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
 		break;
@@ -3821,24 +3854,51 @@ static void rbd_unregister_watch(struct rbd_device *rbd_dev)
 	ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
 }
 
+/*
+ * lock_rwsem must be held for write
+ */
+static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
+{
+	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+	char cookie[32];
+	int ret;
+
+	WARN_ON(rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED);
+
+	format_lock_cookie(rbd_dev, cookie);
+	ret = ceph_cls_set_cookie(osdc, &rbd_dev->header_oid,
+				  &rbd_dev->header_oloc, RBD_LOCK_NAME,
+				  CEPH_CLS_LOCK_EXCLUSIVE, rbd_dev->lock_cookie,
+				  RBD_LOCK_TAG, cookie);
+	if (ret) {
+		if (ret != -EOPNOTSUPP)
+			rbd_warn(rbd_dev, "failed to update lock cookie: %d",
+				 ret);
+
+		/*
+		 * Lock cookie cannot be updated on older OSDs, so do
+		 * a manual release and queue an acquire.
+		 */
+		if (rbd_release_lock(rbd_dev))
+			queue_delayed_work(rbd_dev->task_wq,
+					   &rbd_dev->lock_dwork, 0);
+	} else {
+		strcpy(rbd_dev->lock_cookie, cookie);
+	}
+}
+
 static void rbd_reregister_watch(struct work_struct *work)
 {
 	struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
 					    struct rbd_device, watch_dwork);
-	bool was_lock_owner = false;
-	bool need_to_wake = false;
 	int ret;
 
 	dout("%s rbd_dev %p\n", __func__, rbd_dev);
 
-	down_write(&rbd_dev->lock_rwsem);
-	if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
-		was_lock_owner = rbd_release_lock(rbd_dev);
-
 	mutex_lock(&rbd_dev->watch_mutex);
 	if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) {
 		mutex_unlock(&rbd_dev->watch_mutex);
-		goto out;
+		return;
 	}
 
 	ret = __rbd_register_watch(rbd_dev);
@@ -3846,36 +3906,28 @@ static void rbd_reregister_watch(struct work_struct *work)
 		rbd_warn(rbd_dev, "failed to reregister watch: %d", ret);
 		if (ret == -EBLACKLISTED || ret == -ENOENT) {
 			set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags);
-			need_to_wake = true;
+			wake_requests(rbd_dev, true);
 		} else {
 			queue_delayed_work(rbd_dev->task_wq,
 					   &rbd_dev->watch_dwork,
 					   RBD_RETRY_DELAY);
 		}
 		mutex_unlock(&rbd_dev->watch_mutex);
-		goto out;
+		return;
 	}
 
-	need_to_wake = true;
 	rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED;
 	rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id;
 	mutex_unlock(&rbd_dev->watch_mutex);
 
+	down_write(&rbd_dev->lock_rwsem);
+	if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
+		rbd_reacquire_lock(rbd_dev);
+	up_write(&rbd_dev->lock_rwsem);
+
 	ret = rbd_dev_refresh(rbd_dev);
 	if (ret)
 		rbd_warn(rbd_dev, "reregisteration refresh failed: %d", ret);
-
-	if (was_lock_owner) {
-		ret = rbd_try_lock(rbd_dev);
-		if (ret)
-			rbd_warn(rbd_dev, "reregisteration lock failed: %d",
-				 ret);
-	}
-
-out:
-	up_write(&rbd_dev->lock_rwsem);
-	if (need_to_wake)
-		wake_requests(rbd_dev, true);
 }
 
 /*
@@ -3975,6 +4027,7 @@ static void rbd_queue_workfn(struct work_struct *work)
 
 	switch (req_op(rq)) {
 	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
 		op_type = OBJ_OP_DISCARD;
 		break;
 	case REQ_OP_WRITE:
@@ -4034,10 +4087,6 @@ static void rbd_queue_workfn(struct work_struct *work)
 	if (op_type != OBJ_OP_READ) {
 		snapc = rbd_dev->header.snapc;
 		ceph_get_snap_context(snapc);
-		must_be_locked = rbd_is_lock_supported(rbd_dev);
-	} else {
-		must_be_locked = rbd_dev->opts->lock_on_read &&
-					rbd_is_lock_supported(rbd_dev);
 	}
 	up_read(&rbd_dev->header_rwsem);
 
@@ -4048,14 +4097,20 @@ static void rbd_queue_workfn(struct work_struct *work)
 		goto err_rq;
 	}
 
+	must_be_locked =
+	    (rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK) &&
+	    (op_type != OBJ_OP_READ || rbd_dev->opts->lock_on_read);
 	if (must_be_locked) {
 		down_read(&rbd_dev->lock_rwsem);
 		if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
-		    !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags))
+		    !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
+			if (rbd_dev->opts->exclusive) {
+				rbd_warn(rbd_dev, "exclusive lock required");
+				result = -EROFS;
+				goto err_unlock;
+			}
 			rbd_wait_state_locked(rbd_dev);
-
-		WARN_ON((rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) ^
-			!test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags));
+		}
 		if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
 			result = -EBLACKLISTED;
 			goto err_unlock;
@@ -4099,34 +4154,25 @@ err_rq:
 			 obj_op_name(op_type), length, offset, result);
 	ceph_put_snap_context(snapc);
 err:
-	blk_mq_end_request(rq, result);
+	blk_mq_end_request(rq, errno_to_blk_status(result));
 }
 
-static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
 	struct request *rq = bd->rq;
 	struct work_struct *work = blk_mq_rq_to_pdu(rq);
 
 	queue_work(rbd_wq, work);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static void rbd_free_disk(struct rbd_device *rbd_dev)
 {
-	struct gendisk *disk = rbd_dev->disk;
-
-	if (!disk)
-		return;
-
+	blk_cleanup_queue(rbd_dev->disk->queue);
+	blk_mq_free_tag_set(&rbd_dev->tag_set);
+	put_disk(rbd_dev->disk);
 	rbd_dev->disk = NULL;
-	if (disk->flags & GENHD_FL_UP) {
-		del_gendisk(disk);
-		if (disk->queue)
-			blk_cleanup_queue(disk->queue);
-		blk_mq_free_tag_set(&rbd_dev->tag_set);
-	}
-	put_disk(disk);
 }
 
 static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
@@ -4379,12 +4425,17 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	q->limits.discard_granularity = segment_size;
 	q->limits.discard_alignment = segment_size;
 	blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
+	blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE);
 
 	if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
 		q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
 
+	/*
+	 * disk_release() expects a queue ref from add_disk() and will
+	 * put it.  Hold an extra ref until add_disk() is called.
+	 */
+	WARN_ON(!blk_get_queue(q));
 	disk->queue = q;
-
 	q->queuedata = rbd_dev;
 
 	rbd_dev->disk = disk;
@@ -5624,6 +5675,7 @@ static int rbd_add_parse_args(const char *buf,
 	rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
 	rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
 	rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
+	rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
 
 	copts = ceph_parse_options(options, mon_addrs,
 					mon_addrs + mon_addrs_size - 1,
@@ -5682,6 +5734,33 @@ again:
 	return ret;
 }
 
+static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
+{
+	down_write(&rbd_dev->lock_rwsem);
+	if (__rbd_is_lock_owner(rbd_dev))
+		rbd_unlock(rbd_dev);
+	up_write(&rbd_dev->lock_rwsem);
+}
+
+static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
+{
+	if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) {
+		rbd_warn(rbd_dev, "exclusive-lock feature is not enabled");
+		return -EINVAL;
+	}
+
+	/* FIXME: "rbd map --exclusive" should be in interruptible */
+	down_read(&rbd_dev->lock_rwsem);
+	rbd_wait_state_locked(rbd_dev);
+	up_read(&rbd_dev->lock_rwsem);
+	if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
+		rbd_warn(rbd_dev, "failed to acquire exclusive lock");
+		return -EROFS;
+	}
+
+	return 0;
+}
+
 /*
  * An rbd format 2 image has a unique identifier, distinct from the
  * name given to it by the user.  Internally, that identifier is
@@ -5873,6 +5952,15 @@ out_err:
 	return ret;
 }
 
+static void rbd_dev_device_release(struct rbd_device *rbd_dev)
+{
+	clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+	rbd_dev_mapping_clear(rbd_dev);
+	rbd_free_disk(rbd_dev);
+	if (!single_major)
+		unregister_blkdev(rbd_dev->major, rbd_dev->name);
+}
+
 /*
  * rbd_dev->header_rwsem must be locked for write and will be unlocked
  * upon return.
@@ -5908,26 +5996,13 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
 	set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
 	set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only);
 
-	dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
-	ret = device_add(&rbd_dev->dev);
+	ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
 	if (ret)
 		goto err_out_mapping;
 
-	/* Everything's ready.  Announce the disk to the world. */
-
 	set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
 	up_write(&rbd_dev->header_rwsem);
-
-	spin_lock(&rbd_dev_list_lock);
-	list_add_tail(&rbd_dev->node, &rbd_dev_list);
-	spin_unlock(&rbd_dev_list_lock);
-
-	add_disk(rbd_dev->disk);
-	pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name,
-		(unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT,
-		rbd_dev->header.features);
-
-	return ret;
+	return 0;
 
 err_out_mapping:
 	rbd_dev_mapping_clear(rbd_dev);
@@ -5962,11 +6037,11 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev)
 static void rbd_dev_image_release(struct rbd_device *rbd_dev)
 {
 	rbd_dev_unprobe(rbd_dev);
+	if (rbd_dev->opts)
+		rbd_unregister_watch(rbd_dev);
 	rbd_dev->image_format = 0;
 	kfree(rbd_dev->spec->image_id);
 	rbd_dev->spec->image_id = NULL;
-
-	rbd_dev_destroy(rbd_dev);
 }
 
 /*
@@ -6126,22 +6201,43 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 	rbd_dev->mapping.read_only = read_only;
 
 	rc = rbd_dev_device_setup(rbd_dev);
-	if (rc) {
-		/*
-		 * rbd_unregister_watch() can't be moved into
-		 * rbd_dev_image_release() without refactoring, see
-		 * commit 1f3ef78861ac.
-		 */
-		rbd_unregister_watch(rbd_dev);
-		rbd_dev_image_release(rbd_dev);
-		goto out;
+	if (rc)
+		goto err_out_image_probe;
+
+	if (rbd_dev->opts->exclusive) {
+		rc = rbd_add_acquire_lock(rbd_dev);
+		if (rc)
+			goto err_out_device_setup;
 	}
 
+	/* Everything's ready.  Announce the disk to the world. */
+
+	rc = device_add(&rbd_dev->dev);
+	if (rc)
+		goto err_out_image_lock;
+
+	add_disk(rbd_dev->disk);
+	/* see rbd_init_disk() */
+	blk_put_queue(rbd_dev->disk->queue);
+
+	spin_lock(&rbd_dev_list_lock);
+	list_add_tail(&rbd_dev->node, &rbd_dev_list);
+	spin_unlock(&rbd_dev_list_lock);
+
+	pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name,
+		(unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT,
+		rbd_dev->header.features);
 	rc = count;
 out:
 	module_put(THIS_MODULE);
 	return rc;
 
+err_out_image_lock:
+	rbd_dev_image_unlock(rbd_dev);
+err_out_device_setup:
+	rbd_dev_device_release(rbd_dev);
+err_out_image_probe:
+	rbd_dev_image_release(rbd_dev);
 err_out_rbd_dev:
 	rbd_dev_destroy(rbd_dev);
 err_out_client:
@@ -6169,21 +6265,6 @@ static ssize_t rbd_add_single_major(struct bus_type *bus,
 	return do_rbd_add(bus, buf, count);
 }
 
-static void rbd_dev_device_release(struct rbd_device *rbd_dev)
-{
-	rbd_free_disk(rbd_dev);
-
-	spin_lock(&rbd_dev_list_lock);
-	list_del_init(&rbd_dev->node);
-	spin_unlock(&rbd_dev_list_lock);
-
-	clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
-	device_del(&rbd_dev->dev);
-	rbd_dev_mapping_clear(rbd_dev);
-	if (!single_major)
-		unregister_blkdev(rbd_dev->major, rbd_dev->name);
-}
-
 static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
 {
 	while (rbd_dev->parent) {
@@ -6201,6 +6282,7 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
 		}
 		rbd_assert(second);
 		rbd_dev_image_release(second);
+		rbd_dev_destroy(second);
 		first->parent = NULL;
 		first->parent_overlap = 0;
 
@@ -6269,21 +6351,16 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
 		blk_set_queue_dying(rbd_dev->disk->queue);
 	}
 
-	down_write(&rbd_dev->lock_rwsem);
-	if (__rbd_is_lock_owner(rbd_dev))
-		rbd_unlock(rbd_dev);
-	up_write(&rbd_dev->lock_rwsem);
-	rbd_unregister_watch(rbd_dev);
+	del_gendisk(rbd_dev->disk);
+	spin_lock(&rbd_dev_list_lock);
+	list_del_init(&rbd_dev->node);
+	spin_unlock(&rbd_dev_list_lock);
+	device_del(&rbd_dev->dev);
 
-	/*
-	 * Don't free anything from rbd_dev->disk until after all
-	 * notifies are completely processed. Otherwise
-	 * rbd_bus_del_dev() will race with rbd_watch_cb(), resulting
-	 * in a potential use after free of rbd_dev->disk or rbd_dev.
-	 */
+	rbd_dev_image_unlock(rbd_dev);
 	rbd_dev_device_release(rbd_dev);
 	rbd_dev_image_release(rbd_dev);
-
+	rbd_dev_destroy(rbd_dev);
 	return count;
 }
 
@@ -6341,8 +6418,16 @@ static int rbd_slab_init(void)
 	if (!rbd_obj_request_cache)
 		goto out_err;
 
+	rbd_assert(!rbd_bio_clone);
+	rbd_bio_clone = bioset_create(BIO_POOL_SIZE, 0, 0);
+	if (!rbd_bio_clone)
+		goto out_err_clone;
+
 	return 0;
 
+out_err_clone:
+	kmem_cache_destroy(rbd_obj_request_cache);
+	rbd_obj_request_cache = NULL;
 out_err:
 	kmem_cache_destroy(rbd_img_request_cache);
 	rbd_img_request_cache = NULL;
@@ -6358,6 +6443,10 @@ static void rbd_slab_exit(void)
 	rbd_assert(rbd_img_request_cache);
 	kmem_cache_destroy(rbd_img_request_cache);
 	rbd_img_request_cache = NULL;
+
+	rbd_assert(rbd_bio_clone);
+	bioset_free(rbd_bio_clone);
+	rbd_bio_clone = NULL;
 }
 
 static int __init rbd_init(void)
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 9c566364ac9c..7f4acebf4657 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -149,9 +149,9 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct rsxx_cardinfo *card = q->queuedata;
 	struct rsxx_bio_meta *bio_meta;
-	int st = -EINVAL;
+	blk_status_t st = BLK_STS_IOERR;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	might_sleep();
 
@@ -161,15 +161,11 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 	if (bio_end_sector(bio) > get_capacity(card->gendisk))
 		goto req_err;
 
-	if (unlikely(card->halt)) {
-		st = -EFAULT;
+	if (unlikely(card->halt))
 		goto req_err;
-	}
 
-	if (unlikely(card->dma_fault)) {
-		st = (-EFAULT);
+	if (unlikely(card->dma_fault))
 		goto req_err;
-	}
 
 	if (bio->bi_iter.bi_size == 0) {
 		dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
@@ -178,7 +174,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 
 	bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL);
 	if (!bio_meta) {
-		st = -ENOMEM;
+		st = BLK_STS_RESOURCE;
 		goto req_err;
 	}
 
@@ -205,7 +201,7 @@ queue_err:
 	kmem_cache_free(bio_meta_pool, bio_meta);
 req_err:
 	if (st)
-		bio->bi_error = st;
+		bio->bi_status = st;
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
 }
@@ -288,7 +284,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
 	}
 
 	blk_queue_make_request(card->queue, rsxx_make_request);
-	blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
 	blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
 	blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
 
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 5a20385f87d0..6a1b2177951c 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -611,7 +611,7 @@ static void rsxx_schedule_done(struct work_struct *work)
 	mutex_unlock(&ctrl->work_lock);
 }
 
-static int rsxx_queue_discard(struct rsxx_cardinfo *card,
+static blk_status_t rsxx_queue_discard(struct rsxx_cardinfo *card,
 				  struct list_head *q,
 				  unsigned int laddr,
 				  rsxx_dma_cb cb,
@@ -621,7 +621,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
 
 	dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
 	if (!dma)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	dma->cmd          = HW_CMD_BLK_DISCARD;
 	dma->laddr        = laddr;
@@ -640,7 +640,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
 	return 0;
 }
 
-static int rsxx_queue_dma(struct rsxx_cardinfo *card,
+static blk_status_t rsxx_queue_dma(struct rsxx_cardinfo *card,
 			      struct list_head *q,
 			      int dir,
 			      unsigned int dma_off,
@@ -655,7 +655,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
 
 	dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
 	if (!dma)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	dma->cmd          = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
 	dma->laddr        = laddr;
@@ -677,7 +677,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
 	return 0;
 }
 
-int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 			   struct bio *bio,
 			   atomic_t *n_dmas,
 			   rsxx_dma_cb cb,
@@ -694,7 +694,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 	unsigned int dma_len;
 	int dma_cnt[RSXX_MAX_TARGETS];
 	int tgt;
-	int st;
+	blk_status_t st;
 	int i;
 
 	addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */
@@ -769,7 +769,6 @@ bvec_err:
 	for (i = 0; i < card->n_targets; i++)
 		rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i],
 					FREE_DMA);
-
 	return st;
 }
 
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 6bbc64d0f690..277f27e673a2 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -391,7 +391,7 @@ int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
 void rsxx_dma_cleanup(void);
 void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
 int rsxx_dma_configure(struct rsxx_cardinfo *card);
-int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 			   struct bio *bio,
 			   atomic_t *n_dmas,
 			   rsxx_dma_cb cb,
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 27833e4dae2a..d0368682bd43 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -451,8 +451,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
 				    struct skd_special_context *skspcl);
 static void skd_request_fn(struct request_queue *rq);
 static void skd_end_request(struct skd_device *skdev,
-			    struct skd_request_context *skreq, int error);
-static int skd_preop_sg_list(struct skd_device *skdev,
+		struct skd_request_context *skreq, blk_status_t status);
+static bool skd_preop_sg_list(struct skd_device *skdev,
 			     struct skd_request_context *skreq);
 static void skd_postop_sg_list(struct skd_device *skdev,
 			       struct skd_request_context *skreq);
@@ -491,7 +491,7 @@ static void skd_fail_all_pending(struct skd_device *skdev)
 		if (req == NULL)
 			break;
 		blk_start_request(req);
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 	}
 }
 
@@ -545,7 +545,6 @@ static void skd_request_fn(struct request_queue *q)
 	struct request *req = NULL;
 	struct skd_scsi_request *scsi_req;
 	unsigned long io_flags;
-	int error;
 	u32 lba;
 	u32 count;
 	int data_dir;
@@ -716,9 +715,7 @@ static void skd_request_fn(struct request_queue *q)
 		if (!req->bio)
 			goto skip_sg;
 
-		error = skd_preop_sg_list(skdev, skreq);
-
-		if (error != 0) {
+		if (!skd_preop_sg_list(skdev, skreq)) {
 			/*
 			 * Complete the native request with error.
 			 * Note that the request context is still at the
@@ -730,7 +727,7 @@ static void skd_request_fn(struct request_queue *q)
 			 */
 			pr_debug("%s:%s:%d error Out\n",
 				 skdev->name, __func__, __LINE__);
-			skd_end_request(skdev, skreq, error);
+			skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
 			continue;
 		}
 
@@ -805,7 +802,7 @@ skip_sg:
 }
 
 static void skd_end_request(struct skd_device *skdev,
-			    struct skd_request_context *skreq, int error)
+		struct skd_request_context *skreq, blk_status_t error)
 {
 	if (unlikely(error)) {
 		struct request *req = skreq->req;
@@ -822,7 +819,7 @@ static void skd_end_request(struct skd_device *skdev,
 	__blk_end_request_all(skreq->req, error);
 }
 
-static int skd_preop_sg_list(struct skd_device *skdev,
+static bool skd_preop_sg_list(struct skd_device *skdev,
 			     struct skd_request_context *skreq)
 {
 	struct request *req = skreq->req;
@@ -839,7 +836,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
 
 	n_sg = blk_rq_map_sg(skdev->queue, req, sg);
 	if (n_sg <= 0)
-		return -EINVAL;
+		return false;
 
 	/*
 	 * Map scatterlist to PCI bus addresses.
@@ -847,7 +844,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
 	 */
 	n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
 	if (n_sg <= 0)
-		return -EINVAL;
+		return false;
 
 	SKD_ASSERT(n_sg <= skdev->sgs_per_request);
 
@@ -882,7 +879,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
 		}
 	}
 
-	return 0;
+	return true;
 }
 
 static void skd_postop_sg_list(struct skd_device *skdev,
@@ -2333,7 +2330,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
 	switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) {
 	case SKD_CHECK_STATUS_REPORT_GOOD:
 	case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
-		skd_end_request(skdev, skreq, 0);
+		skd_end_request(skdev, skreq, BLK_STS_OK);
 		break;
 
 	case SKD_CHECK_STATUS_BUSY_IMMINENT:
@@ -2355,7 +2352,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
 
 	case SKD_CHECK_STATUS_REPORT_ERROR:
 	default:
-		skd_end_request(skdev, skreq, -EIO);
+		skd_end_request(skdev, skreq, BLK_STS_IOERR);
 		break;
 	}
 }
@@ -2748,7 +2745,7 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
 			 * native request.
 			 */
 			if (likely(cmp_status == SAM_STAT_GOOD))
-				skd_end_request(skdev, skreq, 0);
+				skd_end_request(skdev, skreq, BLK_STS_OK);
 			else
 				skd_resolve_req_exception(skdev, skreq);
 		}
@@ -3190,7 +3187,7 @@ static void skd_recover_requests(struct skd_device *skdev, int requeue)
 			    SKD_MAX_RETRIES)
 				blk_requeue_request(skdev->queue, skreq->req);
 			else
-				skd_end_request(skdev, skreq, -EIO);
+				skd_end_request(skdev, skreq, BLK_STS_IOERR);
 
 			skreq->req = NULL;
 
@@ -4276,6 +4273,7 @@ static int skd_cons_disk(struct skd_device *skdev)
 		rc = -ENOMEM;
 		goto err_out;
 	}
+	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
 	skdev->queue = q;
 	disk->queue = q;
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 3f3a3ab3d50a..6b16ead1da58 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -316,7 +316,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
 
 	rqe->req = NULL;
 
-	__blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
+	__blk_end_request(req, (desc->status ? BLK_STS_IOERR : 0), desc->size);
 
 	vdc_blk_queue_start(port);
 }
@@ -1023,7 +1023,7 @@ static void vdc_queue_drain(struct vdc_port *port)
 	struct request *req;
 
 	while ((req = blk_fetch_request(port->disk->queue)) != NULL)
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 }
 
 static void vdc_ldc_reset_timer(unsigned long _arg)
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 3064be6cf375..84434d3ea19b 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -493,7 +493,7 @@ static inline int swim_read_sector(struct floppy_state *fs,
 	return ret;
 }
 
-static int floppy_read_sectors(struct floppy_state *fs,
+static blk_status_t floppy_read_sectors(struct floppy_state *fs,
 			       int req_sector, int sectors_nb,
 			       unsigned char *buffer)
 {
@@ -516,7 +516,7 @@ static int floppy_read_sectors(struct floppy_state *fs,
 			ret = swim_read_sector(fs, side, track, sector,
 						buffer);
 			if (try-- == 0)
-				return -EIO;
+				return BLK_STS_IOERR;
 		} while (ret != 512);
 
 		buffer += ret;
@@ -553,7 +553,7 @@ static void do_fd_request(struct request_queue *q)
 
 	req = swim_next_request(swd);
 	while (req) {
-		int err = -EIO;
+		blk_status_t err = BLK_STS_IOERR;
 
 		fs = req->rq_disk->private_data;
 		if (blk_rq_pos(req) >= fs->total_secs)
@@ -864,6 +864,8 @@ static int swim_floppy_init(struct swim_priv *swd)
 			put_disk(swd->unit[drive].disk);
 			goto exit_put_disks;
 		}
+		blk_queue_bounce_limit(swd->unit[drive].disk->queue,
+				BLK_BOUNCE_HIGH);
 		swd->unit[drive].disk->queue->queuedata = swd;
 		swd->unit[drive].swd = swd;
 	}
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index ba4809c9bdba..9f931f8f6b4c 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -257,7 +257,7 @@ static unsigned int floppy_check_events(struct gendisk *disk,
 					unsigned int clearing);
 static int floppy_revalidate(struct gendisk *disk);
 
-static bool swim3_end_request(struct floppy_state *fs, int err, unsigned int nr_bytes)
+static bool swim3_end_request(struct floppy_state *fs, blk_status_t err, unsigned int nr_bytes)
 {
 	struct request *req = fs->cur_req;
 	int rc;
@@ -334,7 +334,7 @@ static void start_request(struct floppy_state *fs)
 		if (fs->mdev->media_bay &&
 		    check_media_bay(fs->mdev->media_bay) != MB_FD) {
 			swim3_dbg("%s", "  media bay absent, dropping req\n");
-			swim3_end_request(fs, -ENODEV, 0);
+			swim3_end_request(fs, BLK_STS_IOERR, 0);
 			continue;
 		}
 
@@ -350,12 +350,12 @@ static void start_request(struct floppy_state *fs)
 		if (blk_rq_pos(req) >= fs->total_secs) {
 			swim3_dbg("  pos out of bounds (%ld, max is %ld)\n",
 				  (long)blk_rq_pos(req), (long)fs->total_secs);
-			swim3_end_request(fs, -EIO, 0);
+			swim3_end_request(fs, BLK_STS_IOERR, 0);
 			continue;
 		}
 		if (fs->ejected) {
 			swim3_dbg("%s", "  disk ejected\n");
-			swim3_end_request(fs, -EIO, 0);
+			swim3_end_request(fs, BLK_STS_IOERR, 0);
 			continue;
 		}
 
@@ -364,7 +364,7 @@ static void start_request(struct floppy_state *fs)
 				fs->write_prot = swim3_readbit(fs, WRITE_PROT);
 			if (fs->write_prot) {
 				swim3_dbg("%s", "  try to write, disk write protected\n");
-				swim3_end_request(fs, -EIO, 0);
+				swim3_end_request(fs, BLK_STS_IOERR, 0);
 				continue;
 			}
 		}
@@ -548,7 +548,7 @@ static void act(struct floppy_state *fs)
 				if (fs->retries > 5) {
 					swim3_err("Wrong cylinder in transfer, want: %d got %d\n",
 						  fs->req_cyl, fs->cur_cyl);
-					swim3_end_request(fs, -EIO, 0);
+					swim3_end_request(fs, BLK_STS_IOERR, 0);
 					fs->state = idle;
 					return;
 				}
@@ -584,7 +584,7 @@ static void scan_timeout(unsigned long data)
 	out_8(&sw->intr_enable, 0);
 	fs->cur_cyl = -1;
 	if (fs->retries > 5) {
-		swim3_end_request(fs, -EIO, 0);
+		swim3_end_request(fs, BLK_STS_IOERR, 0);
 		fs->state = idle;
 		start_request(fs);
 	} else {
@@ -608,7 +608,7 @@ static void seek_timeout(unsigned long data)
 	out_8(&sw->select, RELAX);
 	out_8(&sw->intr_enable, 0);
 	swim3_err("%s", "Seek timeout\n");
-	swim3_end_request(fs, -EIO, 0);
+	swim3_end_request(fs, BLK_STS_IOERR, 0);
 	fs->state = idle;
 	start_request(fs);
 	spin_unlock_irqrestore(&swim3_lock, flags);
@@ -637,7 +637,7 @@ static void settle_timeout(unsigned long data)
 		goto unlock;
 	}
 	swim3_err("%s", "Seek settle timeout\n");
-	swim3_end_request(fs, -EIO, 0);
+	swim3_end_request(fs, BLK_STS_IOERR, 0);
 	fs->state = idle;
 	start_request(fs);
  unlock:
@@ -666,7 +666,7 @@ static void xfer_timeout(unsigned long data)
 	swim3_err("Timeout %sing sector %ld\n",
 	       (rq_data_dir(fs->cur_req)==WRITE? "writ": "read"),
 	       (long)blk_rq_pos(fs->cur_req));
-	swim3_end_request(fs, -EIO, 0);
+	swim3_end_request(fs, BLK_STS_IOERR, 0);
 	fs->state = idle;
 	start_request(fs);
 	spin_unlock_irqrestore(&swim3_lock, flags);
@@ -703,7 +703,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				swim3_err("%s", "Seen sector but cyl=ff?\n");
 				fs->cur_cyl = -1;
 				if (fs->retries > 5) {
-					swim3_end_request(fs, -EIO, 0);
+					swim3_end_request(fs, BLK_STS_IOERR, 0);
 					fs->state = idle;
 					start_request(fs);
 				} else {
@@ -786,7 +786,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				swim3_err("Error %sing block %ld (err=%x)\n",
 				       rq_data_dir(req) == WRITE? "writ": "read",
 				       (long)blk_rq_pos(req), err);
-				swim3_end_request(fs, -EIO, 0);
+				swim3_end_request(fs, BLK_STS_IOERR, 0);
 				fs->state = idle;
 			}
 		} else {
@@ -795,7 +795,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				swim3_err("fd dma error: stat=%x resid=%d\n", stat, resid);
 				swim3_err("  state=%d, dir=%x, intr=%x, err=%x\n",
 					  fs->state, rq_data_dir(req), intr, err);
-				swim3_end_request(fs, -EIO, 0);
+				swim3_end_request(fs, BLK_STS_IOERR, 0);
 				fs->state = idle;
 				start_request(fs);
 				break;
@@ -1223,6 +1223,7 @@ static int swim3_attach(struct macio_dev *mdev,
 		put_disk(disk);
 		return -ENOMEM;
 	}
+	blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
 	disk->queue->queuedata = &floppy_states[index];
 
 	if (index == 0) {
@@ -1245,7 +1246,7 @@ static int swim3_attach(struct macio_dev *mdev,
 	return 0;
 }
 
-static struct of_device_id swim3_match[] =
+static const struct of_device_id swim3_match[] =
 {
 	{
 	.name		= "swim3",
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index c8e072caf56f..08586dc14e85 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -745,7 +745,7 @@ static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
 
 static inline void carm_end_request_queued(struct carm_host *host,
 					   struct carm_request *crq,
-					   int error)
+					   blk_status_t error)
 {
 	struct request *req = crq->rq;
 	int rc;
@@ -791,7 +791,7 @@ static inline void carm_round_robin(struct carm_host *host)
 }
 
 static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
-			       int error)
+			       blk_status_t error)
 {
 	carm_end_request_queued(host, crq, error);
 	if (max_queue == 1)
@@ -869,14 +869,14 @@ queue_one_request:
 	sg = &crq->sg[0];
 	n_elem = blk_rq_map_sg(q, rq, sg);
 	if (n_elem <= 0) {
-		carm_end_rq(host, crq, -EIO);
+		carm_end_rq(host, crq, BLK_STS_IOERR);
 		return;		/* request with no s/g entries? */
 	}
 
 	/* map scatterlist to PCI bus addresses */
 	n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir);
 	if (n_elem <= 0) {
-		carm_end_rq(host, crq, -EIO);
+		carm_end_rq(host, crq, BLK_STS_IOERR);
 		return;		/* request with no s/g entries? */
 	}
 	crq->n_elem = n_elem;
@@ -937,7 +937,7 @@ queue_one_request:
 
 static void carm_handle_array_info(struct carm_host *host,
 				   struct carm_request *crq, u8 *mem,
-				   int error)
+				   blk_status_t error)
 {
 	struct carm_port *port;
 	u8 *msg_data = mem + sizeof(struct carm_array_info);
@@ -997,7 +997,7 @@ out:
 
 static void carm_handle_scan_chan(struct carm_host *host,
 				  struct carm_request *crq, u8 *mem,
-				  int error)
+				  blk_status_t error)
 {
 	u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
 	unsigned int i, dev_count = 0;
@@ -1029,7 +1029,7 @@ out:
 }
 
 static void carm_handle_generic(struct carm_host *host,
-				struct carm_request *crq, int error,
+				struct carm_request *crq, blk_status_t error,
 				int cur_state, int next_state)
 {
 	DPRINTK("ENTER\n");
@@ -1045,7 +1045,7 @@ static void carm_handle_generic(struct carm_host *host,
 }
 
 static inline void carm_handle_rw(struct carm_host *host,
-				  struct carm_request *crq, int error)
+				  struct carm_request *crq, blk_status_t error)
 {
 	int pci_dir;
 
@@ -1067,7 +1067,7 @@ static inline void carm_handle_resp(struct carm_host *host,
 	u32 handle = le32_to_cpu(ret_handle_le);
 	unsigned int msg_idx;
 	struct carm_request *crq;
-	int error = (status == RMSG_OK) ? 0 : -EIO;
+	blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
 	u8 *mem;
 
 	VPRINTK("ENTER, handle == 0x%x\n", handle);
@@ -1155,7 +1155,7 @@ static inline void carm_handle_resp(struct carm_host *host,
 err_out:
 	printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
 	       pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
-	carm_end_rq(host, crq, -EIO);
+	carm_end_rq(host, crq, BLK_STS_IOERR);
 }
 
 static inline void carm_handle_responses(struct carm_host *host)
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index c141cc3be22b..0677d2514665 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -454,7 +454,7 @@ static void process_page(unsigned long data)
 				PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
 		if (control & DMASCR_HARD_ERROR) {
 			/* error */
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			dev_printk(KERN_WARNING, &card->dev->dev,
 				"I/O error on sector %d/%d\n",
 				le32_to_cpu(desc->local_addr)>>9,
@@ -529,7 +529,7 @@ static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio)
 		 (unsigned long long)bio->bi_iter.bi_sector,
 		 bio->bi_iter.bi_size);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	spin_lock_irq(&card->lock);
 	*card->biotail = bio;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 94173de1efaa..0297ad7c1452 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -64,15 +64,15 @@ struct virtblk_req {
 	struct scatterlist sg[];
 };
 
-static inline int virtblk_result(struct virtblk_req *vbr)
+static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
 {
 	switch (vbr->status) {
 	case VIRTIO_BLK_S_OK:
-		return 0;
+		return BLK_STS_OK;
 	case VIRTIO_BLK_S_UNSUPP:
-		return -ENOTTY;
+		return BLK_STS_NOTSUPP;
 	default:
-		return -EIO;
+		return BLK_STS_IOERR;
 	}
 }
 
@@ -214,7 +214,7 @@ static void virtblk_done(struct virtqueue *vq)
 	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 }
 
-static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 			   const struct blk_mq_queue_data *bd)
 {
 	struct virtio_blk *vblk = hctx->queue->queuedata;
@@ -246,7 +246,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 		break;
 	default:
 		WARN_ON_ONCE(1);
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 
 	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
@@ -276,8 +276,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 		/* Out of mem doesn't actually happen, since we fall back
 		 * to direct descriptors */
 		if (err == -ENOMEM || err == -ENOSPC)
-			return BLK_MQ_RQ_QUEUE_BUSY;
-		return BLK_MQ_RQ_QUEUE_ERROR;
+			return BLK_STS_RESOURCE;
+		return BLK_STS_IOERR;
 	}
 
 	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
@@ -286,7 +286,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	if (notify)
 		virtqueue_notify(vblk->vqs[qid].vq);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 /* return id (s/n) string for *disk to *id_str
@@ -307,7 +307,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
 		goto out;
 
 	blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
-	err = virtblk_result(blk_mq_rq_to_pdu(req));
+	err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
 out:
 	blk_put_request(req);
 	return err;
@@ -452,8 +452,7 @@ static int init_vq(struct virtio_blk *vblk)
 	}
 
 	/* Discover virtqueues and write information to configuration.  */
-	err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names,
-			&desc);
+	err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
 	if (err)
 		goto out;
 
@@ -721,9 +720,6 @@ static int virtblk_probe(struct virtio_device *vdev)
 	/* We can handle whatever the host told us to handle. */
 	blk_queue_max_segments(q, vblk->sg_elems-2);
 
-	/* No need to bounce any requests */
-	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
-
 	/* No real sector limit. */
 	blk_queue_max_hw_sectors(q, -1U);
 
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 726c32e35db9..fe7cd58c43d0 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -609,8 +609,6 @@ int xen_blkif_schedule(void *arg)
 	unsigned long timeout;
 	int ret;
 
-	xen_blkif_get(blkif);
-
 	set_freezable();
 	while (!kthread_should_stop()) {
 		if (try_to_freeze())
@@ -665,7 +663,6 @@ purge_gnt_list:
 		print_stats(ring);
 
 	ring->xenblkd = NULL;
-	xen_blkif_put(blkif);
 
 	return 0;
 }
@@ -1069,20 +1066,17 @@ static void xen_blk_drain_io(struct xen_blkif_ring *ring)
 	atomic_set(&blkif->drain, 0);
 }
 
-/*
- * Completion callback on the bio's. Called as bh->b_end_io()
- */
-
-static void __end_block_io_op(struct pending_req *pending_req, int error)
+static void __end_block_io_op(struct pending_req *pending_req,
+		blk_status_t error)
 {
 	/* An error fails the entire request. */
-	if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
-	    (error == -EOPNOTSUPP)) {
+	if (pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE &&
+	    error == BLK_STS_NOTSUPP) {
 		pr_debug("flush diskcache op failed, not supported\n");
 		xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0);
 		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
-	} else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
-		    (error == -EOPNOTSUPP)) {
+	} else if (pending_req->operation == BLKIF_OP_WRITE_BARRIER &&
+		   error == BLK_STS_NOTSUPP) {
 		pr_debug("write barrier op failed, not supported\n");
 		xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0);
 		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
@@ -1106,7 +1100,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
  */
 static void end_block_io_op(struct bio *bio)
 {
-	__end_block_io_op(bio->bi_private, bio->bi_error);
+	__end_block_io_op(bio->bi_private, bio->bi_status);
 	bio_put(bio);
 }
 
@@ -1423,7 +1417,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 	for (i = 0; i < nbio; i++)
 		bio_put(biolist[i]);
 	atomic_set(&pending_req->pendcnt, 1);
-	__end_block_io_op(pending_req, -EINVAL);
+	__end_block_io_op(pending_req, BLK_STS_RESOURCE);
 	msleep(1); /* back off a bit */
 	return -EIO;
 }
@@ -1436,34 +1430,35 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 static void make_response(struct xen_blkif_ring *ring, u64 id,
 			  unsigned short op, int st)
 {
-	struct blkif_response  resp;
+	struct blkif_response *resp;
 	unsigned long     flags;
 	union blkif_back_rings *blk_rings;
 	int notify;
 
-	resp.id        = id;
-	resp.operation = op;
-	resp.status    = st;
-
 	spin_lock_irqsave(&ring->blk_ring_lock, flags);
 	blk_rings = &ring->blk_rings;
 	/* Place on the response ring for the relevant domain. */
 	switch (ring->blkif->blk_protocol) {
 	case BLKIF_PROTOCOL_NATIVE:
-		memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
-		       &resp, sizeof(resp));
+		resp = RING_GET_RESPONSE(&blk_rings->native,
+					 blk_rings->native.rsp_prod_pvt);
 		break;
 	case BLKIF_PROTOCOL_X86_32:
-		memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
-		       &resp, sizeof(resp));
+		resp = RING_GET_RESPONSE(&blk_rings->x86_32,
+					 blk_rings->x86_32.rsp_prod_pvt);
 		break;
 	case BLKIF_PROTOCOL_X86_64:
-		memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
-		       &resp, sizeof(resp));
+		resp = RING_GET_RESPONSE(&blk_rings->x86_64,
+					 blk_rings->x86_64.rsp_prod_pvt);
 		break;
 	default:
 		BUG();
 	}
+
+	resp->id        = id;
+	resp->operation = op;
+	resp->status    = st;
+
 	blk_rings->common.rsp_prod_pvt++;
 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
 	spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index dea61f6ab8cb..ecb35fe8ca8d 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -75,9 +75,8 @@ extern unsigned int xenblk_max_queues;
 struct blkif_common_request {
 	char dummy;
 };
-struct blkif_common_response {
-	char dummy;
-};
+
+/* i386 protocol version */
 
 struct blkif_x86_32_request_rw {
 	uint8_t        nr_segments;  /* number of segments                   */
@@ -129,14 +128,6 @@ struct blkif_x86_32_request {
 	} u;
 } __attribute__((__packed__));
 
-/* i386 protocol version */
-#pragma pack(push, 4)
-struct blkif_x86_32_response {
-	uint64_t        id;              /* copied from request */
-	uint8_t         operation;       /* copied from request */
-	int16_t         status;          /* BLKIF_RSP_???       */
-};
-#pragma pack(pop)
 /* x86_64 protocol version */
 
 struct blkif_x86_64_request_rw {
@@ -193,18 +184,12 @@ struct blkif_x86_64_request {
 	} u;
 } __attribute__((__packed__));
 
-struct blkif_x86_64_response {
-	uint64_t       __attribute__((__aligned__(8))) id;
-	uint8_t         operation;       /* copied from request */
-	int16_t         status;          /* BLKIF_RSP_???       */
-};
-
 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
-		  struct blkif_common_response);
+		  struct blkif_response);
 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
-		  struct blkif_x86_32_response);
+		  struct blkif_response __packed);
 DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
-		  struct blkif_x86_64_response);
+		  struct blkif_response);
 
 union blkif_back_rings {
 	struct blkif_back_ring        native;
@@ -281,6 +266,7 @@ struct xen_blkif_ring {
 
 	wait_queue_head_t	wq;
 	atomic_t		inflight;
+	bool			active;
 	/* One thread per blkif ring. */
 	struct task_struct	*xenblkd;
 	unsigned int		waiting_reqs;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 8fe61b5dc5a6..792da683e70d 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -159,7 +159,7 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
 		init_waitqueue_head(&ring->shutdown_wq);
 		ring->blkif = blkif;
 		ring->st_print = jiffies;
-		xen_blkif_get(blkif);
+		ring->active = true;
 	}
 
 	return 0;
@@ -249,10 +249,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
 		struct xen_blkif_ring *ring = &blkif->rings[r];
 		unsigned int i = 0;
 
+		if (!ring->active)
+			continue;
+
 		if (ring->xenblkd) {
 			kthread_stop(ring->xenblkd);
 			wake_up(&ring->shutdown_wq);
-			ring->xenblkd = NULL;
 		}
 
 		/* The above kthread_stop() guarantees that at this point we
@@ -296,7 +298,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
 		BUG_ON(ring->free_pages_num != 0);
 		BUG_ON(ring->persistent_gnt_c != 0);
 		WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
-		xen_blkif_put(blkif);
+		ring->active = false;
 	}
 	blkif->nr_ring_pages = 0;
 	/*
@@ -312,9 +314,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
 
 static void xen_blkif_free(struct xen_blkif *blkif)
 {
-
-	xen_blkif_disconnect(blkif);
+	WARN_ON(xen_blkif_disconnect(blkif));
 	xen_vbd_free(&blkif->vbd);
+	kfree(blkif->be->mode);
+	kfree(blkif->be);
 
 	/* Make sure everything is drained before shutting down */
 	kmem_cache_free(xen_blkif_cachep, blkif);
@@ -504,13 +507,13 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
 
 	dev_set_drvdata(&dev->dev, NULL);
 
-	if (be->blkif)
+	if (be->blkif) {
 		xen_blkif_disconnect(be->blkif);
 
-	/* Put the reference we set in xen_blkif_alloc(). */
-	xen_blkif_put(be->blkif);
-	kfree(be->mode);
-	kfree(be);
+		/* Put the reference we set in xen_blkif_alloc(). */
+		xen_blkif_put(be->blkif);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 39459631667c..c852ed3c01d5 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -110,11 +110,6 @@ struct blk_shadow {
 	unsigned long associated_id;
 };
 
-struct split_bio {
-	struct bio *bio;
-	atomic_t pending;
-};
-
 struct blkif_req {
 	int	error;
 };
@@ -881,7 +876,7 @@ static inline bool blkif_request_flush_invalid(struct request *req,
 		 !info->feature_fua));
 }
 
-static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
 			  const struct blk_mq_queue_data *qd)
 {
 	unsigned long flags;
@@ -904,16 +899,16 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	flush_requests(rinfo);
 	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 
 out_err:
 	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
-	return BLK_MQ_RQ_QUEUE_ERROR;
+	return BLK_STS_IOERR;
 
 out_busy:
 	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
 	blk_mq_stop_hw_queue(hctx);
-	return BLK_MQ_RQ_QUEUE_BUSY;
+	return BLK_STS_RESOURCE;
 }
 
 static void blkif_complete_rq(struct request *rq)
@@ -958,9 +953,6 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
 
 	/* Make sure buffer addresses are sector-aligned. */
 	blk_queue_dma_alignment(rq, 511);
-
-	/* Make sure we don't use bounce buffers. */
-	blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
 }
 
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
@@ -1601,14 +1593,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			continue;
 		}
 
-		blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+		if (bret->status == BLKIF_RSP_OKAY)
+			blkif_req(req)->error = BLK_STS_OK;
+		else
+			blkif_req(req)->error = BLK_STS_IOERR;
+
 		switch (bret->operation) {
 		case BLKIF_OP_DISCARD:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
 				struct request_queue *rq = info->rq;
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
 					   info->gd->disk_name, op_name(bret->operation));
-				blkif_req(req)->error = -EOPNOTSUPP;
+				blkif_req(req)->error = BLK_STS_NOTSUPP;
 				info->feature_discard = 0;
 				info->feature_secdiscard = 0;
 				queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
@@ -1626,11 +1622,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
 				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
 				       info->gd->disk_name, op_name(bret->operation));
-				blkif_req(req)->error = -EOPNOTSUPP;
+				blkif_req(req)->error = BLK_STS_NOTSUPP;
 			}
 			if (unlikely(blkif_req(req)->error)) {
-				if (blkif_req(req)->error == -EOPNOTSUPP)
-					blkif_req(req)->error = 0;
+				if (blkif_req(req)->error == BLK_STS_NOTSUPP)
+					blkif_req(req)->error = BLK_STS_OK;
 				info->feature_fua = 0;
 				info->feature_flush = 0;
 				xlvbd_flush(info);
@@ -1996,28 +1992,13 @@ static int blkfront_probe(struct xenbus_device *dev,
 	return 0;
 }
 
-static void split_bio_end(struct bio *bio)
-{
-	struct split_bio *split_bio = bio->bi_private;
-
-	if (atomic_dec_and_test(&split_bio->pending)) {
-		split_bio->bio->bi_phys_segments = 0;
-		split_bio->bio->bi_error = bio->bi_error;
-		bio_endio(split_bio->bio);
-		kfree(split_bio);
-	}
-	bio_put(bio);
-}
-
 static int blkif_recover(struct blkfront_info *info)
 {
-	unsigned int i, r_index;
+	unsigned int r_index;
 	struct request *req, *n;
 	int rc;
-	struct bio *bio, *cloned_bio;
-	unsigned int segs, offset;
-	int pending, size;
-	struct split_bio *split_bio;
+	struct bio *bio;
+	unsigned int segs;
 
 	blkfront_gather_backend_features(info);
 	/* Reset limits changed by blk_mq_update_nr_hw_queues(). */
@@ -2056,34 +2037,6 @@ static int blkif_recover(struct blkfront_info *info)
 
 	while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
 		/* Traverse the list of pending bios and re-queue them */
-		if (bio_segments(bio) > segs) {
-			/*
-			 * This bio has more segments than what we can
-			 * handle, we have to split it.
-			 */
-			pending = (bio_segments(bio) + segs - 1) / segs;
-			split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
-			BUG_ON(split_bio == NULL);
-			atomic_set(&split_bio->pending, pending);
-			split_bio->bio = bio;
-			for (i = 0; i < pending; i++) {
-				offset = (i * segs * XEN_PAGE_SIZE) >> 9;
-				size = min((unsigned int)(segs * XEN_PAGE_SIZE) >> 9,
-					   (unsigned int)bio_sectors(bio) - offset);
-				cloned_bio = bio_clone(bio, GFP_NOIO);
-				BUG_ON(cloned_bio == NULL);
-				bio_trim(cloned_bio, offset, size);
-				cloned_bio->bi_private = split_bio;
-				cloned_bio->bi_end_io = split_bio_end;
-				submit_bio(cloned_bio);
-			}
-			/*
-			 * Now we have to wait for all those smaller bios to
-			 * end, so we can also end the "parent" bio.
-			 */
-			continue;
-		}
-		/* We don't need to split this bio */
 		submit_bio(bio);
 	}
 
@@ -2137,7 +2090,7 @@ static int blkfront_resume(struct xenbus_device *dev)
 			merge_bio.tail = shadow[j].request->biotail;
 			bio_list_merge(&info->bio_list, &merge_bio);
 			shadow[j].request->bio = NULL;
-			blk_mq_end_request(shadow[j].request, 0);
+			blk_mq_end_request(shadow[j].request, BLK_STS_OK);
 		}
 	}
 
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 757dce2147e0..14459d66ef0c 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -471,7 +471,7 @@ static struct request *ace_get_next_request(struct request_queue *q)
 		if (!blk_rq_is_passthrough(req))
 			break;
 		blk_start_request(req);
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 	}
 	return req;
 }
@@ -499,11 +499,11 @@ static void ace_fsm_dostate(struct ace_device *ace)
 
 		/* Drop all in-flight and pending requests */
 		if (ace->req) {
-			__blk_end_request_all(ace->req, -EIO);
+			__blk_end_request_all(ace->req, BLK_STS_IOERR);
 			ace->req = NULL;
 		}
 		while ((req = blk_fetch_request(ace->queue)) != NULL)
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 
 		/* Drop back to IDLE state and notify waiters */
 		ace->fsm_state = ACE_FSM_STATE_IDLE;
@@ -728,7 +728,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 		}
 
 		/* bio finished; is there another one? */
-		if (__blk_end_request_cur(ace->req, 0)) {
+		if (__blk_end_request_cur(ace->req, BLK_STS_OK)) {
 			/* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
 			 *      blk_rq_sectors(ace->req),
 			 *      blk_rq_cur_sectors(ace->req));
@@ -993,6 +993,7 @@ static int ace_setup(struct ace_device *ace)
 	if (ace->queue == NULL)
 		goto err_blk_initq;
 	blk_queue_logical_block_size(ace->queue, 512);
+	blk_queue_bounce_limit(ace->queue, BLK_BOUNCE_HIGH);
 
 	/*
 	 * Allocate and initialize GD structure
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 968f9e52effa..41c95c9b2ab4 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -74,14 +74,14 @@ static void do_z2_request(struct request_queue *q)
 	while (req) {
 		unsigned long start = blk_rq_pos(req) << 9;
 		unsigned long len  = blk_rq_cur_bytes(req);
-		int err = 0;
+		blk_status_t err = BLK_STS_OK;
 
 		if (start + len > z2ram_size) {
 			pr_err(DEVICE_NAME ": bad access: block=%llu, "
 			       "count=%u\n",
 			       (unsigned long long)blk_rq_pos(req),
 			       blk_rq_cur_sectors(req));
-			err = -EIO;
+			err = BLK_STS_IOERR;
 			goto done;
 		}
 		while (len) {
diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c
index c38cb5b91291..fe850f0567cb 100644
--- a/drivers/bluetooth/btmrvl_main.c
+++ b/drivers/bluetooth/btmrvl_main.c
@@ -602,7 +602,7 @@ static int btmrvl_service_main_thread(void *data)
 	struct btmrvl_thread *thread = data;
 	struct btmrvl_private *priv = thread->priv;
 	struct btmrvl_adapter *adapter = priv->adapter;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct sk_buff *skb;
 	ulong flags;
 
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 76c952fd9ab9..e36d160c458f 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2178,6 +2178,12 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 	if (!q)
 		return -ENXIO;
 
+	if (!blk_queue_scsi_passthrough(q)) {
+		WARN_ONCE(true,
+			  "Attempt read CDDA info through a non-SCSI queue\n");
+		return -EINVAL;
+	}
+
 	cdi->last_sense = 0;
 
 	while (nframes) {
@@ -2195,7 +2201,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 			break;
 		}
 		req = scsi_req(rq);
-		scsi_req_init(rq);
 
 		ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
 		if (ret) {
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 1372763a948f..6495b03f576c 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -583,7 +583,8 @@ static int gdrom_set_interrupt_handlers(void)
  */
 static void gdrom_readdisk_dma(struct work_struct *work)
 {
-	int err, block, block_cnt;
+	int block, block_cnt;
+	blk_status_t err;
 	struct packet_command *read_command;
 	struct list_head *elem, *next;
 	struct request *req;
@@ -641,7 +642,7 @@ static void gdrom_readdisk_dma(struct work_struct *work)
 		__raw_writeb(1, GDROM_DMA_STATUS_REG);
 		wait_event_interruptible_timeout(request_queue,
 			gd.transfer == 0, GDROM_DEFAULT_TIMEOUT);
-		err = gd.transfer ? -EIO : 0;
+		err = gd.transfer ? BLK_STS_IOERR : BLK_STS_OK;
 		gd.transfer = 0;
 		gd.pending = 0;
 		/* now seek to take the request spinlock
@@ -670,11 +671,11 @@ static void gdrom_request(struct request_queue *rq)
 			break;
 		case REQ_OP_WRITE:
 			pr_notice("Read only device - write request ignored\n");
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 			break;
 		default:
 			printk(KERN_DEBUG "gdrom: Non-fs request ignored\n");
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 			break;
 		}
 	}
@@ -812,6 +813,7 @@ static int probe_gdrom(struct platform_device *devptr)
 		err = -ENOMEM;
 		goto probe_fail_requestq;
 	}
+	blk_queue_bounce_limit(gd.gdrom_rq, BLK_BOUNCE_HIGH);
 
 	err = probe_gdrom_setupqueue();
 	if (err)
diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c
index e770ad977472..b67263d6e34b 100644
--- a/drivers/char/applicom.c
+++ b/drivers/char/applicom.c
@@ -94,9 +94,9 @@ static struct applicom_board {
 static unsigned int irq = 0;	/* interrupt number IRQ       */
 static unsigned long mem = 0;	/* physical segment of board  */
 
-module_param(irq, uint, 0);
+module_param_hw(irq, uint, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ of the Applicom board");
-module_param(mem, ulong, 0);
+module_param_hw(mem, ulong, iomem, 0);
 MODULE_PARM_DESC(mem, "Shared Memory Address of Applicom board");
 
 static unsigned int numboards;	/* number of installed boards */
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index b2b618f066e0..59ee93ea84eb 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1375,39 +1375,39 @@ MODULE_PARM_DESC(type, "Defines the type of each interface, each"
 		 " interface separated by commas.  The types are 'kcs',"
 		 " 'smic', and 'bt'.  For example si_type=kcs,bt will set"
 		 " the first interface to kcs and the second to bt");
-module_param_array(addrs, ulong, &num_addrs, 0);
+module_param_hw_array(addrs, ulong, iomem, &num_addrs, 0);
 MODULE_PARM_DESC(addrs, "Sets the memory address of each interface, the"
 		 " addresses separated by commas.  Only use if an interface"
 		 " is in memory.  Otherwise, set it to zero or leave"
 		 " it blank.");
-module_param_array(ports, uint, &num_ports, 0);
+module_param_hw_array(ports, uint, ioport, &num_ports, 0);
 MODULE_PARM_DESC(ports, "Sets the port address of each interface, the"
 		 " addresses separated by commas.  Only use if an interface"
 		 " is a port.  Otherwise, set it to zero or leave"
 		 " it blank.");
-module_param_array(irqs, int, &num_irqs, 0);
+module_param_hw_array(irqs, int, irq, &num_irqs, 0);
 MODULE_PARM_DESC(irqs, "Sets the interrupt of each interface, the"
 		 " addresses separated by commas.  Only use if an interface"
 		 " has an interrupt.  Otherwise, set it to zero or leave"
 		 " it blank.");
-module_param_array(regspacings, int, &num_regspacings, 0);
+module_param_hw_array(regspacings, int, other, &num_regspacings, 0);
 MODULE_PARM_DESC(regspacings, "The number of bytes between the start address"
 		 " and each successive register used by the interface.  For"
 		 " instance, if the start address is 0xca2 and the spacing"
 		 " is 2, then the second address is at 0xca4.  Defaults"
 		 " to 1.");
-module_param_array(regsizes, int, &num_regsizes, 0);
+module_param_hw_array(regsizes, int, other, &num_regsizes, 0);
 MODULE_PARM_DESC(regsizes, "The size of the specific IPMI register in bytes."
 		 " This should generally be 1, 2, 4, or 8 for an 8-bit,"
 		 " 16-bit, 32-bit, or 64-bit register.  Use this if you"
 		 " the 8-bit IPMI register has to be read from a larger"
 		 " register.");
-module_param_array(regshifts, int, &num_regshifts, 0);
+module_param_hw_array(regshifts, int, other, &num_regshifts, 0);
 MODULE_PARM_DESC(regshifts, "The amount to shift the data read from the."
 		 " IPMI register, in bits.  For instance, if the data"
 		 " is read from a 32-bit word and the IPMI data is in"
 		 " bit 8-15, then the shift would be 8");
-module_param_array(slave_addrs, int, &num_slave_addrs, 0);
+module_param_hw_array(slave_addrs, int, other, &num_slave_addrs, 0);
 MODULE_PARM_DESC(slave_addrs, "Set the default IPMB slave address for"
 		 " the controller.  Normally this is 0x20, but can be"
 		 " overridden by this parm.  This is an array indexed"
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index d165af8abe36..a5c6cfe71a8e 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -821,7 +821,7 @@ static ssize_t ipmi_read(struct file *file,
 			 loff_t      *ppos)
 {
 	int          rv = 0;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (count <= 0)
 		return 0;
diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index 565e4cf04a02..8249762192d5 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -859,7 +859,11 @@ static int __init lp_setup (char *str)
 	} else if (!strcmp(str, "auto")) {
 		parport_nr[0] = LP_PARPORT_AUTO;
 	} else if (!strcmp(str, "none")) {
-		parport_nr[parport_ptr++] = LP_PARPORT_NONE;
+		if (parport_ptr < LP_NO)
+			parport_nr[parport_ptr++] = LP_PARPORT_NONE;
+		else
+			printk(KERN_INFO "lp: too many ports, %s ignored.\n",
+			       str);
 	} else if (!strcmp(str, "reset")) {
 		reset = 1;
 	}
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 7e4a9d1296bb..593a8818aca9 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -340,6 +340,11 @@ static const struct vm_operations_struct mmap_mem_ops = {
 static int mmap_mem(struct file *file, struct vm_area_struct *vma)
 {
 	size_t size = vma->vm_end - vma->vm_start;
+	phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
+
+	/* It's illegal to wrap around the end of the physical address space. */
+	if (offset + (phys_addr_t)size - 1 < offset)
+		return -EINVAL;
 
 	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
 		return -EINVAL;
diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c
index 3a3ff2eb6cba..b5e3103c1175 100644
--- a/drivers/char/mwave/mwavedd.c
+++ b/drivers/char/mwave/mwavedd.c
@@ -80,10 +80,10 @@ int mwave_3780i_io = 0;
 int mwave_uart_irq = 0;
 int mwave_uart_io = 0;
 module_param(mwave_debug, int, 0);
-module_param(mwave_3780i_irq, int, 0);
-module_param(mwave_3780i_io, int, 0);
-module_param(mwave_uart_irq, int, 0);
-module_param(mwave_uart_io, int, 0);
+module_param_hw(mwave_3780i_irq, int, irq, 0);
+module_param_hw(mwave_3780i_io, int, ioport, 0);
+module_param_hw(mwave_uart_irq, int, irq, 0);
+module_param_hw(mwave_uart_io, int, ioport, 0);
 
 static int mwave_open(struct inode *inode, struct file *file);
 static int mwave_close(struct inode *inode, struct file *file);
diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c
index d4dbd8d8e524..382c864814d9 100644
--- a/drivers/char/pcmcia/cm4040_cs.c
+++ b/drivers/char/pcmcia/cm4040_cs.c
@@ -374,7 +374,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf,
 
 	rc = write_sync_reg(SCR_HOST_TO_READER_START, dev);
 	if (rc <= 0) {
-		DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc);
+		DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc);
 		DEBUGP(2, dev, "<- cm4040_write (failed)\n");
 		if (rc == -ERESTARTSYS)
 			return rc;
@@ -387,7 +387,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf,
 	for (i = 0; i < bytes_to_write; i++) {
 		rc = wait_for_bulk_out_ready(dev);
 		if (rc <= 0) {
-			DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2Zx\n",
+			DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2zx\n",
 			       rc);
 			DEBUGP(2, dev, "<- cm4040_write (failed)\n");
 			if (rc == -ERESTARTSYS)
@@ -403,7 +403,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf,
 	rc = write_sync_reg(SCR_HOST_TO_READER_DONE, dev);
 
 	if (rc <= 0) {
-		DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc);
+		DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc);
 		DEBUGP(2, dev, "<- cm4040_write (failed)\n");
 		if (rc == -ERESTARTSYS)
 			return rc;
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 0ab024918907..01a260f67437 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1,6 +1,9 @@
 /*
  * random.c -- A strong random number generator
  *
+ * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All
+ * Rights Reserved.
+ *
  * Copyright Matt Mackall <mpm@selenic.com>, 2003, 2004, 2005
  *
  * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999.  All
@@ -762,6 +765,8 @@ static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
 static struct crng_state **crng_node_pool __read_mostly;
 #endif
 
+static void invalidate_batched_entropy(void);
+
 static void crng_initialize(struct crng_state *crng)
 {
 	int		i;
@@ -798,12 +803,13 @@ static int crng_fast_load(const char *cp, size_t len)
 		p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp;
 		cp++; crng_init_cnt++; len--;
 	}
+	spin_unlock_irqrestore(&primary_crng.lock, flags);
 	if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
+		invalidate_batched_entropy();
 		crng_init = 1;
 		wake_up_interruptible(&crng_init_wait);
 		pr_notice("random: fast init done\n");
 	}
-	spin_unlock_irqrestore(&primary_crng.lock, flags);
 	return 1;
 }
 
@@ -835,13 +841,14 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 	}
 	memzero_explicit(&buf, sizeof(buf));
 	crng->init_time = jiffies;
+	spin_unlock_irqrestore(&primary_crng.lock, flags);
 	if (crng == &primary_crng && crng_init < 2) {
+		invalidate_batched_entropy();
 		crng_init = 2;
 		process_random_ready_list();
 		wake_up_interruptible(&crng_init_wait);
 		pr_notice("random: crng init done\n");
 	}
-	spin_unlock_irqrestore(&primary_crng.lock, flags);
 }
 
 static inline void crng_wait_ready(void)
@@ -1097,12 +1104,16 @@ static void add_interrupt_bench(cycles_t start)
 static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
 {
 	__u32 *ptr = (__u32 *) regs;
+	unsigned int idx;
 
 	if (regs == NULL)
 		return 0;
-	if (f->reg_idx >= sizeof(struct pt_regs) / sizeof(__u32))
-		f->reg_idx = 0;
-	return *(ptr + f->reg_idx++);
+	idx = READ_ONCE(f->reg_idx);
+	if (idx >= sizeof(struct pt_regs) / sizeof(__u32))
+		idx = 0;
+	ptr += idx++;
+	WRITE_ONCE(f->reg_idx, idx);
+	return *ptr;
 }
 
 void add_interrupt_randomness(int irq, int irq_flags)
@@ -2019,6 +2030,7 @@ struct batched_entropy {
 	};
 	unsigned int position;
 };
+static rwlock_t batched_entropy_reset_lock = __RW_LOCK_UNLOCKED(batched_entropy_reset_lock);
 
 /*
  * Get a random word for internal kernel use only. The quality of the random
@@ -2029,6 +2041,8 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
 u64 get_random_u64(void)
 {
 	u64 ret;
+	bool use_lock = READ_ONCE(crng_init) < 2;
+	unsigned long flags = 0;
 	struct batched_entropy *batch;
 
 #if BITS_PER_LONG == 64
@@ -2041,11 +2055,15 @@ u64 get_random_u64(void)
 #endif
 
 	batch = &get_cpu_var(batched_entropy_u64);
+	if (use_lock)
+		read_lock_irqsave(&batched_entropy_reset_lock, flags);
 	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
 		extract_crng((u8 *)batch->entropy_u64);
 		batch->position = 0;
 	}
 	ret = batch->entropy_u64[batch->position++];
+	if (use_lock)
+		read_unlock_irqrestore(&batched_entropy_reset_lock, flags);
 	put_cpu_var(batched_entropy_u64);
 	return ret;
 }
@@ -2055,22 +2073,45 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32);
 u32 get_random_u32(void)
 {
 	u32 ret;
+	bool use_lock = READ_ONCE(crng_init) < 2;
+	unsigned long flags = 0;
 	struct batched_entropy *batch;
 
 	if (arch_get_random_int(&ret))
 		return ret;
 
 	batch = &get_cpu_var(batched_entropy_u32);
+	if (use_lock)
+		read_lock_irqsave(&batched_entropy_reset_lock, flags);
 	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
 		extract_crng((u8 *)batch->entropy_u32);
 		batch->position = 0;
 	}
 	ret = batch->entropy_u32[batch->position++];
+	if (use_lock)
+		read_unlock_irqrestore(&batched_entropy_reset_lock, flags);
 	put_cpu_var(batched_entropy_u32);
 	return ret;
 }
 EXPORT_SYMBOL(get_random_u32);
 
+/* It's important to invalidate all potential batched entropy that might
+ * be stored before the crng is initialized, which we can do lazily by
+ * simply resetting the counter to zero so that it's re-extracted on the
+ * next usage. */
+static void invalidate_batched_entropy(void)
+{
+	int cpu;
+	unsigned long flags;
+
+	write_lock_irqsave(&batched_entropy_reset_lock, flags);
+	for_each_possible_cpu (cpu) {
+		per_cpu_ptr(&batched_entropy_u32, cpu)->position = 0;
+		per_cpu_ptr(&batched_entropy_u64, cpu)->position = 0;
+	}
+	write_unlock_irqrestore(&batched_entropy_reset_lock, flags);
+}
+
 /**
  * randomize_page - Generate a random, page aligned address
  * @start:	The smallest acceptable address the caller will take.
diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index b917b9d5f710..c378c7b15d49 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -27,10 +27,9 @@
 
 #define ACPI_SIG_TPM2 "TPM2"
 
-static const u8 CRB_ACPI_START_UUID[] = {
-	/* 0000 */ 0xAB, 0x6C, 0xBF, 0x6B, 0x63, 0x54, 0x14, 0x47,
-	/* 0008 */ 0xB7, 0xCD, 0xF0, 0x20, 0x3C, 0x03, 0x68, 0xD4
-};
+static const guid_t crb_acpi_start_guid =
+	GUID_INIT(0x6BBF6CAB, 0x5463, 0x4714,
+		  0xB7, 0xCD, 0xF0, 0x20, 0x3C, 0x03, 0x68, 0xD4);
 
 enum crb_defaults {
 	CRB_ACPI_START_REVISION_ID = 1,
@@ -266,7 +265,7 @@ static int crb_do_acpi_start(struct tpm_chip *chip)
 	int rc;
 
 	obj = acpi_evaluate_dsm(chip->acpi_dev_handle,
-				CRB_ACPI_START_UUID,
+				&crb_acpi_start_guid,
 				CRB_ACPI_START_REVISION_ID,
 				CRB_ACPI_START_INDEX,
 				NULL);
diff --git a/drivers/char/tpm/tpm_ppi.c b/drivers/char/tpm/tpm_ppi.c
index 692a2c6ae036..86dd8521feef 100644
--- a/drivers/char/tpm/tpm_ppi.c
+++ b/drivers/char/tpm/tpm_ppi.c
@@ -32,20 +32,16 @@
 #define PPI_VS_REQ_START	128
 #define PPI_VS_REQ_END		255
 
-static const u8 tpm_ppi_uuid[] = {
-	0xA6, 0xFA, 0xDD, 0x3D,
-	0x1B, 0x36,
-	0xB4, 0x4E,
-	0xA4, 0x24,
-	0x8D, 0x10, 0x08, 0x9D, 0x16, 0x53
-};
+static const guid_t tpm_ppi_guid =
+	GUID_INIT(0x3DDDFAA6, 0x361B, 0x4EB4,
+		  0xA4, 0x24, 0x8D, 0x10, 0x08, 0x9D, 0x16, 0x53);
 
 static inline union acpi_object *
 tpm_eval_dsm(acpi_handle ppi_handle, int func, acpi_object_type type,
 	     union acpi_object *argv4)
 {
 	BUG_ON(!ppi_handle);
-	return acpi_evaluate_dsm_typed(ppi_handle, tpm_ppi_uuid,
+	return acpi_evaluate_dsm_typed(ppi_handle, &tpm_ppi_guid,
 				       TPM_PPI_REVISION_ID,
 				       func, argv4, type);
 }
@@ -107,7 +103,7 @@ static ssize_t tpm_store_ppi_request(struct device *dev,
 	 * is updated with function index from SUBREQ to SUBREQ2 since PPI
 	 * version 1.1
 	 */
-	if (acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid,
+	if (acpi_check_dsm(chip->acpi_dev_handle, &tpm_ppi_guid,
 			   TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_SUBREQ2))
 		func = TPM_PPI_FN_SUBREQ2;
 
@@ -268,7 +264,7 @@ static ssize_t show_ppi_operations(acpi_handle dev_handle, char *buf, u32 start,
 		"User not required",
 	};
 
-	if (!acpi_check_dsm(dev_handle, tpm_ppi_uuid, TPM_PPI_REVISION_ID,
+	if (!acpi_check_dsm(dev_handle, &tpm_ppi_guid, TPM_PPI_REVISION_ID,
 			    1 << TPM_PPI_FN_GETOPR))
 		return -EPERM;
 
@@ -341,12 +337,12 @@ void tpm_add_ppi(struct tpm_chip *chip)
 	if (!chip->acpi_dev_handle)
 		return;
 
-	if (!acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid,
+	if (!acpi_check_dsm(chip->acpi_dev_handle, &tpm_ppi_guid,
 			    TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_VERSION))
 		return;
 
 	/* Cache PPI version string. */
-	obj = acpi_evaluate_dsm_typed(chip->acpi_dev_handle, tpm_ppi_uuid,
+	obj = acpi_evaluate_dsm_typed(chip->acpi_dev_handle, &tpm_ppi_guid,
 				      TPM_PPI_REVISION_ID, TPM_PPI_FN_VERSION,
 				      NULL, ACPI_TYPE_STRING);
 	if (obj) {
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 7d041d026680..ad843eb02ae7 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1945,9 +1945,9 @@ static int init_vqs(struct ports_device *portdev)
 		}
 	}
 	/* Find the queues. */
-	err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs,
-					      io_callbacks,
-					      (const char **)io_names, NULL);
+	err = virtio_find_vqs(portdev->vdev, nr_queues, vqs,
+			      io_callbacks,
+			      (const char **)io_names, NULL);
 	if (err)
 		goto free;
 
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 9356ab4b7d76..36cfea38135f 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -47,6 +47,14 @@ config COMMON_CLK_RK808
 	  clocked at 32KHz each. Clkout1 is always on, Clkout2 can off
 	  by control register.
 
+config COMMON_CLK_HI655X
+	tristate "Clock driver for Hi655x"
+	depends on MFD_HI655X_PMIC || COMPILE_TEST
+	---help---
+	  This driver supports the hi655x PMIC clock. This
+	  multi-function device has one fixed-rate oscillator, clocked
+	  at 32KHz.
+
 config COMMON_CLK_SCPI
 	tristate "Clock driver controlled via SCPI interface"
 	depends on ARM_SCPI_PROTOCOL || COMPILE_TEST
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 92c12b86c2e8..c19983afcb81 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_COMMON_CLK_PALMAS)		+= clk-palmas.o
 obj-$(CONFIG_COMMON_CLK_PWM)		+= clk-pwm.o
 obj-$(CONFIG_CLK_QORIQ)			+= clk-qoriq.o
 obj-$(CONFIG_COMMON_CLK_RK808)		+= clk-rk808.o
+obj-$(CONFIG_COMMON_CLK_HI655X)		+= clk-hi655x.o
 obj-$(CONFIG_COMMON_CLK_S2MPS11)	+= clk-s2mps11.o
 obj-$(CONFIG_COMMON_CLK_SCPI)           += clk-scpi.o
 obj-$(CONFIG_COMMON_CLK_SI5351)		+= clk-si5351.o
diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c
index 45ad168e1496..7d3223fc7161 100644
--- a/drivers/clk/at91/clk-pll.c
+++ b/drivers/clk/at91/clk-pll.c
@@ -399,18 +399,18 @@ of_at91_clk_pll_get_characteristics(struct device_node *np)
 	if (!characteristics)
 		return NULL;
 
-	output = kzalloc(sizeof(*output) * num_output, GFP_KERNEL);
+	output = kcalloc(num_output, sizeof(*output), GFP_KERNEL);
 	if (!output)
 		goto out_free_characteristics;
 
 	if (num_cells > 2) {
-		out = kzalloc(sizeof(*out) * num_output, GFP_KERNEL);
+		out = kcalloc(num_output, sizeof(*out), GFP_KERNEL);
 		if (!out)
 			goto out_free_output;
 	}
 
 	if (num_cells > 3) {
-		icpll = kzalloc(sizeof(*icpll) * num_output, GFP_KERNEL);
+		icpll = kcalloc(num_output, sizeof(*icpll), GFP_KERNEL);
 		if (!icpll)
 			goto out_free_output;
 	}
diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c
index e04634c46395..2d61893da024 100644
--- a/drivers/clk/bcm/clk-iproc-pll.c
+++ b/drivers/clk/bcm/clk-iproc-pll.c
@@ -277,7 +277,7 @@ static int pll_set_rate(struct iproc_clk *clk, unsigned int rate_index,
 	if (rate >= VCO_LOW && rate < VCO_HIGH) {
 		ki = 4;
 		kp_index = KP_BAND_MID;
-	} else if (rate >= VCO_HIGH && rate && rate < VCO_HIGH_HIGH) {
+	} else if (rate >= VCO_HIGH && rate < VCO_HIGH_HIGH) {
 		ki = 3;
 		kp_index = KP_BAND_HIGH;
 	} else if (rate >= VCO_HIGH_HIGH && rate < VCO_MAX) {
diff --git a/drivers/clk/bcm/clk-ns2.c b/drivers/clk/bcm/clk-ns2.c
index a564e9248814..adc14145861a 100644
--- a/drivers/clk/bcm/clk-ns2.c
+++ b/drivers/clk/bcm/clk-ns2.c
@@ -103,7 +103,7 @@ CLK_OF_DECLARE(ns2_genpll_src_clk, "brcm,ns2-genpll-scr",
 
 static const struct iproc_pll_ctrl genpll_sw = {
 	.flags = IPROC_CLK_AON | IPROC_CLK_PLL_SPLIT_STAT_CTRL,
-	.aon = AON_VAL(0x0, 2, 9, 8),
+	.aon = AON_VAL(0x0, 1, 11, 10),
 	.reset = RESET_VAL(0x4, 2, 1),
 	.dig_filter = DF_VAL(0x0, 9, 3, 5, 4, 2, 3),
 	.ndiv_int = REG_VAL(0x8, 4, 10),
diff --git a/drivers/clk/clk-cs2000-cp.c b/drivers/clk/clk-cs2000-cp.c
index 3fca0526d940..c54baede4d68 100644
--- a/drivers/clk/clk-cs2000-cp.c
+++ b/drivers/clk/clk-cs2000-cp.c
@@ -36,15 +36,35 @@
 
 /* DEVICE_CTRL */
 #define PLL_UNLOCK	(1 << 7)
+#define AUXOUTDIS	(1 << 1)
+#define CLKOUTDIS	(1 << 0)
 
 /* DEVICE_CFG1 */
 #define RSEL(x)		(((x) & 0x3) << 3)
 #define RSEL_MASK	RSEL(0x3)
 #define ENDEV1		(0x1)
 
+/* DEVICE_CFG2 */
+#define AUTORMOD	(1 << 3)
+#define LOCKCLK(x)	(((x) & 0x3) << 1)
+#define LOCKCLK_MASK	LOCKCLK(0x3)
+#define FRACNSRC_MASK	(1 << 0)
+#define FRACNSRC_STATIC		(0 << 0)
+#define FRACNSRC_DYNAMIC	(1 << 1)
+
 /* GLOBAL_CFG */
 #define ENDEV2		(0x1)
 
+/* FUNC_CFG1 */
+#define CLKSKIPEN	(1 << 7)
+#define REFCLKDIV(x)	(((x) & 0x3) << 3)
+#define REFCLKDIV_MASK	REFCLKDIV(0x3)
+
+/* FUNC_CFG2 */
+#define LFRATIO_MASK	(1 << 3)
+#define LFRATIO_20_12	(0 << 3)
+#define LFRATIO_12_20	(1 << 3)
+
 #define CH_SIZE_ERR(ch)		((ch < 0) || (ch >= CH_MAX))
 #define hw_to_priv(_hw)		container_of(_hw, struct cs2000_priv, hw)
 #define priv_to_client(priv)	(priv->client)
@@ -110,6 +130,17 @@ static int cs2000_enable_dev_config(struct cs2000_priv *priv, bool enable)
 	if (ret < 0)
 		return ret;
 
+	ret = cs2000_bset(priv, FUNC_CFG1, CLKSKIPEN,
+			  enable ? CLKSKIPEN : 0);
+	if (ret < 0)
+		return ret;
+
+	/* FIXME: for Static ratio mode */
+	ret = cs2000_bset(priv, FUNC_CFG2, LFRATIO_MASK,
+			  LFRATIO_12_20);
+	if (ret < 0)
+		return ret;
+
 	return 0;
 }
 
@@ -127,7 +158,9 @@ static int cs2000_clk_in_bound_rate(struct cs2000_priv *priv,
 	else
 		return -EINVAL;
 
-	return cs2000_bset(priv, FUNC_CFG1, 0x3 << 3, val << 3);
+	return cs2000_bset(priv, FUNC_CFG1,
+			   REFCLKDIV_MASK,
+			   REFCLKDIV(val));
 }
 
 static int cs2000_wait_pll_lock(struct cs2000_priv *priv)
@@ -153,7 +186,10 @@ static int cs2000_wait_pll_lock(struct cs2000_priv *priv)
 static int cs2000_clk_out_enable(struct cs2000_priv *priv, bool enable)
 {
 	/* enable both AUX_OUT, CLK_OUT */
-	return cs2000_write(priv, DEVICE_CTRL, enable ? 0 : 0x3);
+	return cs2000_bset(priv, DEVICE_CTRL,
+			   (AUXOUTDIS | CLKOUTDIS),
+			   enable ? 0 :
+			   (AUXOUTDIS | CLKOUTDIS));
 }
 
 static u32 cs2000_rate_to_ratio(u32 rate_in, u32 rate_out)
@@ -243,7 +279,9 @@ static int cs2000_ratio_select(struct cs2000_priv *priv, int ch)
 	if (ret < 0)
 		return ret;
 
-	ret = cs2000_write(priv, DEVICE_CFG2, 0x0);
+	ret = cs2000_bset(priv, DEVICE_CFG2,
+			  (AUTORMOD | LOCKCLK_MASK | FRACNSRC_MASK),
+			  (LOCKCLK(ch) | FRACNSRC_STATIC));
 	if (ret < 0)
 		return ret;
 
@@ -351,8 +389,7 @@ static const struct clk_ops cs2000_ops = {
 
 static int cs2000_clk_get(struct cs2000_priv *priv)
 {
-	struct i2c_client *client = priv_to_client(priv);
-	struct device *dev = &client->dev;
+	struct device *dev = priv_to_dev(priv);
 	struct clk *clk_in, *ref_clk;
 
 	clk_in = devm_clk_get(dev, "clk_in");
@@ -420,8 +457,7 @@ static int cs2000_clk_register(struct cs2000_priv *priv)
 
 static int cs2000_version_print(struct cs2000_priv *priv)
 {
-	struct i2c_client *client = priv_to_client(priv);
-	struct device *dev = &client->dev;
+	struct device *dev = priv_to_dev(priv);
 	s32 val;
 	const char *revision;
 
@@ -452,7 +488,7 @@ static int cs2000_version_print(struct cs2000_priv *priv)
 static int cs2000_remove(struct i2c_client *client)
 {
 	struct cs2000_priv *priv = i2c_get_clientdata(client);
-	struct device *dev = &client->dev;
+	struct device *dev = priv_to_dev(priv);
 	struct device_node *np = dev->of_node;
 
 	of_clk_del_provider(np);
diff --git a/drivers/clk/clk-hi655x.c b/drivers/clk/clk-hi655x.c
new file mode 100644
index 000000000000..403a0188634a
--- /dev/null
+++ b/drivers/clk/clk-hi655x.c
@@ -0,0 +1,126 @@
+/*
+ * Clock driver for Hi655x
+ *
+ * Copyright (c) 2017, Linaro Ltd.
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/clk-provider.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/hi655x-pmic.h>
+
+#define HI655X_CLK_BASE	HI655X_BUS_ADDR(0x1c)
+#define HI655X_CLK_SET	BIT(6)
+
+struct hi655x_clk {
+	struct hi655x_pmic *hi655x;
+	struct clk_hw       clk_hw;
+};
+
+static unsigned long hi655x_clk_recalc_rate(struct clk_hw *hw,
+					    unsigned long parent_rate)
+{
+	return 32768;
+}
+
+static int hi655x_clk_enable(struct clk_hw *hw, bool enable)
+{
+	struct hi655x_clk *hi655x_clk =
+		container_of(hw, struct hi655x_clk, clk_hw);
+
+	struct hi655x_pmic *hi655x = hi655x_clk->hi655x;
+
+	return regmap_update_bits(hi655x->regmap, HI655X_CLK_BASE,
+				  HI655X_CLK_SET, enable ? HI655X_CLK_SET : 0);
+}
+
+static int hi655x_clk_prepare(struct clk_hw *hw)
+{
+	return hi655x_clk_enable(hw, true);
+}
+
+static void hi655x_clk_unprepare(struct clk_hw *hw)
+{
+	hi655x_clk_enable(hw, false);
+}
+
+static int hi655x_clk_is_prepared(struct clk_hw *hw)
+{
+	struct hi655x_clk *hi655x_clk =
+		container_of(hw, struct hi655x_clk, clk_hw);
+	struct hi655x_pmic *hi655x = hi655x_clk->hi655x;
+	int ret;
+	uint32_t val;
+
+	ret = regmap_read(hi655x->regmap, HI655X_CLK_BASE, &val);
+	if (ret < 0)
+		return ret;
+
+	return val & HI655X_CLK_BASE;
+}
+
+static const struct clk_ops hi655x_clk_ops = {
+	.prepare     = hi655x_clk_prepare,
+	.unprepare   = hi655x_clk_unprepare,
+	.is_prepared = hi655x_clk_is_prepared,
+	.recalc_rate = hi655x_clk_recalc_rate,
+};
+
+static int hi655x_clk_probe(struct platform_device *pdev)
+{
+	struct device *parent = pdev->dev.parent;
+	struct hi655x_pmic *hi655x = dev_get_drvdata(parent);
+	struct hi655x_clk *hi655x_clk;
+	const char *clk_name = "hi655x-clk";
+	struct clk_init_data init = {
+		.name = clk_name,
+		.ops = &hi655x_clk_ops
+	};
+	int ret;
+
+	hi655x_clk = devm_kzalloc(&pdev->dev, sizeof(*hi655x_clk), GFP_KERNEL);
+	if (!hi655x_clk)
+		return -ENOMEM;
+
+	of_property_read_string_index(parent->of_node, "clock-output-names",
+				      0, &clk_name);
+
+	hi655x_clk->clk_hw.init	= &init;
+	hi655x_clk->hi655x	= hi655x;
+
+	platform_set_drvdata(pdev, hi655x_clk);
+
+	ret = devm_clk_hw_register(&pdev->dev, &hi655x_clk->clk_hw);
+	if (ret)
+		return ret;
+
+	return of_clk_add_hw_provider(parent->of_node, of_clk_hw_simple_get,
+				     &hi655x_clk->clk_hw);
+}
+
+static struct platform_driver hi655x_clk_driver = {
+	.probe =  hi655x_clk_probe,
+	.driver		= {
+		.name	= "hi655x-clk",
+	},
+};
+
+module_platform_driver(hi655x_clk_driver);
+
+MODULE_DESCRIPTION("Clk driver for the hi655x series PMICs");
+MODULE_AUTHOR("Daniel Lezcano <daniel.lezcano@linaro.org>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:hi655x-clk");
diff --git a/drivers/clk/clk-nomadik.c b/drivers/clk/clk-nomadik.c
index 71677eb12565..13ad6d1e5090 100644
--- a/drivers/clk/clk-nomadik.c
+++ b/drivers/clk/clk-nomadik.c
@@ -267,10 +267,8 @@ pll_clk_register(struct device *dev, const char *name,
 	}
 
 	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
-	if (!pll) {
-		pr_err("%s: could not allocate PLL clk\n", __func__);
+	if (!pll)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	init.name = name;
 	init.ops = &pll_clk_ops;
@@ -356,11 +354,9 @@ src_clk_register(struct device *dev, const char *name,
 	struct clk_init_data init;
 
 	sclk = kzalloc(sizeof(*sclk), GFP_KERNEL);
-	if (!sclk) {
-		pr_err("could not allocate SRC clock %s\n",
-			name);
+	if (!sclk)
 		return ERR_PTR(-ENOMEM);
-	}
+
 	init.name = name;
 	init.ops = &src_clk_ops;
 	/* Do not force-disable the static SDRAM controller */
@@ -467,7 +463,7 @@ static int nomadik_src_clk_show(struct seq_file *s, void *what)
 	u32 src_pckensr0 = readl(src_base + SRC_PCKENSR0);
 	u32 src_pckensr1 = readl(src_base + SRC_PCKENSR1);
 
-	seq_printf(s, "Clock:      Boot:   Now:    Request: ASKED:\n");
+	seq_puts(s, "Clock:      Boot:   Now:    Request: ASKED:\n");
 	for (i = 0; i < ARRAY_SIZE(src_clk_names); i++) {
 		u32 pcksrb = (i < 0x20) ? src_pcksr0_boot : src_pcksr1_boot;
 		u32 pcksr = (i < 0x20) ? src_pcksr0 : src_pcksr1;
diff --git a/drivers/clk/clk-si5351.c b/drivers/clk/clk-si5351.c
index b051db43fae1..2492442eea77 100644
--- a/drivers/clk/clk-si5351.c
+++ b/drivers/clk/clk-si5351.c
@@ -1354,10 +1354,8 @@ static int si5351_i2c_probe(struct i2c_client *client,
 		return -EINVAL;
 
 	drvdata = devm_kzalloc(&client->dev, sizeof(*drvdata), GFP_KERNEL);
-	if (drvdata == NULL) {
-		dev_err(&client->dev, "unable to allocate driver data\n");
+	if (!drvdata)
 		return -ENOMEM;
-	}
 
 	i2c_set_clientdata(client, drvdata);
 	drvdata->client = client;
@@ -1535,9 +1533,9 @@ static int si5351_i2c_probe(struct i2c_client *client,
 	else
 		parent_names[1] = si5351_pll_names[1];
 
-	drvdata->msynth = devm_kzalloc(&client->dev, num_clocks *
+	drvdata->msynth = devm_kcalloc(&client->dev, num_clocks,
 				       sizeof(*drvdata->msynth), GFP_KERNEL);
-	drvdata->clkout = devm_kzalloc(&client->dev, num_clocks *
+	drvdata->clkout = devm_kcalloc(&client->dev, num_clocks,
 				       sizeof(*drvdata->clkout), GFP_KERNEL);
 	drvdata->num_clkout = num_clocks;
 
diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c
index cf9449b3dbd9..68e2a4e499f1 100644
--- a/drivers/clk/clk-stm32f4.c
+++ b/drivers/clk/clk-stm32f4.c
@@ -531,19 +531,26 @@ static int stm32f4_pll_is_enabled(struct clk_hw *hw)
 	return clk_gate_ops.is_enabled(hw);
 }
 
+#define PLL_TIMEOUT 10000
+
 static int stm32f4_pll_enable(struct clk_hw *hw)
 {
 	struct clk_gate *gate = to_clk_gate(hw);
 	struct stm32f4_pll *pll = to_stm32f4_pll(gate);
-	int ret = 0;
-	unsigned long reg;
+	int bit_status;
+	unsigned int timeout = PLL_TIMEOUT;
 
-	ret = clk_gate_ops.enable(hw);
+	if (clk_gate_ops.is_enabled(hw))
+		return 0;
+
+	clk_gate_ops.enable(hw);
 
-	ret = readl_relaxed_poll_timeout_atomic(base + STM32F4_RCC_CR, reg,
-			reg & (1 << pll->bit_rdy_idx), 0, 10000);
+	do {
+		bit_status = !(readl(gate->reg) & BIT(pll->bit_rdy_idx));
 
-	return ret;
+	} while (bit_status && --timeout);
+
+	return bit_status;
 }
 
 static void stm32f4_pll_disable(struct clk_hw *hw)
@@ -834,24 +841,32 @@ struct stm32_rgate {
 	u8	bit_rdy_idx;
 };
 
-#define RTC_TIMEOUT 1000000
+#define RGATE_TIMEOUT 50000
 
 static int rgclk_enable(struct clk_hw *hw)
 {
 	struct clk_gate *gate = to_clk_gate(hw);
 	struct stm32_rgate *rgate = to_rgclk(gate);
-	u32 reg;
-	int ret;
+	int bit_status;
+	unsigned int timeout = RGATE_TIMEOUT;
+
+	if (clk_gate_ops.is_enabled(hw))
+		return 0;
 
 	disable_power_domain_write_protection();
 
 	clk_gate_ops.enable(hw);
 
-	ret = readl_relaxed_poll_timeout_atomic(gate->reg, reg,
-			reg & rgate->bit_rdy_idx, 1000, RTC_TIMEOUT);
+	do {
+		bit_status = !(readl(gate->reg) & BIT(rgate->bit_rdy_idx));
+		if (bit_status)
+			udelay(100);
+
+	} while (bit_status && --timeout);
 
 	enable_power_domain_write_protection();
-	return ret;
+
+	return bit_status;
 }
 
 static void rgclk_disable(struct clk_hw *hw)
@@ -1533,7 +1548,7 @@ static void __init stm32f4_rcc_init(struct device_node *np)
 	}
 
 	clks[CLK_LSI] = clk_register_rgate(NULL, "lsi", "clk-lsi", 0,
-			base + STM32F4_RCC_CSR, 0, 2, 0, &stm32f4_clk_lock);
+			base + STM32F4_RCC_CSR, 0, 1, 0, &stm32f4_clk_lock);
 
 	if (IS_ERR(clks[CLK_LSI])) {
 		pr_err("Unable to register lsi clock\n");
@@ -1541,7 +1556,7 @@ static void __init stm32f4_rcc_init(struct device_node *np)
 	}
 
 	clks[CLK_LSE] = clk_register_rgate(NULL, "lse", "clk-lse", 0,
-			base + STM32F4_RCC_BDCR, 0, 2, 0, &stm32f4_clk_lock);
+			base + STM32F4_RCC_BDCR, 0, 1, 0, &stm32f4_clk_lock);
 
 	if (IS_ERR(clks[CLK_LSE])) {
 		pr_err("Unable to register lse clock\n");
diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c
index 56741f3cf0a3..ea7d552a2f2b 100644
--- a/drivers/clk/clk-versaclock5.c
+++ b/drivers/clk/clk-versaclock5.c
@@ -113,10 +113,29 @@
 #define VC5_MUX_IN_XIN		BIT(0)
 #define VC5_MUX_IN_CLKIN	BIT(1)
 
+/* Maximum number of clk_out supported by this driver */
+#define VC5_MAX_CLK_OUT_NUM	5
+
+/* Maximum number of FODs supported by this driver */
+#define VC5_MAX_FOD_NUM	4
+
+/* flags to describe chip features */
+/* chip has built-in oscilator */
+#define VC5_HAS_INTERNAL_XTAL	BIT(0)
+
 /* Supported IDT VC5 models. */
 enum vc5_model {
 	IDT_VC5_5P49V5923,
 	IDT_VC5_5P49V5933,
+	IDT_VC5_5P49V5935,
+};
+
+/* Structure to describe features of a particular VC5 model */
+struct vc5_chip_info {
+	const enum vc5_model	model;
+	const unsigned int	clk_fod_cnt;
+	const unsigned int	clk_out_cnt;
+	const u32		flags;
 };
 
 struct vc5_driver_data;
@@ -132,15 +151,15 @@ struct vc5_hw_data {
 struct vc5_driver_data {
 	struct i2c_client	*client;
 	struct regmap		*regmap;
-	enum vc5_model		model;
+	const struct vc5_chip_info	*chip_info;
 
 	struct clk		*pin_xin;
 	struct clk		*pin_clkin;
 	unsigned char		clk_mux_ins;
 	struct clk_hw		clk_mux;
 	struct vc5_hw_data	clk_pll;
-	struct vc5_hw_data	clk_fod[2];
-	struct vc5_hw_data	clk_out[3];
+	struct vc5_hw_data	clk_fod[VC5_MAX_FOD_NUM];
+	struct vc5_hw_data	clk_out[VC5_MAX_CLK_OUT_NUM];
 };
 
 static const char * const vc5_mux_names[] = {
@@ -563,7 +582,7 @@ static struct clk_hw *vc5_of_clk_get(struct of_phandle_args *clkspec,
 	struct vc5_driver_data *vc5 = data;
 	unsigned int idx = clkspec->args[0];
 
-	if (idx > 2)
+	if (idx >= vc5->chip_info->clk_out_cnt)
 		return ERR_PTR(-EINVAL);
 
 	return &vc5->clk_out[idx].hw;
@@ -576,6 +595,7 @@ static int vc5_map_index_to_output(const enum vc5_model model,
 	case IDT_VC5_5P49V5933:
 		return (n == 0) ? 0 : 3;
 	case IDT_VC5_5P49V5923:
+	case IDT_VC5_5P49V5935:
 	default:
 		return n;
 	}
@@ -586,12 +606,10 @@ static const struct of_device_id clk_vc5_of_match[];
 static int vc5_probe(struct i2c_client *client,
 		     const struct i2c_device_id *id)
 {
-	const struct of_device_id *of_id =
-		of_match_device(clk_vc5_of_match, &client->dev);
 	struct vc5_driver_data *vc5;
 	struct clk_init_data init;
 	const char *parent_names[2];
-	unsigned int n, idx;
+	unsigned int n, idx = 0;
 	int ret;
 
 	vc5 = devm_kzalloc(&client->dev, sizeof(*vc5), GFP_KERNEL);
@@ -600,7 +618,7 @@ static int vc5_probe(struct i2c_client *client,
 
 	i2c_set_clientdata(client, vc5);
 	vc5->client = client;
-	vc5->model = (enum vc5_model)of_id->data;
+	vc5->chip_info = of_device_get_match_data(&client->dev);
 
 	vc5->pin_xin = devm_clk_get(&client->dev, "xin");
 	if (PTR_ERR(vc5->pin_xin) == -EPROBE_DEFER)
@@ -622,8 +640,7 @@ static int vc5_probe(struct i2c_client *client,
 	if (!IS_ERR(vc5->pin_xin)) {
 		vc5->clk_mux_ins |= VC5_MUX_IN_XIN;
 		parent_names[init.num_parents++] = __clk_get_name(vc5->pin_xin);
-	} else if (vc5->model == IDT_VC5_5P49V5933) {
-		/* IDT VC5 5P49V5933 has built-in oscilator. */
+	} else if (vc5->chip_info->flags & VC5_HAS_INTERNAL_XTAL) {
 		vc5->pin_xin = clk_register_fixed_rate(&client->dev,
 						       "internal-xtal", NULL,
 						       0, 25000000);
@@ -672,8 +689,8 @@ static int vc5_probe(struct i2c_client *client,
 	}
 
 	/* Register FODs */
-	for (n = 0; n < 2; n++) {
-		idx = vc5_map_index_to_output(vc5->model, n);
+	for (n = 0; n < vc5->chip_info->clk_fod_cnt; n++) {
+		idx = vc5_map_index_to_output(vc5->chip_info->model, n);
 		memset(&init, 0, sizeof(init));
 		init.name = vc5_fod_names[idx];
 		init.ops = &vc5_fod_ops;
@@ -709,8 +726,8 @@ static int vc5_probe(struct i2c_client *client,
 	}
 
 	/* Register FOD-connected OUTx outputs */
-	for (n = 1; n < 3; n++) {
-		idx = vc5_map_index_to_output(vc5->model, n - 1);
+	for (n = 1; n < vc5->chip_info->clk_out_cnt; n++) {
+		idx = vc5_map_index_to_output(vc5->chip_info->model, n - 1);
 		parent_names[0] = vc5_fod_names[idx];
 		if (n == 1)
 			parent_names[1] = vc5_mux_names[0];
@@ -744,7 +761,7 @@ static int vc5_probe(struct i2c_client *client,
 	return 0;
 
 err_clk:
-	if (vc5->model == IDT_VC5_5P49V5933)
+	if (vc5->chip_info->flags & VC5_HAS_INTERNAL_XTAL)
 		clk_unregister_fixed_rate(vc5->pin_xin);
 	return ret;
 }
@@ -755,22 +772,45 @@ static int vc5_remove(struct i2c_client *client)
 
 	of_clk_del_provider(client->dev.of_node);
 
-	if (vc5->model == IDT_VC5_5P49V5933)
+	if (vc5->chip_info->flags & VC5_HAS_INTERNAL_XTAL)
 		clk_unregister_fixed_rate(vc5->pin_xin);
 
 	return 0;
 }
 
+static const struct vc5_chip_info idt_5p49v5923_info = {
+	.model = IDT_VC5_5P49V5923,
+	.clk_fod_cnt = 2,
+	.clk_out_cnt = 3,
+	.flags = 0,
+};
+
+static const struct vc5_chip_info idt_5p49v5933_info = {
+	.model = IDT_VC5_5P49V5933,
+	.clk_fod_cnt = 2,
+	.clk_out_cnt = 3,
+	.flags = VC5_HAS_INTERNAL_XTAL,
+};
+
+static const struct vc5_chip_info idt_5p49v5935_info = {
+	.model = IDT_VC5_5P49V5935,
+	.clk_fod_cnt = 4,
+	.clk_out_cnt = 5,
+	.flags = VC5_HAS_INTERNAL_XTAL,
+};
+
 static const struct i2c_device_id vc5_id[] = {
 	{ "5p49v5923", .driver_data = IDT_VC5_5P49V5923 },
 	{ "5p49v5933", .driver_data = IDT_VC5_5P49V5933 },
+	{ "5p49v5935", .driver_data = IDT_VC5_5P49V5935 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, vc5_id);
 
 static const struct of_device_id clk_vc5_of_match[] = {
-	{ .compatible = "idt,5p49v5923", .data = (void *)IDT_VC5_5P49V5923 },
-	{ .compatible = "idt,5p49v5933", .data = (void *)IDT_VC5_5P49V5933 },
+	{ .compatible = "idt,5p49v5923", .data = &idt_5p49v5923_info },
+	{ .compatible = "idt,5p49v5933", .data = &idt_5p49v5933_info },
+	{ .compatible = "idt,5p49v5935", .data = &idt_5p49v5935_info },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, clk_vc5_of_match);
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 67201f67a14a..fc58c52a26b4 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -966,6 +966,8 @@ static int __clk_notify(struct clk_core *core, unsigned long msg,
 			cnd.clk = cn->clk;
 			ret = srcu_notifier_call_chain(&cn->notifier_head, msg,
 					&cnd);
+			if (ret & NOTIFY_STOP_MASK)
+				return ret;
 		}
 	}
 
@@ -2081,11 +2083,11 @@ static void clk_dump_subtree(struct seq_file *s, struct clk_core *c, int level)
 	clk_dump_one(s, c, level);
 
 	hlist_for_each_entry(child, &c->children, child_node) {
-		seq_printf(s, ",");
+		seq_putc(s, ',');
 		clk_dump_subtree(s, child, level + 1);
 	}
 
-	seq_printf(s, "}");
+	seq_putc(s, '}');
 }
 
 static int clk_dump(struct seq_file *s, void *data)
@@ -2094,14 +2096,13 @@ static int clk_dump(struct seq_file *s, void *data)
 	bool first_node = true;
 	struct hlist_head **lists = (struct hlist_head **)s->private;
 
-	seq_printf(s, "{");
-
+	seq_putc(s, '{');
 	clk_prepare_lock();
 
 	for (; *lists; lists++) {
 		hlist_for_each_entry(c, *lists, child_node) {
 			if (!first_node)
-				seq_puts(s, ",");
+				seq_putc(s, ',');
 			first_node = false;
 			clk_dump_subtree(s, c, 0);
 		}
@@ -2126,6 +2127,31 @@ static const struct file_operations clk_dump_fops = {
 	.release	= single_release,
 };
 
+static int possible_parents_dump(struct seq_file *s, void *data)
+{
+	struct clk_core *core = s->private;
+	int i;
+
+	for (i = 0; i < core->num_parents - 1; i++)
+		seq_printf(s, "%s ", core->parent_names[i]);
+
+	seq_printf(s, "%s\n", core->parent_names[i]);
+
+	return 0;
+}
+
+static int possible_parents_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, possible_parents_dump, inode->i_private);
+}
+
+static const struct file_operations possible_parents_fops = {
+	.open		= possible_parents_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static int clk_debug_create_one(struct clk_core *core, struct dentry *pdentry)
 {
 	struct dentry *d;
@@ -2177,6 +2203,13 @@ static int clk_debug_create_one(struct clk_core *core, struct dentry *pdentry)
 	if (!d)
 		goto err_out;
 
+	if (core->num_parents > 1) {
+		d = debugfs_create_file("clk_possible_parents", S_IRUGO,
+				core->dentry, core, &possible_parents_fops);
+		if (!d)
+			goto err_out;
+	}
+
 	if (core->ops->debug_init) {
 		ret = core->ops->debug_init(core->hw, core->dentry);
 		if (ret)
@@ -2940,7 +2973,7 @@ int clk_notifier_register(struct clk *clk, struct notifier_block *nb)
 
 	/* if clk wasn't in the notifier list, allocate new clk_notifier */
 	if (cn->clk != clk) {
-		cn = kzalloc(sizeof(struct clk_notifier), GFP_KERNEL);
+		cn = kzalloc(sizeof(*cn), GFP_KERNEL);
 		if (!cn)
 			goto out;
 
@@ -3088,7 +3121,7 @@ int of_clk_add_provider(struct device_node *np,
 	struct of_clk_provider *cp;
 	int ret;
 
-	cp = kzalloc(sizeof(struct of_clk_provider), GFP_KERNEL);
+	cp = kzalloc(sizeof(*cp), GFP_KERNEL);
 	if (!cp)
 		return -ENOMEM;
 
diff --git a/drivers/clk/hisilicon/clk-hi3620.c b/drivers/clk/hisilicon/clk-hi3620.c
index d04a104ce1b4..fa0fba653898 100644
--- a/drivers/clk/hisilicon/clk-hi3620.c
+++ b/drivers/clk/hisilicon/clk-hi3620.c
@@ -144,7 +144,7 @@ static struct hisi_divider_clock hi3620_div_clks[] __initdata = {
 	{ HI3620_MMC3_DIV,     "mmc3_div",   "mmc3_mux",  0, 0x140, 5, 4, CLK_DIVIDER_HIWORD_MASK, NULL, },
 };
 
-static struct hisi_gate_clock hi3620_seperated_gate_clks[] __initdata = {
+static struct hisi_gate_clock hi3620_separated_gate_clks[] __initdata = {
 	{ HI3620_TIMERCLK01,   "timerclk01",   "timer_rclk01", CLK_SET_RATE_PARENT, 0x20, 0, 0, },
 	{ HI3620_TIMER_RCLK01, "timer_rclk01", "rclk_tcxo",    CLK_SET_RATE_PARENT, 0x20, 1, 0, },
 	{ HI3620_TIMERCLK23,   "timerclk23",   "timer_rclk23", CLK_SET_RATE_PARENT, 0x20, 2, 0, },
@@ -224,8 +224,8 @@ static void __init hi3620_clk_init(struct device_node *np)
 			      clk_data);
 	hisi_clk_register_divider(hi3620_div_clks, ARRAY_SIZE(hi3620_div_clks),
 				  clk_data);
-	hisi_clk_register_gate_sep(hi3620_seperated_gate_clks,
-				   ARRAY_SIZE(hi3620_seperated_gate_clks),
+	hisi_clk_register_gate_sep(hi3620_separated_gate_clks,
+				   ARRAY_SIZE(hi3620_separated_gate_clks),
 				   clk_data);
 }
 CLK_OF_DECLARE(hi3620_clk, "hisilicon,hi3620-clock", hi3620_clk_init);
@@ -430,10 +430,8 @@ static struct clk *hisi_register_clk_mmc(struct hisi_mmc_clock *mmc_clk,
 	struct clk_init_data init;
 
 	mclk = kzalloc(sizeof(*mclk), GFP_KERNEL);
-	if (!mclk) {
-		pr_err("%s: fail to allocate mmc clk\n", __func__);
+	if (!mclk)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	init.name = mmc_clk->name;
 	init.ops = &clk_mmc_ops;
@@ -482,11 +480,9 @@ static void __init hi3620_mmc_clk_init(struct device_node *node)
 	if (WARN_ON(!clk_data))
 		return;
 
-	clk_data->clks = kzalloc(sizeof(struct clk *) * num, GFP_KERNEL);
-	if (!clk_data->clks) {
-		pr_err("%s: fail to allocate mmc clk\n", __func__);
+	clk_data->clks = kcalloc(num, sizeof(*clk_data->clks), GFP_KERNEL);
+	if (!clk_data->clks)
 		return;
-	}
 
 	for (i = 0; i < num; i++) {
 		struct hisi_mmc_clock *mmc_clk = &hi3620_mmc_clks[i];
diff --git a/drivers/clk/hisilicon/clk-hi6220.c b/drivers/clk/hisilicon/clk-hi6220.c
index c0e8e1f196aa..2ae151ce623a 100644
--- a/drivers/clk/hisilicon/clk-hi6220.c
+++ b/drivers/clk/hisilicon/clk-hi6220.c
@@ -134,6 +134,7 @@ static struct hisi_gate_clock hi6220_separated_gate_clks_sys[] __initdata = {
 	{ HI6220_UART4_PCLK,    "uart4_pclk",    "uart4_src",      CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x230, 8,  0, },
 	{ HI6220_SPI_CLK,       "spi_clk",       "clk_150m",       CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x230, 9,  0, },
 	{ HI6220_TSENSOR_CLK,   "tsensor_clk",   "clk_bus",        CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x230, 12, 0, },
+	{ HI6220_DAPB_CLK,      "dapb_clk",      "cs_dapb",        CLK_SET_RATE_PARENT|CLK_IS_CRITICAL,   0x230, 18, 0, },
 	{ HI6220_MMU_CLK,       "mmu_clk",       "ddrc_axi1",      CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x240, 11, 0, },
 	{ HI6220_HIFI_SEL,      "hifi_sel",      "hifi_src",       CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 0,  0, },
 	{ HI6220_MMC0_SYSPLL,   "mmc0_syspll",   "syspll",         CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 1,  0, },
diff --git a/drivers/clk/hisilicon/clk.c b/drivers/clk/hisilicon/clk.c
index 9ba2d91f4d3a..b73c1dfae7f1 100644
--- a/drivers/clk/hisilicon/clk.c
+++ b/drivers/clk/hisilicon/clk.c
@@ -54,8 +54,9 @@ struct hisi_clock_data *hisi_clk_alloc(struct platform_device *pdev,
 	if (!clk_data->base)
 		return NULL;
 
-	clk_table = devm_kmalloc(&pdev->dev, sizeof(struct clk *) * nr_clks,
-				GFP_KERNEL);
+	clk_table = devm_kmalloc_array(&pdev->dev, nr_clks,
+				       sizeof(*clk_table),
+				       GFP_KERNEL);
 	if (!clk_table)
 		return NULL;
 
@@ -80,17 +81,14 @@ struct hisi_clock_data *hisi_clk_init(struct device_node *np,
 	}
 
 	clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
-	if (!clk_data) {
-		pr_err("%s: could not allocate clock data\n", __func__);
+	if (!clk_data)
 		goto err;
-	}
-	clk_data->base = base;
 
-	clk_table = kzalloc(sizeof(struct clk *) * nr_clks, GFP_KERNEL);
-	if (!clk_table) {
-		pr_err("%s: could not allocate clock lookup table\n", __func__);
+	clk_data->base = base;
+	clk_table = kcalloc(nr_clks, sizeof(*clk_table), GFP_KERNEL);
+	if (!clk_table)
 		goto err_data;
-	}
+
 	clk_data->clk_data.clks = clk_table;
 	clk_data->clk_data.clk_num = nr_clks;
 	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data->clk_data);
diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c
index 75c35fb12b60..b4e0dff3c8c2 100644
--- a/drivers/clk/imx/clk-imx6ul.c
+++ b/drivers/clk/imx/clk-imx6ul.c
@@ -73,7 +73,7 @@ static struct clk *clks[IMX6UL_CLK_END];
 static struct clk_onecell_data clk_data;
 
 static int const clks_init_on[] __initconst = {
-	IMX6UL_CLK_AIPSTZ1, IMX6UL_CLK_AIPSTZ2, IMX6UL_CLK_AIPSTZ3,
+	IMX6UL_CLK_AIPSTZ1, IMX6UL_CLK_AIPSTZ2,
 	IMX6UL_CLK_AXI, IMX6UL_CLK_ARM, IMX6UL_CLK_ROM,
 	IMX6UL_CLK_MMDC_P0_FAST, IMX6UL_CLK_MMDC_P0_IPG,
 };
@@ -341,9 +341,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clks[IMX6UL_CLK_GPT2_SERIAL]	= imx_clk_gate2("gpt2_serial",	"perclk",	base + 0x68,	26);
 	clks[IMX6UL_CLK_UART2_IPG]	= imx_clk_gate2("uart2_ipg",	"ipg",		base + 0x68,	28);
 	clks[IMX6UL_CLK_UART2_SERIAL]	= imx_clk_gate2("uart2_serial",	"uart_podf",	base + 0x68,	28);
-	if (clk_on_imx6ul())
-		clks[IMX6UL_CLK_AIPSTZ3]	= imx_clk_gate2("aips_tz3",	"ahb",		base + 0x68,	30);
-	else if (clk_on_imx6ull())
+	if (clk_on_imx6ull())
 		clks[IMX6UL_CLK_AIPSTZ3]	= imx_clk_gate2("aips_tz3",	"ahb",		 base + 0x80,	18);
 
 	/* CCGR1 */
@@ -360,7 +358,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clks[IMX6UL_CLK_GPT1_BUS]	= imx_clk_gate2("gpt1_bus",	"perclk",	base + 0x6c,	20);
 	clks[IMX6UL_CLK_GPT1_SERIAL]	= imx_clk_gate2("gpt1_serial",	"perclk",	base + 0x6c,	22);
 	clks[IMX6UL_CLK_UART4_IPG]	= imx_clk_gate2("uart4_ipg",	"ipg",		base + 0x6c,	24);
-	clks[IMX6UL_CLK_UART4_SERIAL]	= imx_clk_gate2("uart4_serail",	"uart_podf",	base + 0x6c,	24);
+	clks[IMX6UL_CLK_UART4_SERIAL]	= imx_clk_gate2("uart4_serial",	"uart_podf",	base + 0x6c,	24);
 
 	/* CCGR2 */
 	if (clk_on_imx6ull()) {
@@ -482,6 +480,9 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	for (i = 0; i < ARRAY_SIZE(clks_init_on); i++)
 		clk_prepare_enable(clks[clks_init_on[i]]);
 
+	if (clk_on_imx6ull())
+		clk_prepare_enable(clks[IMX6UL_CLK_AIPSTZ3]);
+
 	if (IS_ENABLED(CONFIG_USB_MXS_PHY)) {
 		clk_prepare_enable(clks[IMX6UL_CLK_USBPHY1_GATE]);
 		clk_prepare_enable(clks[IMX6UL_CLK_USBPHY2_GATE]);
diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c
index ae1d31be906e..93b03640da9b 100644
--- a/drivers/clk/imx/clk-imx7d.c
+++ b/drivers/clk/imx/clk-imx7d.c
@@ -386,7 +386,7 @@ static int const clks_init_on[] __initconst = {
 	IMX7D_PLL_SYS_MAIN_480M_CLK, IMX7D_NAND_USDHC_BUS_ROOT_CLK,
 	IMX7D_DRAM_PHYM_ROOT_CLK, IMX7D_DRAM_ROOT_CLK,
 	IMX7D_DRAM_PHYM_ALT_ROOT_CLK, IMX7D_DRAM_ALT_ROOT_CLK,
-	IMX7D_AHB_CHANNEL_ROOT_CLK,
+	IMX7D_AHB_CHANNEL_ROOT_CLK, IMX7D_IPG_ROOT_CLK,
 };
 
 static struct clk_onecell_data clk_data;
@@ -724,8 +724,9 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
 	clks[IMX7D_MAIN_AXI_ROOT_DIV] = imx_clk_divider2("axi_post_div", "axi_pre_div", base + 0x8800, 0, 6);
 	clks[IMX7D_DISP_AXI_ROOT_DIV] = imx_clk_divider2("disp_axi_post_div", "disp_axi_pre_div", base + 0x8880, 0, 6);
 	clks[IMX7D_ENET_AXI_ROOT_DIV] = imx_clk_divider2("enet_axi_post_div", "enet_axi_pre_div", base + 0x8900, 0, 6);
-	clks[IMX7D_NAND_USDHC_BUS_ROOT_DIV] = imx_clk_divider2("nand_usdhc_post_div", "nand_usdhc_pre_div", base + 0x8980, 0, 6);
-	clks[IMX7D_AHB_CHANNEL_ROOT_DIV] = imx_clk_divider2("ahb_post_div", "ahb_pre_div", base + 0x9000, 0, 6);
+	clks[IMX7D_NAND_USDHC_BUS_ROOT_CLK] = imx_clk_divider2("nand_usdhc_root_clk", "nand_usdhc_pre_div", base + 0x8980, 0, 6);
+	clks[IMX7D_AHB_CHANNEL_ROOT_DIV] = imx_clk_divider2("ahb_root_clk", "ahb_pre_div", base + 0x9000, 0, 6);
+	clks[IMX7D_IPG_ROOT_CLK] = imx_clk_divider2("ipg_root_clk", "ahb_root_clk", base + 0x9080, 0, 2);
 	clks[IMX7D_DRAM_ROOT_DIV] = imx_clk_divider2("dram_post_div", "dram_cg", base + 0x9880, 0, 3);
 	clks[IMX7D_DRAM_PHYM_ALT_ROOT_DIV] = imx_clk_divider2("dram_phym_alt_post_div", "dram_phym_alt_pre_div", base + 0xa000, 0, 3);
 	clks[IMX7D_DRAM_ALT_ROOT_DIV] = imx_clk_divider2("dram_alt_post_div", "dram_alt_pre_div", base + 0xa080, 0, 3);
@@ -796,9 +797,7 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
 	clks[IMX7D_DISP_AXI_ROOT_CLK] = imx_clk_gate4("disp_axi_root_clk", "disp_axi_post_div", base + 0x4050, 0);
 	clks[IMX7D_ENET_AXI_ROOT_CLK] = imx_clk_gate4("enet_axi_root_clk", "enet_axi_post_div", base + 0x4060, 0);
 	clks[IMX7D_OCRAM_CLK] = imx_clk_gate4("ocram_clk", "axi_post_div", base + 0x4110, 0);
-	clks[IMX7D_OCRAM_S_CLK] = imx_clk_gate4("ocram_s_clk", "ahb_post_div", base + 0x4120, 0);
-	clks[IMX7D_NAND_USDHC_BUS_ROOT_CLK] = imx_clk_gate4("nand_usdhc_root_clk", "nand_usdhc_post_div", base + 0x4130, 0);
-	clks[IMX7D_AHB_CHANNEL_ROOT_CLK] = imx_clk_gate4("ahb_root_clk", "ahb_post_div", base + 0x4200, 0);
+	clks[IMX7D_OCRAM_S_CLK] = imx_clk_gate4("ocram_s_clk", "ahb_root_clk", base + 0x4120, 0);
 	clks[IMX7D_DRAM_ROOT_CLK] = imx_clk_gate4("dram_root_clk", "dram_post_div", base + 0x4130, 0);
 	clks[IMX7D_DRAM_PHYM_ROOT_CLK] = imx_clk_gate4("dram_phym_root_clk", "dram_phym_cg", base + 0x4130, 0);
 	clks[IMX7D_DRAM_PHYM_ALT_ROOT_CLK] = imx_clk_gate4("dram_phym_alt_root_clk", "dram_phym_alt_post_div", base + 0x4130, 0);
diff --git a/drivers/clk/mediatek/Kconfig b/drivers/clk/mediatek/Kconfig
index a01ef7806aed..28739a9a6e37 100644
--- a/drivers/clk/mediatek/Kconfig
+++ b/drivers/clk/mediatek/Kconfig
@@ -50,6 +50,38 @@ config COMMON_CLK_MT2701_BDPSYS
 	---help---
 	  This driver supports Mediatek MT2701 bdpsys clocks.
 
+config COMMON_CLK_MT6797
+       bool "Clock driver for Mediatek MT6797"
+       depends on (ARCH_MEDIATEK && ARM64) || COMPILE_TEST
+       select COMMON_CLK_MEDIATEK
+       default ARCH_MEDIATEK && ARM64
+       ---help---
+         This driver supports Mediatek MT6797 basic clocks.
+
+config COMMON_CLK_MT6797_MMSYS
+       bool "Clock driver for Mediatek MT6797 mmsys"
+       depends on COMMON_CLK_MT6797
+       ---help---
+         This driver supports Mediatek MT6797 mmsys clocks.
+
+config COMMON_CLK_MT6797_IMGSYS
+       bool "Clock driver for Mediatek MT6797 imgsys"
+       depends on COMMON_CLK_MT6797
+       ---help---
+         This driver supports Mediatek MT6797 imgsys clocks.
+
+config COMMON_CLK_MT6797_VDECSYS
+       bool "Clock driver for Mediatek MT6797 vdecsys"
+       depends on COMMON_CLK_MT6797
+       ---help---
+         This driver supports Mediatek MT6797 vdecsys clocks.
+
+config COMMON_CLK_MT6797_VENCSYS
+       bool "Clock driver for Mediatek MT6797 vencsys"
+       depends on COMMON_CLK_MT6797
+       ---help---
+         This driver supports Mediatek MT6797 vencsys clocks.
+
 config COMMON_CLK_MT8135
 	bool "Clock driver for Mediatek MT8135"
 	depends on (ARCH_MEDIATEK && ARM) || COMPILE_TEST
diff --git a/drivers/clk/mediatek/Makefile b/drivers/clk/mediatek/Makefile
index 19ae7ef79b57..5c3afb86b9ec 100644
--- a/drivers/clk/mediatek/Makefile
+++ b/drivers/clk/mediatek/Makefile
@@ -1,5 +1,10 @@
 obj-$(CONFIG_COMMON_CLK_MEDIATEK) += clk-mtk.o clk-pll.o clk-gate.o clk-apmixed.o
 obj-$(CONFIG_RESET_CONTROLLER) += reset.o
+obj-$(CONFIG_COMMON_CLK_MT6797) += clk-mt6797.o
+obj-$(CONFIG_COMMON_CLK_MT6797_IMGSYS) += clk-mt6797-img.o
+obj-$(CONFIG_COMMON_CLK_MT6797_MMSYS) += clk-mt6797-mm.o
+obj-$(CONFIG_COMMON_CLK_MT6797_VDECSYS) += clk-mt6797-vdec.o
+obj-$(CONFIG_COMMON_CLK_MT6797_VENCSYS) += clk-mt6797-venc.o
 obj-$(CONFIG_COMMON_CLK_MT2701) += clk-mt2701.o
 obj-$(CONFIG_COMMON_CLK_MT2701_BDPSYS) += clk-mt2701-bdp.o
 obj-$(CONFIG_COMMON_CLK_MT2701_ETHSYS) += clk-mt2701-eth.o
diff --git a/drivers/clk/mediatek/clk-mt2701-eth.c b/drivers/clk/mediatek/clk-mt2701-eth.c
index 877be8715afa..9251a6551522 100644
--- a/drivers/clk/mediatek/clk-mt2701-eth.c
+++ b/drivers/clk/mediatek/clk-mt2701-eth.c
@@ -66,6 +66,8 @@ static int clk_mt2701_eth_probe(struct platform_device *pdev)
 			"could not register clock provider: %s: %d\n",
 			pdev->name, r);
 
+	mtk_register_reset_controller(node, 1, 0x34);
+
 	return r;
 }
 
diff --git a/drivers/clk/mediatek/clk-mt6797-img.c b/drivers/clk/mediatek/clk-mt6797-img.c
new file mode 100644
index 000000000000..94cc48065918
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt6797-img.c
@@ -0,0 +1,76 @@
+/* Copyright (c) 2017 MediaTek Inc.
+ * Author: Kevin Chen <kevin-cw.chen@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+#include <dt-bindings/clock/mt6797-clk.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+static const struct mtk_gate_regs img_cg_regs = {
+	.set_ofs = 0x0004,
+	.clr_ofs = 0x0008,
+	.sta_ofs = 0x0000,
+};
+
+#define GATE_IMG(_id, _name, _parent, _shift) {		\
+		.id = _id,				\
+		.name = _name,				\
+		.parent_name = _parent,			\
+		.regs = &img_cg_regs,			\
+		.shift = _shift,			\
+		.ops = &mtk_clk_gate_ops_setclr,	\
+	}
+
+static const struct mtk_gate img_clks[] = {
+	GATE_IMG(CLK_IMG_FDVT, "img_fdvt", "mm_sel", 11),
+	GATE_IMG(CLK_IMG_DPE, "img_dpe", "mm_sel", 10),
+	GATE_IMG(CLK_IMG_DIP, "img_dip", "mm_sel", 6),
+	GATE_IMG(CLK_IMG_LARB6, "img_larb6", "mm_sel", 0),
+};
+
+static const struct of_device_id of_match_clk_mt6797_img[] = {
+	{ .compatible = "mediatek,mt6797-imgsys", },
+	{}
+};
+
+static int clk_mt6797_img_probe(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	int r;
+	struct device_node *node = pdev->dev.of_node;
+
+	clk_data = mtk_alloc_clk_data(CLK_IMG_NR);
+
+	mtk_clk_register_gates(node, img_clks, ARRAY_SIZE(img_clks),
+			       clk_data);
+
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+	if (r)
+		dev_err(&pdev->dev,
+			"could not register clock provider: %s: %d\n",
+			pdev->name, r);
+
+	return r;
+}
+
+static struct platform_driver clk_mt6797_img_drv = {
+	.probe = clk_mt6797_img_probe,
+	.driver = {
+		.name = "clk-mt6797-img",
+		.of_match_table = of_match_clk_mt6797_img,
+	},
+};
+
+builtin_platform_driver(clk_mt6797_img_drv);
diff --git a/drivers/clk/mediatek/clk-mt6797-mm.c b/drivers/clk/mediatek/clk-mt6797-mm.c
new file mode 100644
index 000000000000..c57d3eed270d
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt6797-mm.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2017 MediaTek Inc.
+ * Author: Kevin Chen <kevin-cw.chen@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+#include <dt-bindings/clock/mt6797-clk.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+static const struct mtk_gate_regs mm0_cg_regs = {
+	.set_ofs = 0x0104,
+	.clr_ofs = 0x0108,
+	.sta_ofs = 0x0100,
+};
+
+static const struct mtk_gate_regs mm1_cg_regs = {
+	.set_ofs = 0x0114,
+	.clr_ofs = 0x0118,
+	.sta_ofs = 0x0110,
+};
+
+#define GATE_MM0(_id, _name, _parent, _shift) {			\
+	.id = _id,					\
+	.name = _name,					\
+	.parent_name = _parent,				\
+	.regs = &mm0_cg_regs,				\
+	.shift = _shift,				\
+	.ops = &mtk_clk_gate_ops_setclr,		\
+}
+
+#define GATE_MM1(_id, _name, _parent, _shift) {			\
+	.id = _id,					\
+	.name = _name,					\
+	.parent_name = _parent,				\
+	.regs = &mm1_cg_regs,				\
+	.shift = _shift,				\
+	.ops = &mtk_clk_gate_ops_setclr,		\
+}
+
+static const struct mtk_gate mm_clks[] = {
+	GATE_MM0(CLK_MM_SMI_COMMON, "mm_smi_common", "mm_sel", 0),
+	GATE_MM0(CLK_MM_SMI_LARB0, "mm_smi_larb0", "mm_sel", 1),
+	GATE_MM0(CLK_MM_SMI_LARB5, "mm_smi_larb5", "mm_sel", 2),
+	GATE_MM0(CLK_MM_CAM_MDP, "mm_cam_mdp", "mm_sel", 3),
+	GATE_MM0(CLK_MM_MDP_RDMA0, "mm_mdp_rdma0", "mm_sel", 4),
+	GATE_MM0(CLK_MM_MDP_RDMA1, "mm_mdp_rdma1", "mm_sel", 5),
+	GATE_MM0(CLK_MM_MDP_RSZ0, "mm_mdp_rsz0", "mm_sel", 6),
+	GATE_MM0(CLK_MM_MDP_RSZ1, "mm_mdp_rsz1", "mm_sel", 7),
+	GATE_MM0(CLK_MM_MDP_RSZ2, "mm_mdp_rsz2", "mm_sel", 8),
+	GATE_MM0(CLK_MM_MDP_TDSHP, "mm_mdp_tdshp", "mm_sel", 9),
+	GATE_MM0(CLK_MM_MDP_COLOR, "mm_mdp_color", "mm_sel", 10),
+	GATE_MM0(CLK_MM_MDP_WDMA, "mm_mdp_wdma", "mm_sel", 11),
+	GATE_MM0(CLK_MM_MDP_WROT0, "mm_mdp_wrot0", "mm_sel", 12),
+	GATE_MM0(CLK_MM_MDP_WROT1, "mm_mdp_wrot1", "mm_sel", 13),
+	GATE_MM0(CLK_MM_FAKE_ENG, "mm_fake_eng", "mm_sel", 14),
+	GATE_MM0(CLK_MM_DISP_OVL0, "mm_disp_ovl0", "mm_sel", 15),
+	GATE_MM0(CLK_MM_DISP_OVL1, "mm_disp_ovl1", "mm_sel", 16),
+	GATE_MM0(CLK_MM_DISP_OVL0_2L, "mm_disp_ovl0_2l", "mm_sel", 17),
+	GATE_MM0(CLK_MM_DISP_OVL1_2L, "mm_disp_ovl1_2l", "mm_sel", 18),
+	GATE_MM0(CLK_MM_DISP_RDMA0, "mm_disp_rdma0", "mm_sel", 19),
+	GATE_MM0(CLK_MM_DISP_RDMA1, "mm_disp_rdma1", "mm_sel", 20),
+	GATE_MM0(CLK_MM_DISP_WDMA0, "mm_disp_wdma0", "mm_sel", 21),
+	GATE_MM0(CLK_MM_DISP_WDMA1, "mm_disp_wdma1", "mm_sel", 22),
+	GATE_MM0(CLK_MM_DISP_COLOR, "mm_disp_color", "mm_sel", 23),
+	GATE_MM0(CLK_MM_DISP_CCORR, "mm_disp_ccorr", "mm_sel", 24),
+	GATE_MM0(CLK_MM_DISP_AAL, "mm_disp_aal", "mm_sel", 25),
+	GATE_MM0(CLK_MM_DISP_GAMMA, "mm_disp_gamma", "mm_sel", 26),
+	GATE_MM0(CLK_MM_DISP_OD, "mm_disp_od", "mm_sel", 27),
+	GATE_MM0(CLK_MM_DISP_DITHER, "mm_disp_dither", "mm_sel", 28),
+	GATE_MM0(CLK_MM_DISP_UFOE, "mm_disp_ufoe", "mm_sel", 29),
+	GATE_MM0(CLK_MM_DISP_DSC, "mm_disp_dsc", "mm_sel", 30),
+	GATE_MM0(CLK_MM_DISP_SPLIT, "mm_disp_split", "mm_sel", 31),
+	GATE_MM1(CLK_MM_DSI0_MM_CLOCK, "mm_dsi0_mm_clock", "mm_sel", 0),
+	GATE_MM1(CLK_MM_DSI1_MM_CLOCK, "mm_dsi1_mm_clock", "mm_sel", 2),
+	GATE_MM1(CLK_MM_DPI_MM_CLOCK, "mm_dpi_mm_clock", "mm_sel", 4),
+	GATE_MM1(CLK_MM_DPI_INTERFACE_CLOCK, "mm_dpi_interface_clock",
+		 "dpi0_sel", 5),
+	GATE_MM1(CLK_MM_LARB4_AXI_ASIF_MM_CLOCK, "mm_larb4_axi_asif_mm_clock",
+		 "mm_sel", 6),
+	GATE_MM1(CLK_MM_LARB4_AXI_ASIF_MJC_CLOCK, "mm_larb4_axi_asif_mjc_clock",
+		 "mjc_sel", 7),
+	GATE_MM1(CLK_MM_DISP_OVL0_MOUT_CLOCK, "mm_disp_ovl0_mout_clock",
+		 "mm_sel", 8),
+	GATE_MM1(CLK_MM_FAKE_ENG2, "mm_fake_eng2", "mm_sel", 9),
+	GATE_MM1(CLK_MM_DSI0_INTERFACE_CLOCK, "mm_dsi0_interface_clock",
+		 "clk26m", 1),
+	GATE_MM1(CLK_MM_DSI1_INTERFACE_CLOCK, "mm_dsi1_interface_clock",
+		 "clk26m", 3),
+};
+
+static const struct of_device_id of_match_clk_mt6797_mm[] = {
+	{ .compatible = "mediatek,mt6797-mmsys", },
+	{}
+};
+
+static int clk_mt6797_mm_probe(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	int r;
+	struct device_node *node = pdev->dev.of_node;
+
+	clk_data = mtk_alloc_clk_data(CLK_MM_NR);
+
+	mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks),
+			       clk_data);
+
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+	if (r)
+		dev_err(&pdev->dev,
+			"could not register clock provider: %s: %d\n",
+			pdev->name, r);
+
+	return r;
+}
+
+static struct platform_driver clk_mt6797_mm_drv = {
+	.probe = clk_mt6797_mm_probe,
+	.driver = {
+		.name = "clk-mt6797-mm",
+		.of_match_table = of_match_clk_mt6797_mm,
+	},
+};
+
+builtin_platform_driver(clk_mt6797_mm_drv);
diff --git a/drivers/clk/mediatek/clk-mt6797-vdec.c b/drivers/clk/mediatek/clk-mt6797-vdec.c
new file mode 100644
index 000000000000..7c402ca6c0b2
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt6797-vdec.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2017 MediaTek Inc.
+ * Author: Kevin-CW Chen <kevin-cw.chen@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt6797-clk.h>
+
+static const struct mtk_gate_regs vdec0_cg_regs = {
+	.set_ofs = 0x0000,
+	.clr_ofs = 0x0004,
+	.sta_ofs = 0x0000,
+};
+
+static const struct mtk_gate_regs vdec1_cg_regs = {
+	.set_ofs = 0x0008,
+	.clr_ofs = 0x000c,
+	.sta_ofs = 0x0008,
+};
+
+#define GATE_VDEC0(_id, _name, _parent, _shift) {		\
+	.id = _id,					\
+	.name = _name,					\
+	.parent_name = _parent,				\
+	.regs = &vdec0_cg_regs,				\
+	.shift = _shift,				\
+	.ops = &mtk_clk_gate_ops_setclr_inv,		\
+}
+
+#define GATE_VDEC1(_id, _name, _parent, _shift) {		\
+	.id = _id,					\
+	.name = _name,					\
+	.parent_name = _parent,				\
+	.regs = &vdec1_cg_regs,				\
+	.shift = _shift,				\
+	.ops = &mtk_clk_gate_ops_setclr_inv,		\
+}
+
+static const struct mtk_gate vdec_clks[] = {
+	GATE_VDEC0(CLK_VDEC_CKEN_ENG, "vdec_cken_eng", "vdec_sel", 8),
+	GATE_VDEC0(CLK_VDEC_ACTIVE, "vdec_active", "vdec_sel", 4),
+	GATE_VDEC0(CLK_VDEC_CKEN, "vdec_cken", "vdec_sel", 0),
+	GATE_VDEC1(CLK_VDEC_LARB1_CKEN, "vdec_larb1_cken", "mm_sel", 0),
+};
+
+static const struct of_device_id of_match_clk_mt6797_vdec[] = {
+	{ .compatible = "mediatek,mt6797-vdecsys", },
+	{}
+};
+
+static int clk_mt6797_vdec_probe(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	int r;
+	struct device_node *node = pdev->dev.of_node;
+
+	clk_data = mtk_alloc_clk_data(CLK_VDEC_NR);
+
+	mtk_clk_register_gates(node, vdec_clks, ARRAY_SIZE(vdec_clks),
+			       clk_data);
+
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+	if (r)
+		dev_err(&pdev->dev,
+			"could not register clock provider: %s: %d\n",
+			pdev->name, r);
+
+	return r;
+}
+
+static struct platform_driver clk_mt6797_vdec_drv = {
+	.probe = clk_mt6797_vdec_probe,
+	.driver = {
+		.name = "clk-mt6797-vdec",
+		.of_match_table = of_match_clk_mt6797_vdec,
+	},
+};
+
+builtin_platform_driver(clk_mt6797_vdec_drv);
diff --git a/drivers/clk/mediatek/clk-mt6797-venc.c b/drivers/clk/mediatek/clk-mt6797-venc.c
new file mode 100644
index 000000000000..e73d51756f13
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt6797-venc.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 MediaTek Inc.
+ * Author: Kevin Chen <kevin-cw.chen@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt6797-clk.h>
+
+static const struct mtk_gate_regs venc_cg_regs = {
+	.set_ofs = 0x0004,
+	.clr_ofs = 0x0008,
+	.sta_ofs = 0x0000,
+};
+
+#define GATE_VENC(_id, _name, _parent, _shift) {	\
+		.id = _id,				\
+		.name = _name,				\
+		.parent_name = _parent,			\
+		.regs = &venc_cg_regs,			\
+		.shift = _shift,			\
+		.ops = &mtk_clk_gate_ops_setclr_inv,	\
+	}
+
+static const struct mtk_gate venc_clks[] = {
+	GATE_VENC(CLK_VENC_0, "venc_0", "mm_sel", 0),
+	GATE_VENC(CLK_VENC_1, "venc_1", "venc_sel", 4),
+	GATE_VENC(CLK_VENC_2, "venc_2", "venc_sel", 8),
+	GATE_VENC(CLK_VENC_3, "venc_3", "venc_sel", 12),
+};
+
+static const struct of_device_id of_match_clk_mt6797_venc[] = {
+	{ .compatible = "mediatek,mt6797-vencsys", },
+	{}
+};
+
+static int clk_mt6797_venc_probe(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	int r;
+	struct device_node *node = pdev->dev.of_node;
+
+	clk_data = mtk_alloc_clk_data(CLK_VENC_NR);
+
+	mtk_clk_register_gates(node, venc_clks, ARRAY_SIZE(venc_clks),
+			       clk_data);
+
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+	if (r)
+		dev_err(&pdev->dev,
+			"could not register clock provider: %s: %d\n",
+			pdev->name, r);
+
+	return r;
+}
+
+static struct platform_driver clk_mt6797_venc_drv = {
+	.probe = clk_mt6797_venc_probe,
+	.driver = {
+		.name = "clk-mt6797-venc",
+		.of_match_table = of_match_clk_mt6797_venc,
+	},
+};
+
+builtin_platform_driver(clk_mt6797_venc_drv);
diff --git a/drivers/clk/mediatek/clk-mt6797.c b/drivers/clk/mediatek/clk-mt6797.c
new file mode 100644
index 000000000000..5702bc974ed9
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt6797.c
@@ -0,0 +1,714 @@
+/*
+ * Copyright (c) 2016 MediaTek Inc.
+ * Author: Kevin Chen <kevin-cw.chen@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt6797-clk.h>
+
+/*
+ * For some clocks, we don't care what their actual rates are. And these
+ * clocks may change their rate on different products or different scenarios.
+ * So we model these clocks' rate as 0, to denote it's not an actual rate.
+ */
+
+static DEFINE_SPINLOCK(mt6797_clk_lock);
+
+static const struct mtk_fixed_factor top_fixed_divs[] = {
+	FACTOR(CLK_TOP_SYSPLL_CK, "syspll_ck", "mainpll", 1, 1),
+	FACTOR(CLK_TOP_SYSPLL_D2, "syspll_d2", "mainpll", 1, 2),
+	FACTOR(CLK_TOP_SYSPLL1_D2, "syspll1_d2", "syspll_d2", 1, 2),
+	FACTOR(CLK_TOP_SYSPLL1_D4, "syspll1_d4", "syspll_d2", 1, 4),
+	FACTOR(CLK_TOP_SYSPLL1_D8, "syspll1_d8", "syspll_d2", 1, 8),
+	FACTOR(CLK_TOP_SYSPLL1_D16, "syspll1_d16", "syspll_d2", 1, 16),
+	FACTOR(CLK_TOP_SYSPLL_D3, "syspll_d3", "mainpll", 1, 3),
+	FACTOR(CLK_TOP_SYSPLL_D3_D3, "syspll_d3_d3", "syspll_d3", 1, 3),
+	FACTOR(CLK_TOP_SYSPLL2_D2, "syspll2_d2", "syspll_d3", 1, 2),
+	FACTOR(CLK_TOP_SYSPLL2_D4, "syspll2_d4", "syspll_d3", 1, 4),
+	FACTOR(CLK_TOP_SYSPLL2_D8, "syspll2_d8", "syspll_d3", 1, 8),
+	FACTOR(CLK_TOP_SYSPLL_D5, "syspll_d5", "mainpll", 1, 5),
+	FACTOR(CLK_TOP_SYSPLL3_D2, "syspll3_d2", "syspll_d5", 1, 2),
+	FACTOR(CLK_TOP_SYSPLL3_D4, "syspll3_d4", "syspll_d5", 1, 4),
+	FACTOR(CLK_TOP_SYSPLL_D7, "syspll_d7", "mainpll", 1, 7),
+	FACTOR(CLK_TOP_SYSPLL4_D2, "syspll4_d2", "syspll_d7", 1, 2),
+	FACTOR(CLK_TOP_SYSPLL4_D4, "syspll4_d4", "syspll_d7", 1, 4),
+	FACTOR(CLK_TOP_UNIVPLL_CK, "univpll_ck", "univpll", 1, 1),
+	FACTOR(CLK_TOP_UNIVPLL_D7, "univpll_d7", "univpll", 1, 7),
+	FACTOR(CLK_TOP_UNIVPLL_D26, "univpll_d26", "univpll", 1, 26),
+	FACTOR(CLK_TOP_SSUSB_PHY_48M_CK, "ssusb_phy_48m_ck", "univpll", 1, 1),
+	FACTOR(CLK_TOP_USB_PHY48M_CK, "usb_phy48m_ck", "univpll", 1, 1),
+	FACTOR(CLK_TOP_UNIVPLL_D2, "univpll_d2", "univpll", 1, 2),
+	FACTOR(CLK_TOP_UNIVPLL1_D2, "univpll1_d2", "univpll_d2", 1, 2),
+	FACTOR(CLK_TOP_UNIVPLL1_D4, "univpll1_d4", "univpll_d2", 1, 4),
+	FACTOR(CLK_TOP_UNIVPLL1_D8, "univpll1_d8", "univpll_d2", 1, 8),
+	FACTOR(CLK_TOP_UNIVPLL_D3, "univpll_d3", "univpll", 1, 3),
+	FACTOR(CLK_TOP_UNIVPLL2_D2, "univpll2_d2", "univpll", 1, 2),
+	FACTOR(CLK_TOP_UNIVPLL2_D4, "univpll2_d4", "univpll", 1, 4),
+	FACTOR(CLK_TOP_UNIVPLL2_D8, "univpll2_d8", "univpll", 1, 8),
+	FACTOR(CLK_TOP_UNIVPLL_D5, "univpll_d5", "univpll", 1, 5),
+	FACTOR(CLK_TOP_UNIVPLL3_D2, "univpll3_d2", "univpll_d5", 1, 2),
+	FACTOR(CLK_TOP_UNIVPLL3_D4, "univpll3_d4", "univpll_d5", 1, 4),
+	FACTOR(CLK_TOP_UNIVPLL3_D8, "univpll3_d8", "univpll_d5", 1, 8),
+	FACTOR(CLK_TOP_ULPOSC_CK_ORG, "ulposc_ck_org", "ulposc", 1, 1),
+	FACTOR(CLK_TOP_ULPOSC_CK, "ulposc_ck", "ulposc_ck_org", 1, 3),
+	FACTOR(CLK_TOP_ULPOSC_D2, "ulposc_d2", "ulposc_ck", 1, 2),
+	FACTOR(CLK_TOP_ULPOSC_D3, "ulposc_d3", "ulposc_ck", 1, 4),
+	FACTOR(CLK_TOP_ULPOSC_D4, "ulposc_d4", "ulposc_ck", 1, 8),
+	FACTOR(CLK_TOP_ULPOSC_D8, "ulposc_d8", "ulposc_ck", 1, 10),
+	FACTOR(CLK_TOP_ULPOSC_D10, "ulposc_d10", "ulposc_ck_org", 1, 1),
+	FACTOR(CLK_TOP_APLL1_CK, "apll1_ck", "apll1", 1, 1),
+	FACTOR(CLK_TOP_APLL2_CK, "apll2_ck", "apll2", 1, 1),
+	FACTOR(CLK_TOP_MFGPLL_CK, "mfgpll_ck", "mfgpll", 1, 1),
+	FACTOR(CLK_TOP_MFGPLL_D2, "mfgpll_d2", "mfgpll_ck", 1, 2),
+	FACTOR(CLK_TOP_IMGPLL_CK, "imgpll_ck", "imgpll", 1, 1),
+	FACTOR(CLK_TOP_IMGPLL_D2, "imgpll_d2", "imgpll_ck", 1, 2),
+	FACTOR(CLK_TOP_IMGPLL_D4, "imgpll_d4", "imgpll_ck", 1, 4),
+	FACTOR(CLK_TOP_CODECPLL_CK, "codecpll_ck", "codecpll", 1, 1),
+	FACTOR(CLK_TOP_CODECPLL_D2, "codecpll_d2", "codecpll_ck", 1, 2),
+	FACTOR(CLK_TOP_VDECPLL_CK, "vdecpll_ck", "vdecpll", 1, 1),
+	FACTOR(CLK_TOP_TVDPLL_CK, "tvdpll_ck", "tvdpll", 1, 1),
+	FACTOR(CLK_TOP_TVDPLL_D2, "tvdpll_d2", "tvdpll_ck", 1, 2),
+	FACTOR(CLK_TOP_TVDPLL_D4, "tvdpll_d4", "tvdpll_ck", 1, 4),
+	FACTOR(CLK_TOP_TVDPLL_D8, "tvdpll_d8", "tvdpll_ck", 1, 8),
+	FACTOR(CLK_TOP_TVDPLL_D16, "tvdpll_d16", "tvdpll_ck", 1, 16),
+	FACTOR(CLK_TOP_MSDCPLL_CK, "msdcpll_ck", "msdcpll", 1, 1),
+	FACTOR(CLK_TOP_MSDCPLL_D2, "msdcpll_d2", "msdcpll_ck", 1, 2),
+	FACTOR(CLK_TOP_MSDCPLL_D4, "msdcpll_d4", "msdcpll_ck", 1, 4),
+	FACTOR(CLK_TOP_MSDCPLL_D8, "msdcpll_d8", "msdcpll_ck", 1, 8),
+};
+
+static const char * const axi_parents[] = {
+	"clk26m",
+	"syspll_d7",
+	"ulposc_axi_ck_mux",
+};
+
+static const char * const ulposc_axi_ck_mux_parents[] = {
+	"syspll1_d4",
+	"ulposc_axi_ck_mux_pre",
+};
+
+static const char * const ulposc_axi_ck_mux_pre_parents[] = {
+	"ulposc_d2",
+	"ulposc_d3",
+};
+
+static const char * const ddrphycfg_parents[] = {
+	"clk26m",
+	"syspll3_d2",
+	"syspll2_d4",
+	"syspll1_d8",
+};
+
+static const char * const mm_parents[] = {
+	"clk26m",
+	"imgpll_ck",
+	"univpll1_d2",
+	"syspll1_d2",
+};
+
+static const char * const pwm_parents[] = {
+	"clk26m",
+	"univpll2_d4",
+	"ulposc_d2",
+	"ulposc_d3",
+	"ulposc_d8",
+	"ulposc_d10",
+	"ulposc_d4",
+};
+
+static const char * const vdec_parents[] = {
+	"clk26m",
+	"vdecpll_ck",
+	"imgpll_ck",
+	"syspll_d3",
+	"univpll_d5",
+	"clk26m",
+	"clk26m",
+};
+
+static const char * const venc_parents[] = {
+	"clk26m",
+	"codecpll_ck",
+	"syspll_d3",
+};
+
+static const char * const mfg_parents[] = {
+	"clk26m",
+	"mfgpll_ck",
+	"syspll_d3",
+	"univpll_d3",
+};
+
+static const char * const camtg[] = {
+	"clk26m",
+	"univpll_d26",
+	"univpll2_d2",
+};
+
+static const char * const uart_parents[] = {
+	"clk26m",
+	"univpll2_d8",
+};
+
+static const char * const spi_parents[] = {
+	"clk26m",
+	"syspll3_d2",
+	"syspll2_d4",
+	"ulposc_spi_ck_mux",
+};
+
+static const char * const ulposc_spi_ck_mux_parents[] = {
+	"ulposc_d2",
+	"ulposc_d3",
+};
+
+static const char * const usb20_parents[] = {
+	"clk26m",
+	"univpll1_d8",
+	"syspll4_d2",
+};
+
+static const char * const msdc50_0_hclk_parents[] = {
+	"clk26m",
+	"syspll1_d2",
+	"syspll2_d2",
+	"syspll4_d2",
+};
+
+static const char * const msdc50_0_parents[] = {
+	"clk26m",
+	"msdcpll",
+	"syspll_d3",
+	"univpll1_d4",
+	"syspll2_d2",
+	"syspll_d7",
+	"msdcpll_d2",
+	"univpll1_d2",
+	"univpll_d3",
+};
+
+static const char * const msdc30_1_parents[] = {
+	"clk26m",
+	"univpll2_d2",
+	"msdcpll_d2",
+	"univpll1_d4",
+	"syspll2_d2",
+	"syspll_d7",
+	"univpll_d7",
+};
+
+static const char * const msdc30_2_parents[] = {
+	"clk26m",
+	"univpll2_d8",
+	"syspll2_d8",
+	"syspll1_d8",
+	"msdcpll_d8",
+	"syspll3_d4",
+	"univpll_d26",
+};
+
+static const char * const audio_parents[] = {
+	"clk26m",
+	"syspll3_d4",
+	"syspll4_d4",
+	"syspll1_d16",
+};
+
+static const char * const aud_intbus_parents[] = {
+	"clk26m",
+	"syspll1_d4",
+	"syspll4_d2",
+};
+
+static const char * const pmicspi_parents[] = {
+	"clk26m",
+	"univpll_d26",
+	"syspll3_d4",
+	"syspll1_d8",
+	"ulposc_d4",
+	"ulposc_d8",
+	"syspll2_d8",
+};
+
+static const char * const scp_parents[] = {
+	"clk26m",
+	"syspll_d3",
+	"ulposc_ck",
+	"univpll_d5",
+};
+
+static const char * const atb_parents[] = {
+	"clk26m",
+	"syspll1_d2",
+	"syspll_d5",
+};
+
+static const char * const mjc_parents[] = {
+	"clk26m",
+	"imgpll_ck",
+	"univpll_d5",
+	"syspll1_d2",
+};
+
+static const char * const dpi0_parents[] = {
+	"clk26m",
+	"tvdpll_d2",
+	"tvdpll_d4",
+	"tvdpll_d8",
+	"tvdpll_d16",
+	"clk26m",
+	"clk26m",
+};
+
+static const char * const aud_1_parents[] = {
+	"clk26m",
+	"apll1_ck",
+};
+
+static const char * const aud_2_parents[] = {
+	"clk26m",
+	"apll2_ck",
+};
+
+static const char * const ssusb_top_sys_parents[] = {
+	"clk26m",
+	"univpll3_d2",
+};
+
+static const char * const spm_parents[] = {
+	"clk26m",
+	"syspll1_d8",
+};
+
+static const char * const bsi_spi_parents[] = {
+	"clk26m",
+	"syspll_d3_d3",
+	"syspll1_d4",
+	"syspll_d7",
+};
+
+static const char * const audio_h_parents[] = {
+	"clk26m",
+	"apll2_ck",
+	"apll1_ck",
+	"univpll_d7",
+};
+
+static const char * const mfg_52m_parents[] = {
+	"clk26m",
+	"univpll2_d8",
+	"univpll2_d4",
+	"univpll2_d4",
+};
+
+static const char * const anc_md32_parents[] = {
+	"clk26m",
+	"syspll1_d2",
+	"univpll_d5",
+};
+
+static const struct mtk_composite top_muxes[] = {
+	MUX(CLK_TOP_MUX_ULPOSC_AXI_CK_MUX_PRE, "ulposc_axi_ck_mux_pre",
+	    ulposc_axi_ck_mux_pre_parents, 0x0040, 3, 1),
+	MUX(CLK_TOP_MUX_ULPOSC_AXI_CK_MUX, "ulposc_axi_ck_mux",
+	    ulposc_axi_ck_mux_parents, 0x0040, 2, 1),
+	MUX(CLK_TOP_MUX_AXI, "axi_sel", axi_parents,
+	    0x0040, 0, 2),
+	MUX(CLK_TOP_MUX_DDRPHYCFG, "ddrphycfg_sel", ddrphycfg_parents,
+	    0x0040, 16, 2),
+	MUX(CLK_TOP_MUX_MM, "mm_sel", mm_parents,
+	    0x0040, 24, 2),
+	MUX_GATE(CLK_TOP_MUX_PWM, "pwm_sel", pwm_parents, 0x0050, 0, 3, 7),
+	MUX_GATE(CLK_TOP_MUX_VDEC, "vdec_sel", vdec_parents, 0x0050, 8, 3, 15),
+	MUX_GATE(CLK_TOP_MUX_VENC, "venc_sel", venc_parents, 0x0050, 16, 2, 23),
+	MUX_GATE(CLK_TOP_MUX_MFG, "mfg_sel", mfg_parents, 0x0050, 24, 2, 31),
+	MUX_GATE(CLK_TOP_MUX_CAMTG, "camtg_sel", camtg, 0x0060, 0, 2, 7),
+	MUX_GATE(CLK_TOP_MUX_UART, "uart_sel", uart_parents, 0x0060, 8, 1, 15),
+	MUX_GATE(CLK_TOP_MUX_SPI, "spi_sel", spi_parents, 0x0060, 16, 2, 23),
+	MUX(CLK_TOP_MUX_ULPOSC_SPI_CK_MUX, "ulposc_spi_ck_mux",
+	    ulposc_spi_ck_mux_parents, 0x0060, 18, 1),
+	MUX_GATE(CLK_TOP_MUX_USB20, "usb20_sel", usb20_parents,
+		 0x0060, 24, 2, 31),
+	MUX(CLK_TOP_MUX_MSDC50_0_HCLK, "msdc50_0_hclk_sel",
+	    msdc50_0_hclk_parents, 0x0070, 8, 2),
+	MUX_GATE(CLK_TOP_MUX_MSDC50_0, "msdc50_0_sel", msdc50_0_parents,
+		 0x0070, 16, 4, 23),
+	MUX_GATE(CLK_TOP_MUX_MSDC30_1, "msdc30_1_sel", msdc30_1_parents,
+		 0x0070, 24, 3, 31),
+	MUX_GATE(CLK_TOP_MUX_MSDC30_2, "msdc30_2_sel", msdc30_2_parents,
+		 0x0080, 0, 3, 7),
+	MUX_GATE(CLK_TOP_MUX_AUDIO, "audio_sel", audio_parents,
+		 0x0080, 16, 2, 23),
+	MUX(CLK_TOP_MUX_AUD_INTBUS, "aud_intbus_sel", aud_intbus_parents,
+	    0x0080, 24, 2),
+	MUX(CLK_TOP_MUX_PMICSPI, "pmicspi_sel", pmicspi_parents,
+	    0x0090, 0, 3),
+	MUX(CLK_TOP_MUX_SCP, "scp_sel", scp_parents,
+	    0x0090, 8, 2),
+	MUX(CLK_TOP_MUX_ATB, "atb_sel", atb_parents,
+	    0x0090, 16, 2),
+	MUX_GATE(CLK_TOP_MUX_MJC, "mjc_sel", mjc_parents, 0x0090, 24, 2, 31),
+	MUX_GATE(CLK_TOP_MUX_DPI0, "dpi0_sel", dpi0_parents, 0x00A0, 0, 3, 7),
+	MUX_GATE(CLK_TOP_MUX_AUD_1, "aud_1_sel", aud_1_parents,
+		 0x00A0, 16, 1, 23),
+	MUX_GATE(CLK_TOP_MUX_AUD_2, "aud_2_sel", aud_2_parents,
+		 0x00A0, 24, 1, 31),
+	MUX(CLK_TOP_MUX_SSUSB_TOP_SYS, "ssusb_top_sys_sel",
+	    ssusb_top_sys_parents, 0x00B0, 8, 1),
+	MUX(CLK_TOP_MUX_SPM, "spm_sel", spm_parents,
+	    0x00C0, 0, 1),
+	MUX(CLK_TOP_MUX_BSI_SPI, "bsi_spi_sel", bsi_spi_parents,
+	    0x00C0, 8, 2),
+	MUX_GATE(CLK_TOP_MUX_AUDIO_H, "audio_h_sel", audio_h_parents,
+		 0x00C0, 16, 2, 23),
+	MUX_GATE(CLK_TOP_MUX_ANC_MD32, "anc_md32_sel", anc_md32_parents,
+		 0x00C0, 24, 2, 31),
+	MUX(CLK_TOP_MUX_MFG_52M, "mfg_52m_sel", mfg_52m_parents,
+	    0x0104, 1, 2),
+};
+
+static int mtk_topckgen_init(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	void __iomem *base;
+	struct device_node *node = pdev->dev.of_node;
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	clk_data = mtk_alloc_clk_data(CLK_TOP_NR);
+
+	mtk_clk_register_factors(top_fixed_divs, ARRAY_SIZE(top_fixed_divs),
+				 clk_data);
+
+	mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base,
+				    &mt6797_clk_lock, clk_data);
+
+	return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+static const struct mtk_gate_regs infra0_cg_regs = {
+	.set_ofs = 0x0080,
+	.clr_ofs = 0x0084,
+	.sta_ofs = 0x0090,
+};
+
+static const struct mtk_gate_regs infra1_cg_regs = {
+	.set_ofs = 0x0088,
+	.clr_ofs = 0x008c,
+	.sta_ofs = 0x0094,
+};
+
+static const struct mtk_gate_regs infra2_cg_regs = {
+	.set_ofs = 0x00a8,
+	.clr_ofs = 0x00ac,
+	.sta_ofs = 0x00b0,
+};
+
+#define GATE_ICG0(_id, _name, _parent, _shift) {	\
+	.id = _id,					\
+	.name = _name,					\
+	.parent_name = _parent,				\
+	.regs = &infra0_cg_regs,			\
+	.shift = _shift,				\
+	.ops = &mtk_clk_gate_ops_setclr,		\
+}
+
+#define GATE_ICG1(_id, _name, _parent, _shift) {	\
+	.id = _id,					\
+	.name = _name,					\
+	.parent_name = _parent,				\
+	.regs = &infra1_cg_regs,			\
+	.shift = _shift,				\
+	.ops = &mtk_clk_gate_ops_setclr,		\
+}
+
+#define GATE_ICG2(_id, _name, _parent, _shift) {	\
+	.id = _id,					\
+	.name = _name,					\
+	.parent_name = _parent,				\
+	.regs = &infra2_cg_regs,			\
+	.shift = _shift,				\
+	.ops = &mtk_clk_gate_ops_setclr,		\
+}
+
+static const struct mtk_gate infra_clks[] = {
+	GATE_ICG0(CLK_INFRA_PMIC_TMR, "infra_pmic_tmr", "ulposc", 0),
+	GATE_ICG0(CLK_INFRA_PMIC_AP, "infra_pmic_ap", "pmicspi_sel", 1),
+	GATE_ICG0(CLK_INFRA_PMIC_MD, "infra_pmic_md", "pmicspi_sel", 2),
+	GATE_ICG0(CLK_INFRA_PMIC_CONN, "infra_pmic_conn", "pmicspi_sel", 3),
+	GATE_ICG0(CLK_INFRA_SCP, "infra_scp", "scp_sel", 4),
+	GATE_ICG0(CLK_INFRA_SEJ, "infra_sej", "axi_sel", 5),
+	GATE_ICG0(CLK_INFRA_APXGPT, "infra_apxgpt", "axi_sel", 6),
+	GATE_ICG0(CLK_INFRA_SEJ_13M, "infra_sej_13m", "clk26m", 7),
+	GATE_ICG0(CLK_INFRA_ICUSB, "infra_icusb", "usb20_sel", 8),
+	GATE_ICG0(CLK_INFRA_GCE, "infra_gce", "axi_sel", 9),
+	GATE_ICG0(CLK_INFRA_THERM, "infra_therm", "axi_sel", 10),
+	GATE_ICG0(CLK_INFRA_I2C0, "infra_i2c0", "axi_sel", 11),
+	GATE_ICG0(CLK_INFRA_I2C1, "infra_i2c1", "axi_sel", 12),
+	GATE_ICG0(CLK_INFRA_I2C2, "infra_i2c2", "axi_sel", 13),
+	GATE_ICG0(CLK_INFRA_I2C3, "infra_i2c3", "axi_sel", 14),
+	GATE_ICG0(CLK_INFRA_PWM_HCLK, "infra_pwm_hclk", "axi_sel", 15),
+	GATE_ICG0(CLK_INFRA_PWM1, "infra_pwm1", "axi_sel", 16),
+	GATE_ICG0(CLK_INFRA_PWM2, "infra_pwm2", "axi_sel", 17),
+	GATE_ICG0(CLK_INFRA_PWM3, "infra_pwm3", "axi_sel", 18),
+	GATE_ICG0(CLK_INFRA_PWM4, "infra_pwm4", "axi_sel", 19),
+	GATE_ICG0(CLK_INFRA_PWM, "infra_pwm", "axi_sel", 21),
+	GATE_ICG0(CLK_INFRA_UART0, "infra_uart0", "uart_sel", 22),
+	GATE_ICG0(CLK_INFRA_UART1, "infra_uart1", "uart_sel", 23),
+	GATE_ICG0(CLK_INFRA_UART2, "infra_uart2", "uart_sel", 24),
+	GATE_ICG0(CLK_INFRA_UART3, "infra_uart3", "uart_sel", 25),
+	GATE_ICG0(CLK_INFRA_MD2MD_CCIF_0, "infra_md2md_ccif_0", "axi_sel", 27),
+	GATE_ICG0(CLK_INFRA_MD2MD_CCIF_1, "infra_md2md_ccif_1", "axi_sel", 28),
+	GATE_ICG0(CLK_INFRA_MD2MD_CCIF_2, "infra_md2md_ccif_2", "axi_sel", 29),
+	GATE_ICG0(CLK_INFRA_FHCTL, "infra_fhctl", "clk26m", 30),
+	GATE_ICG0(CLK_INFRA_BTIF, "infra_btif", "axi_sel", 31),
+	GATE_ICG1(CLK_INFRA_MD2MD_CCIF_3, "infra_md2md_ccif_3", "axi_sel", 0),
+	GATE_ICG1(CLK_INFRA_SPI, "infra_spi", "spi_sel", 1),
+	GATE_ICG1(CLK_INFRA_MSDC0, "infra_msdc0", "msdc50_0_sel", 2),
+	GATE_ICG1(CLK_INFRA_MD2MD_CCIF_4, "infra_md2md_ccif_4", "axi_sel", 3),
+	GATE_ICG1(CLK_INFRA_MSDC1, "infra_msdc1", "msdc30_1_sel", 4),
+	GATE_ICG1(CLK_INFRA_MSDC2, "infra_msdc2", "msdc30_2_sel", 5),
+	GATE_ICG1(CLK_INFRA_MD2MD_CCIF_5, "infra_md2md_ccif_5", "axi_sel", 7),
+	GATE_ICG1(CLK_INFRA_GCPU, "infra_gcpu", "axi_sel", 8),
+	GATE_ICG1(CLK_INFRA_TRNG, "infra_trng", "axi_sel", 9),
+	GATE_ICG1(CLK_INFRA_AUXADC, "infra_auxadc", "clk26m", 10),
+	GATE_ICG1(CLK_INFRA_CPUM, "infra_cpum", "axi_sel", 11),
+	GATE_ICG1(CLK_INFRA_AP_C2K_CCIF_0, "infra_ap_c2k_ccif_0",
+		  "axi_sel", 12),
+	GATE_ICG1(CLK_INFRA_AP_C2K_CCIF_1, "infra_ap_c2k_ccif_1",
+		  "axi_sel", 13),
+	GATE_ICG1(CLK_INFRA_CLDMA, "infra_cldma", "axi_sel", 16),
+	GATE_ICG1(CLK_INFRA_DISP_PWM, "infra_disp_pwm", "pwm_sel", 17),
+	GATE_ICG1(CLK_INFRA_AP_DMA, "infra_ap_dma", "axi_sel", 18),
+	GATE_ICG1(CLK_INFRA_DEVICE_APC, "infra_device_apc", "axi_sel", 20),
+	GATE_ICG1(CLK_INFRA_L2C_SRAM, "infra_l2c_sram", "mm_sel", 22),
+	GATE_ICG1(CLK_INFRA_CCIF_AP, "infra_ccif_ap", "axi_sel", 23),
+	GATE_ICG1(CLK_INFRA_AUDIO, "infra_audio", "axi_sel", 25),
+	GATE_ICG1(CLK_INFRA_CCIF_MD, "infra_ccif_md", "axi_sel", 26),
+	GATE_ICG1(CLK_INFRA_DRAMC_F26M, "infra_dramc_f26m", "clk26m", 31),
+	GATE_ICG2(CLK_INFRA_I2C4, "infra_i2c4", "axi_sel", 0),
+	GATE_ICG2(CLK_INFRA_I2C_APPM, "infra_i2c_appm", "axi_sel", 1),
+	GATE_ICG2(CLK_INFRA_I2C_GPUPM, "infra_i2c_gpupm", "axi_sel", 2),
+	GATE_ICG2(CLK_INFRA_I2C2_IMM, "infra_i2c2_imm", "axi_sel", 3),
+	GATE_ICG2(CLK_INFRA_I2C2_ARB, "infra_i2c2_arb", "axi_sel", 4),
+	GATE_ICG2(CLK_INFRA_I2C3_IMM, "infra_i2c3_imm", "axi_sel", 5),
+	GATE_ICG2(CLK_INFRA_I2C3_ARB, "infra_i2c3_arb", "axi_sel", 6),
+	GATE_ICG2(CLK_INFRA_I2C5, "infra_i2c5", "axi_sel", 7),
+	GATE_ICG2(CLK_INFRA_SYS_CIRQ, "infra_sys_cirq", "axi_sel", 8),
+	GATE_ICG2(CLK_INFRA_SPI1, "infra_spi1", "spi_sel", 10),
+	GATE_ICG2(CLK_INFRA_DRAMC_B_F26M, "infra_dramc_b_f26m", "clk26m", 11),
+	GATE_ICG2(CLK_INFRA_ANC_MD32, "infra_anc_md32", "anc_md32_sel", 12),
+	GATE_ICG2(CLK_INFRA_ANC_MD32_32K, "infra_anc_md32_32k", "clk26m", 13),
+	GATE_ICG2(CLK_INFRA_DVFS_SPM1, "infra_dvfs_spm1", "axi_sel", 15),
+	GATE_ICG2(CLK_INFRA_AES_TOP0, "infra_aes_top0", "axi_sel", 16),
+	GATE_ICG2(CLK_INFRA_AES_TOP1, "infra_aes_top1", "axi_sel", 17),
+	GATE_ICG2(CLK_INFRA_SSUSB_BUS, "infra_ssusb_bus", "axi_sel", 18),
+	GATE_ICG2(CLK_INFRA_SPI2, "infra_spi2", "spi_sel", 19),
+	GATE_ICG2(CLK_INFRA_SPI3, "infra_spi3", "spi_sel", 20),
+	GATE_ICG2(CLK_INFRA_SPI4, "infra_spi4", "spi_sel", 21),
+	GATE_ICG2(CLK_INFRA_SPI5, "infra_spi5", "spi_sel", 22),
+	GATE_ICG2(CLK_INFRA_IRTX, "infra_irtx", "spi_sel", 23),
+	GATE_ICG2(CLK_INFRA_SSUSB_SYS, "infra_ssusb_sys",
+		  "ssusb_top_sys_sel", 24),
+	GATE_ICG2(CLK_INFRA_SSUSB_REF, "infra_ssusb_ref", "clk26m", 9),
+	GATE_ICG2(CLK_INFRA_AUDIO_26M, "infra_audio_26m", "clk26m", 26),
+	GATE_ICG2(CLK_INFRA_AUDIO_26M_PAD_TOP, "infra_audio_26m_pad_top",
+		  "clk26m", 27),
+	GATE_ICG2(CLK_INFRA_MODEM_TEMP_SHARE, "infra_modem_temp_share",
+		  "axi_sel", 28),
+	GATE_ICG2(CLK_INFRA_VAD_WRAP_SOC, "infra_vad_wrap_soc", "axi_sel", 29),
+	GATE_ICG2(CLK_INFRA_DRAMC_CONF, "infra_dramc_conf", "axi_sel", 30),
+	GATE_ICG2(CLK_INFRA_DRAMC_B_CONF, "infra_dramc_b_conf", "axi_sel", 31),
+	GATE_ICG1(CLK_INFRA_MFG_VCG, "infra_mfg_vcg", "mfg_52m_sel", 14),
+};
+
+static const struct mtk_fixed_factor infra_fixed_divs[] = {
+	FACTOR(CLK_INFRA_13M, "clk13m", "clk26m", 1, 2),
+};
+
+static struct clk_onecell_data *infra_clk_data;
+
+static void mtk_infrasys_init_early(struct device_node *node)
+{
+	int r, i;
+
+	if (!infra_clk_data) {
+		infra_clk_data = mtk_alloc_clk_data(CLK_INFRA_NR);
+
+		for (i = 0; i < CLK_INFRA_NR; i++)
+			infra_clk_data->clks[i] = ERR_PTR(-EPROBE_DEFER);
+	}
+
+	mtk_clk_register_factors(infra_fixed_divs, ARRAY_SIZE(infra_fixed_divs),
+				 infra_clk_data);
+
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, infra_clk_data);
+	if (r)
+		pr_err("%s(): could not register clock provider: %d\n",
+		       __func__, r);
+}
+
+CLK_OF_DECLARE_DRIVER(mtk_infra, "mediatek,mt6797-infracfg",
+		      mtk_infrasys_init_early);
+
+static int mtk_infrasys_init(struct platform_device *pdev)
+{
+	int r, i;
+	struct device_node *node = pdev->dev.of_node;
+
+	if (!infra_clk_data) {
+		infra_clk_data = mtk_alloc_clk_data(CLK_INFRA_NR);
+	} else {
+		for (i = 0; i < CLK_INFRA_NR; i++) {
+			if (infra_clk_data->clks[i] == ERR_PTR(-EPROBE_DEFER))
+				infra_clk_data->clks[i] = ERR_PTR(-ENOENT);
+		}
+	}
+
+	mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks),
+			       infra_clk_data);
+	mtk_clk_register_factors(infra_fixed_divs, ARRAY_SIZE(infra_fixed_divs),
+				 infra_clk_data);
+
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, infra_clk_data);
+	if (r)
+		return r;
+
+	return 0;
+}
+
+#define MT6797_PLL_FMAX		(3000UL * MHZ)
+
+#define CON0_MT6797_RST_BAR	BIT(24)
+
+#define PLL_B(_id, _name, _reg, _pwr_reg, _en_mask, _flags, _pcwbits,	\
+			_pd_reg, _pd_shift, _tuner_reg, _pcw_reg,	\
+			_pcw_shift, _div_table) {			\
+	.id = _id,						\
+	.name = _name,						\
+	.reg = _reg,						\
+	.pwr_reg = _pwr_reg,					\
+	.en_mask = _en_mask,					\
+	.flags = _flags,					\
+	.rst_bar_mask = CON0_MT6797_RST_BAR,			\
+	.fmax = MT6797_PLL_FMAX,				\
+	.pcwbits = _pcwbits,					\
+	.pd_reg = _pd_reg,					\
+	.pd_shift = _pd_shift,					\
+	.tuner_reg = _tuner_reg,				\
+	.pcw_reg = _pcw_reg,					\
+	.pcw_shift = _pcw_shift,				\
+	.div_table = _div_table,				\
+}
+
+#define PLL(_id, _name, _reg, _pwr_reg, _en_mask, _flags, _pcwbits,	\
+			_pd_reg, _pd_shift, _tuner_reg, _pcw_reg,	\
+			_pcw_shift)					\
+		PLL_B(_id, _name, _reg, _pwr_reg, _en_mask, _flags, _pcwbits, \
+			_pd_reg, _pd_shift, _tuner_reg, _pcw_reg, _pcw_shift, \
+			NULL)
+
+static const struct mtk_pll_data plls[] = {
+	PLL(CLK_APMIXED_MAINPLL, "mainpll", 0x0220, 0x022C, 0xF0000101, PLL_AO,
+	    21, 0x220, 4, 0x0, 0x224, 0),
+	PLL(CLK_APMIXED_UNIVPLL, "univpll", 0x0230, 0x023C, 0xFE000011, 0, 7,
+	    0x230, 4, 0x0, 0x234, 14),
+	PLL(CLK_APMIXED_MFGPLL, "mfgpll", 0x0240, 0x024C, 0x00000101, 0, 21,
+	    0x244, 24, 0x0, 0x244, 0),
+	PLL(CLK_APMIXED_MSDCPLL, "msdcpll", 0x0250, 0x025C, 0x00000121, 0, 21,
+	    0x250, 4, 0x0, 0x254, 0),
+	PLL(CLK_APMIXED_IMGPLL, "imgpll", 0x0260, 0x026C, 0x00000121, 0, 21,
+	    0x260, 4, 0x0, 0x264, 0),
+	PLL(CLK_APMIXED_TVDPLL, "tvdpll", 0x0270, 0x027C, 0xC0000121, 0, 21,
+	    0x270, 4, 0x0, 0x274, 0),
+	PLL(CLK_APMIXED_CODECPLL, "codecpll", 0x0290, 0x029C, 0x00000121, 0, 21,
+	    0x290, 4, 0x0, 0x294, 0),
+	PLL(CLK_APMIXED_VDECPLL, "vdecpll", 0x02E4, 0x02F0, 0x00000121, 0, 21,
+	    0x2E4, 4, 0x0, 0x2E8, 0),
+	PLL(CLK_APMIXED_APLL1, "apll1", 0x02A0, 0x02B0, 0x00000131, 0, 31,
+	    0x2A0, 4, 0x2A8, 0x2A4, 0),
+	PLL(CLK_APMIXED_APLL2, "apll2", 0x02B4, 0x02C4, 0x00000131, 0, 31,
+	    0x2B4, 4, 0x2BC, 0x2B8, 0),
+};
+
+static int mtk_apmixedsys_init(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	struct device_node *node = pdev->dev.of_node;
+
+	clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR);
+	if (!clk_data)
+		return -ENOMEM;
+
+	mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data);
+
+	return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+static const struct of_device_id of_match_clk_mt6797[] = {
+	{
+		.compatible = "mediatek,mt6797-topckgen",
+		.data = mtk_topckgen_init,
+	}, {
+		.compatible = "mediatek,mt6797-infracfg",
+		.data = mtk_infrasys_init,
+	}, {
+		.compatible = "mediatek,mt6797-apmixedsys",
+		.data = mtk_apmixedsys_init,
+	}, {
+		/* sentinel */
+	}
+};
+
+static int clk_mt6797_probe(struct platform_device *pdev)
+{
+	int (*clk_init)(struct platform_device *);
+	int r;
+
+	clk_init = of_device_get_match_data(&pdev->dev);
+	if (!clk_init)
+		return -EINVAL;
+
+	r = clk_init(pdev);
+	if (r)
+		dev_err(&pdev->dev,
+			"could not register clock provider: %s: %d\n",
+			pdev->name, r);
+
+	return r;
+}
+
+static struct platform_driver clk_mt6797_drv = {
+	.probe = clk_mt6797_probe,
+	.driver = {
+		.name = "clk-mt6797",
+		.of_match_table = of_match_clk_mt6797,
+	},
+};
+
+static int __init clk_mt6797_init(void)
+{
+	return platform_driver_register(&clk_mt6797_drv);
+}
+
+arch_initcall(clk_mt6797_init);
diff --git a/drivers/clk/meson/Kconfig b/drivers/clk/meson/Kconfig
index 19480bcc7046..2f29ee1a4d00 100644
--- a/drivers/clk/meson/Kconfig
+++ b/drivers/clk/meson/Kconfig
@@ -14,6 +14,7 @@ config COMMON_CLK_MESON8B
 config COMMON_CLK_GXBB
 	bool
 	depends on COMMON_CLK_AMLOGIC
+	select RESET_CONTROLLER
 	help
 	  Support for the clock controller on AmLogic S905 devices, aka gxbb.
 	  Say Y if you want peripherals and CPU frequency scaling to work.
diff --git a/drivers/clk/meson/Makefile b/drivers/clk/meson/Makefile
index 349583405b7c..83b6d9d65aa1 100644
--- a/drivers/clk/meson/Makefile
+++ b/drivers/clk/meson/Makefile
@@ -2,6 +2,6 @@
 # Makefile for Meson specific clk
 #
 
-obj-$(CONFIG_COMMON_CLK_AMLOGIC) += clk-pll.o clk-cpu.o clk-mpll.o
+obj-$(CONFIG_COMMON_CLK_AMLOGIC) += clk-pll.o clk-cpu.o clk-mpll.o clk-audio-divider.o
 obj-$(CONFIG_COMMON_CLK_MESON8B) += meson8b.o
 obj-$(CONFIG_COMMON_CLK_GXBB)	 += gxbb.o gxbb-aoclk.o
diff --git a/drivers/clk/meson/clk-audio-divider.c b/drivers/clk/meson/clk-audio-divider.c
new file mode 100644
index 000000000000..6c07db06642d
--- /dev/null
+++ b/drivers/clk/meson/clk-audio-divider.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2017 AmLogic, Inc.
+ * Author: Jerome Brunet <jbrunet@baylibre.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * i2s master clock divider: The algorithm of the generic clk-divider used with
+ * a very precise clock parent such as the mpll tends to select a low divider
+ * factor. This gives poor results with this particular divider, especially with
+ * high frequencies (> 100 MHz)
+ *
+ * This driver try to select the maximum possible divider with the rate the
+ * upstream clock can provide.
+ */
+
+#include <linux/clk-provider.h>
+#include "clkc.h"
+
+#define to_meson_clk_audio_divider(_hw) container_of(_hw, \
+				struct meson_clk_audio_divider, hw)
+
+static int _div_round(unsigned long parent_rate, unsigned long rate,
+		      unsigned long flags)
+{
+	if (flags & CLK_DIVIDER_ROUND_CLOSEST)
+		return DIV_ROUND_CLOSEST_ULL((u64)parent_rate, rate);
+
+	return DIV_ROUND_UP_ULL((u64)parent_rate, rate);
+}
+
+static int _get_val(unsigned long parent_rate, unsigned long rate)
+{
+	return DIV_ROUND_UP_ULL((u64)parent_rate, rate) - 1;
+}
+
+static int _valid_divider(struct clk_hw *hw, int divider)
+{
+	struct meson_clk_audio_divider *adiv =
+		to_meson_clk_audio_divider(hw);
+	int max_divider;
+	u8 width;
+
+	width = adiv->div.width;
+	max_divider = 1 << width;
+
+	return clamp(divider, 1, max_divider);
+}
+
+static unsigned long audio_divider_recalc_rate(struct clk_hw *hw,
+					       unsigned long parent_rate)
+{
+	struct meson_clk_audio_divider *adiv =
+		to_meson_clk_audio_divider(hw);
+	struct parm *p;
+	unsigned long reg, divider;
+
+	p = &adiv->div;
+	reg = readl(adiv->base + p->reg_off);
+	divider = PARM_GET(p->width, p->shift, reg) + 1;
+
+	return DIV_ROUND_UP_ULL((u64)parent_rate, divider);
+}
+
+static long audio_divider_round_rate(struct clk_hw *hw,
+				     unsigned long rate,
+				     unsigned long *parent_rate)
+{
+	struct meson_clk_audio_divider *adiv =
+		to_meson_clk_audio_divider(hw);
+	unsigned long max_prate;
+	int divider;
+
+	if (!(clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)) {
+		divider = _div_round(*parent_rate, rate, adiv->flags);
+		divider = _valid_divider(hw, divider);
+		return DIV_ROUND_UP_ULL((u64)*parent_rate, divider);
+	}
+
+	/* Get the maximum parent rate */
+	max_prate = clk_hw_round_rate(clk_hw_get_parent(hw), ULONG_MAX);
+
+	/* Get the corresponding rounded down divider */
+	divider = max_prate / rate;
+	divider = _valid_divider(hw, divider);
+
+	/* Get actual rate of the parent */
+	*parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw),
+					 divider * rate);
+
+	return DIV_ROUND_UP_ULL((u64)*parent_rate, divider);
+}
+
+static int audio_divider_set_rate(struct clk_hw *hw,
+				  unsigned long rate,
+				  unsigned long parent_rate)
+{
+	struct meson_clk_audio_divider *adiv =
+		to_meson_clk_audio_divider(hw);
+	struct parm *p;
+	unsigned long reg, flags = 0;
+	int val;
+
+	val = _get_val(parent_rate, rate);
+
+	if (adiv->lock)
+		spin_lock_irqsave(adiv->lock, flags);
+	else
+		__acquire(adiv->lock);
+
+	p = &adiv->div;
+	reg = readl(adiv->base + p->reg_off);
+	reg = PARM_SET(p->width, p->shift, reg, val);
+	writel(reg, adiv->base + p->reg_off);
+
+	if (adiv->lock)
+		spin_unlock_irqrestore(adiv->lock, flags);
+	else
+		__release(adiv->lock);
+
+	return 0;
+}
+
+const struct clk_ops meson_clk_audio_divider_ro_ops = {
+	.recalc_rate	= audio_divider_recalc_rate,
+	.round_rate	= audio_divider_round_rate,
+};
+
+const struct clk_ops meson_clk_audio_divider_ops = {
+	.recalc_rate	= audio_divider_recalc_rate,
+	.round_rate	= audio_divider_round_rate,
+	.set_rate	= audio_divider_set_rate,
+};
diff --git a/drivers/clk/meson/clk-mpll.c b/drivers/clk/meson/clk-mpll.c
index 03af79005ddb..39eab69fe51a 100644
--- a/drivers/clk/meson/clk-mpll.c
+++ b/drivers/clk/meson/clk-mpll.c
@@ -64,17 +64,51 @@
 #include <linux/clk-provider.h>
 #include "clkc.h"
 
-#define SDM_MAX 16384
+#define SDM_DEN 16384
+#define N2_MIN	4
+#define N2_MAX	511
 
 #define to_meson_clk_mpll(_hw) container_of(_hw, struct meson_clk_mpll, hw)
 
+static long rate_from_params(unsigned long parent_rate,
+				      unsigned long sdm,
+				      unsigned long n2)
+{
+	unsigned long divisor = (SDM_DEN * n2) + sdm;
+
+	if (n2 < N2_MIN)
+		return -EINVAL;
+
+	return DIV_ROUND_UP_ULL((u64)parent_rate * SDM_DEN, divisor);
+}
+
+static void params_from_rate(unsigned long requested_rate,
+			     unsigned long parent_rate,
+			     unsigned long *sdm,
+			     unsigned long *n2)
+{
+	uint64_t div = parent_rate;
+	unsigned long rem = do_div(div, requested_rate);
+
+	if (div < N2_MIN) {
+		*n2 = N2_MIN;
+		*sdm = 0;
+	} else if (div > N2_MAX) {
+		*n2 = N2_MAX;
+		*sdm = SDM_DEN - 1;
+	} else {
+		*n2 = div;
+		*sdm = DIV_ROUND_UP(rem * SDM_DEN, requested_rate);
+	}
+}
+
 static unsigned long mpll_recalc_rate(struct clk_hw *hw,
 		unsigned long parent_rate)
 {
 	struct meson_clk_mpll *mpll = to_meson_clk_mpll(hw);
 	struct parm *p;
-	unsigned long rate = 0;
 	unsigned long reg, sdm, n2;
+	long rate;
 
 	p = &mpll->sdm;
 	reg = readl(mpll->base + p->reg_off);
@@ -84,11 +118,123 @@ static unsigned long mpll_recalc_rate(struct clk_hw *hw,
 	reg = readl(mpll->base + p->reg_off);
 	n2 = PARM_GET(p->width, p->shift, reg);
 
-	rate = (parent_rate * SDM_MAX) / ((SDM_MAX * n2) + sdm);
+	rate = rate_from_params(parent_rate, sdm, n2);
+	if (rate < 0)
+		return 0;
 
 	return rate;
 }
 
+static long mpll_round_rate(struct clk_hw *hw,
+			    unsigned long rate,
+			    unsigned long *parent_rate)
+{
+	unsigned long sdm, n2;
+
+	params_from_rate(rate, *parent_rate, &sdm, &n2);
+	return rate_from_params(*parent_rate, sdm, n2);
+}
+
+static int mpll_set_rate(struct clk_hw *hw,
+			 unsigned long rate,
+			 unsigned long parent_rate)
+{
+	struct meson_clk_mpll *mpll = to_meson_clk_mpll(hw);
+	struct parm *p;
+	unsigned long reg, sdm, n2;
+	unsigned long flags = 0;
+
+	params_from_rate(rate, parent_rate, &sdm, &n2);
+
+	if (mpll->lock)
+		spin_lock_irqsave(mpll->lock, flags);
+	else
+		__acquire(mpll->lock);
+
+	p = &mpll->sdm;
+	reg = readl(mpll->base + p->reg_off);
+	reg = PARM_SET(p->width, p->shift, reg, sdm);
+	writel(reg, mpll->base + p->reg_off);
+
+	p = &mpll->sdm_en;
+	reg = readl(mpll->base + p->reg_off);
+	reg = PARM_SET(p->width, p->shift, reg, 1);
+	writel(reg, mpll->base + p->reg_off);
+
+	p = &mpll->n2;
+	reg = readl(mpll->base + p->reg_off);
+	reg = PARM_SET(p->width, p->shift, reg, n2);
+	writel(reg, mpll->base + p->reg_off);
+
+	if (mpll->lock)
+		spin_unlock_irqrestore(mpll->lock, flags);
+	else
+		__release(mpll->lock);
+
+	return 0;
+}
+
+static void mpll_enable_core(struct clk_hw *hw, int enable)
+{
+	struct meson_clk_mpll *mpll = to_meson_clk_mpll(hw);
+	struct parm *p;
+	unsigned long reg;
+	unsigned long flags = 0;
+
+	if (mpll->lock)
+		spin_lock_irqsave(mpll->lock, flags);
+	else
+		__acquire(mpll->lock);
+
+	p = &mpll->en;
+	reg = readl(mpll->base + p->reg_off);
+	reg = PARM_SET(p->width, p->shift, reg, enable ? 1 : 0);
+	writel(reg, mpll->base + p->reg_off);
+
+	if (mpll->lock)
+		spin_unlock_irqrestore(mpll->lock, flags);
+	else
+		__release(mpll->lock);
+}
+
+
+static int mpll_enable(struct clk_hw *hw)
+{
+	mpll_enable_core(hw, 1);
+
+	return 0;
+}
+
+static void mpll_disable(struct clk_hw *hw)
+{
+	mpll_enable_core(hw, 0);
+}
+
+static int mpll_is_enabled(struct clk_hw *hw)
+{
+	struct meson_clk_mpll *mpll = to_meson_clk_mpll(hw);
+	struct parm *p;
+	unsigned long reg;
+	int en;
+
+	p = &mpll->en;
+	reg = readl(mpll->base + p->reg_off);
+	en = PARM_GET(p->width, p->shift, reg);
+
+	return en;
+}
+
 const struct clk_ops meson_clk_mpll_ro_ops = {
-	.recalc_rate = mpll_recalc_rate,
+	.recalc_rate	= mpll_recalc_rate,
+	.round_rate	= mpll_round_rate,
+	.is_enabled	= mpll_is_enabled,
+};
+
+const struct clk_ops meson_clk_mpll_ops = {
+	.recalc_rate	= mpll_recalc_rate,
+	.round_rate	= mpll_round_rate,
+	.set_rate	= mpll_set_rate,
+	.enable		= mpll_enable,
+	.disable	= mpll_disable,
+	.is_enabled	= mpll_is_enabled,
 };
diff --git a/drivers/clk/meson/clk-pll.c b/drivers/clk/meson/clk-pll.c
index 4adc1e89212c..01341553f50b 100644
--- a/drivers/clk/meson/clk-pll.c
+++ b/drivers/clk/meson/clk-pll.c
@@ -116,6 +116,30 @@ static const struct pll_rate_table *meson_clk_get_pll_settings(struct meson_clk_
 	return NULL;
 }
 
+/* Specific wait loop for GXL/GXM GP0 PLL */
+static int meson_clk_pll_wait_lock_reset(struct meson_clk_pll *pll,
+					 struct parm *p_n)
+{
+	int delay = 100;
+	u32 reg;
+
+	while (delay > 0) {
+		reg = readl(pll->base + p_n->reg_off);
+		writel(reg | MESON_PLL_RESET, pll->base + p_n->reg_off);
+		udelay(10);
+		writel(reg & ~MESON_PLL_RESET, pll->base + p_n->reg_off);
+
+		/* This delay comes from AMLogic tree clk-gp0-gxl driver */
+		mdelay(1);
+
+		reg = readl(pll->base + p_n->reg_off);
+		if (reg & MESON_PLL_LOCK)
+			return 0;
+		delay--;
+	}
+	return -ETIMEDOUT;
+}
+
 static int meson_clk_pll_wait_lock(struct meson_clk_pll *pll,
 				   struct parm *p_n)
 {
@@ -132,6 +156,15 @@ static int meson_clk_pll_wait_lock(struct meson_clk_pll *pll,
 	return -ETIMEDOUT;
 }
 
+static void meson_clk_pll_init_params(struct meson_clk_pll *pll)
+{
+	int i;
+
+	for (i = 0 ; i < pll->params.params_count ; ++i)
+		writel(pll->params.params_table[i].value,
+		       pll->base + pll->params.params_table[i].reg_off);
+}
+
 static int meson_clk_pll_set_rate(struct clk_hw *hw, unsigned long rate,
 				  unsigned long parent_rate)
 {
@@ -151,10 +184,16 @@ static int meson_clk_pll_set_rate(struct clk_hw *hw, unsigned long rate,
 	if (!rate_set)
 		return -EINVAL;
 
+	/* Initialize the PLL in a clean state if specified */
+	if (pll->params.params_count)
+		meson_clk_pll_init_params(pll);
+
 	/* PLL reset */
 	p = &pll->n;
 	reg = readl(pll->base + p->reg_off);
-	writel(reg | MESON_PLL_RESET, pll->base + p->reg_off);
+	/* If no_init_reset is provided, avoid resetting at this point */
+	if (!pll->params.no_init_reset)
+		writel(reg | MESON_PLL_RESET, pll->base + p->reg_off);
 
 	reg = PARM_SET(p->width, p->shift, reg, rate_set->n);
 	writel(reg, pll->base + p->reg_off);
@@ -184,7 +223,17 @@ static int meson_clk_pll_set_rate(struct clk_hw *hw, unsigned long rate,
 	}
 
 	p = &pll->n;
-	ret = meson_clk_pll_wait_lock(pll, p);
+	/* If clear_reset_for_lock is provided, remove the reset bit here */
+	if (pll->params.clear_reset_for_lock) {
+		reg = readl(pll->base + p->reg_off);
+		writel(reg & ~MESON_PLL_RESET, pll->base + p->reg_off);
+	}
+
+	/* If reset_lock_loop, use a special loop including resetting */
+	if (pll->params.reset_lock_loop)
+		ret = meson_clk_pll_wait_lock_reset(pll, p);
+	else
+		ret = meson_clk_pll_wait_lock(pll, p);
 	if (ret) {
 		pr_warn("%s: pll did not lock, trying to restore old rate %lu\n",
 			__func__, old_rate);
diff --git a/drivers/clk/meson/clkc.h b/drivers/clk/meson/clkc.h
index 9bb70e7a7d6a..d6feafe8bd6c 100644
--- a/drivers/clk/meson/clkc.h
+++ b/drivers/clk/meson/clkc.h
@@ -25,7 +25,7 @@
 #define PARM_GET(width, shift, reg)					\
 	(((reg) & SETPMASK(width, shift)) >> (shift))
 #define PARM_SET(width, shift, reg, val)				\
-	(((reg) & CLRPMASK(width, shift)) | (val << (shift)))
+	(((reg) & CLRPMASK(width, shift)) | ((val) << (shift)))
 
 #define MESON_PARM_APPLICABLE(p)		(!!((p)->width))
 
@@ -62,6 +62,28 @@ struct pll_rate_table {
 		.frac		= (_frac),				\
 	}								\
 
+struct pll_params_table {
+	unsigned int reg_off;
+	unsigned int value;
+};
+
+#define PLL_PARAM(_reg, _val)						\
+	{								\
+		.reg_off	= (_reg),				\
+		.value		= (_val),				\
+	}
+
+struct pll_setup_params {
+	struct pll_params_table *params_table;
+	unsigned int params_count;
+	/* Workaround for GP0, do not reset before configuring */
+	bool no_init_reset;
+	/* Workaround for GP0, unreset right before checking for lock */
+	bool clear_reset_for_lock;
+	/* Workaround for GXL GP0, reset in the lock checking loop */
+	bool reset_lock_loop;
+};
+
 struct meson_clk_pll {
 	struct clk_hw hw;
 	void __iomem *base;
@@ -70,6 +92,7 @@ struct meson_clk_pll {
 	struct parm frac;
 	struct parm od;
 	struct parm od2;
+	const struct pll_setup_params params;
 	const struct pll_rate_table *rate_table;
 	unsigned int rate_count;
 	spinlock_t *lock;
@@ -92,8 +115,17 @@ struct meson_clk_mpll {
 	struct clk_hw hw;
 	void __iomem *base;
 	struct parm sdm;
+	struct parm sdm_en;
 	struct parm n2;
-	/* FIXME ssen gate control? */
+	struct parm en;
+	spinlock_t *lock;
+};
+
+struct meson_clk_audio_divider {
+	struct clk_hw hw;
+	void __iomem *base;
+	struct parm div;
+	u8 flags;
 	spinlock_t *lock;
 };
 
@@ -116,5 +148,8 @@ extern const struct clk_ops meson_clk_pll_ro_ops;
 extern const struct clk_ops meson_clk_pll_ops;
 extern const struct clk_ops meson_clk_cpu_ops;
 extern const struct clk_ops meson_clk_mpll_ro_ops;
+extern const struct clk_ops meson_clk_mpll_ops;
+extern const struct clk_ops meson_clk_audio_divider_ro_ops;
+extern const struct clk_ops meson_clk_audio_divider_ops;
 
 #endif /* __CLKC_H */
diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index 1c1ec137a3cc..ad5f027af1a2 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -20,6 +20,7 @@
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/init.h>
 
@@ -120,7 +121,7 @@ static const struct pll_rate_table sys_pll_rate_table[] = {
 	{ /* sentinel */ },
 };
 
-static const struct pll_rate_table gp0_pll_rate_table[] = {
+static const struct pll_rate_table gxbb_gp0_pll_rate_table[] = {
 	PLL_RATE(96000000, 32, 1, 3),
 	PLL_RATE(99000000, 33, 1, 3),
 	PLL_RATE(102000000, 34, 1, 3),
@@ -248,6 +249,35 @@ static const struct pll_rate_table gp0_pll_rate_table[] = {
 	{ /* sentinel */ },
 };
 
+static const struct pll_rate_table gxl_gp0_pll_rate_table[] = {
+	PLL_RATE(504000000, 42, 1, 1),
+	PLL_RATE(516000000, 43, 1, 1),
+	PLL_RATE(528000000, 44, 1, 1),
+	PLL_RATE(540000000, 45, 1, 1),
+	PLL_RATE(552000000, 46, 1, 1),
+	PLL_RATE(564000000, 47, 1, 1),
+	PLL_RATE(576000000, 48, 1, 1),
+	PLL_RATE(588000000, 49, 1, 1),
+	PLL_RATE(600000000, 50, 1, 1),
+	PLL_RATE(612000000, 51, 1, 1),
+	PLL_RATE(624000000, 52, 1, 1),
+	PLL_RATE(636000000, 53, 1, 1),
+	PLL_RATE(648000000, 54, 1, 1),
+	PLL_RATE(660000000, 55, 1, 1),
+	PLL_RATE(672000000, 56, 1, 1),
+	PLL_RATE(684000000, 57, 1, 1),
+	PLL_RATE(696000000, 58, 1, 1),
+	PLL_RATE(708000000, 59, 1, 1),
+	PLL_RATE(720000000, 60, 1, 1),
+	PLL_RATE(732000000, 61, 1, 1),
+	PLL_RATE(744000000, 62, 1, 1),
+	PLL_RATE(756000000, 63, 1, 1),
+	PLL_RATE(768000000, 64, 1, 1),
+	PLL_RATE(780000000, 65, 1, 1),
+	PLL_RATE(792000000, 66, 1, 1),
+	{ /* sentinel */ },
+};
+
 static const struct clk_div_table cpu_div_table[] = {
 	{ .val = 1, .div = 1 },
 	{ .val = 2, .div = 2 },
@@ -352,6 +382,13 @@ static struct meson_clk_pll gxbb_sys_pll = {
 	},
 };
 
+struct pll_params_table gxbb_gp0_params_table[] = {
+	PLL_PARAM(HHI_GP0_PLL_CNTL, 0x6a000228),
+	PLL_PARAM(HHI_GP0_PLL_CNTL2, 0x69c80000),
+	PLL_PARAM(HHI_GP0_PLL_CNTL3, 0x0a5590c4),
+	PLL_PARAM(HHI_GP0_PLL_CNTL4, 0x0000500d),
+};
+
 static struct meson_clk_pll gxbb_gp0_pll = {
 	.m = {
 		.reg_off = HHI_GP0_PLL_CNTL,
@@ -368,8 +405,57 @@ static struct meson_clk_pll gxbb_gp0_pll = {
 		.shift   = 16,
 		.width   = 2,
 	},
-	.rate_table = gp0_pll_rate_table,
-	.rate_count = ARRAY_SIZE(gp0_pll_rate_table),
+	.params = {
+		.params_table = gxbb_gp0_params_table,
+		.params_count =	ARRAY_SIZE(gxbb_gp0_params_table),
+		.no_init_reset = true,
+		.clear_reset_for_lock = true,
+	},
+	.rate_table = gxbb_gp0_pll_rate_table,
+	.rate_count = ARRAY_SIZE(gxbb_gp0_pll_rate_table),
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "gp0_pll",
+		.ops = &meson_clk_pll_ops,
+		.parent_names = (const char *[]){ "xtal" },
+		.num_parents = 1,
+		.flags = CLK_GET_RATE_NOCACHE,
+	},
+};
+
+struct pll_params_table gxl_gp0_params_table[] = {
+	PLL_PARAM(HHI_GP0_PLL_CNTL, 0x40010250),
+	PLL_PARAM(HHI_GP0_PLL_CNTL1, 0xc084a000),
+	PLL_PARAM(HHI_GP0_PLL_CNTL2, 0xb75020be),
+	PLL_PARAM(HHI_GP0_PLL_CNTL3, 0x0a59a288),
+	PLL_PARAM(HHI_GP0_PLL_CNTL4, 0xc000004d),
+	PLL_PARAM(HHI_GP0_PLL_CNTL5, 0x00078000),
+};
+
+static struct meson_clk_pll gxl_gp0_pll = {
+	.m = {
+		.reg_off = HHI_GP0_PLL_CNTL,
+		.shift   = 0,
+		.width   = 9,
+	},
+	.n = {
+		.reg_off = HHI_GP0_PLL_CNTL,
+		.shift   = 9,
+		.width   = 5,
+	},
+	.od = {
+		.reg_off = HHI_GP0_PLL_CNTL,
+		.shift   = 16,
+		.width   = 2,
+	},
+	.params = {
+		.params_table = gxl_gp0_params_table,
+		.params_count =	ARRAY_SIZE(gxl_gp0_params_table),
+		.no_init_reset = true,
+		.reset_lock_loop = true,
+	},
+	.rate_table = gxl_gp0_pll_rate_table,
+	.rate_count = ARRAY_SIZE(gxl_gp0_pll_rate_table),
 	.lock = &clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "gp0_pll",
@@ -441,15 +527,25 @@ static struct meson_clk_mpll gxbb_mpll0 = {
 		.shift   = 0,
 		.width   = 14,
 	},
+	.sdm_en = {
+		.reg_off = HHI_MPLL_CNTL7,
+		.shift   = 15,
+		.width	 = 1,
+	},
 	.n2 = {
 		.reg_off = HHI_MPLL_CNTL7,
 		.shift   = 16,
 		.width   = 9,
 	},
+	.en = {
+		.reg_off = HHI_MPLL_CNTL7,
+		.shift   = 14,
+		.width	 = 1,
+	},
 	.lock = &clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "mpll0",
-		.ops = &meson_clk_mpll_ro_ops,
+		.ops = &meson_clk_mpll_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
@@ -461,15 +557,25 @@ static struct meson_clk_mpll gxbb_mpll1 = {
 		.shift   = 0,
 		.width   = 14,
 	},
+	.sdm_en = {
+		.reg_off = HHI_MPLL_CNTL8,
+		.shift   = 15,
+		.width	 = 1,
+	},
 	.n2 = {
 		.reg_off = HHI_MPLL_CNTL8,
 		.shift   = 16,
 		.width   = 9,
 	},
+	.en = {
+		.reg_off = HHI_MPLL_CNTL8,
+		.shift   = 14,
+		.width	 = 1,
+	},
 	.lock = &clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "mpll1",
-		.ops = &meson_clk_mpll_ro_ops,
+		.ops = &meson_clk_mpll_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
@@ -481,15 +587,25 @@ static struct meson_clk_mpll gxbb_mpll2 = {
 		.shift   = 0,
 		.width   = 14,
 	},
+	.sdm_en = {
+		.reg_off = HHI_MPLL_CNTL9,
+		.shift   = 15,
+		.width	 = 1,
+	},
 	.n2 = {
 		.reg_off = HHI_MPLL_CNTL9,
 		.shift   = 16,
 		.width   = 9,
 	},
+	.en = {
+		.reg_off = HHI_MPLL_CNTL9,
+		.shift   = 14,
+		.width	 = 1,
+	},
 	.lock = &clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "mpll2",
-		.ops = &meson_clk_mpll_ro_ops,
+		.ops = &meson_clk_mpll_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
@@ -604,6 +720,237 @@ static struct clk_gate gxbb_sar_adc_clk = {
 	},
 };
 
+/*
+ * The MALI IP is clocked by two identical clocks (mali_0 and mali_1)
+ * muxed by a glitch-free switch.
+ */
+
+static u32 mux_table_mali_0_1[] = {0, 1, 2, 3, 4, 5, 6, 7};
+static const char *gxbb_mali_0_1_parent_names[] = {
+	"xtal", "gp0_pll", "mpll2", "mpll1", "fclk_div7",
+	"fclk_div4", "fclk_div3", "fclk_div5"
+};
+
+static struct clk_mux gxbb_mali_0_sel = {
+	.reg = (void *)HHI_MALI_CLK_CNTL,
+	.mask = 0x7,
+	.shift = 9,
+	.table = mux_table_mali_0_1,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mali_0_sel",
+		.ops = &clk_mux_ops,
+		/*
+		 * bits 10:9 selects from 8 possible parents:
+		 * xtal, gp0_pll, mpll2, mpll1, fclk_div7,
+		 * fclk_div4, fclk_div3, fclk_div5
+		 */
+		.parent_names = gxbb_mali_0_1_parent_names,
+		.num_parents = 8,
+		.flags = CLK_SET_RATE_NO_REPARENT,
+	},
+};
+
+static struct clk_divider gxbb_mali_0_div = {
+	.reg = (void *)HHI_MALI_CLK_CNTL,
+	.shift = 0,
+	.width = 7,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mali_0_div",
+		.ops = &clk_divider_ops,
+		.parent_names = (const char *[]){ "mali_0_sel" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_NO_REPARENT,
+	},
+};
+
+static struct clk_gate gxbb_mali_0 = {
+	.reg = (void *)HHI_MALI_CLK_CNTL,
+	.bit_idx = 8,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mali_0",
+		.ops = &clk_gate_ops,
+		.parent_names = (const char *[]){ "mali_0_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_mux gxbb_mali_1_sel = {
+	.reg = (void *)HHI_MALI_CLK_CNTL,
+	.mask = 0x7,
+	.shift = 25,
+	.table = mux_table_mali_0_1,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mali_1_sel",
+		.ops = &clk_mux_ops,
+		/*
+		 * bits 10:9 selects from 8 possible parents:
+		 * xtal, gp0_pll, mpll2, mpll1, fclk_div7,
+		 * fclk_div4, fclk_div3, fclk_div5
+		 */
+		.parent_names = gxbb_mali_0_1_parent_names,
+		.num_parents = 8,
+		.flags = CLK_SET_RATE_NO_REPARENT,
+	},
+};
+
+static struct clk_divider gxbb_mali_1_div = {
+	.reg = (void *)HHI_MALI_CLK_CNTL,
+	.shift = 16,
+	.width = 7,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mali_1_div",
+		.ops = &clk_divider_ops,
+		.parent_names = (const char *[]){ "mali_1_sel" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_NO_REPARENT,
+	},
+};
+
+static struct clk_gate gxbb_mali_1 = {
+	.reg = (void *)HHI_MALI_CLK_CNTL,
+	.bit_idx = 24,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mali_1",
+		.ops = &clk_gate_ops,
+		.parent_names = (const char *[]){ "mali_1_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static u32 mux_table_mali[] = {0, 1};
+static const char *gxbb_mali_parent_names[] = {
+	"mali_0", "mali_1"
+};
+
+static struct clk_mux gxbb_mali = {
+	.reg = (void *)HHI_MALI_CLK_CNTL,
+	.mask = 1,
+	.shift = 31,
+	.table = mux_table_mali,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mali",
+		.ops = &clk_mux_ops,
+		.parent_names = gxbb_mali_parent_names,
+		.num_parents = 2,
+		.flags = CLK_SET_RATE_NO_REPARENT,
+	},
+};
+
+static struct clk_mux gxbb_cts_amclk_sel = {
+	.reg = (void *) HHI_AUD_CLK_CNTL,
+	.mask = 0x3,
+	.shift = 9,
+	/* Default parent unknown (register reset value: 0) */
+	.table = (u32[]){ 1, 2, 3 },
+	.lock = &clk_lock,
+		.hw.init = &(struct clk_init_data){
+		.name = "cts_amclk_sel",
+		.ops = &clk_mux_ops,
+		.parent_names = (const char *[]){ "mpll0", "mpll1", "mpll2" },
+		.num_parents = 3,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct meson_clk_audio_divider gxbb_cts_amclk_div = {
+	.div = {
+		.reg_off = HHI_AUD_CLK_CNTL,
+		.shift   = 0,
+		.width   = 8,
+	},
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "cts_amclk_div",
+		.ops = &meson_clk_audio_divider_ops,
+		.parent_names = (const char *[]){ "cts_amclk_sel" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT | CLK_DIVIDER_ROUND_CLOSEST,
+	},
+};
+
+static struct clk_gate gxbb_cts_amclk = {
+	.reg = (void *) HHI_AUD_CLK_CNTL,
+	.bit_idx = 8,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "cts_amclk",
+		.ops = &clk_gate_ops,
+		.parent_names = (const char *[]){ "cts_amclk_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_mux gxbb_cts_mclk_i958_sel = {
+	.reg = (void *)HHI_AUD_CLK_CNTL2,
+	.mask = 0x3,
+	.shift = 25,
+	/* Default parent unknown (register reset value: 0) */
+	.table = (u32[]){ 1, 2, 3 },
+	.lock = &clk_lock,
+		.hw.init = &(struct clk_init_data){
+		.name = "cts_mclk_i958_sel",
+		.ops = &clk_mux_ops,
+		.parent_names = (const char *[]){ "mpll0", "mpll1", "mpll2" },
+		.num_parents = 3,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_divider gxbb_cts_mclk_i958_div = {
+	.reg = (void *)HHI_AUD_CLK_CNTL2,
+	.shift = 16,
+	.width = 8,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "cts_mclk_i958_div",
+		.ops = &clk_divider_ops,
+		.parent_names = (const char *[]){ "cts_mclk_i958_sel" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT | CLK_DIVIDER_ROUND_CLOSEST,
+	},
+};
+
+static struct clk_gate gxbb_cts_mclk_i958 = {
+	.reg = (void *)HHI_AUD_CLK_CNTL2,
+	.bit_idx = 24,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "cts_mclk_i958",
+		.ops = &clk_gate_ops,
+		.parent_names = (const char *[]){ "cts_mclk_i958_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_mux gxbb_cts_i958 = {
+	.reg = (void *)HHI_AUD_CLK_CNTL2,
+	.mask = 0x1,
+	.shift = 27,
+	.lock = &clk_lock,
+		.hw.init = &(struct clk_init_data){
+		.name = "cts_i958",
+		.ops = &clk_mux_ops,
+		.parent_names = (const char *[]){ "cts_amclk", "cts_mclk_i958" },
+		.num_parents = 2,
+		/*
+		 *The parent is specific to origin of the audio data. Let the
+		 * consumer choose the appropriate parent
+		 */
+		.flags = CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+	},
+};
+
 /* Everything Else (EE) domain gates */
 static MESON_GATE(gxbb_ddr, HHI_GCLK_MPEG0, 0);
 static MESON_GATE(gxbb_dos, HHI_GCLK_MPEG0, 1);
@@ -797,6 +1144,140 @@ static struct clk_hw_onecell_data gxbb_hw_onecell_data = {
 		[CLKID_SAR_ADC_CLK]	    = &gxbb_sar_adc_clk.hw,
 		[CLKID_SAR_ADC_SEL]	    = &gxbb_sar_adc_clk_sel.hw,
 		[CLKID_SAR_ADC_DIV]	    = &gxbb_sar_adc_clk_div.hw,
+		[CLKID_MALI_0_SEL]	    = &gxbb_mali_0_sel.hw,
+		[CLKID_MALI_0_DIV]	    = &gxbb_mali_0_div.hw,
+		[CLKID_MALI_0]		    = &gxbb_mali_0.hw,
+		[CLKID_MALI_1_SEL]	    = &gxbb_mali_1_sel.hw,
+		[CLKID_MALI_1_DIV]	    = &gxbb_mali_1_div.hw,
+		[CLKID_MALI_1]		    = &gxbb_mali_1.hw,
+		[CLKID_MALI]		    = &gxbb_mali.hw,
+		[CLKID_CTS_AMCLK]	    = &gxbb_cts_amclk.hw,
+		[CLKID_CTS_AMCLK_SEL]	    = &gxbb_cts_amclk_sel.hw,
+		[CLKID_CTS_AMCLK_DIV]	    = &gxbb_cts_amclk_div.hw,
+		[CLKID_CTS_MCLK_I958]	    = &gxbb_cts_mclk_i958.hw,
+		[CLKID_CTS_MCLK_I958_SEL]   = &gxbb_cts_mclk_i958_sel.hw,
+		[CLKID_CTS_MCLK_I958_DIV]   = &gxbb_cts_mclk_i958_div.hw,
+		[CLKID_CTS_I958]	    = &gxbb_cts_i958.hw,
+	},
+	.num = NR_CLKS,
+};
+
+static struct clk_hw_onecell_data gxl_hw_onecell_data = {
+	.hws = {
+		[CLKID_SYS_PLL]		    = &gxbb_sys_pll.hw,
+		[CLKID_CPUCLK]		    = &gxbb_cpu_clk.hw,
+		[CLKID_HDMI_PLL]	    = &gxbb_hdmi_pll.hw,
+		[CLKID_FIXED_PLL]	    = &gxbb_fixed_pll.hw,
+		[CLKID_FCLK_DIV2]	    = &gxbb_fclk_div2.hw,
+		[CLKID_FCLK_DIV3]	    = &gxbb_fclk_div3.hw,
+		[CLKID_FCLK_DIV4]	    = &gxbb_fclk_div4.hw,
+		[CLKID_FCLK_DIV5]	    = &gxbb_fclk_div5.hw,
+		[CLKID_FCLK_DIV7]	    = &gxbb_fclk_div7.hw,
+		[CLKID_GP0_PLL]		    = &gxl_gp0_pll.hw,
+		[CLKID_MPEG_SEL]	    = &gxbb_mpeg_clk_sel.hw,
+		[CLKID_MPEG_DIV]	    = &gxbb_mpeg_clk_div.hw,
+		[CLKID_CLK81]		    = &gxbb_clk81.hw,
+		[CLKID_MPLL0]		    = &gxbb_mpll0.hw,
+		[CLKID_MPLL1]		    = &gxbb_mpll1.hw,
+		[CLKID_MPLL2]		    = &gxbb_mpll2.hw,
+		[CLKID_DDR]		    = &gxbb_ddr.hw,
+		[CLKID_DOS]		    = &gxbb_dos.hw,
+		[CLKID_ISA]		    = &gxbb_isa.hw,
+		[CLKID_PL301]		    = &gxbb_pl301.hw,
+		[CLKID_PERIPHS]		    = &gxbb_periphs.hw,
+		[CLKID_SPICC]		    = &gxbb_spicc.hw,
+		[CLKID_I2C]		    = &gxbb_i2c.hw,
+		[CLKID_SAR_ADC]		    = &gxbb_sar_adc.hw,
+		[CLKID_SMART_CARD]	    = &gxbb_smart_card.hw,
+		[CLKID_RNG0]		    = &gxbb_rng0.hw,
+		[CLKID_UART0]		    = &gxbb_uart0.hw,
+		[CLKID_SDHC]		    = &gxbb_sdhc.hw,
+		[CLKID_STREAM]		    = &gxbb_stream.hw,
+		[CLKID_ASYNC_FIFO]	    = &gxbb_async_fifo.hw,
+		[CLKID_SDIO]		    = &gxbb_sdio.hw,
+		[CLKID_ABUF]		    = &gxbb_abuf.hw,
+		[CLKID_HIU_IFACE]	    = &gxbb_hiu_iface.hw,
+		[CLKID_ASSIST_MISC]	    = &gxbb_assist_misc.hw,
+		[CLKID_SPI]		    = &gxbb_spi.hw,
+		[CLKID_I2S_SPDIF]	    = &gxbb_i2s_spdif.hw,
+		[CLKID_ETH]		    = &gxbb_eth.hw,
+		[CLKID_DEMUX]		    = &gxbb_demux.hw,
+		[CLKID_AIU_GLUE]	    = &gxbb_aiu_glue.hw,
+		[CLKID_IEC958]		    = &gxbb_iec958.hw,
+		[CLKID_I2S_OUT]		    = &gxbb_i2s_out.hw,
+		[CLKID_AMCLK]		    = &gxbb_amclk.hw,
+		[CLKID_AIFIFO2]		    = &gxbb_aififo2.hw,
+		[CLKID_MIXER]		    = &gxbb_mixer.hw,
+		[CLKID_MIXER_IFACE]	    = &gxbb_mixer_iface.hw,
+		[CLKID_ADC]		    = &gxbb_adc.hw,
+		[CLKID_BLKMV]		    = &gxbb_blkmv.hw,
+		[CLKID_AIU]		    = &gxbb_aiu.hw,
+		[CLKID_UART1]		    = &gxbb_uart1.hw,
+		[CLKID_G2D]		    = &gxbb_g2d.hw,
+		[CLKID_USB0]		    = &gxbb_usb0.hw,
+		[CLKID_USB1]		    = &gxbb_usb1.hw,
+		[CLKID_RESET]		    = &gxbb_reset.hw,
+		[CLKID_NAND]		    = &gxbb_nand.hw,
+		[CLKID_DOS_PARSER]	    = &gxbb_dos_parser.hw,
+		[CLKID_USB]		    = &gxbb_usb.hw,
+		[CLKID_VDIN1]		    = &gxbb_vdin1.hw,
+		[CLKID_AHB_ARB0]	    = &gxbb_ahb_arb0.hw,
+		[CLKID_EFUSE]		    = &gxbb_efuse.hw,
+		[CLKID_BOOT_ROM]	    = &gxbb_boot_rom.hw,
+		[CLKID_AHB_DATA_BUS]	    = &gxbb_ahb_data_bus.hw,
+		[CLKID_AHB_CTRL_BUS]	    = &gxbb_ahb_ctrl_bus.hw,
+		[CLKID_HDMI_INTR_SYNC]	    = &gxbb_hdmi_intr_sync.hw,
+		[CLKID_HDMI_PCLK]	    = &gxbb_hdmi_pclk.hw,
+		[CLKID_USB1_DDR_BRIDGE]	    = &gxbb_usb1_ddr_bridge.hw,
+		[CLKID_USB0_DDR_BRIDGE]	    = &gxbb_usb0_ddr_bridge.hw,
+		[CLKID_MMC_PCLK]	    = &gxbb_mmc_pclk.hw,
+		[CLKID_DVIN]		    = &gxbb_dvin.hw,
+		[CLKID_UART2]		    = &gxbb_uart2.hw,
+		[CLKID_SANA]		    = &gxbb_sana.hw,
+		[CLKID_VPU_INTR]	    = &gxbb_vpu_intr.hw,
+		[CLKID_SEC_AHB_AHB3_BRIDGE] = &gxbb_sec_ahb_ahb3_bridge.hw,
+		[CLKID_CLK81_A53]	    = &gxbb_clk81_a53.hw,
+		[CLKID_VCLK2_VENCI0]	    = &gxbb_vclk2_venci0.hw,
+		[CLKID_VCLK2_VENCI1]	    = &gxbb_vclk2_venci1.hw,
+		[CLKID_VCLK2_VENCP0]	    = &gxbb_vclk2_vencp0.hw,
+		[CLKID_VCLK2_VENCP1]	    = &gxbb_vclk2_vencp1.hw,
+		[CLKID_GCLK_VENCI_INT0]	    = &gxbb_gclk_venci_int0.hw,
+		[CLKID_GCLK_VENCI_INT]	    = &gxbb_gclk_vencp_int.hw,
+		[CLKID_DAC_CLK]		    = &gxbb_dac_clk.hw,
+		[CLKID_AOCLK_GATE]	    = &gxbb_aoclk_gate.hw,
+		[CLKID_IEC958_GATE]	    = &gxbb_iec958_gate.hw,
+		[CLKID_ENC480P]		    = &gxbb_enc480p.hw,
+		[CLKID_RNG1]		    = &gxbb_rng1.hw,
+		[CLKID_GCLK_VENCI_INT1]	    = &gxbb_gclk_venci_int1.hw,
+		[CLKID_VCLK2_VENCLMCC]	    = &gxbb_vclk2_venclmcc.hw,
+		[CLKID_VCLK2_VENCL]	    = &gxbb_vclk2_vencl.hw,
+		[CLKID_VCLK_OTHER]	    = &gxbb_vclk_other.hw,
+		[CLKID_EDP]		    = &gxbb_edp.hw,
+		[CLKID_AO_MEDIA_CPU]	    = &gxbb_ao_media_cpu.hw,
+		[CLKID_AO_AHB_SRAM]	    = &gxbb_ao_ahb_sram.hw,
+		[CLKID_AO_AHB_BUS]	    = &gxbb_ao_ahb_bus.hw,
+		[CLKID_AO_IFACE]	    = &gxbb_ao_iface.hw,
+		[CLKID_AO_I2C]		    = &gxbb_ao_i2c.hw,
+		[CLKID_SD_EMMC_A]	    = &gxbb_emmc_a.hw,
+		[CLKID_SD_EMMC_B]	    = &gxbb_emmc_b.hw,
+		[CLKID_SD_EMMC_C]	    = &gxbb_emmc_c.hw,
+		[CLKID_SAR_ADC_CLK]	    = &gxbb_sar_adc_clk.hw,
+		[CLKID_SAR_ADC_SEL]	    = &gxbb_sar_adc_clk_sel.hw,
+		[CLKID_SAR_ADC_DIV]	    = &gxbb_sar_adc_clk_div.hw,
+		[CLKID_MALI_0_SEL]	    = &gxbb_mali_0_sel.hw,
+		[CLKID_MALI_0_DIV]	    = &gxbb_mali_0_div.hw,
+		[CLKID_MALI_0]		    = &gxbb_mali_0.hw,
+		[CLKID_MALI_1_SEL]	    = &gxbb_mali_1_sel.hw,
+		[CLKID_MALI_1_DIV]	    = &gxbb_mali_1_div.hw,
+		[CLKID_MALI_1]		    = &gxbb_mali_1.hw,
+		[CLKID_MALI]		    = &gxbb_mali.hw,
+		[CLKID_CTS_AMCLK]	    = &gxbb_cts_amclk.hw,
+		[CLKID_CTS_AMCLK_SEL]	    = &gxbb_cts_amclk_sel.hw,
+		[CLKID_CTS_AMCLK_DIV]	    = &gxbb_cts_amclk_div.hw,
+		[CLKID_CTS_MCLK_I958]	    = &gxbb_cts_mclk_i958.hw,
+		[CLKID_CTS_MCLK_I958_SEL]   = &gxbb_cts_mclk_i958_sel.hw,
+		[CLKID_CTS_MCLK_I958_DIV]   = &gxbb_cts_mclk_i958_div.hw,
+		[CLKID_CTS_I958]	    = &gxbb_cts_i958.hw,
 	},
 	.num = NR_CLKS,
 };
@@ -810,13 +1291,20 @@ static struct meson_clk_pll *const gxbb_clk_plls[] = {
 	&gxbb_gp0_pll,
 };
 
+static struct meson_clk_pll *const gxl_clk_plls[] = {
+	&gxbb_fixed_pll,
+	&gxbb_hdmi_pll,
+	&gxbb_sys_pll,
+	&gxl_gp0_pll,
+};
+
 static struct meson_clk_mpll *const gxbb_clk_mplls[] = {
 	&gxbb_mpll0,
 	&gxbb_mpll1,
 	&gxbb_mpll2,
 };
 
-static struct clk_gate *gxbb_clk_gates[] = {
+static struct clk_gate *const gxbb_clk_gates[] = {
 	&gxbb_clk81,
 	&gxbb_ddr,
 	&gxbb_dos,
@@ -900,16 +1388,105 @@ static struct clk_gate *gxbb_clk_gates[] = {
 	&gxbb_emmc_b,
 	&gxbb_emmc_c,
 	&gxbb_sar_adc_clk,
+	&gxbb_mali_0,
+	&gxbb_mali_1,
+	&gxbb_cts_amclk,
+	&gxbb_cts_mclk_i958,
+};
+
+static struct clk_mux *const gxbb_clk_muxes[] = {
+	&gxbb_mpeg_clk_sel,
+	&gxbb_sar_adc_clk_sel,
+	&gxbb_mali_0_sel,
+	&gxbb_mali_1_sel,
+	&gxbb_mali,
+	&gxbb_cts_amclk_sel,
+	&gxbb_cts_mclk_i958_sel,
+	&gxbb_cts_i958,
+};
+
+static struct clk_divider *const gxbb_clk_dividers[] = {
+	&gxbb_mpeg_clk_div,
+	&gxbb_sar_adc_clk_div,
+	&gxbb_mali_0_div,
+	&gxbb_mali_1_div,
+	&gxbb_cts_mclk_i958_div,
+};
+
+static struct meson_clk_audio_divider *const gxbb_audio_dividers[] = {
+	&gxbb_cts_amclk_div,
+};
+
+struct clkc_data {
+	struct clk_gate *const *clk_gates;
+	unsigned int clk_gates_count;
+	struct meson_clk_mpll *const *clk_mplls;
+	unsigned int clk_mplls_count;
+	struct meson_clk_pll *const *clk_plls;
+	unsigned int clk_plls_count;
+	struct clk_mux *const *clk_muxes;
+	unsigned int clk_muxes_count;
+	struct clk_divider *const *clk_dividers;
+	unsigned int clk_dividers_count;
+	struct meson_clk_audio_divider *const *clk_audio_dividers;
+	unsigned int clk_audio_dividers_count;
+	struct meson_clk_cpu *cpu_clk;
+	struct clk_hw_onecell_data *hw_onecell_data;
+};
+
+static const struct clkc_data gxbb_clkc_data = {
+	.clk_gates = gxbb_clk_gates,
+	.clk_gates_count = ARRAY_SIZE(gxbb_clk_gates),
+	.clk_mplls = gxbb_clk_mplls,
+	.clk_mplls_count = ARRAY_SIZE(gxbb_clk_mplls),
+	.clk_plls = gxbb_clk_plls,
+	.clk_plls_count = ARRAY_SIZE(gxbb_clk_plls),
+	.clk_muxes = gxbb_clk_muxes,
+	.clk_muxes_count = ARRAY_SIZE(gxbb_clk_muxes),
+	.clk_dividers = gxbb_clk_dividers,
+	.clk_dividers_count = ARRAY_SIZE(gxbb_clk_dividers),
+	.clk_audio_dividers = gxbb_audio_dividers,
+	.clk_audio_dividers_count = ARRAY_SIZE(gxbb_audio_dividers),
+	.cpu_clk = &gxbb_cpu_clk,
+	.hw_onecell_data = &gxbb_hw_onecell_data,
+};
+
+static const struct clkc_data gxl_clkc_data = {
+	.clk_gates = gxbb_clk_gates,
+	.clk_gates_count = ARRAY_SIZE(gxbb_clk_gates),
+	.clk_mplls = gxbb_clk_mplls,
+	.clk_mplls_count = ARRAY_SIZE(gxbb_clk_mplls),
+	.clk_plls = gxl_clk_plls,
+	.clk_plls_count = ARRAY_SIZE(gxl_clk_plls),
+	.clk_muxes = gxbb_clk_muxes,
+	.clk_muxes_count = ARRAY_SIZE(gxbb_clk_muxes),
+	.clk_dividers = gxbb_clk_dividers,
+	.clk_dividers_count = ARRAY_SIZE(gxbb_clk_dividers),
+	.clk_audio_dividers = gxbb_audio_dividers,
+	.clk_audio_dividers_count = ARRAY_SIZE(gxbb_audio_dividers),
+	.cpu_clk = &gxbb_cpu_clk,
+	.hw_onecell_data = &gxl_hw_onecell_data,
+};
+
+static const struct of_device_id clkc_match_table[] = {
+	{ .compatible = "amlogic,gxbb-clkc", .data = &gxbb_clkc_data },
+	{ .compatible = "amlogic,gxl-clkc", .data = &gxl_clkc_data },
+	{},
 };
 
 static int gxbb_clkc_probe(struct platform_device *pdev)
 {
+	const struct clkc_data *clkc_data;
 	void __iomem *clk_base;
 	int ret, clkid, i;
 	struct clk_hw *parent_hw;
 	struct clk *parent_clk;
 	struct device *dev = &pdev->dev;
 
+	clkc_data = of_device_get_match_data(&pdev->dev);
+	if (!clkc_data)
+		return -EINVAL;
+
 	/*  Generic clocks and PLLs */
 	clk_base = of_iomap(dev->of_node, 0);
 	if (!clk_base) {
@@ -918,34 +1495,45 @@ static int gxbb_clkc_probe(struct platform_device *pdev)
 	}
 
 	/* Populate base address for PLLs */
-	for (i = 0; i < ARRAY_SIZE(gxbb_clk_plls); i++)
-		gxbb_clk_plls[i]->base = clk_base;
+	for (i = 0; i < clkc_data->clk_plls_count; i++)
+		clkc_data->clk_plls[i]->base = clk_base;
 
 	/* Populate base address for MPLLs */
-	for (i = 0; i < ARRAY_SIZE(gxbb_clk_mplls); i++)
-		gxbb_clk_mplls[i]->base = clk_base;
+	for (i = 0; i < clkc_data->clk_mplls_count; i++)
+		clkc_data->clk_mplls[i]->base = clk_base;
 
 	/* Populate the base address for CPU clk */
-	gxbb_cpu_clk.base = clk_base;
+	clkc_data->cpu_clk->base = clk_base;
+
+	/* Populate base address for gates */
+	for (i = 0; i < clkc_data->clk_gates_count; i++)
+		clkc_data->clk_gates[i]->reg = clk_base +
+			(u64)clkc_data->clk_gates[i]->reg;
 
-	/* Populate the base address for the MPEG clks */
-	gxbb_mpeg_clk_sel.reg = clk_base + (u64)gxbb_mpeg_clk_sel.reg;
-	gxbb_mpeg_clk_div.reg = clk_base + (u64)gxbb_mpeg_clk_div.reg;
+	/* Populate base address for muxes */
+	for (i = 0; i < clkc_data->clk_muxes_count; i++)
+		clkc_data->clk_muxes[i]->reg = clk_base +
+			(u64)clkc_data->clk_muxes[i]->reg;
 
-	/* Populate the base address for the SAR ADC clks */
-	gxbb_sar_adc_clk_sel.reg = clk_base + (u64)gxbb_sar_adc_clk_sel.reg;
-	gxbb_sar_adc_clk_div.reg = clk_base + (u64)gxbb_sar_adc_clk_div.reg;
+	/* Populate base address for dividers */
+	for (i = 0; i < clkc_data->clk_dividers_count; i++)
+		clkc_data->clk_dividers[i]->reg = clk_base +
+			(u64)clkc_data->clk_dividers[i]->reg;
 
-	/* Populate base address for gates */
-	for (i = 0; i < ARRAY_SIZE(gxbb_clk_gates); i++)
-		gxbb_clk_gates[i]->reg = clk_base +
-			(u64)gxbb_clk_gates[i]->reg;
+	/* Populate base address for the audio dividers */
+	for (i = 0; i < clkc_data->clk_audio_dividers_count; i++)
+		clkc_data->clk_audio_dividers[i]->base = clk_base;
 
 	/*
 	 * register all clks
 	 */
-	for (clkid = 0; clkid < NR_CLKS; clkid++) {
-		ret = devm_clk_hw_register(dev, gxbb_hw_onecell_data.hws[clkid]);
+	for (clkid = 0; clkid < clkc_data->hw_onecell_data->num; clkid++) {
+		/* array might be sparse */
+		if (!clkc_data->hw_onecell_data->hws[clkid])
+			continue;
+
+		ret = devm_clk_hw_register(dev,
+					clkc_data->hw_onecell_data->hws[clkid]);
 		if (ret)
 			goto iounmap;
 	}
@@ -964,9 +1552,9 @@ static int gxbb_clkc_probe(struct platform_device *pdev)
 	 * a new clk_hw, and this hack will no longer work. Releasing the ccr
 	 * feature before that time solves the problem :-)
 	 */
-	parent_hw = clk_hw_get_parent(&gxbb_cpu_clk.hw);
+	parent_hw = clk_hw_get_parent(&clkc_data->cpu_clk->hw);
 	parent_clk = parent_hw->clk;
-	ret = clk_notifier_register(parent_clk, &gxbb_cpu_clk.clk_nb);
+	ret = clk_notifier_register(parent_clk, &clkc_data->cpu_clk->clk_nb);
 	if (ret) {
 		pr_err("%s: failed to register clock notifier for cpu_clk\n",
 				__func__);
@@ -974,23 +1562,18 @@ static int gxbb_clkc_probe(struct platform_device *pdev)
 	}
 
 	return of_clk_add_hw_provider(dev->of_node, of_clk_hw_onecell_get,
-			&gxbb_hw_onecell_data);
+			clkc_data->hw_onecell_data);
 
 iounmap:
 	iounmap(clk_base);
 	return ret;
 }
 
-static const struct of_device_id gxbb_clkc_match_table[] = {
-	{ .compatible = "amlogic,gxbb-clkc" },
-	{ }
-};
-
 static struct platform_driver gxbb_driver = {
 	.probe		= gxbb_clkc_probe,
 	.driver		= {
 		.name	= "gxbb-clkc",
-		.of_match_table = gxbb_clkc_match_table,
+		.of_match_table = clkc_match_table,
 	},
 };
 
diff --git a/drivers/clk/meson/gxbb.h b/drivers/clk/meson/gxbb.h
index 945aefa4d251..93b8f07ee7af 100644
--- a/drivers/clk/meson/gxbb.h
+++ b/drivers/clk/meson/gxbb.h
@@ -71,6 +71,8 @@
 #define HHI_GP0_PLL_CNTL2		0x44 /* 0x11 offset in data sheet */
 #define HHI_GP0_PLL_CNTL3		0x48 /* 0x12 offset in data sheet */
 #define HHI_GP0_PLL_CNTL4		0x4c /* 0x13 offset in data sheet */
+#define	HHI_GP0_PLL_CNTL5		0x50 /* 0x14 offset in data sheet */
+#define	HHI_GP0_PLL_CNTL1		0x58 /* 0x16 offset in data sheet */
 
 #define HHI_XTAL_DIVN_CNTL		0xbc /* 0x2f offset in data sheet */
 #define HHI_TIMER90K			0xec /* 0x3b offset in data sheet */
@@ -275,8 +277,15 @@
 #define CLKID_MALI_1_DIV	 104
 /* CLKID_MALI_1	*/
 /* CLKID_MALI	*/
+#define CLKID_CTS_AMCLK		  107
+#define CLKID_CTS_AMCLK_SEL	  108
+#define CLKID_CTS_AMCLK_DIV	  109
+#define CLKID_CTS_MCLK_I958	  110
+#define CLKID_CTS_MCLK_I958_SEL	  111
+#define CLKID_CTS_MCLK_I958_DIV	  112
+#define CLKID_CTS_I958		  113
 
-#define NR_CLKS			  107
+#define NR_CLKS			  114
 
 /* include the CLKIDs that have been made part of the stable DT binding */
 #include <dt-bindings/clock/gxbb-clkc.h>
diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c
index 888494d4fb8a..e9985503165c 100644
--- a/drivers/clk/meson/meson8b.c
+++ b/drivers/clk/meson/meson8b.c
@@ -245,6 +245,96 @@ static struct clk_fixed_factor meson8b_fclk_div7 = {
 	},
 };
 
+static struct meson_clk_mpll meson8b_mpll0 = {
+	.sdm = {
+		.reg_off = HHI_MPLL_CNTL7,
+		.shift   = 0,
+		.width   = 14,
+	},
+	.sdm_en = {
+		.reg_off = HHI_MPLL_CNTL7,
+		.shift   = 15,
+		.width   = 1,
+	},
+	.n2 = {
+		.reg_off = HHI_MPLL_CNTL7,
+		.shift   = 16,
+		.width   = 9,
+	},
+	.en = {
+		.reg_off = HHI_MPLL_CNTL7,
+		.shift   = 14,
+		.width   = 1,
+	},
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll0",
+		.ops = &meson_clk_mpll_ops,
+		.parent_names = (const char *[]){ "fixed_pll" },
+		.num_parents = 1,
+	},
+};
+
+static struct meson_clk_mpll meson8b_mpll1 = {
+	.sdm = {
+		.reg_off = HHI_MPLL_CNTL8,
+		.shift   = 0,
+		.width   = 14,
+	},
+	.sdm_en = {
+		.reg_off = HHI_MPLL_CNTL8,
+		.shift   = 15,
+		.width   = 1,
+	},
+	.n2 = {
+		.reg_off = HHI_MPLL_CNTL8,
+		.shift   = 16,
+		.width   = 9,
+	},
+	.en = {
+		.reg_off = HHI_MPLL_CNTL8,
+		.shift   = 14,
+		.width   = 1,
+	},
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll1",
+		.ops = &meson_clk_mpll_ops,
+		.parent_names = (const char *[]){ "fixed_pll" },
+		.num_parents = 1,
+	},
+};
+
+static struct meson_clk_mpll meson8b_mpll2 = {
+	.sdm = {
+		.reg_off = HHI_MPLL_CNTL9,
+		.shift   = 0,
+		.width   = 14,
+	},
+	.sdm_en = {
+		.reg_off = HHI_MPLL_CNTL9,
+		.shift   = 15,
+		.width   = 1,
+	},
+	.n2 = {
+		.reg_off = HHI_MPLL_CNTL9,
+		.shift   = 16,
+		.width   = 9,
+	},
+	.en = {
+		.reg_off = HHI_MPLL_CNTL9,
+		.shift   = 14,
+		.width   = 1,
+	},
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll2",
+		.ops = &meson_clk_mpll_ops,
+		.parent_names = (const char *[]){ "fixed_pll" },
+		.num_parents = 1,
+	},
+};
+
 /*
  * FIXME cpu clocks and the legacy composite clocks (e.g. clk81) are both PLL
  * post-dividers and should be modeled with their respective PLLs via the
@@ -491,6 +581,9 @@ static struct clk_hw_onecell_data meson8b_hw_onecell_data = {
 		[CLKID_AO_AHB_SRAM]	    = &meson8b_ao_ahb_sram.hw,
 		[CLKID_AO_AHB_BUS]	    = &meson8b_ao_ahb_bus.hw,
 		[CLKID_AO_IFACE]	    = &meson8b_ao_iface.hw,
+		[CLKID_MPLL0]		    = &meson8b_mpll0.hw,
+		[CLKID_MPLL1]		    = &meson8b_mpll1.hw,
+		[CLKID_MPLL2]		    = &meson8b_mpll2.hw,
 	},
 	.num = CLK_NR_CLKS,
 };
@@ -501,7 +594,13 @@ static struct meson_clk_pll *const meson8b_clk_plls[] = {
 	&meson8b_sys_pll,
 };
 
-static struct clk_gate *meson8b_clk_gates[] = {
+static struct meson_clk_mpll *const meson8b_clk_mplls[] = {
+	&meson8b_mpll0,
+	&meson8b_mpll1,
+	&meson8b_mpll2,
+};
+
+static struct clk_gate *const meson8b_clk_gates[] = {
 	&meson8b_clk81,
 	&meson8b_ddr,
 	&meson8b_dos,
@@ -582,6 +681,14 @@ static struct clk_gate *meson8b_clk_gates[] = {
 	&meson8b_ao_iface,
 };
 
+static struct clk_mux *const meson8b_clk_muxes[] = {
+	&meson8b_mpeg_clk_sel,
+};
+
+static struct clk_divider *const meson8b_clk_dividers[] = {
+	&meson8b_mpeg_clk_div,
+};
+
 static int meson8b_clkc_probe(struct platform_device *pdev)
 {
 	void __iomem *clk_base;
@@ -601,18 +708,28 @@ static int meson8b_clkc_probe(struct platform_device *pdev)
 	for (i = 0; i < ARRAY_SIZE(meson8b_clk_plls); i++)
 		meson8b_clk_plls[i]->base = clk_base;
 
+	/* Populate base address for MPLLs */
+	for (i = 0; i < ARRAY_SIZE(meson8b_clk_mplls); i++)
+		meson8b_clk_mplls[i]->base = clk_base;
+
 	/* Populate the base address for CPU clk */
 	meson8b_cpu_clk.base = clk_base;
 
-	/* Populate the base address for the MPEG clks */
-	meson8b_mpeg_clk_sel.reg = clk_base + (u32)meson8b_mpeg_clk_sel.reg;
-	meson8b_mpeg_clk_div.reg = clk_base + (u32)meson8b_mpeg_clk_div.reg;
-
 	/* Populate base address for gates */
 	for (i = 0; i < ARRAY_SIZE(meson8b_clk_gates); i++)
 		meson8b_clk_gates[i]->reg = clk_base +
 			(u32)meson8b_clk_gates[i]->reg;
 
+	/* Populate base address for muxes */
+	for (i = 0; i < ARRAY_SIZE(meson8b_clk_muxes); i++)
+		meson8b_clk_muxes[i]->reg = clk_base +
+			(u32)meson8b_clk_muxes[i]->reg;
+
+	/* Populate base address for dividers */
+	for (i = 0; i < ARRAY_SIZE(meson8b_clk_dividers); i++)
+		meson8b_clk_dividers[i]->reg = clk_base +
+			(u32)meson8b_clk_dividers[i]->reg;
+
 	/*
 	 * register all clks
 	 * CLKID_UNUSED = 0, so skip it and start with CLKID_XTAL = 1
diff --git a/drivers/clk/meson/meson8b.h b/drivers/clk/meson/meson8b.h
index 010e9582888d..3881defc8644 100644
--- a/drivers/clk/meson/meson8b.h
+++ b/drivers/clk/meson/meson8b.h
@@ -42,6 +42,21 @@
 #define HHI_VID_PLL_CNTL		0x320 /* 0xc8 offset in data sheet */
 
 /*
+ * MPLL register offeset taken from the S905 datasheet. Vendor kernel source
+ * confirm these are the same for the S805.
+ */
+#define HHI_MPLL_CNTL			0x280 /* 0xa0 offset in data sheet */
+#define HHI_MPLL_CNTL2			0x284 /* 0xa1 offset in data sheet */
+#define HHI_MPLL_CNTL3			0x288 /* 0xa2 offset in data sheet */
+#define HHI_MPLL_CNTL4			0x28C /* 0xa3 offset in data sheet */
+#define HHI_MPLL_CNTL5			0x290 /* 0xa4 offset in data sheet */
+#define HHI_MPLL_CNTL6			0x294 /* 0xa5 offset in data sheet */
+#define HHI_MPLL_CNTL7			0x298 /* 0xa6 offset in data sheet */
+#define HHI_MPLL_CNTL8			0x29C /* 0xa7 offset in data sheet */
+#define HHI_MPLL_CNTL9			0x2A0 /* 0xa8 offset in data sheet */
+#define HHI_MPLL_CNTL10			0x2A4 /* 0xa9 offset in data sheet */
+
+/*
  * CLKID index values
  *
  * These indices are entirely contrived and do not map onto the hardware.
@@ -142,8 +157,11 @@
 #define CLKID_AO_AHB_SRAM	90
 #define CLKID_AO_AHB_BUS	91
 #define CLKID_AO_IFACE		92
+#define CLKID_MPLL0		93
+#define CLKID_MPLL1		94
+#define CLKID_MPLL2		95
 
-#define CLK_NR_CLKS		93
+#define CLK_NR_CLKS		96
 
 /* include the CLKIDs that have been made part of the stable DT binding */
 #include <dt-bindings/clock/meson8b-clkc.h>
diff --git a/drivers/clk/mvebu/clk-cpu.c b/drivers/clk/mvebu/clk-cpu.c
index 044892b6534d..072aa38374ce 100644
--- a/drivers/clk/mvebu/clk-cpu.c
+++ b/drivers/clk/mvebu/clk-cpu.c
@@ -186,11 +186,11 @@ static void __init of_cpu_clk_setup(struct device_node *node)
 	for_each_node_by_type(dn, "cpu")
 		ncpus++;
 
-	cpuclk = kzalloc(ncpus * sizeof(*cpuclk), GFP_KERNEL);
+	cpuclk = kcalloc(ncpus, sizeof(*cpuclk), GFP_KERNEL);
 	if (WARN_ON(!cpuclk))
 		goto cpuclk_out;
 
-	clks = kzalloc(ncpus * sizeof(*clks), GFP_KERNEL);
+	clks = kcalloc(ncpus, sizeof(*clks), GFP_KERNEL);
 	if (WARN_ON(!clks))
 		goto clks_out;
 
diff --git a/drivers/clk/mvebu/common.c b/drivers/clk/mvebu/common.c
index 66be2e0c82b4..472c88b90256 100644
--- a/drivers/clk/mvebu/common.c
+++ b/drivers/clk/mvebu/common.c
@@ -126,7 +126,7 @@ void __init mvebu_coreclk_setup(struct device_node *np,
 	if (desc->get_refclk_freq)
 		clk_data.clk_num += 1;
 
-	clk_data.clks = kzalloc(clk_data.clk_num * sizeof(struct clk *),
+	clk_data.clks = kcalloc(clk_data.clk_num, sizeof(*clk_data.clks),
 				GFP_KERNEL);
 	if (WARN_ON(!clk_data.clks)) {
 		iounmap(base);
@@ -270,7 +270,7 @@ void __init mvebu_clk_gating_setup(struct device_node *np,
 		n++;
 
 	ctrl->num_gates = n;
-	ctrl->gates = kzalloc(ctrl->num_gates * sizeof(struct clk *),
+	ctrl->gates = kcalloc(ctrl->num_gates, sizeof(*ctrl->gates),
 			      GFP_KERNEL);
 	if (WARN_ON(!ctrl->gates))
 		goto gates_out;
diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c
index 3487c267833e..d990fe44aef3 100644
--- a/drivers/clk/qcom/clk-smd-rpm.c
+++ b/drivers/clk/qcom/clk-smd-rpm.c
@@ -165,7 +165,7 @@ static int clk_smd_rpm_handoff(struct clk_smd_rpm *r)
 	struct clk_smd_rpm_req req = {
 		.key = cpu_to_le32(r->rpm_key),
 		.nbytes = cpu_to_le32(sizeof(u32)),
-		.value = cpu_to_le32(INT_MAX),
+		.value = cpu_to_le32(r->branch ? 1 : INT_MAX),
 	};
 
 	ret = qcom_rpm_smd_write(r->rpm, QCOM_SMD_RPM_ACTIVE_STATE,
diff --git a/drivers/clk/qcom/mmcc-msm8996.c b/drivers/clk/qcom/mmcc-msm8996.c
index 9b97246287a7..352394d8fd8c 100644
--- a/drivers/clk/qcom/mmcc-msm8996.c
+++ b/drivers/clk/qcom/mmcc-msm8996.c
@@ -2944,6 +2944,7 @@ static struct gdsc venus_core0_gdsc = {
 	.pd = {
 		.name = "venus_core0",
 	},
+	.parent = &venus_gdsc.pd,
 	.pwrsts = PWRSTS_OFF_ON,
 	.flags = HW_CTRL,
 };
@@ -2955,6 +2956,7 @@ static struct gdsc venus_core1_gdsc = {
 	.pd = {
 		.name = "venus_core1",
 	},
+	.parent = &venus_gdsc.pd,
 	.pwrsts = PWRSTS_OFF_ON,
 	.flags = HW_CTRL,
 };
@@ -2986,7 +2988,7 @@ static struct gdsc vfe1_gdsc = {
 	.cxcs = (unsigned int []){ 0x36ac },
 	.cxc_count = 1,
 	.pd = {
-		.name = "vfe0",
+		.name = "vfe1",
 	},
 	.parent = &camss_gdsc.pd,
 	.pwrsts = PWRSTS_OFF_ON,
diff --git a/drivers/clk/renesas/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c
index bfffdb00df97..eaa98b488f01 100644
--- a/drivers/clk/renesas/r8a7795-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/soc/renesas/rcar-rst.h>
+#include <linux/sys_soc.h>
 
 #include <dt-bindings/clock/r8a7795-cpg-mssr.h>
 
@@ -24,7 +25,7 @@
 
 enum clk_ids {
 	/* Core Clock Outputs exported to DT */
-	LAST_DT_CORE_CLK = R8A7795_CLK_OSC,
+	LAST_DT_CORE_CLK = R8A7795_CLK_S0D12,
 
 	/* External Input Clocks */
 	CLK_EXTAL,
@@ -51,10 +52,10 @@ enum clk_ids {
 	MOD_CLK_BASE
 };
 
-static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
+static struct cpg_core_clk r8a7795_core_clks[] __initdata = {
 	/* External Clock Inputs */
-	DEF_INPUT("extal",  CLK_EXTAL),
-	DEF_INPUT("extalr", CLK_EXTALR),
+	DEF_INPUT("extal",      CLK_EXTAL),
+	DEF_INPUT("extalr",     CLK_EXTALR),
 
 	/* Internal Core Clocks */
 	DEF_BASE(".main",       CLK_MAIN, CLK_TYPE_GEN3_MAIN, CLK_EXTAL),
@@ -78,7 +79,12 @@ static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
 	DEF_FIXED("zt",         R8A7795_CLK_ZT,    CLK_PLL1_DIV2,  4, 1),
 	DEF_FIXED("zx",         R8A7795_CLK_ZX,    CLK_PLL1_DIV2,  2, 1),
 	DEF_FIXED("s0d1",       R8A7795_CLK_S0D1,  CLK_S0,         1, 1),
+	DEF_FIXED("s0d2",       R8A7795_CLK_S0D2,  CLK_S0,         2, 1),
+	DEF_FIXED("s0d3",       R8A7795_CLK_S0D3,  CLK_S0,         3, 1),
 	DEF_FIXED("s0d4",       R8A7795_CLK_S0D4,  CLK_S0,         4, 1),
+	DEF_FIXED("s0d6",       R8A7795_CLK_S0D6,  CLK_S0,         6, 1),
+	DEF_FIXED("s0d8",       R8A7795_CLK_S0D8,  CLK_S0,         8, 1),
+	DEF_FIXED("s0d12",      R8A7795_CLK_S0D12, CLK_S0,        12, 1),
 	DEF_FIXED("s1d1",       R8A7795_CLK_S1D1,  CLK_S1,         1, 1),
 	DEF_FIXED("s1d2",       R8A7795_CLK_S1D2,  CLK_S1,         2, 1),
 	DEF_FIXED("s1d4",       R8A7795_CLK_S1D4,  CLK_S1,         4, 1),
@@ -89,29 +95,29 @@ static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
 	DEF_FIXED("s3d2",       R8A7795_CLK_S3D2,  CLK_S3,         2, 1),
 	DEF_FIXED("s3d4",       R8A7795_CLK_S3D4,  CLK_S3,         4, 1),
 
-	DEF_GEN3_SD("sd0",      R8A7795_CLK_SD0,   CLK_SDSRC,     0x0074),
-	DEF_GEN3_SD("sd1",      R8A7795_CLK_SD1,   CLK_SDSRC,     0x0078),
-	DEF_GEN3_SD("sd2",      R8A7795_CLK_SD2,   CLK_SDSRC,     0x0268),
-	DEF_GEN3_SD("sd3",      R8A7795_CLK_SD3,   CLK_SDSRC,     0x026c),
+	DEF_GEN3_SD("sd0",      R8A7795_CLK_SD0,   CLK_SDSRC,     0x074),
+	DEF_GEN3_SD("sd1",      R8A7795_CLK_SD1,   CLK_SDSRC,     0x078),
+	DEF_GEN3_SD("sd2",      R8A7795_CLK_SD2,   CLK_SDSRC,     0x268),
+	DEF_GEN3_SD("sd3",      R8A7795_CLK_SD3,   CLK_SDSRC,     0x26c),
 
 	DEF_FIXED("cl",         R8A7795_CLK_CL,    CLK_PLL1_DIV2, 48, 1),
 	DEF_FIXED("cp",         R8A7795_CLK_CP,    CLK_EXTAL,      2, 1),
 
-	DEF_DIV6P1("mso",       R8A7795_CLK_MSO,   CLK_PLL1_DIV4, 0x014),
-	DEF_DIV6P1("hdmi",      R8A7795_CLK_HDMI,  CLK_PLL1_DIV4, 0x250),
 	DEF_DIV6P1("canfd",     R8A7795_CLK_CANFD, CLK_PLL1_DIV4, 0x244),
 	DEF_DIV6P1("csi0",      R8A7795_CLK_CSI0,  CLK_PLL1_DIV4, 0x00c),
+	DEF_DIV6P1("mso",       R8A7795_CLK_MSO,   CLK_PLL1_DIV4, 0x014),
+	DEF_DIV6P1("hdmi",      R8A7795_CLK_HDMI,  CLK_PLL1_DIV4, 0x250),
 
-	DEF_DIV6_RO("osc",      R8A7795_CLK_OSC,   CLK_EXTAL, CPG_RCKCR, 8),
+	DEF_DIV6_RO("osc",      R8A7795_CLK_OSC,   CLK_EXTAL, CPG_RCKCR,  8),
 	DEF_DIV6_RO("r_int",    CLK_RINT,          CLK_EXTAL, CPG_RCKCR, 32),
 
-	DEF_BASE("r",           R8A7795_CLK_R, CLK_TYPE_GEN3_R, CLK_RINT),
+	DEF_BASE("r",           R8A7795_CLK_R,     CLK_TYPE_GEN3_R, CLK_RINT),
 };
 
-static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
-	DEF_MOD("fdp1-2",		 117,	R8A7795_CLK_S2D1),
-	DEF_MOD("fdp1-1",		 118,	R8A7795_CLK_S2D1),
-	DEF_MOD("fdp1-0",		 119,	R8A7795_CLK_S2D1),
+static struct mssr_mod_clk r8a7795_mod_clks[] __initdata = {
+	DEF_MOD("fdp1-2",		 117,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("fdp1-1",		 118,	R8A7795_CLK_S0D1),
+	DEF_MOD("fdp1-0",		 119,	R8A7795_CLK_S0D1),
 	DEF_MOD("scif5",		 202,	R8A7795_CLK_S3D4),
 	DEF_MOD("scif4",		 203,	R8A7795_CLK_S3D4),
 	DEF_MOD("scif3",		 204,	R8A7795_CLK_S3D4),
@@ -121,9 +127,9 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("msiof2",		 209,	R8A7795_CLK_MSO),
 	DEF_MOD("msiof1",		 210,	R8A7795_CLK_MSO),
 	DEF_MOD("msiof0",		 211,	R8A7795_CLK_MSO),
-	DEF_MOD("sys-dmac2",		 217,	R8A7795_CLK_S3D1),
-	DEF_MOD("sys-dmac1",		 218,	R8A7795_CLK_S3D1),
-	DEF_MOD("sys-dmac0",		 219,	R8A7795_CLK_S3D1),
+	DEF_MOD("sys-dmac2",		 217,	R8A7795_CLK_S0D3),
+	DEF_MOD("sys-dmac1",		 218,	R8A7795_CLK_S0D3),
+	DEF_MOD("sys-dmac0",		 219,	R8A7795_CLK_S0D3),
 	DEF_MOD("cmt3",			 300,	R8A7795_CLK_R),
 	DEF_MOD("cmt2",			 301,	R8A7795_CLK_R),
 	DEF_MOD("cmt1",			 302,	R8A7795_CLK_R),
@@ -135,15 +141,15 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("sdif0",		 314,	R8A7795_CLK_SD0),
 	DEF_MOD("pcie1",		 318,	R8A7795_CLK_S3D1),
 	DEF_MOD("pcie0",		 319,	R8A7795_CLK_S3D1),
-	DEF_MOD("usb3-if1",		 327,	R8A7795_CLK_S3D1),
+	DEF_MOD("usb3-if1",		 327,	R8A7795_CLK_S3D1), /* ES1.x */
 	DEF_MOD("usb3-if0",		 328,	R8A7795_CLK_S3D1),
 	DEF_MOD("usb-dmac0",		 330,	R8A7795_CLK_S3D1),
 	DEF_MOD("usb-dmac1",		 331,	R8A7795_CLK_S3D1),
-	DEF_MOD("rwdt0",		 402,	R8A7795_CLK_R),
+	DEF_MOD("rwdt",			 402,	R8A7795_CLK_R),
 	DEF_MOD("intc-ex",		 407,	R8A7795_CLK_CP),
 	DEF_MOD("intc-ap",		 408,	R8A7795_CLK_S3D1),
-	DEF_MOD("audmac0",		 502,	R8A7795_CLK_S3D4),
-	DEF_MOD("audmac1",		 501,	R8A7795_CLK_S3D4),
+	DEF_MOD("audmac1",		 501,	R8A7795_CLK_S0D3),
+	DEF_MOD("audmac0",		 502,	R8A7795_CLK_S0D3),
 	DEF_MOD("drif7",		 508,	R8A7795_CLK_S3D2),
 	DEF_MOD("drif6",		 509,	R8A7795_CLK_S3D2),
 	DEF_MOD("drif5",		 510,	R8A7795_CLK_S3D2),
@@ -159,35 +165,35 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("hscif0",		 520,	R8A7795_CLK_S3D1),
 	DEF_MOD("thermal",		 522,	R8A7795_CLK_CP),
 	DEF_MOD("pwm",			 523,	R8A7795_CLK_S3D4),
-	DEF_MOD("fcpvd3",		 600,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvd2",		 601,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvd1",		 602,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvd0",		 603,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvb1",		 606,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvb0",		 607,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvi2",		 609,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvi1",		 610,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvi0",		 611,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpf2",		 613,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpf1",		 614,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpf0",		 615,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpci1",		 616,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpci0",		 617,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpcs",		 619,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspd3",		 620,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspd2",		 621,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspd1",		 622,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspd0",		 623,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspbc",		 624,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspbd",		 626,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspi2",		 629,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspi1",		 630,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspi0",		 631,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvd3",		 600,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("fcpvd2",		 601,	R8A7795_CLK_S0D2),
+	DEF_MOD("fcpvd1",		 602,	R8A7795_CLK_S0D2),
+	DEF_MOD("fcpvd0",		 603,	R8A7795_CLK_S0D2),
+	DEF_MOD("fcpvb1",		 606,	R8A7795_CLK_S0D1),
+	DEF_MOD("fcpvb0",		 607,	R8A7795_CLK_S0D1),
+	DEF_MOD("fcpvi2",		 609,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("fcpvi1",		 610,	R8A7795_CLK_S0D1),
+	DEF_MOD("fcpvi0",		 611,	R8A7795_CLK_S0D1),
+	DEF_MOD("fcpf2",		 613,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("fcpf1",		 614,	R8A7795_CLK_S0D1),
+	DEF_MOD("fcpf0",		 615,	R8A7795_CLK_S0D1),
+	DEF_MOD("fcpci1",		 616,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("fcpci0",		 617,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("fcpcs",		 619,	R8A7795_CLK_S0D1),
+	DEF_MOD("vspd3",		 620,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("vspd2",		 621,	R8A7795_CLK_S0D2),
+	DEF_MOD("vspd1",		 622,	R8A7795_CLK_S0D2),
+	DEF_MOD("vspd0",		 623,	R8A7795_CLK_S0D2),
+	DEF_MOD("vspbc",		 624,	R8A7795_CLK_S0D1),
+	DEF_MOD("vspbd",		 626,	R8A7795_CLK_S0D1),
+	DEF_MOD("vspi2",		 629,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("vspi1",		 630,	R8A7795_CLK_S0D1),
+	DEF_MOD("vspi0",		 631,	R8A7795_CLK_S0D1),
 	DEF_MOD("ehci2",		 701,	R8A7795_CLK_S3D4),
 	DEF_MOD("ehci1",		 702,	R8A7795_CLK_S3D4),
 	DEF_MOD("ehci0",		 703,	R8A7795_CLK_S3D4),
 	DEF_MOD("hsusb",		 704,	R8A7795_CLK_S3D4),
-	DEF_MOD("csi21",		 713,	R8A7795_CLK_CSI0),
+	DEF_MOD("csi21",		 713,	R8A7795_CLK_CSI0), /* ES1.x */
 	DEF_MOD("csi20",		 714,	R8A7795_CLK_CSI0),
 	DEF_MOD("csi41",		 715,	R8A7795_CLK_CSI0),
 	DEF_MOD("csi40",		 716,	R8A7795_CLK_CSI0),
@@ -198,16 +204,20 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("lvds",			 727,	R8A7795_CLK_S0D4),
 	DEF_MOD("hdmi1",		 728,	R8A7795_CLK_HDMI),
 	DEF_MOD("hdmi0",		 729,	R8A7795_CLK_HDMI),
-	DEF_MOD("vin7",			 804,	R8A7795_CLK_S2D1),
-	DEF_MOD("vin6",			 805,	R8A7795_CLK_S2D1),
-	DEF_MOD("vin5",			 806,	R8A7795_CLK_S2D1),
-	DEF_MOD("vin4",			 807,	R8A7795_CLK_S2D1),
-	DEF_MOD("vin3",			 808,	R8A7795_CLK_S2D1),
-	DEF_MOD("vin2",			 809,	R8A7795_CLK_S2D1),
-	DEF_MOD("vin1",			 810,	R8A7795_CLK_S2D1),
-	DEF_MOD("vin0",			 811,	R8A7795_CLK_S2D1),
-	DEF_MOD("etheravb",		 812,	R8A7795_CLK_S3D2),
+	DEF_MOD("vin7",			 804,	R8A7795_CLK_S0D2),
+	DEF_MOD("vin6",			 805,	R8A7795_CLK_S0D2),
+	DEF_MOD("vin5",			 806,	R8A7795_CLK_S0D2),
+	DEF_MOD("vin4",			 807,	R8A7795_CLK_S0D2),
+	DEF_MOD("vin3",			 808,	R8A7795_CLK_S0D2),
+	DEF_MOD("vin2",			 809,	R8A7795_CLK_S0D2),
+	DEF_MOD("vin1",			 810,	R8A7795_CLK_S0D2),
+	DEF_MOD("vin0",			 811,	R8A7795_CLK_S0D2),
+	DEF_MOD("etheravb",		 812,	R8A7795_CLK_S0D6),
 	DEF_MOD("sata0",		 815,	R8A7795_CLK_S3D2),
+	DEF_MOD("imr3",			 820,	R8A7795_CLK_S0D2),
+	DEF_MOD("imr2",			 821,	R8A7795_CLK_S0D2),
+	DEF_MOD("imr1",			 822,	R8A7795_CLK_S0D2),
+	DEF_MOD("imr0",			 823,	R8A7795_CLK_S0D2),
 	DEF_MOD("gpio7",		 905,	R8A7795_CLK_CP),
 	DEF_MOD("gpio6",		 906,	R8A7795_CLK_CP),
 	DEF_MOD("gpio5",		 907,	R8A7795_CLK_CP),
@@ -310,6 +320,82 @@ static const struct rcar_gen3_cpg_pll_config cpg_pll_configs[16] __initconst = {
 	{ 2,		192,		192,	},
 };
 
+static const struct soc_device_attribute r8a7795es1[] __initconst = {
+	{ .soc_id = "r8a7795", .revision = "ES1.*" },
+	{ /* sentinel */ }
+};
+
+
+	/*
+	 * Fixups for R-Car H3 ES1.x
+	 */
+
+static const unsigned int r8a7795es1_mod_nullify[] __initconst = {
+	MOD_CLK_ID(326),			/* USB-DMAC3-0 */
+	MOD_CLK_ID(329),			/* USB-DMAC3-1 */
+	MOD_CLK_ID(700),			/* EHCI/OHCI3 */
+	MOD_CLK_ID(705),			/* HS-USB-IF3 */
+
+};
+
+static const struct mssr_mod_reparent r8a7795es1_mod_reparent[] __initconst = {
+	{ MOD_CLK_ID(118), R8A7795_CLK_S2D1 },	/* FDP1-1 */
+	{ MOD_CLK_ID(119), R8A7795_CLK_S2D1 },	/* FDP1-0 */
+	{ MOD_CLK_ID(217), R8A7795_CLK_S3D1 },	/* SYS-DMAC2 */
+	{ MOD_CLK_ID(218), R8A7795_CLK_S3D1 },	/* SYS-DMAC1 */
+	{ MOD_CLK_ID(219), R8A7795_CLK_S3D1 },	/* SYS-DMAC0 */
+	{ MOD_CLK_ID(501), R8A7795_CLK_S3D1 },	/* AUDMAC1 */
+	{ MOD_CLK_ID(502), R8A7795_CLK_S3D1 },	/* AUDMAC0 */
+	{ MOD_CLK_ID(601), R8A7795_CLK_S2D1 },	/* FCPVD2 */
+	{ MOD_CLK_ID(602), R8A7795_CLK_S2D1 },	/* FCPVD1 */
+	{ MOD_CLK_ID(603), R8A7795_CLK_S2D1 },	/* FCPVD0 */
+	{ MOD_CLK_ID(606), R8A7795_CLK_S2D1 },	/* FCPVB1 */
+	{ MOD_CLK_ID(607), R8A7795_CLK_S2D1 },	/* FCPVB0 */
+	{ MOD_CLK_ID(610), R8A7795_CLK_S2D1 },	/* FCPVI1 */
+	{ MOD_CLK_ID(611), R8A7795_CLK_S2D1 },	/* FCPVI0 */
+	{ MOD_CLK_ID(614), R8A7795_CLK_S2D1 },	/* FCPF1 */
+	{ MOD_CLK_ID(615), R8A7795_CLK_S2D1 },	/* FCPF0 */
+	{ MOD_CLK_ID(619), R8A7795_CLK_S2D1 },	/* FCPCS */
+	{ MOD_CLK_ID(621), R8A7795_CLK_S2D1 },	/* VSPD2 */
+	{ MOD_CLK_ID(622), R8A7795_CLK_S2D1 },	/* VSPD1 */
+	{ MOD_CLK_ID(623), R8A7795_CLK_S2D1 },	/* VSPD0 */
+	{ MOD_CLK_ID(624), R8A7795_CLK_S2D1 },	/* VSPBC */
+	{ MOD_CLK_ID(626), R8A7795_CLK_S2D1 },	/* VSPBD */
+	{ MOD_CLK_ID(630), R8A7795_CLK_S2D1 },	/* VSPI1 */
+	{ MOD_CLK_ID(631), R8A7795_CLK_S2D1 },	/* VSPI0 */
+	{ MOD_CLK_ID(804), R8A7795_CLK_S2D1 },	/* VIN7 */
+	{ MOD_CLK_ID(805), R8A7795_CLK_S2D1 },	/* VIN6 */
+	{ MOD_CLK_ID(806), R8A7795_CLK_S2D1 },	/* VIN5 */
+	{ MOD_CLK_ID(807), R8A7795_CLK_S2D1 },	/* VIN4 */
+	{ MOD_CLK_ID(808), R8A7795_CLK_S2D1 },	/* VIN3 */
+	{ MOD_CLK_ID(809), R8A7795_CLK_S2D1 },	/* VIN2 */
+	{ MOD_CLK_ID(810), R8A7795_CLK_S2D1 },	/* VIN1 */
+	{ MOD_CLK_ID(811), R8A7795_CLK_S2D1 },	/* VIN0 */
+	{ MOD_CLK_ID(812), R8A7795_CLK_S3D2 },	/* EAVB-IF */
+	{ MOD_CLK_ID(820), R8A7795_CLK_S2D1 },	/* IMR3 */
+	{ MOD_CLK_ID(821), R8A7795_CLK_S2D1 },	/* IMR2 */
+	{ MOD_CLK_ID(822), R8A7795_CLK_S2D1 },	/* IMR1 */
+	{ MOD_CLK_ID(823), R8A7795_CLK_S2D1 },	/* IMR0 */
+};
+
+
+	/*
+	 * Fixups for R-Car H3 ES2.x
+	 */
+
+static const unsigned int r8a7795es2_mod_nullify[] __initconst = {
+	MOD_CLK_ID(117),			/* FDP1-2 */
+	MOD_CLK_ID(327),			/* USB3-IF1 */
+	MOD_CLK_ID(600),			/* FCPVD3 */
+	MOD_CLK_ID(609),			/* FCPVI2 */
+	MOD_CLK_ID(613),			/* FCPF2 */
+	MOD_CLK_ID(616),			/* FCPCI1 */
+	MOD_CLK_ID(617),			/* FCPCI0 */
+	MOD_CLK_ID(620),			/* VSPD3 */
+	MOD_CLK_ID(629),			/* VSPI2 */
+	MOD_CLK_ID(713),			/* CSI21 */
+};
+
 static int __init r8a7795_cpg_mssr_init(struct device *dev)
 {
 	const struct rcar_gen3_cpg_pll_config *cpg_pll_config;
@@ -326,7 +412,26 @@ static int __init r8a7795_cpg_mssr_init(struct device *dev)
 		return -EINVAL;
 	}
 
-	return rcar_gen3_cpg_init(cpg_pll_config, CLK_EXTALR);
+	if (soc_device_match(r8a7795es1)) {
+		cpg_core_nullify_range(r8a7795_core_clks,
+				       ARRAY_SIZE(r8a7795_core_clks),
+				       R8A7795_CLK_S0D2, R8A7795_CLK_S0D12);
+		mssr_mod_nullify(r8a7795_mod_clks,
+				 ARRAY_SIZE(r8a7795_mod_clks),
+				 r8a7795es1_mod_nullify,
+				 ARRAY_SIZE(r8a7795es1_mod_nullify));
+		mssr_mod_reparent(r8a7795_mod_clks,
+				  ARRAY_SIZE(r8a7795_mod_clks),
+				  r8a7795es1_mod_reparent,
+				  ARRAY_SIZE(r8a7795es1_mod_reparent));
+	} else {
+		mssr_mod_nullify(r8a7795_mod_clks,
+				 ARRAY_SIZE(r8a7795_mod_clks),
+				 r8a7795es2_mod_nullify,
+				 ARRAY_SIZE(r8a7795es2_mod_nullify));
+	}
+
+	return rcar_gen3_cpg_init(cpg_pll_config, CLK_EXTALR, cpg_mode);
 }
 
 const struct cpg_mssr_info r8a7795_cpg_mssr_info __initconst = {
diff --git a/drivers/clk/renesas/r8a7796-cpg-mssr.c b/drivers/clk/renesas/r8a7796-cpg-mssr.c
index 11e084a56b0d..9d114b31b073 100644
--- a/drivers/clk/renesas/r8a7796-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7796-cpg-mssr.c
@@ -54,8 +54,8 @@ enum clk_ids {
 
 static const struct cpg_core_clk r8a7796_core_clks[] __initconst = {
 	/* External Clock Inputs */
-	DEF_INPUT("extal",  CLK_EXTAL),
-	DEF_INPUT("extalr", CLK_EXTALR),
+	DEF_INPUT("extal",      CLK_EXTAL),
+	DEF_INPUT("extalr",     CLK_EXTALR),
 
 	/* Internal Core Clocks */
 	DEF_BASE(".main",       CLK_MAIN, CLK_TYPE_GEN3_MAIN, CLK_EXTAL),
@@ -95,10 +95,10 @@ static const struct cpg_core_clk r8a7796_core_clks[] __initconst = {
 	DEF_FIXED("s3d2",       R8A7796_CLK_S3D2,  CLK_S3,         2, 1),
 	DEF_FIXED("s3d4",       R8A7796_CLK_S3D4,  CLK_S3,         4, 1),
 
-	DEF_GEN3_SD("sd0",      R8A7796_CLK_SD0,   CLK_SDSRC,    0x0074),
-	DEF_GEN3_SD("sd1",      R8A7796_CLK_SD1,   CLK_SDSRC,    0x0078),
-	DEF_GEN3_SD("sd2",      R8A7796_CLK_SD2,   CLK_SDSRC,    0x0268),
-	DEF_GEN3_SD("sd3",      R8A7796_CLK_SD3,   CLK_SDSRC,    0x026c),
+	DEF_GEN3_SD("sd0",      R8A7796_CLK_SD0,   CLK_SDSRC,     0x074),
+	DEF_GEN3_SD("sd1",      R8A7796_CLK_SD1,   CLK_SDSRC,     0x078),
+	DEF_GEN3_SD("sd2",      R8A7796_CLK_SD2,   CLK_SDSRC,     0x268),
+	DEF_GEN3_SD("sd3",      R8A7796_CLK_SD3,   CLK_SDSRC,     0x26c),
 
 	DEF_FIXED("cl",         R8A7796_CLK_CL,    CLK_PLL1_DIV2, 48, 1),
 	DEF_FIXED("cp",         R8A7796_CLK_CP,    CLK_EXTAL,      2, 1),
@@ -135,7 +135,7 @@ static const struct mssr_mod_clk r8a7796_mod_clks[] __initconst = {
 	DEF_MOD("sdif2",		 312,	R8A7796_CLK_SD2),
 	DEF_MOD("sdif1",		 313,	R8A7796_CLK_SD1),
 	DEF_MOD("sdif0",		 314,	R8A7796_CLK_SD0),
-	DEF_MOD("rwdt0",		 402,	R8A7796_CLK_R),
+	DEF_MOD("rwdt",			 402,	R8A7796_CLK_R),
 	DEF_MOD("intc-ap",		 408,	R8A7796_CLK_S3D1),
 	DEF_MOD("drif7",		 508,	R8A7796_CLK_S3D2),
 	DEF_MOD("drif6",		 509,	R8A7796_CLK_S3D2),
@@ -179,6 +179,8 @@ static const struct mssr_mod_clk r8a7796_mod_clks[] __initconst = {
 	DEF_MOD("vin1",			 810,	R8A7796_CLK_S0D2),
 	DEF_MOD("vin0",			 811,	R8A7796_CLK_S0D2),
 	DEF_MOD("etheravb",		 812,	R8A7796_CLK_S0D6),
+	DEF_MOD("imr1",			 822,	R8A7796_CLK_S0D2),
+	DEF_MOD("imr0",			 823,	R8A7796_CLK_S0D2),
 	DEF_MOD("gpio7",		 905,	R8A7796_CLK_S3D4),
 	DEF_MOD("gpio6",		 906,	R8A7796_CLK_S3D4),
 	DEF_MOD("gpio5",		 907,	R8A7796_CLK_S3D4),
@@ -271,7 +273,7 @@ static int __init r8a7796_cpg_mssr_init(struct device *dev)
 		return -EINVAL;
 	}
 
-	return rcar_gen3_cpg_init(cpg_pll_config, CLK_EXTALR);
+	return rcar_gen3_cpg_init(cpg_pll_config, CLK_EXTALR, cpg_mode);
 }
 
 const struct cpg_mssr_info r8a7796_cpg_mssr_info __initconst = {
diff --git a/drivers/clk/renesas/rcar-gen3-cpg.c b/drivers/clk/renesas/rcar-gen3-cpg.c
index 742f6dc7c156..3dee900522b7 100644
--- a/drivers/clk/renesas/rcar-gen3-cpg.c
+++ b/drivers/clk/renesas/rcar-gen3-cpg.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/sys_soc.h>
 
 #include "renesas-cpg-mssr.h"
 #include "rcar-gen3-cpg.h"
@@ -247,6 +248,27 @@ static struct clk * __init cpg_sd_clk_register(const struct cpg_core_clk *core,
 
 static const struct rcar_gen3_cpg_pll_config *cpg_pll_config __initdata;
 static unsigned int cpg_clk_extalr __initdata;
+static u32 cpg_mode __initdata;
+static u32 cpg_quirks __initdata;
+
+#define PLL_ERRATA	BIT(0)		/* Missing PLL0/2/4 post-divider */
+#define RCKCR_CKSEL	BIT(1)		/* Manual RCLK parent selection */
+
+static const struct soc_device_attribute cpg_quirks_match[] __initconst = {
+	{
+		.soc_id = "r8a7795", .revision = "ES1.0",
+		.data = (void *)(PLL_ERRATA | RCKCR_CKSEL),
+	},
+	{
+		.soc_id = "r8a7795", .revision = "ES1.*",
+		.data = (void *)RCKCR_CKSEL,
+	},
+	{
+		.soc_id = "r8a7796", .revision = "ES1.0",
+		.data = (void *)RCKCR_CKSEL,
+	},
+	{ /* sentinel */ }
+};
 
 struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
 	const struct cpg_core_clk *core, const struct cpg_mssr_info *info,
@@ -275,6 +297,8 @@ struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
 		 */
 		value = readl(base + CPG_PLL0CR);
 		mult = (((value >> 24) & 0x7f) + 1) * 2;
+		if (cpg_quirks & PLL_ERRATA)
+			mult *= 2;
 		break;
 
 	case CLK_TYPE_GEN3_PLL1:
@@ -290,6 +314,8 @@ struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
 		 */
 		value = readl(base + CPG_PLL2CR);
 		mult = (((value >> 24) & 0x7f) + 1) * 2;
+		if (cpg_quirks & PLL_ERRATA)
+			mult *= 2;
 		break;
 
 	case CLK_TYPE_GEN3_PLL3:
@@ -305,24 +331,33 @@ struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
 		 */
 		value = readl(base + CPG_PLL4CR);
 		mult = (((value >> 24) & 0x7f) + 1) * 2;
+		if (cpg_quirks & PLL_ERRATA)
+			mult *= 2;
 		break;
 
 	case CLK_TYPE_GEN3_SD:
 		return cpg_sd_clk_register(core, base, __clk_get_name(parent));
 
 	case CLK_TYPE_GEN3_R:
-		/*
-		 * RINT is default.
-		 * Only if EXTALR is populated, we switch to it.
-		 */
-		value = readl(base + CPG_RCKCR) & 0x3f;
-
-		if (clk_get_rate(clks[cpg_clk_extalr])) {
-			parent = clks[cpg_clk_extalr];
-			value |= BIT(15);
+		if (cpg_quirks & RCKCR_CKSEL) {
+			/*
+			 * RINT is default.
+			 * Only if EXTALR is populated, we switch to it.
+			 */
+			value = readl(base + CPG_RCKCR) & 0x3f;
+
+			if (clk_get_rate(clks[cpg_clk_extalr])) {
+				parent = clks[cpg_clk_extalr];
+				value |= BIT(15);
+			}
+
+			writel(value, base + CPG_RCKCR);
+			break;
 		}
 
-		writel(value, base + CPG_RCKCR);
+		/* Select parent clock of RCLK by MD28 */
+		if (cpg_mode & BIT(28))
+			parent = clks[cpg_clk_extalr];
 		break;
 
 	default:
@@ -334,9 +369,16 @@ struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
 }
 
 int __init rcar_gen3_cpg_init(const struct rcar_gen3_cpg_pll_config *config,
-			      unsigned int clk_extalr)
+			      unsigned int clk_extalr, u32 mode)
 {
+	const struct soc_device_attribute *attr;
+
 	cpg_pll_config = config;
 	cpg_clk_extalr = clk_extalr;
+	cpg_mode = mode;
+	attr = soc_device_match(cpg_quirks_match);
+	if (attr)
+		cpg_quirks = (uintptr_t)attr->data;
+	pr_debug("%s: mode = 0x%x quirks = 0x%x\n", __func__, mode, cpg_quirks);
 	return 0;
 }
diff --git a/drivers/clk/renesas/rcar-gen3-cpg.h b/drivers/clk/renesas/rcar-gen3-cpg.h
index f788f481dd42..073be54b5d03 100644
--- a/drivers/clk/renesas/rcar-gen3-cpg.h
+++ b/drivers/clk/renesas/rcar-gen3-cpg.h
@@ -37,6 +37,6 @@ struct clk *rcar_gen3_cpg_clk_register(struct device *dev,
 	const struct cpg_core_clk *core, const struct cpg_mssr_info *info,
 	struct clk **clks, void __iomem *base);
 int rcar_gen3_cpg_init(const struct rcar_gen3_cpg_pll_config *config,
-		       unsigned int clk_extalr);
+		       unsigned int clk_extalr, u32 mode);
 
 #endif
diff --git a/drivers/clk/renesas/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c
index eadcbd43ff88..99eeec6f24ec 100644
--- a/drivers/clk/renesas/renesas-cpg-mssr.c
+++ b/drivers/clk/renesas/renesas-cpg-mssr.c
@@ -265,6 +265,11 @@ static void __init cpg_mssr_register_core_clk(const struct cpg_core_clk *core,
 	WARN_DEBUG(id >= priv->num_core_clks);
 	WARN_DEBUG(PTR_ERR(priv->clks[id]) != -ENOENT);
 
+	if (!core->name) {
+		/* Skip NULLified clock */
+		return;
+	}
+
 	switch (core->type) {
 	case CLK_TYPE_IN:
 		clk = of_clk_get_by_name(priv->dev->of_node, core->name);
@@ -335,6 +340,11 @@ static void __init cpg_mssr_register_mod_clk(const struct mssr_mod_clk *mod,
 	WARN_DEBUG(mod->parent >= priv->num_core_clks + priv->num_mod_clks);
 	WARN_DEBUG(PTR_ERR(priv->clks[id]) != -ENOENT);
 
+	if (!mod->name) {
+		/* Skip NULLified clock */
+		return;
+	}
+
 	parent = priv->clks[mod->parent];
 	if (IS_ERR(parent)) {
 		clk = parent;
@@ -734,5 +744,45 @@ static int __init cpg_mssr_init(void)
 
 subsys_initcall(cpg_mssr_init);
 
+void __init cpg_core_nullify_range(struct cpg_core_clk *core_clks,
+				   unsigned int num_core_clks,
+				   unsigned int first_clk,
+				   unsigned int last_clk)
+{
+	unsigned int i;
+
+	for (i = 0; i < num_core_clks; i++)
+		if (core_clks[i].id >= first_clk &&
+		    core_clks[i].id <= last_clk)
+			core_clks[i].name = NULL;
+}
+
+void __init mssr_mod_nullify(struct mssr_mod_clk *mod_clks,
+			     unsigned int num_mod_clks,
+			     const unsigned int *clks, unsigned int n)
+{
+	unsigned int i, j;
+
+	for (i = 0, j = 0; i < num_mod_clks && j < n; i++)
+		if (mod_clks[i].id == clks[j]) {
+			mod_clks[i].name = NULL;
+			j++;
+		}
+}
+
+void __init mssr_mod_reparent(struct mssr_mod_clk *mod_clks,
+			      unsigned int num_mod_clks,
+			      const struct mssr_mod_reparent *clks,
+			      unsigned int n)
+{
+	unsigned int i, j;
+
+	for (i = 0, j = 0; i < num_mod_clks && j < n; i++)
+		if (mod_clks[i].id == clks[j].clk) {
+			mod_clks[i].parent = clks[j].parent;
+			j++;
+		}
+}
+
 MODULE_DESCRIPTION("Renesas CPG/MSSR Driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/renesas/renesas-cpg-mssr.h b/drivers/clk/renesas/renesas-cpg-mssr.h
index 4bb7a80c6469..148f4f0aa2a4 100644
--- a/drivers/clk/renesas/renesas-cpg-mssr.h
+++ b/drivers/clk/renesas/renesas-cpg-mssr.h
@@ -134,4 +134,26 @@ extern const struct cpg_mssr_info r8a7743_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a7745_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a7795_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a7796_cpg_mssr_info;
+
+
+    /*
+     * Helpers for fixing up clock tables depending on SoC revision
+     */
+
+struct mssr_mod_reparent {
+	unsigned int clk, parent;
+};
+
+
+extern void cpg_core_nullify_range(struct cpg_core_clk *core_clks,
+				   unsigned int num_core_clks,
+				   unsigned int first_clk,
+				   unsigned int last_clk);
+extern void mssr_mod_nullify(struct mssr_mod_clk *mod_clks,
+			     unsigned int num_mod_clks,
+			     const unsigned int *clks, unsigned int n);
+extern void mssr_mod_reparent(struct mssr_mod_clk *mod_clks,
+			      unsigned int num_mod_clks,
+			      const struct mssr_mod_reparent *clks,
+			      unsigned int n);
 #endif
diff --git a/drivers/clk/rockchip/Makefile b/drivers/clk/rockchip/Makefile
index 141971488f40..26b220c988b2 100644
--- a/drivers/clk/rockchip/Makefile
+++ b/drivers/clk/rockchip/Makefile
@@ -12,7 +12,7 @@ obj-y	+= clk-muxgrf.o
 obj-y	+= clk-ddr.o
 obj-$(CONFIG_RESET_CONTROLLER)	+= softrst.o
 
-obj-y	+= clk-rk1108.o
+obj-y	+= clk-rv1108.o
 obj-y	+= clk-rk3036.o
 obj-y	+= clk-rk3188.o
 obj-y	+= clk-rk3228.o
diff --git a/drivers/clk/rockchip/clk-pll.c b/drivers/clk/rockchip/clk-pll.c
index eec51893a7e6..dd0433d4753e 100644
--- a/drivers/clk/rockchip/clk-pll.c
+++ b/drivers/clk/rockchip/clk-pll.c
@@ -269,6 +269,7 @@ static int rockchip_rk3036_pll_enable(struct clk_hw *hw)
 
 	writel(HIWORD_UPDATE(0, RK3036_PLLCON1_PWRDOWN, 0),
 	       pll->reg_base + RK3036_PLLCON(1));
+	rockchip_pll_wait_lock(pll);
 
 	return 0;
 }
@@ -501,6 +502,7 @@ static int rockchip_rk3066_pll_enable(struct clk_hw *hw)
 
 	writel(HIWORD_UPDATE(0, RK3066_PLLCON3_PWRDOWN, 0),
 	       pll->reg_base + RK3066_PLLCON(3));
+	rockchip_pll_wait_lock(pll);
 
 	return 0;
 }
@@ -746,6 +748,7 @@ static int rockchip_rk3399_pll_enable(struct clk_hw *hw)
 
 	writel(HIWORD_UPDATE(0, RK3399_PLLCON3_PWRDOWN, 0),
 	       pll->reg_base + RK3399_PLLCON(3));
+	rockchip_rk3399_pll_wait_lock(pll);
 
 	return 0;
 }
diff --git a/drivers/clk/rockchip/clk-rk3328.c b/drivers/clk/rockchip/clk-rk3328.c
index 1e384e143504..b04f29774ee7 100644
--- a/drivers/clk/rockchip/clk-rk3328.c
+++ b/drivers/clk/rockchip/clk-rk3328.c
@@ -20,6 +20,7 @@
 #include <dt-bindings/clock/rk3328-cru.h>
 #include "clk.h"
 
+#define RK3328_GRF_SOC_CON4		0x410
 #define RK3328_GRF_SOC_STATUS0		0x480
 #define RK3328_GRF_MAC_CON1		0x904
 #define RK3328_GRF_MAC_CON2		0x908
@@ -214,6 +215,8 @@ PNAME(mux_mac2io_src_p)		= { "clk_mac2io_src",
 				    "gmac_clkin" };
 PNAME(mux_mac2phy_src_p)	= { "clk_mac2phy_src",
 				    "phy_50m_out" };
+PNAME(mux_mac2io_ext_p)		= { "clk_mac2io",
+				    "gmac_clkin" };
 
 static struct rockchip_pll_clock rk3328_pll_clks[] __initdata = {
 	[apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p,
@@ -680,6 +683,10 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 	COMPOSITE(SCLK_MAC2IO_OUT, "clk_mac2io_out", mux_2plls_p, 0,
 			RK3328_CLKSEL_CON(27), 15, 1, MFLAGS, 8, 5, DFLAGS,
 			RK3328_CLKGATE_CON(3), 5, GFLAGS),
+	MUXGRF(SCLK_MAC2IO, "clk_mac2io", mux_mac2io_src_p, CLK_SET_RATE_NO_REPARENT,
+			RK3328_GRF_MAC_CON1, 10, 1, MFLAGS),
+	MUXGRF(SCLK_MAC2IO_EXT, "clk_mac2io_ext", mux_mac2io_ext_p, CLK_SET_RATE_NO_REPARENT,
+			RK3328_GRF_SOC_CON4, 14, 1, MFLAGS),
 
 	COMPOSITE(SCLK_MAC2PHY_SRC, "clk_mac2phy_src", mux_2plls_p, 0,
 			RK3328_CLKSEL_CON(26), 7, 1, MFLAGS, 0, 5, DFLAGS,
@@ -691,6 +698,8 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 	COMPOSITE_NOMUX(SCLK_MAC2PHY_OUT, "clk_mac2phy_out", "clk_mac2phy", 0,
 			RK3328_CLKSEL_CON(26), 8, 2, DFLAGS,
 			RK3328_CLKGATE_CON(9), 2, GFLAGS),
+	MUXGRF(SCLK_MAC2PHY, "clk_mac2phy", mux_mac2phy_src_p, CLK_SET_RATE_NO_REPARENT,
+			RK3328_GRF_MAC_CON2, 10, 1, MFLAGS),
 
 	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
 
diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c
index 6cb474c593e7..024762d3214d 100644
--- a/drivers/clk/rockchip/clk-rk3368.c
+++ b/drivers/clk/rockchip/clk-rk3368.c
@@ -835,18 +835,18 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {
 	GATE(PCLK_PMU, "pclk_pmu", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 0, GFLAGS),
 
 	/* timer gates */
-	GATE(0, "sclk_timer15", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 11, GFLAGS),
-	GATE(0, "sclk_timer14", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 10, GFLAGS),
-	GATE(0, "sclk_timer13", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 9, GFLAGS),
-	GATE(0, "sclk_timer12", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 8, GFLAGS),
-	GATE(0, "sclk_timer11", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 7, GFLAGS),
-	GATE(0, "sclk_timer10", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 6, GFLAGS),
-	GATE(0, "sclk_timer05", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 5, GFLAGS),
-	GATE(0, "sclk_timer04", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 4, GFLAGS),
-	GATE(0, "sclk_timer03", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 3, GFLAGS),
-	GATE(0, "sclk_timer02", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 2, GFLAGS),
-	GATE(0, "sclk_timer01", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 1, GFLAGS),
-	GATE(0, "sclk_timer00", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 0, GFLAGS),
+	GATE(SCLK_TIMER15, "sclk_timer15", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 11, GFLAGS),
+	GATE(SCLK_TIMER14, "sclk_timer14", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 10, GFLAGS),
+	GATE(SCLK_TIMER13, "sclk_timer13", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 9, GFLAGS),
+	GATE(SCLK_TIMER12, "sclk_timer12", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 8, GFLAGS),
+	GATE(SCLK_TIMER11, "sclk_timer11", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 7, GFLAGS),
+	GATE(SCLK_TIMER10, "sclk_timer10", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 6, GFLAGS),
+	GATE(SCLK_TIMER05, "sclk_timer05", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 5, GFLAGS),
+	GATE(SCLK_TIMER04, "sclk_timer04", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 4, GFLAGS),
+	GATE(SCLK_TIMER03, "sclk_timer03", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 3, GFLAGS),
+	GATE(SCLK_TIMER02, "sclk_timer02", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 2, GFLAGS),
+	GATE(SCLK_TIMER01, "sclk_timer01", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 1, GFLAGS),
+	GATE(SCLK_TIMER00, "sclk_timer00", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 0, GFLAGS),
 };
 
 static const char *const rk3368_critical_clocks[] __initconst = {
@@ -858,6 +858,9 @@ static const char *const rk3368_critical_clocks[] __initconst = {
 	 */
 	"pclk_pwm1",
 	"pclk_pd_pmu",
+	"pclk_pd_alive",
+	"pclk_peri",
+	"hclk_peri",
 };
 
 static void __init rk3368_clk_init(struct device_node *np)
diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c
index 73121b144634..fa3cbef08776 100644
--- a/drivers/clk/rockchip/clk-rk3399.c
+++ b/drivers/clk/rockchip/clk-rk3399.c
@@ -1477,10 +1477,10 @@ static struct rockchip_clk_branch rk3399_clk_pmu_branches[] __initdata = {
 	GATE(PCLK_UART4_PMU, "pclk_uart4_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 14, GFLAGS),
 	GATE(PCLK_WDT_M0_PMU, "pclk_wdt_m0_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 15, GFLAGS),
 
-	GATE(FCLK_CM0S_PMU, "fclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 0, GFLAGS),
-	GATE(SCLK_CM0S_PMU, "sclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 1, GFLAGS),
-	GATE(HCLK_CM0S_PMU, "hclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 2, GFLAGS),
-	GATE(DCLK_CM0S_PMU, "dclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 3, GFLAGS),
+	GATE(FCLK_CM0S_PMU, "fclk_cm0s_pmu", "fclk_cm0s_src_pmu", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(2), 0, GFLAGS),
+	GATE(SCLK_CM0S_PMU, "sclk_cm0s_pmu", "fclk_cm0s_src_pmu", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(2), 1, GFLAGS),
+	GATE(HCLK_CM0S_PMU, "hclk_cm0s_pmu", "fclk_cm0s_src_pmu", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(2), 2, GFLAGS),
+	GATE(DCLK_CM0S_PMU, "dclk_cm0s_pmu", "fclk_cm0s_src_pmu", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(2), 3, GFLAGS),
 	GATE(HCLK_NOC_PMU, "hclk_noc_pmu", "fclk_cm0s_src_pmu", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(2), 5, GFLAGS),
 };
 
diff --git a/drivers/clk/rockchip/clk-rk1108.c b/drivers/clk/rockchip/clk-rv1108.c
index 92750d798e5d..7c05ab366348 100644
--- a/drivers/clk/rockchip/clk-rk1108.c
+++ b/drivers/clk/rockchip/clk-rv1108.c
@@ -18,16 +18,16 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/syscore_ops.h>
-#include <dt-bindings/clock/rk1108-cru.h>
+#include <dt-bindings/clock/rv1108-cru.h>
 #include "clk.h"
 
-#define RK1108_GRF_SOC_STATUS0	0x480
+#define RV1108_GRF_SOC_STATUS0	0x480
 
-enum rk1108_plls {
+enum rv1108_plls {
 	apll, dpll, gpll,
 };
 
-static struct rockchip_pll_rate_table rk1108_pll_rates[] = {
+static struct rockchip_pll_rate_table rv1108_pll_rates[] = {
 	/* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */
 	RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0),
 	RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0),
@@ -74,32 +74,32 @@ static struct rockchip_pll_rate_table rk1108_pll_rates[] = {
 	{ /* sentinel */ },
 };
 
-#define RK1108_DIV_CORE_MASK		0xf
-#define RK1108_DIV_CORE_SHIFT		4
+#define RV1108_DIV_CORE_MASK		0xf
+#define RV1108_DIV_CORE_SHIFT		4
 
-#define RK1108_CLKSEL0(_core_peri_div)	\
+#define RV1108_CLKSEL0(_core_peri_div)	\
 	{				\
-		.reg = RK1108_CLKSEL_CON(1),	\
-		.val = HIWORD_UPDATE(_core_peri_div, RK1108_DIV_CORE_MASK,\
-				RK1108_DIV_CORE_SHIFT)	\
+		.reg = RV1108_CLKSEL_CON(1),	\
+		.val = HIWORD_UPDATE(_core_peri_div, RV1108_DIV_CORE_MASK,\
+				RV1108_DIV_CORE_SHIFT)	\
 	}
 
-#define RK1108_CPUCLK_RATE(_prate, _core_peri_div)			\
+#define RV1108_CPUCLK_RATE(_prate, _core_peri_div)			\
 	{								\
 		.prate = _prate,					\
 		.divs = {						\
-			RK1108_CLKSEL0(_core_peri_div),		\
+			RV1108_CLKSEL0(_core_peri_div),		\
 		},							\
 	}
 
-static struct rockchip_cpuclk_rate_table rk1108_cpuclk_rates[] __initdata = {
-	RK1108_CPUCLK_RATE(816000000, 4),
-	RK1108_CPUCLK_RATE(600000000, 4),
-	RK1108_CPUCLK_RATE(312000000, 4),
+static struct rockchip_cpuclk_rate_table rv1108_cpuclk_rates[] __initdata = {
+	RV1108_CPUCLK_RATE(816000000, 4),
+	RV1108_CPUCLK_RATE(600000000, 4),
+	RV1108_CPUCLK_RATE(312000000, 4),
 };
 
-static const struct rockchip_cpuclk_reg_data rk1108_cpuclk_data = {
-	.core_reg = RK1108_CLKSEL_CON(0),
+static const struct rockchip_cpuclk_reg_data rv1108_cpuclk_data = {
+	.core_reg = RV1108_CLKSEL_CON(0),
 	.div_core_shift = 0,
 	.div_core_mask = 0x1f,
 	.mux_core_alt = 1,
@@ -131,13 +131,13 @@ PNAME(mux_i2s_out_p)		= { "i2s0_pre", "xin12m" };
 PNAME(mux_i2s1_p)		= { "i2s1_src", "i2s1_frac", "xin12m" };
 PNAME(mux_i2s2_p)		= { "i2s2_src", "i2s2_frac", "xin12m" };
 
-static struct rockchip_pll_clock rk1108_pll_clks[] __initdata = {
-	[apll] = PLL(pll_rk3399, PLL_APLL, "apll", mux_pll_p, 0, RK1108_PLL_CON(0),
-		     RK1108_PLL_CON(3), 8, 31, 0, rk1108_pll_rates),
-	[dpll] = PLL(pll_rk3399, PLL_DPLL, "dpll", mux_pll_p, 0, RK1108_PLL_CON(8),
-		     RK1108_PLL_CON(11), 8, 31, 0, NULL),
-	[gpll] = PLL(pll_rk3399, PLL_GPLL, "gpll", mux_pll_p, 0, RK1108_PLL_CON(16),
-		     RK1108_PLL_CON(19), 8, 31, ROCKCHIP_PLL_SYNC_RATE, rk1108_pll_rates),
+static struct rockchip_pll_clock rv1108_pll_clks[] __initdata = {
+	[apll] = PLL(pll_rk3399, PLL_APLL, "apll", mux_pll_p, 0, RV1108_PLL_CON(0),
+		     RV1108_PLL_CON(3), 8, 31, 0, rv1108_pll_rates),
+	[dpll] = PLL(pll_rk3399, PLL_DPLL, "dpll", mux_pll_p, 0, RV1108_PLL_CON(8),
+		     RV1108_PLL_CON(11), 8, 31, 0, NULL),
+	[gpll] = PLL(pll_rk3399, PLL_GPLL, "gpll", mux_pll_p, 0, RV1108_PLL_CON(16),
+		     RV1108_PLL_CON(19), 8, 31, ROCKCHIP_PLL_SYNC_RATE, rv1108_pll_rates),
 };
 
 #define MFLAGS CLK_MUX_HIWORD_MASK
@@ -145,56 +145,56 @@ static struct rockchip_pll_clock rk1108_pll_clks[] __initdata = {
 #define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE)
 #define IFLAGS ROCKCHIP_INVERTER_HIWORD_MASK
 
-static struct rockchip_clk_branch rk1108_uart0_fracmux __initdata =
+static struct rockchip_clk_branch rv1108_uart0_fracmux __initdata =
 	MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(13), 8, 2, MFLAGS);
+			RV1108_CLKSEL_CON(13), 8, 2, MFLAGS);
 
-static struct rockchip_clk_branch rk1108_uart1_fracmux __initdata =
+static struct rockchip_clk_branch rv1108_uart1_fracmux __initdata =
 	MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(14), 8, 2, MFLAGS);
+			RV1108_CLKSEL_CON(14), 8, 2, MFLAGS);
 
-static struct rockchip_clk_branch rk1108_uart2_fracmux __initdata =
+static struct rockchip_clk_branch rv1108_uart2_fracmux __initdata =
 	MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(15), 8, 2, MFLAGS);
+			RV1108_CLKSEL_CON(15), 8, 2, MFLAGS);
 
-static struct rockchip_clk_branch rk1108_i2s0_fracmux __initdata =
+static struct rockchip_clk_branch rv1108_i2s0_fracmux __initdata =
 	MUX(0, "i2s0_pre", mux_i2s0_pre_p, CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(5), 12, 2, MFLAGS);
+			RV1108_CLKSEL_CON(5), 12, 2, MFLAGS);
 
-static struct rockchip_clk_branch rk1108_i2s1_fracmux __initdata =
+static struct rockchip_clk_branch rv1108_i2s1_fracmux __initdata =
 	MUX(0, "i2s1_pre", mux_i2s1_p, CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(6), 12, 2, MFLAGS);
+			RV1108_CLKSEL_CON(6), 12, 2, MFLAGS);
 
-static struct rockchip_clk_branch rk1108_i2s2_fracmux __initdata =
+static struct rockchip_clk_branch rv1108_i2s2_fracmux __initdata =
 	MUX(0, "i2s2_pre", mux_i2s2_p, CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(7), 12, 2, MFLAGS);
+			RV1108_CLKSEL_CON(7), 12, 2, MFLAGS);
 
-static struct rockchip_clk_branch rk1108_clk_branches[] __initdata = {
+static struct rockchip_clk_branch rv1108_clk_branches[] __initdata = {
 	MUX(0, "hdmi_phy", mux_hdmiphy_phy_p, CLK_SET_RATE_PARENT,
-			RK1108_MISC_CON, 13, 2, MFLAGS),
+			RV1108_MISC_CON, 13, 2, MFLAGS),
 	MUX(0, "usb480m", mux_usb480m_pre_p, CLK_SET_RATE_PARENT,
-			RK1108_MISC_CON, 15, 2, MFLAGS),
+			RV1108_MISC_CON, 15, 2, MFLAGS),
 	/*
 	 * Clock-Architecture Diagram 2
 	 */
 
 	/* PD_CORE */
 	GATE(0, "dpll_core", "dpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(0), 1, GFLAGS),
+			RV1108_CLKGATE_CON(0), 1, GFLAGS),
 	GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(0), 0, GFLAGS),
+			RV1108_CLKGATE_CON(0), 0, GFLAGS),
 	GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(0), 2, GFLAGS),
+			RV1108_CLKGATE_CON(0), 2, GFLAGS),
 	COMPOSITE_NOMUX(0, "pclken_dbg", "armclk", CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(1), 4, 4, DFLAGS | CLK_DIVIDER_READ_ONLY,
-			RK1108_CLKGATE_CON(0), 5, GFLAGS),
+			RV1108_CLKSEL_CON(1), 4, 4, DFLAGS | CLK_DIVIDER_READ_ONLY,
+			RV1108_CLKGATE_CON(0), 5, GFLAGS),
 	COMPOSITE_NOMUX(ACLK_ENMCORE, "aclkenm_core", "armclk", CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(1), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY,
-			RK1108_CLKGATE_CON(0), 4, GFLAGS),
+			RV1108_CLKSEL_CON(1), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY,
+			RV1108_CLKGATE_CON(0), 4, GFLAGS),
 	GATE(ACLK_CORE, "aclk_core", "aclkenm_core", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(11), 0, GFLAGS),
+			RV1108_CLKGATE_CON(11), 0, GFLAGS),
 	GATE(0, "pclk_dbg", "pclken_dbg", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(11), 1, GFLAGS),
+			RV1108_CLKGATE_CON(11), 1, GFLAGS),
 
 	/* PD_RKVENC */
 
@@ -202,58 +202,58 @@ static struct rockchip_clk_branch rk1108_clk_branches[] __initdata = {
 
 	/* PD_PMU_wrapper */
 	COMPOSITE_NOMUX(0, "pmu_24m_ena", "gpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(38), 0, 5, DFLAGS,
-			RK1108_CLKGATE_CON(8), 12, GFLAGS),
+			RV1108_CLKSEL_CON(38), 0, 5, DFLAGS,
+			RV1108_CLKGATE_CON(8), 12, GFLAGS),
 	GATE(0, "pmu", "pmu_24m_ena", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(10), 0, GFLAGS),
+			RV1108_CLKGATE_CON(10), 0, GFLAGS),
 	GATE(0, "intmem1", "pmu_24m_ena", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(10), 1, GFLAGS),
+			RV1108_CLKGATE_CON(10), 1, GFLAGS),
 	GATE(0, "gpio0_pmu", "pmu_24m_ena", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(10), 2, GFLAGS),
+			RV1108_CLKGATE_CON(10), 2, GFLAGS),
 	GATE(0, "pmugrf", "pmu_24m_ena", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(10), 3, GFLAGS),
+			RV1108_CLKGATE_CON(10), 3, GFLAGS),
 	GATE(0, "pmu_noc", "pmu_24m_ena", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(10), 4, GFLAGS),
+			RV1108_CLKGATE_CON(10), 4, GFLAGS),
 	GATE(0, "i2c0_pmu_pclk", "pmu_24m_ena", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(10), 5, GFLAGS),
+			RV1108_CLKGATE_CON(10), 5, GFLAGS),
 	GATE(0, "pwm0_pmu_pclk", "pmu_24m_ena", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(10), 6, GFLAGS),
+			RV1108_CLKGATE_CON(10), 6, GFLAGS),
 	COMPOSITE(0, "pwm0_pmu_clk", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(12), 7, 1, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(8), 15, GFLAGS),
+			RV1108_CLKSEL_CON(12), 7, 1, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(8), 15, GFLAGS),
 	COMPOSITE(0, "i2c0_pmu_clk", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(19), 7, 1, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(8), 14, GFLAGS),
+			RV1108_CLKSEL_CON(19), 7, 1, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(8), 14, GFLAGS),
 	GATE(0, "pvtm_pmu", "xin24m", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(8), 13, GFLAGS),
+			RV1108_CLKGATE_CON(8), 13, GFLAGS),
 
 	/*
 	 * Clock-Architecture Diagram 4
 	 */
 	COMPOSITE(0, "aclk_vio0_2wrap_occ", mux_pll_src_4plls_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS,
-			RK1108_CLKGATE_CON(6), 0, GFLAGS),
+			RV1108_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS,
+			RV1108_CLKGATE_CON(6), 0, GFLAGS),
 	GATE(0, "aclk_vio0_pre", "aclk_vio0_2wrap_occ", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(17), 0, GFLAGS),
+			RV1108_CLKGATE_CON(17), 0, GFLAGS),
 	COMPOSITE_NOMUX(0, "hclk_vio_pre", "aclk_vio0_pre", 0,
-			RK1108_CLKSEL_CON(29), 0, 5, DFLAGS,
-			RK1108_CLKGATE_CON(7), 2, GFLAGS),
+			RV1108_CLKSEL_CON(29), 0, 5, DFLAGS,
+			RV1108_CLKGATE_CON(7), 2, GFLAGS),
 	COMPOSITE_NOMUX(0, "pclk_vio_pre", "aclk_vio0_pre", 0,
-			RK1108_CLKSEL_CON(29), 8, 5, DFLAGS,
-			RK1108_CLKGATE_CON(7), 3, GFLAGS),
+			RV1108_CLKSEL_CON(29), 8, 5, DFLAGS,
+			RV1108_CLKGATE_CON(7), 3, GFLAGS),
 
 	INVERTER(0, "pclk_vip", "ext_vip",
-			RK1108_CLKSEL_CON(31), 8, IFLAGS),
+			RV1108_CLKSEL_CON(31), 8, IFLAGS),
 	GATE(0, "pclk_isp_pre", "pclk_vip", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(7), 6, GFLAGS),
+			RV1108_CLKGATE_CON(7), 6, GFLAGS),
 	GATE(0, "pclk_isp", "pclk_isp_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(18), 10, GFLAGS),
+			RV1108_CLKGATE_CON(18), 10, GFLAGS),
 	GATE(0, "dclk_hdmiphy_src_gpll", "gpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(6), 5, GFLAGS),
+			RV1108_CLKGATE_CON(6), 5, GFLAGS),
 	GATE(0, "dclk_hdmiphy_src_dpll", "dpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(6), 4, GFLAGS),
+			RV1108_CLKGATE_CON(6), 4, GFLAGS),
 	COMPOSITE_NOGATE(0, "dclk_hdmiphy", mux_dclk_hdmiphy_pre_p, 0,
-			RK1108_CLKSEL_CON(32), 6, 2, MFLAGS, 8, 6, DFLAGS),
+			RV1108_CLKSEL_CON(32), 6, 2, MFLAGS, 8, 6, DFLAGS),
 
 	/*
 	 * Clock-Architecture Diagram 5
@@ -262,153 +262,153 @@ static struct rockchip_clk_branch rk1108_clk_branches[] __initdata = {
 	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
 
 	COMPOSITE(0, "i2s0_src", mux_pll_src_2plls_p, 0,
-			RK1108_CLKSEL_CON(5), 8, 1, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(2), 0, GFLAGS),
+			RV1108_CLKSEL_CON(5), 8, 1, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(2), 0, GFLAGS),
 	COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_src", CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(8), 0,
-			RK1108_CLKGATE_CON(2), 1, GFLAGS,
-			&rk1108_i2s0_fracmux),
+			RV1108_CLKSEL_CON(8), 0,
+			RV1108_CLKGATE_CON(2), 1, GFLAGS,
+			&rv1108_i2s0_fracmux),
 	GATE(SCLK_I2S0, "sclk_i2s0", "i2s0_pre", CLK_SET_RATE_PARENT,
-			RK1108_CLKGATE_CON(2), 2, GFLAGS),
+			RV1108_CLKGATE_CON(2), 2, GFLAGS),
 	COMPOSITE_NODIV(0, "i2s_out", mux_i2s_out_p, 0,
-			RK1108_CLKSEL_CON(5), 15, 1, MFLAGS,
-			RK1108_CLKGATE_CON(2), 3, GFLAGS),
+			RV1108_CLKSEL_CON(5), 15, 1, MFLAGS,
+			RV1108_CLKGATE_CON(2), 3, GFLAGS),
 
 	COMPOSITE(0, "i2s1_src", mux_pll_src_2plls_p, 0,
-			RK1108_CLKSEL_CON(6), 8, 1, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(2), 4, GFLAGS),
+			RV1108_CLKSEL_CON(6), 8, 1, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(2), 4, GFLAGS),
 	COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_src", CLK_SET_RATE_PARENT,
 			RK2928_CLKSEL_CON(9), 0,
 			RK2928_CLKGATE_CON(2), 5, GFLAGS,
-			&rk1108_i2s1_fracmux),
+			&rv1108_i2s1_fracmux),
 	GATE(SCLK_I2S1, "sclk_i2s1", "i2s1_pre", CLK_SET_RATE_PARENT,
-			RK1108_CLKGATE_CON(2), 6, GFLAGS),
+			RV1108_CLKGATE_CON(2), 6, GFLAGS),
 
 	COMPOSITE(0, "i2s2_src", mux_pll_src_2plls_p, 0,
-			RK1108_CLKSEL_CON(7), 8, 1, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 8, GFLAGS),
+			RV1108_CLKSEL_CON(7), 8, 1, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 8, GFLAGS),
 	COMPOSITE_FRACMUX(0, "i2s2_frac", "i2s2_src", CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(10), 0,
-			RK1108_CLKGATE_CON(2), 9, GFLAGS,
-			&rk1108_i2s2_fracmux),
+			RV1108_CLKSEL_CON(10), 0,
+			RV1108_CLKGATE_CON(2), 9, GFLAGS,
+			&rv1108_i2s2_fracmux),
 	GATE(SCLK_I2S2, "sclk_i2s2", "i2s2_pre", CLK_SET_RATE_PARENT,
-			RK1108_CLKGATE_CON(2), 10, GFLAGS),
+			RV1108_CLKGATE_CON(2), 10, GFLAGS),
 
 	/* PD_BUS */
 	GATE(0, "aclk_bus_src_gpll", "gpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 0, GFLAGS),
+			RV1108_CLKGATE_CON(1), 0, GFLAGS),
 	GATE(0, "aclk_bus_src_apll", "apll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 1, GFLAGS),
+			RV1108_CLKGATE_CON(1), 1, GFLAGS),
 	GATE(0, "aclk_bus_src_dpll", "dpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 2, GFLAGS),
+			RV1108_CLKGATE_CON(1), 2, GFLAGS),
 	COMPOSITE_NOGATE(ACLK_PRE, "aclk_bus_pre", mux_aclk_bus_src_p, 0,
-			RK1108_CLKSEL_CON(2), 8, 2, MFLAGS, 0, 5, DFLAGS),
+			RV1108_CLKSEL_CON(2), 8, 2, MFLAGS, 0, 5, DFLAGS),
 	COMPOSITE_NOMUX(0, "hclk_bus_pre", "aclk_bus_2wrap_occ", 0,
-			RK1108_CLKSEL_CON(3), 0, 5, DFLAGS,
-			RK1108_CLKGATE_CON(1), 4, GFLAGS),
+			RV1108_CLKSEL_CON(3), 0, 5, DFLAGS,
+			RV1108_CLKGATE_CON(1), 4, GFLAGS),
 	COMPOSITE_NOMUX(0, "pclken_bus", "aclk_bus_2wrap_occ", 0,
-			RK1108_CLKSEL_CON(3), 8, 5, DFLAGS,
-			RK1108_CLKGATE_CON(1), 5, GFLAGS),
+			RV1108_CLKSEL_CON(3), 8, 5, DFLAGS,
+			RV1108_CLKGATE_CON(1), 5, GFLAGS),
 	GATE(0, "pclk_bus_pre", "pclken_bus", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 6, GFLAGS),
+			RV1108_CLKGATE_CON(1), 6, GFLAGS),
 	GATE(0, "pclk_top_pre", "pclken_bus", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 7, GFLAGS),
+			RV1108_CLKGATE_CON(1), 7, GFLAGS),
 	GATE(0, "pclk_ddr_pre", "pclken_bus", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 8, GFLAGS),
+			RV1108_CLKGATE_CON(1), 8, GFLAGS),
 	GATE(0, "clk_timer0", "mux_pll_p", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 9, GFLAGS),
+			RV1108_CLKGATE_CON(1), 9, GFLAGS),
 	GATE(0, "clk_timer1", "mux_pll_p", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 10, GFLAGS),
+			RV1108_CLKGATE_CON(1), 10, GFLAGS),
 	GATE(0, "pclk_timer", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 4, GFLAGS),
+			RV1108_CLKGATE_CON(13), 4, GFLAGS),
 
 	COMPOSITE(0, "uart0_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(13), 12, 2, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 1, GFLAGS),
+			RV1108_CLKSEL_CON(13), 12, 2, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 1, GFLAGS),
 	COMPOSITE(0, "uart1_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(14), 12, 2, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 3, GFLAGS),
+			RV1108_CLKSEL_CON(14), 12, 2, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 3, GFLAGS),
 	COMPOSITE(0, "uart21_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(15), 12, 2, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 5, GFLAGS),
+			RV1108_CLKSEL_CON(15), 12, 2, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 5, GFLAGS),
 
 	COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(16), 0,
-			RK1108_CLKGATE_CON(3), 2, GFLAGS,
-			&rk1108_uart0_fracmux),
+			RV1108_CLKSEL_CON(16), 0,
+			RV1108_CLKGATE_CON(3), 2, GFLAGS,
+			&rv1108_uart0_fracmux),
 	COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(17), 0,
-			RK1108_CLKGATE_CON(3), 4, GFLAGS,
-			&rk1108_uart1_fracmux),
+			RV1108_CLKSEL_CON(17), 0,
+			RV1108_CLKGATE_CON(3), 4, GFLAGS,
+			&rv1108_uart1_fracmux),
 	COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_src", CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(18), 0,
-			RK1108_CLKGATE_CON(3), 6, GFLAGS,
-			&rk1108_uart2_fracmux),
+			RV1108_CLKSEL_CON(18), 0,
+			RV1108_CLKGATE_CON(3), 6, GFLAGS,
+			&rv1108_uart2_fracmux),
 	GATE(PCLK_UART0, "pclk_uart0", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 10, GFLAGS),
+			RV1108_CLKGATE_CON(13), 10, GFLAGS),
 	GATE(PCLK_UART1, "pclk_uart1", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 11, GFLAGS),
+			RV1108_CLKGATE_CON(13), 11, GFLAGS),
 	GATE(PCLK_UART2, "pclk_uart2", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 12, GFLAGS),
+			RV1108_CLKGATE_CON(13), 12, GFLAGS),
 
 	COMPOSITE(0, "clk_i2c1", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(19), 15, 2, MFLAGS, 8, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 7, GFLAGS),
+			RV1108_CLKSEL_CON(19), 15, 2, MFLAGS, 8, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 7, GFLAGS),
 	COMPOSITE(0, "clk_i2c2", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(20), 7, 2, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 8, GFLAGS),
+			RV1108_CLKSEL_CON(20), 7, 2, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 8, GFLAGS),
 	COMPOSITE(0, "clk_i2c3", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(20), 15, 2, MFLAGS, 8, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 9, GFLAGS),
+			RV1108_CLKSEL_CON(20), 15, 2, MFLAGS, 8, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 9, GFLAGS),
 	GATE(0, "pclk_i2c1", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 0, GFLAGS),
+			RV1108_CLKGATE_CON(13), 0, GFLAGS),
 	GATE(0, "pclk_i2c2", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 1, GFLAGS),
+			RV1108_CLKGATE_CON(13), 1, GFLAGS),
 	GATE(0, "pclk_i2c3", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 2, GFLAGS),
+			RV1108_CLKGATE_CON(13), 2, GFLAGS),
 	COMPOSITE(0, "clk_pwm1", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(12), 15, 2, MFLAGS, 8, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 10, GFLAGS),
+			RV1108_CLKSEL_CON(12), 15, 2, MFLAGS, 8, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 10, GFLAGS),
 	GATE(0, "pclk_pwm1", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 6, GFLAGS),
+			RV1108_CLKGATE_CON(13), 6, GFLAGS),
 	GATE(0, "pclk_wdt", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 3, GFLAGS),
+			RV1108_CLKGATE_CON(13), 3, GFLAGS),
 	GATE(0, "pclk_gpio1", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 7, GFLAGS),
+			RV1108_CLKGATE_CON(13), 7, GFLAGS),
 	GATE(0, "pclk_gpio2", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 8, GFLAGS),
+			RV1108_CLKGATE_CON(13), 8, GFLAGS),
 	GATE(0, "pclk_gpio3", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 9, GFLAGS),
+			RV1108_CLKGATE_CON(13), 9, GFLAGS),
 
 	GATE(0, "pclk_grf", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(14), 0, GFLAGS),
+			RV1108_CLKGATE_CON(14), 0, GFLAGS),
 
 	GATE(ACLK_DMAC, "aclk_dmac", "aclk_bus_pre", 0,
-	     RK1108_CLKGATE_CON(12), 2, GFLAGS),
+	     RV1108_CLKGATE_CON(12), 2, GFLAGS),
 	GATE(0, "hclk_rom", "hclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(12), 3, GFLAGS),
+			RV1108_CLKGATE_CON(12), 3, GFLAGS),
 	GATE(0, "aclk_intmem", "aclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(12), 1, GFLAGS),
+			RV1108_CLKGATE_CON(12), 1, GFLAGS),
 
 	/* PD_DDR */
 	GATE(0, "apll_ddr", "apll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(0), 8, GFLAGS),
+			RV1108_CLKGATE_CON(0), 8, GFLAGS),
 	GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(0), 9, GFLAGS),
+			RV1108_CLKGATE_CON(0), 9, GFLAGS),
 	GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(0), 10, GFLAGS),
+			RV1108_CLKGATE_CON(0), 10, GFLAGS),
 	COMPOSITE(0, "ddrphy4x", mux_ddrphy_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(4), 8, 2, MFLAGS, 0, 3,
+			RV1108_CLKSEL_CON(4), 8, 2, MFLAGS, 0, 3,
 			DFLAGS | CLK_DIVIDER_POWER_OF_TWO,
-			RK1108_CLKGATE_CON(10), 9, GFLAGS),
+			RV1108_CLKGATE_CON(10), 9, GFLAGS),
 	GATE(0, "ddrupctl", "ddrphy_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(12), 4, GFLAGS),
+			RV1108_CLKGATE_CON(12), 4, GFLAGS),
 	GATE(0, "ddrc", "ddrphy", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(12), 5, GFLAGS),
+			RV1108_CLKGATE_CON(12), 5, GFLAGS),
 	GATE(0, "ddrmon", "ddrphy_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(12), 6, GFLAGS),
+			RV1108_CLKGATE_CON(12), 6, GFLAGS),
 	GATE(0, "timer_clk", "xin24m", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(0), 11, GFLAGS),
+			RV1108_CLKGATE_CON(0), 11, GFLAGS),
 
 	/*
 	 * Clock-Architecture Diagram 6
@@ -416,73 +416,73 @@ static struct rockchip_clk_branch rk1108_clk_branches[] __initdata = {
 
 	/* PD_PERI */
 	COMPOSITE_NOMUX(0, "pclk_periph_pre", "gpll", 0,
-			RK1108_CLKSEL_CON(23), 10, 5, DFLAGS,
-			RK1108_CLKGATE_CON(4), 5, GFLAGS),
+			RV1108_CLKSEL_CON(23), 10, 5, DFLAGS,
+			RV1108_CLKGATE_CON(4), 5, GFLAGS),
 	GATE(0, "pclk_periph", "pclk_periph_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(15), 13, GFLAGS),
+			RV1108_CLKGATE_CON(15), 13, GFLAGS),
 	COMPOSITE_NOMUX(0, "hclk_periph_pre", "gpll", 0,
-			RK1108_CLKSEL_CON(23), 5, 5, DFLAGS,
-			RK1108_CLKGATE_CON(4), 4, GFLAGS),
+			RV1108_CLKSEL_CON(23), 5, 5, DFLAGS,
+			RV1108_CLKGATE_CON(4), 4, GFLAGS),
 	GATE(0, "hclk_periph", "hclk_periph_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(15), 12, GFLAGS),
+			RV1108_CLKGATE_CON(15), 12, GFLAGS),
 
 	GATE(0, "aclk_peri_src_dpll", "dpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(4), 1, GFLAGS),
+			RV1108_CLKGATE_CON(4), 1, GFLAGS),
 	GATE(0, "aclk_peri_src_gpll", "gpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(4), 2, GFLAGS),
+			RV1108_CLKGATE_CON(4), 2, GFLAGS),
 	COMPOSITE(0, "aclk_periph", mux_aclk_peri_src_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(23), 15, 2, MFLAGS, 0, 5, DFLAGS,
-			RK1108_CLKGATE_CON(15), 11, GFLAGS),
+			RV1108_CLKSEL_CON(23), 15, 2, MFLAGS, 0, 5, DFLAGS,
+			RV1108_CLKGATE_CON(15), 11, GFLAGS),
 
 	COMPOSITE(SCLK_SDMMC, "sclk_sdmmc0", mux_mmc_src_p, 0,
-			RK1108_CLKSEL_CON(25), 8, 2, MFLAGS, 0, 8, DFLAGS,
-			RK1108_CLKGATE_CON(5), 0, GFLAGS),
+			RV1108_CLKSEL_CON(25), 8, 2, MFLAGS, 0, 8, DFLAGS,
+			RV1108_CLKGATE_CON(5), 0, GFLAGS),
 
 	COMPOSITE_NODIV(0, "sclk_sdio_src", mux_mmc_src_p, 0,
-			RK1108_CLKSEL_CON(25), 10, 2, MFLAGS,
-			RK1108_CLKGATE_CON(5), 2, GFLAGS),
+			RV1108_CLKSEL_CON(25), 10, 2, MFLAGS,
+			RV1108_CLKGATE_CON(5), 2, GFLAGS),
 	DIV(SCLK_SDIO, "sclk_sdio", "sclk_sdio_src", 0,
-			RK1108_CLKSEL_CON(26), 0, 8, DFLAGS),
+			RV1108_CLKSEL_CON(26), 0, 8, DFLAGS),
 
 	COMPOSITE_NODIV(0, "sclk_emmc_src", mux_mmc_src_p, 0,
-			RK1108_CLKSEL_CON(25), 12, 2, MFLAGS,
-			RK1108_CLKGATE_CON(5), 1, GFLAGS),
+			RV1108_CLKSEL_CON(25), 12, 2, MFLAGS,
+			RV1108_CLKGATE_CON(5), 1, GFLAGS),
 	DIV(SCLK_EMMC, "sclk_emmc", "sclk_emmc_src", 0,
 			RK2928_CLKSEL_CON(26), 8, 8, DFLAGS),
-	GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_periph", 0, RK1108_CLKGATE_CON(15), 0, GFLAGS),
-	GATE(HCLK_SDIO, "hclk_sdio", "hclk_periph", 0, RK1108_CLKGATE_CON(15), 1, GFLAGS),
-	GATE(HCLK_EMMC, "hclk_emmc", "hclk_periph", 0, RK1108_CLKGATE_CON(15), 2, GFLAGS),
+	GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 0, GFLAGS),
+	GATE(HCLK_SDIO, "hclk_sdio", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 1, GFLAGS),
+	GATE(HCLK_EMMC, "hclk_emmc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 2, GFLAGS),
 
 	COMPOSITE(SCLK_NANDC, "sclk_nandc", mux_pll_src_2plls_p, 0,
-			RK1108_CLKSEL_CON(27), 14, 2, MFLAGS, 8, 5, DFLAGS,
-			RK1108_CLKGATE_CON(5), 3, GFLAGS),
-	GATE(HCLK_NANDC, "hclk_nandc", "hclk_periph", 0, RK1108_CLKGATE_CON(15), 3, GFLAGS),
+			RV1108_CLKSEL_CON(27), 14, 2, MFLAGS, 8, 5, DFLAGS,
+			RV1108_CLKGATE_CON(5), 3, GFLAGS),
+	GATE(HCLK_NANDC, "hclk_nandc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 3, GFLAGS),
 
 	COMPOSITE(SCLK_SFC, "sclk_sfc", mux_pll_src_2plls_p, 0,
-			RK1108_CLKSEL_CON(27), 7, 2, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(5), 4, GFLAGS),
-	GATE(HCLK_SFC, "hclk_sfc", "hclk_periph", 0, RK1108_CLKGATE_CON(15), 10, GFLAGS),
+			RV1108_CLKSEL_CON(27), 7, 2, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(5), 4, GFLAGS),
+	GATE(HCLK_SFC, "hclk_sfc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 10, GFLAGS),
 
 	COMPOSITE(0, "sclk_macphy_pre", mux_pll_src_apll_gpll_p, 0,
-			RK1108_CLKSEL_CON(24), 12, 2, MFLAGS, 0, 5, DFLAGS,
-			RK1108_CLKGATE_CON(4), 10, GFLAGS),
+			RV1108_CLKSEL_CON(24), 12, 2, MFLAGS, 0, 5, DFLAGS,
+			RV1108_CLKGATE_CON(4), 10, GFLAGS),
 	MUX(0, "sclk_macphy", mux_sclk_macphy_p, CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(24), 8, 2, MFLAGS),
-	GATE(0, "sclk_macphy_rx", "sclk_macphy", 0, RK1108_CLKGATE_CON(4), 8, GFLAGS),
-	GATE(0, "sclk_mac_ref", "sclk_macphy", 0, RK1108_CLKGATE_CON(4), 6, GFLAGS),
-	GATE(0, "sclk_mac_refout", "sclk_macphy", 0, RK1108_CLKGATE_CON(4), 7, GFLAGS),
+			RV1108_CLKSEL_CON(24), 8, 2, MFLAGS),
+	GATE(0, "sclk_macphy_rx", "sclk_macphy", 0, RV1108_CLKGATE_CON(4), 8, GFLAGS),
+	GATE(0, "sclk_mac_ref", "sclk_macphy", 0, RV1108_CLKGATE_CON(4), 6, GFLAGS),
+	GATE(0, "sclk_mac_refout", "sclk_macphy", 0, RV1108_CLKGATE_CON(4), 7, GFLAGS),
 
-	MMC(SCLK_SDMMC_DRV,    "sdmmc_drv",    "sclk_sdmmc", RK1108_SDMMC_CON0, 1),
-	MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK1108_SDMMC_CON1, 1),
+	MMC(SCLK_SDMMC_DRV,    "sdmmc_drv",    "sclk_sdmmc", RV1108_SDMMC_CON0, 1),
+	MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RV1108_SDMMC_CON1, 1),
 
-	MMC(SCLK_SDIO_DRV,     "sdio_drv",     "sclk_sdio",  RK1108_SDIO_CON0,  1),
-	MMC(SCLK_SDIO_SAMPLE,  "sdio_sample",  "sclk_sdio",  RK1108_SDIO_CON1,  1),
+	MMC(SCLK_SDIO_DRV,     "sdio_drv",     "sclk_sdio",  RV1108_SDIO_CON0,  1),
+	MMC(SCLK_SDIO_SAMPLE,  "sdio_sample",  "sclk_sdio",  RV1108_SDIO_CON1,  1),
 
-	MMC(SCLK_EMMC_DRV,     "emmc_drv",     "sclk_emmc",  RK1108_EMMC_CON0,  1),
-	MMC(SCLK_EMMC_SAMPLE,  "emmc_sample",  "sclk_emmc",  RK1108_EMMC_CON1,  1),
+	MMC(SCLK_EMMC_DRV,     "emmc_drv",     "sclk_emmc",  RV1108_EMMC_CON0,  1),
+	MMC(SCLK_EMMC_SAMPLE,  "emmc_sample",  "sclk_emmc",  RV1108_EMMC_CON1,  1),
 };
 
-static const char *const rk1108_critical_clocks[] __initconst = {
+static const char *const rv1108_critical_clocks[] __initconst = {
 	"aclk_core",
 	"aclk_bus_src_gpll",
 	"aclk_periph",
@@ -490,7 +490,7 @@ static const char *const rk1108_critical_clocks[] __initconst = {
 	"pclk_periph",
 };
 
-static void __init rk1108_clk_init(struct device_node *np)
+static void __init rv1108_clk_init(struct device_node *np)
 {
 	struct rockchip_clk_provider *ctx;
 	void __iomem *reg_base;
@@ -508,24 +508,24 @@ static void __init rk1108_clk_init(struct device_node *np)
 		return;
 	}
 
-	rockchip_clk_register_plls(ctx, rk1108_pll_clks,
-				   ARRAY_SIZE(rk1108_pll_clks),
-				   RK1108_GRF_SOC_STATUS0);
-	rockchip_clk_register_branches(ctx, rk1108_clk_branches,
-				  ARRAY_SIZE(rk1108_clk_branches));
-	rockchip_clk_protect_critical(rk1108_critical_clocks,
-				      ARRAY_SIZE(rk1108_critical_clocks));
+	rockchip_clk_register_plls(ctx, rv1108_pll_clks,
+				   ARRAY_SIZE(rv1108_pll_clks),
+				   RV1108_GRF_SOC_STATUS0);
+	rockchip_clk_register_branches(ctx, rv1108_clk_branches,
+				  ARRAY_SIZE(rv1108_clk_branches));
+	rockchip_clk_protect_critical(rv1108_critical_clocks,
+				      ARRAY_SIZE(rv1108_critical_clocks));
 
 	rockchip_clk_register_armclk(ctx, ARMCLK, "armclk",
 			mux_armclk_p, ARRAY_SIZE(mux_armclk_p),
-			&rk1108_cpuclk_data, rk1108_cpuclk_rates,
-			ARRAY_SIZE(rk1108_cpuclk_rates));
+			&rv1108_cpuclk_data, rv1108_cpuclk_rates,
+			ARRAY_SIZE(rv1108_cpuclk_rates));
 
-	rockchip_register_softrst(np, 13, reg_base + RK1108_SOFTRST_CON(0),
+	rockchip_register_softrst(np, 13, reg_base + RV1108_SOFTRST_CON(0),
 				  ROCKCHIP_SOFTRST_HIWORD_MASK);
 
-	rockchip_register_restart_notifier(ctx, RK1108_GLB_SRST_FST, NULL);
+	rockchip_register_restart_notifier(ctx, RV1108_GLB_SRST_FST, NULL);
 
 	rockchip_clk_of_add_provider(np, ctx);
 }
-CLK_OF_DECLARE(rk1108_cru, "rockchip,rk1108-cru", rk1108_clk_init);
+CLK_OF_DECLARE(rv1108_cru, "rockchip,rv1108-cru", rv1108_clk_init);
diff --git a/drivers/clk/rockchip/clk.h b/drivers/clk/rockchip/clk.h
index 7c15473ea72b..ef601dded32c 100644
--- a/drivers/clk/rockchip/clk.h
+++ b/drivers/clk/rockchip/clk.h
@@ -34,20 +34,20 @@ struct clk;
 #define HIWORD_UPDATE(val, mask, shift) \
 		((val) << (shift) | (mask) << ((shift) + 16))
 
-/* register positions shared by RK1108, RK2928, RK3036, RK3066, RK3188 and RK3228 */
-#define RK1108_PLL_CON(x)		((x) * 0x4)
-#define RK1108_CLKSEL_CON(x)		((x) * 0x4 + 0x60)
-#define RK1108_CLKGATE_CON(x)		((x) * 0x4 + 0x120)
-#define RK1108_SOFTRST_CON(x)		((x) * 0x4 + 0x180)
-#define RK1108_GLB_SRST_FST		0x1c0
-#define RK1108_GLB_SRST_SND		0x1c4
-#define RK1108_MISC_CON			0x1cc
-#define RK1108_SDMMC_CON0		0x1d8
-#define RK1108_SDMMC_CON1		0x1dc
-#define RK1108_SDIO_CON0		0x1e0
-#define RK1108_SDIO_CON1		0x1e4
-#define RK1108_EMMC_CON0		0x1e8
-#define RK1108_EMMC_CON1		0x1ec
+/* register positions shared by RV1108, RK2928, RK3036, RK3066, RK3188 and RK3228 */
+#define RV1108_PLL_CON(x)		((x) * 0x4)
+#define RV1108_CLKSEL_CON(x)		((x) * 0x4 + 0x60)
+#define RV1108_CLKGATE_CON(x)		((x) * 0x4 + 0x120)
+#define RV1108_SOFTRST_CON(x)		((x) * 0x4 + 0x180)
+#define RV1108_GLB_SRST_FST		0x1c0
+#define RV1108_GLB_SRST_SND		0x1c4
+#define RV1108_MISC_CON			0x1cc
+#define RV1108_SDMMC_CON0		0x1d8
+#define RV1108_SDMMC_CON1		0x1dc
+#define RV1108_SDIO_CON0		0x1e0
+#define RV1108_SDIO_CON1		0x1e4
+#define RV1108_EMMC_CON0		0x1e8
+#define RV1108_EMMC_CON1		0x1ec
 
 #define RK2928_PLL_CON(x)		((x) * 0x4)
 #define RK2928_MODE_CON		0x40
diff --git a/drivers/clk/spear/spear6xx_clock.c b/drivers/clk/spear/spear6xx_clock.c
index 7c9383c3c2c6..f911d9f77763 100644
--- a/drivers/clk/spear/spear6xx_clock.c
+++ b/drivers/clk/spear/spear6xx_clock.c
@@ -313,7 +313,7 @@ void __init spear6xx_clk_init(void __iomem *misc_base)
 	/* clock derived from apb clk */
 	clk = clk_register_gate(NULL, "adc_clk", "apb_clk", 0, PERIP1_CLK_ENB,
 			ADC_CLK_ENB, 0, &_lock);
-	clk_register_clkdev(clk, NULL, "adc");
+	clk_register_clkdev(clk, NULL, "d820b000.adc");
 
 	clk = clk_register_fixed_factor(NULL, "gpio0_clk", "apb_clk", 0, 1, 1);
 	clk_register_clkdev(clk, NULL, "f0100000.gpio");
diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig
index a077ab6edffa..eb89c7801f00 100644
--- a/drivers/clk/sunxi-ng/Kconfig
+++ b/drivers/clk/sunxi-ng/Kconfig
@@ -64,6 +64,7 @@ config SUN50I_A64_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default ARM64 && ARCH_SUNXI
+	depends on (ARM64 && ARCH_SUNXI) || COMPILE_TEST
 
 config SUN5I_CCU
 	bool "Support for the Allwinner sun5i family CCM"
@@ -75,6 +76,7 @@ config SUN5I_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default MACH_SUN5I
+	depends on MACH_SUN5I || COMPILE_TEST
 
 config SUN6I_A31_CCU
 	bool "Support for the Allwinner A31/A31s CCU"
@@ -86,6 +88,7 @@ config SUN6I_A31_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default MACH_SUN6I
+	depends on MACH_SUN6I || COMPILE_TEST
 
 config SUN8I_A23_CCU
 	bool "Support for the Allwinner A23 CCU"
@@ -98,6 +101,7 @@ config SUN8I_A23_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default MACH_SUN8I
+	depends on MACH_SUN8I || COMPILE_TEST
 
 config SUN8I_A33_CCU
 	bool "Support for the Allwinner A33 CCU"
@@ -110,6 +114,7 @@ config SUN8I_A33_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default MACH_SUN8I
+	depends on MACH_SUN8I || COMPILE_TEST
 
 config SUN8I_H3_CCU
 	bool "Support for the Allwinner H3 CCU"
@@ -120,7 +125,8 @@ config SUN8I_H3_CCU
 	select SUNXI_CCU_NM
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
-	default MACH_SUN8I
+	default MACH_SUN8I || (ARM64 && ARCH_SUNXI)
+	depends on MACH_SUN8I || (ARM64 && ARCH_SUNXI) || COMPILE_TEST
 
 config SUN8I_V3S_CCU
 	bool "Support for the Allwinner V3s CCU"
@@ -132,6 +138,7 @@ config SUN8I_V3S_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default MACH_SUN8I
+	depends on MACH_SUN8I || COMPILE_TEST
 
 config SUN9I_A80_CCU
 	bool "Support for the Allwinner A80 CCU"
@@ -143,5 +150,13 @@ config SUN9I_A80_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default MACH_SUN9I
+	depends on MACH_SUN9I || COMPILE_TEST
+
+config SUN8I_R_CCU
+	bool "Support for Allwinner SoCs' PRCM CCUs"
+	select SUNXI_CCU_DIV
+	select SUNXI_CCU_GATE
+	select SUNXI_CCU_MP
+	default MACH_SUN8I || (ARCH_SUNXI && ARM64)
 
 endif
diff --git a/drivers/clk/sunxi-ng/Makefile b/drivers/clk/sunxi-ng/Makefile
index 6feaac0c5600..0ec02fe14c50 100644
--- a/drivers/clk/sunxi-ng/Makefile
+++ b/drivers/clk/sunxi-ng/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_SUN8I_A23_CCU)	+= ccu-sun8i-a23.o
 obj-$(CONFIG_SUN8I_A33_CCU)	+= ccu-sun8i-a33.o
 obj-$(CONFIG_SUN8I_H3_CCU)	+= ccu-sun8i-h3.o
 obj-$(CONFIG_SUN8I_V3S_CCU)	+= ccu-sun8i-v3s.o
+obj-$(CONFIG_SUN8I_R_CCU)	+= ccu-sun8i-r.o
 obj-$(CONFIG_SUN9I_A80_CCU)	+= ccu-sun9i-a80.o
 obj-$(CONFIG_SUN9I_A80_CCU)	+= ccu-sun9i-a80-de.o
 obj-$(CONFIG_SUN9I_A80_CCU)	+= ccu-sun9i-a80-usb.o
diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.h b/drivers/clk/sunxi-ng/ccu-sun50i-a64.h
index 9b3cd24b78d2..061b6fbb4f95 100644
--- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.h
+++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.h
@@ -31,7 +31,9 @@
 #define CLK_PLL_VIDEO0_2X		8
 #define CLK_PLL_VE			9
 #define CLK_PLL_DDR0			10
-#define CLK_PLL_PERIPH0			11
+
+/* PLL_PERIPH0 exported for PRCM */
+
 #define CLK_PLL_PERIPH0_2X		12
 #define CLK_PLL_PERIPH1			13
 #define CLK_PLL_PERIPH1_2X		14
diff --git a/drivers/clk/sunxi-ng/ccu-sun5i.c b/drivers/clk/sunxi-ng/ccu-sun5i.c
index 06edaa523479..5372bf8be5e6 100644
--- a/drivers/clk/sunxi-ng/ccu-sun5i.c
+++ b/drivers/clk/sunxi-ng/ccu-sun5i.c
@@ -243,7 +243,7 @@ static SUNXI_CCU_GATE(ahb_ss_clk,	"ahb-ss",	"ahb",
 static SUNXI_CCU_GATE(ahb_dma_clk,	"ahb-dma",	"ahb",
 		      0x060, BIT(6), 0);
 static SUNXI_CCU_GATE(ahb_bist_clk,	"ahb-bist",	"ahb",
-		      0x060, BIT(6), 0);
+		      0x060, BIT(7), 0);
 static SUNXI_CCU_GATE(ahb_mmc0_clk,	"ahb-mmc0",	"ahb",
 		      0x060, BIT(8), 0);
 static SUNXI_CCU_GATE(ahb_mmc1_clk,	"ahb-mmc1",	"ahb",
@@ -469,7 +469,7 @@ static const char * const csi_parents[] = { "hosc", "pll-video0", "pll-video1",
 static const u8 csi_table[] = { 0, 1, 2, 5, 6 };
 static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(csi_clk, "csi",
 				       csi_parents, csi_table,
-				       0x134, 0, 5, 24, 2, BIT(31), 0);
+				       0x134, 0, 5, 24, 3, BIT(31), 0);
 
 static SUNXI_CCU_GATE(ve_clk,		"ve",		"pll-ve",
 		      0x13c, BIT(31), CLK_SET_RATE_PARENT);
diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
index 89e68d29bf45..df97e25aec76 100644
--- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
+++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
@@ -556,7 +556,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(lcd0_ch1_clk, "lcd0-ch1", lcd_ch1_parents,
 				 0x12c, 0, 4, 24, 3, BIT(31),
 				 CLK_SET_RATE_PARENT);
 static SUNXI_CCU_M_WITH_MUX_GATE(lcd1_ch1_clk, "lcd1-ch1", lcd_ch1_parents,
-				 0x12c, 0, 4, 24, 3, BIT(31),
+				 0x130, 0, 4, 24, 3, BIT(31),
 				 CLK_SET_RATE_PARENT);
 
 static const char * const csi_sclk_parents[] = { "pll-video0", "pll-video1",
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
index 2c69b631967a..8d38e6510e29 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
@@ -159,13 +159,17 @@ static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_de_clk, "pll-de",
 					BIT(28),	/* lock */
 					CLK_SET_RATE_UNGATE);
 
-/* TODO: Fix N */
-static SUNXI_CCU_N_WITH_GATE_LOCK(pll_ddr1_clk, "pll-ddr1",
-				  "osc24M", 0x04c,
-				  8, 6,			/* N */
-				  BIT(31),		/* gate */
-				  BIT(28),		/* lock */
-				  CLK_SET_RATE_UNGATE);
+static struct ccu_mult pll_ddr1_clk = {
+	.enable	= BIT(31),
+	.lock	= BIT(28),
+	.mult	= _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 6, 0, 12, 0),
+	.common	= {
+		.reg		= 0x04c,
+		.hw.init	= CLK_HW_INIT("pll-ddr1", "osc24M",
+					      &ccu_mult_ops,
+					      CLK_SET_RATE_UNGATE),
+	},
+};
 
 static const char * const cpux_parents[] = { "osc32k", "osc24M",
 					     "pll-cpux" , "pll-cpux" };
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
index a26c8a19fe93..4cbc1b701b7c 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
@@ -300,8 +300,10 @@ static SUNXI_CCU_GATE(bus_uart2_clk,	"bus-uart2",	"apb2",
 		      0x06c, BIT(18), 0);
 static SUNXI_CCU_GATE(bus_uart3_clk,	"bus-uart3",	"apb2",
 		      0x06c, BIT(19), 0);
-static SUNXI_CCU_GATE(bus_scr_clk,	"bus-scr",	"apb2",
+static SUNXI_CCU_GATE(bus_scr0_clk,	"bus-scr0",	"apb2",
 		      0x06c, BIT(20), 0);
+static SUNXI_CCU_GATE(bus_scr1_clk,	"bus-scr1",	"apb2",
+		      0x06c, BIT(21), 0);
 
 static SUNXI_CCU_GATE(bus_ephy_clk,	"bus-ephy",	"ahb1",
 		      0x070, BIT(0), 0);
@@ -546,7 +548,7 @@ static struct ccu_common *sun8i_h3_ccu_clks[] = {
 	&bus_uart1_clk.common,
 	&bus_uart2_clk.common,
 	&bus_uart3_clk.common,
-	&bus_scr_clk.common,
+	&bus_scr0_clk.common,
 	&bus_ephy_clk.common,
 	&bus_dbg_clk.common,
 	&ths_clk.common,
@@ -597,6 +599,114 @@ static struct ccu_common *sun8i_h3_ccu_clks[] = {
 	&gpu_clk.common,
 };
 
+static struct ccu_common *sun50i_h5_ccu_clks[] = {
+	&pll_cpux_clk.common,
+	&pll_audio_base_clk.common,
+	&pll_video_clk.common,
+	&pll_ve_clk.common,
+	&pll_ddr_clk.common,
+	&pll_periph0_clk.common,
+	&pll_gpu_clk.common,
+	&pll_periph1_clk.common,
+	&pll_de_clk.common,
+	&cpux_clk.common,
+	&axi_clk.common,
+	&ahb1_clk.common,
+	&apb1_clk.common,
+	&apb2_clk.common,
+	&ahb2_clk.common,
+	&bus_ce_clk.common,
+	&bus_dma_clk.common,
+	&bus_mmc0_clk.common,
+	&bus_mmc1_clk.common,
+	&bus_mmc2_clk.common,
+	&bus_nand_clk.common,
+	&bus_dram_clk.common,
+	&bus_emac_clk.common,
+	&bus_ts_clk.common,
+	&bus_hstimer_clk.common,
+	&bus_spi0_clk.common,
+	&bus_spi1_clk.common,
+	&bus_otg_clk.common,
+	&bus_ehci0_clk.common,
+	&bus_ehci1_clk.common,
+	&bus_ehci2_clk.common,
+	&bus_ehci3_clk.common,
+	&bus_ohci0_clk.common,
+	&bus_ohci1_clk.common,
+	&bus_ohci2_clk.common,
+	&bus_ohci3_clk.common,
+	&bus_ve_clk.common,
+	&bus_tcon0_clk.common,
+	&bus_tcon1_clk.common,
+	&bus_deinterlace_clk.common,
+	&bus_csi_clk.common,
+	&bus_tve_clk.common,
+	&bus_hdmi_clk.common,
+	&bus_de_clk.common,
+	&bus_gpu_clk.common,
+	&bus_msgbox_clk.common,
+	&bus_spinlock_clk.common,
+	&bus_codec_clk.common,
+	&bus_spdif_clk.common,
+	&bus_pio_clk.common,
+	&bus_ths_clk.common,
+	&bus_i2s0_clk.common,
+	&bus_i2s1_clk.common,
+	&bus_i2s2_clk.common,
+	&bus_i2c0_clk.common,
+	&bus_i2c1_clk.common,
+	&bus_i2c2_clk.common,
+	&bus_uart0_clk.common,
+	&bus_uart1_clk.common,
+	&bus_uart2_clk.common,
+	&bus_uart3_clk.common,
+	&bus_scr0_clk.common,
+	&bus_scr1_clk.common,
+	&bus_ephy_clk.common,
+	&bus_dbg_clk.common,
+	&ths_clk.common,
+	&nand_clk.common,
+	&mmc0_clk.common,
+	&mmc1_clk.common,
+	&mmc2_clk.common,
+	&ts_clk.common,
+	&ce_clk.common,
+	&spi0_clk.common,
+	&spi1_clk.common,
+	&i2s0_clk.common,
+	&i2s1_clk.common,
+	&i2s2_clk.common,
+	&spdif_clk.common,
+	&usb_phy0_clk.common,
+	&usb_phy1_clk.common,
+	&usb_phy2_clk.common,
+	&usb_phy3_clk.common,
+	&usb_ohci0_clk.common,
+	&usb_ohci1_clk.common,
+	&usb_ohci2_clk.common,
+	&usb_ohci3_clk.common,
+	&dram_clk.common,
+	&dram_ve_clk.common,
+	&dram_csi_clk.common,
+	&dram_deinterlace_clk.common,
+	&dram_ts_clk.common,
+	&de_clk.common,
+	&tcon_clk.common,
+	&tve_clk.common,
+	&deinterlace_clk.common,
+	&csi_misc_clk.common,
+	&csi_sclk_clk.common,
+	&csi_mclk_clk.common,
+	&ve_clk.common,
+	&ac_dig_clk.common,
+	&avs_clk.common,
+	&hdmi_clk.common,
+	&hdmi_ddc_clk.common,
+	&mbus_clk.common,
+	&gpu_clk.common,
+};
+
 /* We hardcode the divider to 4 for now */
 static CLK_FIXED_FACTOR(pll_audio_clk, "pll-audio",
 			"pll-audio-base", 4, 1, CLK_SET_RATE_PARENT);
@@ -677,7 +787,7 @@ static struct clk_hw_onecell_data sun8i_h3_hw_clks = {
 		[CLK_BUS_UART1]		= &bus_uart1_clk.common.hw,
 		[CLK_BUS_UART2]		= &bus_uart2_clk.common.hw,
 		[CLK_BUS_UART3]		= &bus_uart3_clk.common.hw,
-		[CLK_BUS_SCR]		= &bus_scr_clk.common.hw,
+		[CLK_BUS_SCR0]		= &bus_scr0_clk.common.hw,
 		[CLK_BUS_EPHY]		= &bus_ephy_clk.common.hw,
 		[CLK_BUS_DBG]		= &bus_dbg_clk.common.hw,
 		[CLK_THS]		= &ths_clk.common.hw,
@@ -727,7 +837,123 @@ static struct clk_hw_onecell_data sun8i_h3_hw_clks = {
 		[CLK_MBUS]		= &mbus_clk.common.hw,
 		[CLK_GPU]		= &gpu_clk.common.hw,
 	},
-	.num	= CLK_NUMBER,
+	.num	= CLK_NUMBER_H3,
+};
+
+static struct clk_hw_onecell_data sun50i_h5_hw_clks = {
+	.hws	= {
+		[CLK_PLL_CPUX]		= &pll_cpux_clk.common.hw,
+		[CLK_PLL_AUDIO_BASE]	= &pll_audio_base_clk.common.hw,
+		[CLK_PLL_AUDIO]		= &pll_audio_clk.hw,
+		[CLK_PLL_AUDIO_2X]	= &pll_audio_2x_clk.hw,
+		[CLK_PLL_AUDIO_4X]	= &pll_audio_4x_clk.hw,
+		[CLK_PLL_AUDIO_8X]	= &pll_audio_8x_clk.hw,
+		[CLK_PLL_VIDEO]		= &pll_video_clk.common.hw,
+		[CLK_PLL_VE]		= &pll_ve_clk.common.hw,
+		[CLK_PLL_DDR]		= &pll_ddr_clk.common.hw,
+		[CLK_PLL_PERIPH0]	= &pll_periph0_clk.common.hw,
+		[CLK_PLL_PERIPH0_2X]	= &pll_periph0_2x_clk.hw,
+		[CLK_PLL_GPU]		= &pll_gpu_clk.common.hw,
+		[CLK_PLL_PERIPH1]	= &pll_periph1_clk.common.hw,
+		[CLK_PLL_DE]		= &pll_de_clk.common.hw,
+		[CLK_CPUX]		= &cpux_clk.common.hw,
+		[CLK_AXI]		= &axi_clk.common.hw,
+		[CLK_AHB1]		= &ahb1_clk.common.hw,
+		[CLK_APB1]		= &apb1_clk.common.hw,
+		[CLK_APB2]		= &apb2_clk.common.hw,
+		[CLK_AHB2]		= &ahb2_clk.common.hw,
+		[CLK_BUS_CE]		= &bus_ce_clk.common.hw,
+		[CLK_BUS_DMA]		= &bus_dma_clk.common.hw,
+		[CLK_BUS_MMC0]		= &bus_mmc0_clk.common.hw,
+		[CLK_BUS_MMC1]		= &bus_mmc1_clk.common.hw,
+		[CLK_BUS_MMC2]		= &bus_mmc2_clk.common.hw,
+		[CLK_BUS_NAND]		= &bus_nand_clk.common.hw,
+		[CLK_BUS_DRAM]		= &bus_dram_clk.common.hw,
+		[CLK_BUS_EMAC]		= &bus_emac_clk.common.hw,
+		[CLK_BUS_TS]		= &bus_ts_clk.common.hw,
+		[CLK_BUS_HSTIMER]	= &bus_hstimer_clk.common.hw,
+		[CLK_BUS_SPI0]		= &bus_spi0_clk.common.hw,
+		[CLK_BUS_SPI1]		= &bus_spi1_clk.common.hw,
+		[CLK_BUS_OTG]		= &bus_otg_clk.common.hw,
+		[CLK_BUS_EHCI0]		= &bus_ehci0_clk.common.hw,
+		[CLK_BUS_EHCI1]		= &bus_ehci1_clk.common.hw,
+		[CLK_BUS_EHCI2]		= &bus_ehci2_clk.common.hw,
+		[CLK_BUS_EHCI3]		= &bus_ehci3_clk.common.hw,
+		[CLK_BUS_OHCI0]		= &bus_ohci0_clk.common.hw,
+		[CLK_BUS_OHCI1]		= &bus_ohci1_clk.common.hw,
+		[CLK_BUS_OHCI2]		= &bus_ohci2_clk.common.hw,
+		[CLK_BUS_OHCI3]		= &bus_ohci3_clk.common.hw,
+		[CLK_BUS_VE]		= &bus_ve_clk.common.hw,
+		[CLK_BUS_TCON0]		= &bus_tcon0_clk.common.hw,
+		[CLK_BUS_TCON1]		= &bus_tcon1_clk.common.hw,
+		[CLK_BUS_DEINTERLACE]	= &bus_deinterlace_clk.common.hw,
+		[CLK_BUS_CSI]		= &bus_csi_clk.common.hw,
+		[CLK_BUS_TVE]		= &bus_tve_clk.common.hw,
+		[CLK_BUS_HDMI]		= &bus_hdmi_clk.common.hw,
+		[CLK_BUS_DE]		= &bus_de_clk.common.hw,
+		[CLK_BUS_GPU]		= &bus_gpu_clk.common.hw,
+		[CLK_BUS_MSGBOX]	= &bus_msgbox_clk.common.hw,
+		[CLK_BUS_SPINLOCK]	= &bus_spinlock_clk.common.hw,
+		[CLK_BUS_CODEC]		= &bus_codec_clk.common.hw,
+		[CLK_BUS_SPDIF]		= &bus_spdif_clk.common.hw,
+		[CLK_BUS_PIO]		= &bus_pio_clk.common.hw,
+		[CLK_BUS_THS]		= &bus_ths_clk.common.hw,
+		[CLK_BUS_I2S0]		= &bus_i2s0_clk.common.hw,
+		[CLK_BUS_I2S1]		= &bus_i2s1_clk.common.hw,
+		[CLK_BUS_I2S2]		= &bus_i2s2_clk.common.hw,
+		[CLK_BUS_I2C0]		= &bus_i2c0_clk.common.hw,
+		[CLK_BUS_I2C1]		= &bus_i2c1_clk.common.hw,
+		[CLK_BUS_I2C2]		= &bus_i2c2_clk.common.hw,
+		[CLK_BUS_UART0]		= &bus_uart0_clk.common.hw,
+		[CLK_BUS_UART1]		= &bus_uart1_clk.common.hw,
+		[CLK_BUS_UART2]		= &bus_uart2_clk.common.hw,
+		[CLK_BUS_UART3]		= &bus_uart3_clk.common.hw,
+		[CLK_BUS_SCR0]		= &bus_scr0_clk.common.hw,
+		[CLK_BUS_SCR1]		= &bus_scr1_clk.common.hw,
+		[CLK_BUS_EPHY]		= &bus_ephy_clk.common.hw,
+		[CLK_BUS_DBG]		= &bus_dbg_clk.common.hw,
+		[CLK_THS]		= &ths_clk.common.hw,
+		[CLK_NAND]		= &nand_clk.common.hw,
+		[CLK_MMC0]		= &mmc0_clk.common.hw,
+		[CLK_MMC1]		= &mmc1_clk.common.hw,
+		[CLK_MMC2]		= &mmc2_clk.common.hw,
+		[CLK_TS]		= &ts_clk.common.hw,
+		[CLK_CE]		= &ce_clk.common.hw,
+		[CLK_SPI0]		= &spi0_clk.common.hw,
+		[CLK_SPI1]		= &spi1_clk.common.hw,
+		[CLK_I2S0]		= &i2s0_clk.common.hw,
+		[CLK_I2S1]		= &i2s1_clk.common.hw,
+		[CLK_I2S2]		= &i2s2_clk.common.hw,
+		[CLK_SPDIF]		= &spdif_clk.common.hw,
+		[CLK_USB_PHY0]		= &usb_phy0_clk.common.hw,
+		[CLK_USB_PHY1]		= &usb_phy1_clk.common.hw,
+		[CLK_USB_PHY2]		= &usb_phy2_clk.common.hw,
+		[CLK_USB_PHY3]		= &usb_phy3_clk.common.hw,
+		[CLK_USB_OHCI0]		= &usb_ohci0_clk.common.hw,
+		[CLK_USB_OHCI1]		= &usb_ohci1_clk.common.hw,
+		[CLK_USB_OHCI2]		= &usb_ohci2_clk.common.hw,
+		[CLK_USB_OHCI3]		= &usb_ohci3_clk.common.hw,
+		[CLK_DRAM]		= &dram_clk.common.hw,
+		[CLK_DRAM_VE]		= &dram_ve_clk.common.hw,
+		[CLK_DRAM_CSI]		= &dram_csi_clk.common.hw,
+		[CLK_DRAM_DEINTERLACE]	= &dram_deinterlace_clk.common.hw,
+		[CLK_DRAM_TS]		= &dram_ts_clk.common.hw,
+		[CLK_DE]		= &de_clk.common.hw,
+		[CLK_TCON0]		= &tcon_clk.common.hw,
+		[CLK_TVE]		= &tve_clk.common.hw,
+		[CLK_DEINTERLACE]	= &deinterlace_clk.common.hw,
+		[CLK_CSI_MISC]		= &csi_misc_clk.common.hw,
+		[CLK_CSI_SCLK]		= &csi_sclk_clk.common.hw,
+		[CLK_CSI_MCLK]		= &csi_mclk_clk.common.hw,
+		[CLK_VE]		= &ve_clk.common.hw,
+		[CLK_AC_DIG]		= &ac_dig_clk.common.hw,
+		[CLK_AVS]		= &avs_clk.common.hw,
+		[CLK_HDMI]		= &hdmi_clk.common.hw,
+		[CLK_HDMI_DDC]		= &hdmi_ddc_clk.common.hw,
+		[CLK_MBUS]		= &mbus_clk.common.hw,
+		[CLK_GPU]		= &gpu_clk.common.hw,
+	},
+	.num	= CLK_NUMBER_H5,
 };
 
 static struct ccu_reset_map sun8i_h3_ccu_resets[] = {
@@ -790,7 +1016,71 @@ static struct ccu_reset_map sun8i_h3_ccu_resets[] = {
 	[RST_BUS_UART1]		=  { 0x2d8, BIT(17) },
 	[RST_BUS_UART2]		=  { 0x2d8, BIT(18) },
 	[RST_BUS_UART3]		=  { 0x2d8, BIT(19) },
-	[RST_BUS_SCR]		=  { 0x2d8, BIT(20) },
+	[RST_BUS_SCR0]		=  { 0x2d8, BIT(20) },
+};
+
+static struct ccu_reset_map sun50i_h5_ccu_resets[] = {
+	[RST_USB_PHY0]		=  { 0x0cc, BIT(0) },
+	[RST_USB_PHY1]		=  { 0x0cc, BIT(1) },
+	[RST_USB_PHY2]		=  { 0x0cc, BIT(2) },
+	[RST_USB_PHY3]		=  { 0x0cc, BIT(3) },
+
+	[RST_MBUS]		=  { 0x0fc, BIT(31) },
+
+	[RST_BUS_CE]		=  { 0x2c0, BIT(5) },
+	[RST_BUS_DMA]		=  { 0x2c0, BIT(6) },
+	[RST_BUS_MMC0]		=  { 0x2c0, BIT(8) },
+	[RST_BUS_MMC1]		=  { 0x2c0, BIT(9) },
+	[RST_BUS_MMC2]		=  { 0x2c0, BIT(10) },
+	[RST_BUS_NAND]		=  { 0x2c0, BIT(13) },
+	[RST_BUS_DRAM]		=  { 0x2c0, BIT(14) },
+	[RST_BUS_EMAC]		=  { 0x2c0, BIT(17) },
+	[RST_BUS_TS]		=  { 0x2c0, BIT(18) },
+	[RST_BUS_HSTIMER]	=  { 0x2c0, BIT(19) },
+	[RST_BUS_SPI0]		=  { 0x2c0, BIT(20) },
+	[RST_BUS_SPI1]		=  { 0x2c0, BIT(21) },
+	[RST_BUS_OTG]		=  { 0x2c0, BIT(23) },
+	[RST_BUS_EHCI0]		=  { 0x2c0, BIT(24) },
+	[RST_BUS_EHCI1]		=  { 0x2c0, BIT(25) },
+	[RST_BUS_EHCI2]		=  { 0x2c0, BIT(26) },
+	[RST_BUS_EHCI3]		=  { 0x2c0, BIT(27) },
+	[RST_BUS_OHCI0]		=  { 0x2c0, BIT(28) },
+	[RST_BUS_OHCI1]		=  { 0x2c0, BIT(29) },
+	[RST_BUS_OHCI2]		=  { 0x2c0, BIT(30) },
+	[RST_BUS_OHCI3]		=  { 0x2c0, BIT(31) },
+
+	[RST_BUS_VE]		=  { 0x2c4, BIT(0) },
+	[RST_BUS_TCON0]		=  { 0x2c4, BIT(3) },
+	[RST_BUS_TCON1]		=  { 0x2c4, BIT(4) },
+	[RST_BUS_DEINTERLACE]	=  { 0x2c4, BIT(5) },
+	[RST_BUS_CSI]		=  { 0x2c4, BIT(8) },
+	[RST_BUS_TVE]		=  { 0x2c4, BIT(9) },
+	[RST_BUS_HDMI0]		=  { 0x2c4, BIT(10) },
+	[RST_BUS_HDMI1]		=  { 0x2c4, BIT(11) },
+	[RST_BUS_DE]		=  { 0x2c4, BIT(12) },
+	[RST_BUS_GPU]		=  { 0x2c4, BIT(20) },
+	[RST_BUS_MSGBOX]	=  { 0x2c4, BIT(21) },
+	[RST_BUS_SPINLOCK]	=  { 0x2c4, BIT(22) },
+	[RST_BUS_DBG]		=  { 0x2c4, BIT(31) },
+
+	[RST_BUS_EPHY]		=  { 0x2c8, BIT(2) },
+
+	[RST_BUS_CODEC]		=  { 0x2d0, BIT(0) },
+	[RST_BUS_SPDIF]		=  { 0x2d0, BIT(1) },
+	[RST_BUS_THS]		=  { 0x2d0, BIT(8) },
+	[RST_BUS_I2S0]		=  { 0x2d0, BIT(12) },
+	[RST_BUS_I2S1]		=  { 0x2d0, BIT(13) },
+	[RST_BUS_I2S2]		=  { 0x2d0, BIT(14) },
+
+	[RST_BUS_I2C0]		=  { 0x2d8, BIT(0) },
+	[RST_BUS_I2C1]		=  { 0x2d8, BIT(1) },
+	[RST_BUS_I2C2]		=  { 0x2d8, BIT(2) },
+	[RST_BUS_UART0]		=  { 0x2d8, BIT(16) },
+	[RST_BUS_UART1]		=  { 0x2d8, BIT(17) },
+	[RST_BUS_UART2]		=  { 0x2d8, BIT(18) },
+	[RST_BUS_UART3]		=  { 0x2d8, BIT(19) },
+	[RST_BUS_SCR0]		=  { 0x2d8, BIT(20) },
+	[RST_BUS_SCR1]		=  { 0x2d8, BIT(20) },
 };
 
 static const struct sunxi_ccu_desc sun8i_h3_ccu_desc = {
@@ -803,6 +1093,16 @@ static const struct sunxi_ccu_desc sun8i_h3_ccu_desc = {
 	.num_resets	= ARRAY_SIZE(sun8i_h3_ccu_resets),
 };
 
+static const struct sunxi_ccu_desc sun50i_h5_ccu_desc = {
+	.ccu_clks	= sun50i_h5_ccu_clks,
+	.num_ccu_clks	= ARRAY_SIZE(sun50i_h5_ccu_clks),
+
+	.hw_clks	= &sun50i_h5_hw_clks,
+
+	.resets		= sun50i_h5_ccu_resets,
+	.num_resets	= ARRAY_SIZE(sun50i_h5_ccu_resets),
+};
+
 static struct ccu_mux_nb sun8i_h3_cpu_nb = {
 	.common		= &cpux_clk.common,
 	.cm		= &cpux_clk.mux,
@@ -810,7 +1110,8 @@ static struct ccu_mux_nb sun8i_h3_cpu_nb = {
 	.bypass_index	= 1, /* index of 24 MHz oscillator */
 };
 
-static void __init sun8i_h3_ccu_setup(struct device_node *node)
+static void __init sunxi_h3_h5_ccu_init(struct device_node *node,
+					const struct sunxi_ccu_desc *desc)
 {
 	void __iomem *reg;
 	u32 val;
@@ -827,10 +1128,22 @@ static void __init sun8i_h3_ccu_setup(struct device_node *node)
 	val &= ~GENMASK(19, 16);
 	writel(val | (3 << 16), reg + SUN8I_H3_PLL_AUDIO_REG);
 
-	sunxi_ccu_probe(node, reg, &sun8i_h3_ccu_desc);
+	sunxi_ccu_probe(node, reg, desc);
 
 	ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk,
 				  &sun8i_h3_cpu_nb);
 }
+
+static void __init sun8i_h3_ccu_setup(struct device_node *node)
+{
+	sunxi_h3_h5_ccu_init(node, &sun8i_h3_ccu_desc);
+}
 CLK_OF_DECLARE(sun8i_h3_ccu, "allwinner,sun8i-h3-ccu",
 	       sun8i_h3_ccu_setup);
+
+static void __init sun50i_h5_ccu_setup(struct device_node *node)
+{
+	sunxi_h3_h5_ccu_init(node, &sun50i_h5_ccu_desc);
+}
+CLK_OF_DECLARE(sun50i_h5_ccu, "allwinner,sun50i-h5-ccu",
+	       sun50i_h5_ccu_setup);
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.h b/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
index 78be712c7487..1b4baea37d81 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
@@ -29,7 +29,9 @@
 #define CLK_PLL_VIDEO		6
 #define CLK_PLL_VE		7
 #define CLK_PLL_DDR		8
-#define CLK_PLL_PERIPH0		9
+
+/* PLL_PERIPH0 exported for PRCM */
+
 #define CLK_PLL_PERIPH0_2X	10
 #define CLK_PLL_GPU		11
 #define CLK_PLL_PERIPH1		12
@@ -57,6 +59,7 @@
 
 /* And the GPU module clock is exported */
 
-#define CLK_NUMBER		(CLK_GPU + 1)
+#define CLK_NUMBER_H3		(CLK_GPU + 1)
+#define CLK_NUMBER_H5		(CLK_BUS_SCR1 + 1)
 
 #endif /* _CCU_SUN8I_H3_H_ */
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-r.c b/drivers/clk/sunxi-ng/ccu-sun8i-r.c
new file mode 100644
index 000000000000..119f47b568ea
--- /dev/null
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-r.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2016 Icenowy Zheng <icenowy@aosc.xyz>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+
+#include "ccu_common.h"
+#include "ccu_reset.h"
+
+#include "ccu_div.h"
+#include "ccu_gate.h"
+#include "ccu_mp.h"
+#include "ccu_nm.h"
+
+#include "ccu-sun8i-r.h"
+
+static const char * const ar100_parents[] = { "osc32k", "osc24M",
+					     "pll-periph0", "iosc" };
+
+static struct ccu_div ar100_clk = {
+	.div		= _SUNXI_CCU_DIV_FLAGS(4, 2, CLK_DIVIDER_POWER_OF_TWO),
+
+	.mux		= {
+		.shift	= 16,
+		.width	= 2,
+
+		.variable_prediv	= {
+			.index	= 2,
+			.shift	= 8,
+			.width	= 5,
+		},
+	},
+
+	.common		= {
+		.reg		= 0x00,
+		.features	= CCU_FEATURE_VARIABLE_PREDIV,
+		.hw.init	= CLK_HW_INIT_PARENTS("ar100",
+						      ar100_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static CLK_FIXED_FACTOR(ahb0_clk, "ahb0", "ar100", 1, 1, 0);
+
+static struct ccu_div apb0_clk = {
+	.div		= _SUNXI_CCU_DIV_FLAGS(0, 2, CLK_DIVIDER_POWER_OF_TWO),
+
+	.common		= {
+		.reg		= 0x0c,
+		.hw.init	= CLK_HW_INIT("apb0",
+					      "ahb0",
+					      &ccu_div_ops,
+					      0),
+	},
+};
+
+static SUNXI_CCU_GATE(apb0_pio_clk,	"apb0-pio",	"apb0",
+		      0x28, BIT(0), 0);
+static SUNXI_CCU_GATE(apb0_ir_clk,	"apb0-ir",	"apb0",
+		      0x28, BIT(1), 0);
+static SUNXI_CCU_GATE(apb0_timer_clk,	"apb0-timer",	"apb0",
+		      0x28, BIT(2), 0);
+static SUNXI_CCU_GATE(apb0_rsb_clk,	"apb0-rsb",	"apb0",
+		      0x28, BIT(3), 0);
+static SUNXI_CCU_GATE(apb0_uart_clk,	"apb0-uart",	"apb0",
+		      0x28, BIT(4), 0);
+static SUNXI_CCU_GATE(apb0_i2c_clk,	"apb0-i2c",	"apb0",
+		      0x28, BIT(6), 0);
+static SUNXI_CCU_GATE(apb0_twd_clk,	"apb0-twd",	"apb0",
+		      0x28, BIT(7), 0);
+
+static const char * const r_mod0_default_parents[] = { "osc32k", "osc24M" };
+static SUNXI_CCU_MP_WITH_MUX_GATE(ir_clk, "ir",
+				  r_mod0_default_parents, 0x54,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static struct ccu_common *sun8i_h3_r_ccu_clks[] = {
+	&ar100_clk.common,
+	&apb0_clk.common,
+	&apb0_pio_clk.common,
+	&apb0_ir_clk.common,
+	&apb0_timer_clk.common,
+	&apb0_uart_clk.common,
+	&apb0_i2c_clk.common,
+	&apb0_twd_clk.common,
+	&ir_clk.common,
+};
+
+static struct ccu_common *sun50i_a64_r_ccu_clks[] = {
+	&ar100_clk.common,
+	&apb0_clk.common,
+	&apb0_pio_clk.common,
+	&apb0_ir_clk.common,
+	&apb0_timer_clk.common,
+	&apb0_rsb_clk.common,
+	&apb0_uart_clk.common,
+	&apb0_i2c_clk.common,
+	&apb0_twd_clk.common,
+	&ir_clk.common,
+};
+
+static struct clk_hw_onecell_data sun8i_h3_r_hw_clks = {
+	.hws	= {
+		[CLK_AR100]		= &ar100_clk.common.hw,
+		[CLK_AHB0]		= &ahb0_clk.hw,
+		[CLK_APB0]		= &apb0_clk.common.hw,
+		[CLK_APB0_PIO]		= &apb0_pio_clk.common.hw,
+		[CLK_APB0_IR]		= &apb0_ir_clk.common.hw,
+		[CLK_APB0_TIMER]	= &apb0_timer_clk.common.hw,
+		[CLK_APB0_UART]		= &apb0_uart_clk.common.hw,
+		[CLK_APB0_I2C]		= &apb0_i2c_clk.common.hw,
+		[CLK_APB0_TWD]		= &apb0_twd_clk.common.hw,
+		[CLK_IR]		= &ir_clk.common.hw,
+	},
+	.num	= CLK_NUMBER,
+};
+
+static struct clk_hw_onecell_data sun50i_a64_r_hw_clks = {
+	.hws	= {
+		[CLK_AR100]		= &ar100_clk.common.hw,
+		[CLK_AHB0]		= &ahb0_clk.hw,
+		[CLK_APB0]		= &apb0_clk.common.hw,
+		[CLK_APB0_PIO]		= &apb0_pio_clk.common.hw,
+		[CLK_APB0_IR]		= &apb0_ir_clk.common.hw,
+		[CLK_APB0_TIMER]	= &apb0_timer_clk.common.hw,
+		[CLK_APB0_RSB]		= &apb0_rsb_clk.common.hw,
+		[CLK_APB0_UART]		= &apb0_uart_clk.common.hw,
+		[CLK_APB0_I2C]		= &apb0_i2c_clk.common.hw,
+		[CLK_APB0_TWD]		= &apb0_twd_clk.common.hw,
+		[CLK_IR]		= &ir_clk.common.hw,
+	},
+	.num	= CLK_NUMBER,
+};
+
+static struct ccu_reset_map sun8i_h3_r_ccu_resets[] = {
+	[RST_APB0_IR]		=  { 0xb0, BIT(1) },
+	[RST_APB0_TIMER]	=  { 0xb0, BIT(2) },
+	[RST_APB0_UART]		=  { 0xb0, BIT(4) },
+	[RST_APB0_I2C]		=  { 0xb0, BIT(6) },
+};
+
+static struct ccu_reset_map sun50i_a64_r_ccu_resets[] = {
+	[RST_APB0_IR]		=  { 0xb0, BIT(1) },
+	[RST_APB0_TIMER]	=  { 0xb0, BIT(2) },
+	[RST_APB0_RSB]		=  { 0xb0, BIT(3) },
+	[RST_APB0_UART]		=  { 0xb0, BIT(4) },
+	[RST_APB0_I2C]		=  { 0xb0, BIT(6) },
+};
+
+static const struct sunxi_ccu_desc sun8i_h3_r_ccu_desc = {
+	.ccu_clks	= sun8i_h3_r_ccu_clks,
+	.num_ccu_clks	= ARRAY_SIZE(sun8i_h3_r_ccu_clks),
+
+	.hw_clks	= &sun8i_h3_r_hw_clks,
+
+	.resets		= sun8i_h3_r_ccu_resets,
+	.num_resets	= ARRAY_SIZE(sun8i_h3_r_ccu_resets),
+};
+
+static const struct sunxi_ccu_desc sun50i_a64_r_ccu_desc = {
+	.ccu_clks	= sun50i_a64_r_ccu_clks,
+	.num_ccu_clks	= ARRAY_SIZE(sun50i_a64_r_ccu_clks),
+
+	.hw_clks	= &sun50i_a64_r_hw_clks,
+
+	.resets		= sun50i_a64_r_ccu_resets,
+	.num_resets	= ARRAY_SIZE(sun50i_a64_r_ccu_resets),
+};
+
+static void __init sunxi_r_ccu_init(struct device_node *node,
+				    const struct sunxi_ccu_desc *desc)
+{
+	void __iomem *reg;
+
+	reg = of_io_request_and_map(node, 0, of_node_full_name(node));
+	if (IS_ERR(reg)) {
+		pr_err("%s: Could not map the clock registers\n",
+		       of_node_full_name(node));
+		return;
+	}
+
+	sunxi_ccu_probe(node, reg, desc);
+}
+
+static void __init sun8i_h3_r_ccu_setup(struct device_node *node)
+{
+	sunxi_r_ccu_init(node, &sun8i_h3_r_ccu_desc);
+}
+CLK_OF_DECLARE(sun8i_h3_r_ccu, "allwinner,sun8i-h3-r-ccu",
+	       sun8i_h3_r_ccu_setup);
+
+static void __init sun50i_a64_r_ccu_setup(struct device_node *node)
+{
+	sunxi_r_ccu_init(node, &sun50i_a64_r_ccu_desc);
+}
+CLK_OF_DECLARE(sun50i_a64_r_ccu, "allwinner,sun50i-a64-r-ccu",
+	       sun50i_a64_r_ccu_setup);
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-r.h b/drivers/clk/sunxi-ng/ccu-sun8i-r.h
new file mode 100644
index 000000000000..a7a407f12b56
--- /dev/null
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-r.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2016 Icenowy <icenowy@aosc.xyz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CCU_SUN8I_R_H
+#define _CCU_SUN8I_R_H_
+
+#include <dt-bindings/clock/sun8i-r-ccu.h>
+#include <dt-bindings/reset/sun8i-r-ccu.h>
+
+/* AHB/APB bus clocks are not exported */
+#define CLK_AHB0	1
+#define CLK_APB0	2
+
+#define CLK_NUMBER	(CLK_IR + 1)
+
+#endif /* _CCU_SUN8I_R_H */
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
index e58706b40ae9..6297add857b5 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
@@ -537,7 +537,7 @@ static struct ccu_reset_map sun8i_v3s_ccu_resets[] = {
 	[RST_BUS_EMAC]		=  { 0x2c0, BIT(17) },
 	[RST_BUS_HSTIMER]	=  { 0x2c0, BIT(19) },
 	[RST_BUS_SPI0]		=  { 0x2c0, BIT(20) },
-	[RST_BUS_OTG]		=  { 0x2c0, BIT(23) },
+	[RST_BUS_OTG]		=  { 0x2c0, BIT(24) },
 	[RST_BUS_EHCI0]		=  { 0x2c0, BIT(26) },
 	[RST_BUS_OHCI0]		=  { 0x2c0, BIT(29) },
 
diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
index e13e313ce4f5..8936ef87652c 100644
--- a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
+++ b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
@@ -29,49 +29,48 @@
 
 #define CCU_SUN9I_LOCK_REG	0x09c
 
-static struct clk_div_table pll_cpux_p_div_table[] = {
-	{ .val = 0, .div = 1 },
-	{ .val = 1, .div = 4 },
-	{ /* Sentinel */ },
-};
-
 /*
- * The CPU PLLs are actually NP clocks, but P is /1 or /4, so here we
- * use the NM clocks with a divider table for M.
+ * The CPU PLLs are actually NP clocks, with P being /1 or /4. However
+ * P should only be used for output frequencies lower than 228 MHz.
+ * Neither mainline Linux, U-boot, nor the vendor BSPs use these.
+ *
+ * For now we can just model it as a multiplier clock, and force P to /1.
  */
-static struct ccu_nm pll_c0cpux_clk = {
+#define SUN9I_A80_PLL_C0CPUX_REG	0x000
+#define SUN9I_A80_PLL_C1CPUX_REG	0x004
+
+static struct ccu_mult pll_c0cpux_clk = {
 	.enable		= BIT(31),
 	.lock		= BIT(0),
-	.n		= _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
-	.m		= _SUNXI_CCU_DIV_TABLE(16, 1, pll_cpux_p_div_table),
+	.mult		= _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
 	.common		= {
-		.reg		= 0x000,
+		.reg		= SUN9I_A80_PLL_C0CPUX_REG,
 		.lock_reg	= CCU_SUN9I_LOCK_REG,
 		.features	= CCU_FEATURE_LOCK_REG,
 		.hw.init	= CLK_HW_INIT("pll-c0cpux", "osc24M",
-					      &ccu_nm_ops, CLK_SET_RATE_UNGATE),
+					      &ccu_mult_ops,
+					      CLK_SET_RATE_UNGATE),
 	},
 };
 
-static struct ccu_nm pll_c1cpux_clk = {
+static struct ccu_mult pll_c1cpux_clk = {
 	.enable		= BIT(31),
 	.lock		= BIT(1),
-	.n		= _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
-	.m		= _SUNXI_CCU_DIV_TABLE(16, 1, pll_cpux_p_div_table),
+	.mult		= _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
 	.common		= {
-		.reg		= 0x004,
+		.reg		= SUN9I_A80_PLL_C1CPUX_REG,
 		.lock_reg	= CCU_SUN9I_LOCK_REG,
 		.features	= CCU_FEATURE_LOCK_REG,
 		.hw.init	= CLK_HW_INIT("pll-c1cpux", "osc24M",
-					      &ccu_nm_ops, CLK_SET_RATE_UNGATE),
+					      &ccu_mult_ops,
+					      CLK_SET_RATE_UNGATE),
 	},
 };
 
 /*
  * The Audio PLL has d1, d2 dividers in addition to the usual N, M
  * factors. Since we only need 2 frequencies from this PLL: 22.5792 MHz
- * and 24.576 MHz, ignore them for now. Enforce the default for them,
- * which is d1 = 0, d2 = 1.
+ * and 24.576 MHz, ignore them for now. Enforce d1 = 0 and d2 = 0.
  */
 #define SUN9I_A80_PLL_AUDIO_REG	0x008
 
@@ -1189,6 +1188,36 @@ static const struct sunxi_ccu_desc sun9i_a80_ccu_desc = {
 	.num_resets	= ARRAY_SIZE(sun9i_a80_ccu_resets),
 };
 
+#define SUN9I_A80_PLL_P_SHIFT	16
+#define SUN9I_A80_PLL_N_SHIFT	8
+#define SUN9I_A80_PLL_N_WIDTH	8
+
+static void sun9i_a80_cpu_pll_fixup(void __iomem *reg)
+{
+	u32 val = readl(reg);
+
+	/* bail out if P divider is not used */
+	if (!(val & BIT(SUN9I_A80_PLL_P_SHIFT)))
+		return;
+
+	/*
+	 * If P is used, output should be less than 288 MHz. When we
+	 * set P to 1, we should also decrease the multiplier so the
+	 * output doesn't go out of range, but not too much such that
+	 * the multiplier stays above 12, the minimal operation value.
+	 *
+	 * To keep it simple, set the multiplier to 17, the reset value.
+	 */
+	val &= ~GENMASK(SUN9I_A80_PLL_N_SHIFT + SUN9I_A80_PLL_N_WIDTH - 1,
+			SUN9I_A80_PLL_N_SHIFT);
+	val |= 17 << SUN9I_A80_PLL_N_SHIFT;
+
+	/* And clear P */
+	val &= ~BIT(SUN9I_A80_PLL_P_SHIFT);
+
+	writel(val, reg);
+}
+
 static int sun9i_a80_ccu_probe(struct platform_device *pdev)
 {
 	struct resource *res;
@@ -1205,6 +1234,10 @@ static int sun9i_a80_ccu_probe(struct platform_device *pdev)
 	val &= (BIT(16) & BIT(18));
 	writel(val, reg + SUN9I_A80_PLL_AUDIO_REG);
 
+	/* Enforce P = 1 for both CPU cluster PLLs */
+	sun9i_a80_cpu_pll_fixup(reg + SUN9I_A80_PLL_C0CPUX_REG);
+	sun9i_a80_cpu_pll_fixup(reg + SUN9I_A80_PLL_C1CPUX_REG);
+
 	return sunxi_ccu_probe(pdev->dev.of_node, reg, &sun9i_a80_ccu_desc);
 }
 
diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c
index 9d8724715a43..40aac316128f 100644
--- a/drivers/clk/sunxi-ng/ccu_common.c
+++ b/drivers/clk/sunxi-ng/ccu_common.c
@@ -112,8 +112,8 @@ int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
 
 		ret = clk_hw_register(NULL, hw);
 		if (ret) {
-			pr_err("Couldn't register clock %s\n",
-			       clk_hw_get_name(hw));
+			pr_err("Couldn't register clock %d - %s\n",
+			       i, clk_hw_get_name(hw));
 			goto err_clk_unreg;
 		}
 	}
diff --git a/drivers/clk/sunxi-ng/ccu_gate.c b/drivers/clk/sunxi-ng/ccu_gate.c
index 8a81f9d4a89f..cd069d5da215 100644
--- a/drivers/clk/sunxi-ng/ccu_gate.c
+++ b/drivers/clk/sunxi-ng/ccu_gate.c
@@ -75,8 +75,55 @@ static int ccu_gate_is_enabled(struct clk_hw *hw)
 	return ccu_gate_helper_is_enabled(&cg->common, cg->enable);
 }
 
+static unsigned long ccu_gate_recalc_rate(struct clk_hw *hw,
+					  unsigned long parent_rate)
+{
+	struct ccu_gate *cg = hw_to_ccu_gate(hw);
+	unsigned long rate = parent_rate;
+
+	if (cg->common.features & CCU_FEATURE_ALL_PREDIV)
+		rate /= cg->common.prediv;
+
+	return rate;
+}
+
+static long ccu_gate_round_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long *prate)
+{
+	struct ccu_gate *cg = hw_to_ccu_gate(hw);
+	int div = 1;
+
+	if (cg->common.features & CCU_FEATURE_ALL_PREDIV)
+		div = cg->common.prediv;
+
+	if (clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT) {
+		unsigned long best_parent = rate;
+
+		if (cg->common.features & CCU_FEATURE_ALL_PREDIV)
+			best_parent *= div;
+		*prate = clk_hw_round_rate(clk_hw_get_parent(hw), best_parent);
+	}
+
+	return *prate / div;
+}
+
+static int ccu_gate_set_rate(struct clk_hw *hw, unsigned long rate,
+			     unsigned long parent_rate)
+{
+	/*
+	 * We must report success but we can do so unconditionally because
+	 * clk_factor_round_rate returns values that ensure this call is a
+	 * nop.
+	 */
+
+	return 0;
+}
+
 const struct clk_ops ccu_gate_ops = {
 	.disable	= ccu_gate_disable,
 	.enable		= ccu_gate_enable,
 	.is_enabled	= ccu_gate_is_enabled,
+	.round_rate	= ccu_gate_round_rate,
+	.set_rate	= ccu_gate_set_rate,
+	.recalc_rate	= ccu_gate_recalc_rate,
 };
diff --git a/drivers/clk/sunxi-ng/ccu_mult.c b/drivers/clk/sunxi-ng/ccu_mult.c
index 8724c01171b1..671141359895 100644
--- a/drivers/clk/sunxi-ng/ccu_mult.c
+++ b/drivers/clk/sunxi-ng/ccu_mult.c
@@ -137,6 +137,8 @@ static int ccu_mult_set_rate(struct clk_hw *hw, unsigned long rate,
 
 	spin_unlock_irqrestore(cm->common.lock, flags);
 
+	ccu_helper_wait_for_lock(&cm->common, cm->lock);
+
 	return 0;
 }
 
diff --git a/drivers/clk/sunxi-ng/ccu_mult.h b/drivers/clk/sunxi-ng/ccu_mult.h
index 524acddfcb2e..f9c37b987d72 100644
--- a/drivers/clk/sunxi-ng/ccu_mult.h
+++ b/drivers/clk/sunxi-ng/ccu_mult.h
@@ -33,6 +33,7 @@ struct ccu_mult_internal {
 
 struct ccu_mult {
 	u32			enable;
+	u32			lock;
 
 	struct ccu_frac_internal	frac;
 	struct ccu_mult_internal	mult;
@@ -45,6 +46,7 @@ struct ccu_mult {
 				   _flags)				\
 	struct ccu_mult _struct = {					\
 		.enable	= _gate,					\
+		.lock	= _lock,					\
 		.mult	= _SUNXI_CCU_MULT(_mshift, _mwidth),		\
 		.common	= {						\
 			.reg		= _reg,				\
diff --git a/drivers/clk/sunxi-ng/ccu_nk.c b/drivers/clk/sunxi-ng/ccu_nk.c
index b9e9b8a9d1b4..2485bda87a9a 100644
--- a/drivers/clk/sunxi-ng/ccu_nk.c
+++ b/drivers/clk/sunxi-ng/ccu_nk.c
@@ -102,9 +102,9 @@ static long ccu_nk_round_rate(struct clk_hw *hw, unsigned long rate,
 	if (nk->common.features & CCU_FEATURE_FIXED_POSTDIV)
 		rate *= nk->fixed_post_div;
 
-	_nk.min_n = nk->n.min;
+	_nk.min_n = nk->n.min ?: 1;
 	_nk.max_n = nk->n.max ?: 1 << nk->n.width;
-	_nk.min_k = nk->k.min;
+	_nk.min_k = nk->k.min ?: 1;
 	_nk.max_k = nk->k.max ?: 1 << nk->k.width;
 
 	ccu_nk_find_best(*parent_rate, rate, &_nk);
@@ -127,9 +127,9 @@ static int ccu_nk_set_rate(struct clk_hw *hw, unsigned long rate,
 	if (nk->common.features & CCU_FEATURE_FIXED_POSTDIV)
 		rate = rate * nk->fixed_post_div;
 
-	_nk.min_n = nk->n.min;
+	_nk.min_n = nk->n.min ?: 1;
 	_nk.max_n = nk->n.max ?: 1 << nk->n.width;
-	_nk.min_k = nk->k.min;
+	_nk.min_k = nk->k.min ?: 1;
 	_nk.max_k = nk->k.max ?: 1 << nk->k.width;
 
 	ccu_nk_find_best(parent_rate, rate, &_nk);
diff --git a/drivers/clk/sunxi-ng/ccu_nkm.c b/drivers/clk/sunxi-ng/ccu_nkm.c
index 71f81e95a061..cba84afe1cf1 100644
--- a/drivers/clk/sunxi-ng/ccu_nkm.c
+++ b/drivers/clk/sunxi-ng/ccu_nkm.c
@@ -109,9 +109,9 @@ static unsigned long ccu_nkm_round_rate(struct ccu_mux_internal *mux,
 	struct ccu_nkm *nkm = data;
 	struct _ccu_nkm _nkm;
 
-	_nkm.min_n = nkm->n.min;
+	_nkm.min_n = nkm->n.min ?: 1;
 	_nkm.max_n = nkm->n.max ?: 1 << nkm->n.width;
-	_nkm.min_k = nkm->k.min;
+	_nkm.min_k = nkm->k.min ?: 1;
 	_nkm.max_k = nkm->k.max ?: 1 << nkm->k.width;
 	_nkm.min_m = 1;
 	_nkm.max_m = nkm->m.max ?: 1 << nkm->m.width;
@@ -138,9 +138,9 @@ static int ccu_nkm_set_rate(struct clk_hw *hw, unsigned long rate,
 	unsigned long flags;
 	u32 reg;
 
-	_nkm.min_n = nkm->n.min;
+	_nkm.min_n = nkm->n.min ?: 1;
 	_nkm.max_n = nkm->n.max ?: 1 << nkm->n.width;
-	_nkm.min_k = nkm->k.min;
+	_nkm.min_k = nkm->k.min ?: 1;
 	_nkm.max_k = nkm->k.max ?: 1 << nkm->k.width;
 	_nkm.min_m = 1;
 	_nkm.max_m = nkm->m.max ?: 1 << nkm->m.width;
diff --git a/drivers/clk/sunxi-ng/ccu_nkmp.c b/drivers/clk/sunxi-ng/ccu_nkmp.c
index 488055ed944f..e58c95787f94 100644
--- a/drivers/clk/sunxi-ng/ccu_nkmp.c
+++ b/drivers/clk/sunxi-ng/ccu_nkmp.c
@@ -116,9 +116,9 @@ static long ccu_nkmp_round_rate(struct clk_hw *hw, unsigned long rate,
 	struct ccu_nkmp *nkmp = hw_to_ccu_nkmp(hw);
 	struct _ccu_nkmp _nkmp;
 
-	_nkmp.min_n = nkmp->n.min;
+	_nkmp.min_n = nkmp->n.min ?: 1;
 	_nkmp.max_n = nkmp->n.max ?: 1 << nkmp->n.width;
-	_nkmp.min_k = nkmp->k.min;
+	_nkmp.min_k = nkmp->k.min ?: 1;
 	_nkmp.max_k = nkmp->k.max ?: 1 << nkmp->k.width;
 	_nkmp.min_m = 1;
 	_nkmp.max_m = nkmp->m.max ?: 1 << nkmp->m.width;
@@ -138,9 +138,9 @@ static int ccu_nkmp_set_rate(struct clk_hw *hw, unsigned long rate,
 	unsigned long flags;
 	u32 reg;
 
-	_nkmp.min_n = 1;
+	_nkmp.min_n = nkmp->n.min ?: 1;
 	_nkmp.max_n = nkmp->n.max ?: 1 << nkmp->n.width;
-	_nkmp.min_k = 1;
+	_nkmp.min_k = nkmp->k.min ?: 1;
 	_nkmp.max_k = nkmp->k.max ?: 1 << nkmp->k.width;
 	_nkmp.min_m = 1;
 	_nkmp.max_m = nkmp->m.max ?: 1 << nkmp->m.width;
diff --git a/drivers/clk/sunxi-ng/ccu_nm.c b/drivers/clk/sunxi-ng/ccu_nm.c
index af71b1909cd9..5e5e90a4a50c 100644
--- a/drivers/clk/sunxi-ng/ccu_nm.c
+++ b/drivers/clk/sunxi-ng/ccu_nm.c
@@ -99,7 +99,7 @@ static long ccu_nm_round_rate(struct clk_hw *hw, unsigned long rate,
 	struct ccu_nm *nm = hw_to_ccu_nm(hw);
 	struct _ccu_nm _nm;
 
-	_nm.min_n = nm->n.min;
+	_nm.min_n = nm->n.min ?: 1;
 	_nm.max_n = nm->n.max ?: 1 << nm->n.width;
 	_nm.min_m = 1;
 	_nm.max_m = nm->m.max ?: 1 << nm->m.width;
@@ -122,7 +122,7 @@ static int ccu_nm_set_rate(struct clk_hw *hw, unsigned long rate,
 	else
 		ccu_frac_helper_disable(&nm->common, &nm->frac);
 
-	_nm.min_n = 1;
+	_nm.min_n = nm->n.min ?: 1;
 	_nm.max_n = nm->n.max ?: 1 << nm->n.width;
 	_nm.min_m = 1;
 	_nm.max_m = nm->m.max ?: 1 << nm->m.width;
diff --git a/drivers/clk/tegra/clk-id.h b/drivers/clk/tegra/clk-id.h
index 5738635c5274..689f344377a7 100644
--- a/drivers/clk/tegra/clk-id.h
+++ b/drivers/clk/tegra/clk-id.h
@@ -307,6 +307,23 @@ enum clk_id {
 	tegra_clk_xusb_ssp_src,
 	tegra_clk_sclk_mux,
 	tegra_clk_sor_safe,
+	tegra_clk_cec,
+	tegra_clk_ispa,
+	tegra_clk_dmic1,
+	tegra_clk_dmic2,
+	tegra_clk_dmic3,
+	tegra_clk_dmic1_sync_clk,
+	tegra_clk_dmic2_sync_clk,
+	tegra_clk_dmic3_sync_clk,
+	tegra_clk_dmic1_sync_clk_mux,
+	tegra_clk_dmic2_sync_clk_mux,
+	tegra_clk_dmic3_sync_clk_mux,
+	tegra_clk_iqc1,
+	tegra_clk_iqc2,
+	tegra_clk_pll_a_out_adsp,
+	tegra_clk_pll_a_out0_out_adsp,
+	tegra_clk_adsp,
+	tegra_clk_adsp_neon,
 	tegra_clk_max,
 };
 
diff --git a/drivers/clk/tegra/clk-periph-gate.c b/drivers/clk/tegra/clk-periph-gate.c
index 88127828befe..303ef32ee3f1 100644
--- a/drivers/clk/tegra/clk-periph-gate.c
+++ b/drivers/clk/tegra/clk-periph-gate.c
@@ -159,6 +159,9 @@ struct clk *tegra_clk_register_periph_gate(const char *name,
 	gate->enable_refcnt = enable_refcnt;
 	gate->regs = pregs;
 
+	if (read_enb(gate) & periph_clk_to_bit(gate))
+		enable_refcnt[clk_num]++;
+
 	/* Data in .init is copied by clk_register(), so stack variable OK */
 	gate->hw.init = &init;
 
diff --git a/drivers/clk/tegra/clk-periph.c b/drivers/clk/tegra/clk-periph.c
index a17ca6d7f649..cf80831de79d 100644
--- a/drivers/clk/tegra/clk-periph.c
+++ b/drivers/clk/tegra/clk-periph.c
@@ -138,7 +138,7 @@ static const struct clk_ops tegra_clk_periph_no_gate_ops = {
 };
 
 static struct clk *_tegra_clk_register_periph(const char *name,
-			const char **parent_names, int num_parents,
+			const char * const *parent_names, int num_parents,
 			struct tegra_clk_periph *periph,
 			void __iomem *clk_base, u32 offset,
 			unsigned long flags)
@@ -186,7 +186,7 @@ static struct clk *_tegra_clk_register_periph(const char *name,
 }
 
 struct clk *tegra_clk_register_periph(const char *name,
-		const char **parent_names, int num_parents,
+		const char * const *parent_names, int num_parents,
 		struct tegra_clk_periph *periph, void __iomem *clk_base,
 		u32 offset, unsigned long flags)
 {
@@ -195,7 +195,7 @@ struct clk *tegra_clk_register_periph(const char *name,
 }
 
 struct clk *tegra_clk_register_periph_nodiv(const char *name,
-		const char **parent_names, int num_parents,
+		const char * const *parent_names, int num_parents,
 		struct tegra_clk_periph *periph, void __iomem *clk_base,
 		u32 offset)
 {
diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c
index b3855360d6bc..159a854779e6 100644
--- a/drivers/clk/tegra/clk-pll.c
+++ b/drivers/clk/tegra/clk-pll.c
@@ -2517,152 +2517,6 @@ static int clk_plle_tegra210_is_enabled(struct clk_hw *hw)
 	return val & PLLE_BASE_ENABLE ? 1 : 0;
 }
 
-static int clk_pllu_tegra210_enable(struct clk_hw *hw)
-{
-	struct tegra_clk_pll *pll = to_clk_pll(hw);
-	struct clk_hw *pll_ref = clk_hw_get_parent(hw);
-	struct clk_hw *osc = clk_hw_get_parent(pll_ref);
-	const struct utmi_clk_param *params = NULL;
-	unsigned long flags = 0, input_rate;
-	unsigned int i;
-	int ret = 0;
-	u32 value;
-
-	if (!osc) {
-		pr_err("%s: failed to get OSC clock\n", __func__);
-		return -EINVAL;
-	}
-
-	input_rate = clk_hw_get_rate(osc);
-
-	if (pll->lock)
-		spin_lock_irqsave(pll->lock, flags);
-
-	_clk_pll_enable(hw);
-
-	ret = clk_pll_wait_for_lock(pll);
-	if (ret < 0)
-		goto out;
-
-	for (i = 0; i < ARRAY_SIZE(utmi_parameters); i++) {
-		if (input_rate == utmi_parameters[i].osc_frequency) {
-			params = &utmi_parameters[i];
-			break;
-		}
-	}
-
-	if (!params) {
-		pr_err("%s: unexpected input rate %lu Hz\n", __func__,
-		       input_rate);
-		ret = -EINVAL;
-		goto out;
-	}
-
-	value = pll_readl_base(pll);
-	value &= ~PLLU_BASE_OVERRIDE;
-	pll_writel_base(value, pll);
-
-	/* Put PLLU under HW control */
-	value = readl_relaxed(pll->clk_base + PLLU_HW_PWRDN_CFG0);
-	value |= PLLU_HW_PWRDN_CFG0_IDDQ_PD_INCLUDE |
-	         PLLU_HW_PWRDN_CFG0_USE_SWITCH_DETECT |
-	         PLLU_HW_PWRDN_CFG0_USE_LOCKDET;
-	value &= ~(PLLU_HW_PWRDN_CFG0_CLK_ENABLE_SWCTL |
-		   PLLU_HW_PWRDN_CFG0_CLK_SWITCH_SWCTL);
-	writel_relaxed(value, pll->clk_base + PLLU_HW_PWRDN_CFG0);
-
-	value = readl_relaxed(pll->clk_base + XUSB_PLL_CFG0);
-	value &= ~XUSB_PLL_CFG0_PLLU_LOCK_DLY;
-	writel_relaxed(value, pll->clk_base + XUSB_PLL_CFG0);
-
-	udelay(1);
-
-	value = readl_relaxed(pll->clk_base + PLLU_HW_PWRDN_CFG0);
-	value |= PLLU_HW_PWRDN_CFG0_SEQ_ENABLE;
-	writel_relaxed(value, pll->clk_base + PLLU_HW_PWRDN_CFG0);
-
-	udelay(1);
-
-	/* Disable PLLU clock branch to UTMIPLL since it uses OSC */
-	value = pll_readl_base(pll);
-	value &= ~PLLU_BASE_CLKENABLE_USB;
-	pll_writel_base(value, pll);
-
-	value = readl_relaxed(pll->clk_base + UTMIPLL_HW_PWRDN_CFG0);
-	if (value & UTMIPLL_HW_PWRDN_CFG0_SEQ_ENABLE) {
-		pr_debug("UTMIPLL already enabled\n");
-		goto out;
-	}
-
-	value &= ~UTMIPLL_HW_PWRDN_CFG0_IDDQ_OVERRIDE;
-	writel_relaxed(value, pll->clk_base + UTMIPLL_HW_PWRDN_CFG0);
-
-	/* Program UTMIP PLL stable and active counts */
-	value = readl_relaxed(pll->clk_base + UTMIP_PLL_CFG2);
-	value &= ~UTMIP_PLL_CFG2_STABLE_COUNT(~0);
-	value |= UTMIP_PLL_CFG2_STABLE_COUNT(params->stable_count);
-	value &= ~UTMIP_PLL_CFG2_ACTIVE_DLY_COUNT(~0);
-	value |= UTMIP_PLL_CFG2_ACTIVE_DLY_COUNT(params->active_delay_count);
-	value |= UTMIP_PLL_CFG2_PHY_XTAL_CLOCKEN;
-	writel_relaxed(value, pll->clk_base + UTMIP_PLL_CFG2);
-
-	/* Program UTMIP PLL delay and oscillator frequency counts */
-	value = readl_relaxed(pll->clk_base + UTMIP_PLL_CFG1);
-	value &= ~UTMIP_PLL_CFG1_ENABLE_DLY_COUNT(~0);
-	value |= UTMIP_PLL_CFG1_ENABLE_DLY_COUNT(params->enable_delay_count);
-	value &= ~UTMIP_PLL_CFG1_XTAL_FREQ_COUNT(~0);
-	value |= UTMIP_PLL_CFG1_XTAL_FREQ_COUNT(params->xtal_freq_count);
-	writel_relaxed(value, pll->clk_base + UTMIP_PLL_CFG1);
-
-	/* Remove power downs from UTMIP PLL control bits */
-	value = readl_relaxed(pll->clk_base + UTMIP_PLL_CFG1);
-	value &= ~UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERDOWN;
-	value |= UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERUP;
-	writel(value, pll->clk_base + UTMIP_PLL_CFG1);
-
-	udelay(1);
-
-	/* Enable samplers for SNPS, XUSB_HOST, XUSB_DEV */
-	value = readl_relaxed(pll->clk_base + UTMIP_PLL_CFG2);
-	value |= UTMIP_PLL_CFG2_FORCE_PD_SAMP_A_POWERUP;
-	value |= UTMIP_PLL_CFG2_FORCE_PD_SAMP_B_POWERUP;
-	value |= UTMIP_PLL_CFG2_FORCE_PD_SAMP_D_POWERUP;
-	value &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_A_POWERDOWN;
-	value &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_B_POWERDOWN;
-	value &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_D_POWERDOWN;
-	writel_relaxed(value, pll->clk_base + UTMIP_PLL_CFG2);
-
-	/* Setup HW control of UTMIPLL */
-	value = readl_relaxed(pll->clk_base + UTMIP_PLL_CFG1);
-	value &= ~UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERUP;
-	value &= ~UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERDOWN;
-	writel_relaxed(value, pll->clk_base + UTMIP_PLL_CFG1);
-
-	value = readl_relaxed(pll->clk_base + UTMIPLL_HW_PWRDN_CFG0);
-	value |= UTMIPLL_HW_PWRDN_CFG0_USE_LOCKDET;
-	value &= ~UTMIPLL_HW_PWRDN_CFG0_CLK_ENABLE_SWCTL;
-	writel_relaxed(value, pll->clk_base + UTMIPLL_HW_PWRDN_CFG0);
-
-	udelay(1);
-
-	value = readl_relaxed(pll->clk_base + XUSB_PLL_CFG0);
-	value &= ~XUSB_PLL_CFG0_UTMIPLL_LOCK_DLY;
-	writel_relaxed(value, pll->clk_base + XUSB_PLL_CFG0);
-
-	udelay(1);
-
-	/* Enable HW control of UTMIPLL */
-	value = readl_relaxed(pll->clk_base + UTMIPLL_HW_PWRDN_CFG0);
-	value |= UTMIPLL_HW_PWRDN_CFG0_SEQ_ENABLE;
-	writel_relaxed(value, pll->clk_base + UTMIPLL_HW_PWRDN_CFG0);
-
-out:
-	if (pll->lock)
-		spin_unlock_irqrestore(pll->lock, flags);
-
-	return ret;
-}
-
 static const struct clk_ops tegra_clk_plle_tegra210_ops = {
 	.is_enabled =  clk_plle_tegra210_is_enabled,
 	.enable = clk_plle_tegra210_enable,
@@ -2670,13 +2524,6 @@ static const struct clk_ops tegra_clk_plle_tegra210_ops = {
 	.recalc_rate = clk_pll_recalc_rate,
 };
 
-static const struct clk_ops tegra_clk_pllu_tegra210_ops = {
-	.is_enabled =  clk_pll_is_enabled,
-	.enable = clk_pllu_tegra210_enable,
-	.disable = clk_pll_disable,
-	.recalc_rate = clk_pllre_recalc_rate,
-};
-
 struct clk *tegra_clk_register_plle_tegra210(const char *name,
 				const char *parent_name,
 				void __iomem *clk_base, unsigned long flags,
@@ -2918,25 +2765,4 @@ struct clk *tegra_clk_register_pllmb(const char *name, const char *parent_name,
 	return clk;
 }
 
-struct clk *tegra_clk_register_pllu_tegra210(const char *name,
-		const char *parent_name, void __iomem *clk_base,
-		unsigned long flags, struct tegra_clk_pll_params *pll_params,
-		spinlock_t *lock)
-{
-	struct tegra_clk_pll *pll;
-	struct clk *clk;
-
-	pll_params->flags |= TEGRA_PLLU;
-
-	pll = _tegra_init_pll(clk_base, NULL, pll_params, lock);
-	if (IS_ERR(pll))
-		return ERR_CAST(pll);
-
-	clk = _tegra_clk_register_pll(pll, name, parent_name, flags,
-				      &tegra_clk_pllu_tegra210_ops);
-	if (IS_ERR(clk))
-		kfree(pll);
-
-	return clk;
-}
 #endif
diff --git a/drivers/clk/tegra/clk-super.c b/drivers/clk/tegra/clk-super.c
index 131d1b5085e2..84267cfc4433 100644
--- a/drivers/clk/tegra/clk-super.c
+++ b/drivers/clk/tegra/clk-super.c
@@ -121,9 +121,50 @@ out:
 	return err;
 }
 
+const struct clk_ops tegra_clk_super_mux_ops = {
+	.get_parent = clk_super_get_parent,
+	.set_parent = clk_super_set_parent,
+};
+
+static long clk_super_round_rate(struct clk_hw *hw, unsigned long rate,
+				 unsigned long *parent_rate)
+{
+	struct tegra_clk_super_mux *super = to_clk_super_mux(hw);
+	struct clk_hw *div_hw = &super->frac_div.hw;
+
+	__clk_hw_set_clk(div_hw, hw);
+
+	return super->div_ops->round_rate(div_hw, rate, parent_rate);
+}
+
+static unsigned long clk_super_recalc_rate(struct clk_hw *hw,
+					   unsigned long parent_rate)
+{
+	struct tegra_clk_super_mux *super = to_clk_super_mux(hw);
+	struct clk_hw *div_hw = &super->frac_div.hw;
+
+	__clk_hw_set_clk(div_hw, hw);
+
+	return super->div_ops->recalc_rate(div_hw, parent_rate);
+}
+
+static int clk_super_set_rate(struct clk_hw *hw, unsigned long rate,
+			      unsigned long parent_rate)
+{
+	struct tegra_clk_super_mux *super = to_clk_super_mux(hw);
+	struct clk_hw *div_hw = &super->frac_div.hw;
+
+	__clk_hw_set_clk(div_hw, hw);
+
+	return super->div_ops->set_rate(div_hw, rate, parent_rate);
+}
+
 const struct clk_ops tegra_clk_super_ops = {
 	.get_parent = clk_super_get_parent,
 	.set_parent = clk_super_set_parent,
+	.set_rate = clk_super_set_rate,
+	.round_rate = clk_super_round_rate,
+	.recalc_rate = clk_super_recalc_rate,
 };
 
 struct clk *tegra_clk_register_super_mux(const char *name,
@@ -136,13 +177,11 @@ struct clk *tegra_clk_register_super_mux(const char *name,
 	struct clk_init_data init;
 
 	super = kzalloc(sizeof(*super), GFP_KERNEL);
-	if (!super) {
-		pr_err("%s: could not allocate super clk\n", __func__);
+	if (!super)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	init.name = name;
-	init.ops = &tegra_clk_super_ops;
+	init.ops = &tegra_clk_super_mux_ops;
 	init.flags = flags;
 	init.parent_names = parent_names;
 	init.num_parents = num_parents;
@@ -163,3 +202,43 @@ struct clk *tegra_clk_register_super_mux(const char *name,
 
 	return clk;
 }
+
+struct clk *tegra_clk_register_super_clk(const char *name,
+		const char * const *parent_names, u8 num_parents,
+		unsigned long flags, void __iomem *reg, u8 clk_super_flags,
+		spinlock_t *lock)
+{
+	struct tegra_clk_super_mux *super;
+	struct clk *clk;
+	struct clk_init_data init;
+
+	super = kzalloc(sizeof(*super), GFP_KERNEL);
+	if (!super)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &tegra_clk_super_ops;
+	init.flags = flags;
+	init.parent_names = parent_names;
+	init.num_parents = num_parents;
+
+	super->reg = reg;
+	super->lock = lock;
+	super->width = 4;
+	super->flags = clk_super_flags;
+	super->frac_div.reg = reg + 4;
+	super->frac_div.shift = 16;
+	super->frac_div.width = 8;
+	super->frac_div.frac_width = 1;
+	super->frac_div.lock = lock;
+	super->div_ops = &tegra_clk_frac_div_ops;
+
+	/* Data in .init is copied by clk_register(), so stack variable OK */
+	super->hw.init = &init;
+
+	clk = clk_register(NULL, &super->hw);
+	if (IS_ERR(clk))
+		kfree(super);
+
+	return clk;
+}
diff --git a/drivers/clk/tegra/clk-tegra-audio.c b/drivers/clk/tegra/clk-tegra-audio.c
index e2bfa9b368f6..b37cae7af26d 100644
--- a/drivers/clk/tegra/clk-tegra-audio.c
+++ b/drivers/clk/tegra/clk-tegra-audio.c
@@ -31,6 +31,9 @@
 #define AUDIO_SYNC_CLK_I2S3 0x4ac
 #define AUDIO_SYNC_CLK_I2S4 0x4b0
 #define AUDIO_SYNC_CLK_SPDIF 0x4b4
+#define AUDIO_SYNC_CLK_DMIC1 0x560
+#define AUDIO_SYNC_CLK_DMIC2 0x564
+#define AUDIO_SYNC_CLK_DMIC3 0x6b8
 
 #define AUDIO_SYNC_DOUBLER 0x49c
 
@@ -91,8 +94,14 @@ struct tegra_audio2x_clk_initdata {
 
 static DEFINE_SPINLOCK(clk_doubler_lock);
 
-static const char *mux_audio_sync_clk[] = { "spdif_in_sync", "i2s0_sync",
-	"i2s1_sync", "i2s2_sync", "i2s3_sync", "i2s4_sync", "vimclk_sync",
+static const char * const mux_audio_sync_clk[] = { "spdif_in_sync",
+	"i2s0_sync", "i2s1_sync", "i2s2_sync", "i2s3_sync", "i2s4_sync",
+	"pll_a_out0", "vimclk_sync",
+};
+
+static const char * const mux_dmic_sync_clk[] = { "unused", "i2s0_sync",
+	"i2s1_sync", "i2s2_sync", "i2s3_sync", "i2s4_sync", "pll_a_out0",
+	"vimclk_sync",
 };
 
 static struct tegra_sync_source_initdata sync_source_clks[] __initdata = {
@@ -114,6 +123,12 @@ static struct tegra_audio_clk_initdata audio_clks[] = {
 	AUDIO(spdif, AUDIO_SYNC_CLK_SPDIF),
 };
 
+static struct tegra_audio_clk_initdata dmic_clks[] = {
+	AUDIO(dmic1_sync_clk, AUDIO_SYNC_CLK_DMIC1),
+	AUDIO(dmic2_sync_clk, AUDIO_SYNC_CLK_DMIC2),
+	AUDIO(dmic3_sync_clk, AUDIO_SYNC_CLK_DMIC3),
+};
+
 static struct tegra_audio2x_clk_initdata audio2x_clks[] = {
 	AUDIO2X(audio0, 113, 24),
 	AUDIO2X(audio1, 114, 25),
@@ -123,6 +138,41 @@ static struct tegra_audio2x_clk_initdata audio2x_clks[] = {
 	AUDIO2X(spdif, 118, 29),
 };
 
+static void __init tegra_audio_sync_clk_init(void __iomem *clk_base,
+				      struct tegra_clk *tegra_clks,
+				      struct tegra_audio_clk_initdata *sync,
+				      int num_sync_clks,
+				      const char * const *mux_names,
+				      int num_mux_inputs)
+{
+	struct clk *clk;
+	struct clk **dt_clk;
+	struct tegra_audio_clk_initdata *data;
+	int i;
+
+	for (i = 0, data = sync; i < num_sync_clks; i++, data++) {
+		dt_clk = tegra_lookup_dt_id(data->mux_clk_id, tegra_clks);
+		if (!dt_clk)
+			continue;
+
+		clk = clk_register_mux(NULL, data->mux_name, mux_names,
+					num_mux_inputs,
+					CLK_SET_RATE_NO_REPARENT,
+					clk_base + data->offset, 0, 3, 0,
+					NULL);
+		*dt_clk = clk;
+
+		dt_clk = tegra_lookup_dt_id(data->gate_clk_id, tegra_clks);
+		if (!dt_clk)
+			continue;
+
+		clk = clk_register_gate(NULL, data->gate_name, data->mux_name,
+					0, clk_base + data->offset, 4,
+					CLK_GATE_SET_TO_DISABLE, NULL);
+		*dt_clk = clk;
+	}
+}
+
 void __init tegra_audio_clk_init(void __iomem *clk_base,
 			void __iomem *pmc_base, struct tegra_clk *tegra_clks,
 			struct tegra_audio_clk_info *audio_info,
@@ -176,30 +226,17 @@ void __init tegra_audio_clk_init(void __iomem *clk_base,
 		*dt_clk = clk;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(audio_clks); i++) {
-		struct tegra_audio_clk_initdata *data;
+	tegra_audio_sync_clk_init(clk_base, tegra_clks, audio_clks,
+				  ARRAY_SIZE(audio_clks), mux_audio_sync_clk,
+				  ARRAY_SIZE(mux_audio_sync_clk));
 
-		data = &audio_clks[i];
-		dt_clk = tegra_lookup_dt_id(data->mux_clk_id, tegra_clks);
+	/* make sure the DMIC sync clocks have a valid parent */
+	for (i = 0; i < ARRAY_SIZE(dmic_clks); i++)
+		writel_relaxed(1, clk_base + dmic_clks[i].offset);
 
-		if (!dt_clk)
-			continue;
-		clk = clk_register_mux(NULL, data->mux_name, mux_audio_sync_clk,
-					ARRAY_SIZE(mux_audio_sync_clk),
-					CLK_SET_RATE_NO_REPARENT,
-					clk_base + data->offset, 0, 3, 0,
-					NULL);
-		*dt_clk = clk;
-
-		dt_clk = tegra_lookup_dt_id(data->gate_clk_id, tegra_clks);
-		if (!dt_clk)
-			continue;
-
-		clk = clk_register_gate(NULL, data->gate_name, data->mux_name,
-					0, clk_base + data->offset, 4,
-					CLK_GATE_SET_TO_DISABLE, NULL);
-		*dt_clk = clk;
-	}
+	tegra_audio_sync_clk_init(clk_base, tegra_clks, dmic_clks,
+				  ARRAY_SIZE(dmic_clks), mux_dmic_sync_clk,
+				  ARRAY_SIZE(mux_dmic_sync_clk));
 
 	for (i = 0; i < ARRAY_SIZE(audio2x_clks); i++) {
 		struct tegra_audio2x_clk_initdata *data;
diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c
index 4ce4e7fb1124..294bfe40a4f5 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -138,6 +138,9 @@
 #define CLK_SOURCE_TSECB 0x6d8
 #define CLK_SOURCE_MAUD 0x6d4
 #define CLK_SOURCE_USB2_HSIC_TRK 0x6cc
+#define CLK_SOURCE_DMIC1 0x64c
+#define CLK_SOURCE_DMIC2 0x650
+#define CLK_SOURCE_DMIC3 0x6bc
 
 #define MASK(x) (BIT(x) - 1)
 
@@ -168,6 +171,12 @@
 			      0, TEGRA_PERIPH_NO_GATE, _clk_id,\
 			      _parents##_idx, 0, _lock)
 
+#define MUX8_NOGATE(_name, _parents, _offset, _clk_id)	\
+	TEGRA_INIT_DATA_TABLE(_name, NULL, NULL, _parents, _offset,	\
+			      29, MASK(3), 0, 0, 8, 1, TEGRA_DIVIDER_ROUND_UP,\
+			      0, TEGRA_PERIPH_NO_GATE, _clk_id,\
+			      _parents##_idx, 0, NULL)
+
 #define INT(_name, _parents, _offset,	\
 			    _clk_num, _gate_flags, _clk_id)	\
 	TEGRA_INIT_DATA_TABLE(_name, NULL, NULL, _parents, _offset,\
@@ -619,6 +628,21 @@ static const char *mux_clkm_plldp_sor0lvds[] = {
 };
 #define mux_clkm_plldp_sor0lvds_idx NULL
 
+static const char * const mux_dmic1[] = {
+	"pll_a_out0", "dmic1_sync_clk", "pll_p", "clk_m"
+};
+#define mux_dmic1_idx NULL
+
+static const char * const mux_dmic2[] = {
+	"pll_a_out0", "dmic2_sync_clk", "pll_p", "clk_m"
+};
+#define mux_dmic2_idx NULL
+
+static const char * const mux_dmic3[] = {
+	"pll_a_out0", "dmic3_sync_clk", "pll_p", "clk_m"
+};
+#define mux_dmic3_idx NULL
+
 static struct tegra_periph_init_data periph_clks[] = {
 	AUDIO("d_audio", CLK_SOURCE_D_AUDIO, 106, TEGRA_PERIPH_ON_APB, tegra_clk_d_audio),
 	AUDIO("dam0", CLK_SOURCE_DAM0, 108, TEGRA_PERIPH_ON_APB, tegra_clk_dam0),
@@ -739,7 +763,7 @@ static struct tegra_periph_init_data periph_clks[] = {
 	MUX8("soc_therm", mux_clkm_pllc_pllp_plla, CLK_SOURCE_SOC_THERM, 78, TEGRA_PERIPH_ON_APB, tegra_clk_soc_therm_8),
 	MUX8("vi_sensor", mux_pllm_pllc2_c_c3_pllp_plla, CLK_SOURCE_VI_SENSOR, 164, TEGRA_PERIPH_NO_RESET, tegra_clk_vi_sensor_8),
 	MUX8("isp", mux_pllm_pllc_pllp_plla_clkm_pllc4, CLK_SOURCE_ISP, 23, TEGRA_PERIPH_ON_APB, tegra_clk_isp_8),
-	MUX8("isp", mux_pllc_pllp_plla1_pllc2_c3_clkm_pllc4, CLK_SOURCE_ISP, 23, TEGRA_PERIPH_ON_APB, tegra_clk_isp_9),
+	MUX8_NOGATE("isp", mux_pllc_pllp_plla1_pllc2_c3_clkm_pllc4, CLK_SOURCE_ISP, tegra_clk_isp_9),
 	MUX8("entropy", mux_pllp_clkm1, CLK_SOURCE_ENTROPY, 149,  0, tegra_clk_entropy),
 	MUX8("entropy", mux_pllp_clkm_clk32_plle, CLK_SOURCE_ENTROPY, 149,  0, tegra_clk_entropy_8),
 	MUX8("hdmi_audio", mux_pllp3_pllc_clkm, CLK_SOURCE_HDMI_AUDIO, 176, TEGRA_PERIPH_NO_RESET, tegra_clk_hdmi_audio),
@@ -788,6 +812,9 @@ static struct tegra_periph_init_data periph_clks[] = {
 	MUX("uartape", mux_pllp_pllc_clkm, CLK_SOURCE_UARTAPE, 212, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_uartape),
 	MUX8("tsecb", mux_pllp_pllc2_c_c3_clkm, CLK_SOURCE_TSECB, 206, 0, tegra_clk_tsecb),
 	MUX8("maud", mux_pllp_pllp_out3_clkm_clk32k_plla, CLK_SOURCE_MAUD, 202, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_maud),
+	MUX8("dmic1", mux_dmic1, CLK_SOURCE_DMIC1, 161, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_dmic1),
+	MUX8("dmic2", mux_dmic2, CLK_SOURCE_DMIC2, 162, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_dmic2),
+	MUX8("dmic3", mux_dmic3, CLK_SOURCE_DMIC3, 197, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_dmic3),
 };
 
 static struct tegra_periph_init_data gate_clks[] = {
@@ -809,7 +836,7 @@ static struct tegra_periph_init_data gate_clks[] = {
 	GATE("usb2", "clk_m", 58, 0, tegra_clk_usb2, 0),
 	GATE("usb3", "clk_m", 59, 0, tegra_clk_usb3, 0),
 	GATE("csi", "pll_p_out3", 52, 0, tegra_clk_csi, 0),
-	GATE("afi", "clk_m", 72, 0, tegra_clk_afi, 0),
+	GATE("afi", "mselect", 72, 0, tegra_clk_afi, 0),
 	GATE("csus", "clk_m", 92, TEGRA_PERIPH_NO_RESET, tegra_clk_csus, 0),
 	GATE("dds", "clk_m", 150, TEGRA_PERIPH_ON_APB, tegra_clk_dds, 0),
 	GATE("dp2", "clk_m", 152, TEGRA_PERIPH_ON_APB, tegra_clk_dp2, 0),
@@ -819,7 +846,8 @@ static struct tegra_periph_init_data gate_clks[] = {
 	GATE("xusb_dev", "xusb_dev_src", 95, 0, tegra_clk_xusb_dev, 0),
 	GATE("emc", "emc_mux", 57, 0, tegra_clk_emc, CLK_IGNORE_UNUSED),
 	GATE("sata_cold", "clk_m", 129, TEGRA_PERIPH_ON_APB, tegra_clk_sata_cold, 0),
-	GATE("ispb", "clk_m", 3, 0, tegra_clk_ispb, 0),
+	GATE("ispa", "isp", 23, 0, tegra_clk_ispa, 0),
+	GATE("ispb", "isp", 3, 0, tegra_clk_ispb, 0),
 	GATE("vim2_clk", "clk_m", 11, 0, tegra_clk_vim2_clk, 0),
 	GATE("pcie", "clk_m", 70, 0, tegra_clk_pcie, 0),
 	GATE("gpu", "pll_ref", 184, 0, tegra_clk_gpu, 0),
@@ -830,6 +858,13 @@ static struct tegra_periph_init_data gate_clks[] = {
 	GATE("pll_p_out_cpu", "pll_p", 223, 0, tegra_clk_pll_p_out_cpu, 0),
 	GATE("pll_p_out_adsp", "pll_p", 187, 0, tegra_clk_pll_p_out_adsp, 0),
 	GATE("apb2ape", "clk_m", 107, 0, tegra_clk_apb2ape, 0),
+	GATE("cec", "pclk", 136, 0, tegra_clk_cec, 0),
+	GATE("iqc1", "clk_m", 221, 0, tegra_clk_iqc1, 0),
+	GATE("iqc2", "clk_m", 220, 0, tegra_clk_iqc1, 0),
+	GATE("pll_a_out_adsp", "pll_a", 188, 0, tegra_clk_pll_a_out_adsp, 0),
+	GATE("pll_a_out0_out_adsp", "pll_a", 188, 0, tegra_clk_pll_a_out0_out_adsp, 0),
+	GATE("adsp", "aclk", 199, 0, tegra_clk_adsp, 0),
+	GATE("adsp_neon", "aclk", 218, 0, tegra_clk_adsp_neon, 0),
 };
 
 static struct tegra_periph_init_data div_clks[] = {
diff --git a/drivers/clk/tegra/clk-tegra-pmc.c b/drivers/clk/tegra/clk-tegra-pmc.c
index 91377abfefa1..a35579a3f884 100644
--- a/drivers/clk/tegra/clk-tegra-pmc.c
+++ b/drivers/clk/tegra/clk-tegra-pmc.c
@@ -95,7 +95,8 @@ void __init tegra_pmc_clk_init(void __iomem *pmc_base,
 			continue;
 
 		clk = clk_register_mux(NULL, data->mux_name, data->parents,
-				data->num_parents, CLK_SET_RATE_NO_REPARENT,
+				data->num_parents,
+				CLK_SET_RATE_NO_REPARENT | CLK_SET_RATE_PARENT,
 				pmc_base + PMC_CLK_OUT_CNTRL, data->mux_shift,
 				3, 0, &clk_out_lock);
 		*dt_clk = clk;
@@ -106,7 +107,8 @@ void __init tegra_pmc_clk_init(void __iomem *pmc_base,
 			continue;
 
 		clk = clk_register_gate(NULL, data->gate_name, data->mux_name,
-					0, pmc_base + PMC_CLK_OUT_CNTRL,
+					CLK_SET_RATE_PARENT,
+					pmc_base + PMC_CLK_OUT_CNTRL,
 					data->gate_shift, 0, &clk_out_lock);
 		*dt_clk = clk;
 		clk_register_clkdev(clk, data->dev_name, data->gate_name);
diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c
index 933b5dd698b8..fd1a99c05c2d 100644
--- a/drivers/clk/tegra/clk-tegra114.c
+++ b/drivers/clk/tegra/clk-tegra114.c
@@ -819,6 +819,7 @@ static struct tegra_clk tegra114_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_clk_out_3_mux] = { .dt_id = TEGRA114_CLK_CLK_OUT_3_MUX, .present = true },
 	[tegra_clk_dsia_mux] = { .dt_id = TEGRA114_CLK_DSIA_MUX, .present = true },
 	[tegra_clk_dsib_mux] = { .dt_id = TEGRA114_CLK_DSIB_MUX, .present = true },
+	[tegra_clk_cec] = { .dt_id = TEGRA114_CLK_CEC, .present = true },
 };
 
 static struct tegra_devclk devclks[] __initdata = {
diff --git a/drivers/clk/tegra/clk-tegra124.c b/drivers/clk/tegra/clk-tegra124.c
index a112d3d2bff1..e81ea5b11577 100644
--- a/drivers/clk/tegra/clk-tegra124.c
+++ b/drivers/clk/tegra/clk-tegra124.c
@@ -928,6 +928,7 @@ static struct tegra_clk tegra124_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_clk_out_1_mux] = { .dt_id = TEGRA124_CLK_CLK_OUT_1_MUX, .present = true },
 	[tegra_clk_clk_out_2_mux] = { .dt_id = TEGRA124_CLK_CLK_OUT_2_MUX, .present = true },
 	[tegra_clk_clk_out_3_mux] = { .dt_id = TEGRA124_CLK_CLK_OUT_3_MUX, .present = true },
+	[tegra_clk_cec] = { .dt_id = TEGRA124_CLK_CEC, .present = true },
 };
 
 static struct tegra_devclk devclks[] __initdata = {
diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index 2896d2e783ce..1024e853ea65 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -24,6 +24,8 @@
 #include <linux/export.h>
 #include <linux/clk/tegra.h>
 #include <dt-bindings/clock/tegra210-car.h>
+#include <dt-bindings/reset/tegra210-car.h>
+#include <linux/iopoll.h>
 
 #include "clk.h"
 #include "clk-id.h"
@@ -155,9 +157,35 @@
 #define PMC_PLLM_WB0_OVERRIDE 0x1dc
 #define PMC_PLLM_WB0_OVERRIDE_2 0x2b0
 
+#define UTMIP_PLL_CFG2 0x488
+#define UTMIP_PLL_CFG2_STABLE_COUNT(x) (((x) & 0xfff) << 6)
+#define UTMIP_PLL_CFG2_ACTIVE_DLY_COUNT(x) (((x) & 0x3f) << 18)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_A_POWERDOWN BIT(0)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_A_POWERUP BIT(1)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_B_POWERDOWN BIT(2)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_B_POWERUP BIT(3)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_C_POWERDOWN BIT(4)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_C_POWERUP BIT(5)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_D_POWERDOWN BIT(24)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_D_POWERUP BIT(25)
+
+#define UTMIP_PLL_CFG1 0x484
+#define UTMIP_PLL_CFG1_ENABLE_DLY_COUNT(x) (((x) & 0x1f) << 27)
+#define UTMIP_PLL_CFG1_XTAL_FREQ_COUNT(x) (((x) & 0xfff) << 0)
+#define UTMIP_PLL_CFG1_FORCE_PLLU_POWERUP BIT(17)
+#define UTMIP_PLL_CFG1_FORCE_PLLU_POWERDOWN BIT(16)
+#define UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERUP BIT(15)
+#define UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERDOWN BIT(14)
+#define UTMIP_PLL_CFG1_FORCE_PLL_ACTIVE_POWERDOWN BIT(12)
+
 #define SATA_PLL_CFG0				0x490
 #define SATA_PLL_CFG0_PADPLL_RESET_SWCTL	BIT(0)
 #define SATA_PLL_CFG0_PADPLL_USE_LOCKDET	BIT(2)
+#define SATA_PLL_CFG0_SATA_SEQ_IN_SWCTL		BIT(4)
+#define SATA_PLL_CFG0_SATA_SEQ_RESET_INPUT_VALUE	BIT(5)
+#define SATA_PLL_CFG0_SATA_SEQ_LANE_PD_INPUT_VALUE	BIT(6)
+#define SATA_PLL_CFG0_SATA_SEQ_PADPLL_PD_INPUT_VALUE	BIT(7)
+
 #define SATA_PLL_CFG0_PADPLL_SLEEP_IDDQ		BIT(13)
 #define SATA_PLL_CFG0_SEQ_ENABLE		BIT(24)
 
@@ -196,6 +224,12 @@
 #define CLK_M_DIVISOR_SHIFT 2
 #define CLK_M_DIVISOR_MASK 0x3
 
+#define RST_DFLL_DVCO 0x2f4
+#define DVFS_DFLL_RESET_SHIFT 0
+
+#define CLK_RST_CONTROLLER_RST_DEV_Y_SET 0x2a8
+#define CLK_RST_CONTROLLER_RST_DEV_Y_CLR 0x2ac
+
 /*
  * SDM fractional divisor is 16-bit 2's complement signed number within
  * (-2^12 ... 2^12-1) range. Represented in PLL data structure as unsigned
@@ -454,6 +488,26 @@ void tegra210_sata_pll_hw_sequence_start(void)
 }
 EXPORT_SYMBOL_GPL(tegra210_sata_pll_hw_sequence_start);
 
+void tegra210_set_sata_pll_seq_sw(bool state)
+{
+	u32 val;
+
+	val = readl_relaxed(clk_base + SATA_PLL_CFG0);
+	if (state) {
+		val |= SATA_PLL_CFG0_SATA_SEQ_IN_SWCTL;
+		val |= SATA_PLL_CFG0_SATA_SEQ_RESET_INPUT_VALUE;
+		val |= SATA_PLL_CFG0_SATA_SEQ_LANE_PD_INPUT_VALUE;
+		val |= SATA_PLL_CFG0_SATA_SEQ_PADPLL_PD_INPUT_VALUE;
+	} else {
+		val &= ~SATA_PLL_CFG0_SATA_SEQ_IN_SWCTL;
+		val &= ~SATA_PLL_CFG0_SATA_SEQ_RESET_INPUT_VALUE;
+		val &= ~SATA_PLL_CFG0_SATA_SEQ_LANE_PD_INPUT_VALUE;
+		val &= ~SATA_PLL_CFG0_SATA_SEQ_PADPLL_PD_INPUT_VALUE;
+	}
+	writel_relaxed(val, clk_base + SATA_PLL_CFG0);
+}
+EXPORT_SYMBOL_GPL(tegra210_set_sata_pll_seq_sw);
+
 static inline void _pll_misc_chk_default(void __iomem *base,
 					struct tegra_clk_pll_params *params,
 					u8 misc_num, u32 default_val, u32 mask)
@@ -501,12 +555,12 @@ static void tegra210_pllcx_set_defaults(const char *name,
 {
 	pllcx->params->defaults_set = true;
 
-	if (readl_relaxed(clk_base + pllcx->params->base_reg) &
-			PLL_ENABLE) {
+	if (readl_relaxed(clk_base + pllcx->params->base_reg) & PLL_ENABLE) {
 		/* PLL is ON: only check if defaults already set */
 		pllcx_check_defaults(pllcx->params);
-		pr_warn("%s already enabled. Postponing set full defaults\n",
-			name);
+		if (!pllcx->params->defaults_set)
+			pr_warn("%s already enabled. Postponing set full defaults\n",
+				name);
 		return;
 	}
 
@@ -608,7 +662,6 @@ static void tegra210_plld_set_defaults(struct tegra_clk_pll *plld)
 
 	if (readl_relaxed(clk_base + plld->params->base_reg) &
 			PLL_ENABLE) {
-		pr_warn("PLL_D already enabled. Postponing set full defaults\n");
 
 		/*
 		 * PLL is ON: check if defaults already set, then set those
@@ -625,6 +678,9 @@ static void tegra210_plld_set_defaults(struct tegra_clk_pll *plld)
 		_pll_misc_chk_default(clk_base, plld->params, 0, val,
 				~mask & PLLD_MISC0_WRITE_MASK);
 
+		if (!plld->params->defaults_set)
+			pr_warn("PLL_D already enabled. Postponing set full defaults\n");
+
 		/* Enable lock detect */
 		mask = PLLD_MISC0_LOCK_ENABLE | PLLD_MISC0_LOCK_OVERRIDE;
 		val = readl_relaxed(clk_base + plld->params->ext_misc_reg[0]);
@@ -896,7 +952,6 @@ static void tegra210_pllx_set_defaults(struct tegra_clk_pll *pllx)
 	val |= step_b << PLLX_MISC2_DYNRAMP_STEPB_SHIFT;
 
 	if (readl_relaxed(clk_base + pllx->params->base_reg) & PLL_ENABLE) {
-		pr_warn("PLL_X already enabled. Postponing set full defaults\n");
 
 		/*
 		 * PLL is ON: check if defaults already set, then set those
@@ -904,6 +959,8 @@ static void tegra210_pllx_set_defaults(struct tegra_clk_pll *pllx)
 		 */
 		pllx_check_defaults(pllx);
 
+		if (!pllx->params->defaults_set)
+			pr_warn("PLL_X already enabled. Postponing set full defaults\n");
 		/* Configure dyn ramp, disable lock override */
 		writel_relaxed(val, clk_base + pllx->params->ext_misc_reg[2]);
 
@@ -948,7 +1005,6 @@ static void tegra210_pllmb_set_defaults(struct tegra_clk_pll *pllmb)
 	pllmb->params->defaults_set = true;
 
 	if (val & PLL_ENABLE) {
-		pr_warn("PLL_MB already enabled. Postponing set full defaults\n");
 
 		/*
 		 * PLL is ON: check if defaults already set, then set those
@@ -959,6 +1015,8 @@ static void tegra210_pllmb_set_defaults(struct tegra_clk_pll *pllmb)
 		_pll_misc_chk_default(clk_base, pllmb->params, 0, val,
 				~mask & PLLMB_MISC1_WRITE_MASK);
 
+		if (!pllmb->params->defaults_set)
+			pr_warn("PLL_MB already enabled. Postponing set full defaults\n");
 		/* Enable lock detect */
 		val = readl_relaxed(clk_base + pllmb->params->ext_misc_reg[0]);
 		val &= ~mask;
@@ -1008,13 +1066,14 @@ static void tegra210_pllp_set_defaults(struct tegra_clk_pll *pllp)
 	pllp->params->defaults_set = true;
 
 	if (val & PLL_ENABLE) {
-		pr_warn("PLL_P already enabled. Postponing set full defaults\n");
 
 		/*
 		 * PLL is ON: check if defaults already set, then set those
 		 * that can be updated in flight.
 		 */
 		pllp_check_defaults(pllp, true);
+		if (!pllp->params->defaults_set)
+			pr_warn("PLL_P already enabled. Postponing set full defaults\n");
 
 		/* Enable lock detect */
 		val = readl_relaxed(clk_base + pllp->params->ext_misc_reg[0]);
@@ -1046,47 +1105,49 @@ static void tegra210_pllp_set_defaults(struct tegra_clk_pll *pllp)
  * Both VCO and post-divider output rates are fixed at 480MHz and 240MHz,
  * respectively.
  */
-static void pllu_check_defaults(struct tegra_clk_pll *pll, bool hw_control)
+static void pllu_check_defaults(struct tegra_clk_pll_params *params,
+				bool hw_control)
 {
 	u32 val, mask;
 
 	/* Ignore lock enable (will be set) and IDDQ if under h/w control */
 	val = PLLU_MISC0_DEFAULT_VALUE & (~PLLU_MISC0_IDDQ);
 	mask = PLLU_MISC0_LOCK_ENABLE | (hw_control ? PLLU_MISC0_IDDQ : 0);
-	_pll_misc_chk_default(clk_base, pll->params, 0, val,
+	_pll_misc_chk_default(clk_base, params, 0, val,
 			~mask & PLLU_MISC0_WRITE_MASK);
 
 	val = PLLU_MISC1_DEFAULT_VALUE;
 	mask = PLLU_MISC1_LOCK_OVERRIDE;
-	_pll_misc_chk_default(clk_base, pll->params, 1, val,
+	_pll_misc_chk_default(clk_base, params, 1, val,
 			~mask & PLLU_MISC1_WRITE_MASK);
 }
 
-static void tegra210_pllu_set_defaults(struct tegra_clk_pll *pllu)
+static void tegra210_pllu_set_defaults(struct tegra_clk_pll_params *pllu)
 {
-	u32 val = readl_relaxed(clk_base + pllu->params->base_reg);
+	u32 val = readl_relaxed(clk_base + pllu->base_reg);
 
-	pllu->params->defaults_set = true;
+	pllu->defaults_set = true;
 
 	if (val & PLL_ENABLE) {
-		pr_warn("PLL_U already enabled. Postponing set full defaults\n");
 
 		/*
 		 * PLL is ON: check if defaults already set, then set those
 		 * that can be updated in flight.
 		 */
 		pllu_check_defaults(pllu, false);
+		if (!pllu->defaults_set)
+			pr_warn("PLL_U already enabled. Postponing set full defaults\n");
 
 		/* Enable lock detect */
-		val = readl_relaxed(clk_base + pllu->params->ext_misc_reg[0]);
+		val = readl_relaxed(clk_base + pllu->ext_misc_reg[0]);
 		val &= ~PLLU_MISC0_LOCK_ENABLE;
 		val |= PLLU_MISC0_DEFAULT_VALUE & PLLU_MISC0_LOCK_ENABLE;
-		writel_relaxed(val, clk_base + pllu->params->ext_misc_reg[0]);
+		writel_relaxed(val, clk_base + pllu->ext_misc_reg[0]);
 
-		val = readl_relaxed(clk_base + pllu->params->ext_misc_reg[1]);
+		val = readl_relaxed(clk_base + pllu->ext_misc_reg[1]);
 		val &= ~PLLU_MISC1_LOCK_OVERRIDE;
 		val |= PLLU_MISC1_DEFAULT_VALUE & PLLU_MISC1_LOCK_OVERRIDE;
-		writel_relaxed(val, clk_base + pllu->params->ext_misc_reg[1]);
+		writel_relaxed(val, clk_base + pllu->ext_misc_reg[1]);
 		udelay(1);
 
 		return;
@@ -1094,9 +1155,9 @@ static void tegra210_pllu_set_defaults(struct tegra_clk_pll *pllu)
 
 	/* set IDDQ, enable lock detect */
 	writel_relaxed(PLLU_MISC0_DEFAULT_VALUE,
-			clk_base + pllu->params->ext_misc_reg[0]);
+			clk_base + pllu->ext_misc_reg[0]);
 	writel_relaxed(PLLU_MISC1_DEFAULT_VALUE,
-			clk_base + pllu->params->ext_misc_reg[1]);
+			clk_base + pllu->ext_misc_reg[1]);
 	udelay(1);
 }
 
@@ -1216,6 +1277,7 @@ static int tegra210_pll_fixed_mdiv_cfg(struct clk_hw *hw,
 	cfg->n = p_rate / cf;
 
 	cfg->sdm_data = 0;
+	cfg->output_rate = input_rate;
 	if (params->sdm_ctrl_reg) {
 		unsigned long rem = p_rate - cf * cfg->n;
 		/* If ssc is enabled SDM enabled as well, even for integer n */
@@ -1226,10 +1288,15 @@ static int tegra210_pll_fixed_mdiv_cfg(struct clk_hw *hw,
 			s -= PLL_SDM_COEFF / 2;
 			cfg->sdm_data = sdin_din_to_data(s);
 		}
+		cfg->output_rate *= cfg->n * PLL_SDM_COEFF + PLL_SDM_COEFF/2 +
+					sdin_data_to_din(cfg->sdm_data);
+		cfg->output_rate /= p * cfg->m * PLL_SDM_COEFF;
+	} else {
+		cfg->output_rate *= cfg->n;
+		cfg->output_rate /= p * cfg->m;
 	}
 
 	cfg->input_rate = input_rate;
-	cfg->output_rate = rate;
 
 	return 0;
 }
@@ -1772,7 +1839,7 @@ static struct tegra_clk_pll_params pll_a1_params = {
 	.misc_reg = PLLA1_MISC0,
 	.lock_mask = PLLCX_BASE_LOCK,
 	.lock_delay = 300,
-	.iddq_reg = PLLA1_MISC0,
+	.iddq_reg = PLLA1_MISC1,
 	.iddq_bit_idx = PLLCX_IDDQ_BIT,
 	.reset_reg = PLLA1_MISC0,
 	.reset_bit_idx = PLLCX_RESET_BIT,
@@ -1987,9 +2054,9 @@ static struct div_nmp pllu_nmp = {
 };
 
 static struct tegra_clk_pll_freq_table pll_u_freq_table[] = {
-	{ 12000000, 480000000, 40, 1, 1, 0 },
-	{ 13000000, 480000000, 36, 1, 1, 0 }, /* actual: 468.0 MHz */
-	{ 38400000, 480000000, 25, 2, 1, 0 },
+	{ 12000000, 480000000, 40, 1, 0, 0 },
+	{ 13000000, 480000000, 36, 1, 0, 0 }, /* actual: 468.0 MHz */
+	{ 38400000, 480000000, 25, 2, 0, 0 },
 	{        0,         0,  0, 0, 0, 0 },
 };
 
@@ -2013,8 +2080,47 @@ static struct tegra_clk_pll_params pll_u_vco_params = {
 	.div_nmp = &pllu_nmp,
 	.freq_table = pll_u_freq_table,
 	.flags = TEGRA_PLLU | TEGRA_PLL_USE_LOCK | TEGRA_PLL_VCO_OUT,
-	.set_defaults = tegra210_pllu_set_defaults,
-	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
+};
+
+struct utmi_clk_param {
+	/* Oscillator Frequency in KHz */
+	u32 osc_frequency;
+	/* UTMIP PLL Enable Delay Count  */
+	u8 enable_delay_count;
+	/* UTMIP PLL Stable count */
+	u16 stable_count;
+	/*  UTMIP PLL Active delay count */
+	u8 active_delay_count;
+	/* UTMIP PLL Xtal frequency count */
+	u16 xtal_freq_count;
+};
+
+static const struct utmi_clk_param utmi_parameters[] = {
+	{
+		.osc_frequency = 38400000, .enable_delay_count = 0x0,
+		.stable_count = 0x0, .active_delay_count = 0x6,
+		.xtal_freq_count = 0x80
+	}, {
+		.osc_frequency = 13000000, .enable_delay_count = 0x02,
+		.stable_count = 0x33, .active_delay_count = 0x05,
+		.xtal_freq_count = 0x7f
+	}, {
+		.osc_frequency = 19200000, .enable_delay_count = 0x03,
+		.stable_count = 0x4b, .active_delay_count = 0x06,
+		.xtal_freq_count = 0xbb
+	}, {
+		.osc_frequency = 12000000, .enable_delay_count = 0x02,
+		.stable_count = 0x2f, .active_delay_count = 0x08,
+		.xtal_freq_count = 0x76
+	}, {
+		.osc_frequency = 26000000, .enable_delay_count = 0x04,
+		.stable_count = 0x66, .active_delay_count = 0x09,
+		.xtal_freq_count = 0xfe
+	}, {
+		.osc_frequency = 16800000, .enable_delay_count = 0x03,
+		.stable_count = 0x41, .active_delay_count = 0x0a,
+		.xtal_freq_count = 0xa4
+	},
 };
 
 static struct tegra_clk tegra210_clks[tegra_clk_max] __initdata = {
@@ -2115,7 +2221,6 @@ static struct tegra_clk tegra210_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_pll_c2] = { .dt_id = TEGRA210_CLK_PLL_C2, .present = true },
 	[tegra_clk_pll_c3] = { .dt_id = TEGRA210_CLK_PLL_C3, .present = true },
 	[tegra_clk_pll_m] = { .dt_id = TEGRA210_CLK_PLL_M, .present = true },
-	[tegra_clk_pll_m_out1] = { .dt_id = TEGRA210_CLK_PLL_M_OUT1, .present = true },
 	[tegra_clk_pll_p] = { .dt_id = TEGRA210_CLK_PLL_P, .present = true },
 	[tegra_clk_pll_p_out1] = { .dt_id = TEGRA210_CLK_PLL_P_OUT1, .present = true },
 	[tegra_clk_pll_p_out3] = { .dt_id = TEGRA210_CLK_PLL_P_OUT3, .present = true },
@@ -2209,6 +2314,25 @@ static struct tegra_clk tegra210_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_pll_c4_out2] = { .dt_id = TEGRA210_CLK_PLL_C4_OUT2, .present = true },
 	[tegra_clk_pll_c4_out3] = { .dt_id = TEGRA210_CLK_PLL_C4_OUT3, .present = true },
 	[tegra_clk_apb2ape] = { .dt_id = TEGRA210_CLK_APB2APE, .present = true },
+	[tegra_clk_pll_a1] = { .dt_id = TEGRA210_CLK_PLL_A1, .present = true },
+	[tegra_clk_ispa] = { .dt_id = TEGRA210_CLK_ISPA, .present = true },
+	[tegra_clk_cec] = { .dt_id = TEGRA210_CLK_CEC, .present = true },
+	[tegra_clk_dmic1] = { .dt_id = TEGRA210_CLK_DMIC1, .present = true },
+	[tegra_clk_dmic2] = { .dt_id = TEGRA210_CLK_DMIC2, .present = true },
+	[tegra_clk_dmic3] = { .dt_id = TEGRA210_CLK_DMIC3, .present = true },
+	[tegra_clk_dmic1_sync_clk] = { .dt_id = TEGRA210_CLK_DMIC1_SYNC_CLK, .present = true },
+	[tegra_clk_dmic2_sync_clk] = { .dt_id = TEGRA210_CLK_DMIC2_SYNC_CLK, .present = true },
+	[tegra_clk_dmic3_sync_clk] = { .dt_id = TEGRA210_CLK_DMIC3_SYNC_CLK, .present = true },
+	[tegra_clk_dmic1_sync_clk_mux] = { .dt_id = TEGRA210_CLK_DMIC1_SYNC_CLK_MUX, .present = true },
+	[tegra_clk_dmic2_sync_clk_mux] = { .dt_id = TEGRA210_CLK_DMIC2_SYNC_CLK_MUX, .present = true },
+	[tegra_clk_dmic3_sync_clk_mux] = { .dt_id = TEGRA210_CLK_DMIC3_SYNC_CLK_MUX, .present = true },
+	[tegra_clk_dp2] = { .dt_id = TEGRA210_CLK_DP2, .present = true },
+	[tegra_clk_iqc1] = { .dt_id = TEGRA210_CLK_IQC1, .present = true },
+	[tegra_clk_iqc2] = { .dt_id = TEGRA210_CLK_IQC2, .present = true },
+	[tegra_clk_pll_a_out_adsp] = { .dt_id = TEGRA210_CLK_PLL_A_OUT_ADSP, .present = true },
+	[tegra_clk_pll_a_out0_out_adsp] = { .dt_id = TEGRA210_CLK_PLL_A_OUT0_OUT_ADSP, .present = true },
+	[tegra_clk_adsp] = { .dt_id = TEGRA210_CLK_ADSP, .present = true },
+	[tegra_clk_adsp_neon] = { .dt_id = TEGRA210_CLK_ADSP_NEON, .present = true },
 };
 
 static struct tegra_devclk devclks[] __initdata = {
@@ -2227,7 +2351,6 @@ static struct tegra_devclk devclks[] __initdata = {
 	{ .con_id = "pll_p_out3", .dt_id = TEGRA210_CLK_PLL_P_OUT3 },
 	{ .con_id = "pll_p_out4", .dt_id = TEGRA210_CLK_PLL_P_OUT4 },
 	{ .con_id = "pll_m", .dt_id = TEGRA210_CLK_PLL_M },
-	{ .con_id = "pll_m_out1", .dt_id = TEGRA210_CLK_PLL_M_OUT1 },
 	{ .con_id = "pll_x", .dt_id = TEGRA210_CLK_PLL_X },
 	{ .con_id = "pll_x_out0", .dt_id = TEGRA210_CLK_PLL_X_OUT0 },
 	{ .con_id = "pll_u", .dt_id = TEGRA210_CLK_PLL_U },
@@ -2286,6 +2409,221 @@ static struct tegra_audio_clk_info tegra210_audio_plls[] = {
 
 static struct clk **clks;
 
+static const char * const aclk_parents[] = {
+	"pll_a1", "pll_c", "pll_p", "pll_a_out0", "pll_c2", "pll_c3",
+	"clk_m"
+};
+
+void tegra210_put_utmipll_in_iddq(void)
+{
+	u32 reg;
+
+	reg = readl_relaxed(clk_base + UTMIPLL_HW_PWRDN_CFG0);
+
+	if (reg & UTMIPLL_HW_PWRDN_CFG0_UTMIPLL_LOCK) {
+		pr_err("trying to assert IDDQ while UTMIPLL is locked\n");
+		return;
+	}
+
+	reg |= UTMIPLL_HW_PWRDN_CFG0_IDDQ_OVERRIDE;
+	writel_relaxed(reg, clk_base + UTMIPLL_HW_PWRDN_CFG0);
+}
+EXPORT_SYMBOL_GPL(tegra210_put_utmipll_in_iddq);
+
+void tegra210_put_utmipll_out_iddq(void)
+{
+	u32 reg;
+
+	reg = readl_relaxed(clk_base + UTMIPLL_HW_PWRDN_CFG0);
+	reg &= ~UTMIPLL_HW_PWRDN_CFG0_IDDQ_OVERRIDE;
+	writel_relaxed(reg, clk_base + UTMIPLL_HW_PWRDN_CFG0);
+}
+EXPORT_SYMBOL_GPL(tegra210_put_utmipll_out_iddq);
+
+static void tegra210_utmi_param_configure(void)
+{
+	u32 reg;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(utmi_parameters); i++) {
+		if (osc_freq == utmi_parameters[i].osc_frequency)
+			break;
+	}
+
+	if (i >= ARRAY_SIZE(utmi_parameters)) {
+		pr_err("%s: Unexpected oscillator freq %lu\n", __func__,
+			osc_freq);
+		return;
+	}
+
+	reg = readl_relaxed(clk_base + UTMIPLL_HW_PWRDN_CFG0);
+	reg &= ~UTMIPLL_HW_PWRDN_CFG0_IDDQ_OVERRIDE;
+	writel_relaxed(reg, clk_base + UTMIPLL_HW_PWRDN_CFG0);
+
+	udelay(10);
+
+	reg = readl_relaxed(clk_base + UTMIP_PLL_CFG2);
+
+	/* Program UTMIP PLL stable and active counts */
+	/* [FIXME] arclk_rst.h says WRONG! This should be 1ms -> 0x50 Check! */
+	reg &= ~UTMIP_PLL_CFG2_STABLE_COUNT(~0);
+	reg |= UTMIP_PLL_CFG2_STABLE_COUNT(utmi_parameters[i].stable_count);
+
+	reg &= ~UTMIP_PLL_CFG2_ACTIVE_DLY_COUNT(~0);
+
+	reg |=
+	UTMIP_PLL_CFG2_ACTIVE_DLY_COUNT(utmi_parameters[i].active_delay_count);
+	writel_relaxed(reg, clk_base + UTMIP_PLL_CFG2);
+
+	/* Program UTMIP PLL delay and oscillator frequency counts */
+	reg = readl_relaxed(clk_base + UTMIP_PLL_CFG1);
+	reg &= ~UTMIP_PLL_CFG1_ENABLE_DLY_COUNT(~0);
+
+	reg |=
+	UTMIP_PLL_CFG1_ENABLE_DLY_COUNT(utmi_parameters[i].enable_delay_count);
+
+	reg &= ~UTMIP_PLL_CFG1_XTAL_FREQ_COUNT(~0);
+	reg |=
+	UTMIP_PLL_CFG1_XTAL_FREQ_COUNT(utmi_parameters[i].xtal_freq_count);
+
+	reg |= UTMIP_PLL_CFG1_FORCE_PLLU_POWERDOWN;
+	writel_relaxed(reg, clk_base + UTMIP_PLL_CFG1);
+
+	/* Remove power downs from UTMIP PLL control bits */
+	reg = readl_relaxed(clk_base + UTMIP_PLL_CFG1);
+	reg &= ~UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERDOWN;
+	reg |= UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERUP;
+	writel_relaxed(reg, clk_base + UTMIP_PLL_CFG1);
+	udelay(1);
+
+	/* Enable samplers for SNPS, XUSB_HOST, XUSB_DEV */
+	reg = readl_relaxed(clk_base + UTMIP_PLL_CFG2);
+	reg |= UTMIP_PLL_CFG2_FORCE_PD_SAMP_A_POWERUP;
+	reg |= UTMIP_PLL_CFG2_FORCE_PD_SAMP_B_POWERUP;
+	reg |= UTMIP_PLL_CFG2_FORCE_PD_SAMP_D_POWERUP;
+	reg &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_A_POWERDOWN;
+	reg &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_B_POWERDOWN;
+	reg &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_D_POWERDOWN;
+	writel_relaxed(reg, clk_base + UTMIP_PLL_CFG2);
+
+	/* Setup HW control of UTMIPLL */
+	reg = readl_relaxed(clk_base + UTMIP_PLL_CFG1);
+	reg &= ~UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERDOWN;
+	reg &= ~UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERUP;
+	writel_relaxed(reg, clk_base + UTMIP_PLL_CFG1);
+
+	reg = readl_relaxed(clk_base + UTMIPLL_HW_PWRDN_CFG0);
+	reg |= UTMIPLL_HW_PWRDN_CFG0_USE_LOCKDET;
+	reg &= ~UTMIPLL_HW_PWRDN_CFG0_CLK_ENABLE_SWCTL;
+	writel_relaxed(reg, clk_base + UTMIPLL_HW_PWRDN_CFG0);
+
+	udelay(1);
+
+	reg = readl_relaxed(clk_base + XUSB_PLL_CFG0);
+	reg &= ~XUSB_PLL_CFG0_UTMIPLL_LOCK_DLY;
+	writel_relaxed(reg, clk_base + XUSB_PLL_CFG0);
+
+	udelay(1);
+
+	/* Enable HW control UTMIPLL */
+	reg = readl_relaxed(clk_base + UTMIPLL_HW_PWRDN_CFG0);
+	reg |= UTMIPLL_HW_PWRDN_CFG0_SEQ_ENABLE;
+	writel_relaxed(reg, clk_base + UTMIPLL_HW_PWRDN_CFG0);
+}
+
+static int tegra210_enable_pllu(void)
+{
+	struct tegra_clk_pll_freq_table *fentry;
+	struct tegra_clk_pll pllu;
+	u32 reg;
+
+	for (fentry = pll_u_freq_table; fentry->input_rate; fentry++) {
+		if (fentry->input_rate == pll_ref_freq)
+			break;
+	}
+
+	if (!fentry->input_rate) {
+		pr_err("Unknown PLL_U reference frequency %lu\n", pll_ref_freq);
+		return -EINVAL;
+	}
+
+	/* clear IDDQ bit */
+	pllu.params = &pll_u_vco_params;
+	reg = readl_relaxed(clk_base + pllu.params->ext_misc_reg[0]);
+	reg &= ~BIT(pllu.params->iddq_bit_idx);
+	writel_relaxed(reg, clk_base + pllu.params->ext_misc_reg[0]);
+
+	reg = readl_relaxed(clk_base + PLLU_BASE);
+	reg &= ~GENMASK(20, 0);
+	reg |= fentry->m;
+	reg |= fentry->n << 8;
+	reg |= fentry->p << 16;
+	writel(reg, clk_base + PLLU_BASE);
+	reg |= PLL_ENABLE;
+	writel(reg, clk_base + PLLU_BASE);
+
+	readl_relaxed_poll_timeout(clk_base + PLLU_BASE, reg,
+				   reg & PLL_BASE_LOCK, 2, 1000);
+	if (!(reg & PLL_BASE_LOCK)) {
+		pr_err("Timed out waiting for PLL_U to lock\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int tegra210_init_pllu(void)
+{
+	u32 reg;
+	int err;
+
+	tegra210_pllu_set_defaults(&pll_u_vco_params);
+	/* skip initialization when pllu is in hw controlled mode */
+	reg = readl_relaxed(clk_base + PLLU_BASE);
+	if (reg & PLLU_BASE_OVERRIDE) {
+		if (!(reg & PLL_ENABLE)) {
+			err = tegra210_enable_pllu();
+			if (err < 0) {
+				WARN_ON(1);
+				return err;
+			}
+		}
+		/* enable hw controlled mode */
+		reg = readl_relaxed(clk_base + PLLU_BASE);
+		reg &= ~PLLU_BASE_OVERRIDE;
+		writel(reg, clk_base + PLLU_BASE);
+
+		reg = readl_relaxed(clk_base + PLLU_HW_PWRDN_CFG0);
+		reg |= PLLU_HW_PWRDN_CFG0_IDDQ_PD_INCLUDE |
+		       PLLU_HW_PWRDN_CFG0_USE_SWITCH_DETECT |
+		       PLLU_HW_PWRDN_CFG0_USE_LOCKDET;
+		reg &= ~(PLLU_HW_PWRDN_CFG0_CLK_ENABLE_SWCTL |
+			PLLU_HW_PWRDN_CFG0_CLK_SWITCH_SWCTL);
+		writel_relaxed(reg, clk_base + PLLU_HW_PWRDN_CFG0);
+
+		reg = readl_relaxed(clk_base + XUSB_PLL_CFG0);
+		reg &= ~XUSB_PLL_CFG0_PLLU_LOCK_DLY_MASK;
+		writel_relaxed(reg, clk_base + XUSB_PLL_CFG0);
+		udelay(1);
+
+		reg = readl_relaxed(clk_base + PLLU_HW_PWRDN_CFG0);
+		reg |= PLLU_HW_PWRDN_CFG0_SEQ_ENABLE;
+		writel_relaxed(reg, clk_base + PLLU_HW_PWRDN_CFG0);
+		udelay(1);
+
+		reg = readl_relaxed(clk_base + PLLU_BASE);
+		reg &= ~PLLU_BASE_CLKENABLE_USB;
+		writel_relaxed(reg, clk_base + PLLU_BASE);
+	}
+
+	/* enable UTMIPLL hw control if not yet done by the bootloader */
+	reg = readl_relaxed(clk_base + UTMIPLL_HW_PWRDN_CFG0);
+	if (!(reg & UTMIPLL_HW_PWRDN_CFG0_SEQ_ENABLE))
+		tegra210_utmi_param_configure();
+
+	return 0;
+}
+
 static __init void tegra210_periph_clk_init(void __iomem *clk_base,
 					    void __iomem *pmc_base)
 {
@@ -2347,6 +2685,11 @@ static __init void tegra210_periph_clk_init(void __iomem *clk_base,
 	clk_register_clkdev(clk, "cml1", NULL);
 	clks[TEGRA210_CLK_CML1] = clk;
 
+	clk = tegra_clk_register_super_clk("aclk", aclk_parents,
+				ARRAY_SIZE(aclk_parents), 0, clk_base + 0x6e0,
+				0, NULL);
+	clks[TEGRA210_CLK_ACLK] = clk;
+
 	tegra_periph_clk_init(clk_base, pmc_base, tegra210_clks, &pll_p_params);
 }
 
@@ -2402,9 +2745,6 @@ static void __init tegra210_pll_init(void __iomem *clk_base,
 	clk_register_clkdev(clk, "pll_mb", NULL);
 	clks[TEGRA210_CLK_PLL_MB] = clk;
 
-	clk_register_clkdev(clk, "pll_m_out1", NULL);
-	clks[TEGRA210_CLK_PLL_M_OUT1] = clk;
-
 	/* PLLM_UD */
 	clk = clk_register_fixed_factor(NULL, "pll_m_ud", "pll_m",
 					CLK_SET_RATE_PARENT, 1, 1);
@@ -2412,11 +2752,12 @@ static void __init tegra210_pll_init(void __iomem *clk_base,
 	clks[TEGRA210_CLK_PLL_M_UD] = clk;
 
 	/* PLLU_VCO */
-	clk = tegra_clk_register_pllu_tegra210("pll_u_vco", "pll_ref",
-					       clk_base, 0, &pll_u_vco_params,
-					       &pll_u_lock);
-	clk_register_clkdev(clk, "pll_u_vco", NULL);
-	clks[TEGRA210_CLK_PLL_U] = clk;
+	if (!tegra210_init_pllu()) {
+		clk = clk_register_fixed_rate(NULL, "pll_u_vco", "pll_ref", 0,
+					      480*1000*1000);
+		clk_register_clkdev(clk, "pll_u_vco", NULL);
+		clks[TEGRA210_CLK_PLL_U] = clk;
+	}
 
 	/* PLLU_OUT */
 	clk = clk_register_divider_table(NULL, "pll_u_out", "pll_u_vco", 0,
@@ -2651,6 +2992,8 @@ static struct tegra_clk_init_table init_table[] __initdata = {
 	{ TEGRA210_CLK_EMC, TEGRA210_CLK_CLK_MAX, 0, 1 },
 	{ TEGRA210_CLK_MSELECT, TEGRA210_CLK_CLK_MAX, 0, 1 },
 	{ TEGRA210_CLK_CSITE, TEGRA210_CLK_CLK_MAX, 0, 1 },
+	/* TODO find a way to enable this on-demand */
+	{ TEGRA210_CLK_DBGAPB, TEGRA210_CLK_CLK_MAX, 0, 1 },
 	{ TEGRA210_CLK_TSENSOR, TEGRA210_CLK_CLK_M, 400000, 0 },
 	{ TEGRA210_CLK_I2C1, TEGRA210_CLK_PLL_P, 0, 0 },
 	{ TEGRA210_CLK_I2C2, TEGRA210_CLK_PLL_P, 0, 0 },
@@ -2661,6 +3004,8 @@ static struct tegra_clk_init_table init_table[] __initdata = {
 	{ TEGRA210_CLK_PLL_DP, TEGRA210_CLK_CLK_MAX, 270000000, 0 },
 	{ TEGRA210_CLK_SOC_THERM, TEGRA210_CLK_PLL_P, 51000000, 0 },
 	{ TEGRA210_CLK_CCLK_G, TEGRA210_CLK_CLK_MAX, 0, 1 },
+	{ TEGRA210_CLK_PLL_U_OUT1, TEGRA210_CLK_CLK_MAX, 48000000, 1 },
+	{ TEGRA210_CLK_PLL_U_OUT2, TEGRA210_CLK_CLK_MAX, 60000000, 1 },
 	/* This MUST be the last entry. */
 	{ TEGRA210_CLK_CLK_MAX, TEGRA210_CLK_CLK_MAX, 0, 0 },
 };
@@ -2679,6 +3024,81 @@ static void __init tegra210_clock_apply_init_table(void)
 }
 
 /**
+ * tegra210_car_barrier - wait for pending writes to the CAR to complete
+ *
+ * Wait for any outstanding writes to the CAR MMIO space from this CPU
+ * to complete before continuing execution.  No return value.
+ */
+static void tegra210_car_barrier(void)
+{
+	readl_relaxed(clk_base + RST_DFLL_DVCO);
+}
+
+/**
+ * tegra210_clock_assert_dfll_dvco_reset - assert the DFLL's DVCO reset
+ *
+ * Assert the reset line of the DFLL's DVCO.  No return value.
+ */
+static void tegra210_clock_assert_dfll_dvco_reset(void)
+{
+	u32 v;
+
+	v = readl_relaxed(clk_base + RST_DFLL_DVCO);
+	v |= (1 << DVFS_DFLL_RESET_SHIFT);
+	writel_relaxed(v, clk_base + RST_DFLL_DVCO);
+	tegra210_car_barrier();
+}
+
+/**
+ * tegra210_clock_deassert_dfll_dvco_reset - deassert the DFLL's DVCO reset
+ *
+ * Deassert the reset line of the DFLL's DVCO, allowing the DVCO to
+ * operate.  No return value.
+ */
+static void tegra210_clock_deassert_dfll_dvco_reset(void)
+{
+	u32 v;
+
+	v = readl_relaxed(clk_base + RST_DFLL_DVCO);
+	v &= ~(1 << DVFS_DFLL_RESET_SHIFT);
+	writel_relaxed(v, clk_base + RST_DFLL_DVCO);
+	tegra210_car_barrier();
+}
+
+static int tegra210_reset_assert(unsigned long id)
+{
+	if (id == TEGRA210_RST_DFLL_DVCO)
+		tegra210_clock_assert_dfll_dvco_reset();
+	else if (id == TEGRA210_RST_ADSP)
+		writel(GENMASK(26, 21) | BIT(7),
+			clk_base + CLK_RST_CONTROLLER_RST_DEV_Y_SET);
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static int tegra210_reset_deassert(unsigned long id)
+{
+	if (id == TEGRA210_RST_DFLL_DVCO)
+		tegra210_clock_deassert_dfll_dvco_reset();
+	else if (id == TEGRA210_RST_ADSP) {
+		writel(BIT(21), clk_base + CLK_RST_CONTROLLER_RST_DEV_Y_CLR);
+		/*
+		 * Considering adsp cpu clock (min: 12.5MHZ, max: 1GHz)
+		 * a delay of 5us ensures that it's at least
+		 * 6 * adsp_cpu_cycle_period long.
+		 */
+		udelay(5);
+		writel(GENMASK(26, 22) | BIT(7),
+			clk_base + CLK_RST_CONTROLLER_RST_DEV_Y_CLR);
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
  * tegra210_clock_init - Tegra210-specific clock initialization
  * @np: struct device_node * of the DT node for the SoC CAR IP block
  *
@@ -2742,6 +3162,9 @@ static void __init tegra210_clock_init(struct device_node *np)
 
 	tegra_super_clk_gen5_init(clk_base, pmc_base, tegra210_clks,
 				  &pll_x_params);
+	tegra_init_special_resets(2, tegra210_reset_assert,
+				  tegra210_reset_deassert);
+
 	tegra_add_of_provider(np);
 	tegra_register_devclks(devclks, ARRAY_SIZE(devclks));
 
diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c
index 8e2db5ead8da..a2d163f759b4 100644
--- a/drivers/clk/tegra/clk-tegra30.c
+++ b/drivers/clk/tegra/clk-tegra30.c
@@ -817,6 +817,7 @@ static struct tegra_clk tegra30_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_pll_p_out4] = { .dt_id = TEGRA30_CLK_PLL_P_OUT4, .present = true },
 	[tegra_clk_pll_a] = { .dt_id = TEGRA30_CLK_PLL_A, .present = true },
 	[tegra_clk_pll_a_out0] = { .dt_id = TEGRA30_CLK_PLL_A_OUT0, .present = true },
+	[tegra_clk_cec] = { .dt_id = TEGRA30_CLK_CEC, .present = true },
 };
 
 static const char *pll_e_parents[] = { "pll_ref", "pll_p" };
diff --git a/drivers/clk/tegra/clk.c b/drivers/clk/tegra/clk.c
index b2cdd9a235f4..ba923f0d5953 100644
--- a/drivers/clk/tegra/clk.c
+++ b/drivers/clk/tegra/clk.c
@@ -17,6 +17,7 @@
 #include <linux/clkdev.h>
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/delay.h>
 #include <linux/of.h>
 #include <linux/clk/tegra.h>
 #include <linux/reset-controller.h>
@@ -182,6 +183,20 @@ static int tegra_clk_rst_deassert(struct reset_controller_dev *rcdev,
 	return -EINVAL;
 }
 
+static int tegra_clk_rst_reset(struct reset_controller_dev *rcdev,
+		unsigned long id)
+{
+	int err;
+
+	err = tegra_clk_rst_assert(rcdev, id);
+	if (err)
+		return err;
+
+	udelay(1);
+
+	return tegra_clk_rst_deassert(rcdev, id);
+}
+
 const struct tegra_clk_periph_regs *get_reg_bank(int clkid)
 {
 	int reg_bank = clkid / 32;
@@ -274,6 +289,7 @@ void __init tegra_init_from_table(struct tegra_clk_init_table *tbl,
 static const struct reset_control_ops rst_ops = {
 	.assert = tegra_clk_rst_assert,
 	.deassert = tegra_clk_rst_deassert,
+	.reset = tegra_clk_rst_reset,
 };
 
 static struct reset_controller_dev rst_ctlr = {
diff --git a/drivers/clk/tegra/clk.h b/drivers/clk/tegra/clk.h
index 6ba82ecffd4d..945b07093afa 100644
--- a/drivers/clk/tegra/clk.h
+++ b/drivers/clk/tegra/clk.h
@@ -116,7 +116,7 @@ struct tegra_clk_pll_freq_table {
 	unsigned long	input_rate;
 	unsigned long	output_rate;
 	u32		n;
-	u16		m;
+	u32		m;
 	u8		p;
 	u8		cpcon;
 	u16		sdm_data;
@@ -586,11 +586,11 @@ struct tegra_clk_periph {
 
 extern const struct clk_ops tegra_clk_periph_ops;
 struct clk *tegra_clk_register_periph(const char *name,
-		const char **parent_names, int num_parents,
+		const char * const *parent_names, int num_parents,
 		struct tegra_clk_periph *periph, void __iomem *clk_base,
 		u32 offset, unsigned long flags);
 struct clk *tegra_clk_register_periph_nodiv(const char *name,
-		const char **parent_names, int num_parents,
+		const char * const *parent_names, int num_parents,
 		struct tegra_clk_periph *periph, void __iomem *clk_base,
 		u32 offset);
 
@@ -626,7 +626,7 @@ struct tegra_periph_init_data {
 	const char *name;
 	int clk_id;
 	union {
-		const char **parent_names;
+		const char *const *parent_names;
 		const char *parent_name;
 	} p;
 	int num_parents;
@@ -686,6 +686,8 @@ struct tegra_periph_init_data {
 struct tegra_clk_super_mux {
 	struct clk_hw	hw;
 	void __iomem	*reg;
+	struct tegra_clk_frac_div frac_div;
+	const struct clk_ops	*div_ops;
 	u8		width;
 	u8		flags;
 	u8		div2_index;
@@ -702,7 +704,10 @@ struct clk *tegra_clk_register_super_mux(const char *name,
 		const char **parent_names, u8 num_parents,
 		unsigned long flags, void __iomem *reg, u8 clk_super_flags,
 		u8 width, u8 pllx_index, u8 div2_index, spinlock_t *lock);
-
+struct clk *tegra_clk_register_super_clk(const char *name,
+		const char * const *parent_names, u8 num_parents,
+		unsigned long flags, void __iomem *reg, u8 clk_super_flags,
+		spinlock_t *lock);
 /**
  * struct clk_init_table - clock initialization table
  * @clk_id:	clock id as mentioned in device tree bindings
diff --git a/drivers/clk/ti/apll.c b/drivers/clk/ti/apll.c
index 6411e132faa2..06f486b3488c 100644
--- a/drivers/clk/ti/apll.c
+++ b/drivers/clk/ti/apll.c
@@ -55,20 +55,20 @@ static int dra7_apll_enable(struct clk_hw *hw)
 	state <<= __ffs(ad->idlest_mask);
 
 	/* Check is already locked */
-	v = ti_clk_ll_ops->clk_readl(ad->idlest_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->idlest_reg);
 
 	if ((v & ad->idlest_mask) == state)
 		return r;
 
-	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->control_reg);
 	v &= ~ad->enable_mask;
 	v |= APLL_FORCE_LOCK << __ffs(ad->enable_mask);
-	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+	ti_clk_ll_ops->clk_writel(v, &ad->control_reg);
 
 	state <<= __ffs(ad->idlest_mask);
 
 	while (1) {
-		v = ti_clk_ll_ops->clk_readl(ad->idlest_reg);
+		v = ti_clk_ll_ops->clk_readl(&ad->idlest_reg);
 		if ((v & ad->idlest_mask) == state)
 			break;
 		if (i > MAX_APLL_WAIT_TRIES)
@@ -99,10 +99,10 @@ static void dra7_apll_disable(struct clk_hw *hw)
 
 	state <<= __ffs(ad->idlest_mask);
 
-	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->control_reg);
 	v &= ~ad->enable_mask;
 	v |= APLL_AUTO_IDLE << __ffs(ad->enable_mask);
-	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+	ti_clk_ll_ops->clk_writel(v, &ad->control_reg);
 }
 
 static int dra7_apll_is_enabled(struct clk_hw *hw)
@@ -113,7 +113,7 @@ static int dra7_apll_is_enabled(struct clk_hw *hw)
 
 	ad = clk->dpll_data;
 
-	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->control_reg);
 	v &= ad->enable_mask;
 
 	v >>= __ffs(ad->enable_mask);
@@ -164,7 +164,7 @@ static void __init omap_clk_register_apll(struct clk_hw *hw,
 
 	ad->clk_bypass = __clk_get_hw(clk);
 
-	clk = clk_register(NULL, &clk_hw->hw);
+	clk = ti_clk_register(NULL, &clk_hw->hw, node->name);
 	if (!IS_ERR(clk)) {
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 		kfree(clk_hw->hw.init->parent_names);
@@ -185,6 +185,7 @@ static void __init of_dra7_apll_setup(struct device_node *node)
 	struct clk_hw_omap *clk_hw = NULL;
 	struct clk_init_data *init = NULL;
 	const char **parent_names = NULL;
+	int ret;
 
 	ad = kzalloc(sizeof(*ad), GFP_KERNEL);
 	clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL);
@@ -194,7 +195,6 @@ static void __init of_dra7_apll_setup(struct device_node *node)
 
 	clk_hw->dpll_data = ad;
 	clk_hw->hw.init = init;
-	clk_hw->flags = MEMMAP_ADDRESSING;
 
 	init->name = node->name;
 	init->ops = &apll_ck_ops;
@@ -213,10 +213,10 @@ static void __init of_dra7_apll_setup(struct device_node *node)
 
 	init->parent_names = parent_names;
 
-	ad->control_reg = ti_clk_get_reg_addr(node, 0);
-	ad->idlest_reg = ti_clk_get_reg_addr(node, 1);
+	ret = ti_clk_get_reg_addr(node, 0, &ad->control_reg);
+	ret |= ti_clk_get_reg_addr(node, 1, &ad->idlest_reg);
 
-	if (IS_ERR(ad->control_reg) || IS_ERR(ad->idlest_reg))
+	if (ret)
 		goto cleanup;
 
 	ad->idlest_mask = 0x1;
@@ -242,7 +242,7 @@ static int omap2_apll_is_enabled(struct clk_hw *hw)
 	struct dpll_data *ad = clk->dpll_data;
 	u32 v;
 
-	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->control_reg);
 	v &= ad->enable_mask;
 
 	v >>= __ffs(ad->enable_mask);
@@ -268,13 +268,13 @@ static int omap2_apll_enable(struct clk_hw *hw)
 	u32 v;
 	int i = 0;
 
-	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->control_reg);
 	v &= ~ad->enable_mask;
 	v |= OMAP2_EN_APLL_LOCKED << __ffs(ad->enable_mask);
-	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+	ti_clk_ll_ops->clk_writel(v, &ad->control_reg);
 
 	while (1) {
-		v = ti_clk_ll_ops->clk_readl(ad->idlest_reg);
+		v = ti_clk_ll_ops->clk_readl(&ad->idlest_reg);
 		if (v & ad->idlest_mask)
 			break;
 		if (i > MAX_APLL_WAIT_TRIES)
@@ -298,10 +298,10 @@ static void omap2_apll_disable(struct clk_hw *hw)
 	struct dpll_data *ad = clk->dpll_data;
 	u32 v;
 
-	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->control_reg);
 	v &= ~ad->enable_mask;
 	v |= OMAP2_EN_APLL_STOPPED << __ffs(ad->enable_mask);
-	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+	ti_clk_ll_ops->clk_writel(v, &ad->control_reg);
 }
 
 static struct clk_ops omap2_apll_ops = {
@@ -316,10 +316,10 @@ static void omap2_apll_set_autoidle(struct clk_hw_omap *clk, u32 val)
 	struct dpll_data *ad = clk->dpll_data;
 	u32 v;
 
-	v = ti_clk_ll_ops->clk_readl(ad->autoidle_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->autoidle_reg);
 	v &= ~ad->autoidle_mask;
 	v |= val << __ffs(ad->autoidle_mask);
-	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+	ti_clk_ll_ops->clk_writel(v, &ad->control_reg);
 }
 
 #define OMAP2_APLL_AUTOIDLE_LOW_POWER_STOP	0x3
@@ -348,6 +348,7 @@ static void __init of_omap2_apll_setup(struct device_node *node)
 	struct clk *clk;
 	const char *parent_name;
 	u32 val;
+	int ret;
 
 	ad = kzalloc(sizeof(*ad), GFP_KERNEL);
 	clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL);
@@ -393,12 +394,11 @@ static void __init of_omap2_apll_setup(struct device_node *node)
 
 	ad->idlest_mask = 1 << val;
 
-	ad->control_reg = ti_clk_get_reg_addr(node, 0);
-	ad->autoidle_reg = ti_clk_get_reg_addr(node, 1);
-	ad->idlest_reg = ti_clk_get_reg_addr(node, 2);
+	ret = ti_clk_get_reg_addr(node, 0, &ad->control_reg);
+	ret |= ti_clk_get_reg_addr(node, 1, &ad->autoidle_reg);
+	ret |= ti_clk_get_reg_addr(node, 2, &ad->idlest_reg);
 
-	if (IS_ERR(ad->control_reg) || IS_ERR(ad->autoidle_reg) ||
-	    IS_ERR(ad->idlest_reg))
+	if (ret)
 		goto cleanup;
 
 	clk = clk_register(NULL, &clk_hw->hw);
diff --git a/drivers/clk/ti/autoidle.c b/drivers/clk/ti/autoidle.c
index 345af43465f0..7bb9afbe4058 100644
--- a/drivers/clk/ti/autoidle.c
+++ b/drivers/clk/ti/autoidle.c
@@ -25,7 +25,7 @@
 #include "clock.h"
 
 struct clk_ti_autoidle {
-	void __iomem		*reg;
+	struct clk_omap_reg	reg;
 	u8			shift;
 	u8			flags;
 	const char		*name;
@@ -73,28 +73,28 @@ static void _allow_autoidle(struct clk_ti_autoidle *clk)
 {
 	u32 val;
 
-	val = ti_clk_ll_ops->clk_readl(clk->reg);
+	val = ti_clk_ll_ops->clk_readl(&clk->reg);
 
 	if (clk->flags & AUTOIDLE_LOW)
 		val &= ~(1 << clk->shift);
 	else
 		val |= (1 << clk->shift);
 
-	ti_clk_ll_ops->clk_writel(val, clk->reg);
+	ti_clk_ll_ops->clk_writel(val, &clk->reg);
 }
 
 static void _deny_autoidle(struct clk_ti_autoidle *clk)
 {
 	u32 val;
 
-	val = ti_clk_ll_ops->clk_readl(clk->reg);
+	val = ti_clk_ll_ops->clk_readl(&clk->reg);
 
 	if (clk->flags & AUTOIDLE_LOW)
 		val |= (1 << clk->shift);
 	else
 		val &= ~(1 << clk->shift);
 
-	ti_clk_ll_ops->clk_writel(val, clk->reg);
+	ti_clk_ll_ops->clk_writel(val, &clk->reg);
 }
 
 /**
@@ -140,6 +140,7 @@ int __init of_ti_clk_autoidle_setup(struct device_node *node)
 {
 	u32 shift;
 	struct clk_ti_autoidle *clk;
+	int ret;
 
 	/* Check if this clock has autoidle support or not */
 	if (of_property_read_u32(node, "ti,autoidle-shift", &shift))
@@ -152,11 +153,10 @@ int __init of_ti_clk_autoidle_setup(struct device_node *node)
 
 	clk->shift = shift;
 	clk->name = node->name;
-	clk->reg = ti_clk_get_reg_addr(node, 0);
-
-	if (IS_ERR(clk->reg)) {
+	ret = ti_clk_get_reg_addr(node, 0, &clk->reg);
+	if (ret) {
 		kfree(clk);
-		return -EINVAL;
+		return ret;
 	}
 
 	if (of_property_read_bool(node, "ti,invert-autoidle-bit"))
diff --git a/drivers/clk/ti/clk-3xxx.c b/drivers/clk/ti/clk-3xxx.c
index 11d8aa3ec186..b1251cae98b8 100644
--- a/drivers/clk/ti/clk-3xxx.c
+++ b/drivers/clk/ti/clk-3xxx.c
@@ -52,14 +52,13 @@
  * @idlest_reg and @idlest_bit.  No return value.
  */
 static void omap3430es2_clk_ssi_find_idlest(struct clk_hw_omap *clk,
-					    void __iomem **idlest_reg,
+					    struct clk_omap_reg *idlest_reg,
 					    u8 *idlest_bit,
 					    u8 *idlest_val)
 {
-	u32 r;
-
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
+	memcpy(idlest_reg, &clk->enable_reg, sizeof(*idlest_reg));
+	idlest_reg->offset &= ~0xf0;
+	idlest_reg->offset |= 0x20;
 	*idlest_bit = OMAP3430ES2_ST_SSI_IDLE_SHIFT;
 	*idlest_val = OMAP34XX_CM_IDLEST_VAL;
 }
@@ -85,15 +84,15 @@ const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_ssi_wait = {
  * default find_idlest code assumes that they are at the same
  * position.)  No return value.
  */
-static void omap3430es2_clk_dss_usbhost_find_idlest(struct clk_hw_omap *clk,
-						    void __iomem **idlest_reg,
-						    u8 *idlest_bit,
-						    u8 *idlest_val)
+static void
+omap3430es2_clk_dss_usbhost_find_idlest(struct clk_hw_omap *clk,
+					struct clk_omap_reg *idlest_reg,
+					u8 *idlest_bit, u8 *idlest_val)
 {
-	u32 r;
+	memcpy(idlest_reg, &clk->enable_reg, sizeof(*idlest_reg));
 
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
+	idlest_reg->offset &= ~0xf0;
+	idlest_reg->offset |= 0x20;
 	/* USBHOST_IDLE has same shift */
 	*idlest_bit = OMAP3430ES2_ST_DSS_IDLE_SHIFT;
 	*idlest_val = OMAP34XX_CM_IDLEST_VAL;
@@ -122,15 +121,15 @@ const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_dss_usbhost_wait = {
  * shift from the CM_{I,F}CLKEN bit.  Pass back the correct info via
  * @idlest_reg and @idlest_bit.  No return value.
  */
-static void omap3430es2_clk_hsotgusb_find_idlest(struct clk_hw_omap *clk,
-						 void __iomem **idlest_reg,
-						 u8 *idlest_bit,
-						 u8 *idlest_val)
+static void
+omap3430es2_clk_hsotgusb_find_idlest(struct clk_hw_omap *clk,
+				     struct clk_omap_reg *idlest_reg,
+				     u8 *idlest_bit,
+				     u8 *idlest_val)
 {
-	u32 r;
-
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
+	memcpy(idlest_reg, &clk->enable_reg, sizeof(*idlest_reg));
+	idlest_reg->offset &= ~0xf0;
+	idlest_reg->offset |= 0x20;
 	*idlest_bit = OMAP3430ES2_ST_HSOTGUSB_IDLE_SHIFT;
 	*idlest_val = OMAP34XX_CM_IDLEST_VAL;
 }
@@ -154,11 +153,11 @@ const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_hsotgusb_wait = {
  * bit. A value of 1 indicates that clock is enabled.
  */
 static void am35xx_clk_find_idlest(struct clk_hw_omap *clk,
-				   void __iomem **idlest_reg,
+				   struct clk_omap_reg *idlest_reg,
 				   u8 *idlest_bit,
 				   u8 *idlest_val)
 {
-	*idlest_reg = (__force void __iomem *)(clk->enable_reg);
+	memcpy(idlest_reg, &clk->enable_reg, sizeof(*idlest_reg));
 	*idlest_bit = clk->enable_bit + AM35XX_IPSS_ICK_EN_ACK_OFFSET;
 	*idlest_val = AM35XX_IPSS_CLK_IDLEST_VAL;
 }
@@ -178,10 +177,10 @@ static void am35xx_clk_find_idlest(struct clk_hw_omap *clk,
  * avoid this issue, and remove the casts.  No return value.
  */
 static void am35xx_clk_find_companion(struct clk_hw_omap *clk,
-				      void __iomem **other_reg,
+				      struct clk_omap_reg *other_reg,
 				      u8 *other_bit)
 {
-	*other_reg = (__force void __iomem *)(clk->enable_reg);
+	memcpy(other_reg, &clk->enable_reg, sizeof(*other_reg));
 	if (clk->enable_bit & AM35XX_IPSS_ICK_MASK)
 		*other_bit = clk->enable_bit + AM35XX_IPSS_ICK_FCK_OFFSET;
 	else
@@ -205,14 +204,14 @@ const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait = {
  * and @idlest_bit.  No return value.
  */
 static void am35xx_clk_ipss_find_idlest(struct clk_hw_omap *clk,
-					void __iomem **idlest_reg,
+					struct clk_omap_reg *idlest_reg,
 					u8 *idlest_bit,
 					u8 *idlest_val)
 {
-	u32 r;
+	memcpy(idlest_reg, &clk->enable_reg, sizeof(*idlest_reg));
 
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
+	idlest_reg->offset &= ~0xf0;
+	idlest_reg->offset |= 0x20;
 	*idlest_bit = AM35XX_ST_IPSS_SHIFT;
 	*idlest_val = OMAP34XX_CM_IDLEST_VAL;
 }
diff --git a/drivers/clk/ti/clk-44xx.c b/drivers/clk/ti/clk-44xx.c
index 7a8b51b35f9f..1c8bb83003bf 100644
--- a/drivers/clk/ti/clk-44xx.c
+++ b/drivers/clk/ti/clk-44xx.c
@@ -34,196 +34,13 @@
 #define OMAP4_DPLL_USB_DEFFREQ				960000000
 
 static struct ti_dt_clk omap44xx_clks[] = {
-	DT_CLK(NULL, "extalt_clkin_ck", "extalt_clkin_ck"),
-	DT_CLK(NULL, "pad_clks_src_ck", "pad_clks_src_ck"),
-	DT_CLK(NULL, "pad_clks_ck", "pad_clks_ck"),
-	DT_CLK(NULL, "pad_slimbus_core_clks_ck", "pad_slimbus_core_clks_ck"),
-	DT_CLK(NULL, "secure_32k_clk_src_ck", "secure_32k_clk_src_ck"),
-	DT_CLK(NULL, "slimbus_src_clk", "slimbus_src_clk"),
-	DT_CLK(NULL, "slimbus_clk", "slimbus_clk"),
-	DT_CLK(NULL, "sys_32k_ck", "sys_32k_ck"),
-	DT_CLK(NULL, "virt_12000000_ck", "virt_12000000_ck"),
-	DT_CLK(NULL, "virt_13000000_ck", "virt_13000000_ck"),
-	DT_CLK(NULL, "virt_16800000_ck", "virt_16800000_ck"),
-	DT_CLK(NULL, "virt_19200000_ck", "virt_19200000_ck"),
-	DT_CLK(NULL, "virt_26000000_ck", "virt_26000000_ck"),
-	DT_CLK(NULL, "virt_27000000_ck", "virt_27000000_ck"),
-	DT_CLK(NULL, "virt_38400000_ck", "virt_38400000_ck"),
-	DT_CLK(NULL, "sys_clkin_ck", "sys_clkin_ck"),
-	DT_CLK(NULL, "tie_low_clock_ck", "tie_low_clock_ck"),
-	DT_CLK(NULL, "utmi_phy_clkout_ck", "utmi_phy_clkout_ck"),
-	DT_CLK(NULL, "xclk60mhsp1_ck", "xclk60mhsp1_ck"),
-	DT_CLK(NULL, "xclk60mhsp2_ck", "xclk60mhsp2_ck"),
-	DT_CLK(NULL, "xclk60motg_ck", "xclk60motg_ck"),
-	DT_CLK(NULL, "abe_dpll_bypass_clk_mux_ck", "abe_dpll_bypass_clk_mux_ck"),
-	DT_CLK(NULL, "abe_dpll_refclk_mux_ck", "abe_dpll_refclk_mux_ck"),
-	DT_CLK(NULL, "dpll_abe_ck", "dpll_abe_ck"),
-	DT_CLK(NULL, "dpll_abe_x2_ck", "dpll_abe_x2_ck"),
-	DT_CLK(NULL, "dpll_abe_m2x2_ck", "dpll_abe_m2x2_ck"),
-	DT_CLK(NULL, "abe_24m_fclk", "abe_24m_fclk"),
-	DT_CLK(NULL, "abe_clk", "abe_clk"),
-	DT_CLK(NULL, "aess_fclk", "aess_fclk"),
-	DT_CLK(NULL, "dpll_abe_m3x2_ck", "dpll_abe_m3x2_ck"),
-	DT_CLK(NULL, "core_hsd_byp_clk_mux_ck", "core_hsd_byp_clk_mux_ck"),
-	DT_CLK(NULL, "dpll_core_ck", "dpll_core_ck"),
-	DT_CLK(NULL, "dpll_core_x2_ck", "dpll_core_x2_ck"),
-	DT_CLK(NULL, "dpll_core_m6x2_ck", "dpll_core_m6x2_ck"),
-	DT_CLK(NULL, "dbgclk_mux_ck", "dbgclk_mux_ck"),
-	DT_CLK(NULL, "dpll_core_m2_ck", "dpll_core_m2_ck"),
-	DT_CLK(NULL, "ddrphy_ck", "ddrphy_ck"),
-	DT_CLK(NULL, "dpll_core_m5x2_ck", "dpll_core_m5x2_ck"),
-	DT_CLK(NULL, "div_core_ck", "div_core_ck"),
-	DT_CLK(NULL, "div_iva_hs_clk", "div_iva_hs_clk"),
-	DT_CLK(NULL, "div_mpu_hs_clk", "div_mpu_hs_clk"),
-	DT_CLK(NULL, "dpll_core_m4x2_ck", "dpll_core_m4x2_ck"),
-	DT_CLK(NULL, "dll_clk_div_ck", "dll_clk_div_ck"),
-	DT_CLK(NULL, "dpll_abe_m2_ck", "dpll_abe_m2_ck"),
-	DT_CLK(NULL, "dpll_core_m3x2_ck", "dpll_core_m3x2_ck"),
-	DT_CLK(NULL, "dpll_core_m7x2_ck", "dpll_core_m7x2_ck"),
-	DT_CLK(NULL, "iva_hsd_byp_clk_mux_ck", "iva_hsd_byp_clk_mux_ck"),
-	DT_CLK(NULL, "dpll_iva_ck", "dpll_iva_ck"),
-	DT_CLK(NULL, "dpll_iva_x2_ck", "dpll_iva_x2_ck"),
-	DT_CLK(NULL, "dpll_iva_m4x2_ck", "dpll_iva_m4x2_ck"),
-	DT_CLK(NULL, "dpll_iva_m5x2_ck", "dpll_iva_m5x2_ck"),
-	DT_CLK(NULL, "dpll_mpu_ck", "dpll_mpu_ck"),
-	DT_CLK(NULL, "dpll_mpu_m2_ck", "dpll_mpu_m2_ck"),
-	DT_CLK(NULL, "per_hs_clk_div_ck", "per_hs_clk_div_ck"),
-	DT_CLK(NULL, "per_hsd_byp_clk_mux_ck", "per_hsd_byp_clk_mux_ck"),
-	DT_CLK(NULL, "dpll_per_ck", "dpll_per_ck"),
-	DT_CLK(NULL, "dpll_per_m2_ck", "dpll_per_m2_ck"),
-	DT_CLK(NULL, "dpll_per_x2_ck", "dpll_per_x2_ck"),
-	DT_CLK(NULL, "dpll_per_m2x2_ck", "dpll_per_m2x2_ck"),
-	DT_CLK(NULL, "dpll_per_m3x2_ck", "dpll_per_m3x2_ck"),
-	DT_CLK(NULL, "dpll_per_m4x2_ck", "dpll_per_m4x2_ck"),
-	DT_CLK(NULL, "dpll_per_m5x2_ck", "dpll_per_m5x2_ck"),
-	DT_CLK(NULL, "dpll_per_m6x2_ck", "dpll_per_m6x2_ck"),
-	DT_CLK(NULL, "dpll_per_m7x2_ck", "dpll_per_m7x2_ck"),
-	DT_CLK(NULL, "usb_hs_clk_div_ck", "usb_hs_clk_div_ck"),
-	DT_CLK(NULL, "dpll_usb_ck", "dpll_usb_ck"),
-	DT_CLK(NULL, "dpll_usb_clkdcoldo_ck", "dpll_usb_clkdcoldo_ck"),
-	DT_CLK(NULL, "dpll_usb_m2_ck", "dpll_usb_m2_ck"),
-	DT_CLK(NULL, "ducati_clk_mux_ck", "ducati_clk_mux_ck"),
-	DT_CLK(NULL, "func_12m_fclk", "func_12m_fclk"),
-	DT_CLK(NULL, "func_24m_clk", "func_24m_clk"),
-	DT_CLK(NULL, "func_24mc_fclk", "func_24mc_fclk"),
-	DT_CLK(NULL, "func_48m_fclk", "func_48m_fclk"),
-	DT_CLK(NULL, "func_48mc_fclk", "func_48mc_fclk"),
-	DT_CLK(NULL, "func_64m_fclk", "func_64m_fclk"),
-	DT_CLK(NULL, "func_96m_fclk", "func_96m_fclk"),
-	DT_CLK(NULL, "init_60m_fclk", "init_60m_fclk"),
-	DT_CLK(NULL, "l3_div_ck", "l3_div_ck"),
-	DT_CLK(NULL, "l4_div_ck", "l4_div_ck"),
-	DT_CLK(NULL, "lp_clk_div_ck", "lp_clk_div_ck"),
-	DT_CLK(NULL, "l4_wkup_clk_mux_ck", "l4_wkup_clk_mux_ck"),
 	DT_CLK("smp_twd", NULL, "mpu_periphclk"),
-	DT_CLK(NULL, "ocp_abe_iclk", "ocp_abe_iclk"),
-	DT_CLK(NULL, "per_abe_24m_fclk", "per_abe_24m_fclk"),
-	DT_CLK(NULL, "per_abe_nc_fclk", "per_abe_nc_fclk"),
-	DT_CLK(NULL, "syc_clk_div_ck", "syc_clk_div_ck"),
-	DT_CLK(NULL, "aes1_fck", "aes1_fck"),
-	DT_CLK(NULL, "aes2_fck", "aes2_fck"),
-	DT_CLK(NULL, "dmic_sync_mux_ck", "dmic_sync_mux_ck"),
-	DT_CLK(NULL, "func_dmic_abe_gfclk", "func_dmic_abe_gfclk"),
-	DT_CLK(NULL, "dss_sys_clk", "dss_sys_clk"),
-	DT_CLK(NULL, "dss_tv_clk", "dss_tv_clk"),
-	DT_CLK(NULL, "dss_dss_clk", "dss_dss_clk"),
-	DT_CLK(NULL, "dss_48mhz_clk", "dss_48mhz_clk"),
-	DT_CLK(NULL, "dss_fck", "dss_fck"),
 	DT_CLK("omapdss_dss", "ick", "dss_fck"),
-	DT_CLK(NULL, "fdif_fck", "fdif_fck"),
-	DT_CLK(NULL, "gpio1_dbclk", "gpio1_dbclk"),
-	DT_CLK(NULL, "gpio2_dbclk", "gpio2_dbclk"),
-	DT_CLK(NULL, "gpio3_dbclk", "gpio3_dbclk"),
-	DT_CLK(NULL, "gpio4_dbclk", "gpio4_dbclk"),
-	DT_CLK(NULL, "gpio5_dbclk", "gpio5_dbclk"),
-	DT_CLK(NULL, "gpio6_dbclk", "gpio6_dbclk"),
-	DT_CLK(NULL, "sgx_clk_mux", "sgx_clk_mux"),
-	DT_CLK(NULL, "hsi_fck", "hsi_fck"),
-	DT_CLK(NULL, "iss_ctrlclk", "iss_ctrlclk"),
-	DT_CLK(NULL, "mcasp_sync_mux_ck", "mcasp_sync_mux_ck"),
-	DT_CLK(NULL, "func_mcasp_abe_gfclk", "func_mcasp_abe_gfclk"),
-	DT_CLK(NULL, "mcbsp1_sync_mux_ck", "mcbsp1_sync_mux_ck"),
-	DT_CLK(NULL, "func_mcbsp1_gfclk", "func_mcbsp1_gfclk"),
-	DT_CLK(NULL, "mcbsp2_sync_mux_ck", "mcbsp2_sync_mux_ck"),
-	DT_CLK(NULL, "func_mcbsp2_gfclk", "func_mcbsp2_gfclk"),
-	DT_CLK(NULL, "mcbsp3_sync_mux_ck", "mcbsp3_sync_mux_ck"),
-	DT_CLK(NULL, "func_mcbsp3_gfclk", "func_mcbsp3_gfclk"),
-	DT_CLK(NULL, "mcbsp4_sync_mux_ck", "mcbsp4_sync_mux_ck"),
-	DT_CLK(NULL, "per_mcbsp4_gfclk", "per_mcbsp4_gfclk"),
-	DT_CLK(NULL, "hsmmc1_fclk", "hsmmc1_fclk"),
-	DT_CLK(NULL, "hsmmc2_fclk", "hsmmc2_fclk"),
-	DT_CLK(NULL, "ocp2scp_usb_phy_phy_48m", "ocp2scp_usb_phy_phy_48m"),
-	DT_CLK(NULL, "sha2md5_fck", "sha2md5_fck"),
-	DT_CLK(NULL, "slimbus1_fclk_1", "slimbus1_fclk_1"),
-	DT_CLK(NULL, "slimbus1_fclk_0", "slimbus1_fclk_0"),
-	DT_CLK(NULL, "slimbus1_fclk_2", "slimbus1_fclk_2"),
-	DT_CLK(NULL, "slimbus1_slimbus_clk", "slimbus1_slimbus_clk"),
-	DT_CLK(NULL, "slimbus2_fclk_1", "slimbus2_fclk_1"),
-	DT_CLK(NULL, "slimbus2_fclk_0", "slimbus2_fclk_0"),
-	DT_CLK(NULL, "slimbus2_slimbus_clk", "slimbus2_slimbus_clk"),
-	DT_CLK(NULL, "smartreflex_core_fck", "smartreflex_core_fck"),
-	DT_CLK(NULL, "smartreflex_iva_fck", "smartreflex_iva_fck"),
-	DT_CLK(NULL, "smartreflex_mpu_fck", "smartreflex_mpu_fck"),
-	DT_CLK(NULL, "dmt1_clk_mux", "dmt1_clk_mux"),
-	DT_CLK(NULL, "cm2_dm10_mux", "cm2_dm10_mux"),
-	DT_CLK(NULL, "cm2_dm11_mux", "cm2_dm11_mux"),
-	DT_CLK(NULL, "cm2_dm2_mux", "cm2_dm2_mux"),
-	DT_CLK(NULL, "cm2_dm3_mux", "cm2_dm3_mux"),
-	DT_CLK(NULL, "cm2_dm4_mux", "cm2_dm4_mux"),
-	DT_CLK(NULL, "timer5_sync_mux", "timer5_sync_mux"),
-	DT_CLK(NULL, "timer6_sync_mux", "timer6_sync_mux"),
-	DT_CLK(NULL, "timer7_sync_mux", "timer7_sync_mux"),
-	DT_CLK(NULL, "timer8_sync_mux", "timer8_sync_mux"),
-	DT_CLK(NULL, "cm2_dm9_mux", "cm2_dm9_mux"),
-	DT_CLK(NULL, "usb_host_fs_fck", "usb_host_fs_fck"),
 	DT_CLK("usbhs_omap", "fs_fck", "usb_host_fs_fck"),
-	DT_CLK(NULL, "utmi_p1_gfclk", "utmi_p1_gfclk"),
-	DT_CLK(NULL, "usb_host_hs_utmi_p1_clk", "usb_host_hs_utmi_p1_clk"),
-	DT_CLK(NULL, "utmi_p2_gfclk", "utmi_p2_gfclk"),
-	DT_CLK(NULL, "usb_host_hs_utmi_p2_clk", "usb_host_hs_utmi_p2_clk"),
-	DT_CLK(NULL, "usb_host_hs_utmi_p3_clk", "usb_host_hs_utmi_p3_clk"),
-	DT_CLK(NULL, "usb_host_hs_hsic480m_p1_clk", "usb_host_hs_hsic480m_p1_clk"),
-	DT_CLK(NULL, "usb_host_hs_hsic60m_p1_clk", "usb_host_hs_hsic60m_p1_clk"),
-	DT_CLK(NULL, "usb_host_hs_hsic60m_p2_clk", "usb_host_hs_hsic60m_p2_clk"),
-	DT_CLK(NULL, "usb_host_hs_hsic480m_p2_clk", "usb_host_hs_hsic480m_p2_clk"),
-	DT_CLK(NULL, "usb_host_hs_func48mclk", "usb_host_hs_func48mclk"),
-	DT_CLK(NULL, "usb_host_hs_fck", "usb_host_hs_fck"),
 	DT_CLK("usbhs_omap", "hs_fck", "usb_host_hs_fck"),
-	DT_CLK(NULL, "otg_60m_gfclk", "otg_60m_gfclk"),
-	DT_CLK(NULL, "usb_otg_hs_xclk", "usb_otg_hs_xclk"),
-	DT_CLK(NULL, "usb_otg_hs_ick", "usb_otg_hs_ick"),
 	DT_CLK("musb-omap2430", "ick", "usb_otg_hs_ick"),
-	DT_CLK(NULL, "usb_phy_cm_clk32k", "usb_phy_cm_clk32k"),
-	DT_CLK(NULL, "usb_tll_hs_usb_ch2_clk", "usb_tll_hs_usb_ch2_clk"),
-	DT_CLK(NULL, "usb_tll_hs_usb_ch0_clk", "usb_tll_hs_usb_ch0_clk"),
-	DT_CLK(NULL, "usb_tll_hs_usb_ch1_clk", "usb_tll_hs_usb_ch1_clk"),
-	DT_CLK(NULL, "usb_tll_hs_ick", "usb_tll_hs_ick"),
 	DT_CLK("usbhs_omap", "usbtll_ick", "usb_tll_hs_ick"),
 	DT_CLK("usbhs_tll", "usbtll_ick", "usb_tll_hs_ick"),
-	DT_CLK(NULL, "usim_ck", "usim_ck"),
-	DT_CLK(NULL, "usim_fclk", "usim_fclk"),
-	DT_CLK(NULL, "pmd_stm_clock_mux_ck", "pmd_stm_clock_mux_ck"),
-	DT_CLK(NULL, "pmd_trace_clk_mux_ck", "pmd_trace_clk_mux_ck"),
-	DT_CLK(NULL, "stm_clk_div_ck", "stm_clk_div_ck"),
-	DT_CLK(NULL, "trace_clk_div_ck", "trace_clk_div_ck"),
-	DT_CLK(NULL, "auxclk0_src_ck", "auxclk0_src_ck"),
-	DT_CLK(NULL, "auxclk0_ck", "auxclk0_ck"),
-	DT_CLK(NULL, "auxclkreq0_ck", "auxclkreq0_ck"),
-	DT_CLK(NULL, "auxclk1_src_ck", "auxclk1_src_ck"),
-	DT_CLK(NULL, "auxclk1_ck", "auxclk1_ck"),
-	DT_CLK(NULL, "auxclkreq1_ck", "auxclkreq1_ck"),
-	DT_CLK(NULL, "auxclk2_src_ck", "auxclk2_src_ck"),
-	DT_CLK(NULL, "auxclk2_ck", "auxclk2_ck"),
-	DT_CLK(NULL, "auxclkreq2_ck", "auxclkreq2_ck"),
-	DT_CLK(NULL, "auxclk3_src_ck", "auxclk3_src_ck"),
-	DT_CLK(NULL, "auxclk3_ck", "auxclk3_ck"),
-	DT_CLK(NULL, "auxclkreq3_ck", "auxclkreq3_ck"),
-	DT_CLK(NULL, "auxclk4_src_ck", "auxclk4_src_ck"),
-	DT_CLK(NULL, "auxclk4_ck", "auxclk4_ck"),
-	DT_CLK(NULL, "auxclkreq4_ck", "auxclkreq4_ck"),
-	DT_CLK(NULL, "auxclk5_src_ck", "auxclk5_src_ck"),
-	DT_CLK(NULL, "auxclk5_ck", "auxclk5_ck"),
-	DT_CLK(NULL, "auxclkreq5_ck", "auxclkreq5_ck"),
 	DT_CLK("omap_i2c.1", "ick", "dummy_ck"),
 	DT_CLK("omap_i2c.2", "ick", "dummy_ck"),
 	DT_CLK("omap_i2c.3", "ick", "dummy_ck"),
@@ -263,9 +80,6 @@ static struct ti_dt_clk omap44xx_clks[] = {
 	DT_CLK("4013c000.timer", "timer_sys_ck", "syc_clk_div_ck"),
 	DT_CLK("4013e000.timer", "timer_sys_ck", "syc_clk_div_ck"),
 	DT_CLK(NULL, "cpufreq_ck", "dpll_mpu_ck"),
-	DT_CLK(NULL, "bandgap_fclk", "bandgap_fclk"),
-	DT_CLK(NULL, "div_ts_ck", "div_ts_ck"),
-	DT_CLK(NULL, "bandgap_ts_fclk", "bandgap_ts_fclk"),
 	{ .node_name = NULL },
 };
 
@@ -278,6 +92,8 @@ int __init omap4xxx_dt_clk_init(void)
 
 	omap2_clk_disable_autoidle_all();
 
+	ti_clk_add_aliases();
+
 	/*
 	 * Lock USB DPLL on OMAP4 devices so that the L3INIT power
 	 * domain can transition to retention state when not in use.
diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c
index 45d05339d583..13eb04f72389 100644
--- a/drivers/clk/ti/clk-dra7-atl.c
+++ b/drivers/clk/ti/clk-dra7-atl.c
@@ -24,6 +24,9 @@
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/clk/ti.h>
+
+#include "clock.h"
 
 #define DRA7_ATL_INSTANCES	4
 
@@ -171,6 +174,7 @@ static void __init of_dra7_atl_clock_setup(struct device_node *node)
 	struct clk_init_data init = { NULL };
 	const char **parent_names = NULL;
 	struct clk *clk;
+	int ret;
 
 	clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL);
 	if (!clk_hw) {
@@ -200,9 +204,14 @@ static void __init of_dra7_atl_clock_setup(struct device_node *node)
 
 	init.parent_names = parent_names;
 
-	clk = clk_register(NULL, &clk_hw->hw);
+	clk = ti_clk_register(NULL, &clk_hw->hw, node->name);
 
 	if (!IS_ERR(clk)) {
+		ret = ti_clk_add_alias(NULL, clk, node->name);
+		if (ret) {
+			clk_unregister(clk);
+			goto cleanup;
+		}
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 		kfree(parent_names);
 		return;
diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index 5fcf247759ac..e5a1c8297a1d 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c
@@ -24,6 +24,7 @@
 #include <linux/list.h>
 #include <linux/regmap.h>
 #include <linux/bootmem.h>
+#include <linux/device.h>
 
 #include "clock.h"
 
@@ -42,27 +43,29 @@ struct clk_iomap {
 
 static struct clk_iomap *clk_memmaps[CLK_MAX_MEMMAPS];
 
-static void clk_memmap_writel(u32 val, void __iomem *reg)
+static void clk_memmap_writel(u32 val, const struct clk_omap_reg *reg)
 {
-	struct clk_omap_reg *r = (struct clk_omap_reg *)&reg;
-	struct clk_iomap *io = clk_memmaps[r->index];
+	struct clk_iomap *io = clk_memmaps[reg->index];
 
-	if (io->regmap)
-		regmap_write(io->regmap, r->offset, val);
+	if (reg->ptr)
+		writel_relaxed(val, reg->ptr);
+	else if (io->regmap)
+		regmap_write(io->regmap, reg->offset, val);
 	else
-		writel_relaxed(val, io->mem + r->offset);
+		writel_relaxed(val, io->mem + reg->offset);
 }
 
-static u32 clk_memmap_readl(void __iomem *reg)
+static u32 clk_memmap_readl(const struct clk_omap_reg *reg)
 {
 	u32 val;
-	struct clk_omap_reg *r = (struct clk_omap_reg *)&reg;
-	struct clk_iomap *io = clk_memmaps[r->index];
+	struct clk_iomap *io = clk_memmaps[reg->index];
 
-	if (io->regmap)
-		regmap_read(io->regmap, r->offset, &val);
+	if (reg->ptr)
+		val = readl_relaxed(reg->ptr);
+	else if (io->regmap)
+		regmap_read(io->regmap, reg->offset, &val);
 	else
-		val = readl_relaxed(io->mem + r->offset);
+		val = readl_relaxed(io->mem + reg->offset);
 
 	return val;
 }
@@ -161,20 +164,18 @@ int __init ti_clk_retry_init(struct device_node *node, struct clk_hw *hw,
  * ti_clk_get_reg_addr - get register address for a clock register
  * @node: device node for the clock
  * @index: register index from the clock node
+ * @reg: pointer to target register struct
  *
- * Builds clock register address from device tree information. This
- * is a struct of type clk_omap_reg. Returns a pointer to the register
- * address, or a pointer error value in failure.
+ * Builds clock register address from device tree information, and returns
+ * the data via the provided output pointer @reg. Returns 0 on success,
+ * negative error value on failure.
  */
-void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index)
+int ti_clk_get_reg_addr(struct device_node *node, int index,
+			struct clk_omap_reg *reg)
 {
-	struct clk_omap_reg *reg;
 	u32 val;
-	u32 tmp;
 	int i;
 
-	reg = (struct clk_omap_reg *)&tmp;
-
 	for (i = 0; i < CLK_MAX_MEMMAPS; i++) {
 		if (clocks_node_ptr[i] == node->parent)
 			break;
@@ -182,19 +183,20 @@ void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index)
 
 	if (i == CLK_MAX_MEMMAPS) {
 		pr_err("clk-provider not found for %s!\n", node->name);
-		return IOMEM_ERR_PTR(-ENOENT);
+		return -ENOENT;
 	}
 
 	reg->index = i;
 
 	if (of_property_read_u32_index(node, "reg", index, &val)) {
 		pr_err("%s must have reg[%d]!\n", node->name, index);
-		return IOMEM_ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	reg->offset = val;
+	reg->ptr = NULL;
 
-	return (__force void __iomem *)tmp;
+	return 0;
 }
 
 /**
@@ -297,6 +299,7 @@ struct clk __init *ti_clk_register_clk(struct ti_clk *setup)
 	struct ti_clk_fixed *fixed;
 	struct ti_clk_fixed_factor *fixed_factor;
 	struct clk_hw *clk_hw;
+	int ret;
 
 	if (setup->clk)
 		return setup->clk;
@@ -307,6 +310,13 @@ struct clk __init *ti_clk_register_clk(struct ti_clk *setup)
 
 		clk = clk_register_fixed_rate(NULL, setup->name, NULL, 0,
 					      fixed->frequency);
+		if (!IS_ERR(clk)) {
+			ret = ti_clk_add_alias(NULL, clk, setup->name);
+			if (ret) {
+				clk_unregister(clk);
+				clk = ERR_PTR(ret);
+			}
+		}
 		break;
 	case TI_CLK_MUX:
 		clk = ti_clk_register_mux(setup);
@@ -324,6 +334,13 @@ struct clk __init *ti_clk_register_clk(struct ti_clk *setup)
 						fixed_factor->parent,
 						0, fixed_factor->mult,
 						fixed_factor->div);
+		if (!IS_ERR(clk)) {
+			ret = ti_clk_add_alias(NULL, clk, setup->name);
+			if (ret) {
+				clk_unregister(clk);
+				clk = ERR_PTR(ret);
+			}
+		}
 		break;
 	case TI_CLK_GATE:
 		clk = ti_clk_register_gate(setup);
@@ -371,9 +388,6 @@ int __init ti_clk_register_legacy_clks(struct ti_clk_alias *clks)
 				       clks->clk->name, PTR_ERR(clk));
 				return PTR_ERR(clk);
 			}
-		} else {
-			clks->lk.clk = clk;
-			clkdev_add(&clks->lk);
 		}
 		clks++;
 	}
@@ -396,8 +410,6 @@ int __init ti_clk_register_legacy_clks(struct ti_clk_alias *clks)
 				}
 			} else {
 				retry = true;
-				retry_clk->lk.clk = clk;
-				clkdev_add(&retry_clk->lk);
 				list_del(&retry_clk->link);
 			}
 		}
@@ -407,6 +419,32 @@ int __init ti_clk_register_legacy_clks(struct ti_clk_alias *clks)
 }
 #endif
 
+static const struct of_device_id simple_clk_match_table[] __initconst = {
+	{ .compatible = "fixed-clock" },
+	{ .compatible = "fixed-factor-clock" },
+	{ }
+};
+
+/**
+ * ti_clk_add_aliases - setup clock aliases
+ *
+ * Sets up any missing clock aliases. No return value.
+ */
+void __init ti_clk_add_aliases(void)
+{
+	struct device_node *np;
+	struct clk *clk;
+
+	for_each_matching_node(np, simple_clk_match_table) {
+		struct of_phandle_args clkspec;
+
+		clkspec.np = np;
+		clk = of_clk_get_from_provider(&clkspec);
+
+		ti_clk_add_alias(NULL, clk, np->name);
+	}
+}
+
 /**
  * ti_clk_setup_features - setup clock features flags
  * @features: features definition to use
@@ -453,3 +491,66 @@ void omap2_clk_enable_init_clocks(const char **clk_names, u8 num_clocks)
 		clk_prepare_enable(init_clk);
 	}
 }
+
+/**
+ * ti_clk_add_alias - add a clock alias for a TI clock
+ * @dev: device alias for this clock
+ * @clk: clock handle to create alias for
+ * @con: connection ID for this clock
+ *
+ * Creates a clock alias for a TI clock. Allocates the clock lookup entry
+ * and assigns the data to it. Returns 0 if successful, negative error
+ * value otherwise.
+ */
+int ti_clk_add_alias(struct device *dev, struct clk *clk, const char *con)
+{
+	struct clk_lookup *cl;
+
+	if (!clk)
+		return 0;
+
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
+	if (!cl)
+		return -ENOMEM;
+
+	if (dev)
+		cl->dev_id = dev_name(dev);
+	cl->con_id = con;
+	cl->clk = clk;
+
+	clkdev_add(cl);
+
+	return 0;
+}
+
+/**
+ * ti_clk_register - register a TI clock to the common clock framework
+ * @dev: device for this clock
+ * @hw: hardware clock handle
+ * @con: connection ID for this clock
+ *
+ * Registers a TI clock to the common clock framework, and adds a clock
+ * alias for it. Returns a handle to the registered clock if successful,
+ * ERR_PTR value in failure.
+ */
+struct clk *ti_clk_register(struct device *dev, struct clk_hw *hw,
+			    const char *con)
+{
+	struct clk *clk;
+	int ret;
+
+	clk = clk_register(dev, hw);
+	if (IS_ERR(clk))
+		return clk;
+
+	ret = ti_clk_add_alias(dev, clk, con);
+	if (ret) {
+		clk_unregister(clk);
+		return ERR_PTR(ret);
+	}
+
+	return clk;
+}
diff --git a/drivers/clk/ti/clkt_dflt.c b/drivers/clk/ti/clkt_dflt.c
index c6ae563801d7..91751dd26b16 100644
--- a/drivers/clk/ti/clkt_dflt.c
+++ b/drivers/clk/ti/clkt_dflt.c
@@ -55,7 +55,8 @@
  * elapsed.  XXX Deprecated - should be moved into drivers for the
  * individual IP block that the IDLEST register exists in.
  */
-static int _wait_idlest_generic(struct clk_hw_omap *clk, void __iomem *reg,
+static int _wait_idlest_generic(struct clk_hw_omap *clk,
+				struct clk_omap_reg *reg,
 				u32 mask, u8 idlest, const char *name)
 {
 	int i = 0, ena = 0;
@@ -91,7 +92,7 @@ static int _wait_idlest_generic(struct clk_hw_omap *clk, void __iomem *reg,
  */
 static void _omap2_module_wait_ready(struct clk_hw_omap *clk)
 {
-	void __iomem *companion_reg, *idlest_reg;
+	struct clk_omap_reg companion_reg, idlest_reg;
 	u8 other_bit, idlest_bit, idlest_val, idlest_reg_id;
 	s16 prcm_mod;
 	int r;
@@ -99,17 +100,17 @@ static void _omap2_module_wait_ready(struct clk_hw_omap *clk)
 	/* Not all modules have multiple clocks that their IDLEST depends on */
 	if (clk->ops->find_companion) {
 		clk->ops->find_companion(clk, &companion_reg, &other_bit);
-		if (!(ti_clk_ll_ops->clk_readl(companion_reg) &
+		if (!(ti_clk_ll_ops->clk_readl(&companion_reg) &
 		      (1 << other_bit)))
 			return;
 	}
 
 	clk->ops->find_idlest(clk, &idlest_reg, &idlest_bit, &idlest_val);
-	r = ti_clk_ll_ops->cm_split_idlest_reg(idlest_reg, &prcm_mod,
+	r = ti_clk_ll_ops->cm_split_idlest_reg(&idlest_reg, &prcm_mod,
 					       &idlest_reg_id);
 	if (r) {
 		/* IDLEST register not in the CM module */
-		_wait_idlest_generic(clk, idlest_reg, (1 << idlest_bit),
+		_wait_idlest_generic(clk, &idlest_reg, (1 << idlest_bit),
 				     idlest_val, clk_hw_get_name(&clk->hw));
 	} else {
 		ti_clk_ll_ops->cm_wait_module_ready(0, prcm_mod, idlest_reg_id,
@@ -139,17 +140,17 @@ static void _omap2_module_wait_ready(struct clk_hw_omap *clk)
  * avoid this issue, and remove the casts.  No return value.
  */
 void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk,
-				   void __iomem **other_reg, u8 *other_bit)
+				   struct clk_omap_reg *other_reg,
+				   u8 *other_bit)
 {
-	u32 r;
+	memcpy(other_reg, &clk->enable_reg, sizeof(*other_reg));
 
 	/*
 	 * Convert CM_ICLKEN* <-> CM_FCLKEN*.  This conversion assumes
 	 * it's just a matter of XORing the bits.
 	 */
-	r = ((__force u32)clk->enable_reg ^ (CM_FCLKEN ^ CM_ICLKEN));
+	other_reg->offset ^= (CM_FCLKEN ^ CM_ICLKEN);
 
-	*other_reg = (__force void __iomem *)r;
 	*other_bit = clk->enable_bit;
 }
 
@@ -168,13 +169,14 @@ void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk,
  * CM_IDLEST2).  This is not true for all modules.  No return value.
  */
 void omap2_clk_dflt_find_idlest(struct clk_hw_omap *clk,
-				void __iomem **idlest_reg, u8 *idlest_bit,
+				struct clk_omap_reg *idlest_reg, u8 *idlest_bit,
 				u8 *idlest_val)
 {
-	u32 r;
+	memcpy(idlest_reg, &clk->enable_reg, sizeof(*idlest_reg));
+
+	idlest_reg->offset &= ~0xf0;
+	idlest_reg->offset |= 0x20;
 
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
 	*idlest_bit = clk->enable_bit;
 
 	/*
@@ -222,31 +224,19 @@ int omap2_dflt_clk_enable(struct clk_hw *hw)
 		}
 	}
 
-	if (IS_ERR(clk->enable_reg)) {
-		pr_err("%s: %s missing enable_reg\n", __func__,
-		       clk_hw_get_name(hw));
-		ret = -EINVAL;
-		goto err;
-	}
-
 	/* FIXME should not have INVERT_ENABLE bit here */
-	v = ti_clk_ll_ops->clk_readl(clk->enable_reg);
+	v = ti_clk_ll_ops->clk_readl(&clk->enable_reg);
 	if (clk->flags & INVERT_ENABLE)
 		v &= ~(1 << clk->enable_bit);
 	else
 		v |= (1 << clk->enable_bit);
-	ti_clk_ll_ops->clk_writel(v, clk->enable_reg);
-	v = ti_clk_ll_ops->clk_readl(clk->enable_reg); /* OCP barrier */
+	ti_clk_ll_ops->clk_writel(v, &clk->enable_reg);
+	v = ti_clk_ll_ops->clk_readl(&clk->enable_reg); /* OCP barrier */
 
 	if (clk->ops && clk->ops->find_idlest)
 		_omap2_module_wait_ready(clk);
 
 	return 0;
-
-err:
-	if (clkdm_control && clk->clkdm)
-		ti_clk_ll_ops->clkdm_clk_disable(clk->clkdm, hw->clk);
-	return ret;
 }
 
 /**
@@ -264,22 +254,13 @@ void omap2_dflt_clk_disable(struct clk_hw *hw)
 	u32 v;
 
 	clk = to_clk_hw_omap(hw);
-	if (IS_ERR(clk->enable_reg)) {
-		/*
-		 * 'independent' here refers to a clock which is not
-		 * controlled by its parent.
-		 */
-		pr_err("%s: independent clock %s has no enable_reg\n",
-		       __func__, clk_hw_get_name(hw));
-		return;
-	}
 
-	v = ti_clk_ll_ops->clk_readl(clk->enable_reg);
+	v = ti_clk_ll_ops->clk_readl(&clk->enable_reg);
 	if (clk->flags & INVERT_ENABLE)
 		v |= (1 << clk->enable_bit);
 	else
 		v &= ~(1 << clk->enable_bit);
-	ti_clk_ll_ops->clk_writel(v, clk->enable_reg);
+	ti_clk_ll_ops->clk_writel(v, &clk->enable_reg);
 	/* No OCP barrier needed here since it is a disable operation */
 
 	if (!(ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) &&
@@ -300,7 +281,7 @@ int omap2_dflt_clk_is_enabled(struct clk_hw *hw)
 	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
 	u32 v;
 
-	v = ti_clk_ll_ops->clk_readl(clk->enable_reg);
+	v = ti_clk_ll_ops->clk_readl(&clk->enable_reg);
 
 	if (clk->flags & INVERT_ENABLE)
 		v ^= BIT(clk->enable_bit);
diff --git a/drivers/clk/ti/clkt_dpll.c b/drivers/clk/ti/clkt_dpll.c
index b919fdfe8256..ce98da2c10be 100644
--- a/drivers/clk/ti/clkt_dpll.c
+++ b/drivers/clk/ti/clkt_dpll.c
@@ -213,7 +213,7 @@ u8 omap2_init_dpll_parent(struct clk_hw *hw)
 	if (!dd)
 		return -EINVAL;
 
-	v = ti_clk_ll_ops->clk_readl(dd->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->control_reg);
 	v &= dd->enable_mask;
 	v >>= __ffs(dd->enable_mask);
 
@@ -249,14 +249,14 @@ unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk)
 		return 0;
 
 	/* Return bypass rate if DPLL is bypassed */
-	v = ti_clk_ll_ops->clk_readl(dd->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->control_reg);
 	v &= dd->enable_mask;
 	v >>= __ffs(dd->enable_mask);
 
 	if (_omap2_dpll_is_in_bypass(v))
 		return clk_hw_get_rate(dd->clk_bypass);
 
-	v = ti_clk_ll_ops->clk_readl(dd->mult_div1_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->mult_div1_reg);
 	dpll_mult = v & dd->mult_mask;
 	dpll_mult >>= __ffs(dd->mult_mask);
 	dpll_div = v & dd->div1_mask;
diff --git a/drivers/clk/ti/clkt_iclk.c b/drivers/clk/ti/clkt_iclk.c
index 38c36908cf88..60b583d7db33 100644
--- a/drivers/clk/ti/clkt_iclk.c
+++ b/drivers/clk/ti/clkt_iclk.c
@@ -31,28 +31,29 @@
 void omap2_clkt_iclk_allow_idle(struct clk_hw_omap *clk)
 {
 	u32 v;
-	void __iomem *r;
+	struct clk_omap_reg r;
 
-	r = (__force void __iomem *)
-		((__force u32)clk->enable_reg ^ (CM_AUTOIDLE ^ CM_ICLKEN));
+	memcpy(&r, &clk->enable_reg, sizeof(r));
+	r.offset ^= (CM_AUTOIDLE ^ CM_ICLKEN);
 
-	v = ti_clk_ll_ops->clk_readl(r);
+	v = ti_clk_ll_ops->clk_readl(&r);
 	v |= (1 << clk->enable_bit);
-	ti_clk_ll_ops->clk_writel(v, r);
+	ti_clk_ll_ops->clk_writel(v, &r);
 }
 
 /* XXX */
 void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk)
 {
 	u32 v;
-	void __iomem *r;
+	struct clk_omap_reg r;
 
-	r = (__force void __iomem *)
-		((__force u32)clk->enable_reg ^ (CM_AUTOIDLE ^ CM_ICLKEN));
+	memcpy(&r, &clk->enable_reg, sizeof(r));
 
-	v = ti_clk_ll_ops->clk_readl(r);
+	r.offset ^= (CM_AUTOIDLE ^ CM_ICLKEN);
+
+	v = ti_clk_ll_ops->clk_readl(&r);
 	v &= ~(1 << clk->enable_bit);
-	ti_clk_ll_ops->clk_writel(v, r);
+	ti_clk_ll_ops->clk_writel(v, &r);
 }
 
 /**
@@ -68,14 +69,12 @@ void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk)
  * modules.  No return value.
  */
 static void omap2430_clk_i2chs_find_idlest(struct clk_hw_omap *clk,
-					   void __iomem **idlest_reg,
+					   struct clk_omap_reg *idlest_reg,
 					   u8 *idlest_bit,
 					   u8 *idlest_val)
 {
-	u32 r;
-
-	r = ((__force u32)clk->enable_reg ^ (OMAP24XX_CM_FCLKEN2 ^ CM_IDLEST));
-	*idlest_reg = (__force void __iomem *)r;
+	memcpy(idlest_reg, &clk->enable_reg, sizeof(*idlest_reg));
+	idlest_reg->offset ^= (OMAP24XX_CM_FCLKEN2 ^ CM_IDLEST);
 	*idlest_bit = clk->enable_bit;
 	*idlest_val = OMAP24XX_CM_IDLEST_VAL;
 }
diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h
index 13c37f48d9d6..3f7b26540be8 100644
--- a/drivers/clk/ti/clock.h
+++ b/drivers/clk/ti/clock.h
@@ -16,6 +16,28 @@
 #ifndef __DRIVERS_CLK_TI_CLOCK__
 #define __DRIVERS_CLK_TI_CLOCK__
 
+struct clk_omap_divider {
+	struct clk_hw		hw;
+	struct clk_omap_reg	reg;
+	u8			shift;
+	u8			width;
+	u8			flags;
+	const struct clk_div_table	*table;
+};
+
+#define to_clk_omap_divider(_hw) container_of(_hw, struct clk_omap_divider, hw)
+
+struct clk_omap_mux {
+	struct clk_hw		hw;
+	struct clk_omap_reg	reg;
+	u32			*table;
+	u32			mask;
+	u8			shift;
+	u8			flags;
+};
+
+#define to_clk_omap_mux(_hw) container_of(_hw, struct clk_omap_mux, hw)
+
 enum {
 	TI_CLK_FIXED,
 	TI_CLK_MUX,
@@ -86,7 +108,7 @@ struct ti_clk_mux {
 	int num_parents;
 	u16 reg;
 	u8 module;
-	const char **parents;
+	const char * const *parents;
 	u16 flags;
 };
 
@@ -189,16 +211,25 @@ struct clk *ti_clk_register_mux(struct ti_clk *setup);
 struct clk *ti_clk_register_divider(struct ti_clk *setup);
 struct clk *ti_clk_register_composite(struct ti_clk *setup);
 struct clk *ti_clk_register_dpll(struct ti_clk *setup);
+struct clk *ti_clk_register(struct device *dev, struct clk_hw *hw,
+			    const char *con);
+int ti_clk_add_alias(struct device *dev, struct clk *clk, const char *con);
+void ti_clk_add_aliases(void);
 
 struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup);
 struct clk_hw *ti_clk_build_component_gate(struct ti_clk_gate *setup);
 struct clk_hw *ti_clk_build_component_mux(struct ti_clk_mux *setup);
 
+int ti_clk_parse_divider_data(int *div_table, int num_dividers, int max_div,
+			      u8 flags, u8 *width,
+			      const struct clk_div_table **table);
+
 void ti_clk_patch_legacy_clks(struct ti_clk **patch);
 struct clk *ti_clk_register_clk(struct ti_clk *setup);
 int ti_clk_register_legacy_clks(struct ti_clk_alias *clks);
 
-void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index);
+int ti_clk_get_reg_addr(struct device_node *node, int index,
+			struct clk_omap_reg *reg);
 void ti_dt_clocks_register(struct ti_dt_clk *oclks);
 int ti_clk_retry_init(struct device_node *node, struct clk_hw *hw,
 		      ti_of_clk_init_cb_t func);
@@ -223,7 +254,9 @@ extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_wait;
 
 extern const struct clk_ops ti_clk_divider_ops;
 extern const struct clk_ops ti_clk_mux_ops;
+extern const struct clk_ops omap_gate_clk_ops;
 
+void omap2_init_clk_clkdm(struct clk_hw *hw);
 int omap2_clkops_enable_clkdm(struct clk_hw *hw);
 void omap2_clkops_disable_clkdm(struct clk_hw *hw);
 
@@ -231,10 +264,10 @@ int omap2_dflt_clk_enable(struct clk_hw *hw);
 void omap2_dflt_clk_disable(struct clk_hw *hw);
 int omap2_dflt_clk_is_enabled(struct clk_hw *hw);
 void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk,
-				   void __iomem **other_reg,
+				   struct clk_omap_reg *other_reg,
 				   u8 *other_bit);
 void omap2_clk_dflt_find_idlest(struct clk_hw_omap *clk,
-				void __iomem **idlest_reg,
+				struct clk_omap_reg *idlest_reg,
 				u8 *idlest_bit, u8 *idlest_val);
 
 void omap2_clkt_iclk_allow_idle(struct clk_hw_omap *clk);
diff --git a/drivers/clk/ti/clockdomain.c b/drivers/clk/ti/clockdomain.c
index 6cf9dd189a92..fbedc6a9fed0 100644
--- a/drivers/clk/ti/clockdomain.c
+++ b/drivers/clk/ti/clockdomain.c
@@ -52,10 +52,6 @@ int omap2_clkops_enable_clkdm(struct clk_hw *hw)
 		return -EINVAL;
 	}
 
-	if (unlikely(clk->enable_reg))
-		pr_err("%s: %s: should use dflt_clk_enable ?!\n", __func__,
-		       clk_hw_get_name(hw));
-
 	if (ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) {
 		pr_err("%s: %s: clkfw-based clockdomain control disabled ?!\n",
 		       __func__, clk_hw_get_name(hw));
@@ -90,10 +86,6 @@ void omap2_clkops_disable_clkdm(struct clk_hw *hw)
 		return;
 	}
 
-	if (unlikely(clk->enable_reg))
-		pr_err("%s: %s: should use dflt_clk_disable ?!\n", __func__,
-		       clk_hw_get_name(hw));
-
 	if (ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) {
 		pr_err("%s: %s: clkfw-based clockdomain control disabled ?!\n",
 		       __func__, clk_hw_get_name(hw));
@@ -103,6 +95,36 @@ void omap2_clkops_disable_clkdm(struct clk_hw *hw)
 	ti_clk_ll_ops->clkdm_clk_disable(clk->clkdm, hw->clk);
 }
 
+/**
+ * omap2_init_clk_clkdm - look up a clockdomain name, store pointer in clk
+ * @clk: OMAP clock struct ptr to use
+ *
+ * Convert a clockdomain name stored in a struct clk 'clk' into a
+ * clockdomain pointer, and save it into the struct clk.  Intended to be
+ * called during clk_register().  No return value.
+ */
+void omap2_init_clk_clkdm(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	struct clockdomain *clkdm;
+	const char *clk_name;
+
+	if (!clk->clkdm_name)
+		return;
+
+	clk_name = __clk_get_name(hw->clk);
+
+	clkdm = ti_clk_ll_ops->clkdm_lookup(clk->clkdm_name);
+	if (clkdm) {
+		pr_debug("clock: associated clk %s to clkdm %s\n",
+			 clk_name, clk->clkdm_name);
+		clk->clkdm = clkdm;
+	} else {
+		pr_debug("clock: could not associate clk %s to clkdm %s\n",
+			 clk_name, clk->clkdm_name);
+	}
+}
+
 static void __init of_ti_clockdomain_setup(struct device_node *node)
 {
 	struct clk *clk;
diff --git a/drivers/clk/ti/composite.c b/drivers/clk/ti/composite.c
index 1cf70f452e1e..beea89463ca2 100644
--- a/drivers/clk/ti/composite.c
+++ b/drivers/clk/ti/composite.c
@@ -124,8 +124,9 @@ struct clk *ti_clk_register_composite(struct ti_clk *setup)
 	struct clk_hw *mux;
 	struct clk_hw *div;
 	int num_parents = 1;
-	const char **parent_names = NULL;
+	const char * const *parent_names = NULL;
 	struct clk *clk;
+	int ret;
 
 	comp = setup->data;
 
@@ -150,6 +151,12 @@ struct clk *ti_clk_register_composite(struct ti_clk *setup)
 				     &ti_composite_divider_ops, gate,
 				     &ti_composite_gate_ops, 0);
 
+	ret = ti_clk_add_alias(NULL, clk, setup->name);
+	if (ret) {
+		clk_unregister(clk);
+		return ERR_PTR(ret);
+	}
+
 	return clk;
 }
 #endif
@@ -163,6 +170,7 @@ static void __init _register_composite(struct clk_hw *hw,
 	int num_parents = 0;
 	const char **parent_names = NULL;
 	int i;
+	int ret;
 
 	/* Check for presence of each component clock */
 	for (i = 0; i < CLK_COMPONENT_TYPE_MAX; i++) {
@@ -217,8 +225,14 @@ static void __init _register_composite(struct clk_hw *hw,
 				     _get_hw(cclk, CLK_COMPONENT_TYPE_GATE),
 				     &ti_composite_gate_ops, 0);
 
-	if (!IS_ERR(clk))
+	if (!IS_ERR(clk)) {
+		ret = ti_clk_add_alias(NULL, clk, node->name);
+		if (ret) {
+			clk_unregister(clk);
+			goto cleanup;
+		}
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	}
 
 cleanup:
 	/* Free component clock list entries */
diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c
index 6bb87784a0d6..88f04a4cb890 100644
--- a/drivers/clk/ti/divider.c
+++ b/drivers/clk/ti/divider.c
@@ -39,7 +39,7 @@ static unsigned int _get_table_maxdiv(const struct clk_div_table *table)
 	return maxdiv;
 }
 
-static unsigned int _get_maxdiv(struct clk_divider *divider)
+static unsigned int _get_maxdiv(struct clk_omap_divider *divider)
 {
 	if (divider->flags & CLK_DIVIDER_ONE_BASED)
 		return div_mask(divider);
@@ -61,7 +61,7 @@ static unsigned int _get_table_div(const struct clk_div_table *table,
 	return 0;
 }
 
-static unsigned int _get_div(struct clk_divider *divider, unsigned int val)
+static unsigned int _get_div(struct clk_omap_divider *divider, unsigned int val)
 {
 	if (divider->flags & CLK_DIVIDER_ONE_BASED)
 		return val;
@@ -83,7 +83,7 @@ static unsigned int _get_table_val(const struct clk_div_table *table,
 	return 0;
 }
 
-static unsigned int _get_val(struct clk_divider *divider, u8 div)
+static unsigned int _get_val(struct clk_omap_divider *divider, u8 div)
 {
 	if (divider->flags & CLK_DIVIDER_ONE_BASED)
 		return div;
@@ -97,10 +97,10 @@ static unsigned int _get_val(struct clk_divider *divider, u8 div)
 static unsigned long ti_clk_divider_recalc_rate(struct clk_hw *hw,
 						unsigned long parent_rate)
 {
-	struct clk_divider *divider = to_clk_divider(hw);
+	struct clk_omap_divider *divider = to_clk_omap_divider(hw);
 	unsigned int div, val;
 
-	val = ti_clk_ll_ops->clk_readl(divider->reg) >> divider->shift;
+	val = ti_clk_ll_ops->clk_readl(&divider->reg) >> divider->shift;
 	val &= div_mask(divider);
 
 	div = _get_div(divider, val);
@@ -131,7 +131,7 @@ static bool _is_valid_table_div(const struct clk_div_table *table,
 	return false;
 }
 
-static bool _is_valid_div(struct clk_divider *divider, unsigned int div)
+static bool _is_valid_div(struct clk_omap_divider *divider, unsigned int div)
 {
 	if (divider->flags & CLK_DIVIDER_POWER_OF_TWO)
 		return is_power_of_2(div);
@@ -172,7 +172,7 @@ static int _div_round(const struct clk_div_table *table,
 static int ti_clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
 				  unsigned long *best_parent_rate)
 {
-	struct clk_divider *divider = to_clk_divider(hw);
+	struct clk_omap_divider *divider = to_clk_omap_divider(hw);
 	int i, bestdiv = 0;
 	unsigned long parent_rate, best = 0, now, maxdiv;
 	unsigned long parent_rate_saved = *best_parent_rate;
@@ -239,14 +239,14 @@ static long ti_clk_divider_round_rate(struct clk_hw *hw, unsigned long rate,
 static int ti_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
 				   unsigned long parent_rate)
 {
-	struct clk_divider *divider;
+	struct clk_omap_divider *divider;
 	unsigned int div, value;
 	u32 val;
 
 	if (!hw || !rate)
 		return -EINVAL;
 
-	divider = to_clk_divider(hw);
+	divider = to_clk_omap_divider(hw);
 
 	div = DIV_ROUND_UP(parent_rate, rate);
 	value = _get_val(divider, div);
@@ -257,11 +257,11 @@ static int ti_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
 	if (divider->flags & CLK_DIVIDER_HIWORD_MASK) {
 		val = div_mask(divider) << (divider->shift + 16);
 	} else {
-		val = ti_clk_ll_ops->clk_readl(divider->reg);
+		val = ti_clk_ll_ops->clk_readl(&divider->reg);
 		val &= ~(div_mask(divider) << divider->shift);
 	}
 	val |= value << divider->shift;
-	ti_clk_ll_ops->clk_writel(val, divider->reg);
+	ti_clk_ll_ops->clk_writel(val, &divider->reg);
 
 	return 0;
 }
@@ -274,11 +274,12 @@ const struct clk_ops ti_clk_divider_ops = {
 
 static struct clk *_register_divider(struct device *dev, const char *name,
 				     const char *parent_name,
-				     unsigned long flags, void __iomem *reg,
+				     unsigned long flags,
+				     struct clk_omap_reg *reg,
 				     u8 shift, u8 width, u8 clk_divider_flags,
 				     const struct clk_div_table *table)
 {
-	struct clk_divider *div;
+	struct clk_omap_divider *div;
 	struct clk *clk;
 	struct clk_init_data init;
 
@@ -303,7 +304,7 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 	init.num_parents = (parent_name ? 1 : 0);
 
 	/* struct clk_divider assignments */
-	div->reg = reg;
+	memcpy(&div->reg, reg, sizeof(*reg));
 	div->shift = shift;
 	div->width = width;
 	div->flags = clk_divider_flags;
@@ -311,7 +312,7 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 	div->table = table;
 
 	/* register the clock */
-	clk = clk_register(dev, &div->hw);
+	clk = ti_clk_register(dev, &div->hw, name);
 
 	if (IS_ERR(clk))
 		kfree(div);
@@ -319,20 +320,17 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 	return clk;
 }
 
-static struct clk_div_table *
-_get_div_table_from_setup(struct ti_clk_divider *setup, u8 *width)
+int ti_clk_parse_divider_data(int *div_table, int num_dividers, int max_div,
+			      u8 flags, u8 *width,
+			      const struct clk_div_table **table)
 {
 	int valid_div = 0;
-	struct clk_div_table *table;
-	int i;
-	int div;
 	u32 val;
-	u8 flags;
-
-	if (!setup->num_dividers) {
-		/* Clk divider table not provided, determine min/max divs */
-		flags = setup->flags;
+	int div;
+	int i;
+	struct clk_div_table *tmp;
 
+	if (!div_table) {
 		if (flags & CLKF_INDEX_STARTS_AT_ONE)
 			val = 1;
 		else
@@ -340,7 +338,7 @@ _get_div_table_from_setup(struct ti_clk_divider *setup, u8 *width)
 
 		div = 1;
 
-		while (div < setup->max_div) {
+		while (div < max_div) {
 			if (flags & CLKF_INDEX_POWER_OF_TWO)
 				div <<= 1;
 			else
@@ -349,37 +347,59 @@ _get_div_table_from_setup(struct ti_clk_divider *setup, u8 *width)
 		}
 
 		*width = fls(val);
+		*table = NULL;
 
-		return NULL;
+		return 0;
 	}
 
-	for (i = 0; i < setup->num_dividers; i++)
-		if (setup->dividers[i])
+	i = 0;
+
+	while (!num_dividers || i < num_dividers) {
+		if (div_table[i] == -1)
+			break;
+		if (div_table[i])
 			valid_div++;
+		i++;
+	}
 
-	table = kzalloc(sizeof(*table) * (valid_div + 1), GFP_KERNEL);
-	if (!table)
-		return ERR_PTR(-ENOMEM);
+	num_dividers = i;
+
+	tmp = kzalloc(sizeof(*tmp) * (valid_div + 1), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
 
 	valid_div = 0;
 	*width = 0;
 
-	for (i = 0; i < setup->num_dividers; i++)
-		if (setup->dividers[i]) {
-			table[valid_div].div = setup->dividers[i];
-			table[valid_div].val = i;
+	for (i = 0; i < num_dividers; i++)
+		if (div_table[i] > 0) {
+			tmp[valid_div].div = div_table[i];
+			tmp[valid_div].val = i;
 			valid_div++;
 			*width = i;
 		}
 
 	*width = fls(*width);
+	*table = tmp;
+
+	return 0;
+}
+
+static const struct clk_div_table *
+_get_div_table_from_setup(struct ti_clk_divider *setup, u8 *width)
+{
+	const struct clk_div_table *table = NULL;
+
+	ti_clk_parse_divider_data(setup->dividers, setup->num_dividers,
+				  setup->max_div, setup->flags, width,
+				  &table);
 
 	return table;
 }
 
 struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup)
 {
-	struct clk_divider *div;
+	struct clk_omap_divider *div;
 	struct clk_omap_reg *reg;
 
 	if (!setup)
@@ -408,22 +428,17 @@ struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup)
 
 struct clk *ti_clk_register_divider(struct ti_clk *setup)
 {
-	struct ti_clk_divider *div;
-	struct clk_omap_reg *reg_setup;
-	u32 reg;
+	struct ti_clk_divider *div = setup->data;
+	struct clk_omap_reg reg = {
+		.index = div->module,
+		.offset = div->reg,
+	};
 	u8 width;
 	u32 flags = 0;
 	u8 div_flags = 0;
-	struct clk_div_table *table;
+	const struct clk_div_table *table;
 	struct clk *clk;
 
-	div = setup->data;
-
-	reg_setup = (struct clk_omap_reg *)&reg;
-
-	reg_setup->index = div->module;
-	reg_setup->offset = div->reg;
-
 	if (div->flags & CLKF_INDEX_STARTS_AT_ONE)
 		div_flags |= CLK_DIVIDER_ONE_BASED;
 
@@ -438,7 +453,7 @@ struct clk *ti_clk_register_divider(struct ti_clk *setup)
 		return (struct clk *)table;
 
 	clk = _register_divider(NULL, setup->name, div->parent,
-				flags, (void __iomem *)reg, div->bit_shift,
+				flags, &reg, div->bit_shift,
 				width, div_flags, table);
 
 	if (IS_ERR(clk))
@@ -542,14 +557,15 @@ static int _get_divider_width(struct device_node *node,
 }
 
 static int __init ti_clk_divider_populate(struct device_node *node,
-	void __iomem **reg, const struct clk_div_table **table,
+	struct clk_omap_reg *reg, const struct clk_div_table **table,
 	u32 *flags, u8 *div_flags, u8 *width, u8 *shift)
 {
 	u32 val;
+	int ret;
 
-	*reg = ti_clk_get_reg_addr(node, 0);
-	if (IS_ERR(*reg))
-		return PTR_ERR(*reg);
+	ret = ti_clk_get_reg_addr(node, 0, reg);
+	if (ret)
+		return ret;
 
 	if (!of_property_read_u32(node, "ti,bit-shift", &val))
 		*shift = val;
@@ -588,7 +604,7 @@ static void __init of_ti_divider_clk_setup(struct device_node *node)
 {
 	struct clk *clk;
 	const char *parent_name;
-	void __iomem *reg;
+	struct clk_omap_reg reg;
 	u8 clk_divider_flags = 0;
 	u8 width = 0;
 	u8 shift = 0;
@@ -601,7 +617,7 @@ static void __init of_ti_divider_clk_setup(struct device_node *node)
 				    &clk_divider_flags, &width, &shift))
 		goto cleanup;
 
-	clk = _register_divider(NULL, node->name, parent_name, flags, reg,
+	clk = _register_divider(NULL, node->name, parent_name, flags, &reg,
 				shift, width, clk_divider_flags, table);
 
 	if (!IS_ERR(clk)) {
@@ -617,7 +633,7 @@ CLK_OF_DECLARE(divider_clk, "ti,divider-clock", of_ti_divider_clk_setup);
 
 static void __init of_ti_composite_divider_clk_setup(struct device_node *node)
 {
-	struct clk_divider *div;
+	struct clk_omap_divider *div;
 	u32 val;
 
 	div = kzalloc(sizeof(*div), GFP_KERNEL);
diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c
index 4b9a419d8e14..d4e4444bc5ca 100644
--- a/drivers/clk/ti/dpll.c
+++ b/drivers/clk/ti/dpll.c
@@ -185,7 +185,7 @@ static void __init _register_dpll(struct clk_hw *hw,
 	dd->clk_bypass = __clk_get_hw(clk);
 
 	/* register the clock */
-	clk = clk_register(NULL, &clk_hw->hw);
+	clk = ti_clk_register(NULL, &clk_hw->hw, node->name);
 
 	if (!IS_ERR(clk)) {
 		omap2_init_clk_hw_omap_clocks(&clk_hw->hw);
@@ -203,17 +203,10 @@ cleanup:
 }
 
 #if defined(CONFIG_ARCH_OMAP3) && defined(CONFIG_ATAGS)
-static void __iomem *_get_reg(u8 module, u16 offset)
+void _get_reg(u8 module, u16 offset, struct clk_omap_reg *reg)
 {
-	u32 reg;
-	struct clk_omap_reg *reg_setup;
-
-	reg_setup = (struct clk_omap_reg *)&reg;
-
-	reg_setup->index = module;
-	reg_setup->offset = offset;
-
-	return (void __iomem *)reg;
+	reg->index = module;
+	reg->offset = offset;
 }
 
 struct clk *ti_clk_register_dpll(struct ti_clk *setup)
@@ -248,7 +241,6 @@ struct clk *ti_clk_register_dpll(struct ti_clk *setup)
 	clk_hw->dpll_data = dd;
 	clk_hw->ops = &clkhwops_omap3_dpll;
 	clk_hw->hw.init = &init;
-	clk_hw->flags = MEMMAP_ADDRESSING;
 
 	init.name = setup->name;
 	init.ops = ops;
@@ -256,10 +248,10 @@ struct clk *ti_clk_register_dpll(struct ti_clk *setup)
 	init.num_parents = dpll->num_parents;
 	init.parent_names = dpll->parents;
 
-	dd->control_reg = _get_reg(dpll->module, dpll->control_reg);
-	dd->idlest_reg = _get_reg(dpll->module, dpll->idlest_reg);
-	dd->mult_div1_reg = _get_reg(dpll->module, dpll->mult_div1_reg);
-	dd->autoidle_reg = _get_reg(dpll->module, dpll->autoidle_reg);
+	_get_reg(dpll->module, dpll->control_reg, &dd->control_reg);
+	_get_reg(dpll->module, dpll->idlest_reg, &dd->idlest_reg);
+	_get_reg(dpll->module, dpll->mult_div1_reg, &dd->mult_div1_reg);
+	_get_reg(dpll->module, dpll->autoidle_reg, &dd->autoidle_reg);
 
 	dd->modes = dpll->modes;
 	dd->div1_mask = dpll->div1_mask;
@@ -288,7 +280,7 @@ struct clk *ti_clk_register_dpll(struct ti_clk *setup)
 	if (dpll->flags & CLKF_J_TYPE)
 		dd->flags |= DPLL_J_TYPE;
 
-	clk = clk_register(NULL, &clk_hw->hw);
+	clk = ti_clk_register(NULL, &clk_hw->hw, setup->name);
 
 	if (!IS_ERR(clk))
 		return clk;
@@ -339,8 +331,24 @@ static void _register_dpll_x2(struct device_node *node,
 	init.parent_names = &parent_name;
 	init.num_parents = 1;
 
+#if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) || \
+	defined(CONFIG_SOC_DRA7XX)
+	if (hw_ops == &clkhwops_omap4_dpllmx) {
+		int ret;
+
+		/* Check if register defined, if not, drop hw-ops */
+		ret = of_property_count_elems_of_size(node, "reg", 1);
+		if (ret <= 0) {
+			clk_hw->ops = NULL;
+		} else if (ti_clk_get_reg_addr(node, 0, &clk_hw->clksel_reg)) {
+			kfree(clk_hw);
+			return;
+		}
+	}
+#endif
+
 	/* register the clock */
-	clk = clk_register(NULL, &clk_hw->hw);
+	clk = ti_clk_register(NULL, &clk_hw->hw, name);
 
 	if (IS_ERR(clk)) {
 		kfree(clk_hw);
@@ -380,7 +388,6 @@ static void __init of_ti_dpll_setup(struct device_node *node,
 	clk_hw->dpll_data = dd;
 	clk_hw->ops = &clkhwops_omap3_dpll;
 	clk_hw->hw.init = init;
-	clk_hw->flags = MEMMAP_ADDRESSING;
 
 	init->name = node->name;
 	init->ops = ops;
@@ -399,7 +406,8 @@ static void __init of_ti_dpll_setup(struct device_node *node,
 
 	init->parent_names = parent_names;
 
-	dd->control_reg = ti_clk_get_reg_addr(node, 0);
+	if (ti_clk_get_reg_addr(node, 0, &dd->control_reg))
+		goto cleanup;
 
 	/*
 	 * Special case for OMAP2 DPLL, register order is different due to
@@ -407,25 +415,22 @@ static void __init of_ti_dpll_setup(struct device_node *node,
 	 * missing idlest_mask.
 	 */
 	if (!dd->idlest_mask) {
-		dd->mult_div1_reg = ti_clk_get_reg_addr(node, 1);
+		if (ti_clk_get_reg_addr(node, 1, &dd->mult_div1_reg))
+			goto cleanup;
 #ifdef CONFIG_ARCH_OMAP2
 		clk_hw->ops = &clkhwops_omap2xxx_dpll;
 		omap2xxx_clkt_dpllcore_init(&clk_hw->hw);
 #endif
 	} else {
-		dd->idlest_reg = ti_clk_get_reg_addr(node, 1);
-		if (IS_ERR(dd->idlest_reg))
+		if (ti_clk_get_reg_addr(node, 1, &dd->idlest_reg))
 			goto cleanup;
 
-		dd->mult_div1_reg = ti_clk_get_reg_addr(node, 2);
+		if (ti_clk_get_reg_addr(node, 2, &dd->mult_div1_reg))
+			goto cleanup;
 	}
 
-	if (IS_ERR(dd->control_reg) || IS_ERR(dd->mult_div1_reg))
-		goto cleanup;
-
 	if (dd->autoidle_mask) {
-		dd->autoidle_reg = ti_clk_get_reg_addr(node, 3);
-		if (IS_ERR(dd->autoidle_reg))
+		if (ti_clk_get_reg_addr(node, 3, &dd->autoidle_reg))
 			goto cleanup;
 	}
 
diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c
index 4cdd28a25584..4534de2ef455 100644
--- a/drivers/clk/ti/dpll3xxx.c
+++ b/drivers/clk/ti/dpll3xxx.c
@@ -54,10 +54,10 @@ static void _omap3_dpll_write_clken(struct clk_hw_omap *clk, u8 clken_bits)
 
 	dd = clk->dpll_data;
 
-	v = ti_clk_ll_ops->clk_readl(dd->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->control_reg);
 	v &= ~dd->enable_mask;
 	v |= clken_bits << __ffs(dd->enable_mask);
-	ti_clk_ll_ops->clk_writel(v, dd->control_reg);
+	ti_clk_ll_ops->clk_writel(v, &dd->control_reg);
 }
 
 /* _omap3_wait_dpll_status: wait for a DPLL to enter a specific state */
@@ -73,7 +73,7 @@ static int _omap3_wait_dpll_status(struct clk_hw_omap *clk, u8 state)
 
 	state <<= __ffs(dd->idlest_mask);
 
-	while (((ti_clk_ll_ops->clk_readl(dd->idlest_reg) & dd->idlest_mask)
+	while (((ti_clk_ll_ops->clk_readl(&dd->idlest_reg) & dd->idlest_mask)
 		!= state) && i < MAX_DPLL_WAIT_TRIES) {
 		i++;
 		udelay(1);
@@ -151,7 +151,7 @@ static int _omap3_noncore_dpll_lock(struct clk_hw_omap *clk)
 	state <<= __ffs(dd->idlest_mask);
 
 	/* Check if already locked */
-	if ((ti_clk_ll_ops->clk_readl(dd->idlest_reg) & dd->idlest_mask) ==
+	if ((ti_clk_ll_ops->clk_readl(&dd->idlest_reg) & dd->idlest_mask) ==
 	    state)
 		goto done;
 
@@ -317,14 +317,14 @@ static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel)
 	 * only since freqsel field is no longer present on other devices.
 	 */
 	if (ti_clk_get_features()->flags & TI_CLK_DPLL_HAS_FREQSEL) {
-		v = ti_clk_ll_ops->clk_readl(dd->control_reg);
+		v = ti_clk_ll_ops->clk_readl(&dd->control_reg);
 		v &= ~dd->freqsel_mask;
 		v |= freqsel << __ffs(dd->freqsel_mask);
-		ti_clk_ll_ops->clk_writel(v, dd->control_reg);
+		ti_clk_ll_ops->clk_writel(v, &dd->control_reg);
 	}
 
 	/* Set DPLL multiplier, divider */
-	v = ti_clk_ll_ops->clk_readl(dd->mult_div1_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->mult_div1_reg);
 
 	/* Handle Duty Cycle Correction */
 	if (dd->dcc_mask) {
@@ -370,11 +370,11 @@ static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel)
 		}
 	}
 
-	ti_clk_ll_ops->clk_writel(v, dd->mult_div1_reg);
+	ti_clk_ll_ops->clk_writel(v, &dd->mult_div1_reg);
 
 	/* Set 4X multiplier and low-power mode */
 	if (dd->m4xen_mask || dd->lpmode_mask) {
-		v = ti_clk_ll_ops->clk_readl(dd->control_reg);
+		v = ti_clk_ll_ops->clk_readl(&dd->control_reg);
 
 		if (dd->m4xen_mask) {
 			if (dd->last_rounded_m4xen)
@@ -390,7 +390,7 @@ static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel)
 				v &= ~dd->lpmode_mask;
 		}
 
-		ti_clk_ll_ops->clk_writel(v, dd->control_reg);
+		ti_clk_ll_ops->clk_writel(v, &dd->control_reg);
 	}
 
 	/* We let the clock framework set the other output dividers later */
@@ -652,10 +652,10 @@ static u32 omap3_dpll_autoidle_read(struct clk_hw_omap *clk)
 
 	dd = clk->dpll_data;
 
-	if (!dd->autoidle_reg)
+	if (!dd->autoidle_mask)
 		return -EINVAL;
 
-	v = ti_clk_ll_ops->clk_readl(dd->autoidle_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->autoidle_reg);
 	v &= dd->autoidle_mask;
 	v >>= __ffs(dd->autoidle_mask);
 
@@ -681,7 +681,7 @@ static void omap3_dpll_allow_idle(struct clk_hw_omap *clk)
 
 	dd = clk->dpll_data;
 
-	if (!dd->autoidle_reg)
+	if (!dd->autoidle_mask)
 		return;
 
 	/*
@@ -689,10 +689,10 @@ static void omap3_dpll_allow_idle(struct clk_hw_omap *clk)
 	 * by writing 0x5 instead of 0x1.  Add some mechanism to
 	 * optionally enter this mode.
 	 */
-	v = ti_clk_ll_ops->clk_readl(dd->autoidle_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->autoidle_reg);
 	v &= ~dd->autoidle_mask;
 	v |= DPLL_AUTOIDLE_LOW_POWER_STOP << __ffs(dd->autoidle_mask);
-	ti_clk_ll_ops->clk_writel(v, dd->autoidle_reg);
+	ti_clk_ll_ops->clk_writel(v, &dd->autoidle_reg);
 }
 
 /**
@@ -711,13 +711,13 @@ static void omap3_dpll_deny_idle(struct clk_hw_omap *clk)
 
 	dd = clk->dpll_data;
 
-	if (!dd->autoidle_reg)
+	if (!dd->autoidle_mask)
 		return;
 
-	v = ti_clk_ll_ops->clk_readl(dd->autoidle_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->autoidle_reg);
 	v &= ~dd->autoidle_mask;
 	v |= DPLL_AUTOIDLE_DISABLE << __ffs(dd->autoidle_mask);
-	ti_clk_ll_ops->clk_writel(v, dd->autoidle_reg);
+	ti_clk_ll_ops->clk_writel(v, &dd->autoidle_reg);
 }
 
 /* Clock control for DPLL outputs */
@@ -773,7 +773,7 @@ unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw,
 
 	WARN_ON(!dd->enable_mask);
 
-	v = ti_clk_ll_ops->clk_readl(dd->control_reg) & dd->enable_mask;
+	v = ti_clk_ll_ops->clk_readl(&dd->control_reg) & dd->enable_mask;
 	v >>= __ffs(dd->enable_mask);
 	if ((v != OMAP3XXX_EN_DPLL_LOCKED) || (dd->flags & DPLL_J_TYPE))
 		rate = parent_rate;
diff --git a/drivers/clk/ti/dpll44xx.c b/drivers/clk/ti/dpll44xx.c
index 82c05b55a7be..d7a3f7ec8d77 100644
--- a/drivers/clk/ti/dpll44xx.c
+++ b/drivers/clk/ti/dpll44xx.c
@@ -42,17 +42,17 @@ static void omap4_dpllmx_allow_gatectrl(struct clk_hw_omap *clk)
 	u32 v;
 	u32 mask;
 
-	if (!clk || !clk->clksel_reg)
+	if (!clk)
 		return;
 
 	mask = clk->flags & CLOCK_CLKOUTX2 ?
 			OMAP4430_DPLL_CLKOUTX2_GATE_CTRL_MASK :
 			OMAP4430_DPLL_CLKOUT_GATE_CTRL_MASK;
 
-	v = ti_clk_ll_ops->clk_readl(clk->clksel_reg);
+	v = ti_clk_ll_ops->clk_readl(&clk->clksel_reg);
 	/* Clear the bit to allow gatectrl */
 	v &= ~mask;
-	ti_clk_ll_ops->clk_writel(v, clk->clksel_reg);
+	ti_clk_ll_ops->clk_writel(v, &clk->clksel_reg);
 }
 
 static void omap4_dpllmx_deny_gatectrl(struct clk_hw_omap *clk)
@@ -60,17 +60,17 @@ static void omap4_dpllmx_deny_gatectrl(struct clk_hw_omap *clk)
 	u32 v;
 	u32 mask;
 
-	if (!clk || !clk->clksel_reg)
+	if (!clk)
 		return;
 
 	mask = clk->flags & CLOCK_CLKOUTX2 ?
 			OMAP4430_DPLL_CLKOUTX2_GATE_CTRL_MASK :
 			OMAP4430_DPLL_CLKOUT_GATE_CTRL_MASK;
 
-	v = ti_clk_ll_ops->clk_readl(clk->clksel_reg);
+	v = ti_clk_ll_ops->clk_readl(&clk->clksel_reg);
 	/* Set the bit to deny gatectrl */
 	v |= mask;
-	ti_clk_ll_ops->clk_writel(v, clk->clksel_reg);
+	ti_clk_ll_ops->clk_writel(v, &clk->clksel_reg);
 }
 
 const struct clk_hw_omap_ops clkhwops_omap4_dpllmx = {
@@ -128,7 +128,7 @@ unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw,
 	rate = omap2_get_dpll_rate(clk);
 
 	/* regm4xen adds a multiplier of 4 to DPLL calculations */
-	v = ti_clk_ll_ops->clk_readl(dd->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->control_reg);
 	if (v & OMAP4430_DPLL_REGM4XEN_MASK)
 		rate *= OMAP4430_REGM4XEN_MULT;
 
diff --git a/drivers/clk/ti/fixed-factor.c b/drivers/clk/ti/fixed-factor.c
index 3cd406768909..0174a51a4ba6 100644
--- a/drivers/clk/ti/fixed-factor.c
+++ b/drivers/clk/ti/fixed-factor.c
@@ -62,6 +62,7 @@ static void __init of_ti_fixed_factor_clk_setup(struct device_node *node)
 	if (!IS_ERR(clk)) {
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 		of_ti_clk_autoidle_setup(node);
+		ti_clk_add_alias(NULL, clk, clk_name);
 	}
 }
 CLK_OF_DECLARE(ti_fixed_factor_clk, "ti,fixed-factor-clock",
diff --git a/drivers/clk/ti/gate.c b/drivers/clk/ti/gate.c
index bc05f276f32b..7151ec3a1b07 100644
--- a/drivers/clk/ti/gate.c
+++ b/drivers/clk/ti/gate.c
@@ -35,7 +35,7 @@ static const struct clk_ops omap_gate_clkdm_clk_ops = {
 	.disable	= &omap2_clkops_disable_clkdm,
 };
 
-static const struct clk_ops omap_gate_clk_ops = {
+const struct clk_ops omap_gate_clk_ops = {
 	.init		= &omap2_init_clk_clkdm,
 	.enable		= &omap2_dflt_clk_enable,
 	.disable	= &omap2_dflt_clk_disable,
@@ -62,7 +62,7 @@ static const struct clk_ops omap_gate_clk_hsdiv_restore_ops = {
  */
 static int omap36xx_gate_clk_enable_with_hsdiv_restore(struct clk_hw *hw)
 {
-	struct clk_divider *parent;
+	struct clk_omap_divider *parent;
 	struct clk_hw *parent_hw;
 	u32 dummy_v, orig_v;
 	int ret;
@@ -72,19 +72,19 @@ static int omap36xx_gate_clk_enable_with_hsdiv_restore(struct clk_hw *hw)
 
 	/* Parent is the x2 node, get parent of parent for the m2 div */
 	parent_hw = clk_hw_get_parent(clk_hw_get_parent(hw));
-	parent = to_clk_divider(parent_hw);
+	parent = to_clk_omap_divider(parent_hw);
 
 	/* Restore the dividers */
 	if (!ret) {
-		orig_v = ti_clk_ll_ops->clk_readl(parent->reg);
+		orig_v = ti_clk_ll_ops->clk_readl(&parent->reg);
 		dummy_v = orig_v;
 
 		/* Write any other value different from the Read value */
 		dummy_v ^= (1 << parent->shift);
-		ti_clk_ll_ops->clk_writel(dummy_v, parent->reg);
+		ti_clk_ll_ops->clk_writel(dummy_v, &parent->reg);
 
 		/* Write the original divider */
-		ti_clk_ll_ops->clk_writel(orig_v, parent->reg);
+		ti_clk_ll_ops->clk_writel(orig_v, &parent->reg);
 	}
 
 	return ret;
@@ -92,7 +92,7 @@ static int omap36xx_gate_clk_enable_with_hsdiv_restore(struct clk_hw *hw)
 
 static struct clk *_register_gate(struct device *dev, const char *name,
 				  const char *parent_name, unsigned long flags,
-				  void __iomem *reg, u8 bit_idx,
+				  struct clk_omap_reg *reg, u8 bit_idx,
 				  u8 clk_gate_flags, const struct clk_ops *ops,
 				  const struct clk_hw_omap_ops *hw_ops)
 {
@@ -109,18 +109,18 @@ static struct clk *_register_gate(struct device *dev, const char *name,
 	init.name = name;
 	init.ops = ops;
 
-	clk_hw->enable_reg = reg;
+	memcpy(&clk_hw->enable_reg, reg, sizeof(*reg));
 	clk_hw->enable_bit = bit_idx;
 	clk_hw->ops = hw_ops;
 
-	clk_hw->flags = MEMMAP_ADDRESSING | clk_gate_flags;
+	clk_hw->flags = clk_gate_flags;
 
 	init.parent_names = &parent_name;
 	init.num_parents = 1;
 
 	init.flags = flags;
 
-	clk = clk_register(NULL, &clk_hw->hw);
+	clk = ti_clk_register(NULL, &clk_hw->hw, name);
 
 	if (IS_ERR(clk))
 		kfree(clk_hw);
@@ -133,8 +133,7 @@ struct clk *ti_clk_register_gate(struct ti_clk *setup)
 {
 	const struct clk_ops *ops = &omap_gate_clk_ops;
 	const struct clk_hw_omap_ops *hw_ops = NULL;
-	u32 reg;
-	struct clk_omap_reg *reg_setup;
+	struct clk_omap_reg reg;
 	u32 flags = 0;
 	u8 clk_gate_flags = 0;
 	struct ti_clk_gate *gate;
@@ -144,8 +143,6 @@ struct clk *ti_clk_register_gate(struct ti_clk *setup)
 	if (gate->flags & CLKF_INTERFACE)
 		return ti_clk_register_interface(setup);
 
-	reg_setup = (struct clk_omap_reg *)&reg;
-
 	if (gate->flags & CLKF_SET_RATE_PARENT)
 		flags |= CLK_SET_RATE_PARENT;
 
@@ -169,11 +166,12 @@ struct clk *ti_clk_register_gate(struct ti_clk *setup)
 	if (gate->flags & CLKF_AM35XX)
 		hw_ops = &clkhwops_am35xx_ipss_module_wait;
 
-	reg_setup->index = gate->module;
-	reg_setup->offset = gate->reg;
+	reg.index = gate->module;
+	reg.offset = gate->reg;
+	reg.ptr = NULL;
 
 	return _register_gate(NULL, setup->name, gate->parent, flags,
-			      (void __iomem *)reg, gate->bit_shift,
+			      &reg, gate->bit_shift,
 			      clk_gate_flags, ops, hw_ops);
 }
 
@@ -203,7 +201,6 @@ struct clk_hw *ti_clk_build_component_gate(struct ti_clk_gate *setup)
 		ops = &clkhwops_iclk_wait;
 
 	gate->ops = ops;
-	gate->flags = MEMMAP_ADDRESSING;
 
 	return &gate->hw;
 }
@@ -215,15 +212,14 @@ static void __init _of_ti_gate_clk_setup(struct device_node *node,
 {
 	struct clk *clk;
 	const char *parent_name;
-	void __iomem *reg = NULL;
+	struct clk_omap_reg reg;
 	u8 enable_bit = 0;
 	u32 val;
 	u32 flags = 0;
 	u8 clk_gate_flags = 0;
 
 	if (ops != &omap_gate_clkdm_clk_ops) {
-		reg = ti_clk_get_reg_addr(node, 0);
-		if (IS_ERR(reg))
+		if (ti_clk_get_reg_addr(node, 0, &reg))
 			return;
 
 		if (!of_property_read_u32(node, "ti,bit-shift", &val))
@@ -243,7 +239,7 @@ static void __init _of_ti_gate_clk_setup(struct device_node *node,
 	if (of_property_read_bool(node, "ti,set-bit-to-disable"))
 		clk_gate_flags |= INVERT_ENABLE;
 
-	clk = _register_gate(NULL, node->name, parent_name, flags, reg,
+	clk = _register_gate(NULL, node->name, parent_name, flags, &reg,
 			     enable_bit, clk_gate_flags, ops, hw_ops);
 
 	if (!IS_ERR(clk))
@@ -261,15 +257,13 @@ _of_ti_composite_gate_clk_setup(struct device_node *node,
 	if (!gate)
 		return;
 
-	gate->enable_reg = ti_clk_get_reg_addr(node, 0);
-	if (IS_ERR(gate->enable_reg))
+	if (ti_clk_get_reg_addr(node, 0, &gate->enable_reg))
 		goto cleanup;
 
 	of_property_read_u32(node, "ti,bit-shift", &val);
 
 	gate->enable_bit = val;
 	gate->ops = hw_ops;
-	gate->flags = MEMMAP_ADDRESSING;
 
 	if (!ti_clk_add_component(node, &gate->hw, CLK_COMPONENT_TYPE_GATE))
 		return;
diff --git a/drivers/clk/ti/interface.c b/drivers/clk/ti/interface.c
index e505e6f8228d..62cf50c1e1e3 100644
--- a/drivers/clk/ti/interface.c
+++ b/drivers/clk/ti/interface.c
@@ -34,7 +34,7 @@ static const struct clk_ops ti_interface_clk_ops = {
 
 static struct clk *_register_interface(struct device *dev, const char *name,
 				       const char *parent_name,
-				       void __iomem *reg, u8 bit_idx,
+				       struct clk_omap_reg *reg, u8 bit_idx,
 				       const struct clk_hw_omap_ops *ops)
 {
 	struct clk_init_data init = { NULL };
@@ -47,8 +47,7 @@ static struct clk *_register_interface(struct device *dev, const char *name,
 
 	clk_hw->hw.init = &init;
 	clk_hw->ops = ops;
-	clk_hw->flags = MEMMAP_ADDRESSING;
-	clk_hw->enable_reg = reg;
+	memcpy(&clk_hw->enable_reg, reg, sizeof(*reg));
 	clk_hw->enable_bit = bit_idx;
 
 	init.name = name;
@@ -58,7 +57,7 @@ static struct clk *_register_interface(struct device *dev, const char *name,
 	init.num_parents = 1;
 	init.parent_names = &parent_name;
 
-	clk = clk_register(NULL, &clk_hw->hw);
+	clk = ti_clk_register(NULL, &clk_hw->hw, name);
 
 	if (IS_ERR(clk))
 		kfree(clk_hw);
@@ -72,14 +71,13 @@ static struct clk *_register_interface(struct device *dev, const char *name,
 struct clk *ti_clk_register_interface(struct ti_clk *setup)
 {
 	const struct clk_hw_omap_ops *ops = &clkhwops_iclk_wait;
-	u32 reg;
-	struct clk_omap_reg *reg_setup;
+	struct clk_omap_reg reg;
 	struct ti_clk_gate *gate;
 
 	gate = setup->data;
-	reg_setup = (struct clk_omap_reg *)&reg;
-	reg_setup->index = gate->module;
-	reg_setup->offset = gate->reg;
+	reg.index = gate->module;
+	reg.offset = gate->reg;
+	reg.ptr = NULL;
 
 	if (gate->flags & CLKF_NO_WAIT)
 		ops = &clkhwops_iclk;
@@ -97,7 +95,7 @@ struct clk *ti_clk_register_interface(struct ti_clk *setup)
 		ops = &clkhwops_am35xx_ipss_wait;
 
 	return _register_interface(NULL, setup->name, gate->parent,
-				   (void __iomem *)reg, gate->bit_shift, ops);
+				   &reg, gate->bit_shift, ops);
 }
 #endif
 
@@ -106,12 +104,11 @@ static void __init _of_ti_interface_clk_setup(struct device_node *node,
 {
 	struct clk *clk;
 	const char *parent_name;
-	void __iomem *reg;
+	struct clk_omap_reg reg;
 	u8 enable_bit = 0;
 	u32 val;
 
-	reg = ti_clk_get_reg_addr(node, 0);
-	if (IS_ERR(reg))
+	if (ti_clk_get_reg_addr(node, 0, &reg))
 		return;
 
 	if (!of_property_read_u32(node, "ti,bit-shift", &val))
@@ -123,7 +120,7 @@ static void __init _of_ti_interface_clk_setup(struct device_node *node,
 		return;
 	}
 
-	clk = _register_interface(NULL, node->name, parent_name, reg,
+	clk = _register_interface(NULL, node->name, parent_name, &reg,
 				  enable_bit, ops);
 
 	if (!IS_ERR(clk))
diff --git a/drivers/clk/ti/mux.c b/drivers/clk/ti/mux.c
index 44777ab6fdeb..18c267b38461 100644
--- a/drivers/clk/ti/mux.c
+++ b/drivers/clk/ti/mux.c
@@ -28,7 +28,7 @@
 
 static u8 ti_clk_mux_get_parent(struct clk_hw *hw)
 {
-	struct clk_mux *mux = to_clk_mux(hw);
+	struct clk_omap_mux *mux = to_clk_omap_mux(hw);
 	int num_parents = clk_hw_get_num_parents(hw);
 	u32 val;
 
@@ -39,7 +39,7 @@ static u8 ti_clk_mux_get_parent(struct clk_hw *hw)
 	 * OTOH, pmd_trace_clk_mux_ck uses a separate bit for each clock, so
 	 * val = 0x4 really means "bit 2, index starts at bit 0"
 	 */
-	val = ti_clk_ll_ops->clk_readl(mux->reg) >> mux->shift;
+	val = ti_clk_ll_ops->clk_readl(&mux->reg) >> mux->shift;
 	val &= mux->mask;
 
 	if (mux->table) {
@@ -65,7 +65,7 @@ static u8 ti_clk_mux_get_parent(struct clk_hw *hw)
 
 static int ti_clk_mux_set_parent(struct clk_hw *hw, u8 index)
 {
-	struct clk_mux *mux = to_clk_mux(hw);
+	struct clk_omap_mux *mux = to_clk_omap_mux(hw);
 	u32 val;
 
 	if (mux->table) {
@@ -81,11 +81,11 @@ static int ti_clk_mux_set_parent(struct clk_hw *hw, u8 index)
 	if (mux->flags & CLK_MUX_HIWORD_MASK) {
 		val = mux->mask << (mux->shift + 16);
 	} else {
-		val = ti_clk_ll_ops->clk_readl(mux->reg);
+		val = ti_clk_ll_ops->clk_readl(&mux->reg);
 		val &= ~(mux->mask << mux->shift);
 	}
 	val |= index << mux->shift;
-	ti_clk_ll_ops->clk_writel(val, mux->reg);
+	ti_clk_ll_ops->clk_writel(val, &mux->reg);
 
 	return 0;
 }
@@ -97,12 +97,12 @@ const struct clk_ops ti_clk_mux_ops = {
 };
 
 static struct clk *_register_mux(struct device *dev, const char *name,
-				 const char **parent_names, u8 num_parents,
-				 unsigned long flags, void __iomem *reg,
-				 u8 shift, u32 mask, u8 clk_mux_flags,
-				 u32 *table)
+				 const char * const *parent_names,
+				 u8 num_parents, unsigned long flags,
+				 struct clk_omap_reg *reg, u8 shift, u32 mask,
+				 u8 clk_mux_flags, u32 *table)
 {
-	struct clk_mux *mux;
+	struct clk_omap_mux *mux;
 	struct clk *clk;
 	struct clk_init_data init;
 
@@ -120,14 +120,14 @@ static struct clk *_register_mux(struct device *dev, const char *name,
 	init.num_parents = num_parents;
 
 	/* struct clk_mux assignments */
-	mux->reg = reg;
+	memcpy(&mux->reg, reg, sizeof(*reg));
 	mux->shift = shift;
 	mux->mask = mask;
 	mux->flags = clk_mux_flags;
 	mux->table = table;
 	mux->hw.init = &init;
 
-	clk = clk_register(dev, &mux->hw);
+	clk = ti_clk_register(dev, &mux->hw, name);
 
 	if (IS_ERR(clk))
 		kfree(mux);
@@ -140,12 +140,9 @@ struct clk *ti_clk_register_mux(struct ti_clk *setup)
 	struct ti_clk_mux *mux;
 	u32 flags;
 	u8 mux_flags = 0;
-	struct clk_omap_reg *reg_setup;
-	u32 reg;
+	struct clk_omap_reg reg;
 	u32 mask;
 
-	reg_setup = (struct clk_omap_reg *)&reg;
-
 	mux = setup->data;
 	flags = CLK_SET_RATE_NO_REPARENT;
 
@@ -154,8 +151,9 @@ struct clk *ti_clk_register_mux(struct ti_clk *setup)
 		mask--;
 
 	mask = (1 << fls(mask)) - 1;
-	reg_setup->index = mux->module;
-	reg_setup->offset = mux->reg;
+	reg.index = mux->module;
+	reg.offset = mux->reg;
+	reg.ptr = NULL;
 
 	if (mux->flags & CLKF_INDEX_STARTS_AT_ONE)
 		mux_flags |= CLK_MUX_INDEX_ONE;
@@ -164,7 +162,7 @@ struct clk *ti_clk_register_mux(struct ti_clk *setup)
 		flags |= CLK_SET_RATE_PARENT;
 
 	return _register_mux(NULL, setup->name, mux->parents, mux->num_parents,
-			     flags, (void __iomem *)reg, mux->bit_shift, mask,
+			     flags, &reg, mux->bit_shift, mask,
 			     mux_flags, NULL);
 }
 
@@ -177,7 +175,7 @@ struct clk *ti_clk_register_mux(struct ti_clk *setup)
 static void of_mux_clk_setup(struct device_node *node)
 {
 	struct clk *clk;
-	void __iomem *reg;
+	struct clk_omap_reg reg;
 	unsigned int num_parents;
 	const char **parent_names;
 	u8 clk_mux_flags = 0;
@@ -196,9 +194,7 @@ static void of_mux_clk_setup(struct device_node *node)
 
 	of_clk_parent_fill(node, parent_names, num_parents);
 
-	reg = ti_clk_get_reg_addr(node, 0);
-
-	if (IS_ERR(reg))
+	if (ti_clk_get_reg_addr(node, 0, &reg))
 		goto cleanup;
 
 	of_property_read_u32(node, "ti,bit-shift", &shift);
@@ -217,7 +213,7 @@ static void of_mux_clk_setup(struct device_node *node)
 	mask = (1 << fls(mask)) - 1;
 
 	clk = _register_mux(NULL, node->name, parent_names, num_parents,
-			    flags, reg, shift, mask, clk_mux_flags, NULL);
+			    flags, &reg, shift, mask, clk_mux_flags, NULL);
 
 	if (!IS_ERR(clk))
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
@@ -229,8 +225,7 @@ CLK_OF_DECLARE(mux_clk, "ti,mux-clock", of_mux_clk_setup);
 
 struct clk_hw *ti_clk_build_component_mux(struct ti_clk_mux *setup)
 {
-	struct clk_mux *mux;
-	struct clk_omap_reg *reg;
+	struct clk_omap_mux *mux;
 	int num_parents;
 
 	if (!setup)
@@ -240,12 +235,10 @@ struct clk_hw *ti_clk_build_component_mux(struct ti_clk_mux *setup)
 	if (!mux)
 		return ERR_PTR(-ENOMEM);
 
-	reg = (struct clk_omap_reg *)&mux->reg;
-
 	mux->shift = setup->bit_shift;
 
-	reg->index = setup->module;
-	reg->offset = setup->reg;
+	mux->reg.index = setup->module;
+	mux->reg.offset = setup->reg;
 
 	if (setup->flags & CLKF_INDEX_STARTS_AT_ONE)
 		mux->flags |= CLK_MUX_INDEX_ONE;
@@ -260,7 +253,7 @@ struct clk_hw *ti_clk_build_component_mux(struct ti_clk_mux *setup)
 
 static void __init of_ti_composite_mux_clk_setup(struct device_node *node)
 {
-	struct clk_mux *mux;
+	struct clk_omap_mux *mux;
 	unsigned int num_parents;
 	u32 val;
 
@@ -268,9 +261,7 @@ static void __init of_ti_composite_mux_clk_setup(struct device_node *node)
 	if (!mux)
 		return;
 
-	mux->reg = ti_clk_get_reg_addr(node, 0);
-
-	if (IS_ERR(mux->reg))
+	if (ti_clk_get_reg_addr(node, 0, &mux->reg))
 		goto cleanup;
 
 	if (!of_property_read_u32(node, "ti,bit-shift", &val))
diff --git a/drivers/clk/x86/clk-pmc-atom.c b/drivers/clk/x86/clk-pmc-atom.c
index 2b60577703ef..f99abc1106f0 100644
--- a/drivers/clk/x86/clk-pmc-atom.c
+++ b/drivers/clk/x86/clk-pmc-atom.c
@@ -54,6 +54,7 @@ struct clk_plt_data {
 	struct clk_plt_fixed **parents;
 	u8 nparents;
 	struct clk_plt *clks[PMC_CLK_NUM];
+	struct clk_lookup *mclk_lookup;
 };
 
 /* Return an index in parent table */
@@ -337,6 +338,11 @@ static int plt_clk_probe(struct platform_device *pdev)
 			goto err_unreg_clk_plt;
 		}
 	}
+	data->mclk_lookup = clkdev_hw_create(&data->clks[3]->hw, "mclk", NULL);
+	if (!data->mclk_lookup) {
+		err = -ENOMEM;
+		goto err_unreg_clk_plt;
+	}
 
 	plt_clk_free_parent_names_loop(parent_names, data->nparents);
 
@@ -356,6 +362,7 @@ static int plt_clk_remove(struct platform_device *pdev)
 
 	data = platform_get_drvdata(pdev);
 
+	clkdev_drop(data->mclk_lookup);
 	plt_clk_unregister_loop(data, PMC_CLK_NUM);
 	plt_clk_unregister_parents(data);
 	return 0;
diff --git a/drivers/clk/zte/clk-zx296718.c b/drivers/clk/zte/clk-zx296718.c
index 2f7c668643fe..a10962988ba8 100644
--- a/drivers/clk/zte/clk-zx296718.c
+++ b/drivers/clk/zte/clk-zx296718.c
@@ -94,13 +94,36 @@
 
 static DEFINE_SPINLOCK(clk_lock);
 
-static struct zx_pll_config pll_cpu_table[] = {
+static const struct zx_pll_config pll_cpu_table[] = {
 	PLL_RATE(1312000000, 0x00103621, 0x04aaaaaa),
 	PLL_RATE(1407000000, 0x00103a21, 0x04aaaaaa),
 	PLL_RATE(1503000000, 0x00103e21, 0x04aaaaaa),
 	PLL_RATE(1600000000, 0x00104221, 0x04aaaaaa),
 };
 
+static const struct zx_pll_config pll_vga_table[] = {
+	PLL_RATE(36000000,  0x00102464, 0x04000000), /* 800x600@56 */
+	PLL_RATE(40000000,  0x00102864, 0x04000000), /* 800x600@60 */
+	PLL_RATE(49500000,  0x00103164, 0x04800000), /* 800x600@75 */
+	PLL_RATE(50000000,  0x00103264, 0x04000000), /* 800x600@72 */
+	PLL_RATE(56250000,  0x00103864, 0x04400000), /* 800x600@85 */
+	PLL_RATE(65000000,  0x00104164, 0x04000000), /* 1024x768@60 */
+	PLL_RATE(74375000,  0x00104a64, 0x04600000), /* 1280x720@60 */
+	PLL_RATE(75000000,  0x00104b64, 0x04800000), /* 1024x768@70 */
+	PLL_RATE(78750000,  0x00104e64, 0x04c00000), /* 1024x768@75 */
+	PLL_RATE(85500000,  0x00105564, 0x04800000), /* 1360x768@60 */
+	PLL_RATE(106500000, 0x00106a64, 0x04800000), /* 1440x900@60 */
+	PLL_RATE(108000000, 0x00106c64, 0x04000000), /* 1280x1024@60 */
+	PLL_RATE(110000000, 0x00106e64, 0x04000000), /* 1024x768@85 */
+	PLL_RATE(135000000, 0x00105a44, 0x04000000), /* 1280x1024@75 */
+	PLL_RATE(136750000, 0x00104462, 0x04600000), /* 1440x900@75 */
+	PLL_RATE(148500000, 0x00104a62, 0x04400000), /* 1920x1080@60 */
+	PLL_RATE(157000000, 0x00104e62, 0x04800000), /* 1440x900@85 */
+	PLL_RATE(157500000, 0x00104e62, 0x04c00000), /* 1280x1024@85 */
+	PLL_RATE(162000000, 0x00105162, 0x04000000), /* 1600x1200@60 */
+	PLL_RATE(193250000, 0x00106062, 0x04a00000), /* 1920x1200@60 */
+};
+
 PNAME(osc) = {
 	"osc24m",
 	"osc32k",
@@ -369,6 +392,7 @@ PNAME(wdt_ares_p) = {
 
 static struct clk_zx_pll zx296718_pll_clk[] = {
 	ZX296718_PLL("pll_cpu",	"osc24m",	PLL_CPU_REG,	pll_cpu_table),
+	ZX296718_PLL("pll_vga",	"osc24m",	PLL_VGA_REG,	pll_vga_table),
 };
 
 static struct zx_clk_fixed_factor top_ffactor_clk[] = {
@@ -409,7 +433,7 @@ static struct zx_clk_fixed_factor top_ffactor_clk[] = {
 	FFACTOR(0, "clk54m",		"pll_mm1", 1, 24, 0),
 	/* vga */
 	FFACTOR(0, "pll_vga_1800m",	"pll_vga", 1, 1, 0),
-	FFACTOR(0, "clk_vga",		"pll_vga", 1, 2, 0),
+	FFACTOR(0, "clk_vga",		"pll_vga", 1, 1, CLK_SET_RATE_PARENT),
 	/* pll ddr */
 	FFACTOR(0, "clk466m",		"pll_ddr", 1, 2, 0),
 
@@ -458,8 +482,8 @@ static struct zx_clk_mux top_mux_clk[] = {
 	MUX(0, "sappu_a_mux",	 sappu_aclk_p,	  TOP_CLK_MUX5,  4, 2),
 	MUX(0, "sappu_w_mux",	 sappu_wclk_p,	  TOP_CLK_MUX5,  8, 3),
 	MUX(0, "vou_a_mux",	 vou_aclk_p,	  TOP_CLK_MUX7,  0, 3),
-	MUX(0, "vou_main_w_mux", vou_main_wclk_p, TOP_CLK_MUX7,  4, 3),
-	MUX(0, "vou_aux_w_mux",	 vou_aux_wclk_p,  TOP_CLK_MUX7,  8, 3),
+	MUX_F(0, "vou_main_w_mux", vou_main_wclk_p, TOP_CLK_MUX7,  4, 3, CLK_SET_RATE_PARENT, 0),
+	MUX_F(0, "vou_aux_w_mux",  vou_aux_wclk_p,  TOP_CLK_MUX7,  8, 3, CLK_SET_RATE_PARENT, 0),
 	MUX(0, "vou_ppu_w_mux",	 vou_ppu_wclk_p,  TOP_CLK_MUX7, 12, 3),
 	MUX(0, "vga_i2c_mux",	 vga_i2c_wclk_p,  TOP_CLK_MUX7, 16, 1),
 	MUX(0, "viu_m0_a_mux",	 viu_m0_aclk_p,	  TOP_CLK_MUX6,  0, 3),
diff --git a/drivers/clk/zte/clk.c b/drivers/clk/zte/clk.c
index 878d879b23ff..b82031766ffa 100644
--- a/drivers/clk/zte/clk.c
+++ b/drivers/clk/zte/clk.c
@@ -52,7 +52,10 @@ static int hw_to_idx(struct clk_zx_pll *zx_pll)
 
 	/* For matching the value in lookup table */
 	hw_cfg0 &= ~BIT(zx_pll->lock_bit);
-	hw_cfg0 |= BIT(zx_pll->pd_bit);
+
+	/* Check availability of pd_bit */
+	if (zx_pll->pd_bit < 32)
+		hw_cfg0 |= BIT(zx_pll->pd_bit);
 
 	for (i = 0; i < zx_pll->count; i++) {
 		if (hw_cfg0 == config[i].cfg0 && hw_cfg1 == config[i].cfg1)
@@ -108,6 +111,10 @@ static int zx_pll_enable(struct clk_hw *hw)
 	struct clk_zx_pll *zx_pll = to_clk_zx_pll(hw);
 	u32 reg;
 
+	/* If pd_bit is not available, simply return success. */
+	if (zx_pll->pd_bit > 31)
+		return 0;
+
 	reg = readl_relaxed(zx_pll->reg_base);
 	writel_relaxed(reg & ~BIT(zx_pll->pd_bit), zx_pll->reg_base);
 
@@ -120,6 +127,9 @@ static void zx_pll_disable(struct clk_hw *hw)
 	struct clk_zx_pll *zx_pll = to_clk_zx_pll(hw);
 	u32 reg;
 
+	if (zx_pll->pd_bit > 31)
+		return;
+
 	reg = readl_relaxed(zx_pll->reg_base);
 	writel_relaxed(reg | BIT(zx_pll->pd_bit), zx_pll->reg_base);
 }
diff --git a/drivers/clk/zte/clk.h b/drivers/clk/zte/clk.h
index 84a55a3e2bd4..4df0f121b56d 100644
--- a/drivers/clk/zte/clk.h
+++ b/drivers/clk/zte/clk.h
@@ -66,8 +66,12 @@ struct clk_zx_pll {
 				CLK_GET_RATE_NOCACHE),			\
 }
 
+/*
+ * The pd_bit is not available on ZX296718, so let's pass something
+ * bigger than 31, e.g. 0xff, to indicate that.
+ */
 #define ZX296718_PLL(_name, _parent, _reg, _table)			\
-ZX_PLL(_name, _parent, _reg, _table, 0, 30)
+ZX_PLL(_name, _parent, _reg, _table, 0xff, 30)
 
 struct zx_clk_gate {
 	struct clk_gate gate;
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index a1fb918b8021..8b5c30062d99 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -1209,9 +1209,9 @@ arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame)
 		return 0;
 	}
 
-	rate = readl_relaxed(frame + CNTFRQ);
+	rate = readl_relaxed(base + CNTFRQ);
 
-	iounmap(frame);
+	iounmap(base);
 
 	return rate;
 }
@@ -1268,7 +1268,7 @@ arch_timer_mem_find_best_frame(struct arch_timer_mem *timer_mem)
 		pr_err("Unable to find a suitable frame in timer @ %pa\n",
 			&timer_mem->cntctlbase);
 
-	return frame;
+	return best_frame;
 }
 
 static int __init
diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c
index 44e5e951583b..8e64b8460f11 100644
--- a/drivers/clocksource/cadence_ttc_timer.c
+++ b/drivers/clocksource/cadence_ttc_timer.c
@@ -18,6 +18,7 @@
 #include <linux/clk.h>
 #include <linux/interrupt.h>
 #include <linux/clockchips.h>
+#include <linux/clocksource.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/slab.h>
diff --git a/drivers/clocksource/cs5535-clockevt.c b/drivers/clocksource/cs5535-clockevt.c
index 9a7e37cf56b0..a1df588343f2 100644
--- a/drivers/clocksource/cs5535-clockevt.c
+++ b/drivers/clocksource/cs5535-clockevt.c
@@ -22,7 +22,7 @@
 #define DRV_NAME "cs5535-clockevt"
 
 static int timer_irq;
-module_param_named(irq, timer_irq, int, 0644);
+module_param_hw_named(irq, timer_irq, int, irq, 0644);
 MODULE_PARM_DESC(irq, "Which IRQ to use for the clock source MFGPT ticks.");
 
 /*
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index 2e9c830ae1cd..c4656c4d44a6 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -12,6 +12,7 @@
 
 #include <linux/clk.h>
 #include <linux/clockchips.h>
+#include <linux/clocksource.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 74ed7e9a7f27..2011fec2d6ad 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -71,6 +71,15 @@ config ARM_HIGHBANK_CPUFREQ
 
 	  If in doubt, say N.
 
+config ARM_DB8500_CPUFREQ
+	tristate "ST-Ericsson DB8500 cpufreq" if COMPILE_TEST && !ARCH_U8500
+	default ARCH_U8500
+	depends on HAS_IOMEM
+	depends on !CPU_THERMAL || THERMAL
+	help
+	  This adds the CPUFreq driver for ST-Ericsson Ux500 (DB8500) SoC
+	  series.
+
 config ARM_IMX6Q_CPUFREQ
 	tristate "Freescale i.MX6 cpufreq support"
 	depends on ARCH_MXC
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index b7e78f063c4f..ab3a42cd29ef 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -53,7 +53,7 @@ obj-$(CONFIG_ARM_DT_BL_CPUFREQ)		+= arm_big_little_dt.o
 
 obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ)	+= brcmstb-avs-cpufreq.o
 obj-$(CONFIG_ARCH_DAVINCI)		+= davinci-cpufreq.o
-obj-$(CONFIG_UX500_SOC_DB8500)		+= dbx500-cpufreq.o
+obj-$(CONFIG_ARM_DB8500_CPUFREQ)	+= dbx500-cpufreq.o
 obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ)	+= exynos5440-cpufreq.o
 obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ)	+= highbank-cpufreq.o
 obj-$(CONFIG_ARM_IMX6Q_CPUFREQ)		+= imx6q-cpufreq.o
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 0e3f6496524d..26b643d57847 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2468,6 +2468,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 	if (!(cpufreq_driver->flags & CPUFREQ_STICKY) &&
 	    list_empty(&cpufreq_policy_list)) {
 		/* if all ->init() calls failed, unregister */
+		ret = -ENODEV;
 		pr_debug("%s: No CPU initialized for driver %s\n", __func__,
 			 driver_data->name);
 		goto err_if_unreg;
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 992f7c20760f..88220ff3e1c2 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -185,8 +185,8 @@ static ssize_t store_down_threshold(struct gov_attr_set *attr_set,
 	int ret;
 	ret = sscanf(buf, "%u", &input);
 
-	/* cannot be lower than 11 otherwise freq will not fall */
-	if (ret != 1 || input < 11 || input > 100 ||
+	/* cannot be lower than 1 otherwise freq will not fall */
+	if (ret != 1 || input < 1 || input > 100 ||
 			input >= dbs_data->up_threshold)
 		return -EINVAL;
 
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b7de5bd76a31..eb1158532de3 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -571,9 +571,10 @@ static inline void update_turbo_state(void)
 static int min_perf_pct_min(void)
 {
 	struct cpudata *cpu = all_cpu_data[0];
+	int turbo_pstate = cpu->pstate.turbo_pstate;
 
-	return DIV_ROUND_UP(cpu->pstate.min_pstate * 100,
-			    cpu->pstate.turbo_pstate);
+	return turbo_pstate ?
+		DIV_ROUND_UP(cpu->pstate.min_pstate * 100, turbo_pstate) : 0;
 }
 
 static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c
index 1b9bcd76c60e..c2dd43f3f5d8 100644
--- a/drivers/cpufreq/kirkwood-cpufreq.c
+++ b/drivers/cpufreq/kirkwood-cpufreq.c
@@ -127,7 +127,12 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
 		return PTR_ERR(priv.cpu_clk);
 	}
 
-	clk_prepare_enable(priv.cpu_clk);
+	err = clk_prepare_enable(priv.cpu_clk);
+	if (err) {
+		dev_err(priv.dev, "Unable to prepare cpuclk\n");
+		return err;
+	}
+
 	kirkwood_freq_table[0].frequency = clk_get_rate(priv.cpu_clk) / 1000;
 
 	priv.ddr_clk = of_clk_get_by_name(np, "ddrclk");
@@ -137,7 +142,11 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
 		goto out_cpu;
 	}
 
-	clk_prepare_enable(priv.ddr_clk);
+	err = clk_prepare_enable(priv.ddr_clk);
+	if (err) {
+		dev_err(priv.dev, "Unable to prepare ddrclk\n");
+		goto out_cpu;
+	}
 	kirkwood_freq_table[1].frequency = clk_get_rate(priv.ddr_clk) / 1000;
 
 	priv.powersave_clk = of_clk_get_by_name(np, "powersave");
@@ -146,7 +155,11 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
 		err = PTR_ERR(priv.powersave_clk);
 		goto out_ddr;
 	}
-	clk_prepare_enable(priv.powersave_clk);
+	err = clk_prepare_enable(priv.powersave_clk);
+	if (err) {
+		dev_err(priv.dev, "Unable to prepare powersave clk\n");
+		goto out_ddr;
+	}
 
 	of_node_put(np);
 	np = NULL;
diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index 6bbdac1065ff..9ac27b22476c 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -51,19 +51,12 @@ static int loongson2_cpu_freq_notifier(struct notifier_block *nb,
 static int loongson2_cpufreq_target(struct cpufreq_policy *policy,
 				     unsigned int index)
 {
-	unsigned int cpu = policy->cpu;
-	cpumask_t cpus_allowed;
 	unsigned int freq;
 
-	cpus_allowed = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, cpumask_of(cpu));
-
 	freq =
 	    ((cpu_clock_freq / 1000) *
 	     loongson2_clockmod_table[index].driver_data) / 8;
 
-	set_cpus_allowed_ptr(current, &cpus_allowed);
-
 	/* setting the cpu frequency */
 	clk_set_rate(policy->clk, freq * 1000);
 
diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c
index 35dd4d7ffee0..b257fc7d5204 100644
--- a/drivers/cpufreq/pasemi-cpufreq.c
+++ b/drivers/cpufreq/pasemi-cpufreq.c
@@ -226,7 +226,7 @@ static int pas_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 	 * We don't support CPU hotplug. Don't unmap after the system
 	 * has already made it to a running state.
 	 */
-	if (system_state != SYSTEM_BOOTING)
+	if (system_state >= SYSTEM_RUNNING)
 		return 0;
 
 	if (sdcasr_mapbase)
diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c
index 770a9ae1999a..37b30071c220 100644
--- a/drivers/cpufreq/speedstep-smi.c
+++ b/drivers/cpufreq/speedstep-smi.c
@@ -378,7 +378,7 @@ static void __exit speedstep_exit(void)
 	cpufreq_unregister_driver(&speedstep_driver);
 }
 
-module_param(smi_port, int, 0444);
+module_param_hw(smi_port, int, ioport, 0444);
 module_param(smi_cmd,  int, 0444);
 module_param(smi_sig, uint, 0444);
 
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 548b90be7685..60bb64f4329d 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -111,7 +111,8 @@ void cpuidle_use_deepest_state(bool enable)
 
 	preempt_disable();
 	dev = cpuidle_get_device();
-	dev->use_deepest_state = enable;
+	if (dev)
+		dev->use_deepest_state = enable;
 	preempt_enable();
 }
 
@@ -219,6 +220,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 	entered_state = target_state->enter(dev, drv, index);
 	start_critical_timings();
 
+	sched_clock_idle_wakeup_event();
 	time_end = ns_to_ktime(local_clock());
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
 
diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c
index ffca4fc0061d..ae8eb0359889 100644
--- a/drivers/cpuidle/dt_idle_states.c
+++ b/drivers/cpuidle/dt_idle_states.c
@@ -180,8 +180,10 @@ int dt_init_idle_driver(struct cpuidle_driver *drv,
 		if (!state_node)
 			break;
 
-		if (!of_device_is_available(state_node))
+		if (!of_device_is_available(state_node)) {
+			of_node_put(state_node);
 			continue;
+		}
 
 		if (!idle_state_valid(state_node, i, cpumask)) {
 			pr_warn("%s idle state not valid, bailing out\n",
diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
index 21472e427f6f..a111cd72797b 100644
--- a/drivers/crypto/virtio/virtio_crypto_core.c
+++ b/drivers/crypto/virtio/virtio_crypto_core.c
@@ -119,8 +119,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi)
 		names[i] = vi->data_vq[i].name;
 	}
 
-	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
-					 names, NULL);
+	ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, callbacks, names, NULL);
 	if (ret)
 		goto err_find;
 
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index b7053eafd88e..b79aa8f7a497 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -19,7 +19,7 @@ config DEV_DAX
 
 config DEV_DAX_PMEM
 	tristate "PMEM DAX: direct access to persistent memory"
-	depends on LIBNVDIMM && NVDIMM_DAX
+	depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX
 	default DEV_DAX
 	help
 	  Support raw access to persistent memory.  Note that this
@@ -28,9 +28,4 @@ config DEV_DAX_PMEM
 
 	  Say Y if unsure
 
-config NR_DEV_DAX
-	int "Maximum number of Device-DAX instances"
-	default 32768
-	range 256 2147483647
-
 endif
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 465dcd7317d5..922d0823f8ec 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -14,16 +14,13 @@
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/magic.h>
+#include <linux/genhd.h>
 #include <linux/cdev.h>
 #include <linux/hash.h>
 #include <linux/slab.h>
 #include <linux/dax.h>
 #include <linux/fs.h>
 
-static int nr_dax = CONFIG_NR_DEV_DAX;
-module_param(nr_dax, int, S_IRUGO);
-MODULE_PARM_DESC(nr_dax, "max number of dax device instances");
-
 static dev_t dax_devt;
 DEFINE_STATIC_SRCU(dax_srcu);
 static struct vfsmount *dax_mnt;
@@ -47,6 +44,77 @@ void dax_read_unlock(int id)
 }
 EXPORT_SYMBOL_GPL(dax_read_unlock);
 
+#ifdef CONFIG_BLOCK
+int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
+		pgoff_t *pgoff)
+{
+	phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;
+
+	if (pgoff)
+		*pgoff = PHYS_PFN(phys_off);
+	if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
+		return -EINVAL;
+	return 0;
+}
+EXPORT_SYMBOL(bdev_dax_pgoff);
+
+/**
+ * __bdev_dax_supported() - Check if the device supports dax for filesystem
+ * @sb: The superblock of the device
+ * @blocksize: The block size of the device
+ *
+ * This is a library function for filesystems to check if the block device
+ * can be mounted with dax option.
+ *
+ * Return: negative errno if unsupported, 0 if supported.
+ */
+int __bdev_dax_supported(struct super_block *sb, int blocksize)
+{
+	struct block_device *bdev = sb->s_bdev;
+	struct dax_device *dax_dev;
+	pgoff_t pgoff;
+	int err, id;
+	void *kaddr;
+	pfn_t pfn;
+	long len;
+
+	if (blocksize != PAGE_SIZE) {
+		pr_err("VFS (%s): error: unsupported blocksize for dax\n",
+				sb->s_id);
+		return -EINVAL;
+	}
+
+	err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
+	if (err) {
+		pr_err("VFS (%s): error: unaligned partition for dax\n",
+				sb->s_id);
+		return err;
+	}
+
+	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+	if (!dax_dev) {
+		pr_err("VFS (%s): error: device does not support dax\n",
+				sb->s_id);
+		return -EOPNOTSUPP;
+	}
+
+	id = dax_read_lock();
+	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
+	dax_read_unlock(id);
+
+	put_dax(dax_dev);
+
+	if (len < 1) {
+		pr_err("VFS (%s): error: dax access failed (%ld)",
+				sb->s_id, len);
+		return len < 0 ? len : -EIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__bdev_dax_supported);
+#endif
+
 /**
  * struct dax_device - anchor object for dax services
  * @inode: core vfs
@@ -142,9 +210,12 @@ EXPORT_SYMBOL_GPL(kill_dax);
 static struct inode *dax_alloc_inode(struct super_block *sb)
 {
 	struct dax_device *dax_dev;
+	struct inode *inode;
 
 	dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL);
-	return &dax_dev->inode;
+	inode = &dax_dev->inode;
+	inode->i_rdev = 0;
+	return inode;
 }
 
 static struct dax_device *to_dax_dev(struct inode *inode)
@@ -159,7 +230,8 @@ static void dax_i_callback(struct rcu_head *head)
 
 	kfree(dax_dev->host);
 	dax_dev->host = NULL;
-	ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev));
+	if (inode->i_rdev)
+		ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev));
 	kmem_cache_free(dax_cache, dax_dev);
 }
 
@@ -261,7 +333,7 @@ struct dax_device *alloc_dax(void *private, const char *__host,
 	if (__host && !host)
 		return NULL;
 
-	minor = ida_simple_get(&dax_minor_ida, 0, nr_dax, GFP_KERNEL);
+	minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
 	if (minor < 0)
 		goto err_minor;
 
@@ -355,6 +427,7 @@ static void init_once(void *_dax_dev)
 	struct dax_device *dax_dev = _dax_dev;
 	struct inode *inode = &dax_dev->inode;
 
+	memset(dax_dev, 0, sizeof(*dax_dev));
 	inode_init_once(inode);
 }
 
@@ -405,8 +478,7 @@ static int __init dax_fs_init(void)
 	if (rc)
 		return rc;
 
-	nr_dax = max(nr_dax, 256);
-	rc = alloc_chrdev_region(&dax_devt, 0, nr_dax, "dax");
+	rc = alloc_chrdev_region(&dax_devt, 0, MINORMASK+1, "dax");
 	if (rc)
 		__dax_fs_exit();
 	return rc;
@@ -414,7 +486,7 @@ static int __init dax_fs_init(void)
 
 static void __exit dax_fs_exit(void)
 {
-	unregister_chrdev_region(dax_devt, nr_dax);
+	unregister_chrdev_region(dax_devt, MINORMASK+1);
 	ida_destroy(&dax_minor_ida);
 	__dax_fs_exit();
 }
diff --git a/drivers/devfreq/event/exynos-nocp.c b/drivers/devfreq/event/exynos-nocp.c
index 5c3e7b11e8a6..f6e7956fc91a 100644
--- a/drivers/devfreq/event/exynos-nocp.c
+++ b/drivers/devfreq/event/exynos-nocp.c
@@ -267,7 +267,11 @@ static int exynos_nocp_probe(struct platform_device *pdev)
 	}
 	platform_set_drvdata(pdev, nocp);
 
-	clk_prepare_enable(nocp->clk);
+	ret = clk_prepare_enable(nocp->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to prepare ppmu clock\n");
+		return ret;
+	}
 
 	pr_info("exynos-nocp: new NoC Probe device registered: %s\n",
 			dev_name(dev));
diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c
index 9b7350935b73..d96e3dc71cf8 100644
--- a/drivers/devfreq/event/exynos-ppmu.c
+++ b/drivers/devfreq/event/exynos-ppmu.c
@@ -44,7 +44,7 @@ struct exynos_ppmu {
 	{ "ppmu-event2-"#name, PPMU_PMNCNT2 },	\
 	{ "ppmu-event3-"#name, PPMU_PMNCNT3 }
 
-struct __exynos_ppmu_events {
+static struct __exynos_ppmu_events {
 	char *name;
 	int id;
 } ppmu_events[] = {
@@ -648,7 +648,11 @@ static int exynos_ppmu_probe(struct platform_device *pdev)
 			dev_name(&pdev->dev), desc[i].name);
 	}
 
-	clk_prepare_enable(info->ppmu.clk);
+	ret = clk_prepare_enable(info->ppmu.clk);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to prepare ppmu clock\n");
+		return ret;
+	}
 
 	return 0;
 }
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index d37e8dda8079..ec240592f5c8 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -201,6 +201,7 @@ struct ep93xx_dma_engine {
 	struct dma_device	dma_dev;
 	bool			m2m;
 	int			(*hw_setup)(struct ep93xx_dma_chan *);
+	void			(*hw_synchronize)(struct ep93xx_dma_chan *);
 	void			(*hw_shutdown)(struct ep93xx_dma_chan *);
 	void			(*hw_submit)(struct ep93xx_dma_chan *);
 	int			(*hw_interrupt)(struct ep93xx_dma_chan *);
@@ -323,6 +324,8 @@ static int m2p_hw_setup(struct ep93xx_dma_chan *edmac)
 		| M2P_CONTROL_ENABLE;
 	m2p_set_control(edmac, control);
 
+	edmac->buffer = 0;
+
 	return 0;
 }
 
@@ -331,21 +334,27 @@ static inline u32 m2p_channel_state(struct ep93xx_dma_chan *edmac)
 	return (readl(edmac->regs + M2P_STATUS) >> 4) & 0x3;
 }
 
-static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac)
+static void m2p_hw_synchronize(struct ep93xx_dma_chan *edmac)
 {
+	unsigned long flags;
 	u32 control;
 
+	spin_lock_irqsave(&edmac->lock, flags);
 	control = readl(edmac->regs + M2P_CONTROL);
 	control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT);
 	m2p_set_control(edmac, control);
+	spin_unlock_irqrestore(&edmac->lock, flags);
 
 	while (m2p_channel_state(edmac) >= M2P_STATE_ON)
-		cpu_relax();
+		schedule();
+}
 
+static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac)
+{
 	m2p_set_control(edmac, 0);
 
-	while (m2p_channel_state(edmac) == M2P_STATE_STALL)
-		cpu_relax();
+	while (m2p_channel_state(edmac) != M2P_STATE_IDLE)
+		dev_warn(chan2dev(edmac), "M2P: Not yet IDLE\n");
 }
 
 static void m2p_fill_desc(struct ep93xx_dma_chan *edmac)
@@ -1161,6 +1170,26 @@ fail:
 }
 
 /**
+ * ep93xx_dma_synchronize - Synchronizes the termination of transfers to the
+ * current context.
+ * @chan: channel
+ *
+ * Synchronizes the DMA channel termination to the current context. When this
+ * function returns it is guaranteed that all transfers for previously issued
+ * descriptors have stopped and and it is safe to free the memory associated
+ * with them. Furthermore it is guaranteed that all complete callback functions
+ * for a previously submitted descriptor have finished running and it is safe to
+ * free resources accessed from within the complete callbacks.
+ */
+static void ep93xx_dma_synchronize(struct dma_chan *chan)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+
+	if (edmac->edma->hw_synchronize)
+		edmac->edma->hw_synchronize(edmac);
+}
+
+/**
  * ep93xx_dma_terminate_all - terminate all transactions
  * @chan: channel
  *
@@ -1323,6 +1352,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev)
 	dma_dev->device_prep_slave_sg = ep93xx_dma_prep_slave_sg;
 	dma_dev->device_prep_dma_cyclic = ep93xx_dma_prep_dma_cyclic;
 	dma_dev->device_config = ep93xx_dma_slave_config;
+	dma_dev->device_synchronize = ep93xx_dma_synchronize;
 	dma_dev->device_terminate_all = ep93xx_dma_terminate_all;
 	dma_dev->device_issue_pending = ep93xx_dma_issue_pending;
 	dma_dev->device_tx_status = ep93xx_dma_tx_status;
@@ -1340,6 +1370,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev)
 	} else {
 		dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
 
+		edma->hw_synchronize = m2p_hw_synchronize;
 		edma->hw_setup = m2p_hw_setup;
 		edma->hw_shutdown = m2p_hw_shutdown;
 		edma->hw_submit = m2p_hw_submit;
diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c
index a28a01fcba67..f3e211f8f6c5 100644
--- a/drivers/dma/mv_xor_v2.c
+++ b/drivers/dma/mv_xor_v2.c
@@ -161,6 +161,7 @@ struct mv_xor_v2_device {
 	struct mv_xor_v2_sw_desc *sw_desq;
 	int desc_size;
 	unsigned int npendings;
+	unsigned int hw_queue_idx;
 };
 
 /**
@@ -214,18 +215,6 @@ static void mv_xor_v2_set_data_buffers(struct mv_xor_v2_device *xor_dev,
 }
 
 /*
- * Return the next available index in the DESQ.
- */
-static int mv_xor_v2_get_desq_write_ptr(struct mv_xor_v2_device *xor_dev)
-{
-	/* read the index for the next available descriptor in the DESQ */
-	u32 reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_ALLOC_OFF);
-
-	return ((reg >> MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_SHIFT)
-		& MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_MASK);
-}
-
-/*
  * notify the engine of new descriptors, and update the available index.
  */
 static void mv_xor_v2_add_desc_to_desq(struct mv_xor_v2_device *xor_dev,
@@ -257,22 +246,6 @@ static int mv_xor_v2_set_desc_size(struct mv_xor_v2_device *xor_dev)
 	return MV_XOR_V2_EXT_DESC_SIZE;
 }
 
-/*
- * Set the IMSG threshold
- */
-static inline
-void mv_xor_v2_set_imsg_thrd(struct mv_xor_v2_device *xor_dev, int thrd_val)
-{
-	u32 reg;
-
-	reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF);
-
-	reg &= (~MV_XOR_V2_DMA_IMSG_THRD_MASK << MV_XOR_V2_DMA_IMSG_THRD_SHIFT);
-	reg |= (thrd_val << MV_XOR_V2_DMA_IMSG_THRD_SHIFT);
-
-	writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF);
-}
-
 static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
 {
 	struct mv_xor_v2_device *xor_dev = data;
@@ -288,12 +261,6 @@ static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
 	if (!ndescs)
 		return IRQ_NONE;
 
-	/*
-	 * Update IMSG threshold, to disable new IMSG interrupts until
-	 * end of the tasklet
-	 */
-	mv_xor_v2_set_imsg_thrd(xor_dev, MV_XOR_V2_DESC_NUM);
-
 	/* schedule a tasklet to handle descriptors callbacks */
 	tasklet_schedule(&xor_dev->irq_tasklet);
 
@@ -306,7 +273,6 @@ static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
 static dma_cookie_t
 mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx)
 {
-	int desq_ptr;
 	void *dest_hw_desc;
 	dma_cookie_t cookie;
 	struct mv_xor_v2_sw_desc *sw_desc =
@@ -322,15 +288,15 @@ mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx)
 	spin_lock_bh(&xor_dev->lock);
 	cookie = dma_cookie_assign(tx);
 
-	/* get the next available slot in the DESQ */
-	desq_ptr = mv_xor_v2_get_desq_write_ptr(xor_dev);
-
 	/* copy the HW descriptor from the SW descriptor to the DESQ */
-	dest_hw_desc = xor_dev->hw_desq_virt + desq_ptr;
+	dest_hw_desc = xor_dev->hw_desq_virt + xor_dev->hw_queue_idx;
 
 	memcpy(dest_hw_desc, &sw_desc->hw_desc, xor_dev->desc_size);
 
 	xor_dev->npendings++;
+	xor_dev->hw_queue_idx++;
+	if (xor_dev->hw_queue_idx >= MV_XOR_V2_DESC_NUM)
+		xor_dev->hw_queue_idx = 0;
 
 	spin_unlock_bh(&xor_dev->lock);
 
@@ -344,6 +310,7 @@ static struct mv_xor_v2_sw_desc	*
 mv_xor_v2_prep_sw_desc(struct mv_xor_v2_device *xor_dev)
 {
 	struct mv_xor_v2_sw_desc *sw_desc;
+	bool found = false;
 
 	/* Lock the channel */
 	spin_lock_bh(&xor_dev->lock);
@@ -355,19 +322,23 @@ mv_xor_v2_prep_sw_desc(struct mv_xor_v2_device *xor_dev)
 		return NULL;
 	}
 
-	/* get a free SW descriptor from the SW DESQ */
-	sw_desc = list_first_entry(&xor_dev->free_sw_desc,
-				   struct mv_xor_v2_sw_desc, free_list);
+	list_for_each_entry(sw_desc, &xor_dev->free_sw_desc, free_list) {
+		if (async_tx_test_ack(&sw_desc->async_tx)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		spin_unlock_bh(&xor_dev->lock);
+		return NULL;
+	}
+
 	list_del(&sw_desc->free_list);
 
 	/* Release the channel */
 	spin_unlock_bh(&xor_dev->lock);
 
-	/* set the async tx descriptor */
-	dma_async_tx_descriptor_init(&sw_desc->async_tx, &xor_dev->dmachan);
-	sw_desc->async_tx.tx_submit = mv_xor_v2_tx_submit;
-	async_tx_ack(&sw_desc->async_tx);
-
 	return sw_desc;
 }
 
@@ -389,6 +360,8 @@ mv_xor_v2_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
 		__func__, len, &src, &dest, flags);
 
 	sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+	if (!sw_desc)
+		return NULL;
 
 	sw_desc->async_tx.flags = flags;
 
@@ -443,6 +416,8 @@ mv_xor_v2_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
 		__func__, src_cnt, len, &dest, flags);
 
 	sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+	if (!sw_desc)
+		return NULL;
 
 	sw_desc->async_tx.flags = flags;
 
@@ -491,6 +466,8 @@ mv_xor_v2_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
 		container_of(chan, struct mv_xor_v2_device, dmachan);
 
 	sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+	if (!sw_desc)
+		return NULL;
 
 	/* set the HW descriptor */
 	hw_descriptor = &sw_desc->hw_desc;
@@ -554,7 +531,6 @@ static void mv_xor_v2_tasklet(unsigned long data)
 {
 	struct mv_xor_v2_device *xor_dev = (struct mv_xor_v2_device *) data;
 	int pending_ptr, num_of_pending, i;
-	struct mv_xor_v2_descriptor *next_pending_hw_desc = NULL;
 	struct mv_xor_v2_sw_desc *next_pending_sw_desc = NULL;
 
 	dev_dbg(xor_dev->dmadev.dev, "%s %d\n", __func__, __LINE__);
@@ -562,17 +538,10 @@ static void mv_xor_v2_tasklet(unsigned long data)
 	/* get the pending descriptors parameters */
 	num_of_pending = mv_xor_v2_get_pending_params(xor_dev, &pending_ptr);
 
-	/* next HW descriptor */
-	next_pending_hw_desc = xor_dev->hw_desq_virt + pending_ptr;
-
 	/* loop over free descriptors */
 	for (i = 0; i < num_of_pending; i++) {
-
-		if (pending_ptr > MV_XOR_V2_DESC_NUM)
-			pending_ptr = 0;
-
-		if (next_pending_sw_desc != NULL)
-			next_pending_hw_desc++;
+		struct mv_xor_v2_descriptor *next_pending_hw_desc =
+			xor_dev->hw_desq_virt + pending_ptr;
 
 		/* get the SW descriptor related to the HW descriptor */
 		next_pending_sw_desc =
@@ -608,15 +577,14 @@ static void mv_xor_v2_tasklet(unsigned long data)
 
 		/* increment the next descriptor */
 		pending_ptr++;
+		if (pending_ptr >= MV_XOR_V2_DESC_NUM)
+			pending_ptr = 0;
 	}
 
 	if (num_of_pending != 0) {
 		/* free the descriptores */
 		mv_xor_v2_free_desc_from_desq(xor_dev, num_of_pending);
 	}
-
-	/* Update IMSG threshold, to enable new IMSG interrupts */
-	mv_xor_v2_set_imsg_thrd(xor_dev, 0);
 }
 
 /*
@@ -648,9 +616,6 @@ static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev)
 	writel((xor_dev->hw_desq & 0xFFFF00000000) >> 32,
 	       xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_BAHR_OFF);
 
-	/* enable the DMA engine */
-	writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF);
-
 	/*
 	 * This is a temporary solution, until we activate the
 	 * SMMU. Set the attributes for reading & writing data buffers
@@ -694,6 +659,9 @@ static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev)
 	reg |= MV_XOR_V2_GLOB_PAUSE_AXI_TIME_DIS_VAL;
 	writel(reg, xor_dev->glob_base + MV_XOR_V2_GLOB_PAUSE);
 
+	/* enable the DMA engine */
+	writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF);
+
 	return 0;
 }
 
@@ -725,6 +693,10 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, xor_dev);
 
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
+	if (ret)
+		return ret;
+
 	xor_dev->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(xor_dev->clk) && PTR_ERR(xor_dev->clk) == -EPROBE_DEFER)
 		return -EPROBE_DEFER;
@@ -785,8 +757,15 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
 
 	/* add all SW descriptors to the free list */
 	for (i = 0; i < MV_XOR_V2_DESC_NUM; i++) {
-		xor_dev->sw_desq[i].idx = i;
-		list_add(&xor_dev->sw_desq[i].free_list,
+		struct mv_xor_v2_sw_desc *sw_desc =
+			xor_dev->sw_desq + i;
+		sw_desc->idx = i;
+		dma_async_tx_descriptor_init(&sw_desc->async_tx,
+					     &xor_dev->dmachan);
+		sw_desc->async_tx.tx_submit = mv_xor_v2_tx_submit;
+		async_tx_ack(&sw_desc->async_tx);
+
+		list_add(&sw_desc->free_list,
 			 &xor_dev->free_sw_desc);
 	}
 
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 8b0da7fa520d..e90a7a0d760a 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -3008,7 +3008,8 @@ static int pl330_remove(struct amba_device *adev)
 
 	for (i = 0; i < AMBA_NR_IRQS; i++) {
 		irq = adev->irq[i];
-		devm_free_irq(&adev->dev, irq, pl330);
+		if (irq)
+			devm_free_irq(&adev->dev, irq, pl330);
 	}
 
 	dma_async_device_unregister(&pl330->ddma);
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index db41795fe42a..bd261c9e9664 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1287,6 +1287,9 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
 	if (desc->hwdescs.use) {
 		dptr = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
 			RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT;
+		if (dptr == 0)
+			dptr = desc->nchunks;
+		dptr--;
 		WARN_ON(dptr >= desc->nchunks);
 	} else {
 		running = desc->running;
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
index 72c649713ace..31a145154e9f 100644
--- a/drivers/dma/sh/usb-dmac.c
+++ b/drivers/dma/sh/usb-dmac.c
@@ -117,7 +117,7 @@ struct usb_dmac {
 #define USB_DMASWR			0x0008
 #define USB_DMASWR_SWR			(1 << 0)
 #define USB_DMAOR			0x0060
-#define USB_DMAOR_AE			(1 << 2)
+#define USB_DMAOR_AE			(1 << 1)
 #define USB_DMAOR_DME			(1 << 0)
 
 #define USB_DMASAR			0x0000
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 7717b094fabb..db75d4b614f7 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -214,24 +214,16 @@ static void altr_sdr_mc_create_debugfs_nodes(struct mem_ctl_info *mci)
 static unsigned long get_total_mem(void)
 {
 	struct device_node *np = NULL;
-	const unsigned int *reg, *reg_end;
-	int len, sw, aw;
-	unsigned long start, size, total_mem = 0;
+	struct resource res;
+	int ret;
+	unsigned long total_mem = 0;
 
 	for_each_node_by_type(np, "memory") {
-		aw = of_n_addr_cells(np);
-		sw = of_n_size_cells(np);
-		reg = (const unsigned int *)of_get_property(np, "reg", &len);
-		reg_end = reg + (len / sizeof(u32));
-
-		total_mem = 0;
-		do {
-			start = of_read_number(reg, aw);
-			reg += aw;
-			size = of_read_number(reg, sw);
-			reg += sw;
-			total_mem += size;
-		} while (reg < reg_end);
+		ret = of_address_to_resource(np, 0, &res);
+		if (ret)
+			continue;
+
+		total_mem += resource_size(&res);
 	}
 	edac_dbg(0, "total_mem 0x%lx\n", total_mem);
 	return total_mem;
@@ -1839,7 +1831,7 @@ static int a10_eccmgr_irqdomain_map(struct irq_domain *d, unsigned int irq,
 	return 0;
 }
 
-static struct irq_domain_ops a10_eccmgr_ic_ops = {
+static const struct irq_domain_ops a10_eccmgr_ic_ops = {
 	.map = a10_eccmgr_irqdomain_map,
 	.xlate = irq_domain_xlate_twocell,
 };
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 82dab1692264..3aea55698165 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -782,24 +782,26 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
 
 static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
 {
-	u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
-	int dimm, size0, size1;
+	int dimm, size0, size1, cs0, cs1;
 
 	edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
 
 	for (dimm = 0; dimm < 4; dimm++) {
 		size0 = 0;
+		cs0 = dimm * 2;
 
-		if (dcsb[dimm*2] & DCSB_CS_ENABLE)
-			size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, dimm);
+		if (csrow_enabled(cs0, ctrl, pvt))
+			size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs0);
 
 		size1 = 0;
-		if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE)
-			size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, dimm);
+		cs1 = dimm * 2 + 1;
+
+		if (csrow_enabled(cs1, ctrl, pvt))
+			size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1);
 
 		amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
-				dimm * 2,     size0,
-				dimm * 2 + 1, size1);
+				cs0,	size0,
+				cs1,	size1);
 	}
 }
 
@@ -2756,26 +2758,22 @@ skip:
  *	encompasses
  *
  */
-static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
+static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig)
 {
-	u32 cs_mode, nr_pages;
 	u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
+	int csrow_nr = csrow_nr_orig;
+	u32 cs_mode, nr_pages;
 
+	if (!pvt->umc)
+		csrow_nr >>= 1;
 
-	/*
-	 * The math on this doesn't look right on the surface because x/2*4 can
-	 * be simplified to x*2 but this expression makes use of the fact that
-	 * it is integral math where 1/2=0. This intermediate value becomes the
-	 * number of bits to shift the DBAM register to extract the proper CSROW
-	 * field.
-	 */
-	cs_mode = DBAM_DIMM(csrow_nr / 2, dbam);
+	cs_mode = DBAM_DIMM(csrow_nr, dbam);
 
-	nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, (csrow_nr / 2))
-							   << (20 - PAGE_SHIFT);
+	nr_pages   = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr);
+	nr_pages <<= 20 - PAGE_SHIFT;
 
 	edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
-		    csrow_nr, dct,  cs_mode);
+		    csrow_nr_orig, dct,  cs_mode);
 	edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
 
 	return nr_pages;
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index f683919981b0..8f5a56e25bd2 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -227,7 +227,7 @@
 #define			NREC_RDWR(x)		(((x)>>11) & 1)
 #define			NREC_RANK(x)		(((x)>>8) & 0x7)
 #define		NRECMEMB		0xC0
-#define			NREC_CAS(x)		(((x)>>16) & 0xFFFFFF)
+#define			NREC_CAS(x)		(((x)>>16) & 0xFFF)
 #define			NREC_RAS(x)		((x) & 0x7FFF)
 #define		NRECFGLOG		0xC4
 #define		NREEECFBDA		0xC8
@@ -371,7 +371,7 @@ struct i5000_error_info {
 	/* These registers are input ONLY if there was a
 	 * Non-Recoverable Error */
 	u16 nrecmema;		/* Non-Recoverable Mem log A */
-	u16 nrecmemb;		/* Non-Recoverable Mem log B */
+	u32 nrecmemb;		/* Non-Recoverable Mem log B */
 
 };
 
@@ -407,7 +407,7 @@ static void i5000_get_error_info(struct mem_ctl_info *mci,
 				NERR_FAT_FBD, &info->nerr_fat_fbd);
 		pci_read_config_word(pvt->branchmap_werrors,
 				NRECMEMA, &info->nrecmema);
-		pci_read_config_word(pvt->branchmap_werrors,
+		pci_read_config_dword(pvt->branchmap_werrors,
 				NRECMEMB, &info->nrecmemb);
 
 		/* Clear the error bits, by writing them back */
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 37a9ba71da44..cd889edc8516 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -368,7 +368,7 @@ struct i5400_error_info {
 
 	/* These registers are input ONLY if there was a Non-Rec Error */
 	u16 nrecmema;		/* Non-Recoverable Mem log A */
-	u16 nrecmemb;		/* Non-Recoverable Mem log B */
+	u32 nrecmemb;		/* Non-Recoverable Mem log B */
 
 };
 
@@ -458,7 +458,7 @@ static void i5400_get_error_info(struct mem_ctl_info *mci,
 				NERR_FAT_FBD, &info->nerr_fat_fbd);
 		pci_read_config_word(pvt->branchmap_werrors,
 				NRECMEMA, &info->nrecmema);
-		pci_read_config_word(pvt->branchmap_werrors,
+		pci_read_config_dword(pvt->branchmap_werrors,
 				NRECMEMB, &info->nrecmemb);
 
 		/* Clear the error bits, by writing them back */
diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c
index 2733fb5938a4..4260579e6901 100644
--- a/drivers/edac/ie31200_edac.c
+++ b/drivers/edac/ie31200_edac.c
@@ -18,10 +18,12 @@
  * 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller
  * 0c08: Xeon E3-1200 v3 Processor DRAM Controller
  * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers
+ * 5918: Xeon E3-1200 Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
  *
  * Based on Intel specification:
  * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf
  * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e3-1200-family-vol-2-datasheet.html
+ * http://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-h-processor-lines-datasheet-vol-2.html
  *
  * According to the above datasheet (p.16):
  * "
@@ -57,6 +59,7 @@
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x1918
+#define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x5918
 
 #define IE31200_DIMMS			4
 #define IE31200_RANKS			8
@@ -376,7 +379,12 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
 	void __iomem *window;
 	struct ie31200_priv *priv;
 	u32 addr_decode, mad_offset;
-	bool skl = (pdev->device == PCI_DEVICE_ID_INTEL_IE31200_HB_8);
+
+	/*
+	 * Kaby Lake seems to work like Skylake. Please re-visit this logic
+	 * when adding new CPU support.
+	 */
+	bool skl = (pdev->device >= PCI_DEVICE_ID_INTEL_IE31200_HB_8);
 
 	edac_dbg(0, "MC:\n");
 
@@ -560,6 +568,9 @@ static const struct pci_device_id ie31200_pci_tbl[] = {
 		PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		IE31200},
 	{
+		PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		IE31200},
+	{
 		0,
 	}            /* 0 terminated list. */
 };
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index ba35b7ea3686..9a2658a256a9 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -161,7 +161,7 @@ static const char * const smca_ls_mce_desc[] = {
 	"Sys Read data error thread 0",
 	"Sys read data error thread 1",
 	"DC tag error type 2",
-	"DC data error type 1 (poison comsumption)",
+	"DC data error type 1 (poison consumption)",
 	"DC data error type 2",
 	"DC data error type 3",
 	"DC tag error type 4",
diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c
index 14b7e7b71eaa..d3650df94fe8 100644
--- a/drivers/edac/mv64x60_edac.c
+++ b/drivers/edac/mv64x60_edac.c
@@ -32,21 +32,21 @@ static void mv64x60_pci_check(struct edac_pci_ctl_info *pci)
 	struct mv64x60_pci_pdata *pdata = pci->pvt_info;
 	u32 cause;
 
-	cause = in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
+	cause = readl(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
 	if (!cause)
 		return;
 
 	printk(KERN_ERR "Error in PCI %d Interface\n", pdata->pci_hose);
 	printk(KERN_ERR "Cause register: 0x%08x\n", cause);
 	printk(KERN_ERR "Address Low: 0x%08x\n",
-	       in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_LO));
+	       readl(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_LO));
 	printk(KERN_ERR "Address High: 0x%08x\n",
-	       in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_HI));
+	       readl(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_HI));
 	printk(KERN_ERR "Attribute: 0x%08x\n",
-	       in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ATTR));
+	       readl(pdata->pci_vbase + MV64X60_PCI_ERROR_ATTR));
 	printk(KERN_ERR "Command: 0x%08x\n",
-	       in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CMD));
-	out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, ~cause);
+	       readl(pdata->pci_vbase + MV64X60_PCI_ERROR_CMD));
+	writel(~cause, pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
 
 	if (cause & MV64X60_PCI_PE_MASK)
 		edac_pci_handle_pe(pci, pci->ctl_name);
@@ -61,7 +61,7 @@ static irqreturn_t mv64x60_pci_isr(int irq, void *dev_id)
 	struct mv64x60_pci_pdata *pdata = pci->pvt_info;
 	u32 val;
 
-	val = in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
+	val = readl(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
 	if (!val)
 		return IRQ_NONE;
 
@@ -93,7 +93,7 @@ static int __init mv64x60_pci_fixup(struct platform_device *pdev)
 	if (!pci_serr)
 		return -ENOMEM;
 
-	out_le32(pci_serr, in_le32(pci_serr) & ~0x1);
+	writel(readl(pci_serr) & ~0x1, pci_serr);
 	iounmap(pci_serr);
 
 	return 0;
@@ -116,7 +116,7 @@ static int mv64x60_pci_err_probe(struct platform_device *pdev)
 	pdata = pci->pvt_info;
 
 	pdata->pci_hose = pdev->id;
-	pdata->name = "mpc85xx_pci_err";
+	pdata->name = "mv64x60_pci_err";
 	platform_set_drvdata(pdev, pci);
 	pci->dev = &pdev->dev;
 	pci->dev_name = dev_name(&pdev->dev);
@@ -161,10 +161,10 @@ static int mv64x60_pci_err_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, 0);
-	out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, 0);
-	out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK,
-		 MV64X60_PCIx_ERR_MASK_VAL);
+	writel(0, pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
+	writel(0, pdata->pci_vbase + MV64X60_PCI_ERROR_MASK);
+	writel(MV64X60_PCIx_ERR_MASK_VAL,
+		  pdata->pci_vbase + MV64X60_PCI_ERROR_MASK);
 
 	if (edac_pci_add_device(pci, pdata->edac_idx) > 0) {
 		edac_dbg(3, "failed edac_pci_add_device()\n");
@@ -233,23 +233,23 @@ static void mv64x60_sram_check(struct edac_device_ctl_info *edac_dev)
 	struct mv64x60_sram_pdata *pdata = edac_dev->pvt_info;
 	u32 cause;
 
-	cause = in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
+	cause = readl(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
 	if (!cause)
 		return;
 
 	printk(KERN_ERR "Error in internal SRAM\n");
 	printk(KERN_ERR "Cause register: 0x%08x\n", cause);
 	printk(KERN_ERR "Address Low: 0x%08x\n",
-	       in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_LO));
+	       readl(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_LO));
 	printk(KERN_ERR "Address High: 0x%08x\n",
-	       in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_HI));
+	       readl(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_HI));
 	printk(KERN_ERR "Data Low: 0x%08x\n",
-	       in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_LO));
+	       readl(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_LO));
 	printk(KERN_ERR "Data High: 0x%08x\n",
-	       in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_HI));
+	       readl(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_HI));
 	printk(KERN_ERR "Parity: 0x%08x\n",
-	       in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_PARITY));
-	out_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE, 0);
+	       readl(pdata->sram_vbase + MV64X60_SRAM_ERR_PARITY));
+	writel(0, pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
 
 	edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
 }
@@ -260,7 +260,7 @@ static irqreturn_t mv64x60_sram_isr(int irq, void *dev_id)
 	struct mv64x60_sram_pdata *pdata = edac_dev->pvt_info;
 	u32 cause;
 
-	cause = in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
+	cause = readl(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
 	if (!cause)
 		return IRQ_NONE;
 
@@ -322,7 +322,7 @@ static int mv64x60_sram_err_probe(struct platform_device *pdev)
 	}
 
 	/* setup SRAM err registers */
-	out_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE, 0);
+	writel(0, pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
 
 	edac_dev->mod_name = EDAC_MOD_STR;
 	edac_dev->ctl_name = pdata->name;
@@ -398,7 +398,7 @@ static void mv64x60_cpu_check(struct edac_device_ctl_info *edac_dev)
 	struct mv64x60_cpu_pdata *pdata = edac_dev->pvt_info;
 	u32 cause;
 
-	cause = in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
+	cause = readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
 	    MV64x60_CPU_CAUSE_MASK;
 	if (!cause)
 		return;
@@ -406,16 +406,16 @@ static void mv64x60_cpu_check(struct edac_device_ctl_info *edac_dev)
 	printk(KERN_ERR "Error on CPU interface\n");
 	printk(KERN_ERR "Cause register: 0x%08x\n", cause);
 	printk(KERN_ERR "Address Low: 0x%08x\n",
-	       in_le32(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_LO));
+	       readl(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_LO));
 	printk(KERN_ERR "Address High: 0x%08x\n",
-	       in_le32(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_HI));
+	       readl(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_HI));
 	printk(KERN_ERR "Data Low: 0x%08x\n",
-	       in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_LO));
+	       readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_LO));
 	printk(KERN_ERR "Data High: 0x%08x\n",
-	       in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_HI));
+	       readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_HI));
 	printk(KERN_ERR "Parity: 0x%08x\n",
-	       in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_PARITY));
-	out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE, 0);
+	       readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_PARITY));
+	writel(0, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE);
 
 	edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
 }
@@ -426,7 +426,7 @@ static irqreturn_t mv64x60_cpu_isr(int irq, void *dev_id)
 	struct mv64x60_cpu_pdata *pdata = edac_dev->pvt_info;
 	u32 cause;
 
-	cause = in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
+	cause = readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
 	    MV64x60_CPU_CAUSE_MASK;
 	if (!cause)
 		return IRQ_NONE;
@@ -515,9 +515,9 @@ static int mv64x60_cpu_err_probe(struct platform_device *pdev)
 	}
 
 	/* setup CPU err registers */
-	out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE, 0);
-	out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK, 0);
-	out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK, 0x000000ff);
+	writel(0, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE);
+	writel(0, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK);
+	writel(0x000000ff, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK);
 
 	edac_dev->mod_name = EDAC_MOD_STR;
 	edac_dev->ctl_name = pdata->name;
@@ -596,13 +596,13 @@ static void mv64x60_mc_check(struct mem_ctl_info *mci)
 	u32 comp_ecc;
 	u32 syndrome;
 
-	reg = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
+	reg = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
 	if (!reg)
 		return;
 
 	err_addr = reg & ~0x3;
-	sdram_ecc = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_RCVD);
-	comp_ecc = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CALC);
+	sdram_ecc = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_RCVD);
+	comp_ecc = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CALC);
 	syndrome = sdram_ecc ^ comp_ecc;
 
 	/* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */
@@ -620,7 +620,7 @@ static void mv64x60_mc_check(struct mem_ctl_info *mci)
 				     mci->ctl_name, "");
 
 	/* clear the error */
-	out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0);
+	writel(0, pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
 }
 
 static irqreturn_t mv64x60_mc_isr(int irq, void *dev_id)
@@ -629,7 +629,7 @@ static irqreturn_t mv64x60_mc_isr(int irq, void *dev_id)
 	struct mv64x60_mc_pdata *pdata = mci->pvt_info;
 	u32 reg;
 
-	reg = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
+	reg = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
 	if (!reg)
 		return IRQ_NONE;
 
@@ -664,7 +664,7 @@ static void mv64x60_init_csrows(struct mem_ctl_info *mci,
 
 	get_total_mem(pdata);
 
-	ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
+	ctl = readl(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
 
 	csrow = mci->csrows[0];
 	dimm = csrow->channels[0]->dimm;
@@ -753,7 +753,7 @@ static int mv64x60_mc_err_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
+	ctl = readl(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
 	if (!(ctl & MV64X60_SDRAM_ECC)) {
 		/* Non-ECC RAM? */
 		printk(KERN_WARNING "%s: No ECC DIMMs discovered\n", __func__);
@@ -779,10 +779,10 @@ static int mv64x60_mc_err_probe(struct platform_device *pdev)
 	mv64x60_init_csrows(mci, pdata);
 
 	/* setup MC registers */
-	out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0);
-	ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL);
+	writel(0, pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
+	ctl = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL);
 	ctl = (ctl & 0xff00ffff) | 0x10000;
-	out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL, ctl);
+	writel(ctl, pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL);
 
 	res = edac_mc_add_mc(mci);
 	if (res) {
@@ -853,10 +853,10 @@ static struct platform_driver * const drivers[] = {
 
 static int __init mv64x60_edac_init(void)
 {
-	int ret = 0;
 
 	printk(KERN_INFO "Marvell MV64x60 EDAC driver " MV64x60_REVISION "\n");
 	printk(KERN_INFO "\t(C) 2006-2007 MontaVista Software\n");
+
 	/* make sure error reporting method is sane */
 	switch (edac_op_state) {
 	case EDAC_OPSTATE_POLL:
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
index 1cad5a9af8d0..8e599490f6de 100644
--- a/drivers/edac/pnd2_edac.c
+++ b/drivers/edac/pnd2_edac.c
@@ -131,7 +131,7 @@ static struct mem_ctl_info *pnd2_mci;
 
 #ifdef CONFIG_X86_INTEL_SBI_APL
 #include "linux/platform_data/sbi_apl.h"
-int sbi_send(int port, int off, int op, u32 *data)
+static int sbi_send(int port, int off, int op, u32 *data)
 {
 	struct sbi_apl_message sbi_arg;
 	int ret, read = 0;
@@ -160,7 +160,7 @@ int sbi_send(int port, int off, int op, u32 *data)
 	return ret;
 }
 #else
-int sbi_send(int port, int off, int op, u32 *data)
+static int sbi_send(int port, int off, int op, u32 *data)
 {
 	return -EUNATCH;
 }
@@ -168,14 +168,15 @@ int sbi_send(int port, int off, int op, u32 *data)
 
 static int apl_rd_reg(int port, int off, int op, void *data, size_t sz, char *name)
 {
-	int	ret = 0;
+	int ret = 0;
 
 	edac_dbg(2, "Read %s port=%x off=%x op=%x\n", name, port, off, op);
 	switch (sz) {
 	case 8:
 		ret = sbi_send(port, off + 4, op, (u32 *)(data + 4));
+		/* fall through */
 	case 4:
-		ret = sbi_send(port, off, op, (u32 *)data);
+		ret |= sbi_send(port, off, op, (u32 *)data);
 		pnd2_printk(KERN_DEBUG, "%s=%x%08x ret=%d\n", name,
 					sz == 8 ? *((u32 *)(data + 4)) : 0, *((u32 *)data), ret);
 		break;
@@ -423,16 +424,21 @@ static void dnv_mk_region(char *name, struct region *rp, void *asym)
 
 static int apl_get_registers(void)
 {
+	int ret = -ENODEV;
 	int i;
 
 	if (RD_REG(&asym_2way, b_cr_asym_2way_mem_region_mchbar))
 		return -ENODEV;
 
+	/*
+	 * RD_REGP() will fail for unpopulated or non-existent
+	 * DIMM slots. Return success if we find at least one DIMM.
+	 */
 	for (i = 0; i < APL_NUM_CHANNELS; i++)
-		if (RD_REGP(&drp0[i], d_cr_drp0, apl_dports[i]))
-			return -ENODEV;
+		if (!RD_REGP(&drp0[i], d_cr_drp0, apl_dports[i]))
+			ret = 0;
 
-	return 0;
+	return ret;
 }
 
 static int dnv_get_registers(void)
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index ea21cb651b3c..80d860cb0746 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -35,7 +35,7 @@ static LIST_HEAD(sbridge_edac_list);
 /*
  * Alter this version for the module when modifications are made
  */
-#define SBRIDGE_REVISION    " Ver: 1.1.1 "
+#define SBRIDGE_REVISION    " Ver: 1.1.2 "
 #define EDAC_MOD_STR      "sbridge_edac"
 
 /*
@@ -279,7 +279,7 @@ static const u32 correrrthrsld[] = {
  * sbridge structs
  */
 
-#define NUM_CHANNELS		8	/* 2MC per socket, four chan per MC */
+#define NUM_CHANNELS		4	/* Max channels per MC */
 #define MAX_DIMMS		3	/* Max DIMMS per channel */
 #define KNL_MAX_CHAS		38	/* KNL max num. of Cache Home Agents */
 #define KNL_MAX_CHANNELS	6	/* KNL max num. of PCI channels */
@@ -294,6 +294,12 @@ enum type {
 	KNIGHTS_LANDING,
 };
 
+enum domain {
+	IMC0 = 0,
+	IMC1,
+	SOCK,
+};
+
 struct sbridge_pvt;
 struct sbridge_info {
 	enum type	type;
@@ -324,11 +330,14 @@ struct sbridge_channel {
 struct pci_id_descr {
 	int			dev_id;
 	int			optional;
+	enum domain		dom;
 };
 
 struct pci_id_table {
 	const struct pci_id_descr	*descr;
-	int				n_devs;
+	int				n_devs_per_imc;
+	int				n_devs_per_sock;
+	int				n_imcs_per_sock;
 	enum type			type;
 };
 
@@ -337,7 +346,9 @@ struct sbridge_dev {
 	u8			bus, mc;
 	u8			node_id, source_id;
 	struct pci_dev		**pdev;
+	enum domain		dom;
 	int			n_devs;
+	int			i_devs;
 	struct mem_ctl_info	*mci;
 };
 
@@ -352,11 +363,12 @@ struct knl_pvt {
 };
 
 struct sbridge_pvt {
-	struct pci_dev		*pci_ta, *pci_ddrio, *pci_ras;
+	/* Devices per socket */
+	struct pci_dev		*pci_ddrio;
 	struct pci_dev		*pci_sad0, *pci_sad1;
-	struct pci_dev		*pci_ha0, *pci_ha1;
 	struct pci_dev		*pci_br0, *pci_br1;
-	struct pci_dev		*pci_ha1_ta;
+	/* Devices per memory controller */
+	struct pci_dev		*pci_ha, *pci_ta, *pci_ras;
 	struct pci_dev		*pci_tad[NUM_CHANNELS];
 
 	struct sbridge_dev	*sbridge_dev;
@@ -373,39 +385,42 @@ struct sbridge_pvt {
 	struct knl_pvt knl;
 };
 
-#define PCI_DESCR(device_id, opt)	\
+#define PCI_DESCR(device_id, opt, domain)	\
 	.dev_id = (device_id),		\
-	.optional = opt
+	.optional = opt,	\
+	.dom = domain
 
 static const struct pci_id_descr pci_dev_descr_sbridge[] = {
 		/* Processor Home Agent */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0,   0, IMC0) },
 
 		/* Memory controller */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA,    0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0,  0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1,  0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2,  0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3,  0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1, SOCK) },
 
 		/* System Address Decoder */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0,      0, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1,      0, SOCK) },
 
 		/* Broadcast Registers */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0)		},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR,        0, SOCK) },
 };
 
-#define PCI_ID_TABLE_ENTRY(A, T) {	\
+#define PCI_ID_TABLE_ENTRY(A, N, M, T) {	\
 	.descr = A,			\
-	.n_devs = ARRAY_SIZE(A),	\
+	.n_devs_per_imc = N,	\
+	.n_devs_per_sock = ARRAY_SIZE(A),	\
+	.n_imcs_per_sock = M,	\
 	.type = T			\
 }
 
 static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, SANDY_BRIDGE),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, ARRAY_SIZE(pci_dev_descr_sbridge), 1, SANDY_BRIDGE),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -439,40 +454,39 @@ static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
 
 static const struct pci_id_descr pci_dev_descr_ibridge[] = {
 		/* Processor Home Agent */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0)		},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0,        0, IMC0) },
 
 		/* Memory controller */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0)		},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS, 0)		},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA,     0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS,    0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3,   0, IMC0) },
+
+		/* Optional, mode 2HA */
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1,        1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA,     1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS,    1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0,   1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1,   1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2,   1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3,   1, IMC1) },
+
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1, SOCK) },
 
 		/* System Address Decoder */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_SAD, 0)			},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_SAD,            0, SOCK) },
 
 		/* Broadcast Registers */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR0, 1)			},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR1, 0)			},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR0,            1, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR1,            0, SOCK) },
 
-		/* Optional, mode 2HA */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1)		},
-#if 0
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1)	},
-#endif
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3, 1)	},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1)	},
 };
 
 static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, IVY_BRIDGE),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, 12, 2, IVY_BRIDGE),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -498,9 +512,9 @@ static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0	0x2fa0
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1	0x2f60
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA	0x2fa8
-#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL 0x2f71
+#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM	0x2f71
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA	0x2f68
-#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_THERMAL 0x2f79
+#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM	0x2f79
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0 0x2ffc
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1 0x2ffd
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0 0x2faa
@@ -517,35 +531,33 @@ static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3 0x2fbb
 static const struct pci_id_descr pci_dev_descr_haswell[] = {
 	/* first item must be the HA */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0, 0)		},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1, 0)	},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1, 1)		},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA, 0)		},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3, 1)	},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0, 1)		},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1, 1)		},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2, 1)		},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3, 1)		},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA, 1)		},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_THERMAL, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3, 1)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0,      0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1,      1, IMC1) },
+
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0, 0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1, 0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2, 1, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3, 1, IMC0) },
+
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA,   1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM,   1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0, 1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1, 1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2, 1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3, 1, IMC1) },
+
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0, 0, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1, 0, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0,   1, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1,   1, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2,   1, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3,   1, SOCK) },
 };
 
 static const struct pci_id_table pci_dev_descr_haswell_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, HASWELL),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, 13, 2, HASWELL),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -559,7 +571,7 @@ static const struct pci_id_table pci_dev_descr_haswell_table[] = {
 /* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */
 #define PCI_DEVICE_ID_INTEL_KNL_IMC_MC       0x7840
 /* DRAM channel stuff; bank addrs, dimmmtr, etc.. 2-8-2 - 2-9-4 (6 of these) */
-#define PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL  0x7843
+#define PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN     0x7843
 /* kdrwdbu TAD limits/offsets, MCMTR - 2-10-1, 2-11-1 (2 of these) */
 #define PCI_DEVICE_ID_INTEL_KNL_IMC_TA       0x7844
 /* CHA broadcast registers, dram rules - 1-29-0 (1 of these) */
@@ -579,17 +591,17 @@ static const struct pci_id_table pci_dev_descr_haswell_table[] = {
  */
 
 static const struct pci_id_descr pci_dev_descr_knl[] = {
-	[0]         = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0, 0) },
-	[1]         = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1, 0) },
-	[2 ... 3]   = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_MC, 0)},
-	[4 ... 41]  = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHA, 0) },
-	[42 ... 47] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL, 0) },
-	[48]        = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TA, 0) },
-	[49]        = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM, 0) },
+	[0 ... 1]   = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_MC,    0, IMC0)},
+	[2 ... 7]   = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN,  0, IMC0) },
+	[8]	    = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TA,    0, IMC0) },
+	[9]	    = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM, 0, IMC0) },
+	[10]	    = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0,  0, SOCK) },
+	[11]	    = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1,  0, SOCK) },
+	[12 ... 49] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHA,   0, SOCK) },
 };
 
 static const struct pci_id_table pci_dev_descr_knl_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, KNIGHTS_LANDING),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, ARRAY_SIZE(pci_dev_descr_knl), 1, KNIGHTS_LANDING),
 	{0,}
 };
 
@@ -615,9 +627,9 @@ static const struct pci_id_table pci_dev_descr_knl_table[] = {
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0	0x6fa0
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1	0x6f60
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA	0x6fa8
-#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL 0x6f71
+#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM	0x6f71
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA	0x6f68
-#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_THERMAL 0x6f79
+#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM	0x6f79
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0 0x6ffc
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1 0x6ffd
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0 0x6faa
@@ -632,32 +644,30 @@ static const struct pci_id_table pci_dev_descr_knl_table[] = {
 
 static const struct pci_id_descr pci_dev_descr_broadwell[] = {
 	/* first item must be the HA */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0, 0)		},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1, 0)	},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1, 1)		},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3, 1)	},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0, 1)	},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_THERMAL, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3, 1)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0,      0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1,      1, IMC1) },
+
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0, 0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1, 0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2, 1, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3, 1, IMC0) },
+
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA,   1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM,   1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0, 1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1, 1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2, 1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3, 1, IMC1) },
+
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0, 0, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1, 0, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0,   1, SOCK) },
 };
 
 static const struct pci_id_table pci_dev_descr_broadwell_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, BROADWELL),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, 10, 2, BROADWELL),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -709,7 +719,8 @@ static inline int numcol(u32 mtr)
 	return 1 << cols;
 }
 
-static struct sbridge_dev *get_sbridge_dev(u8 bus, int multi_bus)
+static struct sbridge_dev *get_sbridge_dev(u8 bus, enum domain dom, int multi_bus,
+					   struct sbridge_dev *prev)
 {
 	struct sbridge_dev *sbridge_dev;
 
@@ -722,16 +733,19 @@ static struct sbridge_dev *get_sbridge_dev(u8 bus, int multi_bus)
 				struct sbridge_dev, list);
 	}
 
-	list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
-		if (sbridge_dev->bus == bus)
+	sbridge_dev = list_entry(prev ? prev->list.next
+				      : sbridge_edac_list.next, struct sbridge_dev, list);
+
+	list_for_each_entry_from(sbridge_dev, &sbridge_edac_list, list) {
+		if (sbridge_dev->bus == bus && (dom == SOCK || dom == sbridge_dev->dom))
 			return sbridge_dev;
 	}
 
 	return NULL;
 }
 
-static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
-					   const struct pci_id_table *table)
+static struct sbridge_dev *alloc_sbridge_dev(u8 bus, enum domain dom,
+					     const struct pci_id_table *table)
 {
 	struct sbridge_dev *sbridge_dev;
 
@@ -739,15 +753,17 @@ static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
 	if (!sbridge_dev)
 		return NULL;
 
-	sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs,
-				   GFP_KERNEL);
+	sbridge_dev->pdev = kcalloc(table->n_devs_per_imc,
+				    sizeof(*sbridge_dev->pdev),
+				    GFP_KERNEL);
 	if (!sbridge_dev->pdev) {
 		kfree(sbridge_dev);
 		return NULL;
 	}
 
 	sbridge_dev->bus = bus;
-	sbridge_dev->n_devs = table->n_devs;
+	sbridge_dev->dom = dom;
+	sbridge_dev->n_devs = table->n_devs_per_imc;
 	list_add_tail(&sbridge_dev->list, &sbridge_edac_list);
 
 	return sbridge_dev;
@@ -1044,79 +1060,6 @@ static int haswell_chan_hash(int idx, u64 addr)
 	return idx;
 }
 
-/****************************************************************************
-			Memory check routines
- ****************************************************************************/
-static struct pci_dev *get_pdev_same_bus(u8 bus, u32 id)
-{
-	struct pci_dev *pdev = NULL;
-
-	do {
-		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, id, pdev);
-		if (pdev && pdev->bus->number == bus)
-			break;
-	} while (pdev);
-
-	return pdev;
-}
-
-/**
- * check_if_ecc_is_active() - Checks if ECC is active
- * @bus:	Device bus
- * @type:	Memory controller type
- * returns: 0 in case ECC is active, -ENODEV if it can't be determined or
- *	    disabled
- */
-static int check_if_ecc_is_active(const u8 bus, enum type type)
-{
-	struct pci_dev *pdev = NULL;
-	u32 mcmtr, id;
-
-	switch (type) {
-	case IVY_BRIDGE:
-		id = PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA;
-		break;
-	case HASWELL:
-		id = PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA;
-		break;
-	case SANDY_BRIDGE:
-		id = PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA;
-		break;
-	case BROADWELL:
-		id = PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA;
-		break;
-	case KNIGHTS_LANDING:
-		/*
-		 * KNL doesn't group things by bus the same way
-		 * SB/IB/Haswell does.
-		 */
-		id = PCI_DEVICE_ID_INTEL_KNL_IMC_TA;
-		break;
-	default:
-		return -ENODEV;
-	}
-
-	if (type != KNIGHTS_LANDING)
-		pdev = get_pdev_same_bus(bus, id);
-	else
-		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, id, 0);
-
-	if (!pdev) {
-		sbridge_printk(KERN_ERR, "Couldn't find PCI device "
-					"%04x:%04x! on bus %02d\n",
-					PCI_VENDOR_ID_INTEL, id, bus);
-		return -ENODEV;
-	}
-
-	pci_read_config_dword(pdev,
-			type == KNIGHTS_LANDING ? KNL_MCMTR : MCMTR, &mcmtr);
-	if (!IS_ECC_ENABLED(mcmtr)) {
-		sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
-		return -ENODEV;
-	}
-	return 0;
-}
-
 /* Low bits of TAD limit, and some metadata. */
 static const u32 knl_tad_dram_limit_lo[] = {
 	0x400, 0x500, 0x600, 0x700,
@@ -1587,25 +1530,13 @@ static int knl_get_dimm_capacity(struct sbridge_pvt *pvt, u64 *mc_sizes)
 	return 0;
 }
 
-static int get_dimm_config(struct mem_ctl_info *mci)
+static void get_source_id(struct mem_ctl_info *mci)
 {
 	struct sbridge_pvt *pvt = mci->pvt_info;
-	struct dimm_info *dimm;
-	unsigned i, j, banks, ranks, rows, cols, npages;
-	u64 size;
 	u32 reg;
-	enum edac_type mode;
-	enum mem_type mtype;
-	int channels = pvt->info.type == KNIGHTS_LANDING ?
-		KNL_MAX_CHANNELS : NUM_CHANNELS;
-	u64 knl_mc_sizes[KNL_MAX_CHANNELS];
 
-	if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) {
-		pci_read_config_dword(pvt->pci_ha0, HASWELL_HASYSDEFEATURE2, &reg);
-		pvt->is_chan_hash = GET_BITFIELD(reg, 21, 21);
-	}
 	if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL ||
-			pvt->info.type == KNIGHTS_LANDING)
+	    pvt->info.type == KNIGHTS_LANDING)
 		pci_read_config_dword(pvt->pci_sad1, SAD_TARGET, &reg);
 	else
 		pci_read_config_dword(pvt->pci_br0, SAD_TARGET, &reg);
@@ -1614,50 +1545,19 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 		pvt->sbridge_dev->source_id = SOURCE_ID_KNL(reg);
 	else
 		pvt->sbridge_dev->source_id = SOURCE_ID(reg);
+}
 
-	pvt->sbridge_dev->node_id = pvt->info.get_node_id(pvt);
-	edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
-		 pvt->sbridge_dev->mc,
-		 pvt->sbridge_dev->node_id,
-		 pvt->sbridge_dev->source_id);
-
-	/* KNL doesn't support mirroring or lockstep,
-	 * and is always closed page
-	 */
-	if (pvt->info.type == KNIGHTS_LANDING) {
-		mode = EDAC_S4ECD4ED;
-		pvt->is_mirrored = false;
-
-		if (knl_get_dimm_capacity(pvt, knl_mc_sizes) != 0)
-			return -1;
-	} else {
-		pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
-		if (IS_MIRROR_ENABLED(reg)) {
-			edac_dbg(0, "Memory mirror is enabled\n");
-			pvt->is_mirrored = true;
-		} else {
-			edac_dbg(0, "Memory mirror is disabled\n");
-			pvt->is_mirrored = false;
-		}
-
-		pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
-		if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
-			edac_dbg(0, "Lockstep is enabled\n");
-			mode = EDAC_S8ECD8ED;
-			pvt->is_lockstep = true;
-		} else {
-			edac_dbg(0, "Lockstep is disabled\n");
-			mode = EDAC_S4ECD4ED;
-			pvt->is_lockstep = false;
-		}
-		if (IS_CLOSE_PG(pvt->info.mcmtr)) {
-			edac_dbg(0, "address map is on closed page mode\n");
-			pvt->is_close_pg = true;
-		} else {
-			edac_dbg(0, "address map is on open page mode\n");
-			pvt->is_close_pg = false;
-		}
-	}
+static int __populate_dimms(struct mem_ctl_info *mci,
+			    u64 knl_mc_sizes[KNL_MAX_CHANNELS],
+			    enum edac_type mode)
+{
+	struct sbridge_pvt *pvt = mci->pvt_info;
+	int channels = pvt->info.type == KNIGHTS_LANDING ? KNL_MAX_CHANNELS
+							 : NUM_CHANNELS;
+	unsigned int i, j, banks, ranks, rows, cols, npages;
+	struct dimm_info *dimm;
+	enum mem_type mtype;
+	u64 size;
 
 	mtype = pvt->info.get_memory_type(pvt);
 	if (mtype == MEM_RDDR3 || mtype == MEM_RDDR4)
@@ -1688,8 +1588,7 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 		}
 
 		for (j = 0; j < max_dimms_per_channel; j++) {
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
-				       i, j, 0);
+			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, j, 0);
 			if (pvt->info.type == KNIGHTS_LANDING) {
 				pci_read_config_dword(pvt->knl.pci_channel[i],
 					knl_mtr_reg, &mtr);
@@ -1699,6 +1598,12 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 			}
 			edac_dbg(4, "Channel #%d  MTR%d = %x\n", i, j, mtr);
 			if (IS_DIMM_PRESENT(mtr)) {
+				if (!IS_ECC_ENABLED(pvt->info.mcmtr)) {
+					sbridge_printk(KERN_ERR, "CPU SrcID #%d, Ha #%d, Channel #%d has DIMMs, but ECC is disabled\n",
+						       pvt->sbridge_dev->source_id,
+						       pvt->sbridge_dev->dom, i);
+					return -ENODEV;
+				}
 				pvt->channel[i].dimms++;
 
 				ranks = numrank(pvt->info.type, mtr);
@@ -1717,7 +1622,7 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 				npages = MiB_TO_PAGES(size);
 
 				edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
-					 pvt->sbridge_dev->mc, i/4, i%4, j,
+					 pvt->sbridge_dev->mc, pvt->sbridge_dev->dom, i, j,
 					 size, npages,
 					 banks, ranks, rows, cols);
 
@@ -1727,8 +1632,8 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 				dimm->mtype = mtype;
 				dimm->edac_mode = mode;
 				snprintf(dimm->label, sizeof(dimm->label),
-					 "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u",
-					 pvt->sbridge_dev->source_id, i/4, i%4, j);
+						 "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u",
+						 pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom, i, j);
 			}
 		}
 	}
@@ -1736,6 +1641,65 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 	return 0;
 }
 
+static int get_dimm_config(struct mem_ctl_info *mci)
+{
+	struct sbridge_pvt *pvt = mci->pvt_info;
+	u64 knl_mc_sizes[KNL_MAX_CHANNELS];
+	enum edac_type mode;
+	u32 reg;
+
+	if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) {
+		pci_read_config_dword(pvt->pci_ha, HASWELL_HASYSDEFEATURE2, &reg);
+		pvt->is_chan_hash = GET_BITFIELD(reg, 21, 21);
+	}
+	pvt->sbridge_dev->node_id = pvt->info.get_node_id(pvt);
+	edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
+		 pvt->sbridge_dev->mc,
+		 pvt->sbridge_dev->node_id,
+		 pvt->sbridge_dev->source_id);
+
+	/* KNL doesn't support mirroring or lockstep,
+	 * and is always closed page
+	 */
+	if (pvt->info.type == KNIGHTS_LANDING) {
+		mode = EDAC_S4ECD4ED;
+		pvt->is_mirrored = false;
+
+		if (knl_get_dimm_capacity(pvt, knl_mc_sizes) != 0)
+			return -1;
+		pci_read_config_dword(pvt->pci_ta, KNL_MCMTR, &pvt->info.mcmtr);
+	} else {
+		pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
+		if (IS_MIRROR_ENABLED(reg)) {
+			edac_dbg(0, "Memory mirror is enabled\n");
+			pvt->is_mirrored = true;
+		} else {
+			edac_dbg(0, "Memory mirror is disabled\n");
+			pvt->is_mirrored = false;
+		}
+
+		pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
+		if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
+			edac_dbg(0, "Lockstep is enabled\n");
+			mode = EDAC_S8ECD8ED;
+			pvt->is_lockstep = true;
+		} else {
+			edac_dbg(0, "Lockstep is disabled\n");
+			mode = EDAC_S4ECD4ED;
+			pvt->is_lockstep = false;
+		}
+		if (IS_CLOSE_PG(pvt->info.mcmtr)) {
+			edac_dbg(0, "address map is on closed page mode\n");
+			pvt->is_close_pg = true;
+		} else {
+			edac_dbg(0, "address map is on open page mode\n");
+			pvt->is_close_pg = false;
+		}
+	}
+
+	return __populate_dimms(mci, knl_mc_sizes, mode);
+}
+
 static void get_memory_layout(const struct mem_ctl_info *mci)
 {
 	struct sbridge_pvt *pvt = mci->pvt_info;
@@ -1816,8 +1780,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
 	 */
 	prv = 0;
 	for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
-		pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
-				      &reg);
+		pci_read_config_dword(pvt->pci_ha, tad_dram_rule[n_tads], &reg);
 		limit = TAD_LIMIT(reg);
 		if (limit <= prv)
 			break;
@@ -1899,12 +1862,12 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
 	}
 }
 
-static struct mem_ctl_info *get_mci_for_node_id(u8 node_id)
+static struct mem_ctl_info *get_mci_for_node_id(u8 node_id, u8 ha)
 {
 	struct sbridge_dev *sbridge_dev;
 
 	list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
-		if (sbridge_dev->node_id == node_id)
+		if (sbridge_dev->node_id == node_id && sbridge_dev->dom == ha)
 			return sbridge_dev->mci;
 	}
 	return NULL;
@@ -1925,7 +1888,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	int			interleave_mode, shiftup = 0;
 	unsigned		sad_interleave[pvt->info.max_interleave];
 	u32			reg, dram_rule;
-	u8			ch_way, sck_way, pkg, sad_ha = 0, ch_add = 0;
+	u8			ch_way, sck_way, pkg, sad_ha = 0;
 	u32			tad_offset;
 	u32			rir_way;
 	u32			mb, gb;
@@ -2038,13 +2001,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 		pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx);
 		*socket = sad_pkg_socket(pkg);
 		sad_ha = sad_pkg_ha(pkg);
-		if (sad_ha)
-			ch_add = 4;
 
 		if (a7mode) {
 			/* MCChanShiftUpEnable */
-			pci_read_config_dword(pvt->pci_ha0,
-					      HASWELL_HASYSDEFEATURE2, &reg);
+			pci_read_config_dword(pvt->pci_ha, HASWELL_HASYSDEFEATURE2, &reg);
 			shiftup = GET_BITFIELD(reg, 22, 22);
 		}
 
@@ -2056,8 +2016,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 		pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx);
 		*socket = sad_pkg_socket(pkg);
 		sad_ha = sad_pkg_ha(pkg);
-		if (sad_ha)
-			ch_add = 4;
 		edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %d\n",
 			 idx, *socket, sad_ha);
 	}
@@ -2068,7 +2026,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	 * Move to the proper node structure, in order to access the
 	 * right PCI registers
 	 */
-	new_mci = get_mci_for_node_id(*socket);
+	new_mci = get_mci_for_node_id(*socket, sad_ha);
 	if (!new_mci) {
 		sprintf(msg, "Struct for socket #%u wasn't initialized",
 			*socket);
@@ -2081,14 +2039,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	 * Step 2) Get memory channel
 	 */
 	prv = 0;
-	if (pvt->info.type == SANDY_BRIDGE)
-		pci_ha = pvt->pci_ha0;
-	else {
-		if (sad_ha)
-			pci_ha = pvt->pci_ha1;
-		else
-			pci_ha = pvt->pci_ha0;
-	}
+	pci_ha = pvt->pci_ha;
 	for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
 		pci_read_config_dword(pci_ha, tad_dram_rule[n_tads], &reg);
 		limit = TAD_LIMIT(reg);
@@ -2139,9 +2090,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	}
 	*channel_mask = 1 << base_ch;
 
-	pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
-				tad_ch_nilv_offset[n_tads],
-				&tad_offset);
+	pci_read_config_dword(pvt->pci_tad[base_ch], tad_ch_nilv_offset[n_tads], &tad_offset);
 
 	if (pvt->is_mirrored) {
 		*channel_mask |= 1 << ((base_ch + 2) % 4);
@@ -2192,9 +2141,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	 * Step 3) Decode rank
 	 */
 	for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) {
-		pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
-				      rir_way_limit[n_rir],
-				      &reg);
+		pci_read_config_dword(pvt->pci_tad[base_ch], rir_way_limit[n_rir], &reg);
 
 		if (!IS_RIR_VALID(reg))
 			continue;
@@ -2222,9 +2169,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 		idx = (ch_addr >> 13);	/* FIXME: Datasheet says to shift by 15 */
 	idx %= 1 << rir_way;
 
-	pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
-			      rir_offset[n_rir][idx],
-			      &reg);
+	pci_read_config_dword(pvt->pci_tad[base_ch], rir_offset[n_rir][idx], &reg);
 	*rank = RIR_RNK_TGT(pvt->info.type, reg);
 
 	edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
@@ -2277,10 +2222,11 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
 				 const unsigned devno,
 				 const int multi_bus)
 {
-	struct sbridge_dev *sbridge_dev;
+	struct sbridge_dev *sbridge_dev = NULL;
 	const struct pci_id_descr *dev_descr = &table->descr[devno];
 	struct pci_dev *pdev = NULL;
 	u8 bus = 0;
+	int i = 0;
 
 	sbridge_printk(KERN_DEBUG,
 		"Seeking for: PCI ID %04x:%04x\n",
@@ -2311,9 +2257,14 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
 	}
 	bus = pdev->bus->number;
 
-	sbridge_dev = get_sbridge_dev(bus, multi_bus);
+next_imc:
+	sbridge_dev = get_sbridge_dev(bus, dev_descr->dom, multi_bus, sbridge_dev);
 	if (!sbridge_dev) {
-		sbridge_dev = alloc_sbridge_dev(bus, table);
+
+		if (dev_descr->dom == SOCK)
+			goto out_imc;
+
+		sbridge_dev = alloc_sbridge_dev(bus, dev_descr->dom, table);
 		if (!sbridge_dev) {
 			pci_dev_put(pdev);
 			return -ENOMEM;
@@ -2321,7 +2272,7 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
 		(*num_mc)++;
 	}
 
-	if (sbridge_dev->pdev[devno]) {
+	if (sbridge_dev->pdev[sbridge_dev->i_devs]) {
 		sbridge_printk(KERN_ERR,
 			"Duplicated device for %04x:%04x\n",
 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
@@ -2329,8 +2280,16 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
 		return -ENODEV;
 	}
 
-	sbridge_dev->pdev[devno] = pdev;
+	sbridge_dev->pdev[sbridge_dev->i_devs++] = pdev;
+
+	/* pdev belongs to more than one IMC, do extra gets */
+	if (++i > 1)
+		pci_dev_get(pdev);
 
+	if (dev_descr->dom == SOCK && i < table->n_imcs_per_sock)
+		goto next_imc;
+
+out_imc:
 	/* Be sure that the device is enabled */
 	if (unlikely(pci_enable_device(pdev) < 0)) {
 		sbridge_printk(KERN_ERR,
@@ -2374,7 +2333,7 @@ static int sbridge_get_all_devices(u8 *num_mc,
 	if (table->type == KNIGHTS_LANDING)
 		allow_dups = multi_bus = 1;
 	while (table && table->descr) {
-		for (i = 0; i < table->n_devs; i++) {
+		for (i = 0; i < table->n_devs_per_sock; i++) {
 			if (!allow_dups || i == 0 ||
 					table->descr[i].dev_id !=
 						table->descr[i-1].dev_id) {
@@ -2385,7 +2344,7 @@ static int sbridge_get_all_devices(u8 *num_mc,
 							   table, i, multi_bus);
 				if (rc < 0) {
 					if (i == 0) {
-						i = table->n_devs;
+						i = table->n_devs_per_sock;
 						break;
 					}
 					sbridge_put_all_devices();
@@ -2399,6 +2358,13 @@ static int sbridge_get_all_devices(u8 *num_mc,
 	return 0;
 }
 
+/*
+ * Device IDs for {SBRIDGE,IBRIDGE,HASWELL,BROADWELL}_IMC_HA0_TAD0 are in
+ * the format: XXXa. So we can convert from a device to the corresponding
+ * channel like this
+ */
+#define TAD_DEV_TO_CHAN(dev) (((dev) & 0xf) - 0xa)
+
 static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
 				 struct sbridge_dev *sbridge_dev)
 {
@@ -2423,7 +2389,7 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
 			pvt->pci_br0 = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0:
-			pvt->pci_ha0 = pdev;
+			pvt->pci_ha = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA:
 			pvt->pci_ta = pdev;
@@ -2436,7 +2402,7 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
 		case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2:
 		case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3:
 		{
-			int id = pdev->device - PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0;
+			int id = TAD_DEV_TO_CHAN(pdev->device);
 			pvt->pci_tad[id] = pdev;
 			saw_chan_mask |= 1 << id;
 		}
@@ -2455,7 +2421,7 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
 	}
 
 	/* Check if everything were registered */
-	if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 ||
+	if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha ||
 	    !pvt->pci_ras || !pvt->pci_ta)
 		goto enodev;
 
@@ -2488,19 +2454,26 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
 
 		switch (pdev->device) {
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0:
-			pvt->pci_ha0 = pdev;
+		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1:
+			pvt->pci_ha = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
+		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA:
 			pvt->pci_ta = pdev;
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS:
+		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS:
 			pvt->pci_ras = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0:
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1:
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2:
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3:
+		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0:
+		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1:
+		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2:
+		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3:
 		{
-			int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0;
+			int id = TAD_DEV_TO_CHAN(pdev->device);
 			pvt->pci_tad[id] = pdev;
 			saw_chan_mask |= 1 << id;
 		}
@@ -2520,19 +2493,6 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_BR1:
 			pvt->pci_br1 = pdev;
 			break;
-		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1:
-			pvt->pci_ha1 = pdev;
-			break;
-		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0:
-		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1:
-		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2:
-		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3:
-		{
-			int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0 + 4;
-			pvt->pci_tad[id] = pdev;
-			saw_chan_mask |= 1 << id;
-		}
-			break;
 		default:
 			goto error;
 		}
@@ -2544,13 +2504,12 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
 	}
 
 	/* Check if everything were registered */
-	if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_br0 ||
+	if (!pvt->pci_sad0 || !pvt->pci_ha || !pvt->pci_br0 ||
 	    !pvt->pci_br1 || !pvt->pci_ras || !pvt->pci_ta)
 		goto enodev;
 
-	if (saw_chan_mask != 0x0f && /* -EN */
-	    saw_chan_mask != 0x33 && /* -EP */
-	    saw_chan_mask != 0xff)   /* -EX */
+	if (saw_chan_mask != 0x0f && /* -EN/-EX */
+	    saw_chan_mask != 0x03)   /* -EP */
 		goto enodev;
 	return 0;
 
@@ -2593,32 +2552,27 @@ static int haswell_mci_bind_devs(struct mem_ctl_info *mci,
 			pvt->pci_sad1 = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0:
-			pvt->pci_ha0 = pdev;
+		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1:
+			pvt->pci_ha = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA:
+		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA:
 			pvt->pci_ta = pdev;
 			break;
-		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL:
+		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM:
+		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM:
 			pvt->pci_ras = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0:
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1:
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2:
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3:
-		{
-			int id = pdev->device - PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0;
-
-			pvt->pci_tad[id] = pdev;
-			saw_chan_mask |= 1 << id;
-		}
-			break;
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0:
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1:
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2:
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3:
 		{
-			int id = pdev->device - PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0 + 4;
-
+			int id = TAD_DEV_TO_CHAN(pdev->device);
 			pvt->pci_tad[id] = pdev;
 			saw_chan_mask |= 1 << id;
 		}
@@ -2630,12 +2584,6 @@ static int haswell_mci_bind_devs(struct mem_ctl_info *mci,
 			if (!pvt->pci_ddrio)
 				pvt->pci_ddrio = pdev;
 			break;
-		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1:
-			pvt->pci_ha1 = pdev;
-			break;
-		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA:
-			pvt->pci_ha1_ta = pdev;
-			break;
 		default:
 			break;
 		}
@@ -2647,13 +2595,12 @@ static int haswell_mci_bind_devs(struct mem_ctl_info *mci,
 	}
 
 	/* Check if everything were registered */
-	if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_sad1 ||
+	if (!pvt->pci_sad0 || !pvt->pci_ha || !pvt->pci_sad1 ||
 	    !pvt->pci_ras  || !pvt->pci_ta || !pvt->info.pci_vtd)
 		goto enodev;
 
-	if (saw_chan_mask != 0x0f && /* -EN */
-	    saw_chan_mask != 0x33 && /* -EP */
-	    saw_chan_mask != 0xff)   /* -EX */
+	if (saw_chan_mask != 0x0f && /* -EN/-EX */
+	    saw_chan_mask != 0x03)   /* -EP */
 		goto enodev;
 	return 0;
 
@@ -2690,30 +2637,27 @@ static int broadwell_mci_bind_devs(struct mem_ctl_info *mci,
 			pvt->pci_sad1 = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0:
-			pvt->pci_ha0 = pdev;
+		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1:
+			pvt->pci_ha = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA:
+		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA:
 			pvt->pci_ta = pdev;
 			break;
-		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL:
+		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM:
+		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM:
 			pvt->pci_ras = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0:
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1:
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2:
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3:
-		{
-			int id = pdev->device - PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0;
-			pvt->pci_tad[id] = pdev;
-			saw_chan_mask |= 1 << id;
-		}
-			break;
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0:
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1:
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2:
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3:
 		{
-			int id = pdev->device - PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0 + 4;
+			int id = TAD_DEV_TO_CHAN(pdev->device);
 			pvt->pci_tad[id] = pdev;
 			saw_chan_mask |= 1 << id;
 		}
@@ -2721,12 +2665,6 @@ static int broadwell_mci_bind_devs(struct mem_ctl_info *mci,
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0:
 			pvt->pci_ddrio = pdev;
 			break;
-		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1:
-			pvt->pci_ha1 = pdev;
-			break;
-		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA:
-			pvt->pci_ha1_ta = pdev;
-			break;
 		default:
 			break;
 		}
@@ -2738,13 +2676,12 @@ static int broadwell_mci_bind_devs(struct mem_ctl_info *mci,
 	}
 
 	/* Check if everything were registered */
-	if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_sad1 ||
+	if (!pvt->pci_sad0 || !pvt->pci_ha || !pvt->pci_sad1 ||
 	    !pvt->pci_ras  || !pvt->pci_ta || !pvt->info.pci_vtd)
 		goto enodev;
 
-	if (saw_chan_mask != 0x0f && /* -EN */
-	    saw_chan_mask != 0x33 && /* -EP */
-	    saw_chan_mask != 0xff)   /* -EX */
+	if (saw_chan_mask != 0x0f && /* -EN/-EX */
+	    saw_chan_mask != 0x03)   /* -EP */
 		goto enodev;
 	return 0;
 
@@ -2812,7 +2749,7 @@ static int knl_mci_bind_devs(struct mem_ctl_info *mci,
 			pvt->knl.pci_cha[devidx] = pdev;
 			break;
 
-		case PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL:
+		case PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN:
 			devidx = -1;
 
 			/*
@@ -3006,7 +2943,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 
 	if (rc < 0)
 		goto err_parsing;
-	new_mci = get_mci_for_node_id(socket);
+	new_mci = get_mci_for_node_id(socket, ha);
 	if (!new_mci) {
 		strcpy(msg, "Error: socket got corrupted!");
 		goto err_parsing;
@@ -3053,7 +2990,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 	/* Call the helper to output message */
 	edac_mc_handle_error(tp_event, mci, core_err_cnt,
 			     m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
-			     4*ha+channel, dimm, -1,
+			     channel, dimm, -1,
 			     optype, msg);
 	return;
 err_parsing:
@@ -3078,7 +3015,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
 		return NOTIFY_DONE;
 
-	mci = get_mci_for_node_id(mce->socketid);
+	mci = get_mci_for_node_id(mce->socketid, IMC0);
 	if (!mci)
 		return NOTIFY_DONE;
 	pvt = mci->pvt_info;
@@ -3159,11 +3096,6 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 	struct pci_dev *pdev = sbridge_dev->pdev[0];
 	int rc;
 
-	/* Check the number of active and not disabled channels */
-	rc = check_if_ecc_is_active(sbridge_dev->bus, type);
-	if (unlikely(rc < 0))
-		return rc;
-
 	/* allocate a new MC control structure */
 	layers[0].type = EDAC_MC_LAYER_CHANNEL;
 	layers[0].size = type == KNIGHTS_LANDING ?
@@ -3192,7 +3124,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		MEM_FLAG_DDR4 : MEM_FLAG_DDR3;
 	mci->edac_ctl_cap = EDAC_FLAG_NONE;
 	mci->edac_cap = EDAC_FLAG_NONE;
-	mci->mod_name = "sbridge_edac.c";
+	mci->mod_name = "sb_edac.c";
 	mci->mod_ver = SBRIDGE_REVISION;
 	mci->dev_name = pci_name(pdev);
 	mci->ctl_page_to_phys = NULL;
@@ -3215,12 +3147,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
 		pvt->info.interleave_pkg = ibridge_interleave_pkg;
 		pvt->info.get_width = ibridge_get_width;
-		mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge Socket#%d", mci->mc_idx);
 
 		/* Store pci devices at mci for faster access */
 		rc = ibridge_mci_bind_devs(mci, sbridge_dev);
 		if (unlikely(rc < 0))
 			goto fail0;
+		get_source_id(mci);
+		mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge SrcID#%d_Ha#%d",
+			pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
 		break;
 	case SANDY_BRIDGE:
 		pvt->info.rankcfgr = SB_RANK_CFG_A;
@@ -3238,12 +3172,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.max_interleave = ARRAY_SIZE(sbridge_interleave_list);
 		pvt->info.interleave_pkg = sbridge_interleave_pkg;
 		pvt->info.get_width = sbridge_get_width;
-		mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx);
 
 		/* Store pci devices at mci for faster access */
 		rc = sbridge_mci_bind_devs(mci, sbridge_dev);
 		if (unlikely(rc < 0))
 			goto fail0;
+		get_source_id(mci);
+		mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge SrcID#%d_Ha#%d",
+			pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
 		break;
 	case HASWELL:
 		/* rankcfgr isn't used */
@@ -3261,12 +3197,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
 		pvt->info.interleave_pkg = ibridge_interleave_pkg;
 		pvt->info.get_width = ibridge_get_width;
-		mci->ctl_name = kasprintf(GFP_KERNEL, "Haswell Socket#%d", mci->mc_idx);
 
 		/* Store pci devices at mci for faster access */
 		rc = haswell_mci_bind_devs(mci, sbridge_dev);
 		if (unlikely(rc < 0))
 			goto fail0;
+		get_source_id(mci);
+		mci->ctl_name = kasprintf(GFP_KERNEL, "Haswell SrcID#%d_Ha#%d",
+			pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
 		break;
 	case BROADWELL:
 		/* rankcfgr isn't used */
@@ -3284,12 +3222,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
 		pvt->info.interleave_pkg = ibridge_interleave_pkg;
 		pvt->info.get_width = broadwell_get_width;
-		mci->ctl_name = kasprintf(GFP_KERNEL, "Broadwell Socket#%d", mci->mc_idx);
 
 		/* Store pci devices at mci for faster access */
 		rc = broadwell_mci_bind_devs(mci, sbridge_dev);
 		if (unlikely(rc < 0))
 			goto fail0;
+		get_source_id(mci);
+		mci->ctl_name = kasprintf(GFP_KERNEL, "Broadwell SrcID#%d_Ha#%d",
+			pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
 		break;
 	case KNIGHTS_LANDING:
 		/* pvt->info.rankcfgr == ??? */
@@ -3307,17 +3247,22 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.max_interleave = ARRAY_SIZE(knl_interleave_list);
 		pvt->info.interleave_pkg = ibridge_interleave_pkg;
 		pvt->info.get_width = knl_get_width;
-		mci->ctl_name = kasprintf(GFP_KERNEL,
-			"Knights Landing Socket#%d", mci->mc_idx);
 
 		rc = knl_mci_bind_devs(mci, sbridge_dev);
 		if (unlikely(rc < 0))
 			goto fail0;
+		get_source_id(mci);
+		mci->ctl_name = kasprintf(GFP_KERNEL, "Knights Landing SrcID#%d_Ha#%d",
+			pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
 		break;
 	}
 
 	/* Get dimm basic config and the memory layout */
-	get_dimm_config(mci);
+	rc = get_dimm_config(mci);
+	if (rc < 0) {
+		edac_dbg(0, "MC: failed to get_dimm_config()\n");
+		goto fail;
+	}
 	get_memory_layout(mci);
 
 	/* record ptr to the generic device */
@@ -3327,13 +3272,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 	if (unlikely(edac_mc_add_mc(mci))) {
 		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
 		rc = -EINVAL;
-		goto fail0;
+		goto fail;
 	}
 
 	return 0;
 
-fail0:
+fail:
 	kfree(mci->ctl_name);
+fail0:
 	edac_mc_free(mci);
 	sbridge_dev->mci = NULL;
 	return rc;
diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
index 86d585cb6d32..2d352b40ae1c 100644
--- a/drivers/edac/thunderx_edac.c
+++ b/drivers/edac/thunderx_edac.c
@@ -2080,7 +2080,7 @@ static int thunderx_l2c_probe(struct pci_dev *pdev,
 	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
 		l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
 
-		thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
+		ret = thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
 					      l2c, dfs_entries);
 
 		if (ret != dfs_entries) {
diff --git a/drivers/firmware/dmi-id.c b/drivers/firmware/dmi-id.c
index 44c01390d035..951b6c79f166 100644
--- a/drivers/firmware/dmi-id.c
+++ b/drivers/firmware/dmi-id.c
@@ -47,6 +47,7 @@ DEFINE_DMI_ATTR_WITH_SHOW(product_name,		0444, DMI_PRODUCT_NAME);
 DEFINE_DMI_ATTR_WITH_SHOW(product_version,	0444, DMI_PRODUCT_VERSION);
 DEFINE_DMI_ATTR_WITH_SHOW(product_serial,	0400, DMI_PRODUCT_SERIAL);
 DEFINE_DMI_ATTR_WITH_SHOW(product_uuid,		0400, DMI_PRODUCT_UUID);
+DEFINE_DMI_ATTR_WITH_SHOW(product_family,	0444, DMI_PRODUCT_FAMILY);
 DEFINE_DMI_ATTR_WITH_SHOW(board_vendor,		0444, DMI_BOARD_VENDOR);
 DEFINE_DMI_ATTR_WITH_SHOW(board_name,		0444, DMI_BOARD_NAME);
 DEFINE_DMI_ATTR_WITH_SHOW(board_version,	0444, DMI_BOARD_VERSION);
@@ -191,6 +192,7 @@ static void __init dmi_id_init_attr_table(void)
 	ADD_DMI_ATTR(product_version,   DMI_PRODUCT_VERSION);
 	ADD_DMI_ATTR(product_serial,    DMI_PRODUCT_SERIAL);
 	ADD_DMI_ATTR(product_uuid,      DMI_PRODUCT_UUID);
+	ADD_DMI_ATTR(product_family,    DMI_PRODUCT_FAMILY);
 	ADD_DMI_ATTR(board_vendor,      DMI_BOARD_VENDOR);
 	ADD_DMI_ATTR(board_name,        DMI_BOARD_NAME);
 	ADD_DMI_ATTR(board_version,     DMI_BOARD_VERSION);
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 54be60ead08f..783041964439 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -144,7 +144,7 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *,
 
 	buf = dmi_early_remap(dmi_base, orig_dmi_len);
 	if (buf == NULL)
-		return -1;
+		return -ENOMEM;
 
 	dmi_decode_table(buf, decode, NULL);
 
@@ -178,7 +178,7 @@ static void __init dmi_save_ident(const struct dmi_header *dm, int slot,
 	const char *d = (const char *) dm;
 	const char *p;
 
-	if (dmi_ident[slot])
+	if (dmi_ident[slot] || dm->length <= string)
 		return;
 
 	p = dmi_string(dm, d[string]);
@@ -191,13 +191,14 @@ static void __init dmi_save_ident(const struct dmi_header *dm, int slot,
 static void __init dmi_save_uuid(const struct dmi_header *dm, int slot,
 		int index)
 {
-	const u8 *d = (u8 *) dm + index;
+	const u8 *d;
 	char *s;
 	int is_ff = 1, is_00 = 1, i;
 
-	if (dmi_ident[slot])
+	if (dmi_ident[slot] || dm->length <= index + 16)
 		return;
 
+	d = (u8 *) dm + index;
 	for (i = 0; i < 16 && (is_ff || is_00); i++) {
 		if (d[i] != 0x00)
 			is_00 = 0;
@@ -228,16 +229,17 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot,
 static void __init dmi_save_type(const struct dmi_header *dm, int slot,
 		int index)
 {
-	const u8 *d = (u8 *) dm + index;
+	const u8 *d;
 	char *s;
 
-	if (dmi_ident[slot])
+	if (dmi_ident[slot] || dm->length <= index)
 		return;
 
 	s = dmi_alloc(4);
 	if (!s)
 		return;
 
+	d = (u8 *) dm + index;
 	sprintf(s, "%u", *d & 0x7F);
 	dmi_ident[slot] = s;
 }
@@ -278,9 +280,13 @@ static void __init dmi_save_devices(const struct dmi_header *dm)
 
 static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm)
 {
-	int i, count = *(u8 *)(dm + 1);
+	int i, count;
 	struct dmi_device *dev;
 
+	if (dm->length < 0x05)
+		return;
+
+	count = *(u8 *)(dm + 1);
 	for (i = 1; i <= count; i++) {
 		const char *devname = dmi_string(dm, i);
 
@@ -353,6 +359,9 @@ static void __init dmi_save_extended_devices(const struct dmi_header *dm)
 	const char *name;
 	const u8 *d = (u8 *)dm;
 
+	if (dm->length < 0x0B)
+		return;
+
 	/* Skip disabled device */
 	if ((d[0x5] & 0x80) == 0)
 		return;
@@ -387,7 +396,7 @@ static void __init save_mem_devices(const struct dmi_header *dm, void *v)
 	const char *d = (const char *)dm;
 	static int nr;
 
-	if (dm->type != DMI_ENTRY_MEM_DEVICE)
+	if (dm->type != DMI_ENTRY_MEM_DEVICE || dm->length < 0x12)
 		return;
 	if (nr >= dmi_memdev_nr) {
 		pr_warn(FW_BUG "Too many DIMM entries in SMBIOS table\n");
@@ -430,6 +439,7 @@ static void __init dmi_decode(const struct dmi_header *dm, void *dummy)
 		dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
 		dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7);
 		dmi_save_uuid(dm, DMI_PRODUCT_UUID, 8);
+		dmi_save_ident(dm, DMI_PRODUCT_FAMILY, 26);
 		break;
 	case 2:		/* Base Board Information */
 		dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
@@ -649,6 +659,21 @@ void __init dmi_scan_machine(void)
 			goto error;
 
 		/*
+		 * Same logic as above, look for a 64-bit entry point
+		 * first, and if not found, fall back to 32-bit entry point.
+		 */
+		memcpy_fromio(buf, p, 16);
+		for (q = p + 16; q < p + 0x10000; q += 16) {
+			memcpy_fromio(buf + 16, q, 16);
+			if (!dmi_smbios3_present(buf)) {
+				dmi_available = 1;
+				dmi_early_unmap(p, 0x10000);
+				goto out;
+			}
+			memcpy(buf, buf + 16, 16);
+		}
+
+		/*
 		 * Iterate over all possible DMI header addresses q.
 		 * Maintain the 32 bytes around q in buf.  On the
 		 * first iteration, substitute zero for the
@@ -658,7 +683,7 @@ void __init dmi_scan_machine(void)
 		memset(buf, 0, 16);
 		for (q = p; q < p + 0x10000; q += 16) {
 			memcpy_fromio(buf + 16, q, 16);
-			if (!dmi_smbios3_present(buf) || !dmi_present(buf)) {
+			if (!dmi_present(buf)) {
 				dmi_available = 1;
 				dmi_early_unmap(p, 0x10000);
 				goto out;
@@ -992,7 +1017,8 @@ EXPORT_SYMBOL(dmi_get_date);
  *	@decode: Callback function
  *	@private_data: Private data to be passed to the callback function
  *
- *	Returns -1 when the DMI table can't be reached, 0 on success.
+ *	Returns 0 on success, -ENXIO if DMI is not selected or not present,
+ *	or a different negative error code if DMI walking fails.
  */
 int dmi_walk(void (*decode)(const struct dmi_header *, void *),
 	     void *private_data)
@@ -1000,11 +1026,11 @@ int dmi_walk(void (*decode)(const struct dmi_header *, void *),
 	u8 *buf;
 
 	if (!dmi_available)
-		return -1;
+		return -ENXIO;
 
 	buf = dmi_remap(dmi_base, dmi_len);
 	if (buf == NULL)
-		return -1;
+		return -ENOMEM;
 
 	dmi_decode_table(buf, decode, private_data);
 
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 2e78b0b96d74..394db40ed374 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -112,6 +112,15 @@ config EFI_CAPSULE_LOADER
 
 	  Most users should say N.
 
+config EFI_CAPSULE_QUIRK_QUARK_CSH
+	boolean "Add support for Quark capsules with non-standard headers"
+	depends on X86 && !64BIT
+	select EFI_CAPSULE_LOADER
+	default y
+	help
+	  Add support for processing Quark X1000 EFI capsules, whose header
+	  layout deviates from the layout mandated by the UEFI specification.
+
 config EFI_TEST
 	tristate "EFI Runtime Service Tests Support"
 	depends on EFI
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index 974c5a31a005..1cc41c3d6315 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -11,6 +11,7 @@
  *
  */
 
+#include <linux/dmi.h>
 #include <linux/efi.h>
 #include <linux/io.h>
 #include <linux/memblock.h>
@@ -166,3 +167,18 @@ void efi_virtmap_unload(void)
 	efi_set_pgd(current->active_mm);
 	preempt_enable();
 }
+
+
+static int __init arm_dmi_init(void)
+{
+	/*
+	 * On arm64/ARM, DMI depends on UEFI, and dmi_scan_machine() needs to
+	 * be called early because dmi_id_init(), which is an arch_initcall
+	 * itself, depends on dmi_scan_machine() having been called already.
+	 */
+	dmi_scan_machine();
+	if (dmi_available)
+		dmi_set_dump_stack_arch_desc();
+	return 0;
+}
+core_initcall(arm_dmi_init);
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
index 9ae6c116c474..ec8ac5c4dd84 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -20,15 +20,9 @@
 
 #define NO_FURTHER_WRITE_ACTION -1
 
-struct capsule_info {
-	bool		header_obtained;
-	int		reset_type;
-	long		index;
-	size_t		count;
-	size_t		total_size;
-	struct page	**pages;
-	size_t		page_bytes_remain;
-};
+#ifndef phys_to_page
+#define phys_to_page(x)		pfn_to_page((x) >> PAGE_SHIFT)
+#endif
 
 /**
  * efi_free_all_buff_pages - free all previous allocated buffer pages
@@ -41,65 +35,70 @@ struct capsule_info {
 static void efi_free_all_buff_pages(struct capsule_info *cap_info)
 {
 	while (cap_info->index > 0)
-		__free_page(cap_info->pages[--cap_info->index]);
+		__free_page(phys_to_page(cap_info->pages[--cap_info->index]));
 
 	cap_info->index = NO_FURTHER_WRITE_ACTION;
 }
 
-/**
- * efi_capsule_setup_info - obtain the efi capsule header in the binary and
- *			    setup capsule_info structure
- * @cap_info: pointer to current instance of capsule_info structure
- * @kbuff: a mapped first page buffer pointer
- * @hdr_bytes: the total received number of bytes for efi header
- **/
-static ssize_t efi_capsule_setup_info(struct capsule_info *cap_info,
-				      void *kbuff, size_t hdr_bytes)
+int __efi_capsule_setup_info(struct capsule_info *cap_info)
 {
-	efi_capsule_header_t *cap_hdr;
 	size_t pages_needed;
 	int ret;
 	void *temp_page;
 
-	/* Only process data block that is larger than efi header size */
-	if (hdr_bytes < sizeof(efi_capsule_header_t))
-		return 0;
-
-	/* Reset back to the correct offset of header */
-	cap_hdr = kbuff - cap_info->count;
-	pages_needed = ALIGN(cap_hdr->imagesize, PAGE_SIZE) >> PAGE_SHIFT;
+	pages_needed = ALIGN(cap_info->total_size, PAGE_SIZE) / PAGE_SIZE;
 
 	if (pages_needed == 0) {
-		pr_err("%s: pages count invalid\n", __func__);
+		pr_err("invalid capsule size");
 		return -EINVAL;
 	}
 
 	/* Check if the capsule binary supported */
-	ret = efi_capsule_supported(cap_hdr->guid, cap_hdr->flags,
-				    cap_hdr->imagesize,
+	ret = efi_capsule_supported(cap_info->header.guid,
+				    cap_info->header.flags,
+				    cap_info->header.imagesize,
 				    &cap_info->reset_type);
 	if (ret) {
-		pr_err("%s: efi_capsule_supported() failed\n",
-		       __func__);
+		pr_err("capsule not supported\n");
 		return ret;
 	}
 
-	cap_info->total_size = cap_hdr->imagesize;
 	temp_page = krealloc(cap_info->pages,
 			     pages_needed * sizeof(void *),
 			     GFP_KERNEL | __GFP_ZERO);
-	if (!temp_page) {
-		pr_debug("%s: krealloc() failed\n", __func__);
+	if (!temp_page)
 		return -ENOMEM;
-	}
 
 	cap_info->pages = temp_page;
-	cap_info->header_obtained = true;
 
 	return 0;
 }
 
 /**
+ * efi_capsule_setup_info - obtain the efi capsule header in the binary and
+ *			    setup capsule_info structure
+ * @cap_info: pointer to current instance of capsule_info structure
+ * @kbuff: a mapped first page buffer pointer
+ * @hdr_bytes: the total received number of bytes for efi header
+ *
+ * Platforms with non-standard capsule update mechanisms can override
+ * this __weak function so they can perform any required capsule
+ * image munging. See quark_quirk_function() for an example.
+ **/
+int __weak efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
+				  size_t hdr_bytes)
+{
+	/* Only process data block that is larger than efi header size */
+	if (hdr_bytes < sizeof(efi_capsule_header_t))
+		return 0;
+
+	memcpy(&cap_info->header, kbuff, sizeof(cap_info->header));
+	cap_info->total_size = cap_info->header.imagesize;
+
+	return __efi_capsule_setup_info(cap_info);
+}
+
+/**
  * efi_capsule_submit_update - invoke the efi_capsule_update API once binary
  *			       upload done
  * @cap_info: pointer to current instance of capsule_info structure
@@ -107,26 +106,17 @@ static ssize_t efi_capsule_setup_info(struct capsule_info *cap_info,
 static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
 {
 	int ret;
-	void *cap_hdr_temp;
-
-	cap_hdr_temp = vmap(cap_info->pages, cap_info->index,
-			VM_MAP, PAGE_KERNEL);
-	if (!cap_hdr_temp) {
-		pr_debug("%s: vmap() failed\n", __func__);
-		return -EFAULT;
-	}
 
-	ret = efi_capsule_update(cap_hdr_temp, cap_info->pages);
-	vunmap(cap_hdr_temp);
+	ret = efi_capsule_update(&cap_info->header, cap_info->pages);
 	if (ret) {
-		pr_err("%s: efi_capsule_update() failed\n", __func__);
+		pr_err("capsule update failed\n");
 		return ret;
 	}
 
 	/* Indicate capsule binary uploading is done */
 	cap_info->index = NO_FURTHER_WRITE_ACTION;
-	pr_info("%s: Successfully upload capsule file with reboot type '%s'\n",
-		__func__, !cap_info->reset_type ? "RESET_COLD" :
+	pr_info("Successfully upload capsule file with reboot type '%s'\n",
+		!cap_info->reset_type ? "RESET_COLD" :
 		cap_info->reset_type == 1 ? "RESET_WARM" :
 		"RESET_SHUTDOWN");
 	return 0;
@@ -171,37 +161,30 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff,
 	if (!cap_info->page_bytes_remain) {
 		page = alloc_page(GFP_KERNEL);
 		if (!page) {
-			pr_debug("%s: alloc_page() failed\n", __func__);
 			ret = -ENOMEM;
 			goto failed;
 		}
 
-		cap_info->pages[cap_info->index++] = page;
+		cap_info->pages[cap_info->index++] = page_to_phys(page);
 		cap_info->page_bytes_remain = PAGE_SIZE;
+	} else {
+		page = phys_to_page(cap_info->pages[cap_info->index - 1]);
 	}
 
-	page = cap_info->pages[cap_info->index - 1];
-
 	kbuff = kmap(page);
-	if (!kbuff) {
-		pr_debug("%s: kmap() failed\n", __func__);
-		ret = -EFAULT;
-		goto failed;
-	}
 	kbuff += PAGE_SIZE - cap_info->page_bytes_remain;
 
 	/* Copy capsule binary data from user space to kernel space buffer */
 	write_byte = min_t(size_t, count, cap_info->page_bytes_remain);
 	if (copy_from_user(kbuff, buff, write_byte)) {
-		pr_debug("%s: copy_from_user() failed\n", __func__);
 		ret = -EFAULT;
 		goto fail_unmap;
 	}
 	cap_info->page_bytes_remain -= write_byte;
 
 	/* Setup capsule binary info structure */
-	if (!cap_info->header_obtained) {
-		ret = efi_capsule_setup_info(cap_info, kbuff,
+	if (cap_info->header.headersize == 0) {
+		ret = efi_capsule_setup_info(cap_info, kbuff - cap_info->count,
 					     cap_info->count + write_byte);
 		if (ret)
 			goto fail_unmap;
@@ -211,11 +194,10 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff,
 	kunmap(page);
 
 	/* Submit the full binary to efi_capsule_update() API */
-	if (cap_info->header_obtained &&
+	if (cap_info->header.headersize > 0 &&
 	    cap_info->count >= cap_info->total_size) {
 		if (cap_info->count > cap_info->total_size) {
-			pr_err("%s: upload size exceeded header defined size\n",
-			       __func__);
+			pr_err("capsule upload size exceeded header defined size\n");
 			ret = -EINVAL;
 			goto failed;
 		}
@@ -249,7 +231,7 @@ static int efi_capsule_flush(struct file *file, fl_owner_t id)
 	struct capsule_info *cap_info = file->private_data;
 
 	if (cap_info->index > 0) {
-		pr_err("%s: capsule upload not complete\n", __func__);
+		pr_err("capsule upload not complete\n");
 		efi_free_all_buff_pages(cap_info);
 		ret = -ECANCELED;
 	}
@@ -328,8 +310,7 @@ static int __init efi_capsule_loader_init(void)
 
 	ret = misc_register(&efi_capsule_misc);
 	if (ret)
-		pr_err("%s: Failed to register misc char file note\n",
-		       __func__);
+		pr_err("Unable to register capsule loader device\n");
 
 	return ret;
 }
diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c
index 6eedff45e6d7..901b9306bf94 100644
--- a/drivers/firmware/efi/capsule.c
+++ b/drivers/firmware/efi/capsule.c
@@ -214,7 +214,7 @@ efi_capsule_update_locked(efi_capsule_header_t *capsule,
  *
  * Return 0 on success, a converted EFI status code on failure.
  */
-int efi_capsule_update(efi_capsule_header_t *capsule, struct page **pages)
+int efi_capsule_update(efi_capsule_header_t *capsule, phys_addr_t *pages)
 {
 	u32 imagesize = capsule->imagesize;
 	efi_guid_t guid = capsule->guid;
@@ -247,16 +247,13 @@ int efi_capsule_update(efi_capsule_header_t *capsule, struct page **pages)
 		efi_capsule_block_desc_t *sglist;
 
 		sglist = kmap(sg_pages[i]);
-		if (!sglist) {
-			rv = -ENOMEM;
-			goto out;
-		}
 
 		for (j = 0; j < SGLIST_PER_PAGE && count > 0; j++) {
-			u64 sz = min_t(u64, imagesize, PAGE_SIZE);
+			u64 sz = min_t(u64, imagesize,
+				       PAGE_SIZE - (u64)*pages % PAGE_SIZE);
 
 			sglist[j].length = sz;
-			sglist[j].data = page_to_phys(*pages++);
+			sglist[j].data = *pages++;
 
 			imagesize -= sz;
 			count--;
diff --git a/drivers/firmware/efi/efi-bgrt.c b/drivers/firmware/efi/efi-bgrt.c
index 04ca8764f0c0..b58233e4ed71 100644
--- a/drivers/firmware/efi/efi-bgrt.c
+++ b/drivers/firmware/efi/efi-bgrt.c
@@ -27,6 +27,26 @@ struct bmp_header {
 	u32 size;
 } __packed;
 
+static bool efi_bgrt_addr_valid(u64 addr)
+{
+	efi_memory_desc_t *md;
+
+	for_each_efi_memory_desc(md) {
+		u64 size;
+		u64 end;
+
+		if (md->type != EFI_BOOT_SERVICES_DATA)
+			continue;
+
+		size = md->num_pages << EFI_PAGE_SHIFT;
+		end = md->phys_addr + size;
+		if (addr >= md->phys_addr && addr < end)
+			return true;
+	}
+
+	return false;
+}
+
 void __init efi_bgrt_init(struct acpi_table_header *table)
 {
 	void *image;
@@ -36,6 +56,9 @@ void __init efi_bgrt_init(struct acpi_table_header *table)
 	if (acpi_disabled)
 		return;
 
+	if (!efi_enabled(EFI_MEMMAP))
+		return;
+
 	if (table->length < sizeof(bgrt_tab)) {
 		pr_notice("Ignoring BGRT: invalid length %u (expected %zu)\n",
 		       table->length, sizeof(bgrt_tab));
@@ -62,6 +85,10 @@ void __init efi_bgrt_init(struct acpi_table_header *table)
 		goto out;
 	}
 
+	if (!efi_bgrt_addr_valid(bgrt->image_address)) {
+		pr_notice("Ignoring BGRT: invalid image address\n");
+		goto out;
+	}
 	image = early_memremap(bgrt->image_address, sizeof(bmp_header));
 	if (!image) {
 		pr_notice("Ignoring BGRT: failed to map image header memory\n");
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index ed3137c1ceb0..ef1fafdad400 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -53,6 +53,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry,
 	if (sscanf(name, "dump-type%u-%u-%d-%lu-%c",
 		   &record->type, &part, &cnt, &time, &data_type) == 5) {
 		record->id = generic_id(time, part, cnt);
+		record->part = part;
 		record->count = cnt;
 		record->time.tv_sec = time;
 		record->time.tv_nsec = 0;
@@ -64,6 +65,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry,
 	} else if (sscanf(name, "dump-type%u-%u-%d-%lu",
 		   &record->type, &part, &cnt, &time) == 4) {
 		record->id = generic_id(time, part, cnt);
+		record->part = part;
 		record->count = cnt;
 		record->time.tv_sec = time;
 		record->time.tv_nsec = 0;
@@ -77,6 +79,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry,
 		 * multiple logs, remains.
 		 */
 		record->id = generic_id(time, part, 0);
+		record->part = part;
 		record->count = 0;
 		record->time.tv_sec = time;
 		record->time.tv_nsec = 0;
@@ -155,19 +158,14 @@ static int efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
  * efi_pstore_sysfs_entry_iter
  *
  * @record: pstore record to pass to callback
- * @pos: entry to begin iterating from
  *
  * You MUST call efivar_enter_iter_begin() before this function, and
  * efivar_entry_iter_end() afterwards.
  *
- * It is possible to begin iteration from an arbitrary entry within
- * the list by passing @pos. @pos is updated on return to point to
- * the next entry of the last one passed to efi_pstore_read_func().
- * To begin iterating from the beginning of the list @pos must be %NULL.
  */
-static int efi_pstore_sysfs_entry_iter(struct pstore_record *record,
-				       struct efivar_entry **pos)
+static int efi_pstore_sysfs_entry_iter(struct pstore_record *record)
 {
+	struct efivar_entry **pos = (struct efivar_entry **)&record->psi->data;
 	struct efivar_entry *entry, *n;
 	struct list_head *head = &efivar_sysfs_list;
 	int size = 0;
@@ -218,7 +216,6 @@ static int efi_pstore_sysfs_entry_iter(struct pstore_record *record,
  */
 static ssize_t efi_pstore_read(struct pstore_record *record)
 {
-	struct efivar_entry *entry = (struct efivar_entry *)record->psi->data;
 	ssize_t size;
 
 	record->buf = kzalloc(EFIVARS_DATA_SIZE_MAX, GFP_KERNEL);
@@ -229,7 +226,7 @@ static ssize_t efi_pstore_read(struct pstore_record *record)
 		size = -EINTR;
 		goto out;
 	}
-	size = efi_pstore_sysfs_entry_iter(record, &entry);
+	size = efi_pstore_sysfs_entry_iter(record);
 	efivar_entry_iter_end();
 
 out:
@@ -247,9 +244,15 @@ static int efi_pstore_write(struct pstore_record *record)
 	efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
 	int i, ret = 0;
 
+	record->time.tv_sec = get_seconds();
+	record->time.tv_nsec = 0;
+
+	record->id = generic_id(record->time.tv_sec, record->part,
+				record->count);
+
 	snprintf(name, sizeof(name), "dump-type%u-%u-%d-%lu-%c",
 		 record->type, record->part, record->count,
-		 get_seconds(), record->compressed ? 'C' : 'D');
+		 record->time.tv_sec, record->compressed ? 'C' : 'D');
 
 	for (i = 0; i < DUMP_NAME_LEN; i++)
 		efi_name[i] = name[i];
@@ -261,7 +264,6 @@ static int efi_pstore_write(struct pstore_record *record)
 	if (record->reason == KMSG_DUMP_OOPS)
 		efivar_run_worker();
 
-	record->id = record->part;
 	return ret;
 };
 
@@ -293,7 +295,7 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
 		 * holding multiple logs, remains.
 		 */
 		snprintf(name_old, sizeof(name_old), "dump-type%u-%u-%lu",
-			ed->record->type, (unsigned int)ed->record->id,
+			ed->record->type, ed->record->part,
 			ed->record->time.tv_sec);
 
 		for (i = 0; i < DUMP_NAME_LEN; i++)
@@ -326,10 +328,7 @@ static int efi_pstore_erase(struct pstore_record *record)
 	char name[DUMP_NAME_LEN];
 	efi_char16_t efi_name[DUMP_NAME_LEN];
 	int found, i;
-	unsigned int part;
 
-	do_div(record->id, 1000);
-	part = do_div(record->id, 100);
 	snprintf(name, sizeof(name), "dump-type%u-%u-%d-%lu",
 		 record->type, record->part, record->count,
 		 record->time.tv_sec);
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index b372aad3b449..045d6d311bde 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -528,7 +528,8 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz,
 		}
 	}
 
-	efi_memattr_init();
+	if (efi_enabled(EFI_MEMMAP))
+		efi_memattr_init();
 
 	/* Parse the EFI Properties table if it exists */
 	if (efi.properties_table != EFI_INVALID_TABLE_ADDR) {
diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c
index 8c34d50a4d80..959777ec8a77 100644
--- a/drivers/firmware/efi/libstub/secureboot.c
+++ b/drivers/firmware/efi/libstub/secureboot.c
@@ -16,10 +16,10 @@
 
 /* BIOS variables */
 static const efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
-static const efi_char16_t const efi_SecureBoot_name[] = {
+static const efi_char16_t efi_SecureBoot_name[] = {
 	'S', 'e', 'c', 'u', 'r', 'e', 'B', 'o', 'o', 't', 0
 };
-static const efi_char16_t const efi_SetupMode_name[] = {
+static const efi_char16_t efi_SetupMode_name[] = {
 	'S', 'e', 't', 'u', 'p', 'M', 'o', 'd', 'e', 0
 };
 
diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c
index 8cd578f62059..08129b7b80ab 100644
--- a/drivers/firmware/efi/test/efi_test.c
+++ b/drivers/firmware/efi/test/efi_test.c
@@ -71,18 +71,13 @@ copy_ucs2_from_user_len(efi_char16_t **dst, efi_char16_t __user *src,
 	if (!access_ok(VERIFY_READ, src, 1))
 		return -EFAULT;
 
-	buf = kmalloc(len, GFP_KERNEL);
-	if (!buf) {
+	buf = memdup_user(src, len);
+	if (IS_ERR(buf)) {
 		*dst = NULL;
-		return -ENOMEM;
+		return PTR_ERR(buf);
 	}
 	*dst = buf;
 
-	if (copy_from_user(*dst, src, len)) {
-		kfree(buf);
-		return -EFAULT;
-	}
-
 	return 0;
 }
 
diff --git a/drivers/firmware/google/vpd.c b/drivers/firmware/google/vpd.c
index 3ce813110d5e..31058d400bda 100644
--- a/drivers/firmware/google/vpd.c
+++ b/drivers/firmware/google/vpd.c
@@ -116,9 +116,13 @@ static int vpd_section_attrib_add(const u8 *key, s32 key_len,
 		return VPD_OK;
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
-	info->key = kzalloc(key_len + 1, GFP_KERNEL);
-	if (!info->key)
+	if (!info)
 		return -ENOMEM;
+	info->key = kzalloc(key_len + 1, GFP_KERNEL);
+	if (!info->key) {
+		ret = -ENOMEM;
+		goto free_info;
+	}
 
 	memcpy(info->key, key, key_len);
 
@@ -132,15 +136,20 @@ static int vpd_section_attrib_add(const u8 *key, s32 key_len,
 	info->value = value;
 
 	INIT_LIST_HEAD(&info->list);
-	list_add_tail(&info->list, &sec->attribs);
 
 	ret = sysfs_create_bin_file(sec->kobj, &info->bin_attr);
-	if (ret) {
-		kfree(info->key);
-		return ret;
-	}
+	if (ret)
+		goto free_info_key;
 
+	list_add_tail(&info->list, &sec->attribs);
 	return 0;
+
+free_info_key:
+	kfree(info->key);
+free_info:
+	kfree(info);
+
+	return ret;
 }
 
 static void vpd_section_attrib_destroy(struct vpd_section *sec)
@@ -149,8 +158,8 @@ static void vpd_section_attrib_destroy(struct vpd_section *sec)
 	struct vpd_attrib_info *temp;
 
 	list_for_each_entry_safe(info, temp, &sec->attribs, list) {
-		kfree(info->key);
 		sysfs_remove_bin_file(sec->kobj, &info->bin_attr);
+		kfree(info->key);
 		kfree(info);
 	}
 }
@@ -235,7 +244,7 @@ static int vpd_section_destroy(struct vpd_section *sec)
 {
 	if (sec->enabled) {
 		vpd_section_attrib_destroy(sec);
-		kobject_del(sec->kobj);
+		kobject_put(sec->kobj);
 		sysfs_remove_bin_file(vpd_kobj, &sec->bin_attr);
 		kfree(sec->raw_name);
 		iounmap(sec->baseaddr);
@@ -322,7 +331,7 @@ static void __exit vpd_platform_exit(void)
 {
 	vpd_section_destroy(&ro_vpd);
 	vpd_section_destroy(&rw_vpd);
-	kobject_del(vpd_kobj);
+	kobject_put(vpd_kobj);
 }
 
 module_init(vpd_platform_init);
diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c
index 874ff32db366..00cfed3c3e1a 100644
--- a/drivers/firmware/ti_sci.c
+++ b/drivers/firmware/ti_sci.c
@@ -202,7 +202,8 @@ static int ti_sci_debugfs_create(struct platform_device *pdev,
 	info->debug_buffer[info->debug_region_size] = 0;
 
 	info->d = debugfs_create_file(strncat(debug_name, dev_name(dev),
-					      sizeof(debug_name)),
+					      sizeof(debug_name) -
+					      sizeof("ti_sci_debug@")),
 				      0444, NULL, info, &ti_sci_debug_fops);
 	if (IS_ERR(info->d))
 		return PTR_ERR(info->d);
diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c
index 61b50c40b87b..598e209efa2d 100644
--- a/drivers/gpio/gpio-104-dio-48e.c
+++ b/drivers/gpio/gpio-104-dio-48e.c
@@ -33,11 +33,11 @@
 
 static unsigned int base[MAX_NUM_DIO48E];
 static unsigned int num_dio48e;
-module_param_array(base, uint, &num_dio48e, 0);
+module_param_hw_array(base, uint, ioport, &num_dio48e, 0);
 MODULE_PARM_DESC(base, "ACCES 104-DIO-48E base addresses");
 
 static unsigned int irq[MAX_NUM_DIO48E];
-module_param_array(irq, uint, NULL, 0);
+module_param_hw_array(irq, uint, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "ACCES 104-DIO-48E interrupt line numbers");
 
 /**
diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c
index 337c048168d8..51f046e29ff7 100644
--- a/drivers/gpio/gpio-104-idi-48.c
+++ b/drivers/gpio/gpio-104-idi-48.c
@@ -33,11 +33,11 @@
 
 static unsigned int base[MAX_NUM_IDI_48];
 static unsigned int num_idi_48;
-module_param_array(base, uint, &num_idi_48, 0);
+module_param_hw_array(base, uint, ioport, &num_idi_48, 0);
 MODULE_PARM_DESC(base, "ACCES 104-IDI-48 base addresses");
 
 static unsigned int irq[MAX_NUM_IDI_48];
-module_param_array(irq, uint, NULL, 0);
+module_param_hw_array(irq, uint, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "ACCES 104-IDI-48 interrupt line numbers");
 
 /**
diff --git a/drivers/gpio/gpio-104-idio-16.c b/drivers/gpio/gpio-104-idio-16.c
index 5281e1cedb01..ec2ce34ff473 100644
--- a/drivers/gpio/gpio-104-idio-16.c
+++ b/drivers/gpio/gpio-104-idio-16.c
@@ -33,11 +33,11 @@
 
 static unsigned int base[MAX_NUM_IDIO_16];
 static unsigned int num_idio_16;
-module_param_array(base, uint, &num_idio_16, 0);
+module_param_hw_array(base, uint, ioport, &num_idio_16, 0);
 MODULE_PARM_DESC(base, "ACCES 104-IDIO-16 base addresses");
 
 static unsigned int irq[MAX_NUM_IDIO_16];
-module_param_array(irq, uint, NULL, 0);
+module_param_hw_array(irq, uint, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "ACCES 104-IDIO-16 interrupt line numbers");
 
 /**
diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
index ccea609676ee..4ca436e66bdb 100644
--- a/drivers/gpio/gpio-aspeed.c
+++ b/drivers/gpio/gpio-aspeed.c
@@ -646,6 +646,9 @@ static int enable_debounce(struct gpio_chip *chip, unsigned int offset,
 	int rc;
 	int i;
 
+	if (!gpio->clk)
+		return -EINVAL;
+
 	rc = usecs_to_cycles(gpio, usecs, &requested_cycles);
 	if (rc < 0) {
 		dev_warn(chip->parent, "Failed to convert %luus to cycles at %luHz: %d\n",
diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c
index 2197368cc899..e60156ec0c18 100644
--- a/drivers/gpio/gpio-crystalcove.c
+++ b/drivers/gpio/gpio-crystalcove.c
@@ -90,8 +90,18 @@ static inline int to_reg(int gpio, enum ctrl_register reg_type)
 {
 	int reg;
 
-	if (gpio == 94)
-		return GPIOPANELCTL;
+	if (gpio >= CRYSTALCOVE_GPIO_NUM) {
+		/*
+		 * Virtual GPIO called from ACPI, for now we only support
+		 * the panel ctl.
+		 */
+		switch (gpio) {
+		case 0x5e:
+			return GPIOPANELCTL;
+		default:
+			return -EOPNOTSUPP;
+		}
+	}
 
 	if (reg_type == CTRL_IN) {
 		if (gpio < 8)
@@ -130,36 +140,36 @@ static void crystalcove_update_irq_ctrl(struct crystalcove_gpio *cg, int gpio)
 static int crystalcove_gpio_dir_in(struct gpio_chip *chip, unsigned gpio)
 {
 	struct crystalcove_gpio *cg = gpiochip_get_data(chip);
+	int reg = to_reg(gpio, CTRL_OUT);
 
-	if (gpio > CRYSTALCOVE_VGPIO_NUM)
+	if (reg < 0)
 		return 0;
 
-	return regmap_write(cg->regmap, to_reg(gpio, CTRL_OUT),
-			    CTLO_INPUT_SET);
+	return regmap_write(cg->regmap, reg, CTLO_INPUT_SET);
 }
 
 static int crystalcove_gpio_dir_out(struct gpio_chip *chip, unsigned gpio,
 				    int value)
 {
 	struct crystalcove_gpio *cg = gpiochip_get_data(chip);
+	int reg = to_reg(gpio, CTRL_OUT);
 
-	if (gpio > CRYSTALCOVE_VGPIO_NUM)
+	if (reg < 0)
 		return 0;
 
-	return regmap_write(cg->regmap, to_reg(gpio, CTRL_OUT),
-			    CTLO_OUTPUT_SET | value);
+	return regmap_write(cg->regmap, reg, CTLO_OUTPUT_SET | value);
 }
 
 static int crystalcove_gpio_get(struct gpio_chip *chip, unsigned gpio)
 {
 	struct crystalcove_gpio *cg = gpiochip_get_data(chip);
-	int ret;
 	unsigned int val;
+	int ret, reg = to_reg(gpio, CTRL_IN);
 
-	if (gpio > CRYSTALCOVE_VGPIO_NUM)
+	if (reg < 0)
 		return 0;
 
-	ret = regmap_read(cg->regmap, to_reg(gpio, CTRL_IN), &val);
+	ret = regmap_read(cg->regmap, reg, &val);
 	if (ret)
 		return ret;
 
@@ -170,14 +180,15 @@ static void crystalcove_gpio_set(struct gpio_chip *chip,
 				 unsigned gpio, int value)
 {
 	struct crystalcove_gpio *cg = gpiochip_get_data(chip);
+	int reg = to_reg(gpio, CTRL_OUT);
 
-	if (gpio > CRYSTALCOVE_VGPIO_NUM)
+	if (reg < 0)
 		return;
 
 	if (value)
-		regmap_update_bits(cg->regmap, to_reg(gpio, CTRL_OUT), 1, 1);
+		regmap_update_bits(cg->regmap, reg, 1, 1);
 	else
-		regmap_update_bits(cg->regmap, to_reg(gpio, CTRL_OUT), 1, 0);
+		regmap_update_bits(cg->regmap, reg, 1, 0);
 }
 
 static int crystalcove_irq_type(struct irq_data *data, unsigned type)
@@ -185,6 +196,9 @@ static int crystalcove_irq_type(struct irq_data *data, unsigned type)
 	struct crystalcove_gpio *cg =
 		gpiochip_get_data(irq_data_get_irq_chip_data(data));
 
+	if (data->hwirq >= CRYSTALCOVE_GPIO_NUM)
+		return 0;
+
 	switch (type) {
 	case IRQ_TYPE_NONE:
 		cg->intcnt_value = CTLI_INTCNT_DIS;
@@ -235,8 +249,10 @@ static void crystalcove_irq_unmask(struct irq_data *data)
 	struct crystalcove_gpio *cg =
 		gpiochip_get_data(irq_data_get_irq_chip_data(data));
 
-	cg->set_irq_mask = false;
-	cg->update |= UPDATE_IRQ_MASK;
+	if (data->hwirq < CRYSTALCOVE_GPIO_NUM) {
+		cg->set_irq_mask = false;
+		cg->update |= UPDATE_IRQ_MASK;
+	}
 }
 
 static void crystalcove_irq_mask(struct irq_data *data)
@@ -244,8 +260,10 @@ static void crystalcove_irq_mask(struct irq_data *data)
 	struct crystalcove_gpio *cg =
 		gpiochip_get_data(irq_data_get_irq_chip_data(data));
 
-	cg->set_irq_mask = true;
-	cg->update |= UPDATE_IRQ_MASK;
+	if (data->hwirq < CRYSTALCOVE_GPIO_NUM) {
+		cg->set_irq_mask = true;
+		cg->update |= UPDATE_IRQ_MASK;
+	}
 }
 
 static struct irq_chip crystalcove_irqchip = {
diff --git a/drivers/gpio/gpio-gpio-mm.c b/drivers/gpio/gpio-gpio-mm.c
index fa4baa2543db..11ade5b288f8 100644
--- a/drivers/gpio/gpio-gpio-mm.c
+++ b/drivers/gpio/gpio-gpio-mm.c
@@ -31,7 +31,7 @@
 
 static unsigned int base[MAX_NUM_GPIOMM];
 static unsigned int num_gpiomm;
-module_param_array(base, uint, &num_gpiomm, 0);
+module_param_hw_array(base, uint, ioport, &num_gpiomm, 0);
 MODULE_PARM_DESC(base, "Diamond Systems GPIO-MM base addresses");
 
 /**
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index 19a92efabbef..c83ea68be792 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -721,7 +721,7 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
 	u32 set;
 
 	if (!of_device_is_compatible(mvchip->chip.of_node,
-				     "marvell,armada-370-xp-gpio"))
+				     "marvell,armada-370-gpio"))
 		return 0;
 
 	if (IS_ERR(mvchip->clk))
@@ -747,7 +747,7 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
 		set = U32_MAX;
 	else
 		return -EINVAL;
-	writel_relaxed(0, mvebu_gpioreg_blink_counter_select(mvchip));
+	writel_relaxed(set, mvebu_gpioreg_blink_counter_select(mvchip));
 
 	mvpwm = devm_kzalloc(dev, sizeof(struct mvebu_pwm), GFP_KERNEL);
 	if (!mvpwm)
@@ -768,6 +768,13 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
 	mvpwm->chip.dev = dev;
 	mvpwm->chip.ops = &mvebu_pwm_ops;
 	mvpwm->chip.npwm = mvchip->chip.ngpio;
+	/*
+	 * There may already be some PWM allocated, so we can't force
+	 * mvpwm->chip.base to a fixed point like mvchip->chip.base.
+	 * So, we let pwmchip_add() do the numbering and take the next free
+	 * region.
+	 */
+	mvpwm->chip.base = -1;
 
 	spin_lock_init(&mvpwm->lock);
 
@@ -845,7 +852,7 @@ static const struct of_device_id mvebu_gpio_of_match[] = {
 		.data	    = (void *) MVEBU_GPIO_SOC_VARIANT_ARMADAXP,
 	},
 	{
-		.compatible = "marvell,armada-370-xp-gpio",
+		.compatible = "marvell,armada-370-gpio",
 		.data	    = (void *) MVEBU_GPIO_SOC_VARIANT_ORION,
 	},
 	{
@@ -1121,7 +1128,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
 						 mvchip);
 	}
 
-	/* Armada 370/XP has simple PWM support for GPIO lines */
+	/* Some MVEBU SoCs have simple PWM support for GPIO lines */
 	if (IS_ENABLED(CONFIG_PWM))
 		return mvebu_pwm_probe(pdev, mvchip, id);
 
diff --git a/drivers/gpio/gpio-ws16c48.c b/drivers/gpio/gpio-ws16c48.c
index 87d63695dfcf..5037974ac063 100644
--- a/drivers/gpio/gpio-ws16c48.c
+++ b/drivers/gpio/gpio-ws16c48.c
@@ -30,11 +30,11 @@
 
 static unsigned int base[MAX_NUM_WS16C48];
 static unsigned int num_ws16c48;
-module_param_array(base, uint, &num_ws16c48, 0);
+module_param_hw_array(base, uint, ioport, &num_ws16c48, 0);
 MODULE_PARM_DESC(base, "WinSystems WS16C48 base addresses");
 
 static unsigned int irq[MAX_NUM_WS16C48];
-module_param_array(irq, uint, NULL, 0);
+module_param_hw_array(irq, uint, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "WinSystems WS16C48 interrupt line numbers");
 
 /**
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 2185232da823..8fa5fcd00e9a 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -201,7 +201,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
 			handler = acpi_gpio_irq_handler_evt;
 	}
 	if (!handler)
-		return AE_BAD_PARAMETER;
+		return AE_OK;
 
 	pin = acpi_gpiochip_pin_to_gpio_offset(chip->gpiodev, pin);
 	if (pin < 0)
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 5db44139cef8..a42a1eea5714 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -708,7 +708,8 @@ static irqreturn_t lineevent_irq_thread(int irq, void *p)
 
 	ge.timestamp = ktime_get_real_ns();
 
-	if (le->eflags & GPIOEVENT_REQUEST_BOTH_EDGES) {
+	if (le->eflags & GPIOEVENT_REQUEST_RISING_EDGE
+	    && le->eflags & GPIOEVENT_REQUEST_FALLING_EDGE) {
 		int level = gpiod_get_value_cansleep(le->desc);
 
 		if (level)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6a8129949333..833c3c16501a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -110,6 +110,7 @@ extern int amdgpu_pos_buf_per_se;
 extern int amdgpu_cntl_sb_buf_per_se;
 extern int amdgpu_param_buf_per_se;
 
+#define AMDGPU_DEFAULT_GTT_SIZE_MB		3072ULL /* 3GB by default */
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
 #define AMDGPU_MAX_USEC_TIMEOUT			100000	/* 100 ms */
 #define AMDGPU_FENCE_JIFFIES_TIMEOUT		(HZ / 2)
@@ -966,6 +967,8 @@ struct amdgpu_gfx_config {
 	unsigned mc_arb_ramcfg;
 	unsigned gb_addr_config;
 	unsigned num_rbs;
+	unsigned gs_vgt_table_depth;
+	unsigned gs_prim_buffer_depth;
 
 	uint32_t tile_mode_array[32];
 	uint32_t macrotile_mode_array[16];
@@ -980,6 +983,7 @@ struct amdgpu_gfx_config {
 struct amdgpu_cu_info {
 	uint32_t number; /* total active CU number */
 	uint32_t ao_cu_mask;
+	uint32_t wave_front_size;
 	uint32_t bitmap[4][4];
 };
 
@@ -1000,10 +1004,10 @@ struct amdgpu_ngg_buf {
 };
 
 enum {
-	PRIM = 0,
-	POS,
-	CNTL,
-	PARAM,
+	NGG_PRIM = 0,
+	NGG_POS,
+	NGG_CNTL,
+	NGG_PARAM,
 	NGG_BUF_MAX
 };
 
@@ -1125,6 +1129,7 @@ struct amdgpu_job {
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
 	bool                    vm_needs_flush;
+	bool			need_pipeline_sync;
 	unsigned		vm_id;
 	uint64_t		vm_pd_addr;
 	uint32_t		gds_base, gds_size;
@@ -1704,9 +1709,6 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
 #define WREG32_FIELD_OFFSET(reg, offset, field, val)	\
 	WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
 
-#define WREG32_FIELD15(ip, idx, reg, field, val)	\
-	WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
-
 /*
  * BIOS helpers.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index ad4329922f79..1e8e1123ddf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -693,6 +693,10 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
 			DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n",
 				 adev->clock.default_dispclk / 100);
 			adev->clock.default_dispclk = 60000;
+		} else if (adev->clock.default_dispclk <= 60000) {
+			DRM_INFO("Changing default dispclk from %dMhz to 625Mhz\n",
+				 adev->clock.default_dispclk / 100);
+			adev->clock.default_dispclk = 62500;
 		}
 		adev->clock.dp_extclk =
 			le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
@@ -1727,6 +1731,12 @@ void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev)
 {
 	int i;
 
+	/*
+	 * VBIOS will check ASIC_INIT_COMPLETE bit to decide if
+	 * execute ASIC_Init posting via driver
+	 */
+	adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK;
+
 	for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++)
 		WREG32(mmBIOS_SCRATCH_0 + i, adev->bios_scratch[i]);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 4b9abd68e04f..4bdda56fccee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -26,6 +26,7 @@
 #include "atomfirmware.h"
 #include "amdgpu_atomfirmware.h"
 #include "atom.h"
+#include "atombios.h"
 
 #define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t))
 
@@ -77,10 +78,29 @@ void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev)
 {
 	int i;
 
+	/*
+	 * VBIOS will check ASIC_INIT_COMPLETE bit to decide if
+	 * execute ASIC_Init posting via driver
+	 */
+	adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK;
+
 	for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++)
 		WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]);
 }
 
+void amdgpu_atomfirmware_scratch_regs_engine_hung(struct amdgpu_device *adev,
+						  bool hung)
+{
+	u32 tmp = RREG32(adev->bios_scratch_reg_offset + 3);
+
+	if (hung)
+		tmp |= ATOM_S3_ASIC_GUI_ENGINE_HUNG;
+	else
+		tmp &= ~ATOM_S3_ASIC_GUI_ENGINE_HUNG;
+
+	WREG32(adev->bios_scratch_reg_offset + 3, tmp);
+}
+
 int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
 {
 	struct atom_context *ctx = adev->mode_info.atom_context;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index d0c4dcd7fa96..a2c3ebe22c71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -28,6 +28,8 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev)
 void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
 void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev);
 void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev);
+void amdgpu_atomfirmware_scratch_regs_engine_hung(struct amdgpu_device *adev,
+						  bool hung);
 int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index cc97eee93226..1beae5b930d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -117,8 +117,13 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
 	}
 
 out_cleanup:
+	/* Check error value now. The value can be overwritten when clean up.*/
+	if (r) {
+		DRM_ERROR("Error while benchmarking BO move.\n");
+	}
+
 	if (sobj) {
-		r = amdgpu_bo_reserve(sobj, false);
+		r = amdgpu_bo_reserve(sobj, true);
 		if (likely(r == 0)) {
 			amdgpu_bo_unpin(sobj);
 			amdgpu_bo_unreserve(sobj);
@@ -126,17 +131,13 @@ out_cleanup:
 		amdgpu_bo_unref(&sobj);
 	}
 	if (dobj) {
-		r = amdgpu_bo_reserve(dobj, false);
+		r = amdgpu_bo_reserve(dobj, true);
 		if (likely(r == 0)) {
 			amdgpu_bo_unpin(dobj);
 			amdgpu_bo_unreserve(dobj);
 		}
 		amdgpu_bo_unref(&dobj);
 	}
-
-	if (r) {
-		DRM_ERROR("Error while benchmarking BO move.\n");
-	}
 }
 
 void amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 1c7e6c28f93a..c6dba1eaefbd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -42,82 +42,6 @@ struct amdgpu_cgs_device {
 	struct amdgpu_device *adev =					\
 		((struct amdgpu_cgs_device *)cgs_device)->adev
 
-static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type,
-				   uint64_t *mc_start, uint64_t *mc_size,
-				   uint64_t *mem_size)
-{
-	CGS_FUNC_ADEV;
-	switch(type) {
-	case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB:
-	case CGS_GPU_MEM_TYPE__VISIBLE_FB:
-		*mc_start = 0;
-		*mc_size = adev->mc.visible_vram_size;
-		*mem_size = adev->mc.visible_vram_size - adev->vram_pin_size;
-		break;
-	case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB:
-	case CGS_GPU_MEM_TYPE__INVISIBLE_FB:
-		*mc_start = adev->mc.visible_vram_size;
-		*mc_size = adev->mc.real_vram_size - adev->mc.visible_vram_size;
-		*mem_size = *mc_size;
-		break;
-	case CGS_GPU_MEM_TYPE__GART_CACHEABLE:
-	case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE:
-		*mc_start = adev->mc.gtt_start;
-		*mc_size = adev->mc.gtt_size;
-		*mem_size = adev->mc.gtt_size - adev->gart_pin_size;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void *kmem,
-				uint64_t size,
-				uint64_t min_offset, uint64_t max_offset,
-				cgs_handle_t *kmem_handle, uint64_t *mcaddr)
-{
-	CGS_FUNC_ADEV;
-	int ret;
-	struct amdgpu_bo *bo;
-	struct page *kmem_page = vmalloc_to_page(kmem);
-	int npages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT;
-
-	struct sg_table *sg = drm_prime_pages_to_sg(&kmem_page, npages);
-	ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false,
-			       AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL, &bo);
-	if (ret)
-		return ret;
-	ret = amdgpu_bo_reserve(bo, false);
-	if (unlikely(ret != 0))
-		return ret;
-
-	/* pin buffer into GTT */
-	ret = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_GTT,
-				       min_offset, max_offset, mcaddr);
-	amdgpu_bo_unreserve(bo);
-
-	*kmem_handle = (cgs_handle_t)bo;
-	return ret;
-}
-
-static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, cgs_handle_t kmem_handle)
-{
-	struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle;
-
-	if (obj) {
-		int r = amdgpu_bo_reserve(obj, false);
-		if (likely(r == 0)) {
-			amdgpu_bo_unpin(obj);
-			amdgpu_bo_unreserve(obj);
-		}
-		amdgpu_bo_unref(&obj);
-
-	}
-	return 0;
-}
-
 static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
 				    enum cgs_gpu_mem_type type,
 				    uint64_t size, uint64_t align,
@@ -215,7 +139,7 @@ static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h
 	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
 
 	if (obj) {
-		int r = amdgpu_bo_reserve(obj, false);
+		int r = amdgpu_bo_reserve(obj, true);
 		if (likely(r == 0)) {
 			amdgpu_bo_kunmap(obj);
 			amdgpu_bo_unpin(obj);
@@ -239,7 +163,7 @@ static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h
 	min_offset = obj->placements[0].fpfn << PAGE_SHIFT;
 	max_offset = obj->placements[0].lpfn << PAGE_SHIFT;
 
-	r = amdgpu_bo_reserve(obj, false);
+	r = amdgpu_bo_reserve(obj, true);
 	if (unlikely(r != 0))
 		return r;
 	r = amdgpu_bo_pin_restricted(obj, obj->prefered_domains,
@@ -252,7 +176,7 @@ static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t
 {
 	int r;
 	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-	r = amdgpu_bo_reserve(obj, false);
+	r = amdgpu_bo_reserve(obj, true);
 	if (unlikely(r != 0))
 		return r;
 	r = amdgpu_bo_unpin(obj);
@@ -265,7 +189,7 @@ static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h
 {
 	int r;
 	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-	r = amdgpu_bo_reserve(obj, false);
+	r = amdgpu_bo_reserve(obj, true);
 	if (unlikely(r != 0))
 		return r;
 	r = amdgpu_bo_kmap(obj, map);
@@ -277,7 +201,7 @@ static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t
 {
 	int r;
 	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-	r = amdgpu_bo_reserve(obj, false);
+	r = amdgpu_bo_reserve(obj, true);
 	if (unlikely(r != 0))
 		return r;
 	amdgpu_bo_kunmap(obj);
@@ -349,62 +273,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
 	WARN(1, "Invalid indirect register space");
 }
 
-static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device *cgs_device, unsigned addr)
-{
-	CGS_FUNC_ADEV;
-	uint8_t val;
-	int ret = pci_read_config_byte(adev->pdev, addr, &val);
-	if (WARN(ret, "pci_read_config_byte error"))
-		return 0;
-	return val;
-}
-
-static uint16_t amdgpu_cgs_read_pci_config_word(struct cgs_device *cgs_device, unsigned addr)
-{
-	CGS_FUNC_ADEV;
-	uint16_t val;
-	int ret = pci_read_config_word(adev->pdev, addr, &val);
-	if (WARN(ret, "pci_read_config_word error"))
-		return 0;
-	return val;
-}
-
-static uint32_t amdgpu_cgs_read_pci_config_dword(struct cgs_device *cgs_device,
-						 unsigned addr)
-{
-	CGS_FUNC_ADEV;
-	uint32_t val;
-	int ret = pci_read_config_dword(adev->pdev, addr, &val);
-	if (WARN(ret, "pci_read_config_dword error"))
-		return 0;
-	return val;
-}
-
-static void amdgpu_cgs_write_pci_config_byte(struct cgs_device *cgs_device, unsigned addr,
-					     uint8_t value)
-{
-	CGS_FUNC_ADEV;
-	int ret = pci_write_config_byte(adev->pdev, addr, value);
-	WARN(ret, "pci_write_config_byte error");
-}
-
-static void amdgpu_cgs_write_pci_config_word(struct cgs_device *cgs_device, unsigned addr,
-					     uint16_t value)
-{
-	CGS_FUNC_ADEV;
-	int ret = pci_write_config_word(adev->pdev, addr, value);
-	WARN(ret, "pci_write_config_word error");
-}
-
-static void amdgpu_cgs_write_pci_config_dword(struct cgs_device *cgs_device, unsigned addr,
-					      uint32_t value)
-{
-	CGS_FUNC_ADEV;
-	int ret = pci_write_config_dword(adev->pdev, addr, value);
-	WARN(ret, "pci_write_config_dword error");
-}
-
-
 static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device,
 				       enum cgs_resource_type resource_type,
 				       uint64_t size,
@@ -477,56 +345,6 @@ static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigne
 		adev->mode_info.atom_context, table, args);
 }
 
-static int amdgpu_cgs_create_pm_request(struct cgs_device *cgs_device, cgs_handle_t *request)
-{
-	/* TODO */
-	return 0;
-}
-
-static int amdgpu_cgs_destroy_pm_request(struct cgs_device *cgs_device, cgs_handle_t request)
-{
-	/* TODO */
-	return 0;
-}
-
-static int amdgpu_cgs_set_pm_request(struct cgs_device *cgs_device, cgs_handle_t request,
-				     int active)
-{
-	/* TODO */
-	return 0;
-}
-
-static int amdgpu_cgs_pm_request_clock(struct cgs_device *cgs_device, cgs_handle_t request,
-				       enum cgs_clock clock, unsigned freq)
-{
-	/* TODO */
-	return 0;
-}
-
-static int amdgpu_cgs_pm_request_engine(struct cgs_device *cgs_device, cgs_handle_t request,
-					enum cgs_engine engine, int powered)
-{
-	/* TODO */
-	return 0;
-}
-
-
-
-static int amdgpu_cgs_pm_query_clock_limits(struct cgs_device *cgs_device,
-					    enum cgs_clock clock,
-					    struct cgs_clock_limits *limits)
-{
-	/* TODO */
-	return 0;
-}
-
-static int amdgpu_cgs_set_camera_voltages(struct cgs_device *cgs_device, uint32_t mask,
-					  const uint32_t *voltages)
-{
-	DRM_ERROR("not implemented");
-	return -EPERM;
-}
-
 struct cgs_irq_params {
 	unsigned src_id;
 	cgs_irq_source_set_func_t set;
@@ -1269,9 +1087,6 @@ static int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
 }
 
 static const struct cgs_ops amdgpu_cgs_ops = {
-	.gpu_mem_info = amdgpu_cgs_gpu_mem_info,
-	.gmap_kmem = amdgpu_cgs_gmap_kmem,
-	.gunmap_kmem = amdgpu_cgs_gunmap_kmem,
 	.alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem,
 	.free_gpu_mem = amdgpu_cgs_free_gpu_mem,
 	.gmap_gpu_mem = amdgpu_cgs_gmap_gpu_mem,
@@ -1282,23 +1097,10 @@ static const struct cgs_ops amdgpu_cgs_ops = {
 	.write_register = amdgpu_cgs_write_register,
 	.read_ind_register = amdgpu_cgs_read_ind_register,
 	.write_ind_register = amdgpu_cgs_write_ind_register,
-	.read_pci_config_byte = amdgpu_cgs_read_pci_config_byte,
-	.read_pci_config_word = amdgpu_cgs_read_pci_config_word,
-	.read_pci_config_dword = amdgpu_cgs_read_pci_config_dword,
-	.write_pci_config_byte = amdgpu_cgs_write_pci_config_byte,
-	.write_pci_config_word = amdgpu_cgs_write_pci_config_word,
-	.write_pci_config_dword = amdgpu_cgs_write_pci_config_dword,
 	.get_pci_resource = amdgpu_cgs_get_pci_resource,
 	.atom_get_data_table = amdgpu_cgs_atom_get_data_table,
 	.atom_get_cmd_table_revs = amdgpu_cgs_atom_get_cmd_table_revs,
 	.atom_exec_cmd_table = amdgpu_cgs_atom_exec_cmd_table,
-	.create_pm_request = amdgpu_cgs_create_pm_request,
-	.destroy_pm_request = amdgpu_cgs_destroy_pm_request,
-	.set_pm_request = amdgpu_cgs_set_pm_request,
-	.pm_request_clock = amdgpu_cgs_pm_request_clock,
-	.pm_request_engine = amdgpu_cgs_pm_request_engine,
-	.pm_query_clock_limits = amdgpu_cgs_pm_query_clock_limits,
-	.set_camera_voltages = amdgpu_cgs_set_camera_voltages,
 	.get_firmware_info = amdgpu_cgs_get_firmware_info,
 	.rel_firmware = amdgpu_cgs_rel_firmware,
 	.set_powergating_state = amdgpu_cgs_set_powergating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ec71b9320561..4e6b9501ab0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1074,6 +1074,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	job->uf_sequence = cs->out.handle;
 	amdgpu_job_free_resources(job);
+	amdgpu_cs_parser_fini(p, 0, true);
 
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
@@ -1129,7 +1130,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		goto out;
 
 	r = amdgpu_cs_submit(&parser, cs);
+	if (r)
+		goto out;
 
+	return 0;
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
 	return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index cf0500671353..90d1ac8a80f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -273,6 +273,9 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 
 	spin_lock(&ctx->ring_lock);
 
+	if (seq == ~0ull)
+		seq = ctx->rings[ring->idx].sequence - 1;
+
 	if (seq >= cring->sequence) {
 		spin_unlock(&ctx->ring_lock);
 		return ERR_PTR(-EINVAL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 483660742f75..43ca16b6eee2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -53,7 +53,6 @@
 #include "bif/bif_4_1_d.h"
 #include <linux/pci.h>
 #include <linux/firmware.h>
-#include "amdgpu_pm.h"
 
 static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
 static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev);
@@ -350,7 +349,7 @@ static void amdgpu_vram_scratch_fini(struct amdgpu_device *adev)
 	if (adev->vram_scratch.robj == NULL) {
 		return;
 	}
-	r = amdgpu_bo_reserve(adev->vram_scratch.robj, false);
+	r = amdgpu_bo_reserve(adev->vram_scratch.robj, true);
 	if (likely(r == 0)) {
 		amdgpu_bo_kunmap(adev->vram_scratch.robj);
 		amdgpu_bo_unpin(adev->vram_scratch.robj);
@@ -422,12 +421,11 @@ static int amdgpu_doorbell_init(struct amdgpu_device *adev)
 	if (adev->doorbell.num_doorbells == 0)
 		return -EINVAL;
 
-	adev->doorbell.ptr = ioremap(adev->doorbell.base, adev->doorbell.num_doorbells * sizeof(u32));
-	if (adev->doorbell.ptr == NULL) {
+	adev->doorbell.ptr = ioremap(adev->doorbell.base,
+				     adev->doorbell.num_doorbells *
+				     sizeof(u32));
+	if (adev->doorbell.ptr == NULL)
 		return -ENOMEM;
-	}
-	DRM_INFO("doorbell mmio base: 0x%08X\n", (uint32_t)adev->doorbell.base);
-	DRM_INFO("doorbell mmio size: %u\n", (unsigned)adev->doorbell.size);
 
 	return 0;
 }
@@ -1584,9 +1582,6 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
 		}
 	}
 
-	amdgpu_dpm_enable_uvd(adev, false);
-	amdgpu_dpm_enable_vce(adev, false);
-
 	return 0;
 }
 
@@ -1854,7 +1849,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	/* mutex initialization are all done here so we
 	 * can recall function without having locking issues */
-	mutex_init(&adev->vm_manager.lock);
 	atomic_set(&adev->irq.ih.lock, 0);
 	mutex_init(&adev->firmware.mutex);
 	mutex_init(&adev->pm.mutex);
@@ -2071,7 +2065,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 
 	DRM_INFO("amdgpu: finishing device.\n");
 	adev->shutdown = true;
-	drm_crtc_force_disable_all(adev->ddev);
+	if (adev->mode_info.mode_config_initialized)
+		drm_crtc_force_disable_all(adev->ddev);
 	/* evict vram memory */
 	amdgpu_bo_evict_vram(adev);
 	amdgpu_ib_pool_fini(adev);
@@ -2146,7 +2141,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 
 		if (amdgpu_crtc->cursor_bo) {
 			struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-			r = amdgpu_bo_reserve(aobj, false);
+			r = amdgpu_bo_reserve(aobj, true);
 			if (r == 0) {
 				amdgpu_bo_unpin(aobj);
 				amdgpu_bo_unreserve(aobj);
@@ -2159,7 +2154,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 		robj = gem_to_amdgpu_bo(rfb->obj);
 		/* don't unpin kernel fb objects */
 		if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
-			r = amdgpu_bo_reserve(robj, false);
+			r = amdgpu_bo_reserve(robj, true);
 			if (r == 0) {
 				amdgpu_bo_unpin(robj);
 				amdgpu_bo_unreserve(robj);
@@ -2216,7 +2211,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	struct drm_connector *connector;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct drm_crtc *crtc;
-	int r;
+	int r = 0;
 
 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
@@ -2228,11 +2223,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 		pci_set_power_state(dev->pdev, PCI_D0);
 		pci_restore_state(dev->pdev);
 		r = pci_enable_device(dev->pdev);
-		if (r) {
-			if (fbcon)
-				console_unlock();
-			return r;
-		}
+		if (r)
+			goto unlock;
 	}
 	if (adev->is_atom_fw)
 		amdgpu_atomfirmware_scratch_regs_restore(adev);
@@ -2249,7 +2241,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	r = amdgpu_resume(adev);
 	if (r) {
 		DRM_ERROR("amdgpu_resume failed (%d).\n", r);
-		return r;
+		goto unlock;
 	}
 	amdgpu_fence_driver_resume(adev);
 
@@ -2260,11 +2252,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	}
 
 	r = amdgpu_late_init(adev);
-	if (r) {
-		if (fbcon)
-			console_unlock();
-		return r;
-	}
+	if (r)
+		goto unlock;
 
 	/* pin cursors */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -2272,7 +2261,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 
 		if (amdgpu_crtc->cursor_bo) {
 			struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-			r = amdgpu_bo_reserve(aobj, false);
+			r = amdgpu_bo_reserve(aobj, true);
 			if (r == 0) {
 				r = amdgpu_bo_pin(aobj,
 						  AMDGPU_GEM_DOMAIN_VRAM,
@@ -2314,12 +2303,14 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	dev->dev->power.disable_depth--;
 #endif
 
-	if (fbcon) {
+	if (fbcon)
 		amdgpu_fbdev_set_suspend(adev, 0);
+
+unlock:
+	if (fbcon)
 		console_unlock();
-	}
 
-	return 0;
+	return r;
 }
 
 static bool amdgpu_check_soft_reset(struct amdgpu_device *adev)
@@ -2430,25 +2421,37 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
 	uint32_t domain;
 	int r;
 
-       if (!bo->shadow)
-               return 0;
+	if (!bo->shadow)
+		return 0;
+
+	r = amdgpu_bo_reserve(bo, true);
+	if (r)
+		return r;
+	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
+	/* if bo has been evicted, then no need to recover */
+	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+		r = amdgpu_bo_validate(bo->shadow);
+		if (r) {
+			DRM_ERROR("bo validate failed!\n");
+			goto err;
+		}
 
-       r = amdgpu_bo_reserve(bo, false);
-       if (r)
-               return r;
-       domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-       /* if bo has been evicted, then no need to recover */
-       if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
-               r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
+		r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem);
+		if (r) {
+			DRM_ERROR("%p bind failed\n", bo->shadow);
+			goto err;
+		}
+
+		r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
 						 NULL, fence, true);
-               if (r) {
-                       DRM_ERROR("recover page table failed!\n");
-                       goto err;
-               }
-       }
+		if (r) {
+			DRM_ERROR("recover page table failed!\n");
+			goto err;
+		}
+	}
 err:
-       amdgpu_bo_unreserve(bo);
-       return r;
+	amdgpu_bo_unreserve(bo);
+	return r;
 }
 
 /**
@@ -2520,6 +2523,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
 	ring = adev->mman.buffer_funcs_ring;
 	mutex_lock(&adev->shadow_list_lock);
 	list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
+		next = NULL;
 		amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
 		if (fence) {
 			r = dma_fence_wait(fence, false);
@@ -2593,7 +2597,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring)
+		if (!ring || !ring->sched.thread)
 			continue;
 		kthread_park(ring->sched.thread);
 		amd_sched_hw_job_reset(&ring->sched);
@@ -2666,6 +2670,7 @@ retry:
 			DRM_INFO("recover vram bo from shadow\n");
 			mutex_lock(&adev->shadow_list_lock);
 			list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
+				next = NULL;
 				amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
 				if (fence) {
 					r = dma_fence_wait(fence, false);
@@ -2688,7 +2693,8 @@ retry:
 		}
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = adev->rings[i];
-			if (!ring)
+
+			if (!ring || !ring->sched.thread)
 				continue;
 
 			amd_sched_job_recovery(&ring->sched);
@@ -2697,7 +2703,7 @@ retry:
 	} else {
 		dev_err(adev->dev, "asic resume failed (%d).\n", r);
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			if (adev->rings[i]) {
+			if (adev->rings[i] && adev->rings[i]->sched.thread) {
 				kthread_unpark(adev->rings[i]->sched.thread);
 			}
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 96926a221bd5..cdf2ab20166a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -123,7 +123,7 @@ static void amdgpu_unpin_work_func(struct work_struct *__work)
 	int r;
 
 	/* unpin of the old buffer */
-	r = amdgpu_bo_reserve(work->old_abo, false);
+	r = amdgpu_bo_reserve(work->old_abo, true);
 	if (likely(r == 0)) {
 		r = amdgpu_bo_unpin(work->old_abo);
 		if (unlikely(r != 0)) {
@@ -138,52 +138,11 @@ static void amdgpu_unpin_work_func(struct work_struct *__work)
 	kfree(work);
 }
 
-
-static void amdgpu_flip_work_cleanup(struct amdgpu_flip_work *work)
-{
-	int i;
-
-	amdgpu_bo_unref(&work->old_abo);
-	dma_fence_put(work->excl);
-	for (i = 0; i < work->shared_count; ++i)
-		dma_fence_put(work->shared[i]);
-	kfree(work->shared);
-	kfree(work);
-}
-
-static void amdgpu_flip_cleanup_unreserve(struct amdgpu_flip_work *work,
-					  struct amdgpu_bo *new_abo)
-{
-	amdgpu_bo_unreserve(new_abo);
-	amdgpu_flip_work_cleanup(work);
-}
-
-static void amdgpu_flip_cleanup_unpin(struct amdgpu_flip_work *work,
-				      struct amdgpu_bo *new_abo)
-{
-	if (unlikely(amdgpu_bo_unpin(new_abo) != 0))
-		DRM_ERROR("failed to unpin new abo in error path\n");
-	amdgpu_flip_cleanup_unreserve(work, new_abo);
-}
-
-void amdgpu_crtc_cleanup_flip_ctx(struct amdgpu_flip_work *work,
-				  struct amdgpu_bo *new_abo)
-{
-	if (unlikely(amdgpu_bo_reserve(new_abo, false) != 0)) {
-		DRM_ERROR("failed to reserve new abo in error path\n");
-		amdgpu_flip_work_cleanup(work);
-		return;
-	}
-	amdgpu_flip_cleanup_unpin(work, new_abo);
-}
-
-int amdgpu_crtc_prepare_flip(struct drm_crtc *crtc,
-			     struct drm_framebuffer *fb,
-			     struct drm_pending_vblank_event *event,
-			     uint32_t page_flip_flags,
-			     uint32_t target,
-			     struct amdgpu_flip_work **work_p,
-			     struct amdgpu_bo **new_abo_p)
+int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
+				 struct drm_framebuffer *fb,
+				 struct drm_pending_vblank_event *event,
+				 uint32_t page_flip_flags, uint32_t target,
+				 struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
@@ -196,7 +155,7 @@ int amdgpu_crtc_prepare_flip(struct drm_crtc *crtc,
 	unsigned long flags;
 	u64 tiling_flags;
 	u64 base;
-	int r;
+	int i, r;
 
 	work = kzalloc(sizeof *work, GFP_KERNEL);
 	if (work == NULL)
@@ -257,80 +216,41 @@ int amdgpu_crtc_prepare_flip(struct drm_crtc *crtc,
 		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 		r = -EBUSY;
 		goto pflip_cleanup;
-
 	}
-	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
-
-	*work_p = work;
-	*new_abo_p = new_abo;
-
-	return 0;
-
-pflip_cleanup:
-	amdgpu_crtc_cleanup_flip_ctx(work, new_abo);
-	return r;
-
-unpin:
-	amdgpu_flip_cleanup_unpin(work, new_abo);
-	return r;
-
-unreserve:
-	amdgpu_flip_cleanup_unreserve(work, new_abo);
-	return r;
 
-cleanup:
-	amdgpu_flip_work_cleanup(work);
-	return r;
-
-}
-
-void amdgpu_crtc_submit_flip(struct drm_crtc *crtc,
-			     struct drm_framebuffer *fb,
-			     struct amdgpu_flip_work *work,
-			     struct amdgpu_bo *new_abo)
-{
-	unsigned long flags;
-	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
-	spin_lock_irqsave(&crtc->dev->event_lock, flags);
 	amdgpu_crtc->pflip_status = AMDGPU_FLIP_PENDING;
 	amdgpu_crtc->pflip_works = work;
 
+
+	DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n",
+					 amdgpu_crtc->crtc_id, amdgpu_crtc, work);
 	/* update crtc fb */
 	crtc->primary->fb = fb;
 	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
-
-	DRM_DEBUG_DRIVER(
-			"crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n",
-			amdgpu_crtc->crtc_id, amdgpu_crtc, work);
-
 	amdgpu_flip_work_func(&work->flip_work.work);
-}
-
-int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
-				 struct drm_framebuffer *fb,
-				 struct drm_pending_vblank_event *event,
-				 uint32_t page_flip_flags,
-				 uint32_t target,
-				 struct drm_modeset_acquire_ctx *ctx)
-{
-	struct amdgpu_bo *new_abo;
-	struct amdgpu_flip_work *work;
-	int r;
+	return 0;
 
-	r = amdgpu_crtc_prepare_flip(crtc,
-				     fb,
-				     event,
-				     page_flip_flags,
-				     target,
-				     &work,
-				     &new_abo);
-	if (r)
-		return r;
+pflip_cleanup:
+	if (unlikely(amdgpu_bo_reserve(new_abo, false) != 0)) {
+		DRM_ERROR("failed to reserve new abo in error path\n");
+		goto cleanup;
+	}
+unpin:
+	if (unlikely(amdgpu_bo_unpin(new_abo) != 0)) {
+		DRM_ERROR("failed to unpin new abo in error path\n");
+	}
+unreserve:
+	amdgpu_bo_unreserve(new_abo);
 
-	amdgpu_crtc_submit_flip(crtc, fb, work, new_abo);
+cleanup:
+	amdgpu_bo_unref(&work->old_abo);
+	dma_fence_put(work->excl);
+	for (i = 0; i < work->shared_count; ++i)
+		dma_fence_put(work->shared[i]);
+	kfree(work->shared);
+	kfree(work);
 
-	return 0;
+	return r;
 }
 
 int amdgpu_crtc_set_config(struct drm_mode_set *set,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4e0f7d2d87f1..ab6b0d0febab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -63,9 +63,11 @@
  * - 3.11.0 - Add support for sensor query info (clocks, temp, etc).
  * - 3.12.0 - Add query for double offchip LDS buffers
  * - 3.13.0 - Add PRT support
+ * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality
+ * - 3.15.0 - Export more gpu info for gfx9
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	13
+#define KMS_DRIVER_MINOR	15
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
@@ -447,13 +449,16 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+	{0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	/* Vega 10 */
 	{0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x6862, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x6863, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x6864, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x6867, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0, 0, 0}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index a48142d930c6..c0d8c6ff6380 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -112,7 +112,7 @@ static void amdgpufb_destroy_pinned_object(struct drm_gem_object *gobj)
 	struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
 	int ret;
 
-	ret = amdgpu_bo_reserve(abo, false);
+	ret = amdgpu_bo_reserve(abo, true);
 	if (likely(ret == 0)) {
 		amdgpu_bo_kunmap(abo);
 		amdgpu_bo_unpin(abo);
@@ -425,10 +425,15 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
 
 void amdgpu_fbdev_restore_mode(struct amdgpu_device *adev)
 {
-	struct amdgpu_fbdev *afbdev = adev->mode_info.rfbdev;
+	struct amdgpu_fbdev *afbdev;
 	struct drm_fb_helper *fb_helper;
 	int ret;
 
+	if (!adev)
+		return;
+
+	afbdev = adev->mode_info.rfbdev;
+
 	if (!afbdev)
 		return;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 2ee327d69775..902e6015abca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -186,7 +186,7 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
 	if (adev->gart.robj == NULL) {
 		return;
 	}
-	r = amdgpu_bo_reserve(adev->gart.robj, false);
+	r = amdgpu_bo_reserve(adev->gart.robj, true);
 	if (likely(r == 0)) {
 		amdgpu_bo_kunmap(adev->gart.robj);
 		amdgpu_bo_unpin(adev->gart.robj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 03a9c5cad222..94cb91cf93eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -139,6 +139,35 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
 	return 0;
 }
 
+static int amdgpu_gem_vm_check(void *param, struct amdgpu_bo *bo)
+{
+	/* if anything is swapped out don't swap it in here,
+	   just abort and wait for the next CS */
+	if (!amdgpu_bo_gpu_accessible(bo))
+		return -ERESTARTSYS;
+
+	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
+		return -ERESTARTSYS;
+
+	return 0;
+}
+
+static bool amdgpu_gem_vm_ready(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm,
+				struct list_head *list)
+{
+	struct ttm_validate_buffer *entry;
+
+	list_for_each_entry(entry, list, head) {
+		struct amdgpu_bo *bo =
+			container_of(entry->bo, struct amdgpu_bo, tbo);
+		if (amdgpu_gem_vm_check(NULL, bo))
+			return false;
+	}
+
+	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_vm_check, NULL);
+}
+
 void amdgpu_gem_object_close(struct drm_gem_object *obj,
 			     struct drm_file *file_priv)
 {
@@ -148,15 +177,13 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	struct amdgpu_vm *vm = &fpriv->vm;
 
 	struct amdgpu_bo_list_entry vm_pd;
-	struct list_head list, duplicates;
+	struct list_head list;
 	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
 	struct amdgpu_bo_va *bo_va;
-	struct dma_fence *fence = NULL;
 	int r;
 
 	INIT_LIST_HEAD(&list);
-	INIT_LIST_HEAD(&duplicates);
 
 	tv.bo = &bo->tbo;
 	tv.shared = true;
@@ -164,16 +191,18 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 
 	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
 	if (r) {
 		dev_err(adev->dev, "leaking bo va because "
 			"we fail to reserve bo (%d)\n", r);
 		return;
 	}
 	bo_va = amdgpu_vm_bo_find(vm, bo);
-	if (bo_va) {
-		if (--bo_va->ref_count == 0) {
-			amdgpu_vm_bo_rmv(adev, bo_va);
+	if (bo_va && --bo_va->ref_count == 0) {
+		amdgpu_vm_bo_rmv(adev, bo_va);
+
+		if (amdgpu_gem_vm_ready(adev, vm, &list)) {
+			struct dma_fence *fence = NULL;
 
 			r = amdgpu_vm_clear_freed(adev, vm, &fence);
 			if (unlikely(r)) {
@@ -502,19 +531,6 @@ out:
 	return r;
 }
 
-static int amdgpu_gem_va_check(void *param, struct amdgpu_bo *bo)
-{
-	/* if anything is swapped out don't swap it in here,
-	   just abort and wait for the next CS */
-	if (!amdgpu_bo_gpu_accessible(bo))
-		return -ERESTARTSYS;
-
-	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
-		return -ERESTARTSYS;
-
-	return 0;
-}
-
 /**
  * amdgpu_gem_va_update_vm -update the bo_va in its VM
  *
@@ -533,19 +549,9 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 				    struct list_head *list,
 				    uint32_t operation)
 {
-	struct ttm_validate_buffer *entry;
 	int r = -ERESTARTSYS;
 
-	list_for_each_entry(entry, list, head) {
-		struct amdgpu_bo *bo =
-			container_of(entry->bo, struct amdgpu_bo, tbo);
-		if (amdgpu_gem_va_check(NULL, bo))
-			goto error;
-	}
-
-	r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check,
-				      NULL);
-	if (r)
+	if (!amdgpu_gem_vm_ready(adev, vm, list))
 		goto error;
 
 	r = amdgpu_vm_update_directories(adev, vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 0335c2f331e9..f7d22c44034d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -134,6 +134,15 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
 	return r;
 }
 
+void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager *man)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
+	struct amdgpu_gtt_mgr *mgr = man->priv;
+
+	seq_printf(m, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n",
+		   man->size, mgr->available, (u64)atomic64_read(&adev->gtt_usage) >> 20);
+
+}
 /**
  * amdgpu_gtt_mgr_new - allocate a new node
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index aab857d89d03..6e4ae0d983c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -160,6 +160,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
 		return r;
 	}
+	if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync)
+		amdgpu_ring_emit_pipeline_sync(ring);
 
 	if (vm) {
 		r = amdgpu_vm_flush(ring, job);
@@ -217,7 +219,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vm_id)
-			amdgpu_vm_reset_id(adev, job->vm_id);
+			amdgpu_vm_reset_id(adev, ring->funcs->vmhub,
+					   job->vm_id);
 		amdgpu_ring_undo(ring);
 		return r;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 86a12424c162..7570f2439a11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -57,6 +57,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 	(*job)->vm = vm;
 	(*job)->ibs = (void *)&(*job)[1];
 	(*job)->num_ibs = num_ibs;
+	(*job)->need_pipeline_sync = false;
 
 	amdgpu_sync_create(&(*job)->sync);
 
@@ -139,7 +140,7 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 
 	struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync);
 
-	if (fence == NULL && vm && !job->vm_id) {
+	while (fence == NULL && vm && !job->vm_id) {
 		struct amdgpu_ring *ring = job->ring;
 		int r;
 
@@ -152,6 +153,9 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 		fence = amdgpu_sync_get_fence(&job->sync);
 	}
 
+	if (amd_sched_dependency_optimized(fence, sched_job->s_entity))
+		job->need_pipeline_sync = true;
+
 	return fence;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 832be632478f..96c341670782 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -545,11 +545,22 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			adev->gfx.config.double_offchip_lds_buf;
 
 		if (amdgpu_ngg) {
-			dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[PRIM].gpu_addr;
-			dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[POS].gpu_addr;
-			dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[CNTL].gpu_addr;
-			dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[PARAM].gpu_addr;
+			dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PRIM].gpu_addr;
+			dev_info.prim_buf_size = adev->gfx.ngg.buf[NGG_PRIM].size;
+			dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[NGG_POS].gpu_addr;
+			dev_info.pos_buf_size = adev->gfx.ngg.buf[NGG_POS].size;
+			dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[NGG_CNTL].gpu_addr;
+			dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[NGG_CNTL].size;
+			dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PARAM].gpu_addr;
+			dev_info.param_buf_size = adev->gfx.ngg.buf[NGG_PARAM].size;
 		}
+		dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size;
+		dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs;
+		dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
+		dev_info.num_tcc_blocks = adev->gfx.config.max_texture_channel_caches;
+		dev_info.gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth;
+		dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth;
+		dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads;
 
 		return copy_to_user(out, &dev_info,
 				    min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0;
@@ -810,7 +821,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 
 	if (amdgpu_sriov_vf(adev)) {
 		/* TODO: how to handle reserve failure */
-		BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false));
+		BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
 		amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va);
 		fpriv->vm.csa_bo_va = NULL;
 		amdgpu_bo_unreserve(adev->virt.csa_obj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index db8f8dda209c..dbd10618ec20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -597,21 +597,6 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
 				 struct drm_pending_vblank_event *event,
 				 uint32_t page_flip_flags, uint32_t target,
 				 struct drm_modeset_acquire_ctx *ctx);
-void amdgpu_crtc_cleanup_flip_ctx(struct amdgpu_flip_work *work,
-				  struct amdgpu_bo *new_abo);
-int amdgpu_crtc_prepare_flip(struct drm_crtc *crtc,
-			     struct drm_framebuffer *fb,
-			     struct drm_pending_vblank_event *event,
-			     uint32_t page_flip_flags,
-			     uint32_t target,
-			     struct amdgpu_flip_work **work,
-			     struct amdgpu_bo **new_abo);
-
-void amdgpu_crtc_submit_flip(struct drm_crtc *crtc,
-			     struct drm_framebuffer *fb,
-			     struct amdgpu_flip_work *work,
-			     struct amdgpu_bo *new_abo);
-
 extern const struct drm_mode_config_funcs amdgpu_mode_funcs;
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index cb89fff863c0..365883d7948d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -295,7 +295,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
 	if (*bo == NULL)
 		return;
 
-	if (likely(amdgpu_bo_reserve(*bo, false) == 0)) {
+	if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
 		if (cpu_addr)
 			amdgpu_bo_kunmap(*bo);
 
@@ -543,6 +543,27 @@ err:
 	return r;
 }
 
+int amdgpu_bo_validate(struct amdgpu_bo *bo)
+{
+	uint32_t domain;
+	int r;
+
+	if (bo->pin_count)
+		return 0;
+
+	domain = bo->prefered_domains;
+
+retry:
+	amdgpu_ttm_placement_from_domain(bo, domain);
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
+		domain = bo->allowed_domains;
+		goto retry;
+	}
+
+	return r;
+}
+
 int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
 				  struct amdgpu_ring *ring,
 				  struct amdgpu_bo *bo,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 15a723adca76..382485115b06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -175,6 +175,7 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
 			       struct amdgpu_bo *bo,
 			       struct reservation_object *resv,
 			       struct dma_fence **fence, bool direct);
+int amdgpu_bo_validate(struct amdgpu_bo *bo);
 int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
 				  struct amdgpu_ring *ring,
 				  struct amdgpu_bo *bo,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 990fde2cf4fd..7df503aedb69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -867,8 +867,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
 
 	pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
 
-	/* never 0 (full-speed), fuse or smc-controlled always */
-	return sprintf(buf, "%i\n", pwm_mode == FDO_PWM_MODE_STATIC ? 1 : 2);
+	return sprintf(buf, "%i\n", pwm_mode);
 }
 
 static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
@@ -887,14 +886,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
 	if (err)
 		return err;
 
-	switch (value) {
-	case 1: /* manual, percent-based */
-		amdgpu_dpm_set_fan_control_mode(adev, FDO_PWM_MODE_STATIC);
-		break;
-	default: /* disable */
-		amdgpu_dpm_set_fan_control_mode(adev, 0);
-		break;
-	}
+	amdgpu_dpm_set_fan_control_mode(adev, value);
 
 	return count;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 3826d5aea0a6..6bdc866570ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -113,7 +113,7 @@ void amdgpu_gem_prime_unpin(struct drm_gem_object *obj)
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
 	int ret = 0;
 
-	ret = amdgpu_bo_reserve(bo, false);
+	ret = amdgpu_bo_reserve(bo, true);
 	if (unlikely(ret != 0))
 		return;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index ed6e5799016e..ac5e92e5d59d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -55,6 +55,8 @@ static int psp_sw_init(void *handle)
 		psp->bootloader_load_sos = psp_v3_1_bootloader_load_sos;
 		psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf;
 		psp->ring_init = psp_v3_1_ring_init;
+		psp->ring_create = psp_v3_1_ring_create;
+		psp->ring_destroy = psp_v3_1_ring_destroy;
 		psp->cmd_submit = psp_v3_1_cmd_submit;
 		psp->compare_sram_data = psp_v3_1_compare_sram_data;
 		psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
@@ -152,11 +154,6 @@ static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd,
 static int psp_tmr_init(struct psp_context *psp)
 {
 	int ret;
-	struct psp_gfx_cmd_resp *cmd;
-
-	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
-	if (!cmd)
-		return -ENOMEM;
 
 	/*
 	 * Allocate 3M memory aligned to 1M from Frame Buffer (local
@@ -168,22 +165,30 @@ static int psp_tmr_init(struct psp_context *psp)
 	ret = amdgpu_bo_create_kernel(psp->adev, 0x300000, 0x100000,
 				      AMDGPU_GEM_DOMAIN_VRAM,
 				      &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
-	if (ret)
-		goto failed;
+
+	return ret;
+}
+
+static int psp_tmr_load(struct psp_context *psp)
+{
+	int ret;
+	struct psp_gfx_cmd_resp *cmd;
+
+	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
 
 	psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, 0x300000);
 
 	ret = psp_cmd_submit_buf(psp, NULL, cmd,
 				 psp->fence_buf_mc_addr, 1);
 	if (ret)
-		goto failed_mem;
+		goto failed;
 
 	kfree(cmd);
 
 	return 0;
 
-failed_mem:
-	amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
 failed:
 	kfree(cmd);
 	return ret;
@@ -203,104 +208,78 @@ static void psp_prep_asd_cmd_buf(struct psp_gfx_cmd_resp *cmd,
 	cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
 }
 
-static int psp_asd_load(struct psp_context *psp)
+static int psp_asd_init(struct psp_context *psp)
 {
 	int ret;
-	struct amdgpu_bo *asd_bo, *asd_shared_bo;
-	uint64_t asd_mc_addr, asd_shared_mc_addr;
-	void *asd_buf, *asd_shared_buf;
-	struct psp_gfx_cmd_resp *cmd;
-
-	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
-	if (!cmd)
-		return -ENOMEM;
 
 	/*
 	 * Allocate 16k memory aligned to 4k from Frame Buffer (local
 	 * physical) for shared ASD <-> Driver
 	 */
-	ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &asd_shared_bo, &asd_shared_mc_addr, &asd_buf);
-	if (ret)
-		goto failed;
+	ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE,
+				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
+				      &psp->asd_shared_bo,
+				      &psp->asd_shared_mc_addr,
+				      &psp->asd_shared_buf);
 
-	/*
-	 * Allocate 256k memory aligned to 4k from Frame Buffer (local
-	 * physical) for ASD firmware
-	 */
-	ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_BIN_SIZE, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &asd_bo, &asd_mc_addr, &asd_buf);
-	if (ret)
-		goto failed_mem;
+	return ret;
+}
+
+static int psp_asd_load(struct psp_context *psp)
+{
+	int ret;
+	struct psp_gfx_cmd_resp *cmd;
+
+	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
 
-	memcpy(asd_buf, psp->asd_start_addr, psp->asd_ucode_size);
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+	memcpy(psp->fw_pri_buf, psp->asd_start_addr, psp->asd_ucode_size);
 
-	psp_prep_asd_cmd_buf(cmd, asd_mc_addr, asd_shared_mc_addr,
+	psp_prep_asd_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->asd_shared_mc_addr,
 			     psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE);
 
 	ret = psp_cmd_submit_buf(psp, NULL, cmd,
 				 psp->fence_buf_mc_addr, 2);
-	if (ret)
-		goto failed_mem1;
 
-	amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf);
-	amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf);
 	kfree(cmd);
 
-	return 0;
-
-failed_mem1:
-	amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf);
-failed_mem:
-	amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf);
-failed:
-	kfree(cmd);
 	return ret;
 }
 
-static int psp_load_fw(struct amdgpu_device *adev)
+static int psp_hw_start(struct psp_context *psp)
 {
 	int ret;
-	struct psp_gfx_cmd_resp *cmd;
-	int i;
-	struct amdgpu_firmware_info *ucode;
-	struct psp_context *psp = &adev->psp;
-
-	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
-	if (!cmd)
-		return -ENOMEM;
 
 	ret = psp_bootloader_load_sysdrv(psp);
 	if (ret)
-		goto failed;
+		return ret;
 
 	ret = psp_bootloader_load_sos(psp);
 	if (ret)
-		goto failed;
-
-	ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
-	if (ret)
-		goto failed;
+		return ret;
 
-	ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &psp->fence_buf_bo,
-				      &psp->fence_buf_mc_addr,
-				      &psp->fence_buf);
+	ret = psp_ring_create(psp, PSP_RING_TYPE__KM);
 	if (ret)
-		goto failed;
-
-	memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
+		return ret;
 
-	ret = psp_tmr_init(psp);
+	ret = psp_tmr_load(psp);
 	if (ret)
-		goto failed_mem;
+		return ret;
 
 	ret = psp_asd_load(psp);
 	if (ret)
-		goto failed_mem;
+		return ret;
+
+	return 0;
+}
+
+static int psp_np_fw_load(struct psp_context *psp)
+{
+	int i, ret;
+	struct amdgpu_firmware_info *ucode;
+	struct amdgpu_device* adev = psp->adev;
 
 	for (i = 0; i < adev->firmware.max_ucodes; i++) {
 		ucode = &adev->firmware.ucode[i];
@@ -310,15 +289,21 @@ static int psp_load_fw(struct amdgpu_device *adev)
 		if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
 		    psp_smu_reload_quirk(psp))
 			continue;
+		if (amdgpu_sriov_vf(adev) &&
+		   (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0
+		    || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1
+		    || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G))
+			/*skip ucode loading in SRIOV VF */
+			continue;
 
-		ret = psp_prep_cmd_buf(ucode, cmd);
+		ret = psp_prep_cmd_buf(ucode, psp->cmd);
 		if (ret)
-			goto failed_mem;
+			return ret;
 
-		ret = psp_cmd_submit_buf(psp, ucode, cmd,
+		ret = psp_cmd_submit_buf(psp, ucode, psp->cmd,
 					 psp->fence_buf_mc_addr, i + 3);
 		if (ret)
-			goto failed_mem;
+			return ret;
 
 #if 0
 		/* check if firmware loaded sucessfully */
@@ -327,8 +312,59 @@ static int psp_load_fw(struct amdgpu_device *adev)
 #endif
 	}
 
-	amdgpu_bo_free_kernel(&psp->fence_buf_bo,
-			      &psp->fence_buf_mc_addr, &psp->fence_buf);
+	return 0;
+}
+
+static int psp_load_fw(struct amdgpu_device *adev)
+{
+	int ret;
+	struct psp_context *psp = &adev->psp;
+	struct psp_gfx_cmd_resp *cmd;
+
+	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	psp->cmd = cmd;
+
+	ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
+				      AMDGPU_GEM_DOMAIN_GTT,
+				      &psp->fw_pri_bo,
+				      &psp->fw_pri_mc_addr,
+				      &psp->fw_pri_buf);
+	if (ret)
+		goto failed;
+
+	ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &psp->fence_buf_bo,
+				      &psp->fence_buf_mc_addr,
+				      &psp->fence_buf);
+	if (ret)
+		goto failed_mem1;
+
+	memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
+
+	ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
+	if (ret)
+		goto failed_mem1;
+
+	ret = psp_tmr_init(psp);
+	if (ret)
+		goto failed_mem;
+
+	ret = psp_asd_init(psp);
+	if (ret)
+		goto failed_mem;
+
+	ret = psp_hw_start(psp);
+	if (ret)
+		goto failed_mem;
+
+	ret = psp_np_fw_load(psp);
+	if (ret)
+		goto failed_mem;
+
 	kfree(cmd);
 
 	return 0;
@@ -336,6 +372,9 @@ static int psp_load_fw(struct amdgpu_device *adev)
 failed_mem:
 	amdgpu_bo_free_kernel(&psp->fence_buf_bo,
 			      &psp->fence_buf_mc_addr, &psp->fence_buf);
+failed_mem1:
+	amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+			      &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
 failed:
 	kfree(cmd);
 	return ret;
@@ -379,12 +418,24 @@ static int psp_hw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct psp_context *psp = &adev->psp;
 
-	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
-		amdgpu_ucode_fini_bo(adev);
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return 0;
+
+	amdgpu_ucode_fini_bo(adev);
+
+	psp_ring_destroy(psp, PSP_RING_TYPE__KM);
 
 	if (psp->tmr_buf)
 		amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
 
+	if (psp->fw_pri_buf)
+		amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+				      &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
+
+	if (psp->fence_buf_bo)
+		amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+				      &psp->fence_buf_mc_addr, &psp->fence_buf);
+
 	return 0;
 }
 
@@ -397,18 +448,30 @@ static int psp_resume(void *handle)
 {
 	int ret;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct psp_context *psp = &adev->psp;
 
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 		return 0;
 
+	DRM_INFO("PSP is resuming...\n");
+
 	mutex_lock(&adev->firmware.mutex);
 
-	ret = psp_load_fw(adev);
+	ret = psp_hw_start(psp);
 	if (ret)
-		DRM_ERROR("PSP resume failed\n");
+		goto failed;
+
+	ret = psp_np_fw_load(psp);
+	if (ret)
+		goto failed;
 
 	mutex_unlock(&adev->firmware.mutex);
 
+	return 0;
+
+failed:
+	DRM_ERROR("PSP resume failed\n");
+	mutex_unlock(&adev->firmware.mutex);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index e9f35e025b59..0301e4e0b297 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -30,8 +30,8 @@
 
 #define PSP_FENCE_BUFFER_SIZE	0x1000
 #define PSP_CMD_BUFFER_SIZE	0x1000
-#define PSP_ASD_BIN_SIZE	0x40000
 #define PSP_ASD_SHARED_MEM_SIZE	0x4000
+#define PSP_1_MEG		0x100000
 
 enum psp_ring_type
 {
@@ -57,6 +57,7 @@ struct psp_context
 {
 	struct amdgpu_device            *adev;
 	struct psp_ring                 km_ring;
+	struct psp_gfx_cmd_resp		*cmd;
 
 	int (*init_microcode)(struct psp_context *psp);
 	int (*bootloader_load_sysdrv)(struct psp_context *psp);
@@ -64,6 +65,9 @@ struct psp_context
 	int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode,
 			    struct psp_gfx_cmd_resp *cmd);
 	int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
+	int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type);
+	int (*ring_destroy)(struct psp_context *psp,
+			    enum psp_ring_type ring_type);
 	int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode,
 			  uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index);
 	bool (*compare_sram_data)(struct psp_context *psp,
@@ -71,6 +75,11 @@ struct psp_context
 				  enum AMDGPU_UCODE_ID ucode_type);
 	bool (*smu_reload_quirk)(struct psp_context *psp);
 
+	/* fence buffer */
+	struct amdgpu_bo 		*fw_pri_bo;
+	uint64_t 			fw_pri_mc_addr;
+	void				*fw_pri_buf;
+
 	/* sos firmware */
 	const struct firmware		*sos_fw;
 	uint32_t			sos_fw_version;
@@ -85,12 +94,15 @@ struct psp_context
 	uint64_t 			tmr_mc_addr;
 	void				*tmr_buf;
 
-	/* asd firmware */
+	/* asd firmware and buffer */
 	const struct firmware		*asd_fw;
 	uint32_t			asd_fw_version;
 	uint32_t			asd_feature_version;
 	uint32_t			asd_ucode_size;
 	uint8_t				*asd_start_addr;
+	struct amdgpu_bo 		*asd_shared_bo;
+	uint64_t 			asd_shared_mc_addr;
+	void				*asd_shared_buf;
 
 	/* fence buffer */
 	struct amdgpu_bo 		*fence_buf_bo;
@@ -105,6 +117,8 @@ struct amdgpu_psp_funcs {
 
 #define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type))
 #define psp_ring_init(psp, type) (psp)->ring_init((psp), (type))
+#define psp_ring_create(psp, type) (psp)->ring_create((psp), (type))
+#define psp_ring_destroy(psp, type) ((psp)->ring_destroy((psp), (type)))
 #define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \
 		(psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index))
 #define psp_compare_sram_data(psp, ucode, type) \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 63e56398ca9a..944443c5b90a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -99,6 +99,7 @@ struct amdgpu_ring_funcs {
 	uint32_t		align_mask;
 	u32			nop;
 	bool			support_64bit_ptrs;
+	unsigned		vmhub;
 
 	/* ring read/write ptr handling */
 	u64 (*get_rptr)(struct amdgpu_ring *ring);
@@ -178,6 +179,7 @@ struct amdgpu_ring {
 	unsigned		cond_exe_offs;
 	u64			cond_exe_gpu_addr;
 	volatile u32		*cond_exe_cpu_addr;
+	unsigned		vm_inv_eng;
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry *ent;
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index de9f919ae336..5ca75a456ad2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -130,7 +130,7 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
 		return -EINVAL;
 	}
 
-	r = amdgpu_bo_reserve(sa_manager->bo, false);
+	r = amdgpu_bo_reserve(sa_manager->bo, true);
 	if (!r) {
 		amdgpu_bo_kunmap(sa_manager->bo);
 		amdgpu_bo_unpin(sa_manager->bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index ee9d0f346d75..8601904e670a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -190,26 +190,29 @@ TRACE_EVENT(amdgpu_sched_run_job,
 
 
 TRACE_EVENT(amdgpu_vm_grab_id,
-	    TP_PROTO(struct amdgpu_vm *vm, int ring, struct amdgpu_job *job),
+	    TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+		     struct amdgpu_job *job),
 	    TP_ARGS(vm, ring, job),
 	    TP_STRUCT__entry(
 			     __field(struct amdgpu_vm *, vm)
 			     __field(u32, ring)
-			     __field(u32, vmid)
+			     __field(u32, vm_id)
+			     __field(u32, vm_hub)
 			     __field(u64, pd_addr)
 			     __field(u32, needs_flush)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->vm = vm;
-			   __entry->ring = ring;
-			   __entry->vmid = job->vm_id;
+			   __entry->ring = ring->idx;
+			   __entry->vm_id = job->vm_id;
+			   __entry->vm_hub = ring->funcs->vmhub,
 			   __entry->pd_addr = job->vm_pd_addr;
 			   __entry->needs_flush = job->vm_needs_flush;
 			   ),
-	    TP_printk("vm=%p, ring=%u, id=%u, pd_addr=%010Lx needs_flush=%u",
-		      __entry->vm, __entry->ring, __entry->vmid,
-		      __entry->pd_addr, __entry->needs_flush)
+	    TP_printk("vm=%p, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
+		      __entry->vm, __entry->ring, __entry->vm_id,
+		      __entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
 );
 
 TRACE_EVENT(amdgpu_vm_bo_map,
@@ -331,21 +334,25 @@ TRACE_EVENT(amdgpu_vm_copy_ptes,
 );
 
 TRACE_EVENT(amdgpu_vm_flush,
-	    TP_PROTO(uint64_t pd_addr, unsigned ring, unsigned id),
-	    TP_ARGS(pd_addr, ring, id),
+	    TP_PROTO(struct amdgpu_ring *ring, unsigned vm_id,
+		     uint64_t pd_addr),
+	    TP_ARGS(ring, vm_id, pd_addr),
 	    TP_STRUCT__entry(
-			     __field(u64, pd_addr)
 			     __field(u32, ring)
-			     __field(u32, id)
+			     __field(u32, vm_id)
+			     __field(u32, vm_hub)
+			     __field(u64, pd_addr)
 			     ),
 
 	    TP_fast_assign(
+			   __entry->ring = ring->idx;
+			   __entry->vm_id = vm_id;
+			   __entry->vm_hub = ring->funcs->vmhub;
 			   __entry->pd_addr = pd_addr;
-			   __entry->ring = ring;
-			   __entry->id = id;
 			   ),
-	    TP_printk("ring=%u, id=%u, pd_addr=%010Lx",
-		      __entry->ring, __entry->id, __entry->pd_addr)
+	    TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx",
+		      __entry->ring, __entry->vm_id,
+		      __entry->vm_hub,__entry->pd_addr)
 );
 
 TRACE_EVENT(amdgpu_bo_list_set,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 35d53a0d9ba6..5db0230e45c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -203,7 +203,9 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 	abo = container_of(bo, struct amdgpu_bo, tbo);
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
-		if (adev->mman.buffer_funcs_ring->ready == false) {
+		if (adev->mman.buffer_funcs &&
+		    adev->mman.buffer_funcs_ring &&
+		    adev->mman.buffer_funcs_ring->ready == false) {
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 		} else {
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
@@ -763,7 +765,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
 {
 	struct amdgpu_ttm_tt *gtt, *tmp;
 	struct ttm_mem_reg bo_mem;
-	uint32_t flags;
+	uint64_t flags;
 	int r;
 
 	bo_mem.mem_type = TTM_PL_TT;
@@ -1038,11 +1040,17 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 					    const struct ttm_place *place)
 {
-	if (bo->mem.mem_type == TTM_PL_VRAM &&
-	    bo->mem.start == AMDGPU_BO_INVALID_OFFSET) {
-		unsigned long num_pages = bo->mem.num_pages;
-		struct drm_mm_node *node = bo->mem.mm_node;
+	unsigned long num_pages = bo->mem.num_pages;
+	struct drm_mm_node *node = bo->mem.mm_node;
+
+	if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
+		return ttm_bo_eviction_valuable(bo, place);
+
+	switch (bo->mem.mem_type) {
+	case TTM_PL_TT:
+		return true;
 
+	case TTM_PL_VRAM:
 		/* Check each drm MM node individually */
 		while (num_pages) {
 			if (place->fpfn < (node->start + node->size) &&
@@ -1052,8 +1060,10 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 			num_pages -= node->size;
 			++node;
 		}
+		break;
 
-		return false;
+	default:
+		break;
 	}
 
 	return ttm_bo_eviction_valuable(bo, place);
@@ -1188,7 +1198,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 		return;
 	amdgpu_ttm_debugfs_fini(adev);
 	if (adev->stollen_vga_memory) {
-		r = amdgpu_bo_reserve(adev->stollen_vga_memory, false);
+		r = amdgpu_bo_reserve(adev->stollen_vga_memory, true);
 		if (r == 0) {
 			amdgpu_bo_unpin(adev->stollen_vga_memory);
 			amdgpu_bo_unreserve(adev->stollen_vga_memory);
@@ -1401,6 +1411,8 @@ error_free:
 
 #if defined(CONFIG_DEBUG_FS)
 
+extern void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager
+				 *man);
 static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
@@ -1414,11 +1426,17 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
 	spin_lock(&glob->lru_lock);
 	drm_mm_print(mm, &p);
 	spin_unlock(&glob->lru_lock);
-	if (ttm_pl == TTM_PL_VRAM)
+	switch (ttm_pl) {
+	case TTM_PL_VRAM:
 		seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
 			   adev->mman.bdev.man[ttm_pl].size,
 			   (u64)atomic64_read(&adev->vram_usage) >> 20,
 			   (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
+		break;
+	case TTM_PL_TT:
+		amdgpu_gtt_mgr_print(m, &adev->mman.bdev.man[TTM_PL_TT]);
+		break;
+	}
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index a1891c93cdbf..dfd1c98efa7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -382,10 +382,14 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
 	 * if SMU loaded firmware, it needn't add SMC, UVD, and VCE
 	 * ucode info here
 	 */
-	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
-		adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 4;
-	else
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+		if (amdgpu_sriov_vf(adev))
+			adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 3;
+		else
+			adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 4;
+	} else {
 		adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM;
+	}
 
 	for (i = 0; i < adev->firmware.max_ucodes; i++) {
 		ucode = &adev->firmware.ucode[i];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index c853400805d1..735c38d7db0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -955,11 +955,11 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
 	struct amdgpu_device *adev = ring->adev;
 	uint32_t rptr = amdgpu_ring_get_rptr(ring);
 	unsigned i;
-	int r;
+	int r, timeout = adev->usec_timeout;
 
-	/* TODO: remove it if VCE can work for sriov */
+	/* workaround VCE ring test slow issue for sriov*/
 	if (amdgpu_sriov_vf(adev))
-		return 0;
+		timeout *= 10;
 
 	r = amdgpu_ring_alloc(ring, 16);
 	if (r) {
@@ -970,13 +970,13 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, VCE_CMD_END);
 	amdgpu_ring_commit(ring);
 
-	for (i = 0; i < adev->usec_timeout; i++) {
+	for (i = 0; i < timeout; i++) {
 		if (amdgpu_ring_get_rptr(ring) != rptr)
 			break;
 		DRM_UDELAY(1);
 	}
 
-	if (i < adev->usec_timeout) {
+	if (i < timeout) {
 		DRM_INFO("ring test on %d succeeded in %d usecs\n",
 			 ring->idx, i);
 	} else {
@@ -999,10 +999,6 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	struct dma_fence *fence = NULL;
 	long r;
 
-	/* TODO: remove it if VCE can work for sriov */
-	if (amdgpu_sriov_vf(ring->adev))
-		return 0;
-
 	/* skip vce ring1/2 ib test for now, since it's not reliable */
 	if (ring != &ring->adev->vce.ring[0])
 		return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index ba8b8ae6234f..6bf5cea294f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -225,3 +225,49 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev)
 
 	return 0;
 }
+
+/**
+ * amdgpu_virt_alloc_mm_table() - alloc memory for mm table
+ * @amdgpu:	amdgpu device.
+ * MM table is used by UVD and VCE for its initialization
+ * Return: Zero if allocate success.
+ */
+int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
+{
+	int r;
+
+	if (!amdgpu_sriov_vf(adev) || adev->virt.mm_table.gpu_addr)
+		return 0;
+
+	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_VRAM,
+				    &adev->virt.mm_table.bo,
+				    &adev->virt.mm_table.gpu_addr,
+				    (void *)&adev->virt.mm_table.cpu_addr);
+	if (r) {
+		DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
+		return r;
+	}
+
+	memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
+	DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
+		 adev->virt.mm_table.gpu_addr,
+		 adev->virt.mm_table.cpu_addr);
+	return 0;
+}
+
+/**
+ * amdgpu_virt_free_mm_table() - free mm table memory
+ * @amdgpu:	amdgpu device.
+ * Free MM table memory
+ */
+void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
+{
+	if (!amdgpu_sriov_vf(adev) || !adev->virt.mm_table.gpu_addr)
+		return;
+
+	amdgpu_bo_free_kernel(&adev->virt.mm_table.bo,
+			      &adev->virt.mm_table.gpu_addr,
+			      (void *)&adev->virt.mm_table.cpu_addr);
+	adev->virt.mm_table.gpu_addr = 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 1ee0a190b33b..a8ed162cc0bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -98,5 +98,7 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
 int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary);
+int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
+void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7ed5302b511a..8ecf82c5fe74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -406,6 +406,8 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_job *job)
 {
 	struct amdgpu_device *adev = ring->adev;
+	unsigned vmhub = ring->funcs->vmhub;
+	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	uint64_t fence_context = adev->fence_context + ring->idx;
 	struct dma_fence *updates = sync->last_vm_update;
 	struct amdgpu_vm_id *id, *idle;
@@ -413,16 +415,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	unsigned i;
 	int r = 0;
 
-	fences = kmalloc_array(sizeof(void *), adev->vm_manager.num_ids,
-			       GFP_KERNEL);
+	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
 	if (!fences)
 		return -ENOMEM;
 
-	mutex_lock(&adev->vm_manager.lock);
+	mutex_lock(&id_mgr->lock);
 
 	/* Check if we have an idle VMID */
 	i = 0;
-	list_for_each_entry(idle, &adev->vm_manager.ids_lru, list) {
+	list_for_each_entry(idle, &id_mgr->ids_lru, list) {
 		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
 		if (!fences[i])
 			break;
@@ -430,7 +431,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	}
 
 	/* If we can't find a idle VMID to use, wait till one becomes available */
-	if (&idle->list == &adev->vm_manager.ids_lru) {
+	if (&idle->list == &id_mgr->ids_lru) {
 		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
 		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
 		struct dma_fence_array *array;
@@ -455,25 +456,19 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (r)
 			goto error;
 
-		mutex_unlock(&adev->vm_manager.lock);
+		mutex_unlock(&id_mgr->lock);
 		return 0;
 
 	}
 	kfree(fences);
 
-	job->vm_needs_flush = true;
+	job->vm_needs_flush = false;
 	/* Check if we can use a VMID already assigned to this VM */
-	i = ring->idx;
-	do {
+	list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {
 		struct dma_fence *flushed;
-
-		id = vm->ids[i++];
-		if (i == AMDGPU_MAX_RINGS)
-			i = 0;
+		bool needs_flush = false;
 
 		/* Check all the prerequisites to using this VMID */
-		if (!id)
-			continue;
 		if (amdgpu_vm_had_gpu_reset(adev, id))
 			continue;
 
@@ -483,16 +478,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (job->vm_pd_addr != id->pd_gpu_addr)
 			continue;
 
-		if (!id->last_flush)
-			continue;
-
-		if (id->last_flush->context != fence_context &&
-		    !dma_fence_is_signaled(id->last_flush))
-			continue;
+		if (!id->last_flush ||
+		    (id->last_flush->context != fence_context &&
+		     !dma_fence_is_signaled(id->last_flush)))
+			needs_flush = true;
 
 		flushed  = id->flushed_updates;
-		if (updates &&
-		    (!flushed || dma_fence_is_later(updates, flushed)))
+		if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
+			needs_flush = true;
+
+		/* Concurrent flushes are only possible starting with Vega10 */
+		if (adev->asic_type < CHIP_VEGA10 && needs_flush)
 			continue;
 
 		/* Good we can use this VMID. Remember this submission as
@@ -502,17 +498,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (r)
 			goto error;
 
-		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
-		vm->ids[ring->idx] = id;
-
-		job->vm_id = id - adev->vm_manager.ids;
-		job->vm_needs_flush = false;
-		trace_amdgpu_vm_grab_id(vm, ring->idx, job);
+		if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
+			dma_fence_put(id->flushed_updates);
+			id->flushed_updates = dma_fence_get(updates);
+		}
 
-		mutex_unlock(&adev->vm_manager.lock);
-		return 0;
+		if (needs_flush)
+			goto needs_flush;
+		else
+			goto no_flush_needed;
 
-	} while (i != ring->idx);
+	};
 
 	/* Still no ID to use? Then use the idle one found earlier */
 	id = idle;
@@ -522,23 +518,25 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	if (r)
 		goto error;
 
-	dma_fence_put(id->last_flush);
-	id->last_flush = NULL;
-
+	id->pd_gpu_addr = job->vm_pd_addr;
 	dma_fence_put(id->flushed_updates);
 	id->flushed_updates = dma_fence_get(updates);
-
-	id->pd_gpu_addr = job->vm_pd_addr;
 	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
-	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
 	atomic64_set(&id->owner, vm->client_id);
-	vm->ids[ring->idx] = id;
 
-	job->vm_id = id - adev->vm_manager.ids;
-	trace_amdgpu_vm_grab_id(vm, ring->idx, job);
+needs_flush:
+	job->vm_needs_flush = true;
+	dma_fence_put(id->last_flush);
+	id->last_flush = NULL;
+
+no_flush_needed:
+	list_move_tail(&id->list, &id_mgr->ids_lru);
+
+	job->vm_id = id - id_mgr->ids;
+	trace_amdgpu_vm_grab_id(vm, ring, job);
 
 error:
-	mutex_unlock(&adev->vm_manager.lock);
+	mutex_unlock(&id_mgr->lock);
 	return r;
 }
 
@@ -590,7 +588,9 @@ static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_vm_id *id = &adev->vm_manager.ids[job->vm_id];
+	unsigned vmhub = ring->funcs->vmhub;
+	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	struct amdgpu_vm_id *id = &id_mgr->ids[job->vm_id];
 	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
 		id->gds_base != job->gds_base ||
 		id->gds_size != job->gds_size ||
@@ -614,27 +614,27 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 	if (ring->funcs->init_cond_exec)
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
-	if (ring->funcs->emit_pipeline_sync)
+	if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync)
 		amdgpu_ring_emit_pipeline_sync(ring);
 
 	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
 		u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
 		struct dma_fence *fence;
 
-		trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
+		trace_amdgpu_vm_flush(ring, job->vm_id, pd_addr);
 		amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
 
 		r = amdgpu_fence_emit(ring, &fence);
 		if (r)
 			return r;
 
-		mutex_lock(&adev->vm_manager.lock);
+		mutex_lock(&id_mgr->lock);
 		dma_fence_put(id->last_flush);
 		id->last_flush = fence;
-		mutex_unlock(&adev->vm_manager.lock);
+		mutex_unlock(&id_mgr->lock);
 	}
 
-	if (gds_switch_needed) {
+	if (ring->funcs->emit_gds_switch && gds_switch_needed) {
 		id->gds_base = job->gds_base;
 		id->gds_size = job->gds_size;
 		id->gws_base = job->gws_base;
@@ -666,10 +666,13 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
  *
  * Reset saved GDW, GWS and OA to force switch on next flush.
  */
-void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id)
+void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
+			unsigned vmid)
 {
-	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
+	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	struct amdgpu_vm_id *id = &id_mgr->ids[vmid];
 
+	atomic64_set(&id->owner, 0);
 	id->gds_base = 0;
 	id->gds_size = 0;
 	id->gws_base = 0;
@@ -679,6 +682,26 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id)
 }
 
 /**
+ * amdgpu_vm_reset_all_id - reset VMID to zero
+ *
+ * @adev: amdgpu device structure
+ *
+ * Reset VMID to force flush on next use
+ */
+void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev)
+{
+	unsigned i, j;
+
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+		struct amdgpu_vm_id_manager *id_mgr =
+			&adev->vm_manager.id_mgr[i];
+
+		for (j = 1; j < id_mgr->num_ids; ++j)
+			amdgpu_vm_reset_id(adev, i, j);
+	}
+}
+
+/**
  * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
  *
  * @vm: requested vm
@@ -1336,6 +1359,12 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 	flags &= ~AMDGPU_PTE_MTYPE_MASK;
 	flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);
 
+	if ((mapping->flags & AMDGPU_PTE_PRT) &&
+	    (adev->asic_type >= CHIP_VEGA10)) {
+		flags |= AMDGPU_PTE_PRT;
+		flags &= ~AMDGPU_PTE_VALID;
+	}
+
 	trace_amdgpu_vm_bo_update(mapping);
 
 	pfn = mapping->offset >> PAGE_SHIFT;
@@ -1629,8 +1658,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
 
-		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping,
-					       0, 0, &f);
+		r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm,
+						mapping->start, mapping->last,
+						0, 0, &f);
 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
 		if (r) {
 			dma_fence_put(f);
@@ -2117,10 +2147,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	unsigned ring_instance;
 	struct amdgpu_ring *ring;
 	struct amd_sched_rq *rq;
-	int i, r;
+	int r;
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
-		vm->ids[i] = NULL;
 	vm->va = RB_ROOT;
 	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
 	spin_lock_init(&vm->status_lock);
@@ -2241,16 +2269,21 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  */
 void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 {
-	unsigned i;
+	unsigned i, j;
+
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+		struct amdgpu_vm_id_manager *id_mgr =
+			&adev->vm_manager.id_mgr[i];
 
-	INIT_LIST_HEAD(&adev->vm_manager.ids_lru);
+		mutex_init(&id_mgr->lock);
+		INIT_LIST_HEAD(&id_mgr->ids_lru);
 
-	/* skip over VMID 0, since it is the system VM */
-	for (i = 1; i < adev->vm_manager.num_ids; ++i) {
-		amdgpu_vm_reset_id(adev, i);
-		amdgpu_sync_create(&adev->vm_manager.ids[i].active);
-		list_add_tail(&adev->vm_manager.ids[i].list,
-			      &adev->vm_manager.ids_lru);
+		/* skip over VMID 0, since it is the system VM */
+		for (j = 1; j < id_mgr->num_ids; ++j) {
+			amdgpu_vm_reset_id(adev, i, j);
+			amdgpu_sync_create(&id_mgr->ids[i].active);
+			list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
+		}
 	}
 
 	adev->vm_manager.fence_context =
@@ -2273,13 +2306,19 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
  */
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 {
-	unsigned i;
+	unsigned i, j;
 
-	for (i = 0; i < AMDGPU_NUM_VM; ++i) {
-		struct amdgpu_vm_id *id = &adev->vm_manager.ids[i];
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+		struct amdgpu_vm_id_manager *id_mgr =
+			&adev->vm_manager.id_mgr[i];
 
-		amdgpu_sync_free(&adev->vm_manager.ids[i].active);
-		dma_fence_put(id->flushed_updates);
-		dma_fence_put(id->last_flush);
+		mutex_destroy(&id_mgr->lock);
+		for (j = 0; j < AMDGPU_NUM_VM; ++j) {
+			struct amdgpu_vm_id *id = &id_mgr->ids[j];
+
+			amdgpu_sync_free(&id->active);
+			dma_fence_put(id->flushed_updates);
+			dma_fence_put(id->last_flush);
+		}
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d9e57290dc71..e1d951ece433 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -65,7 +65,8 @@ struct amdgpu_bo_list_entry;
 
 #define AMDGPU_PTE_FRAG(x)	((x & 0x1fULL) << 7)
 
-#define AMDGPU_PTE_PRT		(1ULL << 63)
+/* TILED for VEGA10, reserved for older ASICs  */
+#define AMDGPU_PTE_PRT		(1ULL << 51)
 
 /* VEGA10 only */
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
@@ -114,9 +115,6 @@ struct amdgpu_vm {
 	struct dma_fence	*last_dir_update;
 	uint64_t		last_eviction_counter;
 
-	/* for id and flush management per ring */
-	struct amdgpu_vm_id	*ids[AMDGPU_MAX_RINGS];
-
 	/* protecting freed */
 	spinlock_t		freed_lock;
 
@@ -149,12 +147,16 @@ struct amdgpu_vm_id {
 	uint32_t		oa_size;
 };
 
+struct amdgpu_vm_id_manager {
+	struct mutex		lock;
+	unsigned		num_ids;
+	struct list_head	ids_lru;
+	struct amdgpu_vm_id	ids[AMDGPU_NUM_VM];
+};
+
 struct amdgpu_vm_manager {
 	/* Handling of VMIDs */
-	struct mutex				lock;
-	unsigned				num_ids;
-	struct list_head			ids_lru;
-	struct amdgpu_vm_id			ids[AMDGPU_NUM_VM];
+	struct amdgpu_vm_id_manager		id_mgr[AMDGPU_MAX_VMHUBS];
 
 	/* Handling of VM fences */
 	u64					fence_context;
@@ -200,7 +202,9 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync, struct dma_fence *fence,
 		      struct amdgpu_job *job);
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
-void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
+void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
+			unsigned vmid);
+void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index a4831fe0223b..a2c59a08b2bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -220,9 +220,9 @@ static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man,
 }
 
 const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = {
-	amdgpu_vram_mgr_init,
-	amdgpu_vram_mgr_fini,
-	amdgpu_vram_mgr_new,
-	amdgpu_vram_mgr_del,
-	amdgpu_vram_mgr_debug
+	.init		= amdgpu_vram_mgr_init,
+	.takedown	= amdgpu_vram_mgr_fini,
+	.get_node	= amdgpu_vram_mgr_new,
+	.put_node	= amdgpu_vram_mgr_del,
+	.debug		= amdgpu_vram_mgr_debug
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
index 8c9bc75a9c2d..8a0818b23ea4 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
@@ -165,7 +165,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
 	int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
-	ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+	ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
 
 	memset(&args, 0, sizeof(args));
 
@@ -178,7 +178,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
 void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
 {
 	int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
-	ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+	ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
 
 	memset(&args, 0, sizeof(args));
 
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 11ccda83d767..ec93714e4524 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -906,6 +906,12 @@ static bool ci_dpm_vblank_too_short(struct amdgpu_device *adev)
 	u32 vblank_time = amdgpu_dpm_get_vblank_time(adev);
 	u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300;
 
+	/* disable mclk switching if the refresh is >120Hz, even if the
+	 * blanking period would allow it
+	 */
+	if (amdgpu_dpm_get_vrefresh(adev) > 120)
+		return true;
+
 	if (vblank_time < switch_limit)
 		return true;
 	else
@@ -1267,30 +1273,33 @@ static int ci_dpm_set_fan_speed_percent(struct amdgpu_device *adev,
 
 static void ci_dpm_set_fan_control_mode(struct amdgpu_device *adev, u32 mode)
 {
-	if (mode) {
-		/* stop auto-manage */
+	switch (mode) {
+	case AMD_FAN_CTRL_NONE:
 		if (adev->pm.dpm.fan.ucode_fan_control)
 			ci_fan_ctrl_stop_smc_fan_control(adev);
-		ci_fan_ctrl_set_static_mode(adev, mode);
-	} else {
-		/* restart auto-manage */
+		ci_dpm_set_fan_speed_percent(adev, 100);
+		break;
+	case AMD_FAN_CTRL_MANUAL:
+		if (adev->pm.dpm.fan.ucode_fan_control)
+			ci_fan_ctrl_stop_smc_fan_control(adev);
+		break;
+	case AMD_FAN_CTRL_AUTO:
 		if (adev->pm.dpm.fan.ucode_fan_control)
 			ci_thermal_start_smc_fan_control(adev);
-		else
-			ci_fan_ctrl_set_default_mode(adev);
+		break;
+	default:
+		break;
 	}
 }
 
 static u32 ci_dpm_get_fan_control_mode(struct amdgpu_device *adev)
 {
 	struct ci_power_info *pi = ci_get_pi(adev);
-	u32 tmp;
 
 	if (pi->fan_is_controlled_by_smc)
-		return 0;
-
-	tmp = RREG32_SMC(ixCG_FDO_CTRL2) & CG_FDO_CTRL2__FDO_PWM_MODE_MASK;
-	return (tmp >> CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT);
+		return AMD_FAN_CTRL_AUTO;
+	else
+		return AMD_FAN_CTRL_MANUAL;
 }
 
 #if 0
@@ -3036,6 +3045,7 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev,
 						      memory_clock,
 						      &memory_level->MinVddcPhases);
 
+	memory_level->EnabledForActivity = 1;
 	memory_level->EnabledForThrottle = 1;
 	memory_level->UpH = 0;
 	memory_level->DownH = 100;
@@ -3468,8 +3478,6 @@ static int ci_populate_all_memory_levels(struct amdgpu_device *adev)
 			return ret;
 	}
 
-	pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1;
-
 	if ((dpm_table->mclk_table.count >= 2) &&
 	    ((adev->pdev->device == 0x67B0) || (adev->pdev->device == 0x67B1))) {
 		pi->smc_state_table.MemoryLevel[1].MinVddc =
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index ba98d35340a3..5dffa27afa45 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1207,8 +1207,11 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
 	u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 
 		/* watermark for high clocks */
 		if (adev->pm.dpm_enabled) {
@@ -2230,7 +2233,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (!atomic && fb && fb != crtc->primary->fb) {
 		amdgpu_fb = to_amdgpu_framebuffer(fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
 		amdgpu_bo_unpin(abo);
@@ -2589,7 +2592,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
 unpin:
 	if (amdgpu_crtc->cursor_bo) {
 		struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-		ret = amdgpu_bo_reserve(aobj, false);
+		ret = amdgpu_bo_reserve(aobj, true);
 		if (likely(ret == 0)) {
 			amdgpu_bo_unpin(aobj);
 			amdgpu_bo_unreserve(aobj);
@@ -2720,7 +2723,7 @@ static void dce_v10_0_crtc_disable(struct drm_crtc *crtc)
 
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
 		else {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index e59bc42df18c..47bbc87f96d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -1176,8 +1176,11 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
 	u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 
 		/* watermark for high clocks */
 		if (adev->pm.dpm_enabled) {
@@ -2214,7 +2217,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (!atomic && fb && fb != crtc->primary->fb) {
 		amdgpu_fb = to_amdgpu_framebuffer(fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
 		amdgpu_bo_unpin(abo);
@@ -2609,7 +2612,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
 unpin:
 	if (amdgpu_crtc->cursor_bo) {
 		struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-		ret = amdgpu_bo_reserve(aobj, false);
+		ret = amdgpu_bo_reserve(aobj, true);
 		if (likely(ret == 0)) {
 			amdgpu_bo_unpin(aobj);
 			amdgpu_bo_unreserve(aobj);
@@ -2740,7 +2743,7 @@ static void dce_v11_0_crtc_disable(struct drm_crtc *crtc)
 
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
 		else {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 307269bda4fa..d8c9a959493e 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -979,12 +979,15 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
 	u32 priority_a_mark = 0, priority_b_mark = 0;
 	u32 priority_a_cnt = PRIORITY_OFF;
 	u32 priority_b_cnt = PRIORITY_OFF;
-	u32 tmp, arb_control3;
+	u32 tmp, arb_control3, lb_vblank_lead_lines = 0;
 	fixed20_12 a, b, c;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 		priority_a_cnt = 0;
 		priority_b_cnt = 0;
 
@@ -1091,6 +1094,8 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
 		c.full = dfixed_div(c, a);
 		priority_b_mark = dfixed_trunc(c);
 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
+
+		lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
 	}
 
 	/* select wm A */
@@ -1120,6 +1125,9 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
 	/* save values for DPM */
 	amdgpu_crtc->line_time = line_time;
 	amdgpu_crtc->wm_high = latency_watermark_a;
+
+	/* Save number of lines the linebuffer leads before the scanout */
+	amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
 }
 
 /* watermark setup */
@@ -1640,7 +1648,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (!atomic && fb && fb != crtc->primary->fb) {
 		amdgpu_fb = to_amdgpu_framebuffer(fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
 		amdgpu_bo_unpin(abo);
@@ -1957,7 +1965,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc,
 unpin:
 	if (amdgpu_crtc->cursor_bo) {
 		struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-		ret = amdgpu_bo_reserve(aobj, false);
+		ret = amdgpu_bo_reserve(aobj, true);
 		if (likely(ret == 0)) {
 			amdgpu_bo_unpin(aobj);
 			amdgpu_bo_unreserve(aobj);
@@ -2083,7 +2091,7 @@ static void dce_v6_0_crtc_disable(struct drm_crtc *crtc)
 
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
 		else {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 6df7a28e8aac..db30c6ba563a 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -1091,8 +1091,11 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
 	u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 
 		/* watermark for high clocks */
 		if (adev->pm.dpm_enabled) {
@@ -2089,7 +2092,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (!atomic && fb && fb != crtc->primary->fb) {
 		amdgpu_fb = to_amdgpu_framebuffer(fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
 		amdgpu_bo_unpin(abo);
@@ -2440,7 +2443,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,
 unpin:
 	if (amdgpu_crtc->cursor_bo) {
 		struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-		ret = amdgpu_bo_reserve(aobj, false);
+		ret = amdgpu_bo_reserve(aobj, true);
 		if (likely(ret == 0)) {
 			amdgpu_bo_unpin(aobj);
 			amdgpu_bo_unreserve(aobj);
@@ -2571,7 +2574,7 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc)
 
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
 		else {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index 81a24b6b4846..f1b479b6ac98 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -248,7 +248,7 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc)
 
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
 		else {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 4c4874fdf59f..a125f9d44577 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1579,7 +1579,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev)
 
 static void gfx_v6_0_config_init(struct amdgpu_device *adev)
 {
-	adev->gfx.config.double_offchip_lds_buf = 1;
+	adev->gfx.config.double_offchip_lds_buf = 0;
 }
 
 static void gfx_v6_0_gpu_init(struct amdgpu_device *adev)
@@ -2437,7 +2437,7 @@ static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
 	int r;
 
 	if (adev->gfx.rlc.save_restore_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj);
@@ -2448,7 +2448,7 @@ static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
 	}
 
 	if (adev->gfx.rlc.clear_state_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
@@ -2459,7 +2459,7 @@ static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
 	}
 
 	if (adev->gfx.rlc.cp_table_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
@@ -3292,7 +3292,7 @@ static int gfx_v6_0_sw_init(void *handle)
 		ring->me = 1;
 		ring->pipe = i;
 		ring->queue = i;
-		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
+		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->gfx.eop_irq, irq_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 8a8bc2fe6f2e..ee2f2139e2eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1935,7 +1935,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
 				   INDEX_STRIDE, 3);
 
 	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < adev->vm_manager.num_ids; i++) {
+	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
 		if (i == 0)
 			sh_mem_base = 0;
 		else
@@ -2792,7 +2792,7 @@ static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
 
 		if (ring->mqd_obj) {
-			r = amdgpu_bo_reserve(ring->mqd_obj, false);
+			r = amdgpu_bo_reserve(ring->mqd_obj, true);
 			if (unlikely(r != 0))
 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
 
@@ -2810,7 +2810,7 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
 	int r;
 
 	if (adev->gfx.mec.hpd_eop_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
@@ -3359,7 +3359,7 @@ static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
 
 	/* save restore block */
 	if (adev->gfx.rlc.save_restore_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj);
@@ -3371,7 +3371,7 @@ static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
 
 	/* clear state block */
 	if (adev->gfx.rlc.clear_state_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
@@ -3383,7 +3383,7 @@ static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
 
 	/* clear state block */
 	if (adev->gfx.rlc.cp_table_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index dad8a4cd1b37..758d636a6f52 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1239,7 +1239,7 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
 
 	/* clear state block */
 	if (adev->gfx.rlc.clear_state_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
@@ -1250,7 +1250,7 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
 
 	/* jump table block */
 	if (adev->gfx.rlc.cp_table_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
@@ -1363,7 +1363,7 @@ static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
 	int r;
 
 	if (adev->gfx.mec.hpd_eop_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
@@ -1490,7 +1490,7 @@ static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
 
 	memset(hpd, 0, MEC_HPD_SIZE);
 
-	r = amdgpu_bo_reserve(kiq->eop_obj, false);
+	r = amdgpu_bo_reserve(kiq->eop_obj, true);
 	if (unlikely(r != 0))
 		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
 	amdgpu_bo_kunmap(kiq->eop_obj);
@@ -1932,6 +1932,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 		case 0xca:
 		case 0xce:
 		case 0x88:
+		case 0xe6:
 			/* B6 */
 			adev->gfx.config.max_cu_per_sh = 6;
 			break;
@@ -1964,17 +1965,28 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 		adev->gfx.config.max_backends_per_se = 1;
 
 		switch (adev->pdev->revision) {
+		case 0x80:
+		case 0x81:
 		case 0xc0:
 		case 0xc1:
 		case 0xc2:
 		case 0xc4:
 		case 0xc8:
 		case 0xc9:
+		case 0xd6:
+		case 0xda:
+		case 0xe9:
+		case 0xea:
 			adev->gfx.config.max_cu_per_sh = 3;
 			break;
+		case 0x83:
 		case 0xd0:
 		case 0xd1:
 		case 0xd2:
+		case 0xd4:
+		case 0xdb:
+		case 0xe1:
+		case 0xe2:
 		default:
 			adev->gfx.config.max_cu_per_sh = 2;
 			break;
@@ -3890,7 +3902,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
 				   INDEX_STRIDE, 3);
 	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < adev->vm_manager.num_ids; i++) {
+	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
 		vi_srbm_select(adev, 0, 0, 0, i);
 		/* CP and shaders */
 		if (i == 0) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index a447b70841c9..0c16b7563b73 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -39,7 +39,6 @@
 
 #define GFX9_NUM_GFX_RINGS     1
 #define GFX9_NUM_COMPUTE_RINGS 8
-#define GFX9_NUM_SE		4
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000
 
 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
@@ -453,7 +452,7 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 	int r;
 
 	if (adev->gfx.mec.hpd_eop_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
@@ -463,7 +462,7 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 		adev->gfx.mec.hpd_eop_obj = NULL;
 	}
 	if (adev->gfx.mec.mec_fw_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj);
@@ -599,7 +598,7 @@ static int gfx_v9_0_kiq_init(struct amdgpu_device *adev)
 
 	memset(hpd, 0, MEC_HPD_SIZE);
 
-	r = amdgpu_bo_reserve(kiq->eop_obj, false);
+	r = amdgpu_bo_reserve(kiq->eop_obj, true);
 	if (unlikely(r != 0))
 		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
 	amdgpu_bo_kunmap(kiq->eop_obj);
@@ -631,7 +630,6 @@ static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev,
 		ring->pipe = 1;
 	}
 
-	irq->data = ring;
 	ring->queue = 0;
 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
 	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
@@ -647,7 +645,6 @@ static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring,
 {
 	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
 	amdgpu_ring_fini(ring);
-	irq->data = NULL;
 }
 
 /* create MQD for each compute queue */
@@ -705,19 +702,19 @@ static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
 
 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
 {
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX),
+	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
 		(SQ_IND_INDEX__FORCE_READ_MASK));
-	return RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA));
+	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
 }
 
 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
 			   uint32_t wave, uint32_t thread,
 			   uint32_t regno, uint32_t num, uint32_t *out)
 {
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX),
+	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
@@ -725,7 +722,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
 		(SQ_IND_INDEX__FORCE_READ_MASK) |
 		(SQ_IND_INDEX__AUTO_INCR_MASK));
 	while (num--)
-		*(out++) = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA));
+		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
 }
 
 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
@@ -774,7 +771,6 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		adev->gfx.config.max_shader_engines = 4;
-		adev->gfx.config.max_tile_pipes = 8; //??
 		adev->gfx.config.max_cu_per_sh = 16;
 		adev->gfx.config.max_sh_per_se = 1;
 		adev->gfx.config.max_backends_per_se = 4;
@@ -787,6 +783,8 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+		adev->gfx.config.gs_vgt_table_depth = 32;
+		adev->gfx.config.gs_prim_buffer_depth = 1792;
 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
 		break;
 	default:
@@ -801,6 +799,10 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
 					adev->gfx.config.gb_addr_config,
 					GB_ADDR_CONFIG,
 					NUM_PIPES);
+
+	adev->gfx.config.max_tile_pipes =
+		adev->gfx.config.gb_addr_config_fields.num_pipes;
+
 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
 			REG_GET_FIELD(
 					adev->gfx.config.gb_addr_config,
@@ -841,7 +843,7 @@ static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
 	}
 	size_se = size_se ? size_se : default_size_se;
 
-	ngg_buf->size = size_se * GFX9_NUM_SE;
+	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
 				    &ngg_buf->bo,
@@ -888,7 +890,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
 	adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size;
 
 	/* Primitive Buffer */
-	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PRIM],
+	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
 				    amdgpu_prim_buf_per_se,
 				    64 * 1024);
 	if (r) {
@@ -897,7 +899,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
 	}
 
 	/* Position Buffer */
-	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[POS],
+	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
 				    amdgpu_pos_buf_per_se,
 				    256 * 1024);
 	if (r) {
@@ -906,7 +908,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
 	}
 
 	/* Control Sideband */
-	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[CNTL],
+	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
 				    amdgpu_cntl_sb_buf_per_se,
 				    256);
 	if (r) {
@@ -918,7 +920,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
 	if (amdgpu_param_buf_per_se <= 0)
 		goto out;
 
-	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PARAM],
+	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
 				    amdgpu_param_buf_per_se,
 				    512 * 1024);
 	if (r) {
@@ -947,47 +949,47 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
 
 	/* Program buffer size */
 	data = 0;
-	size = adev->gfx.ngg.buf[PRIM].size / 256;
+	size = adev->gfx.ngg.buf[NGG_PRIM].size / 256;
 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size);
 
-	size = adev->gfx.ngg.buf[POS].size / 256;
+	size = adev->gfx.ngg.buf[NGG_POS].size / 256;
 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_1), data);
+	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
 
 	data = 0;
-	size = adev->gfx.ngg.buf[CNTL].size / 256;
+	size = adev->gfx.ngg.buf[NGG_CNTL].size / 256;
 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size);
 
-	size = adev->gfx.ngg.buf[PARAM].size / 1024;
+	size = adev->gfx.ngg.buf[NGG_PARAM].size / 1024;
 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_2), data);
+	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
 
 	/* Program buffer base address */
-	base = lower_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr);
+	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE), data);
+	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
 
-	base = upper_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr);
+	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE_HI), data);
+	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
 
-	base = lower_32_bits(adev->gfx.ngg.buf[POS].gpu_addr);
+	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE), data);
+	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
 
-	base = upper_32_bits(adev->gfx.ngg.buf[POS].gpu_addr);
+	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE_HI), data);
+	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
 
-	base = lower_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr);
+	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE), data);
+	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
 
-	base = upper_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr);
+	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI), data);
+	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
 
 	/* Clear GDS reserved memory */
 	r = amdgpu_ring_alloc(ring, 17);
@@ -1096,7 +1098,7 @@ static int gfx_v9_0_sw_init(void *handle)
 		ring->pipe = i / 8;
 		ring->queue = i % 8;
 		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
-		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
+		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
 		/* type-2 packets are deprecated on MEC, use type-3 instead */
 		r = amdgpu_ring_init(adev, ring, 1024,
@@ -1203,7 +1205,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh
 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
 	}
-	WREG32( SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
 }
 
 static u32 gfx_v9_0_create_bitmask(u32 bit_width)
@@ -1215,8 +1217,8 @@ static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
 
-	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_RB_BACKEND_DISABLE));
-	data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_RB_BACKEND_DISABLE));
+	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
+	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
 
 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
@@ -1276,8 +1278,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
 		soc15_grbm_select(adev, 0, 0, 0, i);
 		/* CP and shaders */
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
+		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
 	}
 	soc15_grbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
@@ -1304,8 +1306,8 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
 		tmp = 0;
 		tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
 				    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), tmp);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), 0);
+		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
+		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
 	}
 	soc15_grbm_select(adev, 0, 0, 0, 0);
 
@@ -1320,7 +1322,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
 	 */
 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FIFO_SIZE),
+	WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
@@ -1343,7 +1345,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
 			for (k = 0; k < adev->usec_timeout; k++) {
-				if (RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY)) == 0)
+				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
 					break;
 				udelay(1);
 			}
@@ -1357,7 +1359,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
 	for (k = 0; k < adev->usec_timeout; k++) {
-		if ((RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY)) & mask) == 0)
+		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
 			break;
 		udelay(1);
 	}
@@ -1366,7 +1368,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
 					       bool enable)
 {
-	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
+	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
 
 	if (enable)
 		return;
@@ -1376,15 +1378,15 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), tmp);
+	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
 }
 
 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
 {
-	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
+	u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
 
 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL), tmp);
+	WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp);
 
 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
 
@@ -1415,17 +1417,17 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
 
 #ifdef AMDGPU_RLC_DEBUG_RETRY
 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
-	rlc_ucode_ver = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_6));
+	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
 	if(rlc_ucode_ver == 0x108) {
 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
 		 * default is 0x9C4 to create a 100us interval */
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_TIMER_INT_3), 0x9C4);
+		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
 		 * to disable the page fault retry interrupts, default is 
 		 * 0x100 (256) */
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_12), 0x100);
+		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
 	}
 #endif
 }
@@ -1446,11 +1448,11 @@ static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR),
+	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
 			RLCG_UCODE_LOADING_START_ADDRESS);
 	for (i = 0; i < fw_size; i++)
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA), le32_to_cpup(fw_data++));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR), adev->gfx.rlc_fw_version);
+		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
+	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
 
 	return 0;
 }
@@ -1465,10 +1467,10 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
 	gfx_v9_0_rlc_stop(adev);
 
 	/* disable CG */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), 0);
+	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
 
 	/* disable PG */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), 0);
+	WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);
 
 	gfx_v9_0_rlc_reset(adev);
 
@@ -1487,7 +1489,7 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
 {
 	int i;
-	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL));
+	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
 
 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
@@ -1496,7 +1498,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 			adev->gfx.gfx_ring[i].ready = false;
 	}
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
 	udelay(50);
 }
 
@@ -1529,30 +1531,30 @@ static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
 		(adev->gfx.pfp_fw->data +
 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), 0);
+	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
 	for (i = 0; i < fw_size; i++)
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA), le32_to_cpup(fw_data++));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), adev->gfx.pfp_fw_version);
+		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
+	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
 
 	/* CE */
 	fw_data = (const __le32 *)
 		(adev->gfx.ce_fw->data +
 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), 0);
+	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
 	for (i = 0; i < fw_size; i++)
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA), le32_to_cpup(fw_data++));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), adev->gfx.ce_fw_version);
+		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
+	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
 
 	/* ME */
 	fw_data = (const __le32 *)
 		(adev->gfx.me_fw->data +
 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), 0);
+	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
 	for (i = 0; i < fw_size; i++)
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_DATA), le32_to_cpup(fw_data++));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), adev->gfx.me_fw_version);
+		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
+	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
 
 	return 0;
 }
@@ -1594,8 +1596,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
 	int r, i;
 
 	/* init the CP */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MAX_CONTEXT), adev->gfx.config.max_hw_contexts - 1);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_DEVICE_ID), 1);
+	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
+	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
 
 	gfx_v9_0_cp_gfx_enable(adev, true);
 
@@ -1650,10 +1652,10 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
 
 	/* Set the write pointer delay */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_DELAY), 0);
+	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
 
 	/* set the RB to use vmid 0 */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_VMID), 0);
+	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
 
 	/* Set ring buffer size */
 	ring = &adev->gfx.gfx_ring[0];
@@ -1663,30 +1665,30 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
 #ifdef __BIG_ENDIAN
 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
 #endif
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
 
 	/* Initialize the ring buffer's write pointers */
 	ring->wptr = 0;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr));
+	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
 
 	/* set the wb address wether it's enabled or not */
 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR), lower_32_bits(rptr_addr));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR_HI), upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
 
 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO), lower_32_bits(wptr_gpu_addr));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI), upper_32_bits(wptr_gpu_addr));
+	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
+	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
 
 	mdelay(1);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
 
 	rb_addr = ring->gpu_addr >> 8;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE), rb_addr);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE_HI), upper_32_bits(rb_addr));
+	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
+	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
 
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
 	if (ring->use_doorbell) {
 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
 				    DOORBELL_OFFSET, ring->doorbell_index);
@@ -1695,13 +1697,13 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
 	} else {
 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
 	}
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
 
 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER), tmp);
+	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER),
+	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
 
 
@@ -1717,9 +1719,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
 	int i;
 
 	if (enable) {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL), 0);
+		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
 	} else {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL),
+		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
 			adev->gfx.compute_ring[i].ready = false;
@@ -1756,21 +1758,21 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 	tmp = 0;
 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_CNTL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_LO),
+	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_HI),
+	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
  
 	/* MEC1 */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
 			 mec_hdr->jt_offset);
 	for (i = 0; i < mec_hdr->jt_size; i++)
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA),
+		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
 			adev->gfx.mec_fw_version);
 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
 
@@ -1785,7 +1787,7 @@ static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev)
 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
 
 		if (ring->mqd_obj) {
-			r = amdgpu_bo_reserve(ring->mqd_obj, false);
+			r = amdgpu_bo_reserve(ring->mqd_obj, true);
 			if (unlikely(r != 0))
 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
 
@@ -1823,12 +1825,12 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 	struct amdgpu_device *adev = ring->adev;
 
 	/* tell RLC which is KIQ queue */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
+	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
 	tmp &= 0xffffff00;
 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp);
+	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 	tmp |= 0x80;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp);
+	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 }
 
 static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring)
@@ -1898,14 +1900,14 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
 
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
 			(order_base_2(MEC_HPD_SIZE / 4) - 1));
 
 	mqd->cp_hqd_eop_control = tmp;
 
 	/* enable doorbell? */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
 
 	if (ring->use_doorbell) {
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
@@ -1935,7 +1937,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
 
 	/* set MQD vmid to 0 */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
 	mqd->cp_mqd_control = tmp;
 
@@ -1945,7 +1947,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
 
 	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
 			    (order_base_2(ring->ring_size / 4) - 1));
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
@@ -1973,7 +1975,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	tmp = 0;
 	/* enable the doorbell if requested */
 	if (ring->use_doorbell) {
-		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
+		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
 				DOORBELL_OFFSET, ring->doorbell_index);
 
@@ -1989,15 +1991,20 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 
 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
 	ring->wptr = 0;
-	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
 
 	/* set the vmid for the queue */
 	mqd->cp_hqd_vmid = 0;
 
-	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
 	mqd->cp_hqd_persistent_state = tmp;
 
+	/* set MIN_IB_AVAIL_SIZE */
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+	mqd->cp_hqd_ib_control = tmp;
+
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;
 
@@ -2013,94 +2020,94 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
 	/* disable wptr polling */
 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
 	       mqd->cp_hqd_eop_base_addr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
 	       mqd->cp_hqd_eop_base_addr_hi);
 
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL),
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
 	       mqd->cp_hqd_eop_control);
 
 	/* enable doorbell? */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
 	       mqd->cp_hqd_pq_doorbell_control);
 
 	/* disable the queue if it's active */
-	if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1);
+	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
+		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
 		for (j = 0; j < adev->usec_timeout; j++) {
-			if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1))
+			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
 				break;
 			udelay(1);
 		}
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
+		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
 		       mqd->cp_hqd_dequeue_request);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR),
+		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
 		       mqd->cp_hqd_pq_rptr);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
 		       mqd->cp_hqd_pq_wptr_lo);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
 		       mqd->cp_hqd_pq_wptr_hi);
 	}
 
 	/* set the pointer to the MQD */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
 	       mqd->cp_mqd_base_addr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI),
+	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
 	       mqd->cp_mqd_base_addr_hi);
 
 	/* set MQD vmid to 0 */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL),
+	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
 	       mqd->cp_mqd_control);
 
 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
 	       mqd->cp_hqd_pq_base_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
 	       mqd->cp_hqd_pq_base_hi);
 
 	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
 	       mqd->cp_hqd_pq_control);
 
 	/* set the wb address whether it's enabled or not */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
 				mqd->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
 				mqd->cp_hqd_pq_rptr_report_addr_hi);
 
 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
 
 	/* enable the doorbell if requested */
 	if (ring->use_doorbell) {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER),
+		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
 					(AMDGPU_DOORBELL64_KIQ *2) << 2);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER),
+		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
 					(AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2);
 	}
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
 	       mqd->cp_hqd_pq_doorbell_control);
 
 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
 	       mqd->cp_hqd_pq_wptr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
 	       mqd->cp_hqd_pq_wptr_hi);
 
 	/* set the vmid for the queue */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid);
+	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
 	       mqd->cp_hqd_persistent_state);
 
 	/* activate the queue */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE),
+	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
 	       mqd->cp_hqd_active);
 
 	if (ring->use_doorbell)
@@ -2323,7 +2330,7 @@ static bool gfx_v9_0_is_idle(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (REG_GET_FIELD(RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)), 
+	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
 				GRBM_STATUS, GUI_ACTIVE))
 		return false;
 	else
@@ -2338,7 +2345,7 @@ static int gfx_v9_0_wait_for_idle(void *handle)
 
 	for (i = 0; i < adev->usec_timeout; i++) {
 		/* read MC_STATUS */
-		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)) & 
+		tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
 			GRBM_STATUS__GUI_ACTIVE_MASK;
 
 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
@@ -2355,7 +2362,7 @@ static int gfx_v9_0_soft_reset(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* GRBM_STATUS */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS));
+	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
@@ -2374,7 +2381,7 @@ static int gfx_v9_0_soft_reset(void *handle)
 	}
 
 	/* GRBM_STATUS2 */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2));
+	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
@@ -2391,17 +2398,17 @@ static int gfx_v9_0_soft_reset(void *handle)
 		gfx_v9_0_cp_compute_enable(adev, false);
 
 		if (grbm_soft_reset) {
-			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
+			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
 			tmp |= grbm_soft_reset;
 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
-			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
+			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
 
 			udelay(50);
 
 			tmp &= ~grbm_soft_reset;
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
-			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
+			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
 		}
 
 		/* Wait a little for things to settle down */
@@ -2415,9 +2422,9 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
 	uint64_t clock;
 
 	mutex_lock(&adev->gfx.gpu_clock_mutex);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT), 1);
-	clock = (uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB)) |
-		((uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB)) << 32ULL);
+	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
 	return clock;
 }
@@ -2497,7 +2504,7 @@ static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
 		return;
 
 	/* if RLC is not enabled, do nothing */
-	rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
+	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
 		return;
 
@@ -2506,7 +2513,7 @@ static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
 	     AMD_CG_SUPPORT_GFX_3D_CGCG)) {
 		data = RLC_SAFE_MODE__CMD_MASK;
 		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data);
+		WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
 
 		/* wait for RLC_SAFE_MODE */
 		for (i = 0; i < adev->usec_timeout; i++) {
@@ -2526,7 +2533,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
 		return;
 
 	/* if RLC is not enabled, do nothing */
-	rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
+	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
 		return;
 
@@ -2537,7 +2544,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
 		 * mode.
 		 */
 		data = RLC_SAFE_MODE__CMD_MASK;
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data);
+		WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
 		adev->gfx.rlc.in_safe_mode = false;
 	}
 }
@@ -2550,7 +2557,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
 	/* It is disabled by HW by default */
 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
-		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
 			  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
@@ -2560,48 +2567,48 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
 
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
 
 		/* MGLS is a global flag to control all MGLS in GFX */
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
 			/* 2 - RLC memory Light sleep */
 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
-				def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
+				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
 				if (def != data)
-					WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data);
+					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
 			}
 			/* 3 - CP memory Light sleep */
 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
-				def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
+				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
 				if (def != data)
-					WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data);
+					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
 			}
 		}
 	} else {
 		/* 1 - MGCG_OVERRIDE */
-		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
 			 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
 
 		/* 2 - disable MGLS in RLC */
-		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
+		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data);
+			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
 		}
 
 		/* 3 - disable MGLS in CP */
-		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
+		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data);
+			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
 		}
 	}
 }
@@ -2616,37 +2623,37 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
 	/* Enable 3D CGCG/CGLS */
 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
 		/* write cmd to clear cgcg/cgls ov */
-		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 		/* unset CGCG override */
 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
 		/* update CGCG and CGLS override bits */
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
 		/* enable 3Dcgcg FSM(0x0020003f) */
-		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
+		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
 		data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
 
 		/* set IDLE_POLL_COUNT(0x00900100) */
-		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
+		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
+			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
 	} else {
 		/* Disable CGCG/CGLS */
-		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
+		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
 		/* disable cgcg, cgls should be disabled */
 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
 		/* disable cgcg and cgls in FSM */
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
 	}
 
 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
@@ -2660,7 +2667,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
-		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 		/* unset CGCG override */
 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
@@ -2669,31 +2676,31 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
 		/* update CGCG and CGLS override bits */
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
 
 		/* enable cgcg FSM(0x0020003F) */
-		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
+		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
 		data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
 			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
 
 		/* set IDLE_POLL_COUNT(0x00900100) */
-		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
+		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
+			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
 	} else {
-		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
+		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
 		/* reset CGCG/CGLS bits */
 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
 		/* disable cgcg and cgls in FSM */
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
 	}
 
 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
@@ -2740,6 +2747,9 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		gfx_v9_0_update_gfx_clock_gating(adev,
@@ -2760,12 +2770,12 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
 		*flags = 0;
 
 	/* AMD_CG_SUPPORT_GFX_MGCG */
-	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
 
 	/* AMD_CG_SUPPORT_GFX_CGCG */
-	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
+	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
 
@@ -2774,17 +2784,17 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
 
 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
-	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
+	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
 
 	/* AMD_CG_SUPPORT_GFX_CP_LS */
-	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
+	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
 
 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
-	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
+	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
 
@@ -2807,8 +2817,8 @@ static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
 	if (ring->use_doorbell) {
 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
 	} else {
-		wptr = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR));
-		wptr += (u64)RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI)) << 32;
+		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
+		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
 	}
 
 	return wptr;
@@ -2823,8 +2833,8 @@ static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
 		WDOORBELL64(ring->doorbell_index, ring->wptr);
 	} else {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr));
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr));
+		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
 	}
 }
 
@@ -2956,35 +2966,29 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	unsigned eng = ring->idx;
-	unsigned i;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-					   hub->ctx0_ptb_addr_lo32
-					   + (2 * vm_id),
-					   lower_32_bits(pd_addr));
+	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+				   hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
+				   lower_32_bits(pd_addr));
 
-		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-					   hub->ctx0_ptb_addr_hi32
-					   + (2 * vm_id),
-					   upper_32_bits(pd_addr));
+	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+				   hub->ctx0_ptb_addr_hi32 + (2 * vm_id),
+				   upper_32_bits(pd_addr));
 
-		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-					   hub->vm_inv_eng0_req + eng, req);
+	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+				   hub->vm_inv_eng0_req + eng, req);
 
-		/* wait for the invalidate to complete */
-		gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
-				      eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
-	}
+	/* wait for the invalidate to complete */
+	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
+			      eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
 
 	/* compute doesn't have PFP */
 	if (usepfp) {
@@ -3373,9 +3377,7 @@ static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
 					    enum amdgpu_interrupt_state state)
 {
 	uint32_t tmp, target;
-	struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data;
-
-	BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
+	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
 
 	if (ring->me == 1)
 		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
@@ -3386,20 +3388,20 @@ static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
 	switch (type) {
 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
 		if (state == AMDGPU_IRQ_STATE_DISABLE) {
-			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL));
+			tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
 						 GENERIC2_INT_ENABLE, 0);
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp);
+			WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
 
 			tmp = RREG32(target);
 			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
 						 GENERIC2_INT_ENABLE, 0);
 			WREG32(target, tmp);
 		} else {
-			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL));
+			tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
 						 GENERIC2_INT_ENABLE, 1);
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp);
+			WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
 
 			tmp = RREG32(target);
 			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
@@ -3419,9 +3421,7 @@ static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev,
 			    struct amdgpu_iv_entry *entry)
 {
 	u8 me_id, pipe_id, queue_id;
-	struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data;
-
-	BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
+	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
 
 	me_id = (entry->ring_id & 0x0c) >> 2;
 	pipe_id = (entry->ring_id & 0x03) >> 0;
@@ -3456,13 +3456,14 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.align_mask = 0xff,
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
+	.vmhub = AMDGPU_GFXHUB,
 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
 		5 +  /* COND_EXEC */
 		7 +  /* PIPELINE_SYNC */
-		46 + /* VM_FLUSH */
+		24 + /* VM_FLUSH */
 		8 +  /* FENCE for VM_FLUSH */
 		20 + /* GDS switch */
 		4 + /* double SWITCH_BUFFER,
@@ -3500,6 +3501,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 	.align_mask = 0xff,
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
+	.vmhub = AMDGPU_GFXHUB,
 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
@@ -3508,7 +3510,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
-		64 + /* gfx_v9_0_ring_emit_vm_flush */
+		24 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
@@ -3529,6 +3531,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 	.align_mask = 0xff,
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
+	.vmhub = AMDGPU_GFXHUB,
 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
@@ -3537,7 +3540,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
-		64 + /* gfx_v9_0_ring_emit_vm_flush */
+		24 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
@@ -3612,7 +3615,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
 {
 	/* init asci gds info */
-	adev->gds.mem.total_size = RREG32(SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE));
+	adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
 	adev->gds.gws.total_size = 64;
 	adev->gds.oa.total_size = 16;
 
@@ -3641,8 +3644,8 @@ static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
 
-	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG));
-	data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG));
+	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
+	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
 
 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
@@ -3763,25 +3766,25 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
 	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE);
 	eop_gpu_addr >>= 8;
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR), lower_32_bits(eop_gpu_addr));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), upper_32_bits(eop_gpu_addr));
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr));
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
 	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr);
 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr);
 
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
 				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp);
 
 	/* enable doorbell? */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
 	if (use_doorbell)
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
 	else
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
 	mqd->cp_hqd_pq_doorbell_control = tmp;
 
 	/* disable the queue if it's active */
@@ -3790,40 +3793,40 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
 	mqd->cp_hqd_pq_rptr = 0;
 	mqd->cp_hqd_pq_wptr_lo = 0;
 	mqd->cp_hqd_pq_wptr_hi = 0;
-	if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1);
+	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
+		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
 		for (j = 0; j < adev->usec_timeout; j++) {
-			if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1))
+			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
 				break;
 			udelay(1);
 		}
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), mqd->cp_hqd_dequeue_request);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR), mqd->cp_hqd_pq_rptr);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi);
+		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
+		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
+		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
+		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
 	}
 
 	/* set the pointer to the MQD */
 	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
 	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR), mqd->cp_mqd_base_addr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI), mqd->cp_mqd_base_addr_hi);
+	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
+	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
 
 	/* set MQD vmid to 0 */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, tmp);
 	mqd->cp_mqd_control = tmp;
 
 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
 	hqd_gpu_addr = ring->gpu_addr >> 8;
 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE), mqd->cp_hqd_pq_base_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI), mqd->cp_hqd_pq_base_hi);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
 
 	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
 		(order_base_2(ring->ring_size / 4) - 1));
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
@@ -3835,7 +3838,7 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, tmp);
 	mqd->cp_hqd_pq_control = tmp;
 
 	/* set the wb address wether it's enabled or not */
@@ -3843,27 +3846,27 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
 	mqd->cp_hqd_pq_rptr_report_addr_hi =
 	upper_32_bits(wb_gpu_addr) & 0xffff;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
 		mqd->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
 		mqd->cp_hqd_pq_rptr_report_addr_hi);
 
 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
 		mqd->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
 		mqd->cp_hqd_pq_wptr_poll_addr_hi);
 
 	/* enable the doorbell if requested */
 	if (use_doorbell) {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER),
+		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
 			(AMDGPU_DOORBELL64_KIQ * 2) << 2);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER),
+		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
 			(AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2);
-		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
+		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
 			DOORBELL_OFFSET, ring->doorbell_index);
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
@@ -3874,25 +3877,25 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
 	} else {
 		mqd->cp_hqd_pq_doorbell_control = 0;
 	}
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
 		mqd->cp_hqd_pq_doorbell_control);
 
 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
 
 	/* set the vmid for the queue */
 	mqd->cp_hqd_vmid = 0;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid);
+	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
 
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE));
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE), tmp);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, tmp);
 	mqd->cp_hqd_persistent_state = tmp;
 
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), mqd->cp_hqd_active);
+	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
 	soc15_grbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 631aef38126d..d860939152df 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -346,7 +346,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
 	 * size equal to the 1024 or vram, whichever is larger.
 	 */
 	if (amdgpu_gart_size == -1)
-		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
+		adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
+					adev->mc.mc_vram_size);
 	else
 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
 
@@ -621,7 +622,7 @@ static int gmc_v6_0_vm_init(struct amdgpu_device *adev)
 	 * amdgpu graphics/compute will use VMIDs 1-7
 	 * amdkfd will use VMIDs 8-15
 	 */
-	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
 	adev->vm_manager.num_level = 1;
 	amdgpu_vm_manager_init(adev);
 
@@ -949,10 +950,6 @@ static int gmc_v6_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		gmc_v6_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
 	gmc_v6_0_hw_fini(adev);
 
 	return 0;
@@ -967,16 +964,9 @@ static int gmc_v6_0_resume(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v6_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
-	}
+	amdgpu_vm_reset_all_ids(adev);
 
-	return r;
+	return 0;
 }
 
 static bool gmc_v6_0_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 92abe12d92bb..2750e5c23813 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -395,7 +395,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
 	 * size equal to the 1024 or vram, whichever is larger.
 	 */
 	if (amdgpu_gart_size == -1)
-		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
+		adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
+					adev->mc.mc_vram_size);
 	else
 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
 
@@ -746,7 +747,7 @@ static int gmc_v7_0_vm_init(struct amdgpu_device *adev)
 	 * amdgpu graphics/compute will use VMIDs 1-7
 	 * amdkfd will use VMIDs 8-15
 	 */
-	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
 	adev->vm_manager.num_level = 1;
 	amdgpu_vm_manager_init(adev);
 
@@ -1116,10 +1117,6 @@ static int gmc_v7_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		gmc_v7_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
 	gmc_v7_0_hw_fini(adev);
 
 	return 0;
@@ -1134,16 +1131,9 @@ static int gmc_v7_0_resume(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v7_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
-	}
+	amdgpu_vm_reset_all_ids(adev);
 
-	return r;
+	return 0;
 }
 
 static bool gmc_v7_0_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index f2ccefc66fd4..f56b4089ee9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -557,7 +557,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
 	 * size equal to the 1024 or vram, whichever is larger.
 	 */
 	if (amdgpu_gart_size == -1)
-		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
+		adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
+					adev->mc.mc_vram_size);
 	else
 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
 
@@ -949,7 +950,7 @@ static int gmc_v8_0_vm_init(struct amdgpu_device *adev)
 	 * amdgpu graphics/compute will use VMIDs 1-7
 	 * amdkfd will use VMIDs 8-15
 	 */
-	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
 	adev->vm_manager.num_level = 1;
 	amdgpu_vm_manager_init(adev);
 
@@ -1208,10 +1209,6 @@ static int gmc_v8_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		gmc_v8_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
 	gmc_v8_0_hw_fini(adev);
 
 	return 0;
@@ -1226,16 +1223,9 @@ static int gmc_v8_0_resume(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v8_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
-	}
+	amdgpu_vm_reset_all_ids(adev);
 
-	return r;
+	return 0;
 }
 
 static bool gmc_v8_0_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3b045e0b114e..f936332a069d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -386,6 +386,23 @@ static int gmc_v9_0_early_init(void *handle)
 static int gmc_v9_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 3, 3 };
+	unsigned i;
+
+	for(i = 0; i < adev->num_rings; ++i) {
+		struct amdgpu_ring *ring = adev->rings[i];
+		unsigned vmhub = ring->funcs->vmhub;
+
+		ring->vm_inv_eng = vm_inv_eng[vmhub]++;
+		dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n",
+			 ring->idx, ring->name, ring->vm_inv_eng,
+			 ring->funcs->vmhub);
+	}
+
+	/* Engine 17 is used for GART flushes */
+	for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
+		BUG_ON(vm_inv_eng[i] > 17);
+
 	return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
 }
 
@@ -469,7 +486,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 	 * size equal to the 1024 or vram, whichever is larger.
 	 */
 	if (amdgpu_gart_size == -1)
-		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
+		adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
+					adev->mc.mc_vram_size);
 	else
 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
 
@@ -519,7 +537,8 @@ static int gmc_v9_0_vm_init(struct amdgpu_device *adev)
 	 * amdgpu graphics/compute will use VMIDs 1-7
 	 * amdkfd will use VMIDs 8-15
 	 */
-	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
 
 	/* TODO: fix num_level for APU when updating vm size and block size */
 	if (adev->flags & AMD_IS_APU)
@@ -772,10 +791,6 @@ static int gmc_v9_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		gmc_v9_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
 	gmc_v9_0_hw_fini(adev);
 
 	return 0;
@@ -790,17 +805,9 @@ static int gmc_v9_0_resume(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v9_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev,
-				"vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
-	}
+	amdgpu_vm_reset_all_ids(adev);
 
-	return r;
+	return 0;
 }
 
 static bool gmc_v9_0_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 62684510ddcd..dbfe48d1207a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -511,6 +511,9 @@ static int mmhub_v1_0_set_clockgating_state(void *handle,
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		mmhub_v1_0_update_medium_grain_clock_gating(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
index 5f0fc8bf16a9..8af0bddf85e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
@@ -84,4 +84,61 @@ struct mmsch_v1_0_cmd_indirect_write {
 	uint32_t reg_value;
 };
 
+static inline void mmsch_v1_0_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt,
+					       uint32_t *init_table,
+					       uint32_t reg_offset,
+					       uint32_t value)
+{
+	direct_wt->cmd_header.reg_offset = reg_offset;
+	direct_wt->reg_value = value;
+	memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write));
+}
+
+static inline void mmsch_v1_0_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt,
+						      uint32_t *init_table,
+						      uint32_t reg_offset,
+						      uint32_t mask, uint32_t data)
+{
+	direct_rd_mod_wt->cmd_header.reg_offset = reg_offset;
+	direct_rd_mod_wt->mask_value = mask;
+	direct_rd_mod_wt->write_data = data;
+	memcpy((void *)init_table, direct_rd_mod_wt,
+	       sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write));
+}
+
+static inline void mmsch_v1_0_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll,
+						 uint32_t *init_table,
+						 uint32_t reg_offset,
+						 uint32_t mask, uint32_t wait)
+{
+	direct_poll->cmd_header.reg_offset = reg_offset;
+	direct_poll->mask_value = mask;
+	direct_poll->wait_value = wait;
+	memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling));
+}
+
+#define MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
+	mmsch_v1_0_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \
+					   init_table, (reg), \
+					   (mask), (data)); \
+	init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
+	table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
+}
+
+#define MMSCH_V1_0_INSERT_DIRECT_WT(reg, value) { \
+	mmsch_v1_0_insert_direct_wt(&direct_wt, \
+				    init_table, (reg), \
+				    (value)); \
+	init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
+	table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
+}
+
+#define MMSCH_V1_0_INSERT_DIRECT_POLL(reg, mask, wait) { \
+	mmsch_v1_0_insert_direct_poll(&direct_poll, \
+				      init_table, (reg), \
+				      (mask), (wait)); \
+	init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
+	table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index 70a3dd13cb02..7bdc51b02326 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -368,9 +368,12 @@ static int xgpu_vi_mailbox_rcv_msg(struct amdgpu_device *adev,
 	u32 reg;
 	u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, RCV_MSG_VALID);
 
-	reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
-	if (!(reg & mask))
-		return -ENOENT;
+	/* workaround: host driver doesn't set VALID for CMPL now */
+	if (event != IDH_FLR_NOTIFICATION_CMPL) {
+		reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
+		if (!(reg & mask))
+			return -ENOENT;
+	}
 
 	reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
 	if (reg != event)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index c3588d1c7cb0..60a6407ba267 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -166,11 +166,8 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
 {
 	int ret;
 	uint32_t psp_gfxdrv_command_reg = 0;
-	struct amdgpu_bo *psp_sysdrv;
-	void *psp_sysdrv_virt = NULL;
-	uint64_t psp_sysdrv_mem;
 	struct amdgpu_device *adev = psp->adev;
-	uint32_t size, sol_reg;
+	uint32_t sol_reg;
 
 	/* Check sOS sign of life register to confirm sys driver and sOS
 	 * are already been loaded.
@@ -185,27 +182,14 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
 	if (ret)
 		return ret;
 
-	/*
-	 * Create a 1 meg GART memory to store the psp sys driver
-	 * binary with a 1 meg aligned address
-	 */
-	size = (psp->sys_bin_size + (PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)) &
-		(~(PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1));
-
-	ret = amdgpu_bo_create_kernel(adev, size, PSP_BOOTLOADER_1_MEG_ALIGNMENT,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &psp_sysdrv,
-				      &psp_sysdrv_mem,
-				      &psp_sysdrv_virt);
-	if (ret)
-		return ret;
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
 
 	/* Copy PSP System Driver binary to memory */
-	memcpy(psp_sysdrv_virt, psp->sys_start_addr, psp->sys_bin_size);
+	memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
 
 	/* Provide the sys driver to bootrom */
 	WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36),
-	       (uint32_t)(psp_sysdrv_mem >> 20));
+	       (uint32_t)(psp->fw_pri_mc_addr >> 20));
 	psp_gfxdrv_command_reg = 1 << 16;
 	WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
 	       psp_gfxdrv_command_reg);
@@ -216,8 +200,6 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
 	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
 			   0x80000000, 0x80000000, false);
 
-	amdgpu_bo_free_kernel(&psp_sysdrv, &psp_sysdrv_mem, &psp_sysdrv_virt);
-
 	return ret;
 }
 
@@ -225,11 +207,8 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
 {
 	int ret;
 	unsigned int psp_gfxdrv_command_reg = 0;
-	struct amdgpu_bo *psp_sos;
-	void *psp_sos_virt = NULL;
-	uint64_t psp_sos_mem;
 	struct amdgpu_device *adev = psp->adev;
-	uint32_t size, sol_reg;
+	uint32_t sol_reg;
 
 	/* Check sOS sign of life register to confirm sys driver and sOS
 	 * are already been loaded.
@@ -244,23 +223,14 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
 	if (ret)
 		return ret;
 
-	size = (psp->sos_bin_size + (PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)) &
-		(~((uint64_t)PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1));
-
-	ret = amdgpu_bo_create_kernel(adev, size, PSP_BOOTLOADER_1_MEG_ALIGNMENT,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &psp_sos,
-				      &psp_sos_mem,
-				      &psp_sos_virt);
-	if (ret)
-		return ret;
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
 
 	/* Copy Secure OS binary to PSP memory */
-	memcpy(psp_sos_virt, psp->sos_start_addr, psp->sos_bin_size);
+	memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
 
 	/* Provide the PSP secure OS to bootrom */
 	WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36),
-	       (uint32_t)(psp_sos_mem >> 20));
+	       (uint32_t)(psp->fw_pri_mc_addr >> 20));
 	psp_gfxdrv_command_reg = 2 << 16;
 	WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
 	       psp_gfxdrv_command_reg);
@@ -273,8 +243,6 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
 			   0, true);
 #endif
 
-	amdgpu_bo_free_kernel(&psp_sos, &psp_sos_mem, &psp_sos_virt);
-
 	return ret;
 }
 
@@ -300,7 +268,6 @@ int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd
 int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type)
 {
 	int ret = 0;
-	unsigned int psp_ring_reg = 0;
 	struct psp_ring *ring;
 	struct amdgpu_device *adev = psp->adev;
 
@@ -320,6 +287,16 @@ int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type)
 		return ret;
 	}
 
+	return 0;
+}
+
+int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type)
+{
+	int ret = 0;
+	unsigned int psp_ring_reg = 0;
+	struct psp_ring *ring = &psp->km_ring;
+	struct amdgpu_device *adev = psp->adev;
+
 	/* Write low address of the ring to C2PMSG_69 */
 	psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
 	WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_69), psp_ring_reg);
@@ -344,6 +321,33 @@ int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type)
 	return ret;
 }
 
+int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type)
+{
+	int ret = 0;
+	struct psp_ring *ring;
+	unsigned int psp_ring_reg = 0;
+	struct amdgpu_device *adev = psp->adev;
+
+	ring = &psp->km_ring;
+
+	/* Write the ring destroy command to C2PMSG_64 */
+	psp_ring_reg = 3 << 16;
+	WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), psp_ring_reg);
+
+	/* there might be handshake issue with hardware which needs delay */
+	mdelay(20);
+
+	/* Wait for response flag (bit 31) in C2PMSG_64 */
+	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+			   0x80000000, 0x80000000, false);
+
+	if (ring->ring_mem)
+		amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+				      &ring->ring_mem_mc_addr,
+				      (void **)&ring->ring_mem);
+	return ret;
+}
+
 int psp_v3_1_cmd_submit(struct psp_context *psp,
 		        struct amdgpu_firmware_info *ucode,
 		        uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
index e82eff741a08..9dcd0b25c4c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
@@ -39,6 +39,10 @@ extern int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
 				 struct psp_gfx_cmd_resp *cmd);
 extern int psp_v3_1_ring_init(struct psp_context *psp,
 			      enum psp_ring_type ring_type);
+extern int psp_v3_1_ring_create(struct psp_context *psp,
+				enum psp_ring_type ring_type);
+extern int psp_v3_1_ring_destroy(struct psp_context *psp,
+				enum psp_ring_type ring_type);
 extern int psp_v3_1_cmd_submit(struct psp_context *psp,
 			       struct amdgpu_firmware_info *ucode,
 			       uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 21f38d882335..ecc70a730a54 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -48,8 +48,7 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
 static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
 static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
 
-static const u32 golden_settings_sdma_4[] =
-{
+static const u32 golden_settings_sdma_4[] = {
 	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07,
 	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100,
 	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100,
@@ -76,8 +75,7 @@ static const u32 golden_settings_sdma_4[] =
 	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_UTCL1_PAGE), 0x000003ff, 0x000003c0
 };
 
-static const u32 golden_settings_sdma_vg10[] =
-{
+static const u32 golden_settings_sdma_vg10[] = {
 	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00104002,
 	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002,
 	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG), 0x0018773f, 0x00104002,
@@ -87,16 +85,17 @@ static const u32 golden_settings_sdma_vg10[] =
 static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset)
 {
 	u32 base = 0;
+
 	switch (instance) {
-		case 0:
-			base = SDMA0_BASE.instance[0].segment[0];
-			break;
-		case 1:
-			base = SDMA1_BASE.instance[0].segment[0];
-			break;
-		default:
-			BUG();
-			break;
+	case 0:
+		base = SDMA0_BASE.instance[0].segment[0];
+		break;
+	case 1:
+		base = SDMA1_BASE.instance[0].segment[0];
+		break;
+	default:
+		BUG();
+		break;
 	}
 
 	return base + internal_offset;
@@ -159,7 +158,8 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
 	case CHIP_VEGA10:
 		chip_name = "vega10";
 		break;
-	default: BUG();
+	default:
+		BUG();
 	}
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -179,7 +179,7 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
 		if (adev->sdma.instance[i].feature_version >= 20)
 			adev->sdma.instance[i].burst_nop = true;
 		DRM_DEBUG("psp_load == '%s'\n",
-				adev->firmware.load_type == AMDGPU_FW_LOAD_PSP? "true": "false");
+				adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
 
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
@@ -192,9 +192,7 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
 	}
 out:
 	if (err) {
-		printk(KERN_ERR
-		       "sdma_v4_0: Failed to load firmware \"%s\"\n",
-		       fw_name);
+		DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name);
 		for (i = 0; i < adev->sdma.num_instances; i++) {
 			release_firmware(adev->sdma.instance[i].fw);
 			adev->sdma.instance[i].fw = NULL;
@@ -212,10 +210,10 @@ out:
  */
 static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
 {
-	u64* rptr;
+	u64 *rptr;
 
 	/* XXX check if swapping is necessary on BE */
-	rptr =((u64*)&ring->adev->wb.wb[ring->rptr_offs]);
+	rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
 
 	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
 	return ((*rptr) >> 2);
@@ -231,19 +229,20 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
 static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	u64* wptr = NULL;
-	uint64_t local_wptr=0;
+	u64 *wptr = NULL;
+	uint64_t local_wptr = 0;
 
 	if (ring->use_doorbell) {
 		/* XXX check if swapping is necessary on BE */
-		wptr = ((u64*)&adev->wb.wb[ring->wptr_offs]);
+		wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]);
 		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr);
 		*wptr = (*wptr) >> 2;
 		DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr);
 	} else {
 		u32 lowbit, highbit;
 		int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
-		wptr=&local_wptr;
+
+		wptr = &local_wptr;
 		lowbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR)) >> 2;
 		highbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
 
@@ -285,12 +284,13 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
 		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
 	} else {
 		int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
+
 		DRM_DEBUG("Not using doorbell -- "
 				"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
-				"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x \n",
-				me,
+				"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
 				me,
 				lower_32_bits(ring->wptr << 2),
+				me,
 				upper_32_bits(ring->wptr << 2));
 		WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
 		WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
@@ -319,22 +319,22 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
  * Schedule an IB in the DMA ring (VEGA10).
  */
 static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
-                                   struct amdgpu_ib *ib,
-                                   unsigned vm_id, bool ctx_switch)
+					struct amdgpu_ib *ib,
+					unsigned vm_id, bool ctx_switch)
 {
-        u32 vmid = vm_id & 0xf;
+	u32 vmid = vm_id & 0xf;
 
-        /* IB packet must end on a 8 DW boundary */
-        sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+	/* IB packet must end on a 8 DW boundary */
+	sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
 
-        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
-                          SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
-        /* base must be 32 byte aligned */
-        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
-        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
-        amdgpu_ring_write(ring, ib->length_dw);
-        amdgpu_ring_write(ring, 0);
-        amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
+			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
+	/* base must be 32 byte aligned */
+	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, ib->length_dw);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 0);
 
 }
 
@@ -523,7 +523,7 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
 	u32 doorbell;
 	u32 doorbell_offset;
 	u32 temp;
-	int i,r;
+	int i, r;
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		ring = &adev->sdma.instance[i].ring;
@@ -572,7 +572,7 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
 		doorbell = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL));
 		doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET));
 
-		if (ring->use_doorbell){
+		if (ring->use_doorbell) {
 			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
 			doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
 					OFFSET, ring->doorbell_index);
@@ -694,9 +694,7 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
 
 
 		for (j = 0; j < fw_size; j++)
-		{
 			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
-		}
 
 		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
 	}
@@ -744,10 +742,8 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
 	if (r)
 		return r;
 	r = sdma_v4_0_rlc_resume(adev);
-	if (r)
-		return r;
 
-	return 0;
+	return r;
 }
 
 /**
@@ -797,9 +793,8 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
 
 	for (i = 0; i < adev->usec_timeout; i++) {
 		tmp = le32_to_cpu(adev->wb.wb[index]);
-		if (tmp == 0xDEADBEEF) {
+		if (tmp == 0xDEADBEEF)
 			break;
-		}
 		DRM_UDELAY(1);
 	}
 
@@ -864,29 +859,29 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	if (r)
 		goto err1;
 
-        r = dma_fence_wait_timeout(f, false, timeout);
-        if (r == 0) {
-                DRM_ERROR("amdgpu: IB test timed out\n");
-                r = -ETIMEDOUT;
-                goto err1;
-        } else if (r < 0) {
-                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
-                goto err1;
-        }
-        tmp = le32_to_cpu(adev->wb.wb[index]);
-        if (tmp == 0xDEADBEEF) {
-                DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
-                r = 0;
-        } else {
-                DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
-                r = -EINVAL;
-        }
+	r = dma_fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out\n");
+		r = -ETIMEDOUT;
+		goto err1;
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+		goto err1;
+	}
+	tmp = le32_to_cpu(adev->wb.wb[index]);
+	if (tmp == 0xDEADBEEF) {
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
+	} else {
+		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
+		r = -EINVAL;
+	}
 err1:
-        amdgpu_ib_free(adev, &ib, NULL);
-        dma_fence_put(f);
+	amdgpu_ib_free(adev, &ib, NULL);
+	dma_fence_put(f);
 err0:
-        amdgpu_wb_free(adev, index);
-        return r;
+	amdgpu_wb_free(adev, index);
+	return r;
 }
 
 
@@ -1039,44 +1034,40 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					 unsigned vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	unsigned eng = ring->idx;
-	unsigned i;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
-				  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
-		amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
-				  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
-		amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2);
-		amdgpu_ring_write(ring, upper_32_bits(pd_addr));
-
-		/* flush TLB */
-		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
-				  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
-		amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng);
-		amdgpu_ring_write(ring, req);
-
-		/* wait for flush */
-		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
-				  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
-				  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
-		amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-		amdgpu_ring_write(ring, 0);
-		amdgpu_ring_write(ring, 1 << vm_id); /* reference */
-		amdgpu_ring_write(ring, 1 << vm_id); /* mask */
-		amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
-				  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
-	}
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+	amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+	amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2);
+	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
+
+	/* flush TLB */
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+	amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng);
+	amdgpu_ring_write(ring, req);
+
+	/* wait for flush */
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 1 << vm_id); /* reference */
+	amdgpu_ring_write(ring, 1 << vm_id); /* mask */
+	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
 }
 
 static int sdma_v4_0_early_init(void *handle)
@@ -1162,8 +1153,6 @@ static int sdma_v4_0_hw_init(void *handle)
 	sdma_v4_0_init_golden_registers(adev);
 
 	r = sdma_v4_0_start(adev);
-	if (r)
-		return r;
 
 	return r;
 }
@@ -1199,10 +1188,12 @@ static bool sdma_v4_0_is_idle(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	u32 i;
+
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		u32 tmp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_STATUS_REG));
+
 		if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
-	   		 return false;
+			return false;
 	}
 
 	return true;
@@ -1211,8 +1202,9 @@ static bool sdma_v4_0_is_idle(void *handle)
 static int sdma_v4_0_wait_for_idle(void *handle)
 {
 	unsigned i;
-	u32 sdma0,sdma1;
+	u32 sdma0, sdma1;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
 	for (i = 0; i < adev->usec_timeout; i++) {
 		sdma0 = RREG32(sdma_v4_0_get_reg_offset(0, mmSDMA0_STATUS_REG));
 		sdma1 = RREG32(sdma_v4_0_get_reg_offset(1, mmSDMA0_STATUS_REG));
@@ -1240,7 +1232,7 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
 
 	u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ?
 		sdma_v4_0_get_reg_offset(0, mmSDMA0_CNTL) :
-	       	sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL);
+		sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL);
 
 	sdma_cntl = RREG32(reg_offset);
 	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
@@ -1332,7 +1324,7 @@ static void sdma_v4_0_update_medium_grain_clock_gating(
 				  SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
 				  SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
 				  SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
-			if(def != data)
+			if (def != data)
 				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data);
 		}
 	} else {
@@ -1382,17 +1374,17 @@ static void sdma_v4_0_update_medium_grain_light_sleep(
 
 		/* 1-not override: enable sdma1 mem light sleep */
 		if (adev->asic_type == CHIP_VEGA10) {
-			 def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
-			 data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
-			 if (def != data)
-				 WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
+			def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
+			data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+			if (def != data)
+				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
 		}
 	} else {
 		/* 0-override:disable sdma0 mem light sleep */
 		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
 		data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
 		if (def != data)
-		       WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
+			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
 
 		/* 0-override:disable sdma1 mem light sleep */
 		if (adev->asic_type == CHIP_VEGA10) {
@@ -1473,6 +1465,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
 	.align_mask = 0xf,
 	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
 	.support_64bit_ptrs = true,
+	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = sdma_v4_0_ring_get_rptr,
 	.get_wptr = sdma_v4_0_ring_get_wptr,
 	.set_wptr = sdma_v4_0_ring_set_wptr,
@@ -1480,7 +1473,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
 		6 + /* sdma_v4_0_ring_emit_hdp_flush */
 		3 + /* sdma_v4_0_ring_emit_hdp_invalidate */
 		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
-		36 + /* sdma_v4_0_ring_emit_vm_flush */
+		18 + /* sdma_v4_0_ring_emit_vm_flush */
 		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
 	.emit_ib = sdma_v4_0_ring_emit_ib,
@@ -1606,8 +1599,7 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
 	}
 }
 
-const struct amdgpu_ip_block_version sdma_v4_0_ip_block =
-{
+const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
 	.type = AMD_IP_BLOCK_TYPE_SDMA,
 	.major = 4,
 	.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 385de8617075..6b55d451ae7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -25,7 +25,7 @@
 #include <linux/module.h>
 #include "drmP.h"
 #include "amdgpu.h"
-#include "amdgpu_atombios.h"
+#include "amdgpu_atomfirmware.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
@@ -405,11 +405,11 @@ static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev)
 
 static int soc15_asic_reset(struct amdgpu_device *adev)
 {
-	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+	amdgpu_atomfirmware_scratch_regs_engine_hung(adev, true);
 
 	soc15_gpu_pci_config_reset(adev);
 
-	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+	amdgpu_atomfirmware_scratch_regs_engine_hung(adev, false);
 
 	return 0;
 }
@@ -505,8 +505,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
 		amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block);
 		amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block);
-		if (!amdgpu_sriov_vf(adev))
-			amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block);
+		amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block);
 		amdgpu_ip_block_add(adev, &vce_v4_0_ip_block);
 		break;
 	default:
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 2b96c806baa1..e8df6d820dbe 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -45,13 +45,31 @@ struct nbio_pcie_index_data {
 	u32 index_offset;
 	u32 data_offset;
 };
-// Register Access Macro
+
+/* Register Access Macros */
 #define SOC15_REG_OFFSET(ip, inst, reg)       (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
                                                 (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
                                                     (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
                                                         (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
                                                             (ip##_BASE__INST##inst##_SEG4 + reg)))))
 
+#define WREG32_FIELD15(ip, idx, reg, field, val)	\
+	WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+
+#define RREG32_SOC15(ip, inst, reg) \
+	RREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
+		(1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
+		(2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
+		(3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
+		(ip##_BASE__INST##inst##_SEG4 + reg))))))
+
+#define WREG32_SOC15(ip, inst, reg, value) \
+	WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
+		(1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
+		(2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
+		(3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
+		(ip##_BASE__INST##inst##_SEG4 + reg))))), value)
+
 #endif
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 9bcf01469282..eca8f6e01e97 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -27,10 +27,14 @@
 #include "amdgpu_uvd.h"
 #include "soc15d.h"
 #include "soc15_common.h"
+#include "mmsch_v1_0.h"
 
 #include "vega10/soc15ip.h"
 #include "vega10/UVD/uvd_7_0_offset.h"
 #include "vega10/UVD/uvd_7_0_sh_mask.h"
+#include "vega10/VCE/vce_4_0_offset.h"
+#include "vega10/VCE/vce_4_0_default.h"
+#include "vega10/VCE/vce_4_0_sh_mask.h"
 #include "vega10/NBIF/nbif_6_1_offset.h"
 #include "vega10/HDP/hdp_4_0_offset.h"
 #include "vega10/MMHUB/mmhub_1_0_offset.h"
@@ -41,6 +45,7 @@ static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev);
 static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 static int uvd_v7_0_start(struct amdgpu_device *adev);
 static void uvd_v7_0_stop(struct amdgpu_device *adev);
+static int uvd_v7_0_sriov_start(struct amdgpu_device *adev);
 
 /**
  * uvd_v7_0_ring_get_rptr - get read pointer
@@ -98,6 +103,9 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
+	if (ring->use_doorbell)
+		return adev->wb.wb[ring->wptr_offs];
+
 	if (ring == &adev->uvd.ring_enc[0])
 		return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR));
 	else
@@ -129,6 +137,13 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
+	if (ring->use_doorbell) {
+		/* XXX check if swapping is necessary on BE */
+		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+		return;
+	}
+
 	if (ring == &adev->uvd.ring_enc[0])
 		WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR),
 			lower_32_bits(ring->wptr));
@@ -353,7 +368,10 @@ static int uvd_v7_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	adev->uvd.num_enc_rings = 2;
+	if (amdgpu_sriov_vf(adev))
+		adev->uvd.num_enc_rings = 1;
+	else
+		adev->uvd.num_enc_rings = 2;
 	uvd_v7_0_set_ring_funcs(adev);
 	uvd_v7_0_set_enc_ring_funcs(adev);
 	uvd_v7_0_set_irq_funcs(adev);
@@ -406,21 +424,31 @@ static int uvd_v7_0_sw_init(void *handle)
 	r = amdgpu_uvd_resume(adev);
 	if (r)
 		return r;
+	if (!amdgpu_sriov_vf(adev)) {
+		ring = &adev->uvd.ring;
+		sprintf(ring->name, "uvd");
+		r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
+		if (r)
+			return r;
+	}
 
-	ring = &adev->uvd.ring;
-	sprintf(ring->name, "uvd");
-	r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
-	if (r)
-		return r;
 
 	for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
 		ring = &adev->uvd.ring_enc[i];
 		sprintf(ring->name, "uvd_enc%d", i);
+		if (amdgpu_sriov_vf(adev)) {
+			ring->use_doorbell = true;
+			ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
+		}
 		r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
 		if (r)
 			return r;
 	}
 
+	r = amdgpu_virt_alloc_mm_table(adev);
+	if (r)
+		return r;
+
 	return r;
 }
 
@@ -429,6 +457,8 @@ static int uvd_v7_0_sw_fini(void *handle)
 	int i, r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	amdgpu_virt_free_mm_table(adev);
+
 	r = amdgpu_uvd_suspend(adev);
 	if (r)
 		return r;
@@ -455,48 +485,53 @@ static int uvd_v7_0_hw_init(void *handle)
 	uint32_t tmp;
 	int i, r;
 
-	r = uvd_v7_0_start(adev);
+	if (amdgpu_sriov_vf(adev))
+		r = uvd_v7_0_sriov_start(adev);
+	else
+		r = uvd_v7_0_start(adev);
 	if (r)
 		goto done;
 
-	ring->ready = true;
-	r = amdgpu_ring_test_ring(ring);
-	if (r) {
-		ring->ready = false;
-		goto done;
-	}
+	if (!amdgpu_sriov_vf(adev)) {
+		ring->ready = true;
+		r = amdgpu_ring_test_ring(ring);
+		if (r) {
+			ring->ready = false;
+			goto done;
+		}
 
-	r = amdgpu_ring_alloc(ring, 10);
-	if (r) {
-		DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
-		goto done;
-	}
+		r = amdgpu_ring_alloc(ring, 10);
+		if (r) {
+			DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
+			goto done;
+		}
 
-	tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-		mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
-	amdgpu_ring_write(ring, tmp);
-	amdgpu_ring_write(ring, 0xFFFFF);
+		tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
+			mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
+		amdgpu_ring_write(ring, tmp);
+		amdgpu_ring_write(ring, 0xFFFFF);
 
-	tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-		mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
-	amdgpu_ring_write(ring, tmp);
-	amdgpu_ring_write(ring, 0xFFFFF);
+		tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
+			mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
+		amdgpu_ring_write(ring, tmp);
+		amdgpu_ring_write(ring, 0xFFFFF);
 
-	tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-		mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
-	amdgpu_ring_write(ring, tmp);
-	amdgpu_ring_write(ring, 0xFFFFF);
+		tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
+			mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
+		amdgpu_ring_write(ring, tmp);
+		amdgpu_ring_write(ring, 0xFFFFF);
 
-	/* Clear timeout status bits */
-	amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
-		mmUVD_SEMA_TIMEOUT_STATUS), 0));
-	amdgpu_ring_write(ring, 0x8);
+		/* Clear timeout status bits */
+		amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
+			mmUVD_SEMA_TIMEOUT_STATUS), 0));
+		amdgpu_ring_write(ring, 0x8);
 
-	amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
-		mmUVD_SEMA_CNTL), 0));
-	amdgpu_ring_write(ring, 3);
+		amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
+			mmUVD_SEMA_CNTL), 0));
+		amdgpu_ring_write(ring, 3);
 
-	amdgpu_ring_commit(ring);
+		amdgpu_ring_commit(ring);
+	}
 
 	for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
 		ring = &adev->uvd.ring_enc[i];
@@ -618,6 +653,241 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
 	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
 }
 
+static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
+				struct amdgpu_mm_table *table)
+{
+	uint32_t data = 0, loop;
+	uint64_t addr = table->gpu_addr;
+	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
+	uint32_t size;
+
+	size = header->header_size + header->vce_table_size + header->uvd_table_size;
+
+	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
+	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
+	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
+
+	/* 2, update vmid of descriptor */
+	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
+	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
+	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
+
+	/* 3, notify mmsch about the size of this descriptor */
+	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
+
+	/* 4, set resp to zero */
+	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
+
+	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
+	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
+
+	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
+	loop = 1000;
+	while ((data & 0x10000002) != 0x10000002) {
+		udelay(10);
+		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
+		loop--;
+		if (!loop)
+			break;
+	}
+
+	if (!loop) {
+		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	uint32_t offset, size, tmp;
+	uint32_t table_size = 0;
+	struct mmsch_v1_0_cmd_direct_write direct_wt = { {0} };
+	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} };
+	struct mmsch_v1_0_cmd_direct_polling direct_poll = { {0} };
+	struct mmsch_v1_0_cmd_end end = { {0} };
+	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
+	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
+
+	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
+	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
+	end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+	if (header->uvd_table_offset == 0 && header->uvd_table_size == 0) {
+		header->version = MMSCH_VERSION;
+		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
+
+		if (header->vce_table_offset == 0 && header->vce_table_size == 0)
+			header->uvd_table_offset = header->header_size;
+		else
+			header->uvd_table_offset = header->vce_table_size + header->vce_table_offset;
+
+		init_table += header->uvd_table_offset;
+
+		ring = &adev->uvd.ring;
+		size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
+
+		/* disable clock gating */
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
+						   ~UVD_POWER_STATUS__UVD_PG_MODE_MASK, 0);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
+						   0xFFFFFFFF, 0x00000004);
+		/* mc resume*/
+		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+						    lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+						    upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+			offset = 0;
+		} else {
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+						    lower_32_bits(adev->uvd.gpu_addr));
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+						    upper_32_bits(adev->uvd.gpu_addr));
+			offset = size;
+		}
+
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
+					    AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size);
+
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+					    lower_32_bits(adev->uvd.gpu_addr + offset));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+					    upper_32_bits(adev->uvd.gpu_addr + offset));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
+
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+					    lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+					    upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2),
+					    AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
+
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_ADDR_CONFIG),
+					    adev->gfx.config.gb_addr_config);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG),
+					    adev->gfx.config.gb_addr_config);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG),
+					    adev->gfx.config.gb_addr_config);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
+		/* mc resume end*/
+
+		/* disable clock gating */
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL),
+						   ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
+
+		/* disable interupt */
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
+						   ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
+
+		/* stall UMC and register bus before resetting VCPU */
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+						   ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+						   UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+		/* put LMI, VCPU, RBC etc... into reset */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+					    (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+						       UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+						       UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+						       UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+						       UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+						       UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+						       UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+						       UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
+
+		/* initialize UVD memory controller */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL),
+					    (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+						       UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+						       UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+						       UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+						       UVD_LMI_CTRL__REQ_MODE_MASK |
+						       0x00100000L));
+
+		/* disable byte swapping */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_SWAP_CNTL), 0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MP_SWAP_CNTL), 0);
+
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA0), 0x40c2040);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA1), 0x0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB0), 0x40c2040);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB1), 0x0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_ALU), 0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUX), 0x88);
+
+		/* take all subblocks out of reset, except VCPU */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+					    UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+		/* enable VCPU clock */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
+					    UVD_VCPU_CNTL__CLK_EN_MASK);
+
+		/* enable UMC */
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+						   ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
+
+		/* boot up the VCPU */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0);
+
+		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02);
+
+		/* enable master interrupt */
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
+						   ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+						   (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+
+		/* clear the bit 4 of UVD_STATUS */
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
+						   ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
+
+		/* force RBC into idle state */
+		size = order_base_2(ring->ring_size);
+		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp);
+
+		/* set the write pointer delay */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL), 0);
+
+		/* set the wb address */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR),
+					    (upper_32_bits(ring->gpu_addr) >> 2));
+
+		/* programm the RB_BASE for ring buffer */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
+					    lower_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
+					    upper_32_bits(ring->gpu_addr));
+
+		ring->wptr = 0;
+		ring = &adev->uvd.ring_enc[0];
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4);
+
+		/* add end packet */
+		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
+		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+		header->uvd_table_size = table_size;
+
+		return uvd_v7_0_mmsch_start(adev, &adev->virt.mm_table);
+	}
+	return -EINVAL; /* already initializaed ? */
+}
+
 /**
  * uvd_v7_0_start - start UVD block
  *
@@ -1034,42 +1304,38 @@ static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring,
 static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	uint32_t data0, data1, mask;
-	unsigned eng = ring->idx;
-	unsigned i;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
-		data1 = upper_32_bits(pd_addr);
-		uvd_v7_0_vm_reg_write(ring, data0, data1);
-
-		data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
-		data1 = lower_32_bits(pd_addr);
-		uvd_v7_0_vm_reg_write(ring, data0, data1);
-
-		data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
-		data1 = lower_32_bits(pd_addr);
-		mask = 0xffffffff;
-		uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
-
-		/* flush TLB */
-		data0 = (hub->vm_inv_eng0_req + eng) << 2;
-		data1 = req;
-		uvd_v7_0_vm_reg_write(ring, data0, data1);
-
-		/* wait for flush */
-		data0 = (hub->vm_inv_eng0_ack + eng) << 2;
-		data1 = 1 << vm_id;
-		mask =  1 << vm_id;
-		uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
-	}
+	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
+	data1 = upper_32_bits(pd_addr);
+	uvd_v7_0_vm_reg_write(ring, data0, data1);
+
+	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
+	data1 = lower_32_bits(pd_addr);
+	uvd_v7_0_vm_reg_write(ring, data0, data1);
+
+	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
+	data1 = lower_32_bits(pd_addr);
+	mask = 0xffffffff;
+	uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
+
+	/* flush TLB */
+	data0 = (hub->vm_inv_eng0_req + eng) << 2;
+	data1 = req;
+	uvd_v7_0_vm_reg_write(ring, data0, data1);
+
+	/* wait for flush */
+	data0 = (hub->vm_inv_eng0_ack + eng) << 2;
+	data1 = 1 << vm_id;
+	mask =  1 << vm_id;
+	uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
 }
 
 static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
@@ -1080,44 +1346,37 @@ static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
 static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 			 unsigned int vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	unsigned eng = ring->idx;
-	unsigned i;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, upper_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, 0xffffffff);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		/* flush TLB */
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
-		amdgpu_ring_write(ring, req);
-
-		/* wait for flush */
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
-		amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-		amdgpu_ring_write(ring, 1 << vm_id);
-		amdgpu_ring_write(ring, 1 << vm_id);
-	}
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, 0xffffffff);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	/* flush TLB */
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
+	amdgpu_ring_write(ring, req);
+
+	/* wait for flush */
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
+	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
+	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vm_id);
 }
 
 #if 0
@@ -1240,7 +1499,8 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev,
 		amdgpu_fence_process(&adev->uvd.ring_enc[0]);
 		break;
 	case 120:
-		amdgpu_fence_process(&adev->uvd.ring_enc[1]);
+		if (!amdgpu_sriov_vf(adev))
+			amdgpu_fence_process(&adev->uvd.ring_enc[1]);
 		break;
 	default:
 		DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -1448,13 +1708,14 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
 	.align_mask = 0xf,
 	.nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0),
 	.support_64bit_ptrs = false,
+	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = uvd_v7_0_ring_get_rptr,
 	.get_wptr = uvd_v7_0_ring_get_wptr,
 	.set_wptr = uvd_v7_0_ring_set_wptr,
 	.emit_frame_size =
 		2 + /* uvd_v7_0_ring_emit_hdp_flush */
 		2 + /* uvd_v7_0_ring_emit_hdp_invalidate */
-		34 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_ring_emit_vm_flush */
+		34 + /* uvd_v7_0_ring_emit_vm_flush */
 		14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */
 	.emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */
 	.emit_ib = uvd_v7_0_ring_emit_ib,
@@ -1475,11 +1736,12 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
 	.align_mask = 0x3f,
 	.nop = HEVC_ENC_CMD_NO_OP,
 	.support_64bit_ptrs = false,
+	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = uvd_v7_0_enc_ring_get_rptr,
 	.get_wptr = uvd_v7_0_enc_ring_get_wptr,
 	.set_wptr = uvd_v7_0_enc_ring_set_wptr,
 	.emit_frame_size =
-		17 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_enc_ring_emit_vm_flush */
+		17 + /* uvd_v7_0_enc_ring_emit_vm_flush */
 		5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */
 		1, /* uvd_v7_0_enc_ring_insert_end */
 	.emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index fb0819359909..90332f55cfba 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -77,13 +77,26 @@ static int vce_v3_0_set_clockgating_state(void *handle,
 static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
+	u32 v;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+	if (adev->vce.harvest_config == 0 ||
+		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
+		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
+	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
+		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
 
 	if (ring == &adev->vce.ring[0])
-		return RREG32(mmVCE_RB_RPTR);
+		v = RREG32(mmVCE_RB_RPTR);
 	else if (ring == &adev->vce.ring[1])
-		return RREG32(mmVCE_RB_RPTR2);
+		v = RREG32(mmVCE_RB_RPTR2);
 	else
-		return RREG32(mmVCE_RB_RPTR3);
+		v = RREG32(mmVCE_RB_RPTR3);
+
+	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return v;
 }
 
 /**
@@ -96,13 +109,26 @@ static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
 static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
+	u32 v;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+	if (adev->vce.harvest_config == 0 ||
+		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
+		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
+	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
+		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
 
 	if (ring == &adev->vce.ring[0])
-		return RREG32(mmVCE_RB_WPTR);
+		v = RREG32(mmVCE_RB_WPTR);
 	else if (ring == &adev->vce.ring[1])
-		return RREG32(mmVCE_RB_WPTR2);
+		v = RREG32(mmVCE_RB_WPTR2);
 	else
-		return RREG32(mmVCE_RB_WPTR3);
+		v = RREG32(mmVCE_RB_WPTR3);
+
+	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return v;
 }
 
 /**
@@ -116,12 +142,22 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
+	mutex_lock(&adev->grbm_idx_mutex);
+	if (adev->vce.harvest_config == 0 ||
+		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
+		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
+	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
+		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
+
 	if (ring == &adev->vce.ring[0])
 		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
 	else if (ring == &adev->vce.ring[1])
 		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
 	else
 		WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
+
+	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
+	mutex_unlock(&adev->grbm_idx_mutex);
 }
 
 static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
@@ -231,33 +267,38 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
 	struct amdgpu_ring *ring;
 	int idx, r;
 
-	ring = &adev->vce.ring[0];
-	WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
-	WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
-	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
-	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
-	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
-
-	ring = &adev->vce.ring[1];
-	WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
-	WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
-	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
-	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
-	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
-
-	ring = &adev->vce.ring[2];
-	WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
-	WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
-	WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
-	WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
-	WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
-
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (idx = 0; idx < 2; ++idx) {
 		if (adev->vce.harvest_config & (1 << idx))
 			continue;
 
 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
+
+		/* Program instance 0 reg space for two instances or instance 0 case
+		program instance 1 reg space for only instance 1 available case */
+		if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
+			ring = &adev->vce.ring[0];
+			WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
+			WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
+			WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
+			WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+			WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
+
+			ring = &adev->vce.ring[1];
+			WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
+			WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
+			WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
+			WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+			WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
+
+			ring = &adev->vce.ring[2];
+			WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
+			WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
+			WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
+			WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
+			WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
+		}
+
 		vce_v3_0_mc_resume(adev, idx);
 		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index edde5fe938d6..139f964196b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -49,63 +49,6 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
 
-static inline void mmsch_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt,
-					  uint32_t *init_table,
-					  uint32_t reg_offset,
-					  uint32_t value)
-{
-	direct_wt->cmd_header.reg_offset = reg_offset;
-	direct_wt->reg_value = value;
-	memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write));
-}
-
-static inline void mmsch_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt,
-						 uint32_t *init_table,
-						 uint32_t reg_offset,
-						 uint32_t mask, uint32_t data)
-{
-	direct_rd_mod_wt->cmd_header.reg_offset = reg_offset;
-	direct_rd_mod_wt->mask_value = mask;
-	direct_rd_mod_wt->write_data = data;
-	memcpy((void *)init_table, direct_rd_mod_wt,
-	       sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write));
-}
-
-static inline void mmsch_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll,
-					    uint32_t *init_table,
-					    uint32_t reg_offset,
-					    uint32_t mask, uint32_t wait)
-{
-	direct_poll->cmd_header.reg_offset = reg_offset;
-	direct_poll->mask_value = mask;
-	direct_poll->wait_value = wait;
-	memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling));
-}
-
-#define INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
-	mmsch_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \
-				      init_table, (reg), \
-				      (mask), (data)); \
-	init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
-	table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
-}
-
-#define INSERT_DIRECT_WT(reg, value) { \
-	mmsch_insert_direct_wt(&direct_wt, \
-			       init_table, (reg), \
-			       (value)); \
-	init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
-	table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
-}
-
-#define INSERT_DIRECT_POLL(reg, mask, wait) { \
-	mmsch_insert_direct_poll(&direct_poll, \
-				 init_table, (reg), \
-				 (mask), (wait)); \
-	init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
-	table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
-}
-
 /**
  * vce_v4_0_ring_get_rptr - get read pointer
  *
@@ -280,60 +223,73 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
 		init_table += header->vce_table_offset;
 
 		ring = &adev->vce.ring[0];
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), ring->wptr);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), ring->wptr);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), lower_32_bits(ring->gpu_addr));
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
+					    lower_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
+					    upper_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
+					    ring->ring_size / 4);
 
 		/* BEGING OF MC_RESUME */
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), ~(1 << 16), 0);
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), ~0xFF9FF000, 0x1FF000);
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), ~0x3F, 0x3F);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
-
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
-
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->vce.gpu_addr >> 8);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), adev->vce.gpu_addr >> 8);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), adev->vce.gpu_addr >> 8);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
+
+		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
+						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
+		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
+						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
+		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
+						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
+		} else {
+		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
+						adev->vce.gpu_addr >> 8);
+		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
+						adev->vce.gpu_addr >> 8);
+		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
+						adev->vce.gpu_addr >> 8);
+		}
 
 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
 		size = VCE_V4_0_FW_SIZE;
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & 0x7FFFFFFF);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
+					    offset & 0x7FFFFFFF);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
 
 		offset += size;
 		size = VCE_V4_0_STACK_SIZE;
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), offset & 0x7FFFFFFF);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
+					    offset & 0x7FFFFFFF);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
 
 		offset += size;
 		size = VCE_V4_0_DATA_SIZE;
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), offset & 0x7FFFFFFF);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
+					    offset & 0x7FFFFFFF);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
 
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
-				0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
+						   0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
 
 		/* end of MC_RESUME */
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
-				~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
-				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
+						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
+						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
+						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
 
-		INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
-				VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
-				VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
+		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
+					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
+					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
 
 		/* clear BUSY flag */
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
-				~VCE_STATUS__JOB_BUSY_MASK, 0);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
+						   ~VCE_STATUS__JOB_BUSY_MASK, 0);
 
 		/* add end packet */
 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
@@ -494,20 +450,9 @@ static int vce_v4_0_sw_init(void *handle)
 			return r;
 	}
 
-	if (amdgpu_sriov_vf(adev)) {
-		r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
-					    AMDGPU_GEM_DOMAIN_VRAM,
-					    &adev->virt.mm_table.bo,
-					    &adev->virt.mm_table.gpu_addr,
-					    (void *)&adev->virt.mm_table.cpu_addr);
-		if (!r) {
-			memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
-			printk("mm table gpu addr = 0x%llx, cpu addr = %p. \n",
-			       adev->virt.mm_table.gpu_addr,
-			       adev->virt.mm_table.cpu_addr);
-		}
+	r = amdgpu_virt_alloc_mm_table(adev);
+	if (r)
 		return r;
-	}
 
 	return r;
 }
@@ -518,10 +463,7 @@ static int vce_v4_0_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* free MM table */
-	if (amdgpu_sriov_vf(adev))
-		amdgpu_bo_free_kernel(&adev->virt.mm_table.bo,
-				      &adev->virt.mm_table.gpu_addr,
-				      (void *)&adev->virt.mm_table.cpu_addr);
+	amdgpu_virt_free_mm_table(adev);
 
 	r = amdgpu_vce_suspend(adev);
 	if (r)
@@ -973,44 +915,37 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 			 unsigned int vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	unsigned eng = ring->idx;
-	unsigned i;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, upper_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, 0xffffffff);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		/* flush TLB */
-		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
-		amdgpu_ring_write(ring, req);
-
-		/* wait for flush */
-		amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
-		amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-		amdgpu_ring_write(ring, 1 << vm_id);
-		amdgpu_ring_write(ring, 1 << vm_id);
-	}
+	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, 0xffffffff);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	/* flush TLB */
+	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
+	amdgpu_ring_write(ring, req);
+
+	/* wait for flush */
+	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
+	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
+	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vm_id);
 }
 
 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -1078,12 +1013,13 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
 	.align_mask = 0x3f,
 	.nop = VCE_CMD_NO_OP,
 	.support_64bit_ptrs = false,
+	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = vce_v4_0_ring_get_rptr,
 	.get_wptr = vce_v4_0_ring_get_wptr,
 	.set_wptr = vce_v4_0_ring_set_wptr,
 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
 	.emit_frame_size =
-		17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */
+		17 + /* vce_v4_0_emit_vm_flush */
 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
 		1, /* vce_v4_0_ring_insert_end */
 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 2ccf44e580de..1d1ac1ef94f7 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -138,6 +138,12 @@ struct amd_pp_profile {
 	uint8_t down_hyst;
 };
 
+enum amd_fan_ctrl_mode {
+	AMD_FAN_CTRL_NONE = 0,
+	AMD_FAN_CTRL_MANUAL = 1,
+	AMD_FAN_CTRL_AUTO = 2,
+};
+
 /* CG flags */
 #define AMD_CG_SUPPORT_GFX_MGCG			(1 << 0)
 #define AMD_CG_SUPPORT_GFX_MGLS			(1 << 1)
diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h
index 17b9d41f3e87..0a94f749e3c0 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h
@@ -54,20 +54,6 @@ enum cgs_ind_reg {
 };
 
 /**
- * enum cgs_clock - Clocks controlled by the SMU
- */
-enum cgs_clock {
-	CGS_CLOCK__SCLK,
-	CGS_CLOCK__MCLK,
-	CGS_CLOCK__VCLK,
-	CGS_CLOCK__DCLK,
-	CGS_CLOCK__ECLK,
-	CGS_CLOCK__ACLK,
-	CGS_CLOCK__ICLK,
-	/* ... */
-};
-
-/**
  * enum cgs_engine - Engines that can be statically power-gated
  */
 enum cgs_engine {
@@ -81,15 +67,6 @@ enum cgs_engine {
 	/* ... */
 };
 
-/**
- * enum cgs_voltage_planes - Voltage planes for external camera HW
- */
-enum cgs_voltage_planes {
-	CGS_VOLTAGE_PLANE__SENSOR0,
-	CGS_VOLTAGE_PLANE__SENSOR1,
-	/* ... */
-};
-
 /*
  * enum cgs_ucode_id - Firmware types for different IPs
  */
@@ -147,17 +124,6 @@ enum cgs_resource_type {
 };
 
 /**
- * struct cgs_clock_limits - Clock limits
- *
- * Clocks are specified in 10KHz units.
- */
-struct cgs_clock_limits {
-	unsigned min;		/**< Minimum supported frequency */
-	unsigned max;		/**< Maxumim supported frequency */
-	unsigned sustainable;	/**< Thermally sustainable frequency */
-};
-
-/**
  * struct cgs_firmware_info - Firmware information
  */
 struct cgs_firmware_info {
@@ -221,54 +187,6 @@ struct cgs_acpi_method_info {
 };
 
 /**
- * cgs_gpu_mem_info() - Return information about memory heaps
- * @cgs_device: opaque device handle
- * @type:	memory type
- * @mc_start:	Start MC address of the heap (output)
- * @mc_size:	MC address space size (output)
- * @mem_size:	maximum amount of memory available for allocation (output)
- *
- * This function returns information about memory heaps. The type
- * parameter is used to select the memory heap. The mc_start and
- * mc_size for GART heaps may be bigger than the memory available for
- * allocation.
- *
- * mc_start and mc_size are undefined for non-contiguous FB memory
- * types, since buffers allocated with these types may or may not be
- * GART mapped.
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_gpu_mem_info_t)(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type,
-				  uint64_t *mc_start, uint64_t *mc_size,
-				  uint64_t *mem_size);
-
-/**
- * cgs_gmap_kmem() - map kernel memory to GART aperture
- * @cgs_device:	opaque device handle
- * @kmem:	pointer to kernel memory
- * @size:	size to map
- * @min_offset: minimum offset from start of GART aperture
- * @max_offset: maximum offset from start of GART aperture
- * @kmem_handle: kernel memory handle (output)
- * @mcaddr:	MC address (output)
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_gmap_kmem_t)(struct cgs_device *cgs_device, void *kmem, uint64_t size,
-			       uint64_t min_offset, uint64_t max_offset,
-			       cgs_handle_t *kmem_handle, uint64_t *mcaddr);
-
-/**
- * cgs_gunmap_kmem() - unmap kernel memory
- * @cgs_device:	opaque device handle
- * @kmem_handle: kernel memory handle returned by gmap_kmem
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_gunmap_kmem_t)(struct cgs_device *cgs_device, cgs_handle_t kmem_handle);
-
-/**
  * cgs_alloc_gpu_mem() - Allocate GPU memory
  * @cgs_device:	opaque device handle
  * @type:	memory type
@@ -392,62 +310,6 @@ typedef void (*cgs_write_ind_register_t)(struct cgs_device *cgs_device, enum cgs
 					 unsigned index, uint32_t value);
 
 /**
- * cgs_read_pci_config_byte() - Read byte from PCI configuration space
- * @cgs_device:	opaque device handle
- * @addr:	address
- *
- * Return:  Value read
- */
-typedef uint8_t (*cgs_read_pci_config_byte_t)(struct cgs_device *cgs_device, unsigned addr);
-
-/**
- * cgs_read_pci_config_word() - Read word from PCI configuration space
- * @cgs_device:	opaque device handle
- * @addr:	address, must be word-aligned
- *
- * Return:  Value read
- */
-typedef uint16_t (*cgs_read_pci_config_word_t)(struct cgs_device *cgs_device, unsigned addr);
-
-/**
- * cgs_read_pci_config_dword() - Read dword from PCI configuration space
- * @cgs_device:	opaque device handle
- * @addr:	address, must be dword-aligned
- *
- * Return:  Value read
- */
-typedef uint32_t (*cgs_read_pci_config_dword_t)(struct cgs_device *cgs_device,
-						unsigned addr);
-
-/**
- * cgs_write_pci_config_byte() - Write byte to PCI configuration space
- * @cgs_device:	opaque device handle
- * @addr:	address
- * @value:	value to write
- */
-typedef void (*cgs_write_pci_config_byte_t)(struct cgs_device *cgs_device, unsigned addr,
-					    uint8_t value);
-
-/**
- * cgs_write_pci_config_word() - Write byte to PCI configuration space
- * @cgs_device:	opaque device handle
- * @addr:	address, must be word-aligned
- * @value:	value to write
- */
-typedef void (*cgs_write_pci_config_word_t)(struct cgs_device *cgs_device, unsigned addr,
-					    uint16_t value);
-
-/**
- * cgs_write_pci_config_dword() - Write byte to PCI configuration space
- * @cgs_device:	opaque device handle
- * @addr:	address, must be dword-aligned
- * @value:	value to write
- */
-typedef void (*cgs_write_pci_config_dword_t)(struct cgs_device *cgs_device, unsigned addr,
-					     uint32_t value);
-
-
-/**
  * cgs_get_pci_resource() - provide access to a device resource (PCI BAR)
  * @cgs_device:	opaque device handle
  * @resource_type:	Type of Resource (MMIO, IO, ROM, FB, DOORBELL)
@@ -501,87 +363,6 @@ typedef int (*cgs_atom_exec_cmd_table_t)(struct cgs_device *cgs_device,
 					 unsigned table, void *args);
 
 /**
- * cgs_create_pm_request() - Create a power management request
- * @cgs_device:	opaque device handle
- * @request:	handle of created PM request (output)
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_create_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t *request);
-
-/**
- * cgs_destroy_pm_request() - Destroy a power management request
- * @cgs_device:	opaque device handle
- * @request:	handle of created PM request
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_destroy_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t request);
-
-/**
- * cgs_set_pm_request() - Activate or deactiveate a PM request
- * @cgs_device:	opaque device handle
- * @request:	PM request handle
- * @active:	0 = deactivate, non-0 = activate
- *
- * While a PM request is active, its minimum clock requests are taken
- * into account as the requested engines are powered up. When the
- * request is inactive, the engines may be powered down and clocks may
- * be lower, depending on other PM requests by other driver
- * components.
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_set_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t request,
-				    int active);
-
-/**
- * cgs_pm_request_clock() - Request a minimum frequency for a specific clock
- * @cgs_device:	opaque device handle
- * @request:	PM request handle
- * @clock:	which clock?
- * @freq:	requested min. frequency in 10KHz units (0 to clear request)
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_pm_request_clock_t)(struct cgs_device *cgs_device, cgs_handle_t request,
-				      enum cgs_clock clock, unsigned freq);
-
-/**
- * cgs_pm_request_engine() - Request an engine to be powered up
- * @cgs_device:	opaque device handle
- * @request:	PM request handle
- * @engine:	which engine?
- * @powered:	0 = powered down, non-0 = powered up
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_pm_request_engine_t)(struct cgs_device *cgs_device, cgs_handle_t request,
-				       enum cgs_engine engine, int powered);
-
-/**
- * cgs_pm_query_clock_limits() - Query clock frequency limits
- * @cgs_device:	opaque device handle
- * @clock:	which clock?
- * @limits:	clock limits
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_pm_query_clock_limits_t)(struct cgs_device *cgs_device,
-					   enum cgs_clock clock,
-					   struct cgs_clock_limits *limits);
-
-/**
- * cgs_set_camera_voltages() - Apply specific voltages to PMIC voltage planes
- * @cgs_device:	opaque device handle
- * @mask:	bitmask of voltages to change (1<<CGS_VOLTAGE_PLANE__xyz|...)
- * @voltages:	pointer to array of voltage values in 1mV units
- *
- * Return: 0 on success, -errno otherwise
- */
-typedef int (*cgs_set_camera_voltages_t)(struct cgs_device *cgs_device, uint32_t mask,
-					 const uint32_t *voltages);
-/**
  * cgs_get_firmware_info - Get the firmware information from core driver
  * @cgs_device: opaque device handle
  * @type: the firmware type
@@ -627,9 +408,6 @@ typedef int (*cgs_enter_safe_mode)(struct cgs_device *cgs_device, bool en);
 
 struct cgs_ops {
 	/* memory management calls (similar to KFD interface) */
-	cgs_gpu_mem_info_t gpu_mem_info;
-	cgs_gmap_kmem_t gmap_kmem;
-	cgs_gunmap_kmem_t gunmap_kmem;
 	cgs_alloc_gpu_mem_t alloc_gpu_mem;
 	cgs_free_gpu_mem_t free_gpu_mem;
 	cgs_gmap_gpu_mem_t gmap_gpu_mem;
@@ -641,27 +419,12 @@ struct cgs_ops {
 	cgs_write_register_t write_register;
 	cgs_read_ind_register_t read_ind_register;
 	cgs_write_ind_register_t write_ind_register;
-	/* PCI configuration space access */
-	cgs_read_pci_config_byte_t read_pci_config_byte;
-	cgs_read_pci_config_word_t read_pci_config_word;
-	cgs_read_pci_config_dword_t read_pci_config_dword;
-	cgs_write_pci_config_byte_t write_pci_config_byte;
-	cgs_write_pci_config_word_t write_pci_config_word;
-	cgs_write_pci_config_dword_t write_pci_config_dword;
 	/* PCI resources */
 	cgs_get_pci_resource_t get_pci_resource;
 	/* ATOM BIOS */
 	cgs_atom_get_data_table_t atom_get_data_table;
 	cgs_atom_get_cmd_table_revs_t atom_get_cmd_table_revs;
 	cgs_atom_exec_cmd_table_t atom_exec_cmd_table;
-	/* Power management */
-	cgs_create_pm_request_t create_pm_request;
-	cgs_destroy_pm_request_t destroy_pm_request;
-	cgs_set_pm_request_t set_pm_request;
-	cgs_pm_request_clock_t pm_request_clock;
-	cgs_pm_request_engine_t pm_request_engine;
-	cgs_pm_query_clock_limits_t pm_query_clock_limits;
-	cgs_set_camera_voltages_t set_camera_voltages;
 	/* Firmware Info */
 	cgs_get_firmware_info get_firmware_info;
 	cgs_rel_firmware rel_firmware;
@@ -696,12 +459,6 @@ struct cgs_device
 #define CGS_OS_CALL(func,dev,...) \
 	(((struct cgs_device *)dev)->os_ops->func(dev, ##__VA_ARGS__))
 
-#define cgs_gpu_mem_info(dev,type,mc_start,mc_size,mem_size)		\
-	CGS_CALL(gpu_mem_info,dev,type,mc_start,mc_size,mem_size)
-#define cgs_gmap_kmem(dev,kmem,size,min_off,max_off,kmem_handle,mcaddr)	\
-	CGS_CALL(gmap_kmem,dev,kmem,size,min_off,max_off,kmem_handle,mcaddr)
-#define cgs_gunmap_kmem(dev,kmem_handle)	\
-	CGS_CALL(gunmap_kmem,dev,keme_handle)
 #define cgs_alloc_gpu_mem(dev,type,size,align,min_off,max_off,handle)	\
 	CGS_CALL(alloc_gpu_mem,dev,type,size,align,min_off,max_off,handle)
 #define cgs_free_gpu_mem(dev,handle)		\
@@ -724,19 +481,6 @@ struct cgs_device
 #define cgs_write_ind_register(dev,space,index,value)		\
 	CGS_CALL(write_ind_register,dev,space,index,value)
 
-#define cgs_read_pci_config_byte(dev,addr)	\
-	CGS_CALL(read_pci_config_byte,dev,addr)
-#define cgs_read_pci_config_word(dev,addr)	\
-	CGS_CALL(read_pci_config_word,dev,addr)
-#define cgs_read_pci_config_dword(dev,addr)		\
-	CGS_CALL(read_pci_config_dword,dev,addr)
-#define cgs_write_pci_config_byte(dev,addr,value)	\
-	CGS_CALL(write_pci_config_byte,dev,addr,value)
-#define cgs_write_pci_config_word(dev,addr,value)	\
-	CGS_CALL(write_pci_config_word,dev,addr,value)
-#define cgs_write_pci_config_dword(dev,addr,value)	\
-	CGS_CALL(write_pci_config_dword,dev,addr,value)
-
 #define cgs_atom_get_data_table(dev,table,size,frev,crev)	\
 	CGS_CALL(atom_get_data_table,dev,table,size,frev,crev)
 #define cgs_atom_get_cmd_table_revs(dev,table,frev,crev)	\
@@ -744,20 +488,6 @@ struct cgs_device
 #define cgs_atom_exec_cmd_table(dev,table,args)		\
 	CGS_CALL(atom_exec_cmd_table,dev,table,args)
 
-#define cgs_create_pm_request(dev,request)	\
-	CGS_CALL(create_pm_request,dev,request)
-#define cgs_destroy_pm_request(dev,request)		\
-	CGS_CALL(destroy_pm_request,dev,request)
-#define cgs_set_pm_request(dev,request,active)		\
-	CGS_CALL(set_pm_request,dev,request,active)
-#define cgs_pm_request_clock(dev,request,clock,freq)		\
-	CGS_CALL(pm_request_clock,dev,request,clock,freq)
-#define cgs_pm_request_engine(dev,request,engine,powered)	\
-	CGS_CALL(pm_request_engine,dev,request,engine,powered)
-#define cgs_pm_query_clock_limits(dev,clock,limits)		\
-	CGS_CALL(pm_query_clock_limits,dev,clock,limits)
-#define cgs_set_camera_voltages(dev,mask,voltages)	\
-	CGS_CALL(set_camera_voltages,dev,mask,voltages)
 #define cgs_get_firmware_info(dev, type, info)	\
 	CGS_CALL(get_firmware_info, dev, type, info)
 #define cgs_rel_firmware(dev, type)	\
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 9da5b0bb66d8..f73e80c4bf33 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -251,7 +251,9 @@ static int pp_suspend(void *handle)
 
 	ret = pp_check(pp_handle);
 
-	if (ret != 0)
+	if (ret == PP_DPM_DISABLED)
+		return 0;
+	else if (ret != 0)
 		return ret;
 
 	eventmgr = pp_handle->eventmgr;
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c
index 9ef2d90e2886..b82c43af59ab 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c
@@ -219,7 +219,7 @@ const pem_event_action notify_smu_suspend_tasks[] = {
 };
 
 const pem_event_action disable_smc_firmware_ctf_tasks[] = {
-	/* PEM_Task_DisableSMCFirmwareCTF,*/
+	pem_task_disable_smc_firmware_ctf,
 	NULL
 };
 
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
index e04216ec7ee1..8c4ebaae1e0c 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
@@ -173,6 +173,11 @@ int pem_task_stop_asic_block_usage(struct pp_eventmgr *eventmgr, struct pem_even
 	return 0;
 }
 
+int pem_task_disable_smc_firmware_ctf(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data)
+{
+	return phm_disable_smc_firmware_ctf(eventmgr->hwmgr);
+}
+
 int pem_task_setup_asic(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data)
 {
 	return phm_setup_asic(eventmgr->hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h
index 6c6297e3b598..37e7ca5a58e0 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h
@@ -84,5 +84,6 @@ int pem_task_update_allowed_performance_levels(struct pp_eventmgr *eventmgr, str
 /*thermal */
 int pem_task_initialize_thermal_controller(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data);
 int pem_task_uninitialize_thermal_controller(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data);
+int pem_task_disable_smc_firmware_ctf(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data);
 
 #endif /* _EVENT_TASKS_H_ */
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index 23bba2c8b18e..fcc722ea7649 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -501,3 +501,13 @@ int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_i
 
 	return hwmgr->hwmgr_func->get_max_high_clocks(hwmgr, clocks);
 }
+
+int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr)
+{
+	PHM_FUNC_CHECK(hwmgr);
+
+	if (hwmgr->hwmgr_func->disable_smc_firmware_ctf == NULL)
+		return -EINVAL;
+
+	return hwmgr->hwmgr_func->disable_smc_firmware_ctf(hwmgr);
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
index b71525f838e6..56023114ad6f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
@@ -314,52 +314,45 @@ int pp_atomfwctrl_get_avfs_information(struct pp_hwmgr *hwmgr,
 			le32_to_cpu(profile->gb_vdroop_table_ckson_a2);
 	param->ulGbFuseTableCksoffM1 =
 			le32_to_cpu(profile->avfsgb_fuse_table_cksoff_m1);
-	param->usGbFuseTableCksoffM2 =
+	param->ulGbFuseTableCksoffM2 =
 			le16_to_cpu(profile->avfsgb_fuse_table_cksoff_m2);
 	param->ulGbFuseTableCksoffB =
 			le32_to_cpu(profile->avfsgb_fuse_table_cksoff_b);
 	param->ulGbFuseTableCksonM1 =
 			le32_to_cpu(profile->avfsgb_fuse_table_ckson_m1);
-	param->usGbFuseTableCksonM2 =
+	param->ulGbFuseTableCksonM2 =
 			le16_to_cpu(profile->avfsgb_fuse_table_ckson_m2);
 	param->ulGbFuseTableCksonB =
 			le32_to_cpu(profile->avfsgb_fuse_table_ckson_b);
-	param->usMaxVoltage025mv =
-			le16_to_cpu(profile->max_voltage_0_25mv);
-	param->ucEnableGbVdroopTableCksoff =
-			profile->enable_gb_vdroop_table_cksoff;
+
 	param->ucEnableGbVdroopTableCkson =
 			profile->enable_gb_vdroop_table_ckson;
-	param->ucEnableGbFuseTableCksoff =
-			profile->enable_gb_fuse_table_cksoff;
 	param->ucEnableGbFuseTableCkson =
 			profile->enable_gb_fuse_table_ckson;
 	param->usPsmAgeComfactor =
 			le16_to_cpu(profile->psm_age_comfactor);
-	param->ucEnableApplyAvfsCksoffVoltage =
-			profile->enable_apply_avfs_cksoff_voltage;
 
 	param->ulDispclk2GfxclkM1 =
 			le32_to_cpu(profile->dispclk2gfxclk_a);
-	param->usDispclk2GfxclkM2 =
+	param->ulDispclk2GfxclkM2 =
 			le16_to_cpu(profile->dispclk2gfxclk_b);
 	param->ulDispclk2GfxclkB =
 			le32_to_cpu(profile->dispclk2gfxclk_c);
 	param->ulDcefclk2GfxclkM1 =
 			le32_to_cpu(profile->dcefclk2gfxclk_a);
-	param->usDcefclk2GfxclkM2 =
+	param->ulDcefclk2GfxclkM2 =
 			le16_to_cpu(profile->dcefclk2gfxclk_b);
 	param->ulDcefclk2GfxclkB =
 			le32_to_cpu(profile->dcefclk2gfxclk_c);
 	param->ulPixelclk2GfxclkM1 =
 			le32_to_cpu(profile->pixclk2gfxclk_a);
-	param->usPixelclk2GfxclkM2 =
+	param->ulPixelclk2GfxclkM2 =
 			le16_to_cpu(profile->pixclk2gfxclk_b);
 	param->ulPixelclk2GfxclkB =
 			le32_to_cpu(profile->pixclk2gfxclk_c);
 	param->ulPhyclk2GfxclkM1 =
 			le32_to_cpu(profile->phyclk2gfxclk_a);
-	param->usPhyclk2GfxclkM2 =
+	param->ulPhyclk2GfxclkM2 =
 			le16_to_cpu(profile->phyclk2gfxclk_b);
 	param->ulPhyclk2GfxclkB =
 			le32_to_cpu(profile->phyclk2gfxclk_c);
@@ -394,3 +387,31 @@ int pp_atomfwctrl_get_gpio_information(struct pp_hwmgr *hwmgr,
 
 	return 0;
 }
+
+int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr,
+			struct pp_atomfwctrl_bios_boot_up_values *boot_values)
+{
+	struct atom_firmware_info_v3_1 *info = NULL;
+	uint16_t ix;
+
+	ix = GetIndexIntoMasterDataTable(firmwareinfo);
+	info = (struct atom_firmware_info_v3_1 *)
+		cgs_atom_get_data_table(hwmgr->device,
+				ix, NULL, NULL, NULL);
+
+	if (!info) {
+		pr_info("Error retrieving BIOS firmwareinfo!");
+		return -EINVAL;
+	}
+
+	boot_values->ulRevision = info->firmware_revision;
+	boot_values->ulGfxClk   = info->bootup_sclk_in10khz;
+	boot_values->ulUClk     = info->bootup_mclk_in10khz;
+	boot_values->ulSocClk   = 0;
+	boot_values->usVddc     = info->bootup_vddc_mv;
+	boot_values->usVddci    = info->bootup_vddci_mv;
+	boot_values->usMvddc    = info->bootup_mvddc_mv;
+	boot_values->usVddGfx   = info->bootup_vddgfx_mv;
+
+	return 0;
+}
+\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
index 7efe9b96cb33..43a6711e3c06 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
@@ -69,7 +69,7 @@ struct pp_atomfwctrl_clock_dividers_soc15 {
 struct pp_atomfwctrl_avfs_parameters {
 	uint32_t   ulMaxVddc;
 	uint32_t   ulMinVddc;
-	uint8_t    ucMaxVidStep;
+
 	uint32_t   ulMeanNsigmaAcontant0;
 	uint32_t   ulMeanNsigmaAcontant1;
 	uint32_t   ulMeanNsigmaAcontant2;
@@ -82,30 +82,30 @@ struct pp_atomfwctrl_avfs_parameters {
 	uint32_t   ulGbVdroopTableCksonA0;
 	uint32_t   ulGbVdroopTableCksonA1;
 	uint32_t   ulGbVdroopTableCksonA2;
+
 	uint32_t   ulGbFuseTableCksoffM1;
-	uint16_t   usGbFuseTableCksoffM2;
-	uint32_t   ulGbFuseTableCksoffB;\
+	uint32_t   ulGbFuseTableCksoffM2;
+	uint32_t   ulGbFuseTableCksoffB;
+
 	uint32_t   ulGbFuseTableCksonM1;
-	uint16_t   usGbFuseTableCksonM2;
+	uint32_t   ulGbFuseTableCksonM2;
 	uint32_t   ulGbFuseTableCksonB;
-	uint16_t   usMaxVoltage025mv;
-	uint8_t    ucEnableGbVdroopTableCksoff;
+
 	uint8_t    ucEnableGbVdroopTableCkson;
-	uint8_t    ucEnableGbFuseTableCksoff;
 	uint8_t    ucEnableGbFuseTableCkson;
 	uint16_t   usPsmAgeComfactor;
-	uint8_t    ucEnableApplyAvfsCksoffVoltage;
+
 	uint32_t   ulDispclk2GfxclkM1;
-	uint16_t   usDispclk2GfxclkM2;
+	uint32_t   ulDispclk2GfxclkM2;
 	uint32_t   ulDispclk2GfxclkB;
 	uint32_t   ulDcefclk2GfxclkM1;
-	uint16_t   usDcefclk2GfxclkM2;
+	uint32_t   ulDcefclk2GfxclkM2;
 	uint32_t   ulDcefclk2GfxclkB;
 	uint32_t   ulPixelclk2GfxclkM1;
-	uint16_t   usPixelclk2GfxclkM2;
+	uint32_t   ulPixelclk2GfxclkM2;
 	uint32_t   ulPixelclk2GfxclkB;
 	uint32_t   ulPhyclk2GfxclkM1;
-	uint16_t   usPhyclk2GfxclkM2;
+	uint32_t   ulPhyclk2GfxclkM2;
 	uint32_t   ulPhyclk2GfxclkB;
 };
 
@@ -119,6 +119,18 @@ struct pp_atomfwctrl_gpio_parameters {
 	uint8_t   ucFwCtfGpio;
 	uint8_t   ucFwCtfPolarity;
 };
+
+struct pp_atomfwctrl_bios_boot_up_values {
+	uint32_t   ulRevision;
+	uint32_t   ulGfxClk;
+	uint32_t   ulUClk;
+	uint32_t   ulSocClk;
+	uint16_t   usVddc;
+	uint16_t   usVddci;
+	uint16_t   usMvddc;
+	uint16_t   usVddGfx;
+};
+
 int pp_atomfwctrl_get_gpu_pll_dividers_vega10(struct pp_hwmgr *hwmgr,
 		uint32_t clock_type, uint32_t clock_value,
 		struct pp_atomfwctrl_clock_dividers_soc15 *dividers);
@@ -136,5 +148,8 @@ int pp_atomfwctrl_get_avfs_information(struct pp_hwmgr *hwmgr,
 int pp_atomfwctrl_get_gpio_information(struct pp_hwmgr *hwmgr,
 		struct pp_atomfwctrl_gpio_parameters *param);
 
+int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr,
+			struct pp_atomfwctrl_bios_boot_up_values *boot_values);
+
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 8f663ab56a80..102eb6d029fa 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -2655,6 +2655,28 @@ static int smu7_get_power_state_size(struct pp_hwmgr *hwmgr)
 	return sizeof(struct smu7_power_state);
 }
 
+static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr,
+				 uint32_t vblank_time_us)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	uint32_t switch_limit_us;
+
+	switch (hwmgr->chip_id) {
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+		switch_limit_us = data->is_memory_gddr5 ? 190 : 150;
+		break;
+	default:
+		switch_limit_us = data->is_memory_gddr5 ? 450 : 150;
+		break;
+	}
+
+	if (vblank_time_us < switch_limit_us)
+		return true;
+	else
+		return false;
+}
 
 static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 				struct pp_power_state *request_ps,
@@ -2669,6 +2691,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 	bool disable_mclk_switching;
 	bool disable_mclk_switching_for_frame_lock;
 	struct cgs_display_info info = {0};
+	struct cgs_mode_info mode_info = {0};
 	const struct phm_clock_and_voltage_limits *max_limits;
 	uint32_t i;
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -2677,6 +2700,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 	int32_t count;
 	int32_t stable_pstate_sclk = 0, stable_pstate_mclk = 0;
 
+	info.mode_info = &mode_info;
 	data->battery_state = (PP_StateUILabel_Battery ==
 			request_ps->classification.ui_label);
 
@@ -2703,8 +2727,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 
 	cgs_get_active_displays_info(hwmgr->device, &info);
 
-	/*TO DO result = PHM_CheckVBlankTime(hwmgr, &vblankTooShort);*/
-
 	minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock;
 	minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock;
 
@@ -2769,8 +2791,10 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 				    PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);
 
 
-	disable_mclk_switching = (1 < info.display_count) ||
-				    disable_mclk_switching_for_frame_lock;
+	disable_mclk_switching = ((1 < info.display_count) ||
+				  disable_mclk_switching_for_frame_lock ||
+				  smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) ||
+				  (mode_info.refresh_rate > 120));
 
 	sclk = smu7_ps->performance_levels[0].engine_clock;
 	mclk = smu7_ps->performance_levels[0].memory_clock;
@@ -4334,26 +4358,31 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
 
 static int smu7_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode)
 {
-	if (mode) {
-		/* stop auto-manage */
-		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
-				PHM_PlatformCaps_MicrocodeFanControl))
-			smu7_fan_ctrl_stop_smc_fan_control(hwmgr);
-		smu7_fan_ctrl_set_static_mode(hwmgr, mode);
-	} else
-		/* restart auto-manage */
-		smu7_fan_ctrl_reset_fan_speed_to_default(hwmgr);
+	int result = 0;
 
-	return 0;
+	switch (mode) {
+	case AMD_FAN_CTRL_NONE:
+		result = smu7_fan_ctrl_set_fan_speed_percent(hwmgr, 100);
+		break;
+	case AMD_FAN_CTRL_MANUAL:
+		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_MicrocodeFanControl))
+			result = smu7_fan_ctrl_stop_smc_fan_control(hwmgr);
+		break;
+	case AMD_FAN_CTRL_AUTO:
+		result = smu7_fan_ctrl_set_static_mode(hwmgr, mode);
+		if (!result)
+			result = smu7_fan_ctrl_start_smc_fan_control(hwmgr);
+		break;
+	default:
+		break;
+	}
+	return result;
 }
 
 static int smu7_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 {
-	if (hwmgr->fan_ctrl_is_in_default_mode)
-		return hwmgr->fan_ctrl_default_mode;
-	else
-		return PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
-				CG_FDO_CTRL2, FDO_PWM_MODE);
+	return hwmgr->fan_ctrl_enabled ? AMD_FAN_CTRL_AUTO : AMD_FAN_CTRL_MANUAL;
 }
 
 static int smu7_get_sclk_od(struct pp_hwmgr *hwmgr)
@@ -4522,32 +4551,6 @@ static int smu7_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type
 	return 0;
 }
 
-static int smu7_request_firmware(struct pp_hwmgr *hwmgr)
-{
-	int ret;
-	struct cgs_firmware_info info = {0};
-
-	ret = cgs_get_firmware_info(hwmgr->device,
-				    smu7_convert_fw_type_to_cgs(UCODE_ID_SMU),
-				    &info);
-	if (ret || !info.kptr)
-		return -EINVAL;
-
-	return 0;
-}
-
-static int smu7_release_firmware(struct pp_hwmgr *hwmgr)
-{
-	int ret;
-
-	ret = cgs_rel_firmware(hwmgr->device,
-			       smu7_convert_fw_type_to_cgs(UCODE_ID_SMU));
-	if (ret)
-		return -EINVAL;
-
-	return 0;
-}
-
 static void smu7_find_min_clock_masks(struct pp_hwmgr *hwmgr,
 		uint32_t *sclk_mask, uint32_t *mclk_mask,
 		uint32_t min_sclk, uint32_t min_mclk)
@@ -4691,10 +4694,9 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = {
 	.get_clock_by_type = smu7_get_clock_by_type,
 	.read_sensor = smu7_read_sensor,
 	.dynamic_state_management_disable = smu7_disable_dpm_tasks,
-	.request_firmware = smu7_request_firmware,
-	.release_firmware = smu7_release_firmware,
 	.set_power_profile_state = smu7_set_power_profile_state,
 	.avfs_control = smu7_avfs_control,
+	.disable_smc_firmware_ctf = smu7_thermal_disable_alert,
 };
 
 uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c
index 436ca5ce8248..baddb569a8b8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c
@@ -112,10 +112,9 @@ int smu7_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed)
 */
 int smu7_fan_ctrl_set_static_mode(struct pp_hwmgr *hwmgr, uint32_t mode)
 {
-
 	if (hwmgr->fan_ctrl_is_in_default_mode) {
 		hwmgr->fan_ctrl_default_mode =
-				PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device,	CGS_IND_REG__SMC,
+				PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
 						CG_FDO_CTRL2, FDO_PWM_MODE);
 		hwmgr->tmin =
 				PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
@@ -149,7 +148,7 @@ int smu7_fan_ctrl_set_default_mode(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
-static int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr)
+int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr)
 {
 	int result;
 
@@ -179,6 +178,7 @@ static int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr)
 				PPSMC_MSG_SetFanTemperatureTarget,
 				hwmgr->thermal_controller.
 				advanceFanControlParameters.ucTargetTemperature);
+	hwmgr->fan_ctrl_enabled = true;
 
 	return result;
 }
@@ -186,6 +186,7 @@ static int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr)
 
 int smu7_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr)
 {
+	hwmgr->fan_ctrl_enabled = false;
 	return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_StopFanControl);
 }
 
@@ -280,7 +281,7 @@ int smu7_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed)
 	PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
 				CG_TACH_STATUS, TACH_PERIOD, tach_period);
 
-	return smu7_fan_ctrl_set_static_mode(hwmgr, FDO_PWM_MODE_STATIC);
+	return smu7_fan_ctrl_set_static_mode(hwmgr, FDO_PWM_MODE_STATIC_RPM);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.h
index 2ed774db42c7..ba71b608fa75 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.h
@@ -54,6 +54,6 @@ extern int smu7_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *spe
 extern int smu7_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr);
 extern int smu7_thermal_enable_alert(struct pp_hwmgr *hwmgr);
 extern int smu7_thermal_disable_alert(struct pp_hwmgr *hwmgr);
-
+extern int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr);
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 83949550edac..2614af2f553f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -111,6 +111,8 @@ static void vega10_set_default_registry_data(struct pp_hwmgr *hwmgr)
 			hwmgr->feature_mask & PP_SOCCLK_DPM_MASK ? false : true;
 	data->registry_data.mclk_dpm_key_disabled =
 			hwmgr->feature_mask & PP_MCLK_DPM_MASK ? false : true;
+	data->registry_data.pcie_dpm_key_disabled =
+			hwmgr->feature_mask & PP_PCIE_DPM_MASK ? false : true;
 
 	data->registry_data.dcefclk_dpm_key_disabled =
 			hwmgr->feature_mask & PP_DCEFCLK_DPM_MASK ? false : true;
@@ -121,7 +123,9 @@ static void vega10_set_default_registry_data(struct pp_hwmgr *hwmgr)
 		data->registry_data.enable_tdc_limit_feature = 1;
 	}
 
-	data->registry_data.pcie_dpm_key_disabled = 1;
+	data->registry_data.clock_stretcher_support =
+			hwmgr->feature_mask & PP_CLOCK_STRETCH_MASK ? false : true;
+
 	data->registry_data.disable_water_mark = 0;
 
 	data->registry_data.fan_control_support = 1;
@@ -1133,7 +1137,7 @@ static void vega10_setup_default_single_dpm_table(struct pp_hwmgr *hwmgr,
 	int i;
 
 	for (i = 0; i < dep_table->count; i++) {
-		if (i == 0 || dpm_table->dpm_levels[dpm_table->count - 1].value !=
+		if (i == 0 || dpm_table->dpm_levels[dpm_table->count - 1].value <=
 				dep_table->entries[i].clk) {
 			dpm_table->dpm_levels[dpm_table->count].value =
 					dep_table->entries[i].clk;
@@ -1178,29 +1182,9 @@ static int vega10_setup_default_pcie_table(struct pp_hwmgr *hwmgr)
 		else
 			pcie_table->lclk[i] =
 					bios_pcie_table->entries[i].pcie_sclk;
-
-		pcie_table->count++;
 	}
 
-	if (data->registry_data.pcieSpeedOverride)
-		pcie_table->pcie_gen[i] = data->registry_data.pcieSpeedOverride;
-	else
-		pcie_table->pcie_gen[i] =
-			bios_pcie_table->entries[bios_pcie_table->count - 1].gen_speed;
-
-	if (data->registry_data.pcieLaneOverride)
-		pcie_table->pcie_lane[i] = data->registry_data.pcieLaneOverride;
-	else
-		pcie_table->pcie_lane[i] =
-			bios_pcie_table->entries[bios_pcie_table->count - 1].lane_width;
-
-	if (data->registry_data.pcieClockOverride)
-		pcie_table->lclk[i] = data->registry_data.pcieClockOverride;
-	else
-		pcie_table->lclk[i] =
-			bios_pcie_table->entries[bios_pcie_table->count - 1].pcie_sclk;
-
-	pcie_table->count++;
+	pcie_table->count = NUM_LINK_LEVELS;
 
 	return 0;
 }
@@ -1290,7 +1274,7 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
 	dpm_table = &(data->dpm_table.eclk_table);
 	for (i = 0; i < dep_mm_table->count; i++) {
 		if (i == 0 || dpm_table->dpm_levels
-				[dpm_table->count - 1].value !=
+				[dpm_table->count - 1].value <=
 						dep_mm_table->entries[i].eclk) {
 			dpm_table->dpm_levels[dpm_table->count].value =
 					dep_mm_table->entries[i].eclk;
@@ -1306,7 +1290,7 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
 	dpm_table = &(data->dpm_table.vclk_table);
 	for (i = 0; i < dep_mm_table->count; i++) {
 		if (i == 0 || dpm_table->dpm_levels
-				[dpm_table->count - 1].value !=
+				[dpm_table->count - 1].value <=
 						dep_mm_table->entries[i].vclk) {
 			dpm_table->dpm_levels[dpm_table->count].value =
 					dep_mm_table->entries[i].vclk;
@@ -1320,7 +1304,7 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
 	dpm_table = &(data->dpm_table.dclk_table);
 	for (i = 0; i < dep_mm_table->count; i++) {
 		if (i == 0 || dpm_table->dpm_levels
-				[dpm_table->count - 1].value !=
+				[dpm_table->count - 1].value <=
 						dep_mm_table->entries[i].dclk) {
 			dpm_table->dpm_levels[dpm_table->count].value =
 					dep_mm_table->entries[i].dclk;
@@ -1432,9 +1416,7 @@ static int vega10_populate_ulv_state(struct pp_hwmgr *hwmgr)
 			(struct phm_ppt_v2_information *)(hwmgr->pptable);
 
 	data->smc_state_table.pp_table.UlvOffsetVid =
-			(uint8_t)(table_info->us_ulv_voltage_offset *
-					VOLTAGE_VID_OFFSET_SCALE2 /
-					VOLTAGE_VID_OFFSET_SCALE1);
+			(uint8_t)table_info->us_ulv_voltage_offset;
 
 	data->smc_state_table.pp_table.UlvSmnclkDid =
 			(uint8_t)(table_info->us_ulv_smnclk_did);
@@ -1553,7 +1535,11 @@ static int vega10_populate_single_gfx_level(struct pp_hwmgr *hwmgr,
 	current_gfxclk_level->FbMult =
 			cpu_to_le32(dividers.ulPll_fb_mult);
 	/* Spread FB Multiplier bit: bit 0:8 int, bit 31:16 frac */
-	current_gfxclk_level->SsOn = dividers.ucPll_ss_enable;
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+				PHM_PlatformCaps_EngineSpreadSpectrumSupport))
+		current_gfxclk_level->SsOn = dividers.ucPll_ss_enable;
+	else
+		current_gfxclk_level->SsOn = 0;
 	current_gfxclk_level->SsFbMult =
 			cpu_to_le32(dividers.ulPll_ss_fbsmult);
 	current_gfxclk_level->SsSlewFrac =
@@ -2044,10 +2030,10 @@ static int vega10_populate_clock_stretcher_table(struct pp_hwmgr *hwmgr)
 			table_info->vdd_dep_on_sclk;
 	uint32_t i;
 
-	for (i = 0; dep_table->count; i++) {
+	for (i = 0; i < dep_table->count; i++) {
 		pp_table->CksEnable[i] = dep_table->entries[i].cks_enable;
-		pp_table->CksVidOffset[i] = convert_to_vid(
-				dep_table->entries[i].cks_voffset);
+		pp_table->CksVidOffset[i] = (uint8_t)(dep_table->entries[i].cks_voffset
+				* VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1);
 	}
 
 	return 0;
@@ -2073,66 +2059,70 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 		result = pp_atomfwctrl_get_avfs_information(hwmgr, &avfs_params);
 		if (!result) {
 			pp_table->MinVoltageVid = (uint8_t)
-					convert_to_vid((uint16_t)(avfs_params.ulMaxVddc));
-			pp_table->MaxVoltageVid = (uint8_t)
 					convert_to_vid((uint16_t)(avfs_params.ulMinVddc));
-			pp_table->BtcGbVdroopTableCksOn.a0 =
-					cpu_to_le32(avfs_params.ulGbVdroopTableCksonA0);
-			pp_table->BtcGbVdroopTableCksOn.a1 =
-					cpu_to_le32(avfs_params.ulGbVdroopTableCksonA1);
-			pp_table->BtcGbVdroopTableCksOn.a2 =
-					cpu_to_le32(avfs_params.ulGbVdroopTableCksonA2);
+			pp_table->MaxVoltageVid = (uint8_t)
+					convert_to_vid((uint16_t)(avfs_params.ulMaxVddc));
+
+			pp_table->AConstant[0] = cpu_to_le32(avfs_params.ulMeanNsigmaAcontant0);
+			pp_table->AConstant[1] = cpu_to_le32(avfs_params.ulMeanNsigmaAcontant1);
+			pp_table->AConstant[2] = cpu_to_le32(avfs_params.ulMeanNsigmaAcontant2);
+			pp_table->DC_tol_sigma = cpu_to_le16(avfs_params.usMeanNsigmaDcTolSigma);
+			pp_table->Platform_mean = cpu_to_le16(avfs_params.usMeanNsigmaPlatformMean);
+			pp_table->Platform_sigma = cpu_to_le16(avfs_params.usMeanNsigmaDcTolSigma);
+			pp_table->PSM_Age_CompFactor = cpu_to_le16(avfs_params.usPsmAgeComfactor);
 
 			pp_table->BtcGbVdroopTableCksOff.a0 =
 					cpu_to_le32(avfs_params.ulGbVdroopTableCksoffA0);
+			pp_table->BtcGbVdroopTableCksOff.a0_shift = 20;
 			pp_table->BtcGbVdroopTableCksOff.a1 =
 					cpu_to_le32(avfs_params.ulGbVdroopTableCksoffA1);
+			pp_table->BtcGbVdroopTableCksOff.a1_shift = 20;
 			pp_table->BtcGbVdroopTableCksOff.a2 =
 					cpu_to_le32(avfs_params.ulGbVdroopTableCksoffA2);
+			pp_table->BtcGbVdroopTableCksOff.a2_shift = 20;
+
+			pp_table->OverrideBtcGbCksOn = avfs_params.ucEnableGbVdroopTableCkson;
+			pp_table->BtcGbVdroopTableCksOn.a0 =
+					cpu_to_le32(avfs_params.ulGbVdroopTableCksonA0);
+			pp_table->BtcGbVdroopTableCksOn.a0_shift = 20;
+			pp_table->BtcGbVdroopTableCksOn.a1 =
+					cpu_to_le32(avfs_params.ulGbVdroopTableCksonA1);
+			pp_table->BtcGbVdroopTableCksOn.a1_shift = 20;
+			pp_table->BtcGbVdroopTableCksOn.a2 =
+					cpu_to_le32(avfs_params.ulGbVdroopTableCksonA2);
+			pp_table->BtcGbVdroopTableCksOn.a2_shift = 20;
 
 			pp_table->AvfsGbCksOn.m1 =
 					cpu_to_le32(avfs_params.ulGbFuseTableCksonM1);
 			pp_table->AvfsGbCksOn.m2 =
-					cpu_to_le16(avfs_params.usGbFuseTableCksonM2);
+					cpu_to_le16(avfs_params.ulGbFuseTableCksonM2);
 			pp_table->AvfsGbCksOn.b =
 					cpu_to_le32(avfs_params.ulGbFuseTableCksonB);
 			pp_table->AvfsGbCksOn.m1_shift = 24;
 			pp_table->AvfsGbCksOn.m2_shift = 12;
+			pp_table->AvfsGbCksOn.b_shift = 0;
 
+			pp_table->OverrideAvfsGbCksOn =
+					avfs_params.ucEnableGbFuseTableCkson;
 			pp_table->AvfsGbCksOff.m1 =
 					cpu_to_le32(avfs_params.ulGbFuseTableCksoffM1);
 			pp_table->AvfsGbCksOff.m2 =
-					cpu_to_le16(avfs_params.usGbFuseTableCksoffM2);
+					cpu_to_le16(avfs_params.ulGbFuseTableCksoffM2);
 			pp_table->AvfsGbCksOff.b =
 					cpu_to_le32(avfs_params.ulGbFuseTableCksoffB);
 			pp_table->AvfsGbCksOff.m1_shift = 24;
 			pp_table->AvfsGbCksOff.m2_shift = 12;
-
-			pp_table->AConstant[0] =
-					cpu_to_le32(avfs_params.ulMeanNsigmaAcontant0);
-			pp_table->AConstant[1] =
-					cpu_to_le32(avfs_params.ulMeanNsigmaAcontant1);
-			pp_table->AConstant[2] =
-					cpu_to_le32(avfs_params.ulMeanNsigmaAcontant2);
-			pp_table->DC_tol_sigma =
-					cpu_to_le16(avfs_params.usMeanNsigmaDcTolSigma);
-			pp_table->Platform_mean =
-					cpu_to_le16(avfs_params.usMeanNsigmaPlatformMean);
-			pp_table->PSM_Age_CompFactor =
-					cpu_to_le16(avfs_params.usPsmAgeComfactor);
-			pp_table->Platform_sigma =
-					cpu_to_le16(avfs_params.usMeanNsigmaDcTolSigma);
-
-			for (i = 0; i < dep_table->count; i++)
-				pp_table->StaticVoltageOffsetVid[i] = (uint8_t)
-						(dep_table->entries[i].sclk_offset *
+			pp_table->AvfsGbCksOff.b_shift = 0;
+
+			for (i = 0; i < dep_table->count; i++) {
+				if (dep_table->entries[i].sclk_offset == 0)
+					pp_table->StaticVoltageOffsetVid[i] = 248;
+				else
+					pp_table->StaticVoltageOffsetVid[i] =
+						(uint8_t)(dep_table->entries[i].sclk_offset *
 								VOLTAGE_VID_OFFSET_SCALE2 /
 								VOLTAGE_VID_OFFSET_SCALE1);
-
-			pp_table->OverrideBtcGbCksOn =
-					avfs_params.ucEnableGbVdroopTableCkson;
-			pp_table->OverrideAvfsGbCksOn =
-					avfs_params.ucEnableGbFuseTableCkson;
+			}
 
 			if ((PPREGKEY_VEGA10QUADRATICEQUATION_DFLT !=
 					data->disp_clk_quad_eqn_a) &&
@@ -2141,20 +2131,21 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m1 =
 						(int32_t)data->disp_clk_quad_eqn_a;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m2 =
-						(int16_t)data->disp_clk_quad_eqn_b;
+						(int32_t)data->disp_clk_quad_eqn_b;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].b =
 						(int32_t)data->disp_clk_quad_eqn_c;
 			} else {
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m1 =
 						(int32_t)avfs_params.ulDispclk2GfxclkM1;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m2 =
-						(int16_t)avfs_params.usDispclk2GfxclkM2;
+						(int32_t)avfs_params.ulDispclk2GfxclkM2;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].b =
 						(int32_t)avfs_params.ulDispclk2GfxclkB;
 			}
 
 			pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m1_shift = 24;
 			pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m2_shift = 12;
+			pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].b_shift = 12;
 
 			if ((PPREGKEY_VEGA10QUADRATICEQUATION_DFLT !=
 					data->dcef_clk_quad_eqn_a) &&
@@ -2163,20 +2154,21 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m1 =
 						(int32_t)data->dcef_clk_quad_eqn_a;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m2 =
-						(int16_t)data->dcef_clk_quad_eqn_b;
+						(int32_t)data->dcef_clk_quad_eqn_b;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].b =
 						(int32_t)data->dcef_clk_quad_eqn_c;
 			} else {
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m1 =
 						(int32_t)avfs_params.ulDcefclk2GfxclkM1;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m2 =
-						(int16_t)avfs_params.usDcefclk2GfxclkM2;
+						(int32_t)avfs_params.ulDcefclk2GfxclkM2;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].b =
 						(int32_t)avfs_params.ulDcefclk2GfxclkB;
 			}
 
 			pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m1_shift = 24;
 			pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m2_shift = 12;
+			pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].b_shift = 12;
 
 			if ((PPREGKEY_VEGA10QUADRATICEQUATION_DFLT !=
 					data->pixel_clk_quad_eqn_a) &&
@@ -2185,21 +2177,21 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m1 =
 						(int32_t)data->pixel_clk_quad_eqn_a;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m2 =
-						(int16_t)data->pixel_clk_quad_eqn_b;
+						(int32_t)data->pixel_clk_quad_eqn_b;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].b =
 						(int32_t)data->pixel_clk_quad_eqn_c;
 			} else {
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m1 =
 						(int32_t)avfs_params.ulPixelclk2GfxclkM1;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m2 =
-						(int16_t)avfs_params.usPixelclk2GfxclkM2;
+						(int32_t)avfs_params.ulPixelclk2GfxclkM2;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].b =
 						(int32_t)avfs_params.ulPixelclk2GfxclkB;
 			}
 
 			pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m1_shift = 24;
 			pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m2_shift = 12;
-
+			pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].b_shift = 12;
 			if ((PPREGKEY_VEGA10QUADRATICEQUATION_DFLT !=
 					data->phy_clk_quad_eqn_a) &&
 				(PPREGKEY_VEGA10QUADRATICEQUATION_DFLT !=
@@ -2207,20 +2199,21 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m1 =
 						(int32_t)data->phy_clk_quad_eqn_a;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m2 =
-						(int16_t)data->phy_clk_quad_eqn_b;
+						(int32_t)data->phy_clk_quad_eqn_b;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].b =
 						(int32_t)data->phy_clk_quad_eqn_c;
 			} else {
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m1 =
 						(int32_t)avfs_params.ulPhyclk2GfxclkM1;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m2 =
-						(int16_t)avfs_params.usPhyclk2GfxclkM2;
+						(int32_t)avfs_params.ulPhyclk2GfxclkM2;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].b =
 						(int32_t)avfs_params.ulPhyclk2GfxclkB;
 			}
 
 			pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m1_shift = 24;
 			pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m2_shift = 12;
+			pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].b_shift = 12;
 		} else {
 			data->smu_features[GNLD_AVFS].supported = false;
 		}
@@ -2309,6 +2302,7 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
 			(struct phm_ppt_v2_information *)(hwmgr->pptable);
 	PPTable_t *pp_table = &(data->smc_state_table.pp_table);
 	struct pp_atomfwctrl_voltage_table voltage_table;
+	struct pp_atomfwctrl_bios_boot_up_values boot_up_values;
 
 	result = vega10_setup_default_dpm_tables(hwmgr);
 	PP_ASSERT_WITH_CODE(!result,
@@ -2331,6 +2325,7 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
 			(uint8_t)(table_info->uc_vce_dpm_voltage_mode);
 	pp_table->Mp0DpmVoltageMode =
 			(uint8_t)(table_info->uc_mp0_dpm_voltage_mode);
+
 	pp_table->DisplayDpmVoltageMode =
 			(uint8_t)(table_info->uc_dcef_dpm_voltage_mode);
 
@@ -2372,14 +2367,31 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
 			"Failed to initialize UVD Level!",
 			return result);
 
-	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
-			PHM_PlatformCaps_ClockStretcher)) {
+	if (data->registry_data.clock_stretcher_support) {
 		result = vega10_populate_clock_stretcher_table(hwmgr);
 		PP_ASSERT_WITH_CODE(!result,
 				"Failed to populate Clock Stretcher Table!",
 				return result);
 	}
 
+	result = pp_atomfwctrl_get_vbios_bootup_values(hwmgr, &boot_up_values);
+	if (!result) {
+		data->vbios_boot_state.vddc     = boot_up_values.usVddc;
+		data->vbios_boot_state.vddci    = boot_up_values.usVddci;
+		data->vbios_boot_state.mvddc    = boot_up_values.usMvddc;
+		data->vbios_boot_state.gfx_clock = boot_up_values.ulGfxClk;
+		data->vbios_boot_state.mem_clock = boot_up_values.ulUClk;
+		data->vbios_boot_state.soc_clock = boot_up_values.ulSocClk;
+		if (0 != boot_up_values.usVddc) {
+			smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+						PPSMC_MSG_SetFloorSocVoltage,
+						(boot_up_values.usVddc * 4));
+			data->vbios_boot_state.bsoc_vddc_lock = true;
+		} else {
+			data->vbios_boot_state.bsoc_vddc_lock = false;
+		}
+	}
+
 	result = vega10_populate_avfs_parameters(hwmgr);
 	PP_ASSERT_WITH_CODE(!result,
 			"Failed to initialize AVFS Parameters!",
@@ -2404,35 +2416,9 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
 	PP_ASSERT_WITH_CODE(!result,
 			"Failed to upload PPtable!", return result);
 
-	if (data->smu_features[GNLD_AVFS].supported) {
-		uint32_t features_enabled;
-		result = vega10_get_smc_features(hwmgr->smumgr, &features_enabled);
-		PP_ASSERT_WITH_CODE(!result,
-				"Failed to Retrieve Enabled Features!",
-				return result);
-		if (!(features_enabled & (1 << FEATURE_AVFS_BIT))) {
-			result = vega10_perform_btc(hwmgr->smumgr);
-			PP_ASSERT_WITH_CODE(!result,
-					"Failed to Perform BTC!",
-					return result);
-			result = vega10_avfs_enable(hwmgr, true);
-			PP_ASSERT_WITH_CODE(!result,
-					"Attempt to enable AVFS feature Failed!",
-					return result);
-			result = vega10_save_vft_table(hwmgr->smumgr,
-					(uint8_t *)&(data->smc_state_table.avfs_table));
-			PP_ASSERT_WITH_CODE(!result,
-					"Attempt to save VFT table Failed!",
+	result = vega10_avfs_enable(hwmgr, true);
+	PP_ASSERT_WITH_CODE(!result, "Attempt to enable AVFS feature Failed!",
 					return result);
-		} else {
-			data->smu_features[GNLD_AVFS].enabled = true;
-			result = vega10_restore_vft_table(hwmgr->smumgr,
-					(uint8_t *)&(data->smc_state_table.avfs_table));
-			PP_ASSERT_WITH_CODE(!result,
-					"Attempt to restore VFT table Failed!",
-					return result;);
-		}
-	}
 
 	return 0;
 }
@@ -2457,6 +2443,26 @@ static int vega10_enable_thermal_protection(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int vega10_disable_thermal_protection(struct pp_hwmgr *hwmgr)
+{
+	struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
+
+	if (data->smu_features[GNLD_THERMAL].supported) {
+		if (!data->smu_features[GNLD_THERMAL].enabled)
+			pr_info("THERMAL Feature Already disabled!");
+
+		PP_ASSERT_WITH_CODE(
+				!vega10_enable_smc_features(hwmgr->smumgr,
+				false,
+				data->smu_features[GNLD_THERMAL].smu_feature_bitmap),
+				"disable THERMAL Feature Failed!",
+				return -1);
+		data->smu_features[GNLD_THERMAL].enabled = false;
+	}
+
+	return 0;
+}
+
 static int vega10_enable_vrhot_feature(struct pp_hwmgr *hwmgr)
 {
 	struct vega10_hwmgr *data =
@@ -2535,6 +2541,37 @@ static int vega10_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int vega10_stop_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap)
+{
+	struct vega10_hwmgr *data =
+			(struct vega10_hwmgr *)(hwmgr->backend);
+	uint32_t i, feature_mask = 0;
+
+
+	if(data->smu_features[GNLD_LED_DISPLAY].supported == true){
+		PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
+				true, data->smu_features[GNLD_LED_DISPLAY].smu_feature_bitmap),
+		"Attempt to Enable LED DPM feature Failed!", return -EINVAL);
+		data->smu_features[GNLD_LED_DISPLAY].enabled = true;
+	}
+
+	for (i = 0; i < GNLD_DPM_MAX; i++) {
+		if (data->smu_features[i].smu_feature_bitmap & bitmap) {
+			if (data->smu_features[i].supported) {
+				if (data->smu_features[i].enabled) {
+					feature_mask |= data->smu_features[i].
+							smu_feature_bitmap;
+					data->smu_features[i].enabled = false;
+				}
+			}
+		}
+	}
+
+	vega10_enable_smc_features(hwmgr->smumgr, false, feature_mask);
+
+	return 0;
+}
+
 /**
  * @brief Tell SMC to enabled the supported DPMs.
  *
@@ -2576,6 +2613,12 @@ static int vega10_start_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap)
 		data->smu_features[GNLD_LED_DISPLAY].enabled = true;
 	}
 
+	if (data->vbios_boot_state.bsoc_vddc_lock) {
+		smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+						PPSMC_MSG_SetFloorSocVoltage, 0);
+		data->vbios_boot_state.bsoc_vddc_lock = false;
+	}
+
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_Falcon_QuickTransition)) {
 		if (data->smu_features[GNLD_ACDC].supported) {
@@ -2602,8 +2645,6 @@ static int vega10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 			"Failed to configure telemetry!",
 			return tmp_result);
 
-	vega10_set_tools_address(hwmgr->smumgr);
-
 	smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
 			PPSMC_MSG_NumOfDisplays, 0);
 
@@ -3880,32 +3921,36 @@ static int vega10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 
 static int vega10_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode)
 {
-	if (mode) {
-		/* stop auto-manage */
-		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
-				PHM_PlatformCaps_MicrocodeFanControl))
-			vega10_fan_ctrl_stop_smc_fan_control(hwmgr);
-		vega10_fan_ctrl_set_static_mode(hwmgr, mode);
-	} else
-		/* restart auto-manage */
-		vega10_fan_ctrl_reset_fan_speed_to_default(hwmgr);
+	int result = 0;
 
-	return 0;
+	switch (mode) {
+	case AMD_FAN_CTRL_NONE:
+		result = vega10_fan_ctrl_set_fan_speed_percent(hwmgr, 100);
+		break;
+	case AMD_FAN_CTRL_MANUAL:
+		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_MicrocodeFanControl))
+			result = vega10_fan_ctrl_stop_smc_fan_control(hwmgr);
+		break;
+	case AMD_FAN_CTRL_AUTO:
+		result = vega10_fan_ctrl_set_static_mode(hwmgr, mode);
+		if (!result)
+			result = vega10_fan_ctrl_start_smc_fan_control(hwmgr);
+		break;
+	default:
+		break;
+	}
+	return result;
 }
 
 static int vega10_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 {
-	uint32_t reg;
+	struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
 
-	if (hwmgr->fan_ctrl_is_in_default_mode) {
-		return hwmgr->fan_ctrl_default_mode;
-	} else {
-		reg = soc15_get_register_offset(THM_HWID, 0,
-			mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2);
-		return (cgs_read_register(hwmgr->device, reg) &
-				CG_FDO_CTRL2__FDO_PWM_MODE_MASK) >>
-				CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT;
-	}
+	if (data->smu_features[GNLD_FAN_CONTROL].enabled == false)
+		return AMD_FAN_CTRL_MANUAL;
+	else
+		return AMD_FAN_CTRL_AUTO;
 }
 
 static int vega10_get_dal_power_level(struct pp_hwmgr *hwmgr,
@@ -4141,62 +4186,63 @@ static int vega10_force_clock_level(struct pp_hwmgr *hwmgr,
 		enum pp_clock_type type, uint32_t mask)
 {
 	struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
-	uint32_t i;
+	int i;
 
 	if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
 		return -EINVAL;
 
 	switch (type) {
 	case PP_SCLK:
-		if (data->registry_data.sclk_dpm_key_disabled)
-			break;
-
 		for (i = 0; i < 32; i++) {
 			if (mask & (1 << i))
 				break;
 		}
+		data->smc_state_table.gfx_boot_level = i;
 
-		PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc_with_parameter(
-				hwmgr->smumgr,
-				PPSMC_MSG_SetSoftMinGfxclkByIndex,
-				i),
-				"Failed to set soft min sclk index!",
-				return -1);
+		for (i = 31; i >= 0; i--) {
+			if (mask & (1 << i))
+				break;
+		}
+		data->smc_state_table.gfx_max_level = i;
+
+		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr),
+			"Failed to upload boot level to lowest!",
+			return -EINVAL);
+
+		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr),
+			"Failed to upload dpm max level to highest!",
+			return -EINVAL);
 		break;
 
 	case PP_MCLK:
-		if (data->registry_data.mclk_dpm_key_disabled)
-			break;
-
 		for (i = 0; i < 32; i++) {
 			if (mask & (1 << i))
 				break;
 		}
 
-		PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc_with_parameter(
-				hwmgr->smumgr,
-				PPSMC_MSG_SetSoftMinUclkByIndex,
-				i),
-				"Failed to set soft min mclk index!",
-				return -1);
-		break;
-
-	case PP_PCIE:
-		if (data->registry_data.pcie_dpm_key_disabled)
-			break;
-
 		for (i = 0; i < 32; i++) {
 			if (mask & (1 << i))
 				break;
 		}
+		data->smc_state_table.mem_boot_level = i;
+
+		for (i = 31; i >= 0; i--) {
+			if (mask & (1 << i))
+				break;
+		}
+		data->smc_state_table.mem_max_level = i;
+
+		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr),
+			"Failed to upload boot level to lowest!",
+			return -EINVAL);
+
+		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr),
+			"Failed to upload dpm max level to highest!",
+			return -EINVAL);
 
-		PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc_with_parameter(
-				hwmgr->smumgr,
-				PPSMC_MSG_SetMinLinkDpmByIndex,
-				i),
-				"Failed to set min pcie index!",
-				return -1);
 		break;
+
+	case PP_PCIE:
 	default:
 		break;
 	}
@@ -4395,11 +4441,55 @@ vega10_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmg
 	return is_update_required;
 }
 
+static int vega10_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
+{
+	int tmp_result, result = 0;
+
+	tmp_result = (vega10_is_dpm_running(hwmgr)) ? 0 : -1;
+	PP_ASSERT_WITH_CODE(tmp_result == 0,
+			"DPM is not running right now, no need to disable DPM!",
+			return 0);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_ThermalController))
+		vega10_disable_thermal_protection(hwmgr);
+
+	tmp_result = vega10_disable_power_containment(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable power containment!", result = tmp_result);
+
+	tmp_result = vega10_avfs_enable(hwmgr, false);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable AVFS!", result = tmp_result);
+
+	tmp_result = vega10_stop_dpm(hwmgr, SMC_DPM_FEATURES);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to stop DPM!", result = tmp_result);
+
+	return result;
+}
+
+static int vega10_power_off_asic(struct pp_hwmgr *hwmgr)
+{
+	struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
+	int result;
+
+	result = vega10_disable_dpm_tasks(hwmgr);
+	PP_ASSERT_WITH_CODE((0 == result),
+			"[disable_dpm_tasks] Failed to disable DPM!",
+			);
+	data->water_marks_bitmap &= ~(WaterMarksLoaded);
+
+	return result;
+}
+
+
 static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
 	.backend_init = vega10_hwmgr_backend_init,
 	.backend_fini = vega10_hwmgr_backend_fini,
 	.asic_setup = vega10_setup_asic_task,
 	.dynamic_state_management_enable = vega10_enable_dpm_tasks,
+	.dynamic_state_management_disable = vega10_disable_dpm_tasks,
 	.get_num_of_pp_table_entries =
 			vega10_get_number_of_powerplay_table_entries,
 	.get_power_state_size = vega10_get_power_state_size,
@@ -4439,6 +4529,8 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
 	.check_states_equal = vega10_check_states_equal,
 	.check_smc_update_required_for_display_configuration =
 			vega10_check_smc_update_required_for_display_configuration,
+	.power_off_asic = vega10_power_off_asic,
+	.disable_smc_firmware_ctf = vega10_thermal_disable_alert,
 };
 
 int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
index 83c67b9262ff..1912e086c0cf 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
@@ -177,8 +177,11 @@ struct vega10_dpmlevel_enable_mask {
 };
 
 struct vega10_vbios_boot_state {
+	bool        bsoc_vddc_lock;
 	uint16_t    vddc;
 	uint16_t    vddci;
+	uint16_t    mvddc;
+	uint16_t    vdd_gfx;
 	uint32_t    gfx_clock;
 	uint32_t    mem_clock;
 	uint32_t    soc_clock;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
index f1e244cd2370..3f72268e99bb 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
@@ -48,8 +48,8 @@ void vega10_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr)
 	table->Tliquid1Limit = cpu_to_le16(tdp_table->usTemperatureLimitLiquid1);
 	table->Tliquid2Limit = cpu_to_le16(tdp_table->usTemperatureLimitLiquid2);
 	table->TplxLimit = cpu_to_le16(tdp_table->usTemperatureLimitPlx);
-	table->LoadLineResistance = cpu_to_le16(
-			hwmgr->platform_descriptor.LoadLineSlope);
+	table->LoadLineResistance =
+			hwmgr->platform_descriptor.LoadLineSlope * 256;
 	table->FitLimit = 0; /* Not used for Vega10 */
 
 	table->Liquid1_I2C_address = tdp_table->ucLiquid1_I2C_address;
@@ -113,6 +113,29 @@ int vega10_enable_power_containment(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+int vega10_disable_power_containment(struct pp_hwmgr *hwmgr)
+{
+	struct vega10_hwmgr *data =
+			(struct vega10_hwmgr *)(hwmgr->backend);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_PowerContainment)) {
+		if (data->smu_features[GNLD_PPT].supported)
+			PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
+					false, data->smu_features[GNLD_PPT].smu_feature_bitmap),
+					"Attempt to disable PPT feature Failed!",
+					data->smu_features[GNLD_PPT].supported = false);
+
+		if (data->smu_features[GNLD_TDC].supported)
+			PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
+					false, data->smu_features[GNLD_TDC].smu_feature_bitmap),
+					"Attempt to disable PPT feature Failed!",
+					data->smu_features[GNLD_TDC].supported = false);
+	}
+
+	return 0;
+}
+
 static int vega10_set_overdrive_target_percentage(struct pp_hwmgr *hwmgr,
 		uint32_t adjust_percent)
 {
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h
index d9662bf4a4b4..9ecaa27c0bb5 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h
@@ -60,6 +60,7 @@ int vega10_enable_smc_cac(struct pp_hwmgr *hwmgr);
 int vega10_enable_power_containment(struct pp_hwmgr *hwmgr);
 int vega10_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n);
 int vega10_power_control_set_level(struct pp_hwmgr *hwmgr);
+int vega10_disable_power_containment(struct pp_hwmgr *hwmgr);
 
 #endif  /* _VEGA10_POWERTUNE_H_ */
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
index 8b55ae01132d..00e95511e19a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
@@ -407,7 +407,7 @@ static int get_tdp_table(
 		tdp_table->ucPlx_I2C_address = power_tune_table->ucPlx_I2C_address;
 		tdp_table->ucPlx_I2C_Line = power_tune_table->ucPlx_I2C_LineSCL;
 		tdp_table->ucPlx_I2C_LineSDA = power_tune_table->ucPlx_I2C_LineSDA;
-		hwmgr->platform_descriptor.LoadLineSlope = power_tune_table->usLoadLineResistance;
+		hwmgr->platform_descriptor.LoadLineSlope = le16_to_cpu(power_tune_table->usLoadLineResistance);
 	} else {
 		power_tune_table_v2 = (ATOM_Vega10_PowerTune_Table_V2 *)table;
 		tdp_table->usMaximumPowerDeliveryLimit = le16_to_cpu(power_tune_table_v2->usSocketPowerLimit);
@@ -453,7 +453,7 @@ static int get_tdp_table(
 		tdp_table->ucPlx_I2C_LineSDA = sda;
 
 		hwmgr->platform_descriptor.LoadLineSlope =
-					power_tune_table_v2->usLoadLineResistance;
+					le16_to_cpu(power_tune_table_v2->usLoadLineResistance);
 	}
 
 	*info_tdp_table = tdp_table;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
index f4d77b62e1ba..83e40fe51b62 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
@@ -381,14 +381,10 @@ int vega10_thermal_get_temperature(struct pp_hwmgr *hwmgr)
 
 	temp = cgs_read_register(hwmgr->device, reg);
 
-	temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >>
-			CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT;
+	temp = (temp & CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP_MASK) >>
+			CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP__SHIFT;
 
-	/* Bit 9 means the reading is lower than the lowest usable value. */
-	if (temp & 0x200)
-		temp = VEGA10_THERMAL_MAXIMUM_TEMP_READING;
-	else
-		temp = temp & 0x1ff;
+	temp = temp & 0x1ff;
 
 	temp *= PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
@@ -424,23 +420,28 @@ static int vega10_thermal_set_temperature_range(struct pp_hwmgr *hwmgr,
 			mmTHM_THERMAL_INT_CTRL_BASE_IDX, mmTHM_THERMAL_INT_CTRL);
 
 	val = cgs_read_register(hwmgr->device, reg);
-	val &= ~(THM_THERMAL_INT_CTRL__DIG_THERM_INTH_MASK);
-	val |= (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES) <<
-			THM_THERMAL_INT_CTRL__DIG_THERM_INTH__SHIFT;
-	val &= ~(THM_THERMAL_INT_CTRL__DIG_THERM_INTL_MASK);
-	val |= (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES) <<
-			THM_THERMAL_INT_CTRL__DIG_THERM_INTL__SHIFT;
+
+	val &= (~THM_THERMAL_INT_CTRL__MAX_IH_CREDIT_MASK);
+	val |=  (5 << THM_THERMAL_INT_CTRL__MAX_IH_CREDIT__SHIFT);
+
+	val &= (~THM_THERMAL_INT_CTRL__THERM_IH_HW_ENA_MASK);
+	val |= (1 << THM_THERMAL_INT_CTRL__THERM_IH_HW_ENA__SHIFT);
+
+	val &= (~THM_THERMAL_INT_CTRL__DIG_THERM_INTH_MASK);
+	val |= ((high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)
+			<< THM_THERMAL_INT_CTRL__DIG_THERM_INTH__SHIFT);
+
+	val &= (~THM_THERMAL_INT_CTRL__DIG_THERM_INTL_MASK);
+	val |= ((low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)
+			<< THM_THERMAL_INT_CTRL__DIG_THERM_INTL__SHIFT);
+
+	val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK);
+
 	cgs_write_register(hwmgr->device, reg, val);
 
 	reg = soc15_get_register_offset(THM_HWID, 0,
 			mmTHM_TCON_HTC_BASE_IDX, mmTHM_TCON_HTC);
 
-	val = cgs_read_register(hwmgr->device, reg);
-	val &= ~(THM_TCON_HTC__HTC_TMP_LMT_MASK);
-	val |= (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES) <<
-			THM_TCON_HTC__HTC_TMP_LMT__SHIFT;
-	cgs_write_register(hwmgr->device, reg, val);
-
 	return 0;
 }
 
@@ -482,18 +483,28 @@ static int vega10_thermal_initialize(struct pp_hwmgr *hwmgr)
 static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr)
 {
 	struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
+	uint32_t val = 0;
+	uint32_t reg;
 
 	if (data->smu_features[GNLD_FW_CTF].supported) {
 		if (data->smu_features[GNLD_FW_CTF].enabled)
 			printk("[Thermal_EnableAlert] FW CTF Already Enabled!\n");
+
+		PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
+				true,
+				data->smu_features[GNLD_FW_CTF].smu_feature_bitmap),
+				"Attempt to Enable FW CTF feature Failed!",
+				return -1);
+		data->smu_features[GNLD_FW_CTF].enabled = true;
 	}
 
-	PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
-			true,
-			data->smu_features[GNLD_FW_CTF].smu_feature_bitmap),
-			"Attempt to Enable FW CTF feature Failed!",
-			return -1);
-	data->smu_features[GNLD_FW_CTF].enabled = true;
+	val |= (1 << THM_THERMAL_INT_ENA__THERM_INTH_CLR__SHIFT);
+	val |= (1 << THM_THERMAL_INT_ENA__THERM_INTL_CLR__SHIFT);
+	val |= (1 << THM_THERMAL_INT_ENA__THERM_TRIGGER_CLR__SHIFT);
+
+	reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA);
+	cgs_write_register(hwmgr->device, reg, val);
+
 	return 0;
 }
 
@@ -501,21 +512,27 @@ static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr)
 * Disable thermal alerts on the RV770 thermal controller.
 * @param    hwmgr The address of the hardware manager.
 */
-static int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr)
+int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr)
 {
 	struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
+	uint32_t reg;
 
 	if (data->smu_features[GNLD_FW_CTF].supported) {
 		if (!data->smu_features[GNLD_FW_CTF].enabled)
 			printk("[Thermal_EnableAlert] FW CTF Already disabled!\n");
-	}
 
-	PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
+
+		PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
 			false,
 			data->smu_features[GNLD_FW_CTF].smu_feature_bitmap),
 			"Attempt to disable FW CTF feature Failed!",
 			return -1);
-	data->smu_features[GNLD_FW_CTF].enabled = false;
+		data->smu_features[GNLD_FW_CTF].enabled = false;
+	}
+
+	reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA);
+	cgs_write_register(hwmgr->device, reg, 0);
+
 	return 0;
 }
 
@@ -561,6 +578,11 @@ int tf_vega10_thermal_setup_fan_table(struct pp_hwmgr *hwmgr,
 			advanceFanControlParameters.ulMinFanSCLKAcousticLimit);
 	table->FanTargetTemperature = hwmgr->thermal_controller.
 			advanceFanControlParameters.usTMax;
+
+	smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+				PPSMC_MSG_SetFanTemperatureTarget,
+				(uint32_t)table->FanTargetTemperature);
+
 	table->FanPwmMin = hwmgr->thermal_controller.
 			advanceFanControlParameters.usPWMMin * 255 / 100;
 	table->FanTargetGfxclk = (uint16_t)(hwmgr->thermal_controller.
@@ -687,17 +709,17 @@ static int tf_vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr,
 
 static struct phm_master_table_item
 vega10_thermal_start_thermal_controller_master_list[] = {
-	{NULL, tf_vega10_thermal_initialize},
-	{NULL, tf_vega10_thermal_set_temperature_range},
-	{NULL, tf_vega10_thermal_enable_alert},
+	{ .tableFunction = tf_vega10_thermal_initialize },
+	{ .tableFunction = tf_vega10_thermal_set_temperature_range },
+	{ .tableFunction = tf_vega10_thermal_enable_alert },
 /* We should restrict performance levels to low before we halt the SMC.
  * On the other hand we are still in boot state when we do this
  * so it would be pointless.
  * If this assumption changes we have to revisit this table.
  */
-	{NULL, tf_vega10_thermal_setup_fan_table},
-	{NULL, tf_vega10_thermal_start_smc_fan_control},
-	{NULL, NULL}
+	{ .tableFunction = tf_vega10_thermal_setup_fan_table },
+	{ .tableFunction = tf_vega10_thermal_start_smc_fan_control },
+	{ }
 };
 
 static struct phm_master_table_header
@@ -709,10 +731,10 @@ vega10_thermal_start_thermal_controller_master = {
 
 static struct phm_master_table_item
 vega10_thermal_set_temperature_range_master_list[] = {
-	{NULL, tf_vega10_thermal_disable_alert},
-	{NULL, tf_vega10_thermal_set_temperature_range},
-	{NULL, tf_vega10_thermal_enable_alert},
-	{NULL, NULL}
+	{ .tableFunction = tf_vega10_thermal_disable_alert },
+	{ .tableFunction = tf_vega10_thermal_set_temperature_range },
+	{ .tableFunction = tf_vega10_thermal_enable_alert },
+	{ }
 };
 
 struct phm_master_table_header
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h
index 8036808ec421..776f3a2effc0 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h
@@ -78,6 +78,8 @@ extern int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr,
 		uint32_t *speed);
 extern int vega10_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr);
 extern uint32_t smu7_get_xclk(struct pp_hwmgr *hwmgr);
+extern int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr);
+int vega10_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr);
 
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index 5345b50761f4..a1ebe1014492 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -431,6 +431,6 @@ extern int phm_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
 		struct pp_display_clock_request *clock);
 
 extern int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
-
+extern int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr);
 #endif /* _HARDWARE_MANAGER_H_ */
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 320225dd3328..805b9df452a3 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -368,11 +368,10 @@ struct pp_hwmgr_func {
 	int (*get_mclk_od)(struct pp_hwmgr *hwmgr);
 	int (*set_mclk_od)(struct pp_hwmgr *hwmgr, uint32_t value);
 	int (*read_sensor)(struct pp_hwmgr *hwmgr, int idx, void *value, int *size);
-	int (*request_firmware)(struct pp_hwmgr *hwmgr);
-	int (*release_firmware)(struct pp_hwmgr *hwmgr);
 	int (*set_power_profile_state)(struct pp_hwmgr *hwmgr,
 			struct amd_pp_profile *request);
 	int (*avfs_control)(struct pp_hwmgr *hwmgr, bool enable);
+	int (*disable_smc_firmware_ctf)(struct pp_hwmgr *hwmgr);
 };
 
 struct pp_table_func {
@@ -765,6 +764,7 @@ struct pp_hwmgr {
 	struct pp_thermal_controller_info thermal_controller;
 	bool fan_ctrl_is_in_default_mode;
 	uint32_t fan_ctrl_default_mode;
+	bool fan_ctrl_enabled;
 	uint32_t tmin;
 	struct phm_microcode_version_info microcode_version_info;
 	uint32_t ps_size;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h
index 2037910adcb1..d43f98a910b0 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h
@@ -30,7 +30,7 @@
  * SMU TEAM: Always increment the interface version if
  * any structure is changed in this file
  */
-#define SMU9_DRIVER_IF_VERSION 0xB
+#define SMU9_DRIVER_IF_VERSION 0xD
 
 #define PPTABLE_V10_SMU_VERSION 1
 
@@ -302,7 +302,17 @@ typedef struct {
 
   uint32_t     DpmLevelPowerDelta;
 
-  uint32_t     Reserved[19];
+  uint8_t      EnableBoostState;
+  uint8_t      AConstant_Shift;
+  uint8_t      DC_tol_sigma_Shift;
+  uint8_t      PSM_Age_CompFactor_Shift;
+
+  uint16_t     BoostStartTemperature;
+  uint16_t     BoostStopTemperature;
+
+  PllSetting_t GfxBoostState;
+
+  uint32_t     Reserved[14];
 
   /* Padding - ignore */
   uint32_t     MmHubPadding[7]; /* SMU internal use */
@@ -464,4 +474,8 @@ typedef struct {
 #define DB_PCC_SHIFT 26
 #define DB_EDC_SHIFT 27
 
+#define REMOVE_FMAX_MARGIN_BIT     0x0
+#define REMOVE_DCTOL_MARGIN_BIT    0x1
+#define REMOVE_PLATFORM_MARGIN_BIT 0x2
+
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
index 90beef35bba2..254974d3d371 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
@@ -122,7 +122,10 @@ typedef uint16_t PPSMC_Result;
 #define PPSMC_MSG_SetFanMinPwm                   0x52
 #define PPSMC_MSG_ConfigureGfxDidt               0x55
 #define PPSMC_MSG_NumOfDisplays                  0x56
-#define PPSMC_Message_Count                      0x57
+#define PPSMC_MSG_ReadSerialNumTop32             0x58
+#define PPSMC_MSG_ReadSerialNumBottom32          0x59
+#define PPSMC_Message_Count                      0x5A
+
 
 typedef int PPSMC_Msg;
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
index 2685f02ab551..115f0e4b1603 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
@@ -74,18 +74,18 @@ static bool vega10_is_smc_ram_running(struct pp_smumgr *smumgr)
 	return false;
 }
 
-/**
-* Check if SMC has responded to previous message.
-*
-* @param    smumgr  the address of the powerplay hardware manager.
-* @return   TRUE    SMC has responded, FALSE otherwise.
-*/
+/*
+ * Check if SMC has responded to previous message.
+ *
+ * @param    smumgr  the address of the powerplay hardware manager.
+ * @return   TRUE    SMC has responded, FALSE otherwise.
+ */
 static uint32_t vega10_wait_for_response(struct pp_smumgr *smumgr)
 {
 	uint32_t reg;
 
 	if (!vega10_is_smc_ram_running(smumgr))
-		return -1;
+		return -EINVAL;
 
 	reg = soc15_get_register_offset(MP1_HWID, 0,
 			mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
@@ -96,20 +96,19 @@ static uint32_t vega10_wait_for_response(struct pp_smumgr *smumgr)
 	return cgs_read_register(smumgr->device, reg);
 }
 
-/**
-* Send a message to the SMC, and do not wait for its response.
-*
-* @param    smumgr  the address of the powerplay hardware manager.
-* @param    msg the message to send.
-* @return   Always return 0.
-*/
+/*
+ * Send a message to the SMC, and do not wait for its response.
+ * @param    smumgr  the address of the powerplay hardware manager.
+ * @param    msg the message to send.
+ * @return   Always return 0.
+ */
 int vega10_send_msg_to_smc_without_waiting(struct pp_smumgr *smumgr,
 		uint16_t msg)
 {
 	uint32_t reg;
 
 	if (!vega10_is_smc_ram_running(smumgr))
-		return -1;
+		return -EINVAL;
 
 	reg = soc15_get_register_offset(MP1_HWID, 0,
 			mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66);
@@ -118,19 +117,18 @@ int vega10_send_msg_to_smc_without_waiting(struct pp_smumgr *smumgr,
 	return 0;
 }
 
-/**
-* Send a message to the SMC, and wait for its response.
-*
-* @param    smumgr  the address of the powerplay hardware manager.
-* @param    msg the message to send.
-* @return   The response that came from the SMC.
-*/
+/*
+ * Send a message to the SMC, and wait for its response.
+ * @param    smumgr  the address of the powerplay hardware manager.
+ * @param    msg the message to send.
+ * @return   Always return 0.
+ */
 int vega10_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg)
 {
 	uint32_t reg;
 
 	if (!vega10_is_smc_ram_running(smumgr))
-		return -1;
+		return -EINVAL;
 
 	vega10_wait_for_response(smumgr);
 
@@ -140,19 +138,18 @@ int vega10_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg)
 
 	vega10_send_msg_to_smc_without_waiting(smumgr, msg);
 
-	PP_ASSERT_WITH_CODE(vega10_wait_for_response(smumgr) == 1,
-			"Failed to send Message.",
-			return -1);
+	if (vega10_wait_for_response(smumgr) != 1)
+		pr_err("Failed to send message: 0x%x\n", msg);
 
 	return 0;
 }
 
-/**
+/*
  * Send a message to the SMC with parameter
  * @param    smumgr:  the address of the powerplay hardware manager.
  * @param    msg: the message to send.
  * @param    parameter: the parameter to send
- * @return   The response that came from the SMC.
+ * @return   Always return 0.
  */
 int vega10_send_msg_to_smc_with_parameter(struct pp_smumgr *smumgr,
 		uint16_t msg, uint32_t parameter)
@@ -160,7 +157,7 @@ int vega10_send_msg_to_smc_with_parameter(struct pp_smumgr *smumgr,
 	uint32_t reg;
 
 	if (!vega10_is_smc_ram_running(smumgr))
-		return -1;
+		return -EINVAL;
 
 	vega10_wait_for_response(smumgr);
 
@@ -174,22 +171,20 @@ int vega10_send_msg_to_smc_with_parameter(struct pp_smumgr *smumgr,
 
 	vega10_send_msg_to_smc_without_waiting(smumgr, msg);
 
-	PP_ASSERT_WITH_CODE(vega10_wait_for_response(smumgr) == 1,
-			"Failed to send Message.",
-			return -1);
+	if (vega10_wait_for_response(smumgr) != 1)
+		pr_err("Failed to send message: 0x%x\n", msg);
 
 	return 0;
 }
 
 
-/**
-* Send a message to the SMC with parameter, do not wait for response
-*
-* @param    smumgr:  the address of the powerplay hardware manager.
-* @param    msg: the message to send.
-* @param    parameter: the parameter to send
-* @return   The response that came from the SMC.
-*/
+/*
+ * Send a message to the SMC with parameter, do not wait for response
+ * @param    smumgr:  the address of the powerplay hardware manager.
+ * @param    msg: the message to send.
+ * @param    parameter: the parameter to send
+ * @return   The response that came from the SMC.
+ */
 int vega10_send_msg_to_smc_with_parameter_without_waiting(
 		struct pp_smumgr *smumgr, uint16_t msg, uint32_t parameter)
 {
@@ -202,13 +197,12 @@ int vega10_send_msg_to_smc_with_parameter_without_waiting(
 	return vega10_send_msg_to_smc_without_waiting(smumgr, msg);
 }
 
-/**
-* Retrieve an argument from SMC.
-*
-* @param    smumgr  the address of the powerplay hardware manager.
-* @param    arg     pointer to store the argument from SMC.
-* @return   Always return 0.
-*/
+/*
+ * Retrieve an argument from SMC.
+ * @param    smumgr  the address of the powerplay hardware manager.
+ * @param    arg     pointer to store the argument from SMC.
+ * @return   Always return 0.
+ */
 int vega10_read_arg_from_smc(struct pp_smumgr *smumgr, uint32_t *arg)
 {
 	uint32_t reg;
@@ -221,11 +215,11 @@ int vega10_read_arg_from_smc(struct pp_smumgr *smumgr, uint32_t *arg)
 	return 0;
 }
 
-/**
-* Copy table from SMC into driver FB
-* @param   smumgr    the address of the SMC manager
-* @param   table_id    the driver's table ID to copy from
-*/
+/*
+ * Copy table from SMC into driver FB
+ * @param   smumgr    the address of the SMC manager
+ * @param   table_id    the driver's table ID to copy from
+ */
 int vega10_copy_table_from_smc(struct pp_smumgr *smumgr,
 		uint8_t *table, int16_t table_id)
 {
@@ -233,25 +227,25 @@ int vega10_copy_table_from_smc(struct pp_smumgr *smumgr,
 			(struct vega10_smumgr *)(smumgr->backend);
 
 	PP_ASSERT_WITH_CODE(table_id < MAX_SMU_TABLE,
-			"Invalid SMU Table ID!", return -1;);
+			"Invalid SMU Table ID!", return -EINVAL);
 	PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].version != 0,
-			"Invalid SMU Table version!", return -1;);
+			"Invalid SMU Table version!", return -EINVAL);
 	PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].size != 0,
-			"Invalid SMU Table Length!", return -1;);
+			"Invalid SMU Table Length!", return -EINVAL);
 	PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr,
 			PPSMC_MSG_SetDriverDramAddrHigh,
 			priv->smu_tables.entry[table_id].table_addr_high) == 0,
-			"[CopyTableFromSMC] Attempt to Set Dram Addr High Failed!", return -1;);
+			"[CopyTableFromSMC] Attempt to Set Dram Addr High Failed!", return -EINVAL);
 	PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr,
 			PPSMC_MSG_SetDriverDramAddrLow,
 			priv->smu_tables.entry[table_id].table_addr_low) == 0,
 			"[CopyTableFromSMC] Attempt to Set Dram Addr Low Failed!",
-			return -1;);
+			return -EINVAL);
 	PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr,
 			PPSMC_MSG_TransferTableSmu2Dram,
 			priv->smu_tables.entry[table_id].table_id) == 0,
 			"[CopyTableFromSMC] Attempt to Transfer Table From SMU Failed!",
-			return -1;);
+			return -EINVAL);
 
 	memcpy(table, priv->smu_tables.entry[table_id].table,
 			priv->smu_tables.entry[table_id].size);
@@ -259,11 +253,11 @@ int vega10_copy_table_from_smc(struct pp_smumgr *smumgr,
 	return 0;
 }
 
-/**
-* Copy table from Driver FB into SMC
-* @param   smumgr    the address of the SMC manager
-* @param   table_id    the table to copy from
-*/
+/*
+ * Copy table from Driver FB into SMC
+ * @param   smumgr    the address of the SMC manager
+ * @param   table_id    the table to copy from
+ */
 int vega10_copy_table_to_smc(struct pp_smumgr *smumgr,
 		uint8_t *table, int16_t table_id)
 {
@@ -271,11 +265,11 @@ int vega10_copy_table_to_smc(struct pp_smumgr *smumgr,
 			(struct vega10_smumgr *)(smumgr->backend);
 
 	PP_ASSERT_WITH_CODE(table_id < MAX_SMU_TABLE,
-			"Invalid SMU Table ID!", return -1;);
+			"Invalid SMU Table ID!", return -EINVAL);
 	PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].version != 0,
-			"Invalid SMU Table version!", return -1;);
+			"Invalid SMU Table version!", return -EINVAL);
 	PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].size != 0,
-			"Invalid SMU Table Length!", return -1;);
+			"Invalid SMU Table Length!", return -EINVAL);
 
 	memcpy(priv->smu_tables.entry[table_id].table, table,
 			priv->smu_tables.entry[table_id].size);
@@ -284,27 +278,18 @@ int vega10_copy_table_to_smc(struct pp_smumgr *smumgr,
 			PPSMC_MSG_SetDriverDramAddrHigh,
 			priv->smu_tables.entry[table_id].table_addr_high) == 0,
 			"[CopyTableToSMC] Attempt to Set Dram Addr High Failed!",
-			return -1;);
+			return -EINVAL;);
 	PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr,
 			PPSMC_MSG_SetDriverDramAddrLow,
 			priv->smu_tables.entry[table_id].table_addr_low) == 0,
 			"[CopyTableToSMC] Attempt to Set Dram Addr Low Failed!",
-			return -1;);
+			return -EINVAL);
 	PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr,
 			PPSMC_MSG_TransferTableDram2Smu,
 			priv->smu_tables.entry[table_id].table_id) == 0,
 			"[CopyTableToSMC] Attempt to Transfer Table To SMU Failed!",
-			return -1;);
-
-	return 0;
-}
+			return -EINVAL);
 
-int vega10_perform_btc(struct pp_smumgr *smumgr)
-{
-	PP_ASSERT_WITH_CODE(!vega10_send_msg_to_smc_with_parameter(
-			smumgr, PPSMC_MSG_RunBtc, 0),
-			"Attempt to run DC BTC Failed!",
-			return -1);
 	return 0;
 }
 
@@ -312,7 +297,7 @@ int vega10_save_vft_table(struct pp_smumgr *smumgr, uint8_t *avfs_table)
 {
 	PP_ASSERT_WITH_CODE(avfs_table,
 			"No access to SMC AVFS Table",
-			return -1);
+			return -EINVAL);
 
 	return vega10_copy_table_from_smc(smumgr, avfs_table, AVFSTABLE);
 }
@@ -321,7 +306,7 @@ int vega10_restore_vft_table(struct pp_smumgr *smumgr, uint8_t *avfs_table)
 {
 	PP_ASSERT_WITH_CODE(avfs_table,
 			"No access to SMC AVFS Table",
-			return -1);
+			return -EINVAL);
 
 	return vega10_copy_table_to_smc(smumgr, avfs_table, AVFSTABLE);
 }
@@ -339,13 +324,16 @@ int vega10_enable_smc_features(struct pp_smumgr *smumgr,
 int vega10_get_smc_features(struct pp_smumgr *smumgr,
 		uint32_t *features_enabled)
 {
+	if (features_enabled == NULL)
+		return -EINVAL;
+
 	if (!vega10_send_msg_to_smc(smumgr,
 			PPSMC_MSG_GetEnabledSmuFeatures)) {
-		if (!vega10_read_arg_from_smc(smumgr, features_enabled))
-			return 0;
+		vega10_read_arg_from_smc(smumgr, features_enabled);
+		return 0;
 	}
 
-	return -1;
+	return -EINVAL;
 }
 
 int vega10_set_tools_address(struct pp_smumgr *smumgr)
@@ -372,25 +360,20 @@ static int vega10_verify_smc_interface(struct pp_smumgr *smumgr)
 	PP_ASSERT_WITH_CODE(!vega10_send_msg_to_smc(smumgr,
 			PPSMC_MSG_GetDriverIfVersion),
 			"Attempt to get SMC IF Version Number Failed!",
-			return -1);
-	PP_ASSERT_WITH_CODE(!vega10_read_arg_from_smc(smumgr,
-			&smc_driver_if_version),
-			"Attempt to read SMC IF Version Number Failed!",
-			return -1);
-
-	if (smc_driver_if_version != SMU9_DRIVER_IF_VERSION)
-		return -1;
+			return -EINVAL);
+	vega10_read_arg_from_smc(smumgr, &smc_driver_if_version);
+
+	if (smc_driver_if_version != SMU9_DRIVER_IF_VERSION) {
+		pr_err("Your firmware(0x%x) doesn't match \
+			SMU9_DRIVER_IF_VERSION(0x%x). \
+			Please update your firmware!\n",
+			smc_driver_if_version, SMU9_DRIVER_IF_VERSION);
+		return -EINVAL;
+	}
 
 	return 0;
 }
 
-/**
-* Write a 32bit value to the SMC SRAM space.
-* ALL PARAMETERS ARE IN HOST BYTE ORDER.
-* @param    smumgr  the address of the powerplay hardware manager.
-* @param    smc_addr the address in the SMC RAM to access.
-* @param    value to write to the SMC SRAM.
-*/
 static int vega10_smu_init(struct pp_smumgr *smumgr)
 {
 	struct vega10_smumgr *priv;
@@ -427,7 +410,7 @@ static int vega10_smu_init(struct pp_smumgr *smumgr)
 			kfree(smumgr->backend);
 			cgs_free_gpu_mem(smumgr->device,
 			(cgs_handle_t)handle);
-			return -1);
+			return -EINVAL);
 
 	priv->smu_tables.entry[PPTABLE].version = 0x01;
 	priv->smu_tables.entry[PPTABLE].size = sizeof(PPTable_t);
@@ -455,7 +438,7 @@ static int vega10_smu_init(struct pp_smumgr *smumgr)
 			(cgs_handle_t)priv->smu_tables.entry[PPTABLE].handle);
 			cgs_free_gpu_mem(smumgr->device,
 			(cgs_handle_t)handle);
-			return -1);
+			return -EINVAL);
 
 	priv->smu_tables.entry[WMTABLE].version = 0x01;
 	priv->smu_tables.entry[WMTABLE].size = sizeof(Watermarks_t);
@@ -485,7 +468,7 @@ static int vega10_smu_init(struct pp_smumgr *smumgr)
 			(cgs_handle_t)priv->smu_tables.entry[WMTABLE].handle);
 			cgs_free_gpu_mem(smumgr->device,
 			(cgs_handle_t)handle);
-			return -1);
+			return -EINVAL);
 
 	priv->smu_tables.entry[AVFSTABLE].version = 0x01;
 	priv->smu_tables.entry[AVFSTABLE].size = sizeof(AvfsTable_t);
@@ -497,7 +480,7 @@ static int vega10_smu_init(struct pp_smumgr *smumgr)
 	priv->smu_tables.entry[AVFSTABLE].table = kaddr;
 	priv->smu_tables.entry[AVFSTABLE].handle = handle;
 
-	tools_size = 0;
+	tools_size = 0x19000;
 	if (tools_size) {
 		smu_allocate_memory(smumgr->device,
 				tools_size,
@@ -517,9 +500,44 @@ static int vega10_smu_init(struct pp_smumgr *smumgr)
 					smu_lower_32_bits(mc_addr);
 			priv->smu_tables.entry[TOOLSTABLE].table = kaddr;
 			priv->smu_tables.entry[TOOLSTABLE].handle = handle;
+			vega10_set_tools_address(smumgr);
 		}
 	}
 
+	/* allocate space for AVFS Fuse table */
+	smu_allocate_memory(smumgr->device,
+			sizeof(AvfsFuseOverride_t),
+			CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB,
+			PAGE_SIZE,
+			&mc_addr,
+			&kaddr,
+			&handle);
+
+	PP_ASSERT_WITH_CODE(kaddr,
+			"[vega10_smu_init] Out of memory for avfs fuse table.",
+			kfree(smumgr->backend);
+			cgs_free_gpu_mem(smumgr->device,
+			(cgs_handle_t)priv->smu_tables.entry[PPTABLE].handle);
+			cgs_free_gpu_mem(smumgr->device,
+			(cgs_handle_t)priv->smu_tables.entry[WMTABLE].handle);
+			cgs_free_gpu_mem(smumgr->device,
+			(cgs_handle_t)priv->smu_tables.entry[AVFSTABLE].handle);
+			cgs_free_gpu_mem(smumgr->device,
+			(cgs_handle_t)priv->smu_tables.entry[TOOLSTABLE].handle);
+			cgs_free_gpu_mem(smumgr->device,
+			(cgs_handle_t)handle);
+			return -EINVAL);
+
+	priv->smu_tables.entry[AVFSFUSETABLE].version = 0x01;
+	priv->smu_tables.entry[AVFSFUSETABLE].size = sizeof(AvfsFuseOverride_t);
+	priv->smu_tables.entry[AVFSFUSETABLE].table_id = TABLE_AVFS_FUSE_OVERRIDE;
+	priv->smu_tables.entry[AVFSFUSETABLE].table_addr_high =
+			smu_upper_32_bits(mc_addr);
+	priv->smu_tables.entry[AVFSFUSETABLE].table_addr_low =
+			smu_lower_32_bits(mc_addr);
+	priv->smu_tables.entry[AVFSFUSETABLE].table = kaddr;
+	priv->smu_tables.entry[AVFSFUSETABLE].handle = handle;
+
 	return 0;
 }
 
@@ -538,6 +556,8 @@ static int vega10_smu_fini(struct pp_smumgr *smumgr)
 		if (priv->smu_tables.entry[TOOLSTABLE].table)
 			cgs_free_gpu_mem(smumgr->device,
 					(cgs_handle_t)priv->smu_tables.entry[TOOLSTABLE].handle);
+		cgs_free_gpu_mem(smumgr->device,
+				(cgs_handle_t)priv->smu_tables.entry[AVFSFUSETABLE].handle);
 		kfree(smumgr->backend);
 		smumgr->backend = NULL;
 	}
@@ -548,7 +568,7 @@ static int vega10_start_smu(struct pp_smumgr *smumgr)
 {
 	PP_ASSERT_WITH_CODE(!vega10_verify_smc_interface(smumgr),
 			"Failed to verify SMC interface!",
-			return -1);
+			return -EINVAL);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h
index ad050212426d..821425c1e4e0 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h
@@ -30,6 +30,7 @@ enum smu_table_id {
 	WMTABLE,
 	AVFSTABLE,
 	TOOLSTABLE,
+	AVFSFUSETABLE,
 	MAX_SMU_TABLE,
 };
 
@@ -62,7 +63,6 @@ int vega10_get_smc_features(struct pp_smumgr *smumgr,
 		uint32_t *features_enabled);
 int vega10_save_vft_table(struct pp_smumgr *smumgr, uint8_t *avfs_table);
 int vega10_restore_vft_table(struct pp_smumgr *smumgr, uint8_t *avfs_table);
-int vega10_perform_btc(struct pp_smumgr *smumgr);
 
 int vega10_set_tools_address(struct pp_smumgr *smumgr);
 
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index acd882a188bc..fea96a765cf1 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -236,6 +236,23 @@ static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb
 	dma_fence_put(f);
 }
 
+bool amd_sched_dependency_optimized(struct dma_fence* fence,
+				    struct amd_sched_entity *entity)
+{
+	struct amd_gpu_scheduler *sched = entity->sched;
+	struct amd_sched_fence *s_fence;
+
+	if (!fence || dma_fence_is_signaled(fence))
+		return false;
+	if (fence->context == entity->fence_context)
+		return true;
+	s_fence = to_amd_sched_fence(fence);
+	if (s_fence && s_fence->sched == sched)
+		return true;
+
+	return false;
+}
+
 static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 {
 	struct amd_gpu_scheduler *sched = entity->sched;
@@ -387,7 +404,9 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
 
 	spin_lock(&sched->job_list_lock);
 	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
-		if (dma_fence_remove_callback(s_job->s_fence->parent, &s_job->s_fence->cb)) {
+		if (s_job->s_fence->parent &&
+		    dma_fence_remove_callback(s_job->s_fence->parent,
+					      &s_job->s_fence->cb)) {
 			dma_fence_put(s_job->s_fence->parent);
 			s_job->s_fence->parent = NULL;
 		}
@@ -460,9 +479,9 @@ int amd_sched_job_init(struct amd_sched_job *job,
 	job->sched = sched;
 	job->s_entity = entity;
 	job->s_fence = amd_sched_fence_create(entity, owner);
-	job->id = atomic64_inc_return(&sched->job_id_count);
 	if (!job->s_fence)
 		return -ENOMEM;
+	job->id = atomic64_inc_return(&sched->job_id_count);
 
 	INIT_WORK(&job->finish_work, amd_sched_job_finish);
 	INIT_LIST_HEAD(&job->node);
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 0255c7f8a6d8..924d4a5899e1 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -158,4 +158,6 @@ int amd_sched_job_init(struct amd_sched_job *job,
 		       void *owner);
 void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
 void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
+bool amd_sched_dependency_optimized(struct dma_fence* fence,
+				    struct amd_sched_entity *entity);
 #endif
diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c
index 798a3cc480a2..1a3359c0f6cd 100644
--- a/drivers/gpu/drm/arm/hdlcd_crtc.c
+++ b/drivers/gpu/drm/arm/hdlcd_crtc.c
@@ -10,6 +10,7 @@
  */
 
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
@@ -226,16 +227,33 @@ static const struct drm_crtc_helper_funcs hdlcd_crtc_helper_funcs = {
 static int hdlcd_plane_atomic_check(struct drm_plane *plane,
 				    struct drm_plane_state *state)
 {
-	u32 src_w, src_h;
+	struct drm_rect clip = { 0 };
+	struct drm_crtc_state *crtc_state;
+	u32 src_h = state->src_h >> 16;
 
-	src_w = state->src_w >> 16;
-	src_h = state->src_h >> 16;
+	/* only the HDLCD_REG_FB_LINE_COUNT register has a limit */
+	if (src_h >= HDLCD_MAX_YRES) {
+		DRM_DEBUG_KMS("Invalid source width: %d\n", src_h);
+		return -EINVAL;
+	}
+
+	if (!state->fb || !state->crtc)
+		return 0;
 
-	/* we can't do any scaling of the plane source */
-	if ((src_w != state->crtc_w) || (src_h != state->crtc_h))
+	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
+							state->crtc);
+	if (!crtc_state) {
+		DRM_DEBUG_KMS("Invalid crtc state\n");
 		return -EINVAL;
+	}
 
-	return 0;
+	clip.x2 = crtc_state->adjusted_mode.hdisplay;
+	clip.y2 = crtc_state->adjusted_mode.vdisplay;
+
+	return drm_plane_helper_check_state(state, &clip,
+					    DRM_PLANE_HELPER_NO_SCALING,
+					    DRM_PLANE_HELPER_NO_SCALING,
+					    false, true);
 }
 
 static void hdlcd_plane_atomic_update(struct drm_plane *plane,
@@ -244,21 +262,20 @@ static void hdlcd_plane_atomic_update(struct drm_plane *plane,
 	struct drm_framebuffer *fb = plane->state->fb;
 	struct hdlcd_drm_private *hdlcd;
 	struct drm_gem_cma_object *gem;
-	u32 src_w, src_h, dest_w, dest_h;
+	u32 src_x, src_y, dest_h;
 	dma_addr_t scanout_start;
 
 	if (!fb)
 		return;
 
-	src_w = plane->state->src_w >> 16;
-	src_h = plane->state->src_h >> 16;
-	dest_w = plane->state->crtc_w;
-	dest_h = plane->state->crtc_h;
+	src_x = plane->state->src.x1 >> 16;
+	src_y = plane->state->src.y1 >> 16;
+	dest_h = drm_rect_height(&plane->state->dst);
 	gem = drm_fb_cma_get_gem_obj(fb, 0);
+
 	scanout_start = gem->paddr + fb->offsets[0] +
-		plane->state->crtc_y * fb->pitches[0] +
-		plane->state->crtc_x *
-		fb->format->cpp[0];
+			src_y * fb->pitches[0] +
+			src_x *	fb->format->cpp[0];
 
 	hdlcd = plane->dev->dev_private;
 	hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_LENGTH, fb->pitches[0]);
@@ -305,7 +322,6 @@ static struct drm_plane *hdlcd_plane_init(struct drm_device *drm)
 				       formats, ARRAY_SIZE(formats),
 				       DRM_PLANE_TYPE_PRIMARY, NULL);
 	if (ret) {
-		devm_kfree(drm->dev, plane);
 		return ERR_PTR(ret);
 	}
 
@@ -329,7 +345,6 @@ int hdlcd_setup_crtc(struct drm_device *drm)
 					&hdlcd_crtc_funcs, NULL);
 	if (ret) {
 		hdlcd_plane_destroy(primary);
-		devm_kfree(drm->dev, primary);
 		return ret;
 	}
 
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
index 65a3bd7a0c00..423dda2785d4 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
@@ -152,8 +152,7 @@ static const struct drm_connector_funcs atmel_hlcdc_panel_connector_funcs = {
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
-static int atmel_hlcdc_attach_endpoint(struct drm_device *dev,
-				       const struct device_node *np)
+static int atmel_hlcdc_attach_endpoint(struct drm_device *dev, int endpoint)
 {
 	struct atmel_hlcdc_dc *dc = dev->dev_private;
 	struct atmel_hlcdc_rgb_output *output;
@@ -161,6 +160,11 @@ static int atmel_hlcdc_attach_endpoint(struct drm_device *dev,
 	struct drm_bridge *bridge;
 	int ret;
 
+	ret = drm_of_find_panel_or_bridge(dev->dev->of_node, 0, endpoint,
+					  &panel, &bridge);
+	if (ret)
+		return ret;
+
 	output = devm_kzalloc(dev->dev, sizeof(*output), GFP_KERNEL);
 	if (!output)
 		return -EINVAL;
@@ -177,10 +181,6 @@ static int atmel_hlcdc_attach_endpoint(struct drm_device *dev,
 
 	output->encoder.possible_crtcs = 0x1;
 
-	ret = drm_of_find_panel_or_bridge(np, 0, 0, &panel, &bridge);
-	if (ret)
-		return ret;
-
 	if (panel) {
 		output->connector.dpms = DRM_MODE_DPMS_OFF;
 		output->connector.polled = DRM_CONNECTOR_POLL_CONNECT;
@@ -220,22 +220,14 @@ err_encoder_cleanup:
 
 int atmel_hlcdc_create_outputs(struct drm_device *dev)
 {
-	struct device_node *remote;
-	int ret = -ENODEV;
-	int endpoint = 0;
-
-	while (true) {
-		/* Loop thru possible multiple connections to the output */
-		remote = of_graph_get_remote_node(dev->dev->of_node, 0,
-						  endpoint++);
-		if (!remote)
-			break;
-
-		ret = atmel_hlcdc_attach_endpoint(dev, remote);
-		of_node_put(remote);
-		if (ret)
-			return ret;
-	}
+	int endpoint, ret = 0;
+
+	for (endpoint = 0; !ret; endpoint++)
+		ret = atmel_hlcdc_attach_endpoint(dev, endpoint);
+
+	/* At least one device was successfully attached.*/
+	if (ret == -ENODEV && endpoint)
+		return 0;
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/bridge/synopsys/Kconfig b/drivers/gpu/drm/bridge/synopsys/Kconfig
index 40d2827a6d19..53e78d092d18 100644
--- a/drivers/gpu/drm/bridge/synopsys/Kconfig
+++ b/drivers/gpu/drm/bridge/synopsys/Kconfig
@@ -1,6 +1,7 @@
 config DRM_DW_HDMI
 	tristate
 	select DRM_KMS_HELPER
+	select REGMAP_MMIO
 
 config DRM_DW_HDMI_AHB_AUDIO
 	tristate "Synopsys Designware AHB Audio interface"
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 8be9719284b0..aa885a614e27 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -508,6 +508,8 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
 		bool has_connectors =
 			!!new_crtc_state->connector_mask;
 
+		WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
+
 		if (!drm_mode_equal(&old_crtc_state->mode, &new_crtc_state->mode)) {
 			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] mode changed\n",
 					 crtc->base.id, crtc->name);
@@ -551,6 +553,8 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
 	for_each_oldnew_connector_in_state(state, connector, old_connector_state, new_connector_state, i) {
 		const struct drm_connector_helper_funcs *funcs = connector->helper_private;
 
+		WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
+
 		/*
 		 * This only sets crtc->connectors_changed for routing changes,
 		 * drivers must set crtc->connectors_changed themselves when
@@ -650,6 +654,8 @@ drm_atomic_helper_check_planes(struct drm_device *dev,
 	for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) {
 		const struct drm_plane_helper_funcs *funcs;
 
+		WARN_ON(!drm_modeset_is_locked(&plane->mutex));
+
 		funcs = plane->helper_private;
 
 		drm_atomic_helper_plane_changed(state, old_plane_state, new_plane_state, plane);
@@ -2663,7 +2669,12 @@ int drm_atomic_helper_resume(struct drm_device *dev,
 
 	drm_modeset_acquire_init(&ctx, 0);
 	while (1) {
+		err = drm_modeset_lock_all_ctx(dev, &ctx);
+		if (err)
+			goto out;
+
 		err = drm_atomic_helper_commit_duplicated_state(state, &ctx);
+out:
 		if (err != -EDEADLK)
 			break;
 
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 9f847615ac74..48ca2457df8c 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -1229,21 +1229,6 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 	if (!connector)
 		return -ENOENT;
 
-	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
-	encoder = drm_connector_get_encoder(connector);
-	if (encoder)
-		out_resp->encoder_id = encoder->base.id;
-	else
-		out_resp->encoder_id = 0;
-
-	ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic,
-			(uint32_t __user *)(unsigned long)(out_resp->props_ptr),
-			(uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr),
-			&out_resp->count_props);
-	drm_modeset_unlock(&dev->mode_config.connection_mutex);
-	if (ret)
-		goto out_unref;
-
 	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++)
 		if (connector->encoder_ids[i] != 0)
 			encoders_count++;
@@ -1256,7 +1241,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 				if (put_user(connector->encoder_ids[i],
 					     encoder_ptr + copied)) {
 					ret = -EFAULT;
-					goto out_unref;
+					goto out;
 				}
 				copied++;
 			}
@@ -1300,15 +1285,32 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 			if (copy_to_user(mode_ptr + copied,
 					 &u_mode, sizeof(u_mode))) {
 				ret = -EFAULT;
+				mutex_unlock(&dev->mode_config.mutex);
+
 				goto out;
 			}
 			copied++;
 		}
 	}
 	out_resp->count_modes = mode_count;
-out:
 	mutex_unlock(&dev->mode_config.mutex);
-out_unref:
+
+	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+	encoder = drm_connector_get_encoder(connector);
+	if (encoder)
+		out_resp->encoder_id = encoder->base.id;
+	else
+		out_resp->encoder_id = 0;
+
+	/* Only grab properties after probing, to make sure EDID and other
+	 * properties reflect the latest status. */
+	ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic,
+			(uint32_t __user *)(unsigned long)(out_resp->props_ptr),
+			(uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr),
+			&out_resp->count_props);
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+out:
 	drm_connector_put(connector);
 
 	return ret;
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 3e5f52110ea1..213fb837e1c4 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -1208,3 +1208,86 @@ int drm_dp_stop_crc(struct drm_dp_aux *aux)
 	return 0;
 }
 EXPORT_SYMBOL(drm_dp_stop_crc);
+
+struct dpcd_quirk {
+	u8 oui[3];
+	bool is_branch;
+	u32 quirks;
+};
+
+#define OUI(first, second, third) { (first), (second), (third) }
+
+static const struct dpcd_quirk dpcd_quirk_list[] = {
+	/* Analogix 7737 needs reduced M and N at HBR2 link rates */
+	{ OUI(0x00, 0x22, 0xb9), true, BIT(DP_DPCD_QUIRK_LIMITED_M_N) },
+};
+
+#undef OUI
+
+/*
+ * Get a bit mask of DPCD quirks for the sink/branch device identified by
+ * ident. The quirk data is shared but it's up to the drivers to act on the
+ * data.
+ *
+ * For now, only the OUI (first three bytes) is used, but this may be extended
+ * to device identification string and hardware/firmware revisions later.
+ */
+static u32
+drm_dp_get_quirks(const struct drm_dp_dpcd_ident *ident, bool is_branch)
+{
+	const struct dpcd_quirk *quirk;
+	u32 quirks = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dpcd_quirk_list); i++) {
+		quirk = &dpcd_quirk_list[i];
+
+		if (quirk->is_branch != is_branch)
+			continue;
+
+		if (memcmp(quirk->oui, ident->oui, sizeof(ident->oui)) != 0)
+			continue;
+
+		quirks |= quirk->quirks;
+	}
+
+	return quirks;
+}
+
+/**
+ * drm_dp_read_desc - read sink/branch descriptor from DPCD
+ * @aux: DisplayPort AUX channel
+ * @desc: Device decriptor to fill from DPCD
+ * @is_branch: true for branch devices, false for sink devices
+ *
+ * Read DPCD 0x400 (sink) or 0x500 (branch) into @desc. Also debug log the
+ * identification.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc,
+		     bool is_branch)
+{
+	struct drm_dp_dpcd_ident *ident = &desc->ident;
+	unsigned int offset = is_branch ? DP_BRANCH_OUI : DP_SINK_OUI;
+	int ret, dev_id_len;
+
+	ret = drm_dp_dpcd_read(aux, offset, ident, sizeof(*ident));
+	if (ret < 0)
+		return ret;
+
+	desc->quirks = drm_dp_get_quirks(ident, is_branch);
+
+	dev_id_len = strnlen(ident->device_id, sizeof(ident->device_id));
+
+	DRM_DEBUG_KMS("DP %s: OUI %*phD dev-ID %*pE HW-rev %d.%d SW-rev %d.%d quirks 0x%04x\n",
+		      is_branch ? "branch" : "sink",
+		      (int)sizeof(ident->oui), ident->oui,
+		      dev_id_len, ident->device_id,
+		      ident->hw_rev >> 4, ident->hw_rev & 0xf,
+		      ident->sw_major_rev, ident->sw_minor_rev,
+		      desc->quirks);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_dp_read_desc);
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index b5c6bb46a425..37b8ad3e30d8 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -358,7 +358,12 @@ EXPORT_SYMBOL(drm_put_dev);
 void drm_unplug_dev(struct drm_device *dev)
 {
 	/* for a USB device */
-	drm_dev_unregister(dev);
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		drm_modeset_unregister_all(dev);
+
+	drm_minor_unregister(dev, DRM_MINOR_PRIMARY);
+	drm_minor_unregister(dev, DRM_MINOR_RENDER);
+	drm_minor_unregister(dev, DRM_MINOR_CONTROL);
 
 	mutex_lock(&drm_global_mutex);
 
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index fad3d44e4642..2e55599816aa 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -80,6 +80,8 @@
 #define EDID_QUIRK_FORCE_12BPC			(1 << 9)
 /* Force 6bpc */
 #define EDID_QUIRK_FORCE_6BPC			(1 << 10)
+/* Force 10bpc */
+#define EDID_QUIRK_FORCE_10BPC			(1 << 11)
 
 struct detailed_mode_closure {
 	struct drm_connector *connector;
@@ -122,6 +124,9 @@ static const struct edid_quirk {
 	{ "FCM", 13600, EDID_QUIRK_PREFER_LARGE_75 |
 	  EDID_QUIRK_DETAILED_IN_CM },
 
+	/* LGD panel of HP zBook 17 G2, eDP 10 bpc, but reports unknown bpc */
+	{ "LGD", 764, EDID_QUIRK_FORCE_10BPC },
+
 	/* LG Philips LCD LP154W01-A5 */
 	{ "LPL", 0, EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE },
 	{ "LPL", 0x2a00, EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE },
@@ -4244,6 +4249,9 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
 	if (quirks & EDID_QUIRK_FORCE_8BPC)
 		connector->display_info.bpc = 8;
 
+	if (quirks & EDID_QUIRK_FORCE_10BPC)
+		connector->display_info.bpc = 10;
+
 	if (quirks & EDID_QUIRK_FORCE_12BPC)
 		connector->display_info.bpc = 12;
 
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index fedd4d60d9cd..5dc8c4350602 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -948,8 +948,6 @@ retry:
 	}
 
 out:
-	if (ret && crtc->funcs->page_flip_target)
-		drm_crtc_vblank_put(crtc);
 	if (fb)
 		drm_framebuffer_put(fb);
 	if (crtc->primary->old_fb)
@@ -964,5 +962,8 @@ out:
 	drm_modeset_drop_locks(&ctx);
 	drm_modeset_acquire_fini(&ctx);
 
+	if (ret && crtc->funcs->page_flip_target)
+		drm_crtc_vblank_put(crtc);
+
 	return ret;
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index c4a091e87426..e437fba1209d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -106,9 +106,10 @@ struct etnaviv_gem_submit {
 	struct etnaviv_gpu *gpu;
 	struct ww_acquire_ctx ticket;
 	struct dma_fence *fence;
+	u32 flags;
 	unsigned int nr_bos;
 	struct etnaviv_gem_submit_bo bos[0];
-	u32 flags;
+	/* No new members here, the previous one is variable-length! */
 };
 
 int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index e1909429837e..1013765274da 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -44,6 +44,7 @@ static struct etnaviv_gem_submit *submit_create(struct drm_device *dev,
 
 		/* initially, until copy_from_user() and bo lookup succeeds: */
 		submit->nr_bos = 0;
+		submit->fence = NULL;
 
 		ww_acquire_init(&submit->ticket, &reservation_ww_class);
 	}
@@ -171,7 +172,7 @@ static int submit_fence_sync(const struct etnaviv_gem_submit *submit)
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
 		bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
-		bool explicit = !(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
+		bool explicit = !!(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
 
 		ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write,
 						 explicit);
@@ -294,7 +295,8 @@ static void submit_cleanup(struct etnaviv_gem_submit *submit)
 	}
 
 	ww_acquire_fini(&submit->ticket);
-	dma_fence_put(submit->fence);
+	if (submit->fence)
+		dma_fence_put(submit->fence);
 	kfree(submit);
 }
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 09d3c4c3c858..50294a7bd29d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -82,14 +82,9 @@ err_file_priv_free:
 	return ret;
 }
 
-static void exynos_drm_preclose(struct drm_device *dev,
-					struct drm_file *file)
-{
-	exynos_drm_subdrv_close(dev, file);
-}
-
 static void exynos_drm_postclose(struct drm_device *dev, struct drm_file *file)
 {
+	exynos_drm_subdrv_close(dev, file);
 	kfree(file->driver_priv);
 	file->driver_priv = NULL;
 }
@@ -145,7 +140,6 @@ static struct drm_driver exynos_drm_driver = {
 	.driver_features	= DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME
 				  | DRIVER_ATOMIC | DRIVER_RENDER,
 	.open			= exynos_drm_open,
-	.preclose		= exynos_drm_preclose,
 	.lastclose		= exynos_drm_lastclose,
 	.postclose		= exynos_drm_postclose,
 	.gem_free_object_unlocked = exynos_drm_gem_free_object,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index cb3176930596..39c740572034 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -160,12 +160,9 @@ struct exynos_drm_clk {
  *	drm framework doesn't support multiple irq yet.
  *	we can refer to the crtc to current hardware interrupt occurred through
  *	this pipe value.
- * @enabled: if the crtc is enabled or not
- * @event: vblank event that is currently queued for flip
- * @wait_update: wait all pending planes updates to finish
- * @pending_update: number of pending plane updates in this crtc
  * @ops: pointer to callbacks for exynos drm specific functionality
  * @ctx: A pointer to the crtc's implementation specific context
+ * @pipe_clk: A pointer to the crtc's pipeline clock.
  */
 struct exynos_drm_crtc {
 	struct drm_crtc			base;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index fc4fda738906..d404de86d5f9 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -1633,7 +1633,6 @@ static int exynos_dsi_parse_dt(struct exynos_dsi *dsi)
 {
 	struct device *dev = dsi->dev;
 	struct device_node *node = dev->of_node;
-	struct device_node *ep;
 	int ret;
 
 	ret = exynos_dsi_of_read_u32(node, "samsung,pll-clock-frequency",
@@ -1641,32 +1640,21 @@ static int exynos_dsi_parse_dt(struct exynos_dsi *dsi)
 	if (ret < 0)
 		return ret;
 
-	ep = of_graph_get_endpoint_by_regs(node, DSI_PORT_OUT, 0);
-	if (!ep) {
-		dev_err(dev, "no output port with endpoint specified\n");
-		return -EINVAL;
-	}
-
-	ret = exynos_dsi_of_read_u32(ep, "samsung,burst-clock-frequency",
+	ret = exynos_dsi_of_read_u32(node, "samsung,burst-clock-frequency",
 				     &dsi->burst_clk_rate);
 	if (ret < 0)
-		goto end;
+		return ret;
 
-	ret = exynos_dsi_of_read_u32(ep, "samsung,esc-clock-frequency",
+	ret = exynos_dsi_of_read_u32(node, "samsung,esc-clock-frequency",
 				     &dsi->esc_clk_rate);
 	if (ret < 0)
-		goto end;
-
-	of_node_put(ep);
+		return ret;
 
 	dsi->bridge_node = of_graph_get_remote_node(node, DSI_PORT_OUT, 0);
 	if (!dsi->bridge_node)
 		return -EINVAL;
 
-end:
-	of_node_put(ep);
-
-	return ret;
+	return 0;
 }
 
 static int exynos_dsi_bind(struct device *dev, struct device *master,
@@ -1817,6 +1805,10 @@ static int exynos_dsi_probe(struct platform_device *pdev)
 
 static int exynos_dsi_remove(struct platform_device *pdev)
 {
+	struct exynos_dsi *dsi = platform_get_drvdata(pdev);
+
+	of_node_put(dsi->bridge_node);
+
 	pm_runtime_disable(&pdev->dev);
 
 	component_del(&pdev->dev, &exynos_dsi_component_ops);
diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c
index 0066fe7e622e..be3eefec5152 100644
--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
@@ -759,20 +759,23 @@ void psb_intel_lvds_init(struct drm_device *dev,
 		if (scan->type & DRM_MODE_TYPE_PREFERRED) {
 			mode_dev->panel_fixed_mode =
 			    drm_mode_duplicate(dev, scan);
+			DRM_DEBUG_KMS("Using mode from DDC\n");
 			goto out;	/* FIXME: check for quirks */
 		}
 	}
 
 	/* Failed to get EDID, what about VBT? do we need this? */
-	if (mode_dev->vbt_mode)
+	if (dev_priv->lfp_lvds_vbt_mode) {
 		mode_dev->panel_fixed_mode =
-		    drm_mode_duplicate(dev, mode_dev->vbt_mode);
+			drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode);
 
-	if (!mode_dev->panel_fixed_mode)
-		if (dev_priv->lfp_lvds_vbt_mode)
-			mode_dev->panel_fixed_mode =
-				drm_mode_duplicate(dev,
-					dev_priv->lfp_lvds_vbt_mode);
+		if (mode_dev->panel_fixed_mode) {
+			mode_dev->panel_fixed_mode->type |=
+				DRM_MODE_TYPE_PREFERRED;
+			DRM_DEBUG_KMS("Using mode from VBT\n");
+			goto out;
+		}
+	}
 
 	/*
 	 * If we didn't get EDID, try checking if the panel is already turned
@@ -789,6 +792,7 @@ void psb_intel_lvds_init(struct drm_device *dev,
 		if (mode_dev->panel_fixed_mode) {
 			mode_dev->panel_fixed_mode->type |=
 			    DRM_MODE_TYPE_PREFERRED;
+			DRM_DEBUG_KMS("Using pre-programmed mode\n");
 			goto out;	/* FIXME: check for quirks */
 		}
 	}
diff --git a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
index 5abc69c9630f..f77dcfaade6c 100644
--- a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
+++ b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
@@ -760,7 +760,7 @@ static int dsi_parse_dt(struct platform_device *pdev, struct dw_dsi *dsi)
 	 * Get the endpoint node. In our case, dsi has one output port1
 	 * to which the external HDMI bridge is connected.
 	 */
-	ret = drm_of_find_panel_or_bridge(np, 0, 0, NULL, &dsi->bridge);
+	ret = drm_of_find_panel_or_bridge(np, 1, 0, NULL, &dsi->bridge);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index e091809a9a9e..b00edd3b8800 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -87,3 +87,16 @@ config DRM_I915_LOW_LEVEL_TRACEPOINTS
           and also analyze the request dependency resolving timeline.
 
           If in doubt, say "N".
+
+config DRM_I915_DEBUG_VBLANK_EVADE
+	bool "Enable extra debug warnings for vblank evasion"
+	depends on DRM_I915
+	default n
+	help
+	  Choose this option to turn on extra debug warnings for the
+	  vblank evade mechanism. This gives a warning every time the
+	  the deadline allotted for the vblank evade critical section
+	  is exceeded, even if there isn't an actual risk of missing
+	  the vblank.
+
+	  If in doubt, say "N".
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index dca989eb2d42..24fe04d6307b 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -779,8 +779,26 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
 	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
 }
 
+static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+	struct intel_engine_cs *engine;
+	struct intel_vgpu_workload *pos, *n;
+	unsigned int tmp;
+
+	/* free the unsubmited workloads in the queues. */
+	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
+		list_for_each_entry_safe(pos, n,
+			&vgpu->workload_q_head[engine->id], list) {
+			list_del_init(&pos->list);
+			free_workload(pos);
+		}
+	}
+}
+
 void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu)
 {
+	clean_workloads(vgpu, ALL_ENGINES);
 	kmem_cache_destroy(vgpu->workloads);
 }
 
@@ -811,17 +829,9 @@ void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct intel_engine_cs *engine;
-	struct intel_vgpu_workload *pos, *n;
 	unsigned int tmp;
 
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
-		/* free the unsubmited workload in the queue */
-		list_for_each_entry_safe(pos, n,
-			&vgpu->workload_q_head[engine->id], list) {
-			list_del_init(&pos->list);
-			free_workload(pos);
-		}
-
+	clean_workloads(vgpu, engine_mask);
+	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
 		init_vgpu_execlist(vgpu, engine->id);
-	}
 }
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 0ad1a508e2af..0ffd69654592 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1244,7 +1244,7 @@ static int dma_ctrl_write(struct intel_vgpu *vgpu, unsigned int offset,
 	mode = vgpu_vreg(vgpu, offset);
 
 	if (GFX_MODE_BIT_SET_IN_MASK(mode, START_DMA)) {
-		WARN_ONCE(1, "VM(%d): iGVT-g doesn't supporte GuC\n",
+		WARN_ONCE(1, "VM(%d): iGVT-g doesn't support GuC\n",
 				vgpu->id);
 		return 0;
 	}
@@ -1366,18 +1366,28 @@ static int skl_misc_ctl_write(struct intel_vgpu *vgpu, unsigned int offset,
 		void *p_data, unsigned int bytes)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	i915_reg_t reg = {.reg = offset};
+	u32 v = *(u32 *)p_data;
+
+	if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv))
+		return intel_vgpu_default_mmio_write(vgpu,
+				offset, p_data, bytes);
 
 	switch (offset) {
 	case 0x4ddc:
-		vgpu_vreg(vgpu, offset) = 0x8000003c;
-		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl */
-		I915_WRITE(reg, vgpu_vreg(vgpu, offset));
+		/* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */
+		vgpu_vreg(vgpu, offset) = v & ~(1 << 31);
 		break;
 	case 0x42080:
-		vgpu_vreg(vgpu, offset) = 0x8000;
-		/* WaCompressedResourceDisplayNewHashMode:skl */
-		I915_WRITE(reg, vgpu_vreg(vgpu, offset));
+		/* bypass WaCompressedResourceDisplayNewHashMode */
+		vgpu_vreg(vgpu, offset) = v & ~(1 << 15);
+		break;
+	case 0xe194:
+		/* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */
+		vgpu_vreg(vgpu, offset) = v & ~(1 << 8);
+		break;
+	case 0x7014:
+		/* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */
+		vgpu_vreg(vgpu, offset) = v & ~(1 << 13);
 		break;
 	default:
 		return -EINVAL;
@@ -1634,7 +1644,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
 		NULL, NULL);
-	MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL,
+		 skl_misc_ctl_write);
 	MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL);
@@ -2568,7 +2579,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0x6e570, D_BDW_PLUS);
 	MMIO_D(0x65f10, D_BDW_PLUS);
 
-	MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL,
+		 skl_misc_ctl_write);
 	MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c
index c6e7972ac21d..a5e11d89df2f 100644
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@@ -340,6 +340,9 @@ void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id)
 		} else
 			v = mmio->value;
 
+		if (mmio->in_context)
+			continue;
+
 		I915_WRITE(mmio->reg, v);
 		POSTING_READ(mmio->reg);
 
diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
index 79ba4b3440aa..f25ff133865f 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -129,9 +129,13 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
 	struct vgpu_sched_data *vgpu_data;
 	ktime_t cur_time;
 
-	/* no target to schedule */
-	if (!scheduler->next_vgpu)
+	/* no need to schedule if next_vgpu is the same with current_vgpu,
+	 * let scheduler chose next_vgpu again by setting it to NULL.
+	 */
+	if (scheduler->next_vgpu == scheduler->current_vgpu) {
+		scheduler->next_vgpu = NULL;
 		return;
+	}
 
 	/*
 	 * after the flag is set, workload dispatch thread will
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d689e511744e..4bd1467c17b1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -292,6 +292,8 @@ static int per_file_stats(int id, void *ptr, void *data)
 	struct file_stats *stats = data;
 	struct i915_vma *vma;
 
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
 	stats->count++;
 	stats->total += obj->base.size;
 	if (!obj->bind_count)
@@ -476,6 +478,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		struct drm_i915_gem_request *request;
 		struct task_struct *task;
 
+		mutex_lock(&dev->struct_mutex);
+
 		memset(&stats, 0, sizeof(stats));
 		stats.file_priv = file->driver_priv;
 		spin_lock(&file->table_lock);
@@ -487,7 +491,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		 * still alive (e.g. get_pid(current) => fork() => exit()).
 		 * Therefore, we need to protect this ->comm access using RCU.
 		 */
-		mutex_lock(&dev->struct_mutex);
 		request = list_first_entry_or_null(&file_priv->mm.request_list,
 						   struct drm_i915_gem_request,
 						   client_link);
@@ -497,6 +500,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 				PIDTYPE_PID);
 		print_file_stats(m, task ? task->comm : "<unknown>", stats);
 		rcu_read_unlock();
+
 		mutex_unlock(&dev->struct_mutex);
 	}
 	mutex_unlock(&dev->filelist_mutex);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3036d4835b0f..48428672fc6e 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1235,6 +1235,15 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto out_fini;
 
 	pci_set_drvdata(pdev, &dev_priv->drm);
+	/*
+	 * Disable the system suspend direct complete optimization, which can
+	 * leave the device suspended skipping the driver's suspend handlers
+	 * if the device was already runtime suspended. This is needed due to
+	 * the difference in our runtime and system suspend sequence and
+	 * becaue the HDA driver may require us to enable the audio power
+	 * domain during system suspend.
+	 */
+	pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME;
 
 	ret = i915_driver_init_early(dev_priv, ent);
 	if (ret < 0)
@@ -1272,10 +1281,6 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	dev_priv->ipc_enabled = false;
 
-	/* Everything is in place, we can now relax! */
-	DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n",
-		 driver.name, driver.major, driver.minor, driver.patchlevel,
-		 driver.date, pci_name(pdev), dev_priv->drm.primary->index);
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG))
 		DRM_INFO("DRM_I915_DEBUG enabled\n");
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c9b0949f6c1a..2c453a4e97d5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -562,7 +562,8 @@ struct intel_link_m_n {
 
 void intel_link_compute_m_n(int bpp, int nlanes,
 			    int pixel_clock, int link_clock,
-			    struct intel_link_m_n *m_n);
+			    struct intel_link_m_n *m_n,
+			    bool reduce_m_n);
 
 /* Interface history:
  *
@@ -2990,6 +2991,16 @@ static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
 	return false;
 }
 
+static inline bool
+intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv)
+{
+#ifdef CONFIG_INTEL_IOMMU
+	if (IS_BROXTON(dev_priv) && intel_iommu_gfx_mapped)
+		return true;
+#endif
+	return false;
+}
+
 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
 				int enable_ppgtt);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 532a577ff7a1..615f0a855222 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2285,8 +2285,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	struct page *page;
 	unsigned long last_pfn = 0;	/* suppress gcc warning */
 	unsigned int max_segment;
+	gfp_t noreclaim;
 	int ret;
-	gfp_t gfp;
 
 	/* Assert that the object is not currently in any GPU domain. As it
 	 * wasn't in the GTT, there shouldn't be any way it could have been in
@@ -2315,22 +2315,31 @@ rebuild_st:
 	 * Fail silently without starting the shrinker
 	 */
 	mapping = obj->base.filp->f_mapping;
-	gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
-	gfp |= __GFP_NORETRY | __GFP_NOWARN;
+	noreclaim = mapping_gfp_constraint(mapping,
+					   ~(__GFP_IO | __GFP_RECLAIM));
+	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
 	sg = st->sgl;
 	st->nents = 0;
 	for (i = 0; i < page_count; i++) {
-		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-		if (unlikely(IS_ERR(page))) {
-			i915_gem_shrink(dev_priv,
-					page_count,
-					I915_SHRINK_BOUND |
-					I915_SHRINK_UNBOUND |
-					I915_SHRINK_PURGEABLE);
+		const unsigned int shrink[] = {
+			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
+			0,
+		}, *s = shrink;
+		gfp_t gfp = noreclaim;
+
+		do {
 			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-		}
-		if (unlikely(IS_ERR(page))) {
-			gfp_t reclaim;
+			if (likely(!IS_ERR(page)))
+				break;
+
+			if (!*s) {
+				ret = PTR_ERR(page);
+				goto err_sg;
+			}
+
+			i915_gem_shrink(dev_priv, 2 * page_count, *s++);
+			cond_resched();
 
 			/* We've tried hard to allocate the memory by reaping
 			 * our own buffer, now let the real VM do its job and
@@ -2340,15 +2349,26 @@ rebuild_st:
 			 * defer the oom here by reporting the ENOMEM back
 			 * to userspace.
 			 */
-			reclaim = mapping_gfp_mask(mapping);
-			reclaim |= __GFP_NORETRY; /* reclaim, but no oom */
-
-			page = shmem_read_mapping_page_gfp(mapping, i, reclaim);
-			if (IS_ERR(page)) {
-				ret = PTR_ERR(page);
-				goto err_sg;
+			if (!*s) {
+				/* reclaim and warn, but no oom */
+				gfp = mapping_gfp_mask(mapping);
+
+				/* Our bo are always dirty and so we require
+				 * kswapd to reclaim our pages (direct reclaim
+				 * does not effectively begin pageout of our
+				 * buffers on its own). However, direct reclaim
+				 * only waits for kswapd when under allocation
+				 * congestion. So as a result __GFP_RECLAIM is
+				 * unreliable and fails to actually reclaim our
+				 * dirty pages -- unless you try over and over
+				 * again with !__GFP_NORETRY. However, we still
+				 * want to fail this allocation rather than
+				 * trigger the out-of-memory killer and for
+				 * this we want the future __GFP_MAYFAIL.
+				 */
 			}
-		}
+		} while (1);
+
 		if (!i ||
 		    sg->length >= max_segment ||
 		    page_to_pfn(page) != last_pfn + 1) {
@@ -3298,6 +3318,10 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 {
 	int ret;
 
+	/* If the device is asleep, we have no requests outstanding */
+	if (!READ_ONCE(i915->gt.awake))
+		return 0;
+
 	if (flags & I915_WAIT_LOCKED) {
 		struct i915_gem_timeline *tl;
 
@@ -4218,6 +4242,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 
 	mapping = obj->base.filp->f_mapping;
 	mapping_set_gfp_mask(mapping, mask);
+	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
 
 	i915_gem_object_init(obj, &i915_gem_object_ops);
 
@@ -4789,7 +4814,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
 					SLAB_HWCACHE_ALIGN |
 					SLAB_RECLAIM_ACCOUNT |
-					SLAB_DESTROY_BY_RCU);
+					SLAB_TYPESAFE_BY_RCU);
 	if (!dev_priv->requests)
 		goto err_vmas;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a3e59c8ef27b..9ad13eeed904 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -546,11 +546,12 @@ repeat:
 }
 
 static int
-i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
+i915_gem_execbuffer_relocate_entry(struct i915_vma *vma,
 				   struct eb_vmas *eb,
 				   struct drm_i915_gem_relocation_entry *reloc,
 				   struct reloc_cache *cache)
 {
+	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct drm_gem_object *target_obj;
 	struct drm_i915_gem_object *target_i915_obj;
@@ -628,6 +629,16 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		return -EINVAL;
 	}
 
+	/*
+	 * If we write into the object, we need to force the synchronisation
+	 * barrier, either with an asynchronous clflush or if we executed the
+	 * patching using the GPU (though that should be serialised by the
+	 * timeline). To be completely sure, and since we are required to
+	 * do relocations we are already stalling, disable the user's opt
+	 * of our synchronisation.
+	 */
+	vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
+
 	ret = relocate_entry(obj, reloc, cache, target_offset);
 	if (ret)
 		return ret;
@@ -678,7 +689,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
 		do {
 			u64 offset = r->presumed_offset;
 
-			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
+			ret = i915_gem_execbuffer_relocate_entry(vma, eb, r, &cache);
 			if (ret)
 				goto out;
 
@@ -726,7 +737,7 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
 
 	reloc_cache_init(&cache, eb->i915);
 	for (i = 0; i < entry->relocation_count; i++) {
-		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
+		ret = i915_gem_execbuffer_relocate_entry(vma, eb, &relocs[i], &cache);
 		if (ret)
 			break;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2aa6b97fd22f..f1989b8792dd 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -195,9 +195,12 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 	u32 pte_flags;
 	int ret;
 
-	ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size);
-	if (ret)
-		return ret;
+	if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
+		ret = vma->vm->allocate_va_range(vma->vm, vma->node.start,
+						 vma->size);
+		if (ret)
+			return ret;
+	}
 
 	vma->pages = vma->obj->mm.pages;
 
@@ -2188,6 +2191,101 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 		gen8_set_pte(&gtt_base[i], scratch_pte);
 }
 
+static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
+{
+	struct drm_i915_private *dev_priv = vm->i915;
+
+	/*
+	 * Make sure the internal GAM fifo has been cleared of all GTT
+	 * writes before exiting stop_machine(). This guarantees that
+	 * any aperture accesses waiting to start in another process
+	 * cannot back up behind the GTT writes causing a hang.
+	 * The register can be any arbitrary GAM register.
+	 */
+	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+}
+
+struct insert_page {
+	struct i915_address_space *vm;
+	dma_addr_t addr;
+	u64 offset;
+	enum i915_cache_level level;
+};
+
+static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
+{
+	struct insert_page *arg = _arg;
+
+	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
+					  dma_addr_t addr,
+					  u64 offset,
+					  enum i915_cache_level level,
+					  u32 unused)
+{
+	struct insert_page arg = { vm, addr, offset, level };
+
+	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
+}
+
+struct insert_entries {
+	struct i915_address_space *vm;
+	struct sg_table *st;
+	u64 start;
+	enum i915_cache_level level;
+};
+
+static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
+{
+	struct insert_entries *arg = _arg;
+
+	gen8_ggtt_insert_entries(arg->vm, arg->st, arg->start, arg->level, 0);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
+					     struct sg_table *st,
+					     u64 start,
+					     enum i915_cache_level level,
+					     u32 unused)
+{
+	struct insert_entries arg = { vm, st, start, level };
+
+	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
+}
+
+struct clear_range {
+	struct i915_address_space *vm;
+	u64 start;
+	u64 length;
+};
+
+static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
+{
+	struct clear_range *arg = _arg;
+
+	gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
+					  u64 start,
+					  u64 length)
+{
+	struct clear_range arg = { vm, start, length };
+
+	stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
+}
+
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 				  u64 start, u64 length)
 {
@@ -2306,10 +2404,11 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	if (flags & I915_VMA_LOCAL_BIND) {
 		struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
 
-		if (appgtt->base.allocate_va_range) {
+		if (!(vma->flags & I915_VMA_LOCAL_BIND) &&
+		    appgtt->base.allocate_va_range) {
 			ret = appgtt->base.allocate_va_range(&appgtt->base,
 							     vma->node.start,
-							     vma->node.size);
+							     vma->size);
 			if (ret)
 				goto err_pages;
 		}
@@ -2781,6 +2880,14 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
 	ggtt->base.insert_entries = gen8_ggtt_insert_entries;
 
+	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
+	if (intel_ggtt_update_needs_vtd_wa(dev_priv)) {
+		ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
+		ggtt->base.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
+		if (ggtt->base.clear_range != nop_clear_range)
+			ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL;
+	}
+
 	ggtt->invalidate = gen6_ggtt_invalidate;
 
 	return ggtt_probe_common(ggtt, size);
@@ -2993,7 +3100,8 @@ void i915_ggtt_enable_guc(struct drm_i915_private *i915)
 
 void i915_ggtt_disable_guc(struct drm_i915_private *i915)
 {
-	i915->ggtt.invalidate = gen6_ggtt_invalidate;
+	if (i915->ggtt.invalidate == guc_ggtt_invalidate)
+		i915->ggtt.invalidate = gen6_ggtt_invalidate;
 }
 
 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5ddbc9499775..a74d0ac737cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -623,7 +623,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * GPU processing the request, we never over-estimate the
 	 * position of the head.
 	 */
-	req->head = req->ring->tail;
+	req->head = req->ring->emit;
 
 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index a211c53c813f..a4a920c4c454 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -123,7 +123,7 @@ struct drm_i915_gem_request {
 	 * It is used by the driver to then queue the request for execution.
 	 */
 	struct i915_sw_fence submit;
-	wait_queue_t submitq;
+	wait_queue_entry_t submitq;
 	wait_queue_head_t execute;
 
 	/* A list of everyone we wait upon, and everyone who waits upon us.
@@ -521,7 +521,7 @@ static inline struct drm_i915_gem_request *
 __i915_gem_active_get_rcu(const struct i915_gem_active *active)
 {
 	/* Performing a lockless retrieval of the active request is super
-	 * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing
+	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
 	 * slab of request objects will not be freed whilst we hold the
 	 * RCU read lock. It does not guarantee that the request itself
 	 * will not be freed and then *reused*. Viz,
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 129ed303a6c4..57d9f7f4ef15 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -59,9 +59,6 @@ static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock)
 		return;
 
 	mutex_unlock(&dev->struct_mutex);
-
-	/* expedite the RCU grace period to free some request slabs */
-	synchronize_rcu_expedited();
 }
 
 static bool any_vma_pinned(struct drm_i915_gem_object *obj)
@@ -274,8 +271,6 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 				I915_SHRINK_ACTIVE);
 	intel_runtime_pm_put(dev_priv);
 
-	synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */
-
 	return freed;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index a0d6d4317a49..fb5231f98c0d 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -278,7 +278,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 			obj->mm.quirked = false;
 		}
 		if (!i915_gem_object_is_tiled(obj)) {
-			GEM_BUG_ON(!obj->mm.quirked);
+			GEM_BUG_ON(obj->mm.quirked);
 			__i915_gem_object_pin_pages(obj);
 			obj->mm.quirked = true;
 		}
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 1642fff9cf13..ab5140ba108d 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
 	GEM_BUG_ON(freespace < wqi_size);
 
 	/* The GuC firmware wants the tail index in QWords, not bytes */
-	tail = rq->tail;
-	assert_ring_tail_valid(rq->ring, rq->tail);
-	tail >>= 3;
+	tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
 	GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
 
 	/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index fd97fe00cd0d..190f6aa5d15e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2953,7 +2953,6 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
 	u32 pipestat_mask;
 	u32 enable_mask;
 	enum pipe pipe;
-	u32 val;
 
 	pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV |
 			PIPE_CRC_DONE_INTERRUPT_STATUS;
@@ -2964,18 +2963,16 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
 
 	enable_mask = I915_DISPLAY_PORT_INTERRUPT |
 		I915_DISPLAY_PIPE_A_EVENT_INTERRUPT |
-		I915_DISPLAY_PIPE_B_EVENT_INTERRUPT;
+		I915_DISPLAY_PIPE_B_EVENT_INTERRUPT |
+		I915_LPE_PIPE_A_INTERRUPT |
+		I915_LPE_PIPE_B_INTERRUPT;
+
 	if (IS_CHERRYVIEW(dev_priv))
-		enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT;
+		enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT |
+			I915_LPE_PIPE_C_INTERRUPT;
 
 	WARN_ON(dev_priv->irq_mask != ~0);
 
-	val = (I915_LPE_PIPE_A_INTERRUPT |
-		I915_LPE_PIPE_B_INTERRUPT |
-		I915_LPE_PIPE_C_INTERRUPT);
-
-	enable_mask |= val;
-
 	dev_priv->irq_mask = ~enable_mask;
 
 	GEN5_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask);
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index f87b0c4e564d..1a78363c7f4a 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -208,7 +208,7 @@ static const struct intel_device_info intel_ironlake_d_info = {
 static const struct intel_device_info intel_ironlake_m_info = {
 	GEN5_FEATURES,
 	.platform = INTEL_IRONLAKE,
-	.is_mobile = 1,
+	.is_mobile = 1, .has_fbc = 1,
 };
 
 #define GEN6_FEATURES \
@@ -390,7 +390,6 @@ static const struct intel_device_info intel_skylake_gt3_info = {
 	.has_hw_contexts = 1, \
 	.has_logical_ring_contexts = 1, \
 	.has_guc = 1, \
-	.has_decoupled_mmio = 1, \
 	.has_aliasing_ppgtt = 1, \
 	.has_full_ppgtt = 1, \
 	.has_full_48bit_ppgtt = 1, \
diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h
index c0cb2974caac..2cfe96d3e5d1 100644
--- a/drivers/gpu/drm/i915/i915_pvinfo.h
+++ b/drivers/gpu/drm/i915/i915_pvinfo.h
@@ -36,10 +36,6 @@
 #define VGT_VERSION_MAJOR 1
 #define VGT_VERSION_MINOR 0
 
-#define INTEL_VGT_IF_VERSION_ENCODE(major, minor) ((major) << 16 | (minor))
-#define INTEL_VGT_IF_VERSION \
-	INTEL_VGT_IF_VERSION_ENCODE(VGT_VERSION_MAJOR, VGT_VERSION_MINOR)
-
 /*
  * notifications from guest to vgpu device model
  */
@@ -55,8 +51,8 @@ enum vgt_g2v_type {
 
 struct vgt_if {
 	u64 magic;		/* VGT_MAGIC */
-	uint16_t version_major;
-	uint16_t version_minor;
+	u16 version_major;
+	u16 version_minor;
 	u32 vgt_id;		/* ID of vGT instance */
 	u32 rsv1[12];		/* pad to offset 0x40 */
 	/*
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 11b12f412492..65b837e96fe6 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3051,10 +3051,14 @@ enum skl_disp_power_wells {
 #define CLKCFG_FSB_667					(3 << 0)	/* hrawclk 166 */
 #define CLKCFG_FSB_800					(2 << 0)	/* hrawclk 200 */
 #define CLKCFG_FSB_1067					(6 << 0)	/* hrawclk 266 */
+#define CLKCFG_FSB_1067_ALT				(0 << 0)	/* hrawclk 266 */
 #define CLKCFG_FSB_1333					(7 << 0)	/* hrawclk 333 */
-/* Note, below two are guess */
-#define CLKCFG_FSB_1600					(4 << 0)	/* hrawclk 400 */
-#define CLKCFG_FSB_1600_ALT				(0 << 0)	/* hrawclk 400 */
+/*
+ * Note that on at least on ELK the below value is reported for both
+ * 333 and 400 MHz BIOS FSB setting, but given that the gmch datasheet
+ * lists only 200/266/333 MHz FSB as supported let's decode it as 333 MHz.
+ */
+#define CLKCFG_FSB_1333_ALT				(4 << 0)	/* hrawclk 333 */
 #define CLKCFG_FSB_MASK					(7 << 0)
 #define CLKCFG_MEM_533					(1 << 4)
 #define CLKCFG_MEM_667					(2 << 4)
@@ -8276,7 +8280,7 @@ enum {
 
 /* MIPI DSI registers */
 
-#define _MIPI_PORT(port, a, c)	((port) ? c : a)	/* ports A and C only */
+#define _MIPI_PORT(port, a, c)	(((port) == PORT_A) ? a : c)	/* ports A and C only */
 #define _MMIO_MIPI(port, a, c)	_MMIO(_MIPI_PORT(port, a, c))
 
 #define MIPIO_TXESC_CLK_DIV1			_MMIO(0x160004)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index a277f8eb7beb..380de4360b8a 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -152,7 +152,7 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
 					struct list_head *continuation)
 {
 	wait_queue_head_t *x = &fence->wait;
-	wait_queue_t *pos, *next;
+	wait_queue_entry_t *pos, *next;
 	unsigned long flags;
 
 	debug_fence_deactivate(fence);
@@ -160,31 +160,30 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
 
 	/*
 	 * To prevent unbounded recursion as we traverse the graph of
-	 * i915_sw_fences, we move the task_list from this, the next ready
-	 * fence, to the tail of the original fence's task_list
+	 * i915_sw_fences, we move the entry list from this, the next ready
+	 * fence, to the tail of the original fence's entry list
 	 * (and so added to the list to be woken).
 	 */
 
 	spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation);
 	if (continuation) {
-		list_for_each_entry_safe(pos, next, &x->task_list, task_list) {
+		list_for_each_entry_safe(pos, next, &x->head, entry) {
 			if (pos->func == autoremove_wake_function)
 				pos->func(pos, TASK_NORMAL, 0, continuation);
 			else
-				list_move_tail(&pos->task_list, continuation);
+				list_move_tail(&pos->entry, continuation);
 		}
 	} else {
 		LIST_HEAD(extra);
 
 		do {
-			list_for_each_entry_safe(pos, next,
-						 &x->task_list, task_list)
+			list_for_each_entry_safe(pos, next, &x->head, entry)
 				pos->func(pos, TASK_NORMAL, 0, &extra);
 
 			if (list_empty(&extra))
 				break;
 
-			list_splice_tail_init(&extra, &x->task_list);
+			list_splice_tail_init(&extra, &x->head);
 		} while (1);
 	}
 	spin_unlock_irqrestore(&x->lock, flags);
@@ -254,9 +253,9 @@ void i915_sw_fence_commit(struct i915_sw_fence *fence)
 	__i915_sw_fence_commit(fence);
 }
 
-static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
+static int i915_sw_fence_wake(wait_queue_entry_t *wq, unsigned mode, int flags, void *key)
 {
-	list_del(&wq->task_list);
+	list_del(&wq->entry);
 	__i915_sw_fence_complete(wq->private, key);
 	i915_sw_fence_put(wq->private);
 	if (wq->flags & I915_SW_FENCE_FLAG_ALLOC)
@@ -267,7 +266,7 @@ static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *
 static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 				    const struct i915_sw_fence * const signaler)
 {
-	wait_queue_t *wq;
+	wait_queue_entry_t *wq;
 
 	if (__test_and_set_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
 		return false;
@@ -275,7 +274,7 @@ static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 	if (fence == signaler)
 		return true;
 
-	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+	list_for_each_entry(wq, &fence->wait.head, entry) {
 		if (wq->func != i915_sw_fence_wake)
 			continue;
 
@@ -288,12 +287,12 @@ static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 
 static void __i915_sw_fence_clear_checked_bit(struct i915_sw_fence *fence)
 {
-	wait_queue_t *wq;
+	wait_queue_entry_t *wq;
 
 	if (!__test_and_clear_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
 		return;
 
-	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+	list_for_each_entry(wq, &fence->wait.head, entry) {
 		if (wq->func != i915_sw_fence_wake)
 			continue;
 
@@ -320,7 +319,7 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 
 static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 					  struct i915_sw_fence *signaler,
-					  wait_queue_t *wq, gfp_t gfp)
+					  wait_queue_entry_t *wq, gfp_t gfp)
 {
 	unsigned long flags;
 	int pending;
@@ -350,7 +349,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 		pending |= I915_SW_FENCE_FLAG_ALLOC;
 	}
 
-	INIT_LIST_HEAD(&wq->task_list);
+	INIT_LIST_HEAD(&wq->entry);
 	wq->flags = pending;
 	wq->func = i915_sw_fence_wake;
 	wq->private = i915_sw_fence_get(fence);
@@ -359,7 +358,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 
 	spin_lock_irqsave(&signaler->wait.lock, flags);
 	if (likely(!i915_sw_fence_done(signaler))) {
-		__add_wait_queue_tail(&signaler->wait, wq);
+		__add_wait_queue_entry_tail(&signaler->wait, wq);
 		pending = 1;
 	} else {
 		i915_sw_fence_wake(wq, 0, 0, NULL);
@@ -372,7 +371,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 
 int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 				 struct i915_sw_fence *signaler,
-				 wait_queue_t *wq)
+				 wait_queue_entry_t *wq)
 {
 	return __i915_sw_fence_await_sw_fence(fence, signaler, wq, 0);
 }
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index d31cefbbcc04..fd3c3bf6c8b7 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -66,7 +66,7 @@ void i915_sw_fence_commit(struct i915_sw_fence *fence);
 
 int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 				 struct i915_sw_fence *after,
-				 wait_queue_t *wq);
+				 wait_queue_entry_t *wq);
 int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 				     struct i915_sw_fence *after,
 				     gfp_t gfp);
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 4ab8a973b61f..2e739018fb4c 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -60,8 +60,8 @@
  */
 void i915_check_vgpu(struct drm_i915_private *dev_priv)
 {
-	uint64_t magic;
-	uint32_t version;
+	u64 magic;
+	u16 version_major;
 
 	BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
 
@@ -69,10 +69,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
 	if (magic != VGT_MAGIC)
 		return;
 
-	version = INTEL_VGT_IF_VERSION_ENCODE(
-		__raw_i915_read16(dev_priv, vgtif_reg(version_major)),
-		__raw_i915_read16(dev_priv, vgtif_reg(version_minor)));
-	if (version != INTEL_VGT_IF_VERSION) {
+	version_major = __raw_i915_read16(dev_priv, vgtif_reg(version_major));
+	if (version_major < VGT_VERSION_MAJOR) {
 		DRM_INFO("VGT interface version mismatch!\n");
 		return;
 	}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 1aba47024656..f066e2d785f5 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -650,6 +650,11 @@ int i915_vma_unbind(struct i915_vma *vma)
 				break;
 		}
 
+		if (!ret) {
+			ret = i915_gem_active_retire(&vma->last_fence,
+						     &vma->vm->i915->drm.struct_mutex);
+		}
+
 		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c
index eb638a1e69d2..42fb436f6cdc 100644
--- a/drivers/gpu/drm/i915/intel_acpi.c
+++ b/drivers/gpu/drm/i915/intel_acpi.c
@@ -15,13 +15,9 @@ static struct intel_dsm_priv {
 	acpi_handle dhandle;
 } intel_dsm_priv;
 
-static const u8 intel_dsm_guid[] = {
-	0xd3, 0x73, 0xd8, 0x7e,
-	0xd0, 0xc2,
-	0x4f, 0x4e,
-	0xa8, 0x54,
-	0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c
-};
+static const guid_t intel_dsm_guid =
+	GUID_INIT(0x7ed873d3, 0xc2d0, 0x4e4f,
+		  0xa8, 0x54, 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c);
 
 static char *intel_dsm_port_name(u8 id)
 {
@@ -80,7 +76,7 @@ static void intel_dsm_platform_mux_info(void)
 	int i;
 	union acpi_object *pkg, *connector_count;
 
-	pkg = acpi_evaluate_dsm_typed(intel_dsm_priv.dhandle, intel_dsm_guid,
+	pkg = acpi_evaluate_dsm_typed(intel_dsm_priv.dhandle, &intel_dsm_guid,
 			INTEL_DSM_REVISION_ID, INTEL_DSM_FN_PLATFORM_MUX_INFO,
 			NULL, ACPI_TYPE_PACKAGE);
 	if (!pkg) {
@@ -118,7 +114,7 @@ static bool intel_dsm_pci_probe(struct pci_dev *pdev)
 	if (!dhandle)
 		return false;
 
-	if (!acpi_check_dsm(dhandle, intel_dsm_guid, INTEL_DSM_REVISION_ID,
+	if (!acpi_check_dsm(dhandle, &intel_dsm_guid, INTEL_DSM_REVISION_ID,
 			    1 << INTEL_DSM_FN_PLATFORM_MUX_INFO)) {
 		DRM_DEBUG_KMS("no _DSM method for intel device\n");
 		return false;
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index dd3ad52b7dfe..f29a226e24d8 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -1798,13 +1798,11 @@ static int g4x_hrawclk(struct drm_i915_private *dev_priv)
 	case CLKCFG_FSB_800:
 		return 200000;
 	case CLKCFG_FSB_1067:
+	case CLKCFG_FSB_1067_ALT:
 		return 266667;
 	case CLKCFG_FSB_1333:
+	case CLKCFG_FSB_1333_ALT:
 		return 333333;
-	/* these two are just a guess; one of them might be right */
-	case CLKCFG_FSB_1600:
-	case CLKCFG_FSB_1600_ALT:
-		return 400000;
 	default:
 		return 133333;
 	}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3617927af269..9106ea32b048 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -120,7 +120,8 @@ static void intel_crtc_init_scalers(struct intel_crtc *crtc,
 static void skylake_pfit_enable(struct intel_crtc *crtc);
 static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force);
 static void ironlake_pfit_enable(struct intel_crtc *crtc);
-static void intel_modeset_setup_hw_state(struct drm_device *dev);
+static void intel_modeset_setup_hw_state(struct drm_device *dev,
+					 struct drm_modeset_acquire_ctx *ctx);
 static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc);
 
 struct intel_limit {
@@ -3449,7 +3450,7 @@ __intel_display_resume(struct drm_device *dev,
 	struct drm_crtc *crtc;
 	int i, ret;
 
-	intel_modeset_setup_hw_state(dev);
+	intel_modeset_setup_hw_state(dev, ctx);
 	i915_redisable_vga(to_i915(dev));
 
 	if (!state)
@@ -4598,7 +4599,7 @@ static void cpt_verify_modeset(struct drm_device *dev, int pipe)
 
 static int
 skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach,
-		  unsigned scaler_user, int *scaler_id, unsigned int rotation,
+		  unsigned int scaler_user, int *scaler_id,
 		  int src_w, int src_h, int dst_w, int dst_h)
 {
 	struct intel_crtc_scaler_state *scaler_state =
@@ -4607,9 +4608,12 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach,
 		to_intel_crtc(crtc_state->base.crtc);
 	int need_scaling;
 
-	need_scaling = drm_rotation_90_or_270(rotation) ?
-		(src_h != dst_w || src_w != dst_h):
-		(src_w != dst_w || src_h != dst_h);
+	/*
+	 * Src coordinates are already rotated by 270 degrees for
+	 * the 90/270 degree plane rotation cases (to match the
+	 * GTT mapping), hence no need to account for rotation here.
+	 */
+	need_scaling = src_w != dst_w || src_h != dst_h;
 
 	/*
 	 * if plane is being disabled or scaler is no more required or force detach
@@ -4671,7 +4675,7 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state)
 	const struct drm_display_mode *adjusted_mode = &state->base.adjusted_mode;
 
 	return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX,
-		&state->scaler_state.scaler_id, DRM_ROTATE_0,
+		&state->scaler_state.scaler_id,
 		state->pipe_src_w, state->pipe_src_h,
 		adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay);
 }
@@ -4700,7 +4704,6 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state,
 	ret = skl_update_scaler(crtc_state, force_detach,
 				drm_plane_index(&intel_plane->base),
 				&plane_state->scaler_id,
-				plane_state->base.rotation,
 				drm_rect_width(&plane_state->base.src) >> 16,
 				drm_rect_height(&plane_state->base.src) >> 16,
 				drm_rect_width(&plane_state->base.dst),
@@ -5823,7 +5826,8 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state,
 		intel_update_watermarks(intel_crtc);
 }
 
-static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
+static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
+					struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_encoder *encoder;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -5853,7 +5857,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
 		return;
 	}
 
-	state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
+	state->acquire_ctx = ctx;
 
 	/* Everything's already locked, -EDEADLK can't happen. */
 	crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
@@ -6101,7 +6105,7 @@ retry:
 	pipe_config->fdi_lanes = lane;
 
 	intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock,
-			       link_bw, &pipe_config->fdi_m_n);
+			       link_bw, &pipe_config->fdi_m_n, false);
 
 	ret = ironlake_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config);
 	if (ret == -EINVAL && pipe_config->pipe_bpp > 6*3) {
@@ -6277,7 +6281,8 @@ intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den)
 }
 
 static void compute_m_n(unsigned int m, unsigned int n,
-			uint32_t *ret_m, uint32_t *ret_n)
+			uint32_t *ret_m, uint32_t *ret_n,
+			bool reduce_m_n)
 {
 	/*
 	 * Reduce M/N as much as possible without loss in precision. Several DP
@@ -6285,9 +6290,11 @@ static void compute_m_n(unsigned int m, unsigned int n,
 	 * values. The passed in values are more likely to have the least
 	 * significant bits zero than M after rounding below, so do this first.
 	 */
-	while ((m & 1) == 0 && (n & 1) == 0) {
-		m >>= 1;
-		n >>= 1;
+	if (reduce_m_n) {
+		while ((m & 1) == 0 && (n & 1) == 0) {
+			m >>= 1;
+			n >>= 1;
+		}
 	}
 
 	*ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX);
@@ -6298,16 +6305,19 @@ static void compute_m_n(unsigned int m, unsigned int n,
 void
 intel_link_compute_m_n(int bits_per_pixel, int nlanes,
 		       int pixel_clock, int link_clock,
-		       struct intel_link_m_n *m_n)
+		       struct intel_link_m_n *m_n,
+		       bool reduce_m_n)
 {
 	m_n->tu = 64;
 
 	compute_m_n(bits_per_pixel * pixel_clock,
 		    link_clock * nlanes * 8,
-		    &m_n->gmch_m, &m_n->gmch_n);
+		    &m_n->gmch_m, &m_n->gmch_n,
+		    reduce_m_n);
 
 	compute_m_n(pixel_clock, link_clock,
-		    &m_n->link_m, &m_n->link_n);
+		    &m_n->link_m, &m_n->link_n,
+		    reduce_m_n);
 }
 
 static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv)
@@ -12197,6 +12207,15 @@ static void update_scanline_offset(struct intel_crtc *crtc)
 	 * type. For DP ports it behaves like most other platforms, but on HDMI
 	 * there's an extra 1 line difference. So we need to add two instead of
 	 * one to the value.
+	 *
+	 * On VLV/CHV DSI the scanline counter would appear to increment
+	 * approx. 1/3 of a scanline before start of vblank. Unfortunately
+	 * that means we can't tell whether we're in vblank or not while
+	 * we're on that particular line. We must still set scanline_offset
+	 * to 1 so that the vblank timestamps come out correct when we query
+	 * the scanline counter from within the vblank interrupt handler.
+	 * However if queried just before the start of vblank we'll get an
+	 * answer that's slightly in the future.
 	 */
 	if (IS_GEN2(dev_priv)) {
 		const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode;
@@ -15013,7 +15032,7 @@ int intel_modeset_init(struct drm_device *dev)
 	intel_setup_outputs(dev_priv);
 
 	drm_modeset_lock_all(dev);
-	intel_modeset_setup_hw_state(dev);
+	intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx);
 	drm_modeset_unlock_all(dev);
 
 	for_each_intel_crtc(dev, crtc) {
@@ -15050,13 +15069,13 @@ int intel_modeset_init(struct drm_device *dev)
 	return 0;
 }
 
-static void intel_enable_pipe_a(struct drm_device *dev)
+static void intel_enable_pipe_a(struct drm_device *dev,
+				struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_connector *connector;
 	struct drm_connector_list_iter conn_iter;
 	struct drm_connector *crt = NULL;
 	struct intel_load_detect_pipe load_detect_temp;
-	struct drm_modeset_acquire_ctx *ctx = dev->mode_config.acquire_ctx;
 	int ret;
 
 	/* We can't just switch on the pipe A, we need to set things up with a
@@ -15128,7 +15147,8 @@ static bool has_pch_trancoder(struct drm_i915_private *dev_priv,
 		(HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A);
 }
 
-static void intel_sanitize_crtc(struct intel_crtc *crtc)
+static void intel_sanitize_crtc(struct intel_crtc *crtc,
+				struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -15174,7 +15194,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
 		plane = crtc->plane;
 		crtc->base.primary->state->visible = true;
 		crtc->plane = !plane;
-		intel_crtc_disable_noatomic(&crtc->base);
+		intel_crtc_disable_noatomic(&crtc->base, ctx);
 		crtc->plane = plane;
 	}
 
@@ -15184,13 +15204,13 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
 		 * resume. Force-enable the pipe to fix this, the update_dpms
 		 * call below we restore the pipe to the right state, but leave
 		 * the required bits on. */
-		intel_enable_pipe_a(dev);
+		intel_enable_pipe_a(dev, ctx);
 	}
 
 	/* Adjust the state of the output pipe according to whether we
 	 * have active connectors/encoders. */
 	if (crtc->active && !intel_crtc_has_encoders(crtc))
-		intel_crtc_disable_noatomic(&crtc->base);
+		intel_crtc_disable_noatomic(&crtc->base, ctx);
 
 	if (crtc->active || HAS_GMCH_DISPLAY(dev_priv)) {
 		/*
@@ -15488,7 +15508,8 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv)
  * and sanitizes it to the current state
  */
 static void
-intel_modeset_setup_hw_state(struct drm_device *dev)
+intel_modeset_setup_hw_state(struct drm_device *dev,
+			     struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	enum pipe pipe;
@@ -15508,7 +15529,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev)
 	for_each_pipe(dev_priv, pipe) {
 		crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
 
-		intel_sanitize_crtc(crtc);
+		intel_sanitize_crtc(crtc, ctx);
 		intel_dump_pipe_config(crtc, crtc->config,
 				       "[setup_hw_state]");
 	}
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index ee77b519835c..fc691b8b317c 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1507,37 +1507,6 @@ static void intel_dp_print_rates(struct intel_dp *intel_dp)
 	DRM_DEBUG_KMS("common rates: %s\n", str);
 }
 
-bool
-__intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc)
-{
-	u32 base = drm_dp_is_branch(intel_dp->dpcd) ? DP_BRANCH_OUI :
-						      DP_SINK_OUI;
-
-	return drm_dp_dpcd_read(&intel_dp->aux, base, desc, sizeof(*desc)) ==
-	       sizeof(*desc);
-}
-
-bool intel_dp_read_desc(struct intel_dp *intel_dp)
-{
-	struct intel_dp_desc *desc = &intel_dp->desc;
-	bool oui_sup = intel_dp->dpcd[DP_DOWN_STREAM_PORT_COUNT] &
-		       DP_OUI_SUPPORT;
-	int dev_id_len;
-
-	if (!__intel_dp_read_desc(intel_dp, desc))
-		return false;
-
-	dev_id_len = strnlen(desc->device_id, sizeof(desc->device_id));
-	DRM_DEBUG_KMS("DP %s: OUI %*phD%s dev-ID %*pE HW-rev %d.%d SW-rev %d.%d\n",
-		      drm_dp_is_branch(intel_dp->dpcd) ? "branch" : "sink",
-		      (int)sizeof(desc->oui), desc->oui, oui_sup ? "" : "(NS)",
-		      dev_id_len, desc->device_id,
-		      desc->hw_rev >> 4, desc->hw_rev & 0xf,
-		      desc->sw_major_rev, desc->sw_minor_rev);
-
-	return true;
-}
-
 static int rate_to_index(int find, const int *rates)
 {
 	int i = 0;
@@ -1624,6 +1593,8 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 	int common_rates[DP_MAX_SUPPORTED_RATES] = {};
 	int common_len;
 	uint8_t link_bw, rate_select;
+	bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc,
+					   DP_DPCD_QUIRK_LIMITED_M_N);
 
 	common_len = intel_dp_common_rates(intel_dp, common_rates);
 
@@ -1753,7 +1724,8 @@ found:
 	intel_link_compute_m_n(bpp, lane_count,
 			       adjusted_mode->crtc_clock,
 			       pipe_config->port_clock,
-			       &pipe_config->dp_m_n);
+			       &pipe_config->dp_m_n,
+			       reduce_m_n);
 
 	if (intel_connector->panel.downclock_mode != NULL &&
 		dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) {
@@ -1761,7 +1733,8 @@ found:
 			intel_link_compute_m_n(bpp, lane_count,
 				intel_connector->panel.downclock_mode->clock,
 				pipe_config->port_clock,
-				&pipe_config->dp_m2_n2);
+				&pipe_config->dp_m2_n2,
+				reduce_m_n);
 	}
 
 	/*
@@ -3622,7 +3595,8 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
 	if (!intel_dp_read_dpcd(intel_dp))
 		return false;
 
-	intel_dp_read_desc(intel_dp);
+	drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc,
+			 drm_dp_is_branch(intel_dp->dpcd));
 
 	if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11)
 		dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] &
@@ -4624,7 +4598,8 @@ intel_dp_long_pulse(struct intel_connector *intel_connector)
 
 	intel_dp_print_rates(intel_dp);
 
-	intel_dp_read_desc(intel_dp);
+	drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc,
+			 drm_dp_is_branch(intel_dp->dpcd));
 
 	intel_dp_configure_mst(intel_dp);
 
diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
index 6532e226db29..40ba3134545e 100644
--- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
@@ -119,8 +119,6 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector,
 	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
 	struct intel_panel *panel = &connector->panel;
 
-	intel_dp_aux_enable_backlight(connector);
-
 	if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT)
 		panel->backlight.max = 0xFFFF;
 	else
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index c1f62eb07c07..989e25577ac0 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -44,6 +44,8 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder,
 	int lane_count, slots;
 	const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
 	int mst_pbn;
+	bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc,
+					   DP_DPCD_QUIRK_LIMITED_M_N);
 
 	pipe_config->has_pch_encoder = false;
 	bpp = 24;
@@ -75,7 +77,8 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder,
 	intel_link_compute_m_n(bpp, lane_count,
 			       adjusted_mode->crtc_clock,
 			       pipe_config->port_clock,
-			       &pipe_config->dp_m_n);
+			       &pipe_config->dp_m_n,
+			       reduce_m_n);
 
 	pipe_config->dp_m_n.tu = slots;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index aaee3949a422..f630c7af5020 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -906,14 +906,6 @@ enum link_m_n_set {
 	M2_N2
 };
 
-struct intel_dp_desc {
-	u8 oui[3];
-	u8 device_id[6];
-	u8 hw_rev;
-	u8 sw_major_rev;
-	u8 sw_minor_rev;
-} __packed;
-
 struct intel_dp_compliance_data {
 	unsigned long edid;
 	uint8_t video_pattern;
@@ -957,7 +949,7 @@ struct intel_dp {
 	/* Max link BW for the sink as per DPCD registers */
 	int max_sink_link_bw;
 	/* sink or branch descriptor */
-	struct intel_dp_desc desc;
+	struct drm_dp_desc desc;
 	struct drm_dp_aux aux;
 	enum intel_display_power_domain aux_power_domain;
 	uint8_t train_set[4];
@@ -1532,9 +1524,6 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
 }
 
 bool intel_dp_read_dpcd(struct intel_dp *intel_dp);
-bool __intel_dp_read_desc(struct intel_dp *intel_dp,
-			  struct intel_dp_desc *desc);
-bool intel_dp_read_desc(struct intel_dp *intel_dp);
 int intel_dp_link_required(int pixel_clock, int bpp);
 int intel_dp_max_data_rate(int max_link_clock, int max_lanes);
 bool intel_digital_port_connected(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 3ffe8b1f1d48..fc0ef492252a 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -410,11 +410,10 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder)
 		val |= (ULPS_STATE_ENTER | DEVICE_READY);
 		I915_WRITE(MIPI_DEVICE_READY(port), val);
 
-		/* Wait for ULPS Not active */
+		/* Wait for ULPS active */
 		if (intel_wait_for_register(dev_priv,
-				MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE,
-				GLK_ULPS_NOT_ACTIVE, 20))
-			DRM_ERROR("ULPS is still active\n");
+				MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, 0, 20))
+			DRM_ERROR("ULPS not active\n");
 
 		/* Exit ULPS */
 		val = I915_READ(MIPI_DEVICE_READY(port));
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 854e8e0c836b..f94eacff196c 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1075,6 +1075,22 @@ int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 	return 0;
 }
 
+static bool ring_is_idle(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	bool idle = true;
+
+	intel_runtime_pm_get(dev_priv);
+
+	/* No bit for gen2, so assume the CS parser is idle */
+	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
+		idle = false;
+
+	intel_runtime_pm_put(dev_priv);
+
+	return idle;
+}
+
 /**
  * intel_engine_is_idle() - Report if the engine has finished process all work
  * @engine: the intel_engine_cs
@@ -1084,8 +1100,6 @@ int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
  */
 bool intel_engine_is_idle(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
 	/* Any inflight/incomplete requests? */
 	if (!i915_seqno_passed(intel_engine_get_seqno(engine),
 			       intel_engine_last_submit(engine)))
@@ -1100,7 +1114,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 		return false;
 
 	/* Ring stopped? */
-	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
+	if (!ring_is_idle(engine))
 		return false;
 
 	return true;
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index ded2add18b26..d93c58410bff 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -82,20 +82,10 @@ static unsigned int get_crtc_fence_y_offset(struct intel_crtc *crtc)
 static void intel_fbc_get_plane_source_size(struct intel_fbc_state_cache *cache,
 					    int *width, int *height)
 {
-	int w, h;
-
-	if (drm_rotation_90_or_270(cache->plane.rotation)) {
-		w = cache->plane.src_h;
-		h = cache->plane.src_w;
-	} else {
-		w = cache->plane.src_w;
-		h = cache->plane.src_h;
-	}
-
 	if (width)
-		*width = w;
+		*width = cache->plane.src_w;
 	if (height)
-		*height = h;
+		*height = cache->plane.src_h;
 }
 
 static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv,
@@ -746,6 +736,11 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
 		cache->crtc.hsw_bdw_pixel_rate = crtc_state->pixel_rate;
 
 	cache->plane.rotation = plane_state->base.rotation;
+	/*
+	 * Src coordinates are already rotated by 270 degrees for
+	 * the 90/270 degree plane rotation cases (to match the
+	 * GTT mapping), hence no need to account for rotation here.
+	 */
 	cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16;
 	cache->plane.src_h = drm_rect_height(&plane_state->base.src) >> 16;
 	cache->plane.visible = plane_state->base.visible;
diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c
index 25d8e76489e4..292fedf30b00 100644
--- a/drivers/gpu/drm/i915/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/intel_lpe_audio.c
@@ -63,6 +63,7 @@
 #include <linux/acpi.h>
 #include <linux/device.h>
 #include <linux/pci.h>
+#include <linux/pm_runtime.h>
 
 #include "i915_drv.h"
 #include <linux/delay.h>
@@ -121,6 +122,10 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
 
 	kfree(rsc);
 
+	pm_runtime_forbid(&platdev->dev);
+	pm_runtime_set_active(&platdev->dev);
+	pm_runtime_enable(&platdev->dev);
+
 	return platdev;
 
 err:
@@ -144,44 +149,10 @@ static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv)
 
 static void lpe_audio_irq_unmask(struct irq_data *d)
 {
-	struct drm_i915_private *dev_priv = d->chip_data;
-	unsigned long irqflags;
-	u32 val = (I915_LPE_PIPE_A_INTERRUPT |
-		I915_LPE_PIPE_B_INTERRUPT);
-
-	if (IS_CHERRYVIEW(dev_priv))
-		val |= I915_LPE_PIPE_C_INTERRUPT;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-
-	dev_priv->irq_mask &= ~val;
-	I915_WRITE(VLV_IIR, val);
-	I915_WRITE(VLV_IIR, val);
-	I915_WRITE(VLV_IMR, dev_priv->irq_mask);
-	POSTING_READ(VLV_IMR);
-
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
 static void lpe_audio_irq_mask(struct irq_data *d)
 {
-	struct drm_i915_private *dev_priv = d->chip_data;
-	unsigned long irqflags;
-	u32 val = (I915_LPE_PIPE_A_INTERRUPT |
-		I915_LPE_PIPE_B_INTERRUPT);
-
-	if (IS_CHERRYVIEW(dev_priv))
-		val |= I915_LPE_PIPE_C_INTERRUPT;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-
-	dev_priv->irq_mask |= val;
-	I915_WRITE(VLV_IMR, dev_priv->irq_mask);
-	I915_WRITE(VLV_IIR, val);
-	I915_WRITE(VLV_IIR, val);
-	POSTING_READ(VLV_IIR);
-
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
 static struct irq_chip lpe_audio_irqchip = {
@@ -325,8 +296,6 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
 
 	desc = irq_to_desc(dev_priv->lpe_audio.irq);
 
-	lpe_audio_irq_mask(&desc->irq_data);
-
 	lpe_audio_platdev_destroy(dev_priv);
 
 	irq_free_desc(dev_priv->lpe_audio.irq);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c8f7c631fc1f..62f44d3e7c43 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -326,8 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
 
-	assert_ring_tail_valid(rq->ring, rq->tail);
-	reg_state[CTX_RING_TAIL+1] = rq->tail;
+	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
 
 	/* True 32b PPGTT with dynamic page allocation: update PDP
 	 * registers and point the unallocated PDPs to scratch page.
@@ -1989,7 +1988,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 
 	ce->ring = ring;
 	ce->state = vma;
-	ce->initialised = engine->init_context == NULL;
+	ce->initialised |= engine->init_context == NULL;
 
 	return 0;
 
@@ -2036,8 +2035,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 			ce->state->obj->mm.dirty = true;
 			i915_gem_object_unpin_map(ce->state->obj);
 
-			ce->ring->head = ce->ring->tail = 0;
-			intel_ring_update_space(ce->ring);
+			intel_ring_reset(ce->ring, 0);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
index 71cbe9c08932..5abef482eacf 100644
--- a/drivers/gpu/drm/i915/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/intel_lspcon.c
@@ -240,7 +240,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
 		return false;
 	}
 
-	intel_dp_read_desc(dp);
+	drm_dp_read_desc(&dp->aux, &dp->desc, drm_dp_is_branch(dp->dpcd));
 
 	DRM_DEBUG_KMS("Success: LSPCON init\n");
 	return true;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 570bd603f401..078fd1bfa5ea 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3373,20 +3373,26 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
 
 	/* n.b., src is 16.16 fixed point, dst is whole integer */
 	if (plane->id == PLANE_CURSOR) {
+		/*
+		 * Cursors only support 0/180 degree rotation,
+		 * hence no need to account for rotation here.
+		 */
 		src_w = pstate->base.src_w;
 		src_h = pstate->base.src_h;
 		dst_w = pstate->base.crtc_w;
 		dst_h = pstate->base.crtc_h;
 	} else {
+		/*
+		 * Src coordinates are already rotated by 270 degrees for
+		 * the 90/270 degree plane rotation cases (to match the
+		 * GTT mapping), hence no need to account for rotation here.
+		 */
 		src_w = drm_rect_width(&pstate->base.src);
 		src_h = drm_rect_height(&pstate->base.src);
 		dst_w = drm_rect_width(&pstate->base.dst);
 		dst_h = drm_rect_height(&pstate->base.dst);
 	}
 
-	if (drm_rotation_90_or_270(pstate->base.rotation))
-		swap(dst_w, dst_h);
-
 	downscale_h = max(src_h / dst_h, (uint32_t)DRM_PLANE_HELPER_NO_SCALING);
 	downscale_w = max(src_w / dst_w, (uint32_t)DRM_PLANE_HELPER_NO_SCALING);
 
@@ -3417,12 +3423,14 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
 	if (y && format != DRM_FORMAT_NV12)
 		return 0;
 
+	/*
+	 * Src coordinates are already rotated by 270 degrees for
+	 * the 90/270 degree plane rotation cases (to match the
+	 * GTT mapping), hence no need to account for rotation here.
+	 */
 	width = drm_rect_width(&intel_pstate->base.src) >> 16;
 	height = drm_rect_height(&intel_pstate->base.src) >> 16;
 
-	if (drm_rotation_90_or_270(pstate->rotation))
-		swap(width, height);
-
 	/* for planar format */
 	if (format == DRM_FORMAT_NV12) {
 		if (y)  /* y-plane data rate */
@@ -3505,12 +3513,14 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate,
 	    fb->modifier != I915_FORMAT_MOD_Yf_TILED)
 		return 8;
 
+	/*
+	 * Src coordinates are already rotated by 270 degrees for
+	 * the 90/270 degree plane rotation cases (to match the
+	 * GTT mapping), hence no need to account for rotation here.
+	 */
 	src_w = drm_rect_width(&intel_pstate->base.src) >> 16;
 	src_h = drm_rect_height(&intel_pstate->base.src) >> 16;
 
-	if (drm_rotation_90_or_270(pstate->rotation))
-		swap(src_w, src_h);
-
 	/* Halve UV plane width and height for NV12 */
 	if (fb->format->format == DRM_FORMAT_NV12 && !y) {
 		src_w /= 2;
@@ -3794,13 +3804,15 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 		width = intel_pstate->base.crtc_w;
 		height = intel_pstate->base.crtc_h;
 	} else {
+		/*
+		 * Src coordinates are already rotated by 270 degrees for
+		 * the 90/270 degree plane rotation cases (to match the
+		 * GTT mapping), hence no need to account for rotation here.
+		 */
 		width = drm_rect_width(&intel_pstate->base.src) >> 16;
 		height = drm_rect_height(&intel_pstate->base.src) >> 16;
 	}
 
-	if (drm_rotation_90_or_270(pstate->rotation))
-		swap(width, height);
-
 	cpp = fb->format->cpp[0];
 	plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate);
 
@@ -4335,11 +4347,19 @@ skl_compute_wm(struct drm_atomic_state *state)
 	struct drm_crtc_state *cstate;
 	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 	struct skl_wm_values *results = &intel_state->wm_results;
+	struct drm_device *dev = state->dev;
 	struct skl_pipe_wm *pipe_wm;
 	bool changed = false;
 	int ret, i;
 
 	/*
+	 * When we distrust bios wm we always need to recompute to set the
+	 * expected DDB allocations for each CRTC.
+	 */
+	if (to_i915(dev)->wm.distrust_bios_wm)
+		changed = true;
+
+	/*
 	 * If this transaction isn't actually touching any CRTC's, don't
 	 * bother with watermark calculation.  Note that if we pass this
 	 * test, we're guaranteed to hold at least one CRTC state mutex,
@@ -4349,6 +4369,7 @@ skl_compute_wm(struct drm_atomic_state *state)
 	 */
 	for_each_new_crtc_in_state(state, crtc, cstate, i)
 		changed = true;
+
 	if (!changed)
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index c3780d0d2baf..559f1ab42bfc 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -435,8 +435,9 @@ static bool intel_psr_match_conditions(struct intel_dp *intel_dp)
 	}
 
 	/* PSR2 is restricted to work with panel resolutions upto 3200x2000 */
-	if (intel_crtc->config->pipe_src_w > 3200 ||
-				intel_crtc->config->pipe_src_h > 2000) {
+	if (dev_priv->psr.psr2_support &&
+	    (intel_crtc->config->pipe_src_w > 3200 ||
+	     intel_crtc->config->pipe_src_h > 2000)) {
 		dev_priv->psr.psr2_support = false;
 		return false;
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 66a2b8b83972..513a0f4b469b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size)
 
 void intel_ring_update_space(struct intel_ring *ring)
 {
-	ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
+	ring->space = __intel_ring_space(ring->head, ring->emit, ring->size);
 }
 
 static int
@@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
 
 	i915_gem_request_submit(request);
 
-	assert_ring_tail_valid(request->ring, request->tail);
-	I915_WRITE_TAIL(request->engine, request->tail);
+	I915_WRITE_TAIL(request->engine,
+			intel_ring_set_tail(request->ring, request->tail));
 }
 
 static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
@@ -1316,11 +1316,23 @@ err:
 	return PTR_ERR(addr);
 }
 
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+	GEM_BUG_ON(!list_empty(&ring->request_list));
+	ring->tail = tail;
+	ring->head = tail;
+	ring->emit = tail;
+	intel_ring_update_space(ring);
+}
+
 void intel_ring_unpin(struct intel_ring *ring)
 {
 	GEM_BUG_ON(!ring->vma);
 	GEM_BUG_ON(!ring->vaddr);
 
+	/* Discard any unused bytes beyond that submitted to hw. */
+	intel_ring_reset(ring, ring->tail);
+
 	if (i915_vma_is_map_and_fenceable(ring->vma))
 		i915_vma_unpin_iomap(ring->vma);
 	else
@@ -1562,8 +1574,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
+	/* Restart from the beginning of the rings for convenience */
 	for_each_engine(engine, dev_priv, id)
-		engine->buffer->head = engine->buffer->tail;
+		intel_ring_reset(engine->buffer, 0);
 }
 
 static int ring_request_alloc(struct drm_i915_gem_request *request)
@@ -1616,7 +1629,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 		unsigned space;
 
 		/* Would completion of this request free enough space? */
-		space = __intel_ring_space(target->postfix, ring->tail,
+		space = __intel_ring_space(target->postfix, ring->emit,
 					   ring->size);
 		if (space >= bytes)
 			break;
@@ -1641,8 +1654,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 {
 	struct intel_ring *ring = req->ring;
-	int remain_actual = ring->size - ring->tail;
-	int remain_usable = ring->effective_size - ring->tail;
+	int remain_actual = ring->size - ring->emit;
+	int remain_usable = ring->effective_size - ring->emit;
 	int bytes = num_dwords * sizeof(u32);
 	int total_bytes, wait_bytes;
 	bool need_wrap = false;
@@ -1678,17 +1691,17 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 
 	if (unlikely(need_wrap)) {
 		GEM_BUG_ON(remain_actual > ring->space);
-		GEM_BUG_ON(ring->tail + remain_actual > ring->size);
+		GEM_BUG_ON(ring->emit + remain_actual > ring->size);
 
 		/* Fill the tail with MI_NOOP */
-		memset(ring->vaddr + ring->tail, 0, remain_actual);
-		ring->tail = 0;
+		memset(ring->vaddr + ring->emit, 0, remain_actual);
+		ring->emit = 0;
 		ring->space -= remain_actual;
 	}
 
-	GEM_BUG_ON(ring->tail > ring->size - bytes);
-	cs = ring->vaddr + ring->tail;
-	ring->tail += bytes;
+	GEM_BUG_ON(ring->emit > ring->size - bytes);
+	cs = ring->vaddr + ring->emit;
+	ring->emit += bytes;
 	ring->space -= bytes;
 	GEM_BUG_ON(ring->space < 0);
 
@@ -1699,7 +1712,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
 {
 	int num_dwords =
-		(req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+		(req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
 	u32 *cs;
 
 	if (num_dwords == 0)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a82a0807f64d..f7144fe09613 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -145,6 +145,7 @@ struct intel_ring {
 
 	u32 head;
 	u32 tail;
+	u32 emit;
 
 	int space;
 	int size;
@@ -488,6 +489,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 struct intel_ring *
 intel_engine_create_ring(struct intel_engine_cs *engine, int size);
 int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+void intel_ring_update_space(struct intel_ring *ring);
 void intel_ring_unpin(struct intel_ring *ring);
 void intel_ring_free(struct intel_ring *ring);
 
@@ -511,7 +514,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
 	 * reserved for the command packet (i.e. the value passed to
 	 * intel_ring_begin()).
 	 */
-	GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
+	GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
 }
 
 static inline u32
@@ -540,7 +543,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
 	GEM_BUG_ON(tail >= ring->size);
 }
 
-void intel_ring_update_space(struct intel_ring *ring);
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+	/* Whilst writes to the tail are strictly order, there is no
+	 * serialisation between readers and the writers. The tail may be
+	 * read by i915_gem_request_retire() just as it is being updated
+	 * by execlists, as although the breadcrumb is complete, the context
+	 * switch hasn't been seen.
+	 */
+	assert_ring_tail_valid(ring, tail);
+	ring->tail = tail;
+	return tail;
+}
 
 void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
 
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index f7d431427115..e6517edcd16b 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -83,10 +83,13 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode,
  */
 void intel_pipe_update_start(struct intel_crtc *crtc)
 {
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode;
 	long timeout = msecs_to_jiffies_timeout(1);
 	int scanline, min, max, vblank_start;
 	wait_queue_head_t *wq = drm_crtc_vblank_waitqueue(&crtc->base);
+	bool need_vlv_dsi_wa = (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
+		intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI);
 	DEFINE_WAIT(wait);
 
 	vblank_start = adjusted_mode->crtc_vblank_start;
@@ -139,6 +142,24 @@ void intel_pipe_update_start(struct intel_crtc *crtc)
 
 	drm_crtc_vblank_put(&crtc->base);
 
+	/*
+	 * On VLV/CHV DSI the scanline counter would appear to
+	 * increment approx. 1/3 of a scanline before start of vblank.
+	 * The registers still get latched at start of vblank however.
+	 * This means we must not write any registers on the first
+	 * line of vblank (since not the whole line is actually in
+	 * vblank). And unfortunately we can't use the interrupt to
+	 * wait here since it will fire too soon. We could use the
+	 * frame start interrupt instead since it will fire after the
+	 * critical scanline, but that would require more changes
+	 * in the interrupt code. So for now we'll just do the nasty
+	 * thing and poll for the bad scanline to pass us by.
+	 *
+	 * FIXME figure out if BXT+ DSI suffers from this as well
+	 */
+	while (need_vlv_dsi_wa && scanline == vblank_start)
+		scanline = intel_get_crtc_scanline(crtc);
+
 	crtc->debug.scanline_start = scanline;
 	crtc->debug.start_vbl_time = ktime_get();
 	crtc->debug.start_vbl_count = intel_crtc_get_vblank_counter(crtc);
@@ -198,12 +219,15 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work
 			  ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time),
 			  crtc->debug.min_vbl, crtc->debug.max_vbl,
 			  crtc->debug.scanline_start, scanline_end);
-	} else if (ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time) >
-		   VBLANK_EVASION_TIME_US)
+	}
+#ifdef CONFIG_DRM_I915_DEBUG_VBLANK_EVADE
+	else if (ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time) >
+		 VBLANK_EVASION_TIME_US)
 		DRM_WARN("Atomic update on pipe (%c) took %lld us, max time under evasion is %u us\n",
 			 pipe_name(pipe),
 			 ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time),
 			 VBLANK_EVASION_TIME_US);
+#endif
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 4b7f73aeddac..f84115261ae7 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -59,8 +59,6 @@ struct drm_i915_gem_request;
  *                available in the work queue (note, the queue is shared,
  *                not per-engine). It is OK for this to be nonzero, but
  *                it should not be huge!
- *   q_fail: failed to enqueue a work item. This should never happen,
- *           because we check for space beforehand.
  *   b_fail: failed to ring the doorbell. This should never happen, unless
  *           somehow the hardware misbehaves, or maybe if the GuC firmware
  *           crashes? We probably need to reset the GPU to recover.
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 1afb8b06e3e1..12b85b3278cd 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -320,7 +320,7 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
 static int igt_ctx_exec(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_gem_object *obj = NULL;
 	struct drm_file *file;
 	IGT_TIMEOUT(end_time);
 	LIST_HEAD(objects);
@@ -359,7 +359,7 @@ static int igt_ctx_exec(void *arg)
 		}
 
 		for_each_engine(engine, i915, id) {
-			if (dw == 0) {
+			if (!obj) {
 				obj = create_test_object(ctx, file, &objects);
 				if (IS_ERR(obj)) {
 					err = PTR_ERR(obj);
@@ -376,8 +376,10 @@ static int igt_ctx_exec(void *arg)
 				goto out_unlock;
 			}
 
-			if (++dw == max_dwords(obj))
+			if (++dw == max_dwords(obj)) {
+				obj = NULL;
 				dw = 0;
+			}
 			ndwords++;
 		}
 		ncontexts++;
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 6a8258eacdcb..9f24c5da3f8d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -174,7 +174,7 @@ struct drm_i915_private *mock_gem_device(void)
 	i915->requests = KMEM_CACHE(mock_request,
 				    SLAB_HWCACHE_ALIGN |
 				    SLAB_RECLAIM_ACCOUNT |
-				    SLAB_DESTROY_BY_RCU);
+				    SLAB_TYPESAFE_BY_RCU);
 	if (!i915->requests)
 		goto err_vmas;
 
diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
index 8fb801fab039..8b05ecb8fdef 100644
--- a/drivers/gpu/drm/imx/imx-ldb.c
+++ b/drivers/gpu/drm/imx/imx-ldb.c
@@ -673,7 +673,7 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
 		ret = drm_of_find_panel_or_bridge(child,
 						  imx_ldb->lvds_mux ? 4 : 2, 0,
 						  &channel->panel, &channel->bridge);
-		if (ret)
+		if (ret && ret != -ENODEV)
 			return ret;
 
 		/* panel ddc only if there is no bridge */
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index 808b995a990f..b5cc6e12334c 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -19,6 +19,7 @@
 #include <drm/drm_of.h>
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/iopoll.h>
 #include <linux/irq.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
@@ -900,16 +901,12 @@ static int mtk_dsi_host_detach(struct mipi_dsi_host *host,
 
 static void mtk_dsi_wait_for_idle(struct mtk_dsi *dsi)
 {
-	u32 timeout_ms = 500000; /* total 1s ~ 2s timeout */
-
-	while (timeout_ms--) {
-		if (!(readl(dsi->regs + DSI_INTSTA) & DSI_BUSY))
-			break;
-
-		usleep_range(2, 4);
-	}
+	int ret;
+	u32 val;
 
-	if (timeout_ms == 0) {
+	ret = readl_poll_timeout(dsi->regs + DSI_INTSTA, val, !(val & DSI_BUSY),
+				 4, 2000000);
+	if (ret) {
 		DRM_WARN("polling dsi wait not busy timeout!\n");
 
 		mtk_dsi_enable(dsi);
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
index 41a1c03b0347..0a4ffd724146 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
@@ -1062,7 +1062,7 @@ static int mtk_hdmi_setup_vendor_specific_infoframe(struct mtk_hdmi *hdmi,
 	}
 
 	err = hdmi_vendor_infoframe_pack(&frame, buffer, sizeof(buffer));
-	if (err) {
+	if (err < 0) {
 		dev_err(hdmi->dev, "Failed to pack vendor infoframe: %zd\n",
 			err);
 		return err;
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index 75382f5f0fce..10b227d83e9a 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -152,7 +152,7 @@ static struct regmap_config meson_regmap_config = {
 	.max_register   = 0x1000,
 };
 
-static int meson_drv_bind(struct device *dev)
+static int meson_drv_bind_master(struct device *dev, bool has_components)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct meson_drm *priv;
@@ -233,10 +233,12 @@ static int meson_drv_bind(struct device *dev)
 	if (ret)
 		goto free_drm;
 
-	ret = component_bind_all(drm->dev, drm);
-	if (ret) {
-		dev_err(drm->dev, "Couldn't bind all components\n");
-		goto free_drm;
+	if (has_components) {
+		ret = component_bind_all(drm->dev, drm);
+		if (ret) {
+			dev_err(drm->dev, "Couldn't bind all components\n");
+			goto free_drm;
+		}
 	}
 
 	ret = meson_plane_create(priv);
@@ -276,6 +278,11 @@ free_drm:
 	return ret;
 }
 
+static int meson_drv_bind(struct device *dev)
+{
+	return meson_drv_bind_master(dev, true);
+}
+
 static void meson_drv_unbind(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
@@ -357,6 +364,9 @@ static int meson_drv_probe(struct platform_device *pdev)
 		count += meson_probe_remote(pdev, &match, np, remote);
 	}
 
+	if (count && !match)
+		return meson_drv_bind_master(&pdev->dev, false);
+
 	/* If some endpoints were found, initialize the nodes */
 	if (count) {
 		dev_info(&pdev->dev, "Queued %d outputs on vpu\n", count);
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index adb411a078e8..f4b53588e071 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -1173,7 +1173,10 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc,
 
 
 	if (IS_G200_SE(mdev)) {
-		if (mdev->unique_rev_id >= 0x02) {
+		if  (mdev->unique_rev_id >= 0x04) {
+			WREG8(MGAREG_CRTCEXT_INDEX, 0x06);
+			WREG8(MGAREG_CRTCEXT_DATA, 0);
+		} else if (mdev->unique_rev_id >= 0x02) {
 			u8 hi_pri_lvl;
 			u32 bpp;
 			u32 mb;
@@ -1639,6 +1642,10 @@ static int mga_vga_mode_valid(struct drm_connector *connector,
 			if (mga_vga_calculate_mode_bandwidth(mode, bpp)
 				> (30100 * 1024))
 				return MODE_BANDWIDTH;
+		} else {
+			if (mga_vga_calculate_mode_bandwidth(mode, bpp)
+				> (55000 * 1024))
+				return MODE_BANDWIDTH;
 		}
 	} else if (mdev->type == G200_WB) {
 		if (mode->hdisplay > 1280)
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 5b8e23d051f2..0a31cd6d01ce 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -13,6 +13,7 @@ config DRM_MSM
 	select QCOM_SCM
 	select SND_SOC_HDMI_CODEC if SND_SOC
 	select SYNC_FILE
+	select PM_OPP
 	default y
 	help
 	  DRM/KMS driver for MSM/snapdragon.
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c
index f8f48d014978..9c34d7824988 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c
@@ -116,7 +116,7 @@ static int mdss_hw_irqdomain_map(struct irq_domain *d, unsigned int irq,
 	return 0;
 }
 
-static struct irq_domain_ops mdss_hw_irqdomain_ops = {
+static const struct irq_domain_ops mdss_hw_irqdomain_ops = {
 	.map = mdss_hw_irqdomain_map,
 	.xlate = irq_domain_xlate_onecell,
 };
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
index a38c5fe6cc19..7d3741215387 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
@@ -225,9 +225,10 @@ mdp5_plane_duplicate_state(struct drm_plane *plane)
 
 	mdp5_state = kmemdup(to_mdp5_plane_state(plane->state),
 			sizeof(*mdp5_state), GFP_KERNEL);
+	if (!mdp5_state)
+		return NULL;
 
-	if (mdp5_state && mdp5_state->base.fb)
-		drm_framebuffer_reference(mdp5_state->base.fb);
+	__drm_atomic_helper_plane_duplicate_state(plane, &mdp5_state->base);
 
 	return &mdp5_state->base;
 }
@@ -444,6 +445,10 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state,
 			mdp5_pipe_release(state->state, old_hwpipe);
 			mdp5_pipe_release(state->state, old_right_hwpipe);
 		}
+	} else {
+		mdp5_pipe_release(state->state, mdp5_state->hwpipe);
+		mdp5_pipe_release(state->state, mdp5_state->r_hwpipe);
+		mdp5_state->hwpipe = mdp5_state->r_hwpipe = NULL;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 87b5695d4034..9d498eb81906 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -830,6 +830,7 @@ static struct drm_driver msm_driver = {
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_export   = drm_gem_prime_export,
 	.gem_prime_import   = drm_gem_prime_import,
+	.gem_prime_res_obj  = msm_gem_prime_res_obj,
 	.gem_prime_pin      = msm_gem_prime_pin,
 	.gem_prime_unpin    = msm_gem_prime_unpin,
 	.gem_prime_get_sg_table = msm_gem_prime_get_sg_table,
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 28b6f9ba5066..1b26ca626528 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -224,6 +224,7 @@ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj);
 void *msm_gem_prime_vmap(struct drm_gem_object *obj);
 void msm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
 int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+struct reservation_object *msm_gem_prime_res_obj(struct drm_gem_object *obj);
 struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev,
 		struct dma_buf_attachment *attach, struct sg_table *sg);
 int msm_gem_prime_pin(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c
index 3f299c537b77..a2f89bac9c16 100644
--- a/drivers/gpu/drm/msm/msm_fence.c
+++ b/drivers/gpu/drm/msm/msm_fence.c
@@ -99,8 +99,8 @@ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence)
 }
 
 struct msm_fence {
-	struct msm_fence_context *fctx;
 	struct dma_fence base;
+	struct msm_fence_context *fctx;
 };
 
 static inline struct msm_fence *to_msm_fence(struct dma_fence *fence)
@@ -130,19 +130,13 @@ static bool msm_fence_signaled(struct dma_fence *fence)
 	return fence_completed(f->fctx, f->base.seqno);
 }
 
-static void msm_fence_release(struct dma_fence *fence)
-{
-	struct msm_fence *f = to_msm_fence(fence);
-	kfree_rcu(f, base.rcu);
-}
-
 static const struct dma_fence_ops msm_fence_ops = {
 	.get_driver_name = msm_fence_get_driver_name,
 	.get_timeline_name = msm_fence_get_timeline_name,
 	.enable_signaling = msm_fence_enable_signaling,
 	.signaled = msm_fence_signaled,
 	.wait = dma_fence_default_wait,
-	.release = msm_fence_release,
+	.release = dma_fence_free,
 };
 
 struct dma_fence *
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 68e509b3b9e4..50289a23baf8 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -758,6 +758,8 @@ static int msm_gem_new_impl(struct drm_device *dev,
 	struct msm_gem_object *msm_obj;
 	bool use_vram = false;
 
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
 	switch (flags & MSM_BO_CACHE_MASK) {
 	case MSM_BO_UNCACHED:
 	case MSM_BO_CACHED:
@@ -853,7 +855,11 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev,
 
 	size = PAGE_ALIGN(dmabuf->size);
 
+	/* Take mutex so we can modify the inactive list in msm_gem_new_impl */
+	mutex_lock(&dev->struct_mutex);
 	ret = msm_gem_new_impl(dev, size, MSM_BO_WC, dmabuf->resv, &obj);
+	mutex_unlock(&dev->struct_mutex);
+
 	if (ret)
 		goto fail;
 
diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c
index 60bb290700ce..13403c6da6c7 100644
--- a/drivers/gpu/drm/msm/msm_gem_prime.c
+++ b/drivers/gpu/drm/msm/msm_gem_prime.c
@@ -70,3 +70,10 @@ void msm_gem_prime_unpin(struct drm_gem_object *obj)
 	if (!obj->import_attach)
 		msm_gem_put_pages(obj);
 }
+
+struct reservation_object *msm_gem_prime_res_obj(struct drm_gem_object *obj)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+	return msm_obj->resv;
+}
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 1c545ebe6a5a..7832e6421d25 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -410,12 +410,11 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		if (!in_fence)
 			return -EINVAL;
 
-		/* TODO if we get an array-fence due to userspace merging multiple
-		 * fences, we need a way to determine if all the backing fences
-		 * are from our own context..
+		/*
+		 * Wait if the fence is from a foreign context, or if the fence
+		 * array contains any fence from a foreign context.
 		 */
-
-		if (in_fence->context != gpu->fctx->context) {
+		if (!dma_fence_match_context(in_fence, gpu->fctx->context)) {
 			ret = dma_fence_wait(in_fence, true);
 			if (ret)
 				return ret;
@@ -496,8 +495,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 			goto out;
 		}
 
-		if ((submit_cmd.size + submit_cmd.submit_offset) >=
-				msm_obj->base.size) {
+		if (!submit_cmd.size ||
+			((submit_cmd.size + submit_cmd.submit_offset) >
+				msm_obj->base.size)) {
 			DRM_ERROR("invalid cmdstream size: %u\n", submit_cmd.size);
 			ret = -EINVAL;
 			goto out;
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 97b9c38c6b3f..0fdc88d79ca8 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -549,9 +549,9 @@ static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
 		gpu->grp_clks[i] = get_clock(dev, name);
 
 		/* Remember the key clocks that we need to control later */
-		if (!strcmp(name, "core"))
+		if (!strcmp(name, "core") || !strcmp(name, "core_clk"))
 			gpu->core_clk = gpu->grp_clks[i];
-		else if (!strcmp(name, "rbbmtimer"))
+		else if (!strcmp(name, "rbbmtimer") || !strcmp(name, "rbbmtimer_clk"))
 			gpu->rbbmtimer_clk = gpu->grp_clks[i];
 
 		++i;
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
index 1144e0c9e894..0abe77675b76 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
@@ -35,6 +35,13 @@
 #include "mxsfb_drv.h"
 #include "mxsfb_regs.h"
 
+#define MXS_SET_ADDR		0x4
+#define MXS_CLR_ADDR		0x8
+#define MODULE_CLKGATE		BIT(30)
+#define MODULE_SFTRST		BIT(31)
+/* 1 second delay should be plenty of time for block reset */
+#define RESET_TIMEOUT		1000000
+
 static u32 set_hsync_pulse_width(struct mxsfb_drm_private *mxsfb, u32 val)
 {
 	return (val & mxsfb->devdata->hs_wdth_mask) <<
@@ -159,6 +166,36 @@ static void mxsfb_disable_controller(struct mxsfb_drm_private *mxsfb)
 		clk_disable_unprepare(mxsfb->clk_disp_axi);
 }
 
+/*
+ * Clear the bit and poll it cleared.  This is usually called with
+ * a reset address and mask being either SFTRST(bit 31) or CLKGATE
+ * (bit 30).
+ */
+static int clear_poll_bit(void __iomem *addr, u32 mask)
+{
+	u32 reg;
+
+	writel(mask, addr + MXS_CLR_ADDR);
+	return readl_poll_timeout(addr, reg, !(reg & mask), 0, RESET_TIMEOUT);
+}
+
+static int mxsfb_reset_block(void __iomem *reset_addr)
+{
+	int ret;
+
+	ret = clear_poll_bit(reset_addr, MODULE_SFTRST);
+	if (ret)
+		return ret;
+
+	writel(MODULE_CLKGATE, reset_addr + MXS_CLR_ADDR);
+
+	ret = clear_poll_bit(reset_addr, MODULE_SFTRST);
+	if (ret)
+		return ret;
+
+	return clear_poll_bit(reset_addr, MODULE_CLKGATE);
+}
+
 static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb)
 {
 	struct drm_display_mode *m = &mxsfb->pipe.crtc.state->adjusted_mode;
@@ -173,6 +210,11 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb)
 	 */
 	mxsfb_enable_axi_clk(mxsfb);
 
+	/* Mandatory eLCDIF reset as per the Reference Manual */
+	err = mxsfb_reset_block(mxsfb->base);
+	if (err)
+		return;
+
 	/* Clear the FIFOs */
 	writel(CTRL1_FIFO_CLEAR, mxsfb->base + LCDC_CTRL1 + REG_SET);
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
index 6a567fe347b3..820a4805916f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
@@ -4,6 +4,7 @@
 
 struct nvkm_alarm {
 	struct list_head head;
+	struct list_head exec;
 	u64 timestamp;
 	void (*func)(struct nvkm_alarm *);
 };
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 39468c218027..7459ef9943ec 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -60,15 +60,13 @@ bool nouveau_is_v1_dsm(void) {
 }
 
 #ifdef CONFIG_VGA_SWITCHEROO
-static const char nouveau_dsm_muid[] = {
-	0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D,
-	0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4,
-};
+static const guid_t nouveau_dsm_muid =
+	GUID_INIT(0x9D95A0A0, 0x0060, 0x4D48,
+		  0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4);
 
-static const char nouveau_op_dsm_muid[] = {
-	0xF8, 0xD8, 0x86, 0xA4, 0xDA, 0x0B, 0x1B, 0x47,
-	0xA7, 0x2B, 0x60, 0x42, 0xA6, 0xB5, 0xBE, 0xE0,
-};
+static const guid_t nouveau_op_dsm_muid =
+	GUID_INIT(0xA486D8F8, 0x0BDA, 0x471B,
+		  0xA7, 0x2B, 0x60, 0x42, 0xA6, 0xB5, 0xBE, 0xE0);
 
 static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t *result)
 {
@@ -86,7 +84,7 @@ static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t *
 		args_buff[i] = (arg >> i * 8) & 0xFF;
 
 	*result = 0;
-	obj = acpi_evaluate_dsm_typed(handle, nouveau_op_dsm_muid, 0x00000100,
+	obj = acpi_evaluate_dsm_typed(handle, &nouveau_op_dsm_muid, 0x00000100,
 				      func, &argv4, ACPI_TYPE_BUFFER);
 	if (!obj) {
 		acpi_handle_info(handle, "failed to evaluate _DSM\n");
@@ -138,7 +136,7 @@ static int nouveau_dsm(acpi_handle handle, int func, int arg)
 		.integer.value = arg,
 	};
 
-	obj = acpi_evaluate_dsm_typed(handle, nouveau_dsm_muid, 0x00000102,
+	obj = acpi_evaluate_dsm_typed(handle, &nouveau_dsm_muid, 0x00000102,
 				      func, &argv4, ACPI_TYPE_INTEGER);
 	if (!obj) {
 		acpi_handle_info(handle, "failed to evaluate _DSM\n");
@@ -259,7 +257,7 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out
 	if (!acpi_has_method(dhandle, "_DSM"))
 		return;
 
-	supports_mux = acpi_check_dsm(dhandle, nouveau_dsm_muid, 0x00000102,
+	supports_mux = acpi_check_dsm(dhandle, &nouveau_dsm_muid, 0x00000102,
 				      1 << NOUVEAU_DSM_POWER);
 	optimus_funcs = nouveau_dsm_get_optimus_functions(dhandle);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 21b10f9840c9..549763f5e17d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -360,6 +360,8 @@ nouveau_display_hpd_work(struct work_struct *work)
 	pm_runtime_get_sync(drm->dev->dev);
 
 	drm_helper_hpd_irq_event(drm->dev);
+	/* enable polling for external displays */
+	drm_kms_helper_poll_enable(drm->dev);
 
 	pm_runtime_mark_last_busy(drm->dev->dev);
 	pm_runtime_put_sync(drm->dev->dev);
@@ -413,10 +415,6 @@ nouveau_display_init(struct drm_device *dev)
 	if (ret)
 		return ret;
 
-	/* enable polling for external displays */
-	if (!dev->mode_config.poll_enabled)
-		drm_kms_helper_poll_enable(dev);
-
 	/* enable hotplug interrupts */
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct nouveau_connector *conn = nouveau_connector(connector);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 2b6ac24ce690..15a13d09d431 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -80,7 +80,7 @@ int nouveau_modeset = -1;
 module_param_named(modeset, nouveau_modeset, int, 0400);
 
 MODULE_PARM_DESC(runpm, "disable (0), force enable (1), optimus only default (-1)");
-int nouveau_runtime_pm = -1;
+static int nouveau_runtime_pm = -1;
 module_param_named(runpm, nouveau_runtime_pm, int, 0400);
 
 static struct drm_driver driver_stub;
@@ -495,13 +495,16 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 	nouveau_fbcon_init(dev);
 	nouveau_led_init(dev);
 
-	if (nouveau_runtime_pm != 0) {
+	if (nouveau_pmops_runtime()) {
 		pm_runtime_use_autosuspend(dev->dev);
 		pm_runtime_set_autosuspend_delay(dev->dev, 5000);
 		pm_runtime_set_active(dev->dev);
 		pm_runtime_allow(dev->dev);
 		pm_runtime_mark_last_busy(dev->dev);
 		pm_runtime_put(dev->dev);
+	} else {
+		/* enable polling for external displays */
+		drm_kms_helper_poll_enable(dev);
 	}
 	return 0;
 
@@ -524,7 +527,7 @@ nouveau_drm_unload(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	if (nouveau_runtime_pm != 0) {
+	if (nouveau_pmops_runtime()) {
 		pm_runtime_get_sync(dev->dev);
 		pm_runtime_forbid(dev->dev);
 	}
@@ -723,6 +726,14 @@ nouveau_pmops_thaw(struct device *dev)
 	return nouveau_do_resume(drm_dev, false);
 }
 
+bool
+nouveau_pmops_runtime()
+{
+	if (nouveau_runtime_pm == -1)
+		return nouveau_is_optimus() || nouveau_is_v1_dsm();
+	return nouveau_runtime_pm == 1;
+}
+
 static int
 nouveau_pmops_runtime_suspend(struct device *dev)
 {
@@ -730,14 +741,7 @@ nouveau_pmops_runtime_suspend(struct device *dev)
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
 	int ret;
 
-	if (nouveau_runtime_pm == 0) {
-		pm_runtime_forbid(dev);
-		return -EBUSY;
-	}
-
-	/* are we optimus enabled? */
-	if (nouveau_runtime_pm == -1 && !nouveau_is_optimus() && !nouveau_is_v1_dsm()) {
-		DRM_DEBUG_DRIVER("failing to power off - not optimus\n");
+	if (!nouveau_pmops_runtime()) {
 		pm_runtime_forbid(dev);
 		return -EBUSY;
 	}
@@ -762,8 +766,10 @@ nouveau_pmops_runtime_resume(struct device *dev)
 	struct nvif_device *device = &nouveau_drm(drm_dev)->client.device;
 	int ret;
 
-	if (nouveau_runtime_pm == 0)
-		return -EINVAL;
+	if (!nouveau_pmops_runtime()) {
+		pm_runtime_forbid(dev);
+		return -EBUSY;
+	}
 
 	pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
@@ -774,9 +780,6 @@ nouveau_pmops_runtime_resume(struct device *dev)
 
 	ret = nouveau_do_resume(drm_dev, true);
 
-	if (!drm_dev->mode_config.poll_enabled)
-		drm_kms_helper_poll_enable(drm_dev);
-
 	/* do magic */
 	nvif_mask(&device->object, 0x088488, (1 << 25), (1 << 25));
 	vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_ON);
@@ -796,14 +799,7 @@ nouveau_pmops_runtime_idle(struct device *dev)
 	struct nouveau_drm *drm = nouveau_drm(drm_dev);
 	struct drm_crtc *crtc;
 
-	if (nouveau_runtime_pm == 0) {
-		pm_runtime_forbid(dev);
-		return -EBUSY;
-	}
-
-	/* are we optimus enabled? */
-	if (nouveau_runtime_pm == -1 && !nouveau_is_optimus() && !nouveau_is_v1_dsm()) {
-		DRM_DEBUG_DRIVER("failing to power off - not optimus\n");
+	if (!nouveau_pmops_runtime()) {
 		pm_runtime_forbid(dev);
 		return -EBUSY;
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index eadec2f49ad3..a11b6aaed325 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -108,8 +108,6 @@ nouveau_cli(struct drm_file *fpriv)
 #include <nvif/object.h>
 #include <nvif/device.h>
 
-extern int nouveau_runtime_pm;
-
 struct nouveau_drm {
 	struct nouveau_cli client;
 	struct drm_device *dev;
@@ -195,6 +193,7 @@ nouveau_drm(struct drm_device *dev)
 
 int nouveau_pmops_suspend(struct device *);
 int nouveau_pmops_resume(struct device *);
+bool nouveau_pmops_runtime(void);
 
 #include <nvkm/core/tegra.h>
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c
index a4aacbc0cec8..02fe0efb9e16 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vga.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vga.c
@@ -87,7 +87,7 @@ void
 nouveau_vga_init(struct nouveau_drm *drm)
 {
 	struct drm_device *dev = drm->dev;
-	bool runtime = false;
+	bool runtime = nouveau_pmops_runtime();
 
 	/* only relevant for PCI devices */
 	if (!dev->pdev)
@@ -99,10 +99,6 @@ nouveau_vga_init(struct nouveau_drm *drm)
 	if (pci_is_thunderbolt_attached(dev->pdev))
 		return;
 
-	if (nouveau_runtime_pm == 1)
-		runtime = true;
-	if ((nouveau_runtime_pm == -1) && (nouveau_is_optimus() || nouveau_is_v1_dsm()))
-		runtime = true;
 	vga_switcheroo_register_client(dev->pdev, &nouveau_switcheroo_ops, runtime);
 
 	if (runtime && nouveau_is_v1_dsm() && !nouveau_is_optimus())
@@ -113,18 +109,13 @@ void
 nouveau_vga_fini(struct nouveau_drm *drm)
 {
 	struct drm_device *dev = drm->dev;
-	bool runtime = false;
+	bool runtime = nouveau_pmops_runtime();
 
 	vga_client_register(dev->pdev, NULL, NULL, NULL);
 
 	if (pci_is_thunderbolt_attached(dev->pdev))
 		return;
 
-	if (nouveau_runtime_pm == 1)
-		runtime = true;
-	if ((nouveau_runtime_pm == -1) && (nouveau_is_optimus() || nouveau_is_v1_dsm()))
-		runtime = true;
-
 	vga_switcheroo_unregister_client(dev->pdev);
 	if (runtime && nouveau_is_v1_dsm() && !nouveau_is_optimus())
 		vga_switcheroo_fini_domain_pm_ops(drm->dev->dev);
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 0e58537352fe..06e564a9ccb2 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -831,8 +831,7 @@ nv50_wndw_atomic_check_release(struct nv50_wndw *wndw,
 static int
 nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw,
 			       struct nv50_wndw_atom *asyw,
-			       struct nv50_head_atom *asyh,
-			       u32 pflip_flags)
+			       struct nv50_head_atom *asyh)
 {
 	struct nouveau_framebuffer *fb = nouveau_framebuffer(asyw->state.fb);
 	struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev);
@@ -848,7 +847,10 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw,
 	asyw->image.h = fb->base.height;
 	asyw->image.kind = (fb->nvbo->tile_flags & 0x0000ff00) >> 8;
 
-	asyw->interval = pflip_flags & DRM_MODE_PAGE_FLIP_ASYNC ? 0 : 1;
+	if (asyh->state.pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC)
+		asyw->interval = 0;
+	else
+		asyw->interval = 1;
 
 	if (asyw->image.kind) {
 		asyw->image.layout = 0;
@@ -887,7 +889,6 @@ nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state)
 	struct nv50_head_atom *harm = NULL, *asyh = NULL;
 	bool varm = false, asyv = false, asym = false;
 	int ret;
-	u32 pflip_flags = 0;
 
 	NV_ATOMIC(drm, "%s atomic_check\n", plane->name);
 	if (asyw->state.crtc) {
@@ -896,7 +897,6 @@ nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state)
 			return PTR_ERR(asyh);
 		asym = drm_atomic_crtc_needs_modeset(&asyh->state);
 		asyv = asyh->state.active;
-		pflip_flags = asyh->state.pageflip_flags;
 	}
 
 	if (armw->state.crtc) {
@@ -912,12 +912,9 @@ nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state)
 		if (memcmp(&armw->point, &asyw->point, sizeof(asyw->point)))
 			asyw->set.point = true;
 
-		if (!varm || asym || armw->state.fb != asyw->state.fb) {
-			ret = nv50_wndw_atomic_check_acquire(
-					wndw, asyw, asyh, pflip_flags);
-			if (ret)
-				return ret;
-		}
+		ret = nv50_wndw_atomic_check_acquire(wndw, asyw, asyh);
+		if (ret)
+			return ret;
 	} else
 	if (varm) {
 		nv50_wndw_atomic_check_release(wndw, asyw, harm);
@@ -1122,9 +1119,13 @@ static void
 nv50_curs_prepare(struct nv50_wndw *wndw, struct nv50_head_atom *asyh,
 		  struct nv50_wndw_atom *asyw)
 {
-	asyh->curs.handle = nv50_disp(wndw->plane.dev)->mast.base.vram.handle;
-	asyh->curs.offset = asyw->image.offset;
-	asyh->set.curs = asyh->curs.visible;
+	u32 handle = nv50_disp(wndw->plane.dev)->mast.base.vram.handle;
+	u32 offset = asyw->image.offset;
+	if (asyh->curs.handle != handle || asyh->curs.offset != offset) {
+		asyh->curs.handle = handle;
+		asyh->curs.offset = offset;
+		asyh->set.curs = asyh->curs.visible;
+	}
 }
 
 static void
@@ -2106,7 +2107,8 @@ nv50_head_atomic_check(struct drm_crtc *crtc, struct drm_crtc_state *state)
 					asyc->set.dither = true;
 			}
 		} else {
-			asyc->set.mask = ~0;
+			if (asyc)
+				asyc->set.mask = ~0;
 			asyh->set.mask = ~0;
 		}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/object.c b/drivers/gpu/drm/nouveau/nvkm/core/object.c
index 89d2e9da11c7..acd76fd4f6d8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/object.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/object.c
@@ -295,7 +295,7 @@ nvkm_object_ctor(const struct nvkm_object_func *func,
 	INIT_LIST_HEAD(&object->head);
 	INIT_LIST_HEAD(&object->tree);
 	RB_CLEAR_NODE(&object->node);
-	WARN_ON(oclass->engine && !object->engine);
+	WARN_ON(IS_ERR(object->engine));
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index 3a24788c3185..a7e55c422501 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -148,7 +148,7 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl)
 	case NVKM_MEM_TARGET_NCOH: target = 3; break;
 	default:
 		WARN_ON(1);
-		return;
+		goto unlock;
 	}
 
 	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(mem) >> 12) |
@@ -160,6 +160,7 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl)
 				       & 0x00100000),
 			       msecs_to_jiffies(2000)) == 0)
 		nvkm_error(subdev, "runlist %d update timeout\n", runl);
+unlock:
 	mutex_unlock(&subdev->mutex);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
index c63975907c90..4a9bd4f1cb93 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
@@ -638,7 +638,6 @@ gf100_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb,
 			return ret;
 	}
 
-	ram->ranks = (nvkm_rd32(device, 0x10f200) & 0x00000004) ? 2 : 1;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c
index e3e2f5e83815..f44682d62f75 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c
@@ -81,10 +81,9 @@ mxm_shadow_dsm(struct nvkm_mxm *mxm, u8 version)
 {
 	struct nvkm_subdev *subdev = &mxm->subdev;
 	struct nvkm_device *device = subdev->device;
-	static char muid[] = {
-		0x00, 0xA4, 0x04, 0x40, 0x7D, 0x91, 0xF2, 0x4C,
-		0xB8, 0x9C, 0x79, 0xB6, 0x2F, 0xD5, 0x56, 0x65
-	};
+	static guid_t muid =
+		GUID_INIT(0x4004A400, 0x917D, 0x4CF2,
+			  0xB8, 0x9C, 0x79, 0xB6, 0x2F, 0xD5, 0x56, 0x65);
 	u32 mxms_args[] = { 0x00000000 };
 	union acpi_object argv4 = {
 		.buffer.type = ACPI_TYPE_BUFFER,
@@ -105,7 +104,7 @@ mxm_shadow_dsm(struct nvkm_mxm *mxm, u8 version)
 	 * unless you pass in exactly the version it supports..
 	 */
 	rev = (version & 0xf0) << 4 | (version & 0x0f);
-	obj = acpi_evaluate_dsm(handle, muid, rev, 0x00000010, &argv4);
+	obj = acpi_evaluate_dsm(handle, &muid, rev, 0x00000010, &argv4);
 	if (!obj) {
 		nvkm_debug(subdev, "DSM MXMS failed\n");
 		return false;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c
index d1cf02d22db1..1b0c793c0192 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c
@@ -116,6 +116,7 @@ ls_ucode_img_load_gr(const struct nvkm_subdev *subdev, struct ls_ucode_img *img,
 	ret = nvkm_firmware_get(subdev->device, f, &sig);
 	if (ret)
 		goto free_data;
+
 	img->sig = kmemdup(sig->data, sig->size, GFP_KERNEL);
 	if (!img->sig) {
 		ret = -ENOMEM;
@@ -126,8 +127,9 @@ ls_ucode_img_load_gr(const struct nvkm_subdev *subdev, struct ls_ucode_img *img,
 	img->ucode_data = ls_ucode_img_build(bl, code, data,
 					     &img->ucode_desc);
 	if (IS_ERR(img->ucode_data)) {
+		kfree(img->sig);
 		ret = PTR_ERR(img->ucode_data);
-		goto free_data;
+		goto free_sig;
 	}
 	img->ucode_size = img->ucode_desc.image_size;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
index df949fa7d05d..be691a7b972f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
@@ -146,7 +146,7 @@ nvkm_therm_update(struct nvkm_therm *therm, int mode)
 		poll = false;
 	}
 
-	if (list_empty(&therm->alarm.head) && poll)
+	if (poll)
 		nvkm_timer_alarm(tmr, 1000000000ULL, &therm->alarm);
 	spin_unlock_irqrestore(&therm->lock, flags);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c
index 91198d79393a..e2feccec25f5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c
@@ -83,7 +83,7 @@ nvkm_fan_update(struct nvkm_fan *fan, bool immediate, int target)
 	spin_unlock_irqrestore(&fan->lock, flags);
 
 	/* schedule next fan update, if not at target speed already */
-	if (list_empty(&fan->alarm.head) && target != duty) {
+	if (target != duty) {
 		u16 bump_period = fan->bios.bump_period;
 		u16 slow_down_period = fan->bios.slow_down_period;
 		u64 delay;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c
index 59701b7a6597..ff9fbe7950e5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c
@@ -53,7 +53,7 @@ nvkm_fantog_update(struct nvkm_fantog *fan, int percent)
 	duty = !nvkm_gpio_get(gpio, 0, DCB_GPIO_FAN, 0xff);
 	nvkm_gpio_set(gpio, 0, DCB_GPIO_FAN, 0xff, duty);
 
-	if (list_empty(&fan->alarm.head) && percent != (duty * 100)) {
+	if (percent != (duty * 100)) {
 		u64 next_change = (percent * fan->period_us) / 100;
 		if (!duty)
 			next_change = fan->period_us - next_change;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c
index b9703c02d8ca..9a79e91fdfdc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c
@@ -185,7 +185,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm)
 	spin_unlock_irqrestore(&therm->sensor.alarm_program_lock, flags);
 
 	/* schedule the next poll in one second */
-	if (therm->func->temp_get(therm) >= 0 && list_empty(&alarm->head))
+	if (therm->func->temp_get(therm) >= 0)
 		nvkm_timer_alarm(tmr, 1000000000ULL, alarm);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
index 07dc82bfe346..2437f7d41ca2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
@@ -36,25 +36,32 @@ nvkm_timer_alarm_trigger(struct nvkm_timer *tmr)
 	unsigned long flags;
 	LIST_HEAD(exec);
 
-	/* move any due alarms off the pending list */
+	/* Process pending alarms. */
 	spin_lock_irqsave(&tmr->lock, flags);
 	list_for_each_entry_safe(alarm, atemp, &tmr->alarms, head) {
-		if (alarm->timestamp <= nvkm_timer_read(tmr))
-			list_move_tail(&alarm->head, &exec);
+		/* Have we hit the earliest alarm that hasn't gone off? */
+		if (alarm->timestamp > nvkm_timer_read(tmr)) {
+			/* Schedule it.  If we didn't race, we're done. */
+			tmr->func->alarm_init(tmr, alarm->timestamp);
+			if (alarm->timestamp > nvkm_timer_read(tmr))
+				break;
+		}
+
+		/* Move to completed list.  We'll drop the lock before
+		 * executing the callback so it can reschedule itself.
+		 */
+		list_del_init(&alarm->head);
+		list_add(&alarm->exec, &exec);
 	}
 
-	/* reschedule interrupt for next alarm time */
-	if (!list_empty(&tmr->alarms)) {
-		alarm = list_first_entry(&tmr->alarms, typeof(*alarm), head);
-		tmr->func->alarm_init(tmr, alarm->timestamp);
-	} else {
+	/* Shut down interrupt if no more pending alarms. */
+	if (list_empty(&tmr->alarms))
 		tmr->func->alarm_fini(tmr);
-	}
 	spin_unlock_irqrestore(&tmr->lock, flags);
 
-	/* execute any pending alarm handlers */
-	list_for_each_entry_safe(alarm, atemp, &exec, head) {
-		list_del_init(&alarm->head);
+	/* Execute completed callbacks. */
+	list_for_each_entry_safe(alarm, atemp, &exec, exec) {
+		list_del(&alarm->exec);
 		alarm->func(alarm);
 	}
 }
@@ -65,24 +72,37 @@ nvkm_timer_alarm(struct nvkm_timer *tmr, u32 nsec, struct nvkm_alarm *alarm)
 	struct nvkm_alarm *list;
 	unsigned long flags;
 
-	alarm->timestamp = nvkm_timer_read(tmr) + nsec;
-
-	/* append new alarm to list, in soonest-alarm-first order */
+	/* Remove alarm from pending list.
+	 *
+	 * This both protects against the corruption of the list,
+	 * and implements alarm rescheduling/cancellation.
+	 */
 	spin_lock_irqsave(&tmr->lock, flags);
-	if (!nsec) {
-		if (!list_empty(&alarm->head))
-			list_del(&alarm->head);
-	} else {
+	list_del_init(&alarm->head);
+
+	if (nsec) {
+		/* Insert into pending list, ordered earliest to latest. */
+		alarm->timestamp = nvkm_timer_read(tmr) + nsec;
 		list_for_each_entry(list, &tmr->alarms, head) {
 			if (list->timestamp > alarm->timestamp)
 				break;
 		}
+
 		list_add_tail(&alarm->head, &list->head);
+
+		/* Update HW if this is now the earliest alarm. */
+		list = list_first_entry(&tmr->alarms, typeof(*list), head);
+		if (list == alarm) {
+			tmr->func->alarm_init(tmr, alarm->timestamp);
+			/* This shouldn't happen if callers aren't stupid.
+			 *
+			 * Worst case scenario is that it'll take roughly
+			 * 4 seconds for the next alarm to trigger.
+			 */
+			WARN_ON(alarm->timestamp <= nvkm_timer_read(tmr));
+		}
 	}
 	spin_unlock_irqrestore(&tmr->lock, flags);
-
-	/* process pending alarms */
-	nvkm_timer_alarm_trigger(tmr);
 }
 
 void
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c
index 7b9ce87f0617..7f48249f41de 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c
@@ -76,8 +76,8 @@ nv04_timer_intr(struct nvkm_timer *tmr)
 	u32 stat = nvkm_rd32(device, NV04_PTIMER_INTR_0);
 
 	if (stat & 0x00000001) {
-		nvkm_timer_alarm_trigger(tmr);
 		nvkm_wr32(device, NV04_PTIMER_INTR_0, 0x00000001);
+		nvkm_timer_alarm_trigger(tmr);
 		stat &= ~0x00000001;
 	}
 
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 058340a002c2..4a340efd8ba6 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -575,8 +575,6 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 	if (ret)
 		return;
 
-	cmd = (struct qxl_cursor_cmd *) qxl_release_map(qdev, release);
-
 	if (fb != old_state->fb) {
 		obj = to_qxl_framebuffer(fb)->obj;
 		user_bo = gem_to_qxl_bo(obj);
@@ -614,6 +612,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 		qxl_bo_kunmap(cursor_bo);
 		qxl_bo_kunmap(user_bo);
 
+		cmd = (struct qxl_cursor_cmd *) qxl_release_map(qdev, release);
 		cmd->u.set.visible = 1;
 		cmd->u.set.shape = qxl_bo_physical_address(qdev,
 							   cursor_bo, 0);
@@ -624,6 +623,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 		if (ret)
 			goto out_free_release;
 
+		cmd = (struct qxl_cursor_cmd *) qxl_release_map(qdev, release);
 		cmd->type = QXL_CURSOR_MOVE;
 	}
 
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index 7ba450832e6b..ea36dc4dd5d2 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -776,6 +776,12 @@ bool ci_dpm_vblank_too_short(struct radeon_device *rdev)
 	u32 vblank_time = r600_dpm_get_vblank_time(rdev);
 	u32 switch_limit = pi->mem_gddr5 ? 450 : 300;
 
+	/* disable mclk switching if the refresh is >120Hz, even if the
+        * blanking period would allow it
+        */
+	if (r600_dpm_get_vrefresh(rdev) > 120)
+		return true;
+
 	if (vblank_time < switch_limit)
 		return true;
 	else
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 53710dd7d5dd..ca44233ceacc 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -7401,7 +7401,7 @@ static inline void cik_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
@@ -7431,7 +7431,7 @@ static inline void cik_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_RX_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
@@ -9150,23 +9150,10 @@ static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
 	a.full = dfixed_const(available_bandwidth);
 	b.full = dfixed_const(wm->num_heads);
 	a.full = dfixed_div(a, b);
+	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+	tmp = min(dfixed_trunc(a), tmp);
 
-	b.full = dfixed_const(mc_latency + 512);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(b, c);
-
-	c.full = dfixed_const(dmif_size);
-	b.full = dfixed_div(c, b);
-
-	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
-
-	b.full = dfixed_const(1000);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(c, b);
-	c.full = dfixed_const(wm->bytes_per_pixel);
-	b.full = dfixed_mul(b, c);
-
-	lb_fill_bw = min(tmp, dfixed_trunc(b));
+	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
 
 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 	b.full = dfixed_const(1000);
@@ -9274,14 +9261,17 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
 {
 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
 	struct dce8_wm_params wm_low, wm_high;
-	u32 pixel_period;
+	u32 active_time;
 	u32 line_time = 0;
 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
 	u32 tmp, wm_mask;
 
 	if (radeon_crtc->base.enabled && num_heads && mode) {
-		pixel_period = 1000000 / (u32)mode->clock;
-		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 
 		/* watermark for high clocks */
 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
@@ -9297,7 +9287,7 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
 
 		wm_high.disp_clk = mode->clock;
 		wm_high.src_width = mode->crtc_hdisplay;
-		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_high.active_time = active_time;
 		wm_high.blank_time = line_time - wm_high.active_time;
 		wm_high.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -9337,7 +9327,7 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
 
 		wm_low.disp_clk = mode->clock;
 		wm_low.src_width = mode->crtc_hdisplay;
-		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_low.active_time = active_time;
 		wm_low.blank_time = line_time - wm_low.active_time;
 		wm_low.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index d1b1e0cc3c25..534637203e70 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2188,13 +2188,7 @@ static u32 evergreen_latency_watermark(struct evergreen_wm_params *wm)
 	b.full = dfixed_const(wm->num_heads);
 	a.full = dfixed_div(a, b);
 
-	b.full = dfixed_const(1000);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(c, b);
-	c.full = dfixed_const(wm->bytes_per_pixel);
-	b.full = dfixed_mul(b, c);
-
-	lb_fill_bw = min(dfixed_trunc(a), dfixed_trunc(b));
+	lb_fill_bw = min(dfixed_trunc(a), wm->disp_clk * wm->bytes_per_pixel / 1000);
 
 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 	b.full = dfixed_const(1000);
@@ -2261,7 +2255,7 @@ static void evergreen_program_watermarks(struct radeon_device *rdev,
 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
 	struct evergreen_wm_params wm_low, wm_high;
 	u32 dram_channels;
-	u32 pixel_period;
+	u32 active_time;
 	u32 line_time = 0;
 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
 	u32 priority_a_mark = 0, priority_b_mark = 0;
@@ -2272,8 +2266,11 @@ static void evergreen_program_watermarks(struct radeon_device *rdev,
 	fixed20_12 a, b, c;
 
 	if (radeon_crtc->base.enabled && num_heads && mode) {
-		pixel_period = 1000000 / (u32)mode->clock;
-		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 		priority_a_cnt = 0;
 		priority_b_cnt = 0;
 		dram_channels = evergreen_get_number_of_dram_channels(rdev);
@@ -2291,7 +2288,7 @@ static void evergreen_program_watermarks(struct radeon_device *rdev,
 
 		wm_high.disp_clk = mode->clock;
 		wm_high.src_width = mode->crtc_hdisplay;
-		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_high.active_time = active_time;
 		wm_high.blank_time = line_time - wm_high.active_time;
 		wm_high.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -2318,7 +2315,7 @@ static void evergreen_program_watermarks(struct radeon_device *rdev,
 
 		wm_low.disp_clk = mode->clock;
 		wm_low.src_width = mode->crtc_hdisplay;
-		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_low.active_time = active_time;
 		wm_low.blank_time = line_time - wm_low.active_time;
 		wm_low.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -4933,7 +4930,7 @@ static void evergreen_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
@@ -4964,7 +4961,7 @@ static void evergreen_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_RX_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index 3eb0c4f9f796..45e1d4e60759 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -203,6 +203,7 @@ static void r420_clock_resume(struct radeon_device *rdev)
 
 static void r420_cp_errata_init(struct radeon_device *rdev)
 {
+	int r;
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 
 	/* RV410 and R420 can lock up if CP DMA to host memory happens
@@ -212,7 +213,8 @@ static void r420_cp_errata_init(struct radeon_device *rdev)
 	 * of the CP init, apparently.
 	 */
 	radeon_scratch_get(rdev, &rdev->config.r300.resync_scratch);
-	radeon_ring_lock(rdev, ring, 8);
+	r = radeon_ring_lock(rdev, ring, 8);
+	WARN_ON(r);
 	radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1));
 	radeon_ring_write(ring, rdev->config.r300.resync_scratch);
 	radeon_ring_write(ring, 0xDEADBEEF);
@@ -221,12 +223,14 @@ static void r420_cp_errata_init(struct radeon_device *rdev)
 
 static void r420_cp_errata_fini(struct radeon_device *rdev)
 {
+	int r;
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 
 	/* Catch the RESYNC we dispatched all the way back,
 	 * at the very beginning of the CP init.
 	 */
-	radeon_ring_lock(rdev, ring, 8);
+	r = radeon_ring_lock(rdev, ring, 8);
+	WARN_ON(r);
 	radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
 	radeon_ring_write(ring, R300_RB3D_DC_FINISH);
 	radeon_ring_unlock_commit(rdev, ring, false);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 0a085176e79b..e06e2d8feab3 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -3988,7 +3988,7 @@ static void r600_irq_ack(struct radeon_device *rdev)
 			WREG32(DC_HPD5_INT_CONTROL, tmp);
 		}
 		if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) {
-			tmp = RREG32(DC_HPD5_INT_CONTROL);
+			tmp = RREG32(DC_HPD6_INT_CONTROL);
 			tmp |= DC_HPDx_INT_ACK;
 			WREG32(DC_HPD6_INT_CONTROL, tmp);
 		}
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index c1c8e2208a21..e562a78510ff 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -375,7 +375,7 @@ struct radeon_fence {
 	unsigned		ring;
 	bool			is_vm_update;
 
-	wait_queue_t		fence_wake;
+	wait_queue_entry_t		fence_wake;
 };
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 432480ff9d22..3178ba0c537c 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -3393,6 +3393,13 @@ void radeon_combios_asic_init(struct drm_device *dev)
 	    rdev->pdev->subsystem_vendor == 0x103c &&
 	    rdev->pdev->subsystem_device == 0x280a)
 		return;
+	/* quirk for rs4xx Toshiba Sattellite L20-183 latop to make it resume
+	 * - it hangs on resume inside the dynclk 1 table.
+	 */
+	if (rdev->family == CHIP_RS400 &&
+	    rdev->pdev->subsystem_vendor == 0x1179 &&
+	    rdev->pdev->subsystem_device == 0xff31)
+	        return;
 
 	/* DYN CLK 1 */
 	table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE);
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index df6b58c08544..3ac671f6c8e1 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -117,11 +117,13 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
 			   + !!r->write_domain;
 
-		/* the first reloc of an UVD job is the msg and that must be in
-		   VRAM, also but everything into VRAM on AGP cards and older
-		   IGP chips to avoid image corruptions */
+		/* The first reloc of an UVD job is the msg and that must be in
+		 * VRAM, the second reloc is the DPB and for WMV that must be in
+		 * VRAM as well. Also put everything into VRAM on AGP cards and older
+		 * IGP chips to avoid image corruptions
+		 */
 		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
-		    (i == 0 || pci_find_capability(p->rdev->ddev->pdev,
+		    (i <= 0 || pci_find_capability(p->rdev->ddev->pdev,
 						   PCI_CAP_ID_AGP) ||
 		     p->rdev->family == CHIP_RS780 ||
 		     p->rdev->family == CHIP_RS880)) {
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 6ecf42783d4b..0a6444d72000 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -136,6 +136,10 @@ static struct radeon_px_quirk radeon_px_quirk_list[] = {
 	 * https://bugzilla.kernel.org/show_bug.cgi?id=51381
 	 */
 	{ PCI_VENDOR_ID_ATI, 0x6840, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
+	/* Asus K53TK laptop with AMD A6-3420M APU and Radeon 7670m GPU
+	 * https://bugs.freedesktop.org/show_bug.cgi?id=101491
+	 */
+	{ PCI_VENDOR_ID_ATI, 0x6741, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
 	/* macbook pro 8.2 */
 	{ PCI_VENDOR_ID_ATI, 0x6741, PCI_VENDOR_ID_APPLE, 0x00e2, RADEON_PX_QUIRK_LONG_WAKEUP },
 	{ 0, 0, 0, 0, 0 },
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index ef09f0a63754..e86f2bd38410 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -158,7 +158,7 @@ int radeon_fence_emit(struct radeon_device *rdev,
  * for the fence locking itself, so unlocked variants are used for
  * fence_signal, and remove_wait_queue.
  */
-static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+static int radeon_fence_check_signaled(wait_queue_entry_t *wait, unsigned mode, int flags, void *key)
 {
 	struct radeon_fence *fence;
 	u64 seq;
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index e3e7cb1d10a2..4761f27f2ca2 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -116,7 +116,7 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
 	if ((radeon_runtime_pm != 0) &&
 	    radeon_has_atpx() &&
 	    ((flags & RADEON_IS_IGP) == 0) &&
-	    !pci_is_thunderbolt_attached(rdev->pdev))
+	    !pci_is_thunderbolt_attached(dev->pdev))
 		flags |= RADEON_IS_PX;
 
 	/* radeon_device_init should report only fatal error
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index bec2ec056de4..8b722297a05c 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -81,7 +81,7 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 	list_del_init(&bo->list);
 	mutex_unlock(&bo->rdev->gem.mutex);
 	radeon_bo_clear_surface_reg(bo);
-	WARN_ON(!list_empty(&bo->va));
+	WARN_ON_ONCE(!list_empty(&bo->va));
 	drm_gem_object_release(&bo->gem_base);
 	kfree(bo);
 }
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 4fdc7bda7a7d..f5e9abfadb56 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -298,7 +298,12 @@ static int radeon_test_create_and_emit_fence(struct radeon_device *rdev,
 			DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
 			return r;
 		}
-		radeon_fence_emit(rdev, fence, ring->idx);
+		r = radeon_fence_emit(rdev, fence, ring->idx);
+		if (r) {
+			DRM_ERROR("Failed to emit fence\n");
+			radeon_ring_unlock_undo(rdev, ring);
+			return r;
+		}
 		radeon_ring_unlock_commit(rdev, ring, false);
 	}
 	return 0;
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 528e5a49a214..5303f25d5280 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -2204,23 +2204,10 @@ static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
 	a.full = dfixed_const(available_bandwidth);
 	b.full = dfixed_const(wm->num_heads);
 	a.full = dfixed_div(a, b);
+	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+	tmp = min(dfixed_trunc(a), tmp);
 
-	b.full = dfixed_const(mc_latency + 512);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(b, c);
-
-	c.full = dfixed_const(dmif_size);
-	b.full = dfixed_div(c, b);
-
-	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
-
-	b.full = dfixed_const(1000);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(c, b);
-	c.full = dfixed_const(wm->bytes_per_pixel);
-	b.full = dfixed_mul(b, c);
-
-	lb_fill_bw = min(tmp, dfixed_trunc(b));
+	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
 
 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 	b.full = dfixed_const(1000);
@@ -2287,7 +2274,7 @@ static void dce6_program_watermarks(struct radeon_device *rdev,
 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
 	struct dce6_wm_params wm_low, wm_high;
 	u32 dram_channels;
-	u32 pixel_period;
+	u32 active_time;
 	u32 line_time = 0;
 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
 	u32 priority_a_mark = 0, priority_b_mark = 0;
@@ -2297,8 +2284,11 @@ static void dce6_program_watermarks(struct radeon_device *rdev,
 	fixed20_12 a, b, c;
 
 	if (radeon_crtc->base.enabled && num_heads && mode) {
-		pixel_period = 1000000 / (u32)mode->clock;
-		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 		priority_a_cnt = 0;
 		priority_b_cnt = 0;
 
@@ -2320,7 +2310,7 @@ static void dce6_program_watermarks(struct radeon_device *rdev,
 
 		wm_high.disp_clk = mode->clock;
 		wm_high.src_width = mode->crtc_hdisplay;
-		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_high.active_time = active_time;
 		wm_high.blank_time = line_time - wm_high.active_time;
 		wm_high.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -2347,7 +2337,7 @@ static void dce6_program_watermarks(struct radeon_device *rdev,
 
 		wm_low.disp_clk = mode->clock;
 		wm_low.src_width = mode->crtc_hdisplay;
-		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_low.active_time = active_time;
 		wm_low.blank_time = line_time - wm_low.active_time;
 		wm_low.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -6330,7 +6320,7 @@ static inline void si_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
@@ -6361,7 +6351,7 @@ static inline void si_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_RX_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
index d8fa7a9c9240..ce5f2d1f9994 100644
--- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
+++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
@@ -245,8 +245,6 @@ rockchip_dp_drm_encoder_atomic_check(struct drm_encoder *encoder,
 				      struct drm_connector_state *conn_state)
 {
 	struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc_state);
-	struct rockchip_dp_device *dp = to_dp(encoder);
-	int ret;
 
 	/*
 	 * The hardware IC designed that VOP must output the RGB10 video
@@ -258,16 +256,6 @@ rockchip_dp_drm_encoder_atomic_check(struct drm_encoder *encoder,
 
 	s->output_mode = ROCKCHIP_OUT_MODE_AAAA;
 	s->output_type = DRM_MODE_CONNECTOR_eDP;
-	if (dp->data->chip_type == RK3399_EDP) {
-		/*
-		 * For RK3399, VOP Lit must code the out mode to RGB888,
-		 * VOP Big must code the out mode to RGB10.
-		 */
-		ret = drm_of_encoder_active_endpoint_id(dp->dev->of_node,
-							encoder);
-		if (ret > 0)
-			s->output_mode = ROCKCHIP_OUT_MODE_P888;
-	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c
index a2169dd3d26b..14fa1f8351e8 100644
--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c
+++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c
@@ -615,7 +615,6 @@ static void cdn_dp_encoder_enable(struct drm_encoder *encoder)
 {
 	struct cdn_dp_device *dp = encoder_to_dp(encoder);
 	int ret, val;
-	struct rockchip_crtc_state *state;
 
 	ret = drm_of_encoder_active_endpoint_id(dp->dev->of_node, encoder);
 	if (ret < 0) {
@@ -625,14 +624,10 @@ static void cdn_dp_encoder_enable(struct drm_encoder *encoder)
 
 	DRM_DEV_DEBUG_KMS(dp->dev, "vop %s output to cdn-dp\n",
 			  (ret) ? "LIT" : "BIG");
-	state = to_rockchip_crtc_state(encoder->crtc->state);
-	if (ret) {
+	if (ret)
 		val = DP_SEL_VOP_LIT | (DP_SEL_VOP_LIT << 16);
-		state->output_mode = ROCKCHIP_OUT_MODE_P888;
-	} else {
+	else
 		val = DP_SEL_VOP_LIT << 16;
-		state->output_mode = ROCKCHIP_OUT_MODE_AAAA;
-	}
 
 	ret = cdn_dp_grf_write(dp, GRF_SOC_CON9, val);
 	if (ret)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 3f7a82d1e095..45589d6ce65e 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -875,6 +875,7 @@ static bool vop_crtc_mode_fixup(struct drm_crtc *crtc,
 static void vop_crtc_enable(struct drm_crtc *crtc)
 {
 	struct vop *vop = to_vop(crtc);
+	const struct vop_data *vop_data = vop->data;
 	struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc->state);
 	struct drm_display_mode *adjusted_mode = &crtc->state->adjusted_mode;
 	u16 hsync_len = adjusted_mode->hsync_end - adjusted_mode->hsync_start;
@@ -967,6 +968,13 @@ static void vop_crtc_enable(struct drm_crtc *crtc)
 		DRM_DEV_ERROR(vop->dev, "unsupported connector_type [%d]\n",
 			      s->output_type);
 	}
+
+	/*
+	 * if vop is not support RGB10 output, need force RGB10 to RGB888.
+	 */
+	if (s->output_mode == ROCKCHIP_OUT_MODE_AAAA &&
+	    !(vop_data->feature & VOP_FEATURE_OUTPUT_RGB10))
+		s->output_mode = ROCKCHIP_OUT_MODE_P888;
 	VOP_CTRL_SET(vop, out_mode, s->output_mode);
 
 	VOP_CTRL_SET(vop, htotal_pw, (htotal << 16) | hsync_len);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
index 5a4faa85dbd2..9979fd0c2282 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
@@ -142,6 +142,9 @@ struct vop_data {
 	const struct vop_intr *intr;
 	const struct vop_win_data *win;
 	unsigned int win_size;
+
+#define VOP_FEATURE_OUTPUT_RGB10	BIT(0)
+	u64 feature;
 };
 
 /* interrupt define */
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
index 0da44442aab0..bafd698a28b1 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
@@ -275,6 +275,7 @@ static const struct vop_intr rk3288_vop_intr = {
 static const struct vop_data rk3288_vop = {
 	.init_table = rk3288_init_reg_table,
 	.table_size = ARRAY_SIZE(rk3288_init_reg_table),
+	.feature = VOP_FEATURE_OUTPUT_RGB10,
 	.intr = &rk3288_vop_intr,
 	.ctrl = &rk3288_ctrl_data,
 	.win = rk3288_vop_win_data,
@@ -343,6 +344,7 @@ static const struct vop_reg_data rk3399_init_reg_table[] = {
 static const struct vop_data rk3399_vop_big = {
 	.init_table = rk3399_init_reg_table,
 	.table_size = ARRAY_SIZE(rk3399_init_reg_table),
+	.feature = VOP_FEATURE_OUTPUT_RGB10,
 	.intr = &rk3399_vop_intr,
 	.ctrl = &rk3399_ctrl_data,
 	/*
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 9a1e34e48f64..81f86a67c10d 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -451,18 +451,6 @@ fail:
 
 
 #ifdef CONFIG_DRM_TEGRA_STAGING
-static struct tegra_drm_context *
-tegra_drm_file_get_context(struct tegra_drm_file *file, u32 id)
-{
-	struct tegra_drm_context *context;
-
-	mutex_lock(&file->lock);
-	context = idr_find(&file->contexts, id);
-	mutex_unlock(&file->lock);
-
-	return context;
-}
-
 static int tegra_gem_create(struct drm_device *drm, void *data,
 			    struct drm_file *file)
 {
@@ -551,7 +539,7 @@ static int tegra_client_open(struct tegra_drm_file *fpriv,
 	if (err < 0)
 		return err;
 
-	err = idr_alloc(&fpriv->contexts, context, 0, 0, GFP_KERNEL);
+	err = idr_alloc(&fpriv->contexts, context, 1, 0, GFP_KERNEL);
 	if (err < 0) {
 		client->ops->close_channel(context);
 		return err;
@@ -606,7 +594,7 @@ static int tegra_close_channel(struct drm_device *drm, void *data,
 
 	mutex_lock(&fpriv->lock);
 
-	context = tegra_drm_file_get_context(fpriv, args->context);
+	context = idr_find(&fpriv->contexts, args->context);
 	if (!context) {
 		err = -EINVAL;
 		goto unlock;
@@ -631,7 +619,7 @@ static int tegra_get_syncpt(struct drm_device *drm, void *data,
 
 	mutex_lock(&fpriv->lock);
 
-	context = tegra_drm_file_get_context(fpriv, args->context);
+	context = idr_find(&fpriv->contexts, args->context);
 	if (!context) {
 		err = -ENODEV;
 		goto unlock;
@@ -660,7 +648,7 @@ static int tegra_submit(struct drm_device *drm, void *data,
 
 	mutex_lock(&fpriv->lock);
 
-	context = tegra_drm_file_get_context(fpriv, args->context);
+	context = idr_find(&fpriv->contexts, args->context);
 	if (!context) {
 		err = -ENODEV;
 		goto unlock;
@@ -685,7 +673,7 @@ static int tegra_get_syncpt_base(struct drm_device *drm, void *data,
 
 	mutex_lock(&fpriv->lock);
 
-	context = tegra_drm_file_get_context(fpriv, args->context);
+	context = idr_find(&fpriv->contexts, args->context);
 	if (!context) {
 		err = -ENODEV;
 		goto unlock;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e44626a2e698..a6d7fcb99c0b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1394,7 +1394,7 @@ EXPORT_SYMBOL(ttm_bo_evict_mm);
 int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 			unsigned long p_size)
 {
-	int ret = -EINVAL;
+	int ret;
 	struct ttm_mem_type_manager *man;
 	unsigned i;
 
@@ -1412,7 +1412,6 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 		return ret;
 	man->bdev = bdev;
 
-	ret = 0;
 	if (type != TTM_PL_SYSTEM) {
 		ret = (*man->func->init)(man, p_size);
 		if (ret)
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 491866865c33..1e1c90b30d4a 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -175,8 +175,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
 	DRM_INFO("virgl 3d acceleration not supported by guest\n");
 #endif
 
-	ret = vgdev->vdev->config->find_vqs(vgdev->vdev, 2, vqs,
-					    callbacks, names, NULL);
+	ret = virtio_find_vqs(vgdev->vdev, 2, vqs, callbacks, names, NULL);
 	if (ret) {
 		DRM_ERROR("failed to find virt queues\n");
 		goto err_vqs;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
index 13db8a2851ed..1f013d45c9e9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
@@ -321,6 +321,7 @@ void vmw_cmdbuf_res_man_destroy(struct vmw_cmdbuf_res_manager *man)
 	list_for_each_entry_safe(entry, next, &man->list, head)
 		vmw_cmdbuf_res_free(man, entry);
 
+	drm_ht_remove(&man->resources);
 	kfree(man);
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 130d51c5ec6a..4b948fba9eec 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -41,9 +41,9 @@
 #include <drm/ttm/ttm_module.h>
 #include "vmwgfx_fence.h"
 
-#define VMWGFX_DRIVER_DATE "20170221"
+#define VMWGFX_DRIVER_DATE "20170607"
 #define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 12
+#define VMWGFX_DRIVER_MINOR 13
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index b6a0806b06bf..a1c68e6a689e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -368,6 +368,8 @@ static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv,
 				return fifo_state->static_buffer;
 			else {
 				fifo_state->dynamic_buffer = vmalloc(bytes);
+				if (!fifo_state->dynamic_buffer)
+					goto out_err;
 				return fifo_state->dynamic_buffer;
 			}
 		}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index ef9f3a2a4030..1d2db5d912b0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -274,108 +274,6 @@ void vmw_kms_cursor_post_execbuf(struct vmw_private *dev_priv)
 }
 
 
-
-/**
- * vmw_du_cursor_plane_update() - Update cursor image and location
- *
- * @plane: plane object to update
- * @crtc: owning CRTC of @plane
- * @fb: framebuffer to flip onto plane
- * @crtc_x: x offset of plane on crtc
- * @crtc_y: y offset of plane on crtc
- * @crtc_w: width of plane rectangle on crtc
- * @crtc_h: height of plane rectangle on crtc
- * @src_x: Not used
- * @src_y: Not used
- * @src_w: Not used
- * @src_h: Not used
- *
- *
- * RETURNS:
- * Zero on success, error code on failure
- */
-int vmw_du_cursor_plane_update(struct drm_plane *plane,
-			       struct drm_crtc *crtc,
-			       struct drm_framebuffer *fb,
-			       int crtc_x, int crtc_y,
-			       unsigned int crtc_w,
-			       unsigned int crtc_h,
-			       uint32_t src_x, uint32_t src_y,
-			       uint32_t src_w, uint32_t src_h)
-{
-	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
-	struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
-	struct vmw_surface *surface = NULL;
-	struct vmw_dma_buffer *dmabuf = NULL;
-	s32 hotspot_x, hotspot_y;
-	int ret;
-
-	hotspot_x = du->hotspot_x + fb->hot_x;
-	hotspot_y = du->hotspot_y + fb->hot_y;
-
-	/* A lot of the code assumes this */
-	if (crtc_w != 64 || crtc_h != 64) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (vmw_framebuffer_to_vfb(fb)->dmabuf)
-		dmabuf = vmw_framebuffer_to_vfbd(fb)->buffer;
-	else
-		surface = vmw_framebuffer_to_vfbs(fb)->surface;
-
-	if (surface && !surface->snooper.image) {
-		DRM_ERROR("surface not suitable for cursor\n");
-		ret = -EINVAL;
-		goto out;
-	}
-
-	/* setup new image */
-	ret = 0;
-	if (surface) {
-		/* vmw_user_surface_lookup takes one reference */
-		du->cursor_surface = surface;
-
-		du->cursor_age = du->cursor_surface->snooper.age;
-
-		ret = vmw_cursor_update_image(dev_priv, surface->snooper.image,
-					      64, 64, hotspot_x, hotspot_y);
-	} else if (dmabuf) {
-		/* vmw_user_surface_lookup takes one reference */
-		du->cursor_dmabuf = dmabuf;
-
-		ret = vmw_cursor_update_dmabuf(dev_priv, dmabuf, crtc_w, crtc_h,
-					       hotspot_x, hotspot_y);
-	} else {
-		vmw_cursor_update_position(dev_priv, false, 0, 0);
-		goto out;
-	}
-
-	if (!ret) {
-		du->cursor_x = crtc_x + du->set_gui_x;
-		du->cursor_y = crtc_y + du->set_gui_y;
-
-		vmw_cursor_update_position(dev_priv, true,
-					   du->cursor_x + hotspot_x,
-					   du->cursor_y + hotspot_y);
-	}
-
-out:
-	return ret;
-}
-
-
-int vmw_du_cursor_plane_disable(struct drm_plane *plane)
-{
-	if (plane->fb) {
-		drm_framebuffer_unreference(plane->fb);
-		plane->fb = NULL;
-	}
-
-	return -EINVAL;
-}
-
-
 void vmw_du_cursor_plane_destroy(struct drm_plane *plane)
 {
 	vmw_cursor_update_position(plane->dev->dev_private, false, 0, 0);
@@ -473,18 +371,6 @@ vmw_du_cursor_plane_prepare_fb(struct drm_plane *plane,
 
 
 void
-vmw_du_cursor_plane_atomic_disable(struct drm_plane *plane,
-				   struct drm_plane_state *old_state)
-{
-	struct drm_crtc *crtc = plane->state->crtc ?: old_state->crtc;
-	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
-
-	drm_atomic_set_fb_for_plane(plane->state, NULL);
-	vmw_cursor_update_position(dev_priv, false, 0, 0);
-}
-
-
-void
 vmw_du_cursor_plane_atomic_update(struct drm_plane *plane,
 				  struct drm_plane_state *old_state)
 {
@@ -1498,6 +1384,7 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv,
 	 */
 	if (vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)  &&
 	    dmabuf && only_2d &&
+	    mode_cmd->width > 64 &&  /* Don't create a proxy for cursor */
 	    dev_priv->active_display_unit == vmw_du_screen_target) {
 		ret = vmw_create_dmabuf_proxy(dev_priv->dev, mode_cmd,
 					      dmabuf, &surface);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index 13f2f1d2818a..5f8d678ae675 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -256,10 +256,6 @@ int vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
 			   u16 *r, u16 *g, u16 *b,
 			   uint32_t size,
 			   struct drm_modeset_acquire_ctx *ctx);
-int vmw_du_crtc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv,
-			    uint32_t handle, uint32_t width, uint32_t height,
-			    int32_t hot_x, int32_t hot_y);
-int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y);
 int vmw_du_connector_set_property(struct drm_connector *connector,
 				  struct drm_property *property,
 				  uint64_t val);
@@ -339,15 +335,6 @@ void vmw_kms_create_implicit_placement_property(struct vmw_private *dev_priv,
 /* Universal Plane Helpers */
 void vmw_du_primary_plane_destroy(struct drm_plane *plane);
 void vmw_du_cursor_plane_destroy(struct drm_plane *plane);
-int vmw_du_cursor_plane_disable(struct drm_plane *plane);
-int vmw_du_cursor_plane_update(struct drm_plane *plane,
-			       struct drm_crtc *crtc,
-			       struct drm_framebuffer *fb,
-			       int crtc_x, int crtc_y,
-			       unsigned int crtc_w,
-			       unsigned int crtc_h,
-			       uint32_t src_x, uint32_t src_y,
-			       uint32_t src_w, uint32_t src_h);
 
 /* Atomic Helpers */
 int vmw_du_primary_plane_atomic_check(struct drm_plane *plane,
@@ -356,8 +343,6 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane,
 				     struct drm_plane_state *state);
 void vmw_du_cursor_plane_atomic_update(struct drm_plane *plane,
 				       struct drm_plane_state *old_state);
-void vmw_du_cursor_plane_atomic_disable(struct drm_plane *plane,
-					struct drm_plane_state *old_state);
 int vmw_du_cursor_plane_prepare_fb(struct drm_plane *plane,
 				   struct drm_plane_state *new_state);
 void vmw_du_plane_cleanup_fb(struct drm_plane *plane,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index bad31bdf09b6..50be1f034f9e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -56,6 +56,8 @@ enum stdu_content_type {
  * @right: Right side of bounding box.
  * @top: Top side of bounding box.
  * @bottom: Bottom side of bounding box.
+ * @fb_left: Left side of the framebuffer/content bounding box
+ * @fb_top: Top of the framebuffer/content bounding box
  * @buf: DMA buffer when DMA-ing between buffer and screen targets.
  * @sid: Surface ID when copying between surface and screen targets.
  */
@@ -63,6 +65,7 @@ struct vmw_stdu_dirty {
 	struct vmw_kms_dirty base;
 	SVGA3dTransferType  transfer;
 	s32 left, right, top, bottom;
+	s32 fb_left, fb_top;
 	u32 pitch;
 	union {
 		struct vmw_dma_buffer *buf;
@@ -647,7 +650,7 @@ static void vmw_stdu_dmabuf_fifo_commit(struct vmw_kms_dirty *dirty)
  *
  * @dirty: The closure structure.
  *
- * This function calculates the bounding box for all the incoming clips
+ * This function calculates the bounding box for all the incoming clips.
  */
 static void vmw_stdu_dmabuf_cpu_clip(struct vmw_kms_dirty *dirty)
 {
@@ -656,11 +659,19 @@ static void vmw_stdu_dmabuf_cpu_clip(struct vmw_kms_dirty *dirty)
 
 	dirty->num_hits = 1;
 
-	/* Calculate bounding box */
+	/* Calculate destination bounding box */
 	ddirty->left = min_t(s32, ddirty->left, dirty->unit_x1);
 	ddirty->top = min_t(s32, ddirty->top, dirty->unit_y1);
 	ddirty->right = max_t(s32, ddirty->right, dirty->unit_x2);
 	ddirty->bottom = max_t(s32, ddirty->bottom, dirty->unit_y2);
+
+	/*
+	 * Calculate content bounding box.  We only need the top-left
+	 * coordinate because width and height will be the same as the
+	 * destination bounding box above
+	 */
+	ddirty->fb_left = min_t(s32, ddirty->fb_left, dirty->fb_x);
+	ddirty->fb_top  = min_t(s32, ddirty->fb_top, dirty->fb_y);
 }
 
 
@@ -697,11 +708,11 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 	/* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
 	src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
 	src = ttm_kmap_obj_virtual(&stdu->host_map, &not_used);
-	src += dirty->unit_y1 * src_pitch + dirty->unit_x1 * stdu->cpp;
+	src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;
 
 	dst_pitch = ddirty->pitch;
 	dst = ttm_kmap_obj_virtual(&stdu->guest_map, &not_used);
-	dst += dirty->fb_y * dst_pitch + dirty->fb_x * stdu->cpp;
+	dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
 
 
 	/* Figure out the real direction */
@@ -760,7 +771,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 	}
 
 out_cleanup:
-	ddirty->left = ddirty->top = S32_MAX;
+	ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
 	ddirty->right = ddirty->bottom = S32_MIN;
 }
 
@@ -812,6 +823,7 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
 		SVGA3D_READ_HOST_VRAM;
 	ddirty.left = ddirty.top = S32_MAX;
 	ddirty.right = ddirty.bottom = S32_MIN;
+	ddirty.fb_left = ddirty.fb_top = S32_MAX;
 	ddirty.pitch = vfb->base.pitches[0];
 	ddirty.buf = buf;
 	ddirty.base.fifo_commit = vmw_stdu_dmabuf_fifo_commit;
@@ -1355,6 +1367,11 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
 		DRM_ERROR("Failed to bind surface to STDU.\n");
 	else
 		crtc->primary->fb = plane->state->fb;
+
+	ret = vmw_stdu_update_st(dev_priv, stdu);
+
+	if (ret)
+		DRM_ERROR("Failed to update STDU.\n");
 }
 
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index 7681341fe32b..6b70bd259953 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -1274,11 +1274,14 @@ int vmw_gb_surface_define_ioctl(struct drm_device *dev, void *data,
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
 	int ret;
 	uint32_t size;
-	uint32_t backup_handle;
+	uint32_t backup_handle = 0;
 
 	if (req->multisample_count != 0)
 		return -EINVAL;
 
+	if (req->mip_levels > DRM_VMW_MAX_MIP_LEVELS)
+		return -EINVAL;
+
 	if (unlikely(vmw_user_surface_size == 0))
 		vmw_user_surface_size = ttm_round_pot(sizeof(*user_srf)) +
 			128;
@@ -1314,12 +1317,16 @@ int vmw_gb_surface_define_ioctl(struct drm_device *dev, void *data,
 		ret = vmw_user_dmabuf_lookup(tfile, req->buffer_handle,
 					     &res->backup,
 					     &user_srf->backup_base);
-		if (ret == 0 && res->backup->base.num_pages * PAGE_SIZE <
-		    res->backup_size) {
-			DRM_ERROR("Surface backup buffer is too small.\n");
-			vmw_dmabuf_unreference(&res->backup);
-			ret = -EINVAL;
-			goto out_unlock;
+		if (ret == 0) {
+			if (res->backup->base.num_pages * PAGE_SIZE <
+			    res->backup_size) {
+				DRM_ERROR("Surface backup buffer is too small.\n");
+				vmw_dmabuf_unreference(&res->backup);
+				ret = -EINVAL;
+				goto out_unlock;
+			} else {
+				backup_handle = req->buffer_handle;
+			}
 		}
 	} else if (req->drm_surface_flags & drm_vmw_surface_flag_create_buffer)
 		ret = vmw_user_dmabuf_alloc(dev_priv, tfile,
@@ -1491,7 +1498,7 @@ int vmw_surface_gb_priv_define(struct drm_device *dev,
 				 dev_priv->stdu_max_height);
 
 		if (size.width > max_width || size.height > max_height) {
-			DRM_ERROR("%ux%u\n, exeeds max surface size %ux%u",
+			DRM_ERROR("%ux%u\n, exceeds max surface size %ux%u",
 				  size.width, size.height,
 				  max_width, max_height);
 			return -EINVAL;
diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig
index b2fd029d67b3..91916326957f 100644
--- a/drivers/gpu/host1x/Kconfig
+++ b/drivers/gpu/host1x/Kconfig
@@ -1,6 +1,7 @@
 config TEGRA_HOST1X
 	tristate "NVIDIA Tegra host1x driver"
 	depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
+	select IOMMU_IOVA if IOMMU_SUPPORT
 	help
 	  Driver for the NVIDIA Tegra host1x hardware.
 
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index f05ebb14fa63..ac65f52850a6 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -172,7 +172,7 @@ static int host1x_probe(struct platform_device *pdev)
 
 	host->rst = devm_reset_control_get(&pdev->dev, "host1x");
 	if (IS_ERR(host->rst)) {
-		err = PTR_ERR(host->clk);
+		err = PTR_ERR(host->rst);
 		dev_err(&pdev->dev, "failed to get reset: %d\n", err);
 		return err;
 	}
diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index 16d556816b5f..2fb5f432a54c 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -725,15 +725,16 @@ void ipu_set_ic_src_mux(struct ipu_soc *ipu, int csi_id, bool vdi)
 	spin_lock_irqsave(&ipu->lock, flags);
 
 	val = ipu_cm_read(ipu, IPU_CONF);
-	if (vdi) {
+	if (vdi)
 		val |= IPU_CONF_IC_INPUT;
-	} else {
+	else
 		val &= ~IPU_CONF_IC_INPUT;
-		if (csi_id == 1)
-			val |= IPU_CONF_CSI_SEL;
-		else
-			val &= ~IPU_CONF_CSI_SEL;
-	}
+
+	if (csi_id == 1)
+		val |= IPU_CONF_CSI_SEL;
+	else
+		val &= ~IPU_CONF_CSI_SEL;
+
 	ipu_cm_write(ipu, val, IPU_CONF);
 
 	spin_unlock_irqrestore(&ipu->lock, flags);
diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c
index c55563379e2e..c35f74c83065 100644
--- a/drivers/gpu/ipu-v3/ipu-pre.c
+++ b/drivers/gpu/ipu-v3/ipu-pre.c
@@ -131,8 +131,6 @@ int ipu_pre_get(struct ipu_pre *pre)
 	if (pre->in_use)
 		return -EBUSY;
 
-	clk_prepare_enable(pre->clk_axi);
-
 	/* first get the engine out of reset and remove clock gating */
 	writel(0, pre->regs + IPU_PRE_CTRL);
 
@@ -149,12 +147,7 @@ int ipu_pre_get(struct ipu_pre *pre)
 
 void ipu_pre_put(struct ipu_pre *pre)
 {
-	u32 val;
-
-	val = IPU_PRE_CTRL_SFTRST | IPU_PRE_CTRL_CLKGATE;
-	writel(val, pre->regs + IPU_PRE_CTRL);
-
-	clk_disable_unprepare(pre->clk_axi);
+	writel(IPU_PRE_CTRL_SFTRST, pre->regs + IPU_PRE_CTRL);
 
 	pre->in_use = false;
 }
@@ -249,6 +242,8 @@ static int ipu_pre_probe(struct platform_device *pdev)
 	if (!pre->buffer_virt)
 		return -ENOMEM;
 
+	clk_prepare_enable(pre->clk_axi);
+
 	pre->dev = dev;
 	platform_set_drvdata(pdev, pre);
 	mutex_lock(&ipu_pre_list_mutex);
@@ -268,6 +263,8 @@ static int ipu_pre_remove(struct platform_device *pdev)
 	available_pres--;
 	mutex_unlock(&ipu_pre_list_mutex);
 
+	clk_disable_unprepare(pre->clk_axi);
+
 	if (pre->buffer_virt)
 		gen_pool_free(pre->iram, (unsigned long)pre->buffer_virt,
 			      IPU_PRE_MAX_WIDTH * IPU_PRE_NUM_SCANLINES * 4);
diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
index 92f1452dad57..76875f6299b8 100644
--- a/drivers/gpu/vga/vgaarb.c
+++ b/drivers/gpu/vga/vgaarb.c
@@ -417,7 +417,7 @@ int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible)
 {
 	struct vga_device *vgadev, *conflict;
 	unsigned long flags;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int rc = 0;
 
 	vga_check_first_use();
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index fe40e5e499dd..687705c50794 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -275,10 +275,12 @@ config HID_EMS_FF
 	 - Trio Linker Plus II
 
 config HID_ELECOM
-	tristate "ELECOM BM084 bluetooth mouse"
+	tristate "ELECOM HID devices"
 	depends on HID
 	---help---
-	Support for the ELECOM BM084 (bluetooth mouse).
+	Support for ELECOM devices:
+	  - BM084 Bluetooth Mouse
+	  - DEFT Trackball (Wired and wireless)
 
 config HID_ELO
 	tristate "ELO USB 4000/4500 touchscreen"
diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index 16df6cc90235..a6268f2f7408 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -69,6 +69,7 @@ MODULE_DESCRIPTION("Asus HID Keyboard and TouchPad");
 #define QUIRK_IS_MULTITOUCH		BIT(3)
 #define QUIRK_NO_CONSUMER_USAGES	BIT(4)
 #define QUIRK_USE_KBD_BACKLIGHT		BIT(5)
+#define QUIRK_T100_KEYBOARD		BIT(6)
 
 #define I2C_KEYBOARD_QUIRKS			(QUIRK_FIX_NOTEBOOK_REPORT | \
 						 QUIRK_NO_INIT_REPORTS | \
@@ -536,6 +537,8 @@ static void asus_remove(struct hid_device *hdev)
 		drvdata->kbd_backlight->removed = true;
 		cancel_work_sync(&drvdata->kbd_backlight->work);
 	}
+
+	hid_hw_stop(hdev);
 }
 
 static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc,
@@ -548,6 +551,12 @@ static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		hid_info(hdev, "Fixing up Asus notebook report descriptor\n");
 		rdesc[55] = 0xdd;
 	}
+	if (drvdata->quirks & QUIRK_T100_KEYBOARD &&
+		 *rsize == 76 && rdesc[73] == 0x81 && rdesc[74] == 0x01) {
+		hid_info(hdev, "Fixing up Asus T100 keyb report descriptor\n");
+		rdesc[74] &= ~HID_MAIN_ITEM_CONSTANT;
+	}
+
 	return rdesc;
 }
 
@@ -560,6 +569,9 @@ static const struct hid_device_id asus_devices[] = {
 		USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
 		USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2), QUIRK_USE_KBD_BACKLIGHT },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
+		USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD),
+	  QUIRK_T100_KEYBOARD | QUIRK_NO_CONSUMER_USAGES },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, asus_devices);
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 37084b645785..6e040692f1d8 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -826,11 +826,35 @@ static int hid_scan_report(struct hid_device *hid)
 				 * hid-rmi should take care of them,
 				 * not hid-generic
 				 */
-				if (IS_ENABLED(CONFIG_HID_RMI))
-					hid->group = HID_GROUP_RMI;
+				hid->group = HID_GROUP_RMI;
 		break;
 	}
 
+	/* fall back to generic driver in case specific driver doesn't exist */
+	switch (hid->group) {
+	case HID_GROUP_MULTITOUCH_WIN_8:
+		/* fall-through */
+	case HID_GROUP_MULTITOUCH:
+		if (!IS_ENABLED(CONFIG_HID_MULTITOUCH))
+			hid->group = HID_GROUP_GENERIC;
+		break;
+	case HID_GROUP_SENSOR_HUB:
+		if (!IS_ENABLED(CONFIG_HID_SENSOR_HUB))
+			hid->group = HID_GROUP_GENERIC;
+		break;
+	case HID_GROUP_RMI:
+		if (!IS_ENABLED(CONFIG_HID_RMI))
+			hid->group = HID_GROUP_GENERIC;
+		break;
+	case HID_GROUP_WACOM:
+		if (!IS_ENABLED(CONFIG_HID_WACOM))
+			hid->group = HID_GROUP_GENERIC;
+		break;
+	case HID_GROUP_LOGITECH_DJ_DEVICE:
+		if (!IS_ENABLED(CONFIG_HID_LOGITECH_DJ))
+			hid->group = HID_GROUP_GENERIC;
+		break;
+	}
 	vfree(parser);
 	return 0;
 }
@@ -1763,15 +1787,23 @@ EXPORT_SYMBOL_GPL(hid_disconnect);
  * used as a driver. See hid_scan_report().
  */
 static const struct hid_device_id hid_have_special_driver[] = {
+#if IS_ENABLED(CONFIG_HID_A4TECH)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_WCP32PU) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_X5_005D) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_RP_649) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ACCUTOUCH)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_ACCUTOUCH_2216) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ACRUX)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ACRUX, 0x0802) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ACRUX, 0xf705) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ALPS)
 	{ HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_DUAL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_APPLE)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MIGHTYMOUSE) },
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI) },
@@ -1792,11 +1824,6 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL2) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL3) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS) },
@@ -1851,59 +1878,100 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_ANSI) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
+#endif
+#if IS_ENABLED(CONFIG_HID_APPLEIR)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL2) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL3) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ASUS)
 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD) },
 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_AUREAL)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_AUREAL, USB_DEVICE_ID_AUREAL_W01RN) },
+#endif
+#if IS_ENABLED(CONFIG_HID_BELKIN)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_BELKIN, USB_DEVICE_ID_FLIP_KVM) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_BETOP_FF)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185BFM, 0x2208) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185PC, 0x5506) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185V2PC, 0x1850) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185V2BFM, 0x5500) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE_2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CHERRY)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CHICONY)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CMEDIA)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM6533) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CORSAIR)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CP2112)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CYPRESS)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_3) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_4) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DELCOM, USB_DEVICE_ID_DELCOM_VISUAL_IND) },
+#endif
+#if IS_ENABLED(CONFIG_HID_DRAGONRISE)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0006) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0011) },
-#if IS_ENABLED(CONFIG_HID_MAYFLASH)
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE1) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE2) },
 #endif
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_WN) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_FA) },
+#if IS_ENABLED(CONFIG_HID_ELECOM)
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRED) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRELESS) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ELO)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0009) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0030) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_ACCUTOUCH_2216) },
+#endif
+#if IS_ENABLED(CONFIG_HID_EMS_FF)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_EMS, USB_DEVICE_ID_EMS_TRIO_LINKER_PLUS_II) },
+#endif
+#if IS_ENABLED(CONFIG_HID_EZKEY)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_EZKEY, USB_DEVICE_ID_BTC_8193) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GEMBIRD)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GEMBIRD, USB_DEVICE_ID_GEMBIRD_JPD_DUALFORCE2) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0003) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GFRM)
+        { HID_BLUETOOTH_DEVICE(0x58, 0x2000) },
+        { HID_BLUETOOTH_DEVICE(0x471, 0x2210) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GREENASIA)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0012) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GT683R)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GYRATION)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE_2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE_3) },
+#endif
+#if IS_ENABLED(CONFIG_HID_HOLTEK)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK, USB_DEVICE_ID_HOLTEK_ON_LINE_GRIP) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A04A) },
@@ -1912,12 +1980,17 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A072) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ICADE)
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_KENSINGTON)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_KEYTOUCH)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KEYTOUCH, USB_DEVICE_ID_KEYTOUCH_IEC) },
+#endif
+#if IS_ENABLED(CONFIG_HID_KYE)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_MANTICORE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_GX_IMPERATOR) },
@@ -1927,21 +2000,29 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LCPOWER)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LCPOWER, USB_DEVICE_ID_LCPOWER_LC1000 ) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LED)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DELCOM, USB_DEVICE_ID_DELCOM_VISUAL_IND) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_WN) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_FA) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_LUXAFOR) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_RISO_KAGAKU, USB_DEVICE_ID_RI_KA_WEBMAIL) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_THINGM, USB_DEVICE_ID_BLINK1) },
+#endif
 #if IS_ENABLED(CONFIG_HID_LENOVO)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPKBD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CUSBKBD) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CBTKBD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPPRODOCK) },
 #endif
-	{ HID_USB_DEVICE(USB_VENDOR_ID_LG, USB_DEVICE_ID_LG_MELFAS_MT) },
+#if IS_ENABLED(CONFIG_HID_LOGITECH)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER_2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RECEIVER) },
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) },
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_T651) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_DESKTOP) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_EDGE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_MINI) },
@@ -1954,7 +2035,6 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2_2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G29_WHEEL) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G920_WHEEL) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_F3D) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FFG ) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_FORCE3D_PRO) },
@@ -1966,17 +2046,30 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DFGT_WHEEL) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G25_WHEEL) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G27_WHEEL) },
-#if IS_ENABLED(CONFIG_HID_LOGITECH_DJ)
-	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER_2) },
-#endif
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WII_WHEEL) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACETRAVELLER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACENAVIGATOR) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD_BOOTLOADER) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_LUXAFOR) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LOGITECH_HIDPP)
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_T651) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G920_WHEEL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LOGITECH_DJ)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER_2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MAGICMOUSE)
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MAYFLASH)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE1) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MICROSOFT)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_MOUSE_4500) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_KEYBOARD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_SIDEWINDER_GV) },
@@ -1992,9 +2085,22 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_600) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER) },
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MONTEREY)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MONTEREY, USB_DEVICE_ID_GENIUS_KB29E) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MULTITOUCH)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LG, USB_DEVICE_ID_LG_MELFAS_MT) },
+#endif
+#if IS_ENABLED(CONFIG_HID_WIIMOTE)
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE) },
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_NTI)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTI, USB_DEVICE_ID_USB_SUN) },
+#endif
+#if IS_ENABLED(CONFIG_HID_NTRIG)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_2) },
@@ -2014,13 +2120,41 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_16) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_17) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_18) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ORTEK)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ORTEK, USB_DEVICE_ID_ORTEK_PKB1700) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ORTEK, USB_DEVICE_ID_ORTEK_WKB2000) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_SKYCABLE, USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PANTHERLORD)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0003) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PENMOUNT)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PENMOUNT, USB_DEVICE_ID_PENMOUNT_6000) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PETALYNX)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PETALYNX, USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PICOLCD)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD_BOOTLOADER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PLANTRONICS)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS, HID_ANY_ID) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PRIMAX)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_KEYBOARD) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_RISO_KAGAKU, USB_DEVICE_ID_RI_KA_WEBMAIL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PRODIKEYS)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
+#endif
+#if IS_ENABLED(CONFIG_HID_RMI)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_COVER) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_RAZER, USB_DEVICE_ID_RAZER_BLADE_14) },
+#endif
 #if IS_ENABLED(CONFIG_HID_ROCCAT)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ARVO) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ISKU) },
@@ -2048,9 +2182,21 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT5) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT9) },
 #endif
+#if IS_ENABLED(CONFIG_HID_SAMSUNG)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_SKYCABLE, USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SMARTJOYPLUS)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_3_PRO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_DUAL_BOX_PRO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_5_PRO) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SONY)
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SMK, USB_DEVICE_ID_SMK_PS3_BDREMOTE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_BUZZ_CONTROLLER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_WIRELESS_BUZZ_CONTROLLER) },
@@ -2069,9 +2215,17 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGP_MOUSE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SINO_LITE, USB_DEVICE_ID_SINO_LITE_CONTROLLER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SPEEDLINK)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_STEELSERIES)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_STEELSERIES, USB_DEVICE_ID_STEELSERIES_SRWS1) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SUNPLUS)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SUNPLUS, USB_DEVICE_ID_SUNPLUS_WDESKTOP) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_THINGM, USB_DEVICE_ID_BLINK1) },
+#endif
+#if IS_ENABLED(CONFIG_HID_THRUSTMASTER)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb300) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb323) },
@@ -2080,12 +2234,25 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb653) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb654) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb65a) },
+#endif
+#if IS_ENABLED(CONFIG_HID_TIVO)
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_BT) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_PRO) },
+#endif
+#if IS_ENABLED(CONFIG_HID_TOPSEED)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE_2) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, USB_DEVICE_ID_TOPSEED2_RF_COMBO) },
+#endif
+#if IS_ENABLED(CONFIG_HID_TWINHAN)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TWINHAN, USB_DEVICE_ID_TWINHAN_IR_REMOTE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_UCLOGIC)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U) },
@@ -2093,20 +2260,17 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP1062) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_WIRELESS_TABLET_TWHL850) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_THQ, USB_DEVICE_ID_THQ_PS3_UDRAW) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_YIYNOVA_TABLET) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_81) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_45) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_EX07S) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_3_PRO) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_DUAL_BOX_PRO) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_5_PRO) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) },
+#endif
+#if IS_ENABLED(CONFIG_HID_UDRAW_PS3)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_THQ, USB_DEVICE_ID_THQ_PS3_UDRAW) },
+#endif
+#if IS_ENABLED(CONFIG_HID_WALTOP)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_5_8_INCH) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_12_1_INCH) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_Q_PAD) },
@@ -2114,19 +2278,18 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_XINMO)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ZEROPLUS)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ZYDACRON)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) },
-
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT) },
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE) },
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE2) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_RAZER, USB_DEVICE_ID_RAZER_BLADE_14) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM6533) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_COVER) },
+#endif
 	{ }
 };
 
diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c
index 6e3848a8d8dd..e2c7465df69f 100644
--- a/drivers/hid/hid-elecom.c
+++ b/drivers/hid/hid-elecom.c
@@ -1,10 +1,8 @@
 /*
- *  HID driver for Elecom BM084 (bluetooth mouse).
- *  Removes a non-existing horizontal wheel from
- *  the HID descriptor.
- *  (This module is based on "hid-ortek".)
- *
+ *  HID driver for ELECOM devices.
  *  Copyright (c) 2010 Richard Nauber <Richard.Nauber@gmail.com>
+ *  Copyright (c) 2016 Yuxuan Shui <yshuiv7@gmail.com>
+ *  Copyright (c) 2017 Diego Elio Pettenò <flameeyes@flameeyes.eu>
  */
 
 /*
@@ -23,15 +21,61 @@
 static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		unsigned int *rsize)
 {
-	if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) {
-		hid_info(hdev, "Fixing up Elecom BM084 report descriptor\n");
-		rdesc[47] = 0x00;
+	switch (hdev->product) {
+	case USB_DEVICE_ID_ELECOM_BM084:
+		/* The BM084 Bluetooth mouse includes a non-existing horizontal
+		 * wheel in the HID descriptor. */
+		if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) {
+			hid_info(hdev, "Fixing up Elecom BM084 report descriptor\n");
+			rdesc[47] = 0x00;
+		}
+		break;
+	case USB_DEVICE_ID_ELECOM_DEFT_WIRED:
+	case USB_DEVICE_ID_ELECOM_DEFT_WIRELESS:
+		/* The DEFT trackball has eight buttons, but its descriptor only
+		 * reports five, disabling the three Fn buttons on the top of
+		 * the mouse.
+		 *
+		 * Apply the following diff to the descriptor:
+		 *
+		 * Collection (Physical),              Collection (Physical),
+		 *     Report ID (1),                      Report ID (1),
+		 *     Report Count (5),           ->      Report Count (8),
+		 *     Report Size (1),                    Report Size (1),
+		 *     Usage Page (Button),                Usage Page (Button),
+		 *     Usage Minimum (01h),                Usage Minimum (01h),
+		 *     Usage Maximum (05h),        ->      Usage Maximum (08h),
+		 *     Logical Minimum (0),                Logical Minimum (0),
+		 *     Logical Maximum (1),                Logical Maximum (1),
+		 *     Input (Variable),                   Input (Variable),
+		 *     Report Count (1),           ->      Report Count (0),
+		 *     Report Size (3),                    Report Size (3),
+		 *     Input (Constant),                   Input (Constant),
+		 *     Report Size (16),                   Report Size (16),
+		 *     Report Count (2),                   Report Count (2),
+		 *     Usage Page (Desktop),               Usage Page (Desktop),
+		 *     Usage (X),                          Usage (X),
+		 *     Usage (Y),                          Usage (Y),
+		 *     Logical Minimum (-32768),           Logical Minimum (-32768),
+		 *     Logical Maximum (32767),            Logical Maximum (32767),
+		 *     Input (Variable, Relative),         Input (Variable, Relative),
+		 * End Collection,                     End Collection,
+		 */
+		if (*rsize == 213 && rdesc[13] == 5 && rdesc[21] == 5) {
+			hid_info(hdev, "Fixing up Elecom DEFT Fn buttons\n");
+			rdesc[13] = 8; /* Button/Variable Report Count */
+			rdesc[21] = 8; /* Button/Variable Usage Maximum */
+			rdesc[29] = 0; /* Button/Constant Report Count */
+		}
+		break;
 	}
 	return rdesc;
 }
 
 static const struct hid_device_id elecom_devices[] = {
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084)},
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRED) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRELESS) },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, elecom_devices);
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 643390ba749d..4f9a3938189a 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -173,6 +173,7 @@
 #define USB_VENDOR_ID_ASUSTEK		0x0b05
 #define USB_DEVICE_ID_ASUSTEK_LCM	0x1726
 #define USB_DEVICE_ID_ASUSTEK_LCM2	0x175b
+#define USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD	0x17e0
 #define USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD	0x8585
 #define USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD	0x0101
 #define USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1 0x1854
@@ -318,6 +319,9 @@
 #define USB_VENDOR_ID_DELCOM		0x0fc5
 #define USB_DEVICE_ID_DELCOM_VISUAL_IND	0xb080
 
+#define USB_VENDOR_ID_DELL				0x413c
+#define USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE	0x301a
+
 #define USB_VENDOR_ID_DELORME		0x1163
 #define USB_DEVICE_ID_DELORME_EARTHMATE	0x0100
 #define USB_DEVICE_ID_DELORME_EM_LT20	0x0200
@@ -358,6 +362,8 @@
 
 #define USB_VENDOR_ID_ELECOM		0x056e
 #define USB_DEVICE_ID_ELECOM_BM084	0x0061
+#define USB_DEVICE_ID_ELECOM_DEFT_WIRED	0x00fe
+#define USB_DEVICE_ID_ELECOM_DEFT_WIRELESS	0x00ff
 
 #define USB_VENDOR_ID_DREAM_CHEEKY	0x1d34
 #define USB_DEVICE_ID_DREAM_CHEEKY_WN	0x0004
diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index 8daa8ce64ebb..04015032a35a 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -872,10 +872,9 @@ static int i2c_hid_fetch_hid_descriptor(struct i2c_hid *ihid)
 static int i2c_hid_acpi_pdata(struct i2c_client *client,
 		struct i2c_hid_platform_data *pdata)
 {
-	static u8 i2c_hid_guid[] = {
-		0xF7, 0xF6, 0xDF, 0x3C, 0x67, 0x42, 0x55, 0x45,
-		0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE,
-	};
+	static guid_t i2c_hid_guid =
+		GUID_INIT(0x3CDFF6F7, 0x4267, 0x4555,
+			  0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE);
 	union acpi_object *obj;
 	struct acpi_device *adev;
 	acpi_handle handle;
@@ -884,7 +883,7 @@ static int i2c_hid_acpi_pdata(struct i2c_client *client,
 	if (!handle || acpi_bus_get_device(handle, &adev))
 		return -ENODEV;
 
-	obj = acpi_evaluate_dsm_typed(handle, i2c_hid_guid, 1, 1, NULL,
+	obj = acpi_evaluate_dsm_typed(handle, &i2c_hid_guid, 1, 1, NULL,
 				      ACPI_TYPE_INTEGER);
 	if (!obj) {
 		dev_err(&client->dev, "device _DSM execution failed\n");
@@ -897,6 +896,15 @@ static int i2c_hid_acpi_pdata(struct i2c_client *client,
 	return 0;
 }
 
+static void i2c_hid_acpi_fix_up_power(struct device *dev)
+{
+	acpi_handle handle = ACPI_HANDLE(dev);
+	struct acpi_device *adev;
+
+	if (handle && acpi_bus_get_device(handle, &adev) == 0)
+		acpi_device_fix_up_power(adev);
+}
+
 static const struct acpi_device_id i2c_hid_acpi_match[] = {
 	{"ACPI0C50", 0 },
 	{"PNP0C50", 0 },
@@ -909,6 +917,8 @@ static inline int i2c_hid_acpi_pdata(struct i2c_client *client,
 {
 	return -ENODEV;
 }
+
+static inline void i2c_hid_acpi_fix_up_power(struct device *dev) {}
 #endif
 
 #ifdef CONFIG_OF
@@ -1030,6 +1040,8 @@ static int i2c_hid_probe(struct i2c_client *client,
 	if (ret < 0)
 		goto err_regulator;
 
+	i2c_hid_acpi_fix_up_power(&client->dev);
+
 	pm_runtime_get_noresume(&client->dev);
 	pm_runtime_set_active(&client->dev);
 	pm_runtime_enable(&client->dev);
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 6316498b7812..a88e7c7bea0a 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -85,6 +85,7 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET },
+	{ USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3, HID_QUIRK_MULTI_INPUT },
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 4b225fb19a16..e274c9dc32f3 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -1571,37 +1571,38 @@ static int wacom_tpc_irq(struct wacom_wac *wacom, size_t len)
 {
 	unsigned char *data = wacom->data;
 
-	if (wacom->pen_input)
+	if (wacom->pen_input) {
 		dev_dbg(wacom->pen_input->dev.parent,
 			"%s: received report #%d\n", __func__, data[0]);
-	else if (wacom->touch_input)
+
+		if (len == WACOM_PKGLEN_PENABLED ||
+		    data[0] == WACOM_REPORT_PENABLED)
+			return wacom_tpc_pen(wacom);
+	}
+	else if (wacom->touch_input) {
 		dev_dbg(wacom->touch_input->dev.parent,
 			"%s: received report #%d\n", __func__, data[0]);
 
-	switch (len) {
-	case WACOM_PKGLEN_TPC1FG:
-		return wacom_tpc_single_touch(wacom, len);
+		switch (len) {
+		case WACOM_PKGLEN_TPC1FG:
+			return wacom_tpc_single_touch(wacom, len);
 
-	case WACOM_PKGLEN_TPC2FG:
-		return wacom_tpc_mt_touch(wacom);
+		case WACOM_PKGLEN_TPC2FG:
+			return wacom_tpc_mt_touch(wacom);
 
-	case WACOM_PKGLEN_PENABLED:
-		return wacom_tpc_pen(wacom);
+		default:
+			switch (data[0]) {
+			case WACOM_REPORT_TPC1FG:
+			case WACOM_REPORT_TPCHID:
+			case WACOM_REPORT_TPCST:
+			case WACOM_REPORT_TPC1FGE:
+				return wacom_tpc_single_touch(wacom, len);
 
-	default:
-		switch (data[0]) {
-		case WACOM_REPORT_TPC1FG:
-		case WACOM_REPORT_TPCHID:
-		case WACOM_REPORT_TPCST:
-		case WACOM_REPORT_TPC1FGE:
-			return wacom_tpc_single_touch(wacom, len);
-
-		case WACOM_REPORT_TPCMT:
-		case WACOM_REPORT_TPCMT2:
-			return wacom_mt_touch(wacom);
+			case WACOM_REPORT_TPCMT:
+			case WACOM_REPORT_TPCMT2:
+				return wacom_mt_touch(wacom);
 
-		case WACOM_REPORT_PENABLED:
-			return wacom_tpc_pen(wacom);
+			}
 		}
 	}
 
diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c
index 26b05106f0d3..93d28c0ec8bf 100644
--- a/drivers/hsi/clients/ssi_protocol.c
+++ b/drivers/hsi/clients/ssi_protocol.c
@@ -1066,7 +1066,7 @@ static void ssip_pn_setup(struct net_device *dev)
 	dev->addr_len		= 1;
 	dev->tx_queue_len	= SSIP_TXQUEUE_LEN;
 
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->header_ops		= &phonet_header_ops;
 }
 
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 22d5eafd6815..5ef2814345ef 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -343,6 +343,7 @@ config SENSORS_ASB100
 
 config SENSORS_ASPEED
 	tristate "ASPEED AST2400/AST2500 PWM and Fan tach driver"
+	select REGMAP
 	help
 	  This driver provides support for ASPEED AST2400/AST2500 PWM
 	  and Fan Tacho controllers.
diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c
index 48403a2115be..9de13d626c68 100644
--- a/drivers/hwmon/aspeed-pwm-tacho.c
+++ b/drivers/hwmon/aspeed-pwm-tacho.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/errno.h>
 #include <linux/gpio/consumer.h>
 #include <linux/delay.h>
 #include <linux/hwmon.h>
@@ -494,7 +495,7 @@ static u32 aspeed_get_fan_tach_ch_measure_period(struct aspeed_pwm_tacho_data
 	return clk / (clk_unit * div_h * div_l * tacho_div * tacho_unit);
 }
 
-static u32 aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv,
+static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv,
 				      u8 fan_tach_ch)
 {
 	u32 raw_data, tach_div, clk_source, sec, val;
@@ -510,6 +511,9 @@ static u32 aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv,
 	msleep(sec);
 
 	regmap_read(priv->regmap, ASPEED_PTCR_RESULT, &val);
+	if (!(val & RESULT_STATUS_MASK))
+		return -ETIMEDOUT;
+
 	raw_data = val & RESULT_VALUE_MASK;
 	tach_div = priv->type_fan_tach_clock_division[type];
 	tach_div = 0x4 << (tach_div * 2);
@@ -561,12 +565,14 @@ static ssize_t show_rpm(struct device *dev, struct device_attribute *attr,
 {
 	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
 	int index = sensor_attr->index;
-	u32 rpm;
+	int rpm;
 	struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev);
 
 	rpm = aspeed_get_fan_tach_ch_rpm(priv, index);
+	if (rpm < 0)
+		return rpm;
 
-	return sprintf(buf, "%u\n", rpm);
+	return sprintf(buf, "%d\n", rpm);
 }
 
 static umode_t pwm_is_visible(struct kobject *kobj,
@@ -591,24 +597,23 @@ static umode_t fan_dev_is_visible(struct kobject *kobj,
 	return a->mode;
 }
 
-static SENSOR_DEVICE_ATTR(pwm0, 0644,
-			show_pwm, set_pwm, 0);
 static SENSOR_DEVICE_ATTR(pwm1, 0644,
-			show_pwm, set_pwm, 1);
+			show_pwm, set_pwm, 0);
 static SENSOR_DEVICE_ATTR(pwm2, 0644,
-			show_pwm, set_pwm, 2);
+			show_pwm, set_pwm, 1);
 static SENSOR_DEVICE_ATTR(pwm3, 0644,
-			show_pwm, set_pwm, 3);
+			show_pwm, set_pwm, 2);
 static SENSOR_DEVICE_ATTR(pwm4, 0644,
-			show_pwm, set_pwm, 4);
+			show_pwm, set_pwm, 3);
 static SENSOR_DEVICE_ATTR(pwm5, 0644,
-			show_pwm, set_pwm, 5);
+			show_pwm, set_pwm, 4);
 static SENSOR_DEVICE_ATTR(pwm6, 0644,
-			show_pwm, set_pwm, 6);
+			show_pwm, set_pwm, 5);
 static SENSOR_DEVICE_ATTR(pwm7, 0644,
+			show_pwm, set_pwm, 6);
+static SENSOR_DEVICE_ATTR(pwm8, 0644,
 			show_pwm, set_pwm, 7);
 static struct attribute *pwm_dev_attrs[] = {
-	&sensor_dev_attr_pwm0.dev_attr.attr,
 	&sensor_dev_attr_pwm1.dev_attr.attr,
 	&sensor_dev_attr_pwm2.dev_attr.attr,
 	&sensor_dev_attr_pwm3.dev_attr.attr,
@@ -616,6 +621,7 @@ static struct attribute *pwm_dev_attrs[] = {
 	&sensor_dev_attr_pwm5.dev_attr.attr,
 	&sensor_dev_attr_pwm6.dev_attr.attr,
 	&sensor_dev_attr_pwm7.dev_attr.attr,
+	&sensor_dev_attr_pwm8.dev_attr.attr,
 	NULL,
 };
 
@@ -624,40 +630,39 @@ static const struct attribute_group pwm_dev_group = {
 	.is_visible = pwm_is_visible,
 };
 
-static SENSOR_DEVICE_ATTR(fan0_input, 0444,
-		show_rpm, NULL, 0);
 static SENSOR_DEVICE_ATTR(fan1_input, 0444,
-		show_rpm, NULL, 1);
+		show_rpm, NULL, 0);
 static SENSOR_DEVICE_ATTR(fan2_input, 0444,
-		show_rpm, NULL, 2);
+		show_rpm, NULL, 1);
 static SENSOR_DEVICE_ATTR(fan3_input, 0444,
-		show_rpm, NULL, 3);
+		show_rpm, NULL, 2);
 static SENSOR_DEVICE_ATTR(fan4_input, 0444,
-		show_rpm, NULL, 4);
+		show_rpm, NULL, 3);
 static SENSOR_DEVICE_ATTR(fan5_input, 0444,
-		show_rpm, NULL, 5);
+		show_rpm, NULL, 4);
 static SENSOR_DEVICE_ATTR(fan6_input, 0444,
-		show_rpm, NULL, 6);
+		show_rpm, NULL, 5);
 static SENSOR_DEVICE_ATTR(fan7_input, 0444,
-		show_rpm, NULL, 7);
+		show_rpm, NULL, 6);
 static SENSOR_DEVICE_ATTR(fan8_input, 0444,
-		show_rpm, NULL, 8);
+		show_rpm, NULL, 7);
 static SENSOR_DEVICE_ATTR(fan9_input, 0444,
-		show_rpm, NULL, 9);
+		show_rpm, NULL, 8);
 static SENSOR_DEVICE_ATTR(fan10_input, 0444,
-		show_rpm, NULL, 10);
+		show_rpm, NULL, 9);
 static SENSOR_DEVICE_ATTR(fan11_input, 0444,
-		show_rpm, NULL, 11);
+		show_rpm, NULL, 10);
 static SENSOR_DEVICE_ATTR(fan12_input, 0444,
-		show_rpm, NULL, 12);
+		show_rpm, NULL, 11);
 static SENSOR_DEVICE_ATTR(fan13_input, 0444,
-		show_rpm, NULL, 13);
+		show_rpm, NULL, 12);
 static SENSOR_DEVICE_ATTR(fan14_input, 0444,
-		show_rpm, NULL, 14);
+		show_rpm, NULL, 13);
 static SENSOR_DEVICE_ATTR(fan15_input, 0444,
+		show_rpm, NULL, 14);
+static SENSOR_DEVICE_ATTR(fan16_input, 0444,
 		show_rpm, NULL, 15);
 static struct attribute *fan_dev_attrs[] = {
-	&sensor_dev_attr_fan0_input.dev_attr.attr,
 	&sensor_dev_attr_fan1_input.dev_attr.attr,
 	&sensor_dev_attr_fan2_input.dev_attr.attr,
 	&sensor_dev_attr_fan3_input.dev_attr.attr,
@@ -673,6 +678,7 @@ static struct attribute *fan_dev_attrs[] = {
 	&sensor_dev_attr_fan13_input.dev_attr.attr,
 	&sensor_dev_attr_fan14_input.dev_attr.attr,
 	&sensor_dev_attr_fan15_input.dev_attr.attr,
+	&sensor_dev_attr_fan16_input.dev_attr.attr,
 	NULL
 };
 
@@ -802,7 +808,6 @@ static int aspeed_pwm_tacho_probe(struct platform_device *pdev)
 		if (ret)
 			return ret;
 	}
-	of_node_put(np);
 
 	priv->groups[0] = &pwm_dev_group;
 	priv->groups[1] = &fan_dev_group;
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 3ac4c03ba77b..c13a4fd86b3c 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -605,6 +605,13 @@ static int coretemp_cpu_online(unsigned int cpu)
 	struct platform_data *pdata;
 
 	/*
+	 * Don't execute this on resume as the offline callback did
+	 * not get executed on suspend.
+	 */
+	if (cpuhp_tasks_frozen)
+		return 0;
+
+	/*
 	 * CPUID.06H.EAX[0] indicates whether the CPU has thermal
 	 * sensors. We check this bit only, all the early CPUs
 	 * without thermal sensors will be filtered out.
@@ -654,6 +661,13 @@ static int coretemp_cpu_offline(unsigned int cpu)
 	struct temp_data *tdata;
 	int indx, target;
 
+	/*
+	 * Don't execute this on suspend as the device remove locks
+	 * up the machine.
+	 */
+	if (cpuhp_tasks_frozen)
+		return 0;
+
 	/* If the physical CPU device does not exist, just return */
 	if (!pdev)
 		return 0;
diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c
index 45c5c4883022..6e6bf46bcb52 100644
--- a/drivers/i2c/busses/i2c-ali15x3.c
+++ b/drivers/i2c/busses/i2c-ali15x3.c
@@ -119,7 +119,7 @@
 /* If force_addr is set to anything different from 0, we forcibly enable
    the device at the given address. */
 static u16 force_addr;
-module_param(force_addr, ushort, 0);
+module_param_hw(force_addr, ushort, ioport, 0);
 MODULE_PARM_DESC(force_addr,
 		 "Initialize the base address of the i2c controller");
 
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 5a4eb6b6bd92..d1263b82d646 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -94,6 +94,7 @@ static void dw_i2c_acpi_params(struct platform_device *pdev, char method[],
 static int dw_i2c_acpi_configure(struct platform_device *pdev)
 {
 	struct dw_i2c_dev *dev = platform_get_drvdata(pdev);
+	u32 ss_ht = 0, fp_ht = 0, hs_ht = 0, fs_ht = 0;
 	acpi_handle handle = ACPI_HANDLE(&pdev->dev);
 	const struct acpi_device_id *id;
 	struct acpi_device *adev;
@@ -107,23 +108,24 @@ static int dw_i2c_acpi_configure(struct platform_device *pdev)
 	 * Try to get SDA hold time and *CNT values from an ACPI method for
 	 * selected speed modes.
 	 */
+	dw_i2c_acpi_params(pdev, "SSCN", &dev->ss_hcnt, &dev->ss_lcnt, &ss_ht);
+	dw_i2c_acpi_params(pdev, "FPCN", &dev->fp_hcnt, &dev->fp_lcnt, &fp_ht);
+	dw_i2c_acpi_params(pdev, "HSCN", &dev->hs_hcnt, &dev->hs_lcnt, &hs_ht);
+	dw_i2c_acpi_params(pdev, "FMCN", &dev->fs_hcnt, &dev->fs_lcnt, &fs_ht);
+
 	switch (dev->clk_freq) {
 	case 100000:
-		dw_i2c_acpi_params(pdev, "SSCN", &dev->ss_hcnt, &dev->ss_lcnt,
-				   &dev->sda_hold_time);
+		dev->sda_hold_time = ss_ht;
 		break;
 	case 1000000:
-		dw_i2c_acpi_params(pdev, "FPCN", &dev->fp_hcnt, &dev->fp_lcnt,
-				   &dev->sda_hold_time);
+		dev->sda_hold_time = fp_ht;
 		break;
 	case 3400000:
-		dw_i2c_acpi_params(pdev, "HSCN", &dev->hs_hcnt, &dev->hs_lcnt,
-				   &dev->sda_hold_time);
+		dev->sda_hold_time = hs_ht;
 		break;
 	case 400000:
 	default:
-		dw_i2c_acpi_params(pdev, "FMCN", &dev->fs_hcnt, &dev->fs_lcnt,
-				   &dev->sda_hold_time);
+		dev->sda_hold_time = fs_ht;
 		break;
 	}
 
@@ -157,6 +159,8 @@ static const struct acpi_device_id dw_i2c_acpi_match[] = {
 	{ "AMDI0010", ACCESS_INTR_MASK },
 	{ "AMDI0510", 0 },
 	{ "APMC0D0F", 0 },
+	{ "HISI02A1", 0 },
+	{ "HISI02A2", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(acpi, dw_i2c_acpi_match);
diff --git a/drivers/i2c/busses/i2c-elektor.c b/drivers/i2c/busses/i2c-elektor.c
index 8af62fb3fe41..5416003e0605 100644
--- a/drivers/i2c/busses/i2c-elektor.c
+++ b/drivers/i2c/busses/i2c-elektor.c
@@ -323,9 +323,9 @@ MODULE_AUTHOR("Hans Berglund <hb@spacetec.no>");
 MODULE_DESCRIPTION("I2C-Bus adapter routines for PCF8584 ISA bus adapter");
 MODULE_LICENSE("GPL");
 
-module_param(base, int, 0);
-module_param(irq, int, 0);
+module_param_hw(base, int, ioport_or_iomem, 0);
+module_param_hw(irq, int, irq, 0);
 module_param(clock, int, 0);
 module_param(own, int, 0);
-module_param(mmapped, int, 0);
+module_param_hw(mmapped, int, other, 0);
 module_isa_driver(i2c_elektor_driver, 1);
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 95ed17183e73..54a47b40546f 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -734,9 +734,9 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx,
 		 * the first read operation, otherwise the first read cost
 		 * one extra clock cycle.
 		 */
-		temp = readb(i2c_imx->base + IMX_I2C_I2CR);
+		temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
 		temp |= I2CR_MTX;
-		writeb(temp, i2c_imx->base + IMX_I2C_I2CR);
+		imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
 	}
 	msgs->buf[msgs->len-1] = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
 
@@ -857,9 +857,9 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo
 				 * the first read operation, otherwise the first read cost
 				 * one extra clock cycle.
 				 */
-				temp = readb(i2c_imx->base + IMX_I2C_I2CR);
+				temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
 				temp |= I2CR_MTX;
-				writeb(temp, i2c_imx->base + IMX_I2C_I2CR);
+				imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
 			}
 		} else if (i == (msgs->len - 2)) {
 			dev_dbg(&i2c_imx->adapter.dev,
diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c
index f573448d2132..e98e44e584a4 100644
--- a/drivers/i2c/busses/i2c-ismt.c
+++ b/drivers/i2c/busses/i2c-ismt.c
@@ -584,7 +584,7 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
 
 	/* unmap the data buffer */
 	if (dma_size != 0)
-		dma_unmap_single(&adap->dev, dma_addr, dma_size, dma_direction);
+		dma_unmap_single(dev, dma_addr, dma_size, dma_direction);
 
 	if (unlikely(!time_left)) {
 		dev_err(dev, "completion wait timed out\n");
diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index cf737ec8563b..5c4db65c5019 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c
@@ -819,7 +819,6 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data,
 		rc = -EINVAL;
 		goto out;
 	}
-	drv_data->irq = irq_of_parse_and_map(np, 0);
 
 	drv_data->rstc = devm_reset_control_get_optional(dev, NULL);
 	if (IS_ERR(drv_data->rstc)) {
@@ -902,10 +901,11 @@ mv64xxx_i2c_probe(struct platform_device *pd)
 	if (!IS_ERR(drv_data->clk))
 		clk_prepare_enable(drv_data->clk);
 
+	drv_data->irq = platform_get_irq(pd, 0);
+
 	if (pdata) {
 		drv_data->freq_m = pdata->freq_m;
 		drv_data->freq_n = pdata->freq_n;
-		drv_data->irq = platform_get_irq(pd, 0);
 		drv_data->adapter.timeout = msecs_to_jiffies(pdata->timeout);
 		drv_data->offload_enabled = false;
 		memcpy(&drv_data->reg_offsets, &mv64xxx_i2c_regs_mv64xxx, sizeof(drv_data->reg_offsets));
@@ -915,7 +915,7 @@ mv64xxx_i2c_probe(struct platform_device *pd)
 			goto exit_clk;
 	}
 	if (drv_data->irq < 0) {
-		rc = -ENXIO;
+		rc = drv_data->irq;
 		goto exit_reset;
 	}
 
diff --git a/drivers/i2c/busses/i2c-parport-light.c b/drivers/i2c/busses/i2c-parport-light.c
index 1bcdd10b68b9..faa8fb8f2b8f 100644
--- a/drivers/i2c/busses/i2c-parport-light.c
+++ b/drivers/i2c/busses/i2c-parport-light.c
@@ -38,11 +38,11 @@
 static struct platform_device *pdev;
 
 static u16 base;
-module_param(base, ushort, 0);
+module_param_hw(base, ushort, ioport, 0);
 MODULE_PARM_DESC(base, "Base I/O address");
 
 static int irq;
-module_param(irq, int, 0);
+module_param_hw(irq, int, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ (optional)");
 
 /* ----- Low-level parallel port access ----------------------------------- */
diff --git a/drivers/i2c/busses/i2c-pca-isa.c b/drivers/i2c/busses/i2c-pca-isa.c
index ba88f17f636c..946ac646de2a 100644
--- a/drivers/i2c/busses/i2c-pca-isa.c
+++ b/drivers/i2c/busses/i2c-pca-isa.c
@@ -197,9 +197,9 @@ MODULE_AUTHOR("Ian Campbell <icampbell@arcom.com>");
 MODULE_DESCRIPTION("ISA base PCA9564/PCA9665 driver");
 MODULE_LICENSE("GPL");
 
-module_param(base, ulong, 0);
+module_param_hw(base, ulong, ioport, 0);
 MODULE_PARM_DESC(base, "I/O base address");
-module_param(irq, int, 0);
+module_param_hw(irq, int, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ");
 module_param(clock, int, 0);
 MODULE_PARM_DESC(clock, "Clock rate in hertz.\n\t\t"
diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index c21ca7bf2efe..0ecdb47a23ab 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -106,7 +106,7 @@ MODULE_PARM_DESC(force, "Forcibly enable the PIIX4. DANGEROUS!");
 /* If force_addr is set to anything different from 0, we forcibly enable
    the PIIX4 at the given address. VERY DANGEROUS! */
 static int force_addr;
-module_param (force_addr, int, 0);
+module_param_hw(force_addr, int, ioport, 0);
 MODULE_PARM_DESC(force_addr,
 		 "Forcibly enable the PIIX4 at the given address. "
 		 "EXTREMELY DANGEROUS!");
diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index 214bf2835d1f..8be3e6cb8fe6 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -319,7 +319,7 @@ static void rcar_i2c_dma_unmap(struct rcar_i2c_priv *priv)
 	rcar_i2c_write(priv, ICFBSCR, TCYC06);
 
 	dma_unmap_single(chan->device->dev, sg_dma_address(&priv->sg),
-			 priv->msg->len, priv->dma_direction);
+			 sg_dma_len(&priv->sg), priv->dma_direction);
 
 	priv->dma_direction = DMA_NONE;
 }
diff --git a/drivers/i2c/busses/i2c-sis5595.c b/drivers/i2c/busses/i2c-sis5595.c
index 7d58a40faf2d..d543a9867ba4 100644
--- a/drivers/i2c/busses/i2c-sis5595.c
+++ b/drivers/i2c/busses/i2c-sis5595.c
@@ -119,7 +119,7 @@ static int blacklist[] = {
 /* If force_addr is set to anything different from 0, we forcibly enable
    the device at the given address. */
 static u16 force_addr;
-module_param(force_addr, ushort, 0);
+module_param_hw(force_addr, ushort, ioport, 0);
 MODULE_PARM_DESC(force_addr, "Initialize the base address of the i2c controller");
 
 static struct pci_driver sis5595_driver;
diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c
index 0ed77eeff31e..a2e3dd715380 100644
--- a/drivers/i2c/busses/i2c-tiny-usb.c
+++ b/drivers/i2c/busses/i2c-tiny-usb.c
@@ -178,22 +178,39 @@ static int usb_read(struct i2c_adapter *adapter, int cmd,
 		    int value, int index, void *data, int len)
 {
 	struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data;
+	void *dmadata = kmalloc(len, GFP_KERNEL);
+	int ret;
+
+	if (!dmadata)
+		return -ENOMEM;
 
 	/* do control transfer */
-	return usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0),
+	ret = usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0),
 			       cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE |
-			       USB_DIR_IN, value, index, data, len, 2000);
+			       USB_DIR_IN, value, index, dmadata, len, 2000);
+
+	memcpy(data, dmadata, len);
+	kfree(dmadata);
+	return ret;
 }
 
 static int usb_write(struct i2c_adapter *adapter, int cmd,
 		     int value, int index, void *data, int len)
 {
 	struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data;
+	void *dmadata = kmemdup(data, len, GFP_KERNEL);
+	int ret;
+
+	if (!dmadata)
+		return -ENOMEM;
 
 	/* do control transfer */
-	return usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0),
+	ret = usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0),
 			       cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
-			       value, index, data, len, 2000);
+			       value, index, dmadata, len, 2000);
+
+	kfree(dmadata);
+	return ret;
 }
 
 static void i2c_tiny_usb_free(struct i2c_tiny_usb *dev)
diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c
index 0ee2646f3b00..0dc45e12bb1d 100644
--- a/drivers/i2c/busses/i2c-viapro.c
+++ b/drivers/i2c/busses/i2c-viapro.c
@@ -94,7 +94,7 @@ MODULE_PARM_DESC(force, "Forcibly enable the SMBus. DANGEROUS!");
 /* If force_addr is set to anything different from 0, we forcibly enable
    the VT596 at the given address. VERY DANGEROUS! */
 static u16 force_addr;
-module_param(force_addr, ushort, 0);
+module_param_hw(force_addr, ushort, ioport, 0);
 MODULE_PARM_DESC(force_addr,
 		 "Forcibly enable the SMBus at the given address. "
 		 "EXTREMELY DANGEROUS!");
diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c
index dbe7e44c9321..6ba6c83ca8f1 100644
--- a/drivers/i2c/busses/i2c-xgene-slimpro.c
+++ b/drivers/i2c/busses/i2c-xgene-slimpro.c
@@ -416,6 +416,7 @@ static int xgene_slimpro_i2c_probe(struct platform_device *pdev)
 	adapter->class = I2C_CLASS_HWMON;
 	adapter->dev.parent = &pdev->dev;
 	adapter->dev.of_node = pdev->dev.of_node;
+	ACPI_COMPANION_SET(&adapter->dev, ACPI_COMPANION(&pdev->dev));
 	i2c_set_adapdata(adapter, ctx);
 	rc = i2c_add_adapter(adapter);
 	if (rc) {
diff --git a/drivers/i2c/busses/scx200_acb.c b/drivers/i2c/busses/scx200_acb.c
index 0a7e410b6195..e0923bee8d1f 100644
--- a/drivers/i2c/busses/scx200_acb.c
+++ b/drivers/i2c/busses/scx200_acb.c
@@ -42,7 +42,7 @@ MODULE_LICENSE("GPL");
 
 #define MAX_DEVICES 4
 static int base[MAX_DEVICES] = { 0x820, 0x840 };
-module_param_array(base, int, NULL, 0);
+module_param_hw_array(base, int, ioport, NULL, 0);
 MODULE_PARM_DESC(base, "Base addresses for the ACCESS.bus controllers");
 
 #define POLL_TIMEOUT	(HZ/5)
diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c
index 26f7237558ba..9669ca4937b8 100644
--- a/drivers/i2c/i2c-mux.c
+++ b/drivers/i2c/i2c-mux.c
@@ -395,18 +395,20 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc,
 	if (force_nr) {
 		priv->adap.nr = force_nr;
 		ret = i2c_add_numbered_adapter(&priv->adap);
-		dev_err(&parent->dev,
-			"failed to add mux-adapter %u as bus %u (error=%d)\n",
-			chan_id, force_nr, ret);
+		if (ret < 0) {
+			dev_err(&parent->dev,
+				"failed to add mux-adapter %u as bus %u (error=%d)\n",
+				chan_id, force_nr, ret);
+			goto err_free_priv;
+		}
 	} else {
 		ret = i2c_add_adapter(&priv->adap);
-		dev_err(&parent->dev,
-			"failed to add mux-adapter %u (error=%d)\n",
-			chan_id, ret);
-	}
-	if (ret < 0) {
-		kfree(priv);
-		return ret;
+		if (ret < 0) {
+			dev_err(&parent->dev,
+				"failed to add mux-adapter %u (error=%d)\n",
+				chan_id, ret);
+			goto err_free_priv;
+		}
 	}
 
 	WARN(sysfs_create_link(&priv->adap.dev.kobj, &muxc->dev->kobj,
@@ -422,6 +424,10 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc,
 
 	muxc->adapter[muxc->num_adapters++] = &priv->adap;
 	return 0;
+
+err_free_priv:
+	kfree(priv);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(i2c_mux_add_adapter);
 
diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c
index 406d5059072c..d97031804de8 100644
--- a/drivers/i2c/muxes/i2c-mux-reg.c
+++ b/drivers/i2c/muxes/i2c-mux-reg.c
@@ -196,20 +196,25 @@ static int i2c_mux_reg_probe(struct platform_device *pdev)
 		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 		mux->data.reg_size = resource_size(res);
 		mux->data.reg = devm_ioremap_resource(&pdev->dev, res);
-		if (IS_ERR(mux->data.reg))
-			return PTR_ERR(mux->data.reg);
+		if (IS_ERR(mux->data.reg)) {
+			ret = PTR_ERR(mux->data.reg);
+			goto err_put_parent;
+		}
 	}
 
 	if (mux->data.reg_size != 4 && mux->data.reg_size != 2 &&
 	    mux->data.reg_size != 1) {
 		dev_err(&pdev->dev, "Invalid register size\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_put_parent;
 	}
 
 	muxc = i2c_mux_alloc(parent, &pdev->dev, mux->data.n_values, 0, 0,
 			     i2c_mux_reg_select, NULL);
-	if (!muxc)
-		return -ENOMEM;
+	if (!muxc) {
+		ret = -ENOMEM;
+		goto err_put_parent;
+	}
 	muxc->priv = mux;
 
 	platform_set_drvdata(pdev, muxc);
@@ -223,7 +228,7 @@ static int i2c_mux_reg_probe(struct platform_device *pdev)
 
 		ret = i2c_mux_add_adapter(muxc, nr, mux->data.values[i], class);
 		if (ret)
-			goto add_adapter_failed;
+			goto err_del_mux_adapters;
 	}
 
 	dev_dbg(&pdev->dev, "%d port mux on %s adapter\n",
@@ -231,8 +236,10 @@ static int i2c_mux_reg_probe(struct platform_device *pdev)
 
 	return 0;
 
-add_adapter_failed:
+err_del_mux_adapters:
 	i2c_mux_del_adapters(muxc);
+err_put_parent:
+	i2c_put_adapter(parent);
 
 	return ret;
 }
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 5901937284e7..14d1e7d9a1d6 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -93,7 +93,6 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 	int error;
 
 	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	rq->special = (char *)pc;
 
@@ -200,7 +199,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	memset(sense, 0, sizeof(*sense));
 
 	blk_rq_init(rq->q, sense_rq);
-	scsi_req_init(sense_rq);
+	scsi_req_init(req);
 
 	err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
 			      GFP_NOIO);
@@ -273,7 +272,7 @@ void ide_retry_pc(ide_drive_t *drive)
 	ide_requeue_and_plug(drive, failed_rq);
 	if (ide_queue_sense_rq(drive, pc)) {
 		blk_start_request(failed_rq);
-		ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq));
+		ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq));
 	}
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
@@ -437,7 +436,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
 	/* No more interrupts */
 	if ((stat & ATA_DRQ) == 0) {
-		int uptodate, error;
+		int uptodate;
+		blk_status_t error;
 
 		debug_log("Packet command completed, %d bytes transferred\n",
 			  blk_rq_bytes(rq));
@@ -490,7 +490,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
 		if (ata_misc_request(rq)) {
 			scsi_req(rq)->result = 0;
-			error = 0;
+			error = BLK_STS_OK;
 		} else {
 
 			if (blk_rq_is_passthrough(rq) && uptodate <= 0) {
@@ -498,7 +498,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 					scsi_req(rq)->result = -EIO;
 			}
 
-			error = uptodate ? 0 : -EIO;
+			error = uptodate ? BLK_STS_OK : BLK_STS_IOERR;
 		}
 
 		ide_complete_rq(drive, error, blk_rq_bytes(rq));
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 07e5ff3a64c3..81e18f9628d0 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -228,7 +228,7 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 		scsi_req(failed)->sense_len = scsi_req(rq)->sense_len;
 		cdrom_analyze_sense_data(drive, failed);
 
-		if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed)))
+		if (ide_end_rq(drive, failed, BLK_STS_IOERR, blk_rq_bytes(failed)))
 			BUG();
 	} else
 		cdrom_analyze_sense_data(drive, NULL);
@@ -438,7 +438,6 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 
 		rq = blk_get_request(drive->queue,
 			write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN,  __GFP_RECLAIM);
-		scsi_req_init(rq);
 		memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
 		ide_req(rq)->type = ATA_PRIV_PC;
 		rq->rq_flags |= rq_flags;
@@ -508,7 +507,7 @@ static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
 		nr_bytes -= cmd->last_xfer_len;
 
 	if (nr_bytes > 0) {
-		ide_complete_rq(drive, 0, nr_bytes);
+		ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
 		return true;
 	}
 
@@ -674,7 +673,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 out_end:
 	if (blk_rq_is_scsi(rq) && rc == 0) {
 		scsi_req(rq)->resid_len = 0;
-		blk_end_request_all(rq, 0);
+		blk_end_request_all(rq, BLK_STS_OK);
 		hwif->rq = NULL;
 	} else {
 		if (sense && uptodate)
@@ -699,7 +698,7 @@ out_end:
 				scsi_req(rq)->resid_len += cmd->last_xfer_len;
 		}
 
-		ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq));
+		ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, blk_rq_bytes(rq));
 
 		if (sense && rc == 2)
 			ide_error(drive, "request sense failure", stat);
@@ -844,7 +843,7 @@ out_end:
 	if (nsectors == 0)
 		nsectors = 1;
 
-	ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9);
+	ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, nsectors << 9);
 
 	return ide_stopped;
 }
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 55cd736c39c6..9d26c9737e21 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -304,7 +304,6 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
 	int ret;
 
 	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	rq->rq_flags = RQF_QUIET;
 	blk_execute_rq(drive->queue, cd->disk, rq, 0);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index 9b69c32ee560..ef7c8c43a380 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -166,7 +166,6 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
 		return setting->set(drive, arg);
 
 	rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd_len = 5;
 	scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 7c06237f3479..241983da5fc4 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -478,7 +478,6 @@ static int set_multcount(ide_drive_t *drive, int arg)
 		return -EBUSY;
 
 	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_TASKFILE;
 
 	drive->mult_req = arg;
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 51c81223e56d..54d4d78ca46a 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -104,7 +104,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *drive)
 			if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
 				ide_finish_cmd(drive, cmd, stat);
 			else
-				ide_complete_rq(drive, 0,
+				ide_complete_rq(drive, BLK_STS_OK,
 						blk_rq_sectors(cmd->rq) << 9);
 			return ide_stopped;
 		}
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 4b7ffd7d158d..47d5f3379748 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -135,7 +135,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
 			return ide_stopped;
 		}
 		scsi_req(rq)->result = err;
-		ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
+		ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
 		return ide_stopped;
 	}
 
@@ -143,7 +143,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
 }
 EXPORT_SYMBOL_GPL(ide_error);
 
-static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
+static inline void ide_complete_drive_reset(ide_drive_t *drive, blk_status_t err)
 {
 	struct request *rq = drive->hwif->rq;
 
@@ -151,7 +151,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
 	    scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
 		if (err <= 0 && scsi_req(rq)->result == 0)
 			scsi_req(rq)->result = -EIO;
-		ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq));
+		ide_complete_rq(drive, err, blk_rq_bytes(rq));
 	}
 }
 
@@ -191,7 +191,7 @@ static ide_startstop_t atapi_reset_pollfunc(ide_drive_t *drive)
 	}
 	/* done polling */
 	hwif->polling = 0;
-	ide_complete_drive_reset(drive, 0);
+	ide_complete_drive_reset(drive, BLK_STS_OK);
 	return ide_stopped;
 }
 
@@ -225,7 +225,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
 	ide_hwif_t *hwif = drive->hwif;
 	const struct ide_port_ops *port_ops = hwif->port_ops;
 	u8 tmp;
-	int err = 0;
+	blk_status_t err = BLK_STS_OK;
 
 	if (port_ops && port_ops->reset_poll) {
 		err = port_ops->reset_poll(drive);
@@ -247,7 +247,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
 		printk(KERN_ERR "%s: reset timed-out, status=0x%02x\n",
 			hwif->name, tmp);
 		drive->failures++;
-		err = -EIO;
+		err = BLK_STS_IOERR;
 	} else  {
 		tmp = ide_read_error(drive);
 
@@ -257,7 +257,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
 		} else {
 			ide_reset_report_error(hwif, tmp);
 			drive->failures++;
-			err = -EIO;
+			err = BLK_STS_IOERR;
 		}
 	}
 out:
@@ -392,7 +392,7 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi)
 
 	if (io_ports->ctl_addr == 0) {
 		spin_unlock_irqrestore(&hwif->lock, flags);
-		ide_complete_drive_reset(drive, -ENXIO);
+		ide_complete_drive_reset(drive, BLK_STS_IOERR);
 		return ide_stopped;
 	}
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 8ac6048cd2df..627b1f62a749 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -143,7 +143,7 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
 
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
-		ide_complete_rq(drive, -EIO, done);
+		ide_complete_rq(drive, BLK_STS_IOERR, done);
 		return ide_stopped;
 	}
 
@@ -248,7 +248,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 
 		if (ata_misc_request(rq)) {
 			scsi_req(rq)->result = 0;
-			ide_complete_rq(drive, 0, blk_rq_bytes(rq));
+			ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
 			return ide_stopped;
 		} else
 			goto out_end;
@@ -303,7 +303,7 @@ out_end:
 	drive->failed_pc = NULL;
 	if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
 		scsi_req(rq)->result = -EIO;
-	ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+	ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
 	return ide_stopped;
 }
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 323af721f8cb..3a234701d92c 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -54,7 +54,7 @@
 #include <linux/uaccess.h>
 #include <asm/io.h>
 
-int ide_end_rq(ide_drive_t *drive, struct request *rq, int error,
+int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error,
 	       unsigned int nr_bytes)
 {
 	/*
@@ -112,7 +112,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err)
 	}
 }
 
-int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
+int ide_complete_rq(ide_drive_t *drive, blk_status_t error, unsigned int nr_bytes)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq = hwif->rq;
@@ -122,7 +122,7 @@ int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
 	 * if failfast is set on a request, override number of sectors
 	 * and complete the whole request right now
 	 */
-	if (blk_noretry_request(rq) && error <= 0)
+	if (blk_noretry_request(rq) && error)
 		nr_bytes = blk_rq_sectors(rq) << 9;
 
 	rc = ide_end_rq(drive, rq, error, nr_bytes);
@@ -149,7 +149,7 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq)
 			scsi_req(rq)->result = -EIO;
 	}
 
-	ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+	ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
 }
 
 static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
@@ -272,7 +272,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
  	printk("%s: DRIVE_CMD (null)\n", drive->name);
 #endif
 	scsi_req(rq)->result = 0;
-	ide_complete_rq(drive, 0, blk_rq_bytes(rq));
+	ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
 
  	return ide_stopped;
 }
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index 8c0d17297a7a..3661abb16a5f 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -126,7 +126,6 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
 		struct request *rq;
 
 		rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-		scsi_req_init(rq);
 		ide_req(rq)->type = ATA_PRIV_TASKFILE;
 		blk_execute_rq(drive->queue, NULL, rq, 0);
 		err = scsi_req(rq)->result ? -EIO : 0;
@@ -224,7 +223,6 @@ static int generic_drive_reset(ide_drive_t *drive)
 	int ret = 0;
 
 	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd_len = 1;
 	scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 94e3107f59b9..1f264d5d3f3f 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -32,7 +32,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	spin_unlock_irq(&hwif->lock);
 
 	rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
 	scsi_req(rq)->cmd_len = 1;
 	ide_req(rq)->type = ATA_PRIV_MISC;
@@ -48,7 +47,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	 * timeout has expired, so power management will be reenabled.
 	 */
 	rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT);
-	scsi_req_init(rq);
 	if (IS_ERR(rq))
 		goto out;
 
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 0977fc1f40ce..544f02d673ca 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -19,7 +19,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 
 	memset(&rqpm, 0, sizeof(rqpm));
 	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
 	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_SUSPEND;
@@ -40,7 +39,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 	return ret;
 }
 
-static void ide_end_sync_rq(struct request *rq, int error)
+static void ide_end_sync_rq(struct request *rq, blk_status_t error)
 {
 	complete(rq->end_io_data);
 }
@@ -57,7 +56,7 @@ static int ide_pm_execute_rq(struct request *rq)
 	if (unlikely(blk_queue_dying(q))) {
 		rq->rq_flags |= RQF_QUIET;
 		scsi_req(rq)->result = -ENXIO;
-		__blk_end_request_all(rq, 0);
+		__blk_end_request_all(rq, BLK_STS_OK);
 		spin_unlock_irq(q->queue_lock);
 		return -ENXIO;
 	}
@@ -91,7 +90,6 @@ int generic_ide_resume(struct device *dev)
 
 	memset(&rqpm, 0, sizeof(rqpm));
 	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_PM_RESUME;
 	rq->rq_flags |= RQF_PREEMPT;
 	rq->special = &rqpm;
@@ -235,7 +233,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
 
 	drive->hwif->rq = NULL;
 
-	if (blk_end_request(rq, 0, 0))
+	if (blk_end_request(rq, BLK_STS_OK, 0))
 		BUG();
 }
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 023562565d11..01b2adfd8226 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -741,12 +741,12 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
 	}
 }
 
-static int ide_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
+static void ide_initialize_rq(struct request *rq)
 {
 	struct ide_request *req = blk_mq_rq_to_pdu(rq);
 
+	scsi_req_init(&req->sreq);
 	req->sreq.sense = req->sense;
-	return 0;
 }
 
 /*
@@ -771,8 +771,9 @@ static int ide_init_queue(ide_drive_t *drive)
 		return 1;
 
 	q->request_fn = do_ide_request;
-	q->init_rq_fn = ide_init_rq;
+	q->initialize_rq_fn = ide_initialize_rq;
 	q->cmd_size = sizeof(struct ide_request);
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
 	if (blk_init_allocated_queue(q) < 0) {
 		blk_cleanup_queue(q);
 		return 1;
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index a0651f948b76..fd57e8ccc47a 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -474,7 +474,7 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
 
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
-		ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+		ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
 		return ide_stopped;
 	}
 	ide_debug_log(IDE_DBG_SENSE, "retry #%d, cmd: 0x%02x", pc->retries,
@@ -855,7 +855,6 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 	BUG_ON(size < 0 || size % tape->blk_size);
 
 	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd[13] = cmd;
 	rq->rq_disk = tape->disk;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index d71199d23c9e..4efe4c6e956c 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -318,7 +318,7 @@ static void ide_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
 		}
 
 		if (nr_bytes > 0)
-			ide_complete_rq(drive, 0, nr_bytes);
+			ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
 	}
 }
 
@@ -336,7 +336,7 @@ void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat)
 		ide_driveid_update(drive);
 	}
 
-	ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
+	ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
 }
 
 /*
@@ -394,7 +394,7 @@ out_end:
 	if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
 		ide_finish_cmd(drive, cmd, stat);
 	else
-		ide_complete_rq(drive, 0, blk_rq_sectors(cmd->rq) << 9);
+		ide_complete_rq(drive, BLK_STS_OK, blk_rq_sectors(cmd->rq) << 9);
 	return ide_stopped;
 out_err:
 	ide_error_cmd(drive, cmd);
@@ -433,7 +433,6 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 	rq = blk_get_request(drive->queue,
 		(cmd->tf_flags & IDE_TFLAG_WRITE) ?
 			REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_TASKFILE;
 
 	/*
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index 6a1849bb476c..57eea5a9047f 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -406,7 +406,7 @@ static int siimage_dma_test_irq(ide_drive_t *drive)
  *	yet.
  */
 
-static int sil_sata_reset_poll(ide_drive_t *drive)
+static blk_status_t sil_sata_reset_poll(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	void __iomem *sata_status_addr
@@ -419,11 +419,11 @@ static int sil_sata_reset_poll(ide_drive_t *drive)
 		if ((sata_stat & 0x03) != 0x03) {
 			printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n",
 					    hwif->name, sata_stat);
-			return -ENXIO;
+			return BLK_STS_IOERR;
 		}
 	}
 
-	return 0;
+	return BLK_STS_OK;
 }
 
 /**
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 5805b041dd0f..216d7ec88c0c 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1097,6 +1097,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 	ICPU(INTEL_FAM6_XEON_PHI_KNL,		idle_cpu_knl),
 	ICPU(INTEL_FAM6_XEON_PHI_KNM,		idle_cpu_knl),
 	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		idle_cpu_bxt),
+	ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE,	idle_cpu_bxt),
 	ICPU(INTEL_FAM6_ATOM_DENVERTON,		idle_cpu_dnv),
 	{}
 };
@@ -1309,6 +1310,7 @@ static void intel_idle_state_table_update(void)
 		ivt_idle_state_table_update();
 		break;
 	case INTEL_FAM6_ATOM_GOLDMONT:
+	case INTEL_FAM6_ATOM_GEMINI_LAKE:
 		bxt_idle_state_table_update();
 		break;
 	case INTEL_FAM6_SKYLAKE_DESKTOP:
diff --git a/drivers/iio/adc/bcm_iproc_adc.c b/drivers/iio/adc/bcm_iproc_adc.c
index 21d38c8af21e..7f4f9c4150e3 100644
--- a/drivers/iio/adc/bcm_iproc_adc.c
+++ b/drivers/iio/adc/bcm_iproc_adc.c
@@ -143,7 +143,7 @@ static void iproc_adc_reg_dump(struct iio_dev *indio_dev)
 	iproc_adc_dbg_reg(dev, adc_priv, IPROC_SOFT_BYPASS_DATA);
 }
 
-static irqreturn_t iproc_adc_interrupt_handler(int irq, void *data)
+static irqreturn_t iproc_adc_interrupt_thread(int irq, void *data)
 {
 	u32 channel_intr_status;
 	u32 intr_status;
@@ -167,7 +167,7 @@ static irqreturn_t iproc_adc_interrupt_handler(int irq, void *data)
 	return IRQ_NONE;
 }
 
-static irqreturn_t iproc_adc_interrupt_thread(int irq, void *data)
+static irqreturn_t iproc_adc_interrupt_handler(int irq, void *data)
 {
 	irqreturn_t retval = IRQ_NONE;
 	struct iproc_adc_priv *adc_priv;
@@ -181,7 +181,7 @@ static irqreturn_t iproc_adc_interrupt_thread(int irq, void *data)
 	adc_priv = iio_priv(indio_dev);
 
 	regmap_read(adc_priv->regmap, IPROC_INTERRUPT_STATUS, &intr_status);
-	dev_dbg(&indio_dev->dev, "iproc_adc_interrupt_thread(),INTRPT_STS:%x\n",
+	dev_dbg(&indio_dev->dev, "iproc_adc_interrupt_handler(),INTRPT_STS:%x\n",
 			intr_status);
 
 	intr_channels = (intr_status & IPROC_ADC_INTR_MASK) >> IPROC_ADC_INTR;
@@ -566,8 +566,8 @@ static int iproc_adc_probe(struct platform_device *pdev)
 	}
 
 	ret = devm_request_threaded_irq(&pdev->dev, adc_priv->irqno,
-				iproc_adc_interrupt_thread,
 				iproc_adc_interrupt_handler,
+				iproc_adc_interrupt_thread,
 				IRQF_SHARED, "iproc-adc", indio_dev);
 	if (ret) {
 		dev_err(&pdev->dev, "request_irq error %d\n", ret);
diff --git a/drivers/iio/adc/max9611.c b/drivers/iio/adc/max9611.c
index ec82106480e1..b0526e4b9530 100644
--- a/drivers/iio/adc/max9611.c
+++ b/drivers/iio/adc/max9611.c
@@ -438,10 +438,10 @@ static ssize_t max9611_shunt_resistor_show(struct device *dev,
 	struct max9611_dev *max9611 = iio_priv(dev_to_iio_dev(dev));
 	unsigned int i, r;
 
-	i = max9611->shunt_resistor_uohm / 1000;
-	r = max9611->shunt_resistor_uohm % 1000;
+	i = max9611->shunt_resistor_uohm / 1000000;
+	r = max9611->shunt_resistor_uohm % 1000000;
 
-	return sprintf(buf, "%u.%03u\n", i, r);
+	return sprintf(buf, "%u.%06u\n", i, r);
 }
 
 static IIO_DEVICE_ATTR(in_power_shunt_resistor, 0444,
@@ -536,8 +536,8 @@ static int max9611_probe(struct i2c_client *client,
 	int ret;
 
 	indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*max9611));
-	if (IS_ERR(indio_dev))
-		return PTR_ERR(indio_dev);
+	if (!indio_dev)
+		return -ENOMEM;
 
 	i2c_set_clientdata(client, indio_dev);
 
diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c
index dd4190b50df6..6066bbfc42fe 100644
--- a/drivers/iio/adc/meson_saradc.c
+++ b/drivers/iio/adc/meson_saradc.c
@@ -468,13 +468,13 @@ static void meson_sar_adc_unlock(struct iio_dev *indio_dev)
 static void meson_sar_adc_clear_fifo(struct iio_dev *indio_dev)
 {
 	struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
-	int count;
+	unsigned int count, tmp;
 
 	for (count = 0; count < MESON_SAR_ADC_MAX_FIFO_SIZE; count++) {
 		if (!meson_sar_adc_get_fifo_count(indio_dev))
 			break;
 
-		regmap_read(priv->regmap, MESON_SAR_ADC_FIFO_RD, 0);
+		regmap_read(priv->regmap, MESON_SAR_ADC_FIFO_RD, &tmp);
 	}
 }
 
diff --git a/drivers/iio/adc/mxs-lradc-adc.c b/drivers/iio/adc/mxs-lradc-adc.c
index b0c7d8ee5cb8..6888167ca1e6 100644
--- a/drivers/iio/adc/mxs-lradc-adc.c
+++ b/drivers/iio/adc/mxs-lradc-adc.c
@@ -718,9 +718,12 @@ static int mxs_lradc_adc_probe(struct platform_device *pdev)
 	adc->dev = dev;
 
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!iores)
+		return -EINVAL;
+
 	adc->base = devm_ioremap(dev, iores->start, resource_size(iores));
-	if (IS_ERR(adc->base))
-		return PTR_ERR(adc->base);
+	if (!adc->base)
+		return -ENOMEM;
 
 	init_completion(&adc->completion);
 	spin_lock_init(&adc->lock);
diff --git a/drivers/iio/adc/stx104.c b/drivers/iio/adc/stx104.c
index 2df84fa5e3fc..2da741d27540 100644
--- a/drivers/iio/adc/stx104.c
+++ b/drivers/iio/adc/stx104.c
@@ -49,7 +49,7 @@
 
 static unsigned int base[max_num_isa_dev(STX104_EXTENT)];
 static unsigned int num_stx104;
-module_param_array(base, uint, &num_stx104, 0);
+module_param_hw_array(base, uint, ioport, &num_stx104, 0);
 MODULE_PARM_DESC(base, "Apex Embedded Systems STX104 base addresses");
 
 /**
diff --git a/drivers/iio/adc/sun4i-gpadc-iio.c b/drivers/iio/adc/sun4i-gpadc-iio.c
index b23527309088..81d4c39e414a 100644
--- a/drivers/iio/adc/sun4i-gpadc-iio.c
+++ b/drivers/iio/adc/sun4i-gpadc-iio.c
@@ -105,6 +105,8 @@ struct sun4i_gpadc_iio {
 	bool				no_irq;
 	/* prevents concurrent reads of temperature and ADC */
 	struct mutex			mutex;
+	struct thermal_zone_device	*tzd;
+	struct device			*sensor_device;
 };
 
 #define SUN4I_GPADC_ADC_CHANNEL(_channel, _name) {		\
@@ -502,7 +504,6 @@ static int sun4i_gpadc_probe_dt(struct platform_device *pdev,
 {
 	struct sun4i_gpadc_iio *info = iio_priv(indio_dev);
 	const struct of_device_id *of_dev;
-	struct thermal_zone_device *tzd;
 	struct resource *mem;
 	void __iomem *base;
 	int ret;
@@ -532,13 +533,14 @@ static int sun4i_gpadc_probe_dt(struct platform_device *pdev,
 	if (!IS_ENABLED(CONFIG_THERMAL_OF))
 		return 0;
 
-	tzd = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, info,
-						   &sun4i_ts_tz_ops);
-	if (IS_ERR(tzd))
+	info->sensor_device = &pdev->dev;
+	info->tzd = thermal_zone_of_sensor_register(info->sensor_device, 0,
+						    info, &sun4i_ts_tz_ops);
+	if (IS_ERR(info->tzd))
 		dev_err(&pdev->dev, "could not register thermal sensor: %ld\n",
-			PTR_ERR(tzd));
+			PTR_ERR(info->tzd));
 
-	return PTR_ERR_OR_ZERO(tzd);
+	return PTR_ERR_OR_ZERO(info->tzd);
 }
 
 static int sun4i_gpadc_probe_mfd(struct platform_device *pdev,
@@ -584,15 +586,15 @@ static int sun4i_gpadc_probe_mfd(struct platform_device *pdev,
 		 * of_node, and the device from this driver as third argument to
 		 * return the temperature.
 		 */
-		struct thermal_zone_device *tzd;
-		tzd = devm_thermal_zone_of_sensor_register(pdev->dev.parent, 0,
-							   info,
-							   &sun4i_ts_tz_ops);
-		if (IS_ERR(tzd)) {
+		info->sensor_device = pdev->dev.parent;
+		info->tzd = thermal_zone_of_sensor_register(info->sensor_device,
+							    0, info,
+							    &sun4i_ts_tz_ops);
+		if (IS_ERR(info->tzd)) {
 			dev_err(&pdev->dev,
 				"could not register thermal sensor: %ld\n",
-				PTR_ERR(tzd));
-			return PTR_ERR(tzd);
+				PTR_ERR(info->tzd));
+			return PTR_ERR(info->tzd);
 		}
 	} else {
 		indio_dev->num_channels =
@@ -688,7 +690,13 @@ static int sun4i_gpadc_remove(struct platform_device *pdev)
 
 	pm_runtime_put(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
-	if (!info->no_irq && IS_ENABLED(CONFIG_THERMAL_OF))
+
+	if (!IS_ENABLED(CONFIG_THERMAL_OF))
+		return 0;
+
+	thermal_zone_of_sensor_unregister(info->sensor_device, info->tzd);
+
+	if (!info->no_irq)
 		iio_map_array_unregister(indio_dev);
 
 	return 0;
@@ -700,6 +708,7 @@ static const struct platform_device_id sun4i_gpadc_id[] = {
 	{ "sun6i-a31-gpadc-iio", (kernel_ulong_t)&sun6i_gpadc_data },
 	{ /* sentinel */ },
 };
+MODULE_DEVICE_TABLE(platform, sun4i_gpadc_id);
 
 static struct platform_driver sun4i_gpadc_driver = {
 	.driver = {
@@ -711,6 +720,7 @@ static struct platform_driver sun4i_gpadc_driver = {
 	.probe = sun4i_gpadc_probe,
 	.remove = sun4i_gpadc_remove,
 };
+MODULE_DEVICE_TABLE(of, sun4i_gpadc_of_id);
 
 module_platform_driver(sun4i_gpadc_driver);
 
diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
index 4282ceca3d8f..6cbed7eb118a 100644
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -614,7 +614,7 @@ static int tiadc_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*indio_dev));
+	indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*adc_dev));
 	if (indio_dev == NULL) {
 		dev_err(&pdev->dev, "failed to allocate iio device\n");
 		return -ENOMEM;
diff --git a/drivers/iio/buffer/industrialio-buffer-dma.c b/drivers/iio/buffer/industrialio-buffer-dma.c
index dd99d273bae9..ff03324dee13 100644
--- a/drivers/iio/buffer/industrialio-buffer-dma.c
+++ b/drivers/iio/buffer/industrialio-buffer-dma.c
@@ -14,6 +14,7 @@
 #include <linux/sched.h>
 #include <linux/poll.h>
 #include <linux/iio/buffer.h>
+#include <linux/iio/buffer_impl.h>
 #include <linux/iio/buffer-dma.h>
 #include <linux/dma-mapping.h>
 #include <linux/sizes.h>
diff --git a/drivers/iio/buffer/industrialio-buffer-dmaengine.c b/drivers/iio/buffer/industrialio-buffer-dmaengine.c
index 9fabed47053d..2b5a320f42c5 100644
--- a/drivers/iio/buffer/industrialio-buffer-dmaengine.c
+++ b/drivers/iio/buffer/industrialio-buffer-dmaengine.c
@@ -14,6 +14,7 @@
 
 #include <linux/iio/iio.h>
 #include <linux/iio/buffer.h>
+#include <linux/iio/buffer_impl.h>
 #include <linux/iio/buffer-dma.h>
 #include <linux/iio/buffer-dmaengine.h>
 
diff --git a/drivers/iio/dac/cio-dac.c b/drivers/iio/dac/cio-dac.c
index a0464227a3a0..a8dffd938615 100644
--- a/drivers/iio/dac/cio-dac.c
+++ b/drivers/iio/dac/cio-dac.c
@@ -39,7 +39,7 @@
 
 static unsigned int base[max_num_isa_dev(CIO_DAC_EXTENT)];
 static unsigned int num_cio_dac;
-module_param_array(base, uint, &num_cio_dac, 0);
+module_param_hw_array(base, uint, ioport, &num_cio_dac, 0);
 MODULE_PARM_DESC(base, "Measurement Computing CIO-DAC base addresses");
 
 /**
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
index 96dabbd2f004..88a7c5d4e4d2 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
@@ -41,6 +41,7 @@ static const int accel_scale[] = {598, 1196, 2392, 4785};
 static const struct inv_mpu6050_reg_map reg_set_6500 = {
 	.sample_rate_div	= INV_MPU6050_REG_SAMPLE_RATE_DIV,
 	.lpf                    = INV_MPU6050_REG_CONFIG,
+	.accel_lpf              = INV_MPU6500_REG_ACCEL_CONFIG_2,
 	.user_ctrl              = INV_MPU6050_REG_USER_CTRL,
 	.fifo_en                = INV_MPU6050_REG_FIFO_EN,
 	.gyro_config            = INV_MPU6050_REG_GYRO_CONFIG,
@@ -211,6 +212,37 @@ int inv_mpu6050_set_power_itg(struct inv_mpu6050_state *st, bool power_on)
 EXPORT_SYMBOL_GPL(inv_mpu6050_set_power_itg);
 
 /**
+ *  inv_mpu6050_set_lpf_regs() - set low pass filter registers, chip dependent
+ *
+ *  MPU60xx/MPU9150 use only 1 register for accelerometer + gyroscope
+ *  MPU6500 and above have a dedicated register for accelerometer
+ */
+static int inv_mpu6050_set_lpf_regs(struct inv_mpu6050_state *st,
+				    enum inv_mpu6050_filter_e val)
+{
+	int result;
+
+	result = regmap_write(st->map, st->reg->lpf, val);
+	if (result)
+		return result;
+
+	switch (st->chip_type) {
+	case INV_MPU6050:
+	case INV_MPU6000:
+	case INV_MPU9150:
+		/* old chips, nothing to do */
+		result = 0;
+		break;
+	default:
+		/* set accel lpf */
+		result = regmap_write(st->map, st->reg->accel_lpf, val);
+		break;
+	}
+
+	return result;
+}
+
+/**
  *  inv_mpu6050_init_config() - Initialize hardware, disable FIFO.
  *
  *  Initial configuration:
@@ -233,8 +265,7 @@ static int inv_mpu6050_init_config(struct iio_dev *indio_dev)
 	if (result)
 		return result;
 
-	d = INV_MPU6050_FILTER_20HZ;
-	result = regmap_write(st->map, st->reg->lpf, d);
+	result = inv_mpu6050_set_lpf_regs(st, INV_MPU6050_FILTER_20HZ);
 	if (result)
 		return result;
 
@@ -537,6 +568,8 @@ error_write_raw:
  *                  would be alising. This function basically search for the
  *                  correct low pass parameters based on the fifo rate, e.g,
  *                  sampling frequency.
+ *
+ *  lpf is set automatically when setting sampling rate to avoid any aliases.
  */
 static int inv_mpu6050_set_lpf(struct inv_mpu6050_state *st, int rate)
 {
@@ -552,7 +585,7 @@ static int inv_mpu6050_set_lpf(struct inv_mpu6050_state *st, int rate)
 	while ((h < hz[i]) && (i < ARRAY_SIZE(d) - 1))
 		i++;
 	data = d[i];
-	result = regmap_write(st->map, st->reg->lpf, data);
+	result = inv_mpu6050_set_lpf_regs(st, data);
 	if (result)
 		return result;
 	st->chip_config.lpf = data;
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
index ef13de7a2c20..953a0c09d568 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
@@ -28,6 +28,7 @@
  *  struct inv_mpu6050_reg_map - Notable registers.
  *  @sample_rate_div:	Divider applied to gyro output rate.
  *  @lpf:		Configures internal low pass filter.
+ *  @accel_lpf:		Configures accelerometer low pass filter.
  *  @user_ctrl:		Enables/resets the FIFO.
  *  @fifo_en:		Determines which data will appear in FIFO.
  *  @gyro_config:	gyro config register.
@@ -47,6 +48,7 @@
 struct inv_mpu6050_reg_map {
 	u8 sample_rate_div;
 	u8 lpf;
+	u8 accel_lpf;
 	u8 user_ctrl;
 	u8 fifo_en;
 	u8 gyro_config;
@@ -188,6 +190,7 @@ struct inv_mpu6050_state {
 #define INV_MPU6050_FIFO_THRESHOLD           500
 
 /* mpu6500 registers */
+#define INV_MPU6500_REG_ACCEL_CONFIG_2      0x1D
 #define INV_MPU6500_REG_ACCEL_OFFSET        0x77
 
 /* delay time in milliseconds */
diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c
index 978e1592c2a3..4061fed93f1f 100644
--- a/drivers/iio/industrialio-trigger.c
+++ b/drivers/iio/industrialio-trigger.c
@@ -451,7 +451,8 @@ static ssize_t iio_trigger_write_current(struct device *dev,
 	return len;
 
 out_trigger_put:
-	iio_trigger_put(trig);
+	if (trig)
+		iio_trigger_put(trig);
 	return ret;
 }
 
diff --git a/drivers/iio/light/ltr501.c b/drivers/iio/light/ltr501.c
index b30e0c1c6cc4..67838edd8b37 100644
--- a/drivers/iio/light/ltr501.c
+++ b/drivers/iio/light/ltr501.c
@@ -74,9 +74,9 @@ static const int int_time_mapping[] = {100000, 50000, 200000, 400000};
 static const struct reg_field reg_field_it =
 				REG_FIELD(LTR501_ALS_MEAS_RATE, 3, 4);
 static const struct reg_field reg_field_als_intr =
-				REG_FIELD(LTR501_INTR, 0, 0);
-static const struct reg_field reg_field_ps_intr =
 				REG_FIELD(LTR501_INTR, 1, 1);
+static const struct reg_field reg_field_ps_intr =
+				REG_FIELD(LTR501_INTR, 0, 0);
 static const struct reg_field reg_field_als_rate =
 				REG_FIELD(LTR501_ALS_MEAS_RATE, 0, 2);
 static const struct reg_field reg_field_ps_rate =
diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c
index ddf9bee89f77..aa4df0dcc8c9 100644
--- a/drivers/iio/proximity/as3935.c
+++ b/drivers/iio/proximity/as3935.c
@@ -40,9 +40,9 @@
 #define AS3935_AFE_PWR_BIT	BIT(0)
 
 #define AS3935_INT		0x03
-#define AS3935_INT_MASK		0x07
+#define AS3935_INT_MASK		0x0f
 #define AS3935_EVENT_INT	BIT(3)
-#define AS3935_NOISE_INT	BIT(1)
+#define AS3935_NOISE_INT	BIT(0)
 
 #define AS3935_DATA		0x07
 #define AS3935_DATA_MASK	0x3F
@@ -215,7 +215,7 @@ static irqreturn_t as3935_trigger_handler(int irq, void *private)
 
 	st->buffer[0] = val & AS3935_DATA_MASK;
 	iio_push_to_buffers_with_timestamp(indio_dev, &st->buffer,
-					   pf->timestamp);
+					   iio_get_time_ns(indio_dev));
 err_read:
 	iio_trigger_notify_done(indio_dev->trig);
 
@@ -244,7 +244,7 @@ static void as3935_event_work(struct work_struct *work)
 
 	switch (val) {
 	case AS3935_EVENT_INT:
-		iio_trigger_poll(st->trig);
+		iio_trigger_poll_chained(st->trig);
 		break;
 	case AS3935_NOISE_INT:
 		dev_warn(&st->spi->dev, "noise level is too high\n");
@@ -269,8 +269,6 @@ static irqreturn_t as3935_interrupt_handler(int irq, void *private)
 
 static void calibrate_as3935(struct as3935_state *st)
 {
-	mutex_lock(&st->lock);
-
 	/* mask disturber interrupt bit */
 	as3935_write(st, AS3935_INT, BIT(5));
 
@@ -280,8 +278,6 @@ static void calibrate_as3935(struct as3935_state *st)
 
 	mdelay(2);
 	as3935_write(st, AS3935_TUNE_CAP, (st->tune_cap / TUNE_CAP_DIV));
-
-	mutex_unlock(&st->lock);
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -318,6 +314,8 @@ static int as3935_resume(struct device *dev)
 	val &= ~AS3935_AFE_PWR_BIT;
 	ret = as3935_write(st, AS3935_AFE_GAIN, val);
 
+	calibrate_as3935(st);
+
 err_resume:
 	mutex_unlock(&st->lock);
 
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 02971e239a18..ece6926fa2e6 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -449,12 +449,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 		return ret;
 
 	rt = (struct rt6_info *)dst;
-	if (ipv6_addr_any(&fl6.saddr)) {
-		ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
-					 &fl6.daddr, 0, &fl6.saddr);
-		if (ret)
-			goto put;
-
+	if (ipv6_addr_any(&src_in->sin6_addr)) {
 		src_in->sin6_family = AF_INET6;
 		src_in->sin6_addr = fl6.saddr;
 	}
@@ -471,9 +466,6 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 
 	*pdst = dst;
 	return 0;
-put:
-	dst_release(dst);
-	return ret;
 }
 #else
 static int addr6_resolve(struct sockaddr_in6 *src_in,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 1844770f3ae8..2b4d613a3474 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1429,7 +1429,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
 	primary_path->packet_life_time =
 		cm_req_get_primary_local_ack_timeout(req_msg);
 	primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
-	sa_path_set_service_id(primary_path, req_msg->service_id);
+	primary_path->service_id = req_msg->service_id;
 
 	if (req_msg->alt_local_lid) {
 		alt_path->dgid = req_msg->alt_local_gid;
@@ -1452,7 +1452,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
 		alt_path->packet_life_time =
 			cm_req_get_alt_local_ack_timeout(req_msg);
 		alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
-		sa_path_set_service_id(alt_path, req_msg->service_id);
+		alt_path->service_id = req_msg->service_id;
 	}
 }
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 91b7a2fe5a55..31bb82d8ecd7 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1140,7 +1140,7 @@ static void cma_save_ib_info(struct sockaddr *src_addr,
 			ib->sib_pkey = path->pkey;
 			ib->sib_flowinfo = path->flow_label;
 			memcpy(&ib->sib_addr, &path->sgid, 16);
-			ib->sib_sid = sa_path_get_service_id(path);
+			ib->sib_sid = path->service_id;
 			ib->sib_scope_id = 0;
 		} else {
 			ib->sib_pkey = listen_ib->sib_pkey;
@@ -1274,8 +1274,7 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event,
 		memcpy(&req->local_gid, &req_param->primary_path->sgid,
 		       sizeof(req->local_gid));
 		req->has_gid	= true;
-		req->service_id	=
-			sa_path_get_service_id(req_param->primary_path);
+		req->service_id = req_param->primary_path->service_id;
 		req->pkey	= be16_to_cpu(req_param->primary_path->pkey);
 		if (req->pkey != req_param->bth_pkey)
 			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
@@ -1827,7 +1826,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
 	struct rdma_route *rt;
 	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
 	struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path;
-	const __be64 service_id = sa_path_get_service_id(path);
+	const __be64 service_id =
+		ib_event->param.req_rcvd.primary_path->service_id;
 	int ret;
 
 	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
@@ -2345,9 +2345,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
 	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
 	path_rec.numb_path = 1;
 	path_rec.reversible = 1;
-	sa_path_set_service_id(&path_rec,
-			       rdma_get_service_id(&id_priv->id,
-						   cma_dst_addr(id_priv)));
+	path_rec.service_id = rdma_get_service_id(&id_priv->id,
+						  cma_dst_addr(id_priv));
 
 	comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
 		    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index cb7d372e4bdf..d92ab4eaa8f3 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -169,6 +169,16 @@ void ib_mad_cleanup(void);
 int ib_sa_init(void);
 void ib_sa_cleanup(void);
 
+int ibnl_init(void);
+void ibnl_cleanup(void);
+
+/**
+ * Check if there are any listeners to the netlink group
+ * @group: the netlink group ID
+ * Returns 0 on success or a negative for no listeners.
+ */
+int ibnl_chk_listeners(unsigned int group);
+
 int ib_nl_handle_resolve_resp(struct sk_buff *skb,
 			      struct netlink_callback *cb);
 int ib_nl_handle_set_timeout(struct sk_buff *skb,
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index b784055423c8..94931c474d41 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -37,6 +37,7 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <rdma/rdma_netlink.h>
+#include "core_priv.h"
 
 struct ibnl_client {
 	struct list_head		list;
@@ -55,7 +56,6 @@ int ibnl_chk_listeners(unsigned int group)
 		return -1;
 	return 0;
 }
-EXPORT_SYMBOL(ibnl_chk_listeners);
 
 int ibnl_add_client(int index, int nops,
 		    const struct ibnl_client_cbs cb_table[])
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index e335b09c022e..fb7aec4047c8 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -194,7 +194,7 @@ static u32 tid;
 	.field_name          = "sa_path_rec:" #field
 
 static const struct ib_field path_rec_table[] = {
-	{ PATH_REC_FIELD(ib.service_id),
+	{ PATH_REC_FIELD(service_id),
 	  .offset_words = 0,
 	  .offset_bits  = 0,
 	  .size_bits    = 64 },
@@ -296,7 +296,7 @@ static const struct ib_field path_rec_table[] = {
 	.field_name          = "sa_path_rec:" #field
 
 static const struct ib_field opa_path_rec_table[] = {
-	{ OPA_PATH_REC_FIELD(opa.service_id),
+	{ OPA_PATH_REC_FIELD(service_id),
 	  .offset_words = 0,
 	  .offset_bits  = 0,
 	  .size_bits    = 64 },
@@ -774,7 +774,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
 
 	/* Now build the attributes */
 	if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
-		val64 = be64_to_cpu(sa_path_get_service_id(sa_rec));
+		val64 = be64_to_cpu(sa_rec->service_id);
 		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
 			sizeof(val64), &val64);
 	}
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 3dbf811d3c51..21e60b1e2ff4 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -58,7 +58,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 	for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
 
 		page = sg_page(sg);
-		if (umem->writable && dirty)
+		if (!PageDirty(page) && umem->writable && dirty)
 			set_page_dirty_lock(page);
 		put_page(page);
 	}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 0780b1afefa9..8c4ec564e495 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -321,11 +321,15 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
 		struct vm_area_struct *vma;
 		struct hstate *h;
 
+		down_read(&mm->mmap_sem);
 		vma = find_vma(mm, ib_umem_start(umem));
-		if (!vma || !is_vm_hugetlb_page(vma))
+		if (!vma || !is_vm_hugetlb_page(vma)) {
+			up_read(&mm->mmap_sem);
 			return -EINVAL;
+		}
 		h = hstate_vma(vma);
 		umem->page_shift = huge_page_shift(h);
+		up_read(&mm->mmap_sem);
 		umem->hugetlb = 1;
 	} else {
 		umem->hugetlb = 0;
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index 8b9587fe2303..94fd989c9060 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -96,11 +96,11 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
 }
 EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
 
-void __ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
-				struct sa_path_rec *src)
+static void __ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+				       struct sa_path_rec *src)
 {
-	memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid);
-	memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid);
+	memcpy(dst->dgid, src->dgid.raw, sizeof(src->dgid));
+	memcpy(dst->sgid, src->sgid.raw, sizeof(src->sgid));
 
 	dst->dlid		= htons(ntohl(sa_path_get_dlid(src)));
 	dst->slid		= htons(ntohl(sa_path_get_slid(src)));
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index ebf7be8d4139..08772836fded 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -56,6 +56,10 @@
 #define BNXT_RE_MAX_SRQC_COUNT		(64 * 1024)
 #define BNXT_RE_MAX_CQ_COUNT		(64 * 1024)
 
+#define BNXT_RE_UD_QP_HW_STALL		0x400000
+
+#define BNXT_RE_RQ_WQE_THRESHOLD	32
+
 struct bnxt_re_work {
 	struct work_struct	work;
 	unsigned long		event;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 7ba9e699d7ab..c7bd68311d0c 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -61,6 +61,48 @@
 #include "ib_verbs.h"
 #include <rdma/bnxt_re-abi.h>
 
+static int __from_ib_access_flags(int iflags)
+{
+	int qflags = 0;
+
+	if (iflags & IB_ACCESS_LOCAL_WRITE)
+		qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
+	if (iflags & IB_ACCESS_REMOTE_READ)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
+	if (iflags & IB_ACCESS_REMOTE_WRITE)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
+	if (iflags & IB_ACCESS_REMOTE_ATOMIC)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
+	if (iflags & IB_ACCESS_MW_BIND)
+		qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
+	if (iflags & IB_ZERO_BASED)
+		qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
+	if (iflags & IB_ACCESS_ON_DEMAND)
+		qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
+	return qflags;
+};
+
+static enum ib_access_flags __to_ib_access_flags(int qflags)
+{
+	enum ib_access_flags iflags = 0;
+
+	if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
+		iflags |= IB_ACCESS_LOCAL_WRITE;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
+		iflags |= IB_ACCESS_REMOTE_WRITE;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
+		iflags |= IB_ACCESS_REMOTE_READ;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
+		iflags |= IB_ACCESS_REMOTE_ATOMIC;
+	if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
+		iflags |= IB_ACCESS_MW_BIND;
+	if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
+		iflags |= IB_ZERO_BASED;
+	if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
+		iflags |= IB_ACCESS_ON_DEMAND;
+	return iflags;
+};
+
 static int bnxt_re_build_sgl(struct ib_sge *ib_sg_list,
 			     struct bnxt_qplib_sge *sg_list, int num)
 {
@@ -149,8 +191,8 @@ int bnxt_re_query_device(struct ib_device *ibdev,
 	ib_attr->max_total_mcast_qp_attach = 0;
 	ib_attr->max_ah = dev_attr->max_ah;
 
-	ib_attr->max_fmr = dev_attr->max_fmr;
-	ib_attr->max_map_per_fmr = 1;	/* ? */
+	ib_attr->max_fmr = 0;
+	ib_attr->max_map_per_fmr = 0;
 
 	ib_attr->max_srq = dev_attr->max_srq;
 	ib_attr->max_srq_wr = dev_attr->max_srq_wqes;
@@ -410,6 +452,158 @@ enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
 	return IB_LINK_LAYER_ETHERNET;
 }
 
+#define	BNXT_RE_FENCE_PBL_SIZE	DIV_ROUND_UP(BNXT_RE_FENCE_BYTES, PAGE_SIZE)
+
+static void bnxt_re_create_fence_wqe(struct bnxt_re_pd *pd)
+{
+	struct bnxt_re_fence_data *fence = &pd->fence;
+	struct ib_mr *ib_mr = &fence->mr->ib_mr;
+	struct bnxt_qplib_swqe *wqe = &fence->bind_wqe;
+
+	memset(wqe, 0, sizeof(*wqe));
+	wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW;
+	wqe->wr_id = BNXT_QPLIB_FENCE_WRID;
+	wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+	wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+	wqe->bind.zero_based = false;
+	wqe->bind.parent_l_key = ib_mr->lkey;
+	wqe->bind.va = (u64)(unsigned long)fence->va;
+	wqe->bind.length = fence->size;
+	wqe->bind.access_cntl = __from_ib_access_flags(IB_ACCESS_REMOTE_READ);
+	wqe->bind.mw_type = SQ_BIND_MW_TYPE_TYPE1;
+
+	/* Save the initial rkey in fence structure for now;
+	 * wqe->bind.r_key will be set at (re)bind time.
+	 */
+	fence->bind_rkey = ib_inc_rkey(fence->mw->rkey);
+}
+
+static int bnxt_re_bind_fence_mw(struct bnxt_qplib_qp *qplib_qp)
+{
+	struct bnxt_re_qp *qp = container_of(qplib_qp, struct bnxt_re_qp,
+					     qplib_qp);
+	struct ib_pd *ib_pd = qp->ib_qp.pd;
+	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+	struct bnxt_re_fence_data *fence = &pd->fence;
+	struct bnxt_qplib_swqe *fence_wqe = &fence->bind_wqe;
+	struct bnxt_qplib_swqe wqe;
+	int rc;
+
+	memcpy(&wqe, fence_wqe, sizeof(wqe));
+	wqe.bind.r_key = fence->bind_rkey;
+	fence->bind_rkey = ib_inc_rkey(fence->bind_rkey);
+
+	dev_dbg(rdev_to_dev(qp->rdev),
+		"Posting bind fence-WQE: rkey: %#x QP: %d PD: %p\n",
+		wqe.bind.r_key, qp->qplib_qp.id, pd);
+	rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+	if (rc) {
+		dev_err(rdev_to_dev(qp->rdev), "Failed to bind fence-WQE\n");
+		return rc;
+	}
+	bnxt_qplib_post_send_db(&qp->qplib_qp);
+
+	return rc;
+}
+
+static void bnxt_re_destroy_fence_mr(struct bnxt_re_pd *pd)
+{
+	struct bnxt_re_fence_data *fence = &pd->fence;
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct device *dev = &rdev->en_dev->pdev->dev;
+	struct bnxt_re_mr *mr = fence->mr;
+
+	if (fence->mw) {
+		bnxt_re_dealloc_mw(fence->mw);
+		fence->mw = NULL;
+	}
+	if (mr) {
+		if (mr->ib_mr.rkey)
+			bnxt_qplib_dereg_mrw(&rdev->qplib_res, &mr->qplib_mr,
+					     true);
+		if (mr->ib_mr.lkey)
+			bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+		kfree(mr);
+		fence->mr = NULL;
+	}
+	if (fence->dma_addr) {
+		dma_unmap_single(dev, fence->dma_addr, BNXT_RE_FENCE_BYTES,
+				 DMA_BIDIRECTIONAL);
+		fence->dma_addr = 0;
+	}
+}
+
+static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
+{
+	int mr_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_MW_BIND;
+	struct bnxt_re_fence_data *fence = &pd->fence;
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct device *dev = &rdev->en_dev->pdev->dev;
+	struct bnxt_re_mr *mr = NULL;
+	dma_addr_t dma_addr = 0;
+	struct ib_mw *mw;
+	u64 pbl_tbl;
+	int rc;
+
+	dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES,
+				  DMA_BIDIRECTIONAL);
+	rc = dma_mapping_error(dev, dma_addr);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to dma-map fence-MR-mem\n");
+		rc = -EIO;
+		fence->dma_addr = 0;
+		goto fail;
+	}
+	fence->dma_addr = dma_addr;
+
+	/* Allocate a MR */
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+	fence->mr = mr;
+	mr->rdev = rdev;
+	mr->qplib_mr.pd = &pd->qplib_pd;
+	mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+	mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to alloc fence-HW-MR\n");
+		goto fail;
+	}
+
+	/* Register MR */
+	mr->ib_mr.lkey = mr->qplib_mr.lkey;
+	mr->qplib_mr.va = (u64)(unsigned long)fence->va;
+	mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES;
+	pbl_tbl = dma_addr;
+	rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl,
+			       BNXT_RE_FENCE_PBL_SIZE, false);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to register fence-MR\n");
+		goto fail;
+	}
+	mr->ib_mr.rkey = mr->qplib_mr.rkey;
+
+	/* Create a fence MW only for kernel consumers */
+	mw = bnxt_re_alloc_mw(&pd->ib_pd, IB_MW_TYPE_1, NULL);
+	if (!mw) {
+		dev_err(rdev_to_dev(rdev),
+			"Failed to create fence-MW for PD: %p\n", pd);
+		rc = -EINVAL;
+		goto fail;
+	}
+	fence->mw = mw;
+
+	bnxt_re_create_fence_wqe(pd);
+	return 0;
+
+fail:
+	bnxt_re_destroy_fence_mr(pd);
+	return rc;
+}
+
 /* Protection Domains */
 int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
 {
@@ -417,6 +611,7 @@ int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
 	struct bnxt_re_dev *rdev = pd->rdev;
 	int rc;
 
+	bnxt_re_destroy_fence_mr(pd);
 	if (ib_pd->uobject && pd->dpi.dbr) {
 		struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
 		struct bnxt_re_ucontext *ucntx;
@@ -498,6 +693,10 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
 		}
 	}
 
+	if (!udata)
+		if (bnxt_re_create_fence_mr(pd))
+			dev_warn(rdev_to_dev(rdev),
+				 "Failed to create Fence-MR\n");
 	return &pd->ib_pd;
 dbfail:
 	(void)bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
@@ -849,12 +1048,16 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
 	/* Shadow QP SQ depth should be same as QP1 RQ depth */
 	qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe;
 	qp->qplib_qp.sq.max_sge = 2;
+	/* Q full delta can be 1 since it is internal QP */
+	qp->qplib_qp.sq.q_full_delta = 1;
 
 	qp->qplib_qp.scq = qp1_qp->scq;
 	qp->qplib_qp.rcq = qp1_qp->rcq;
 
 	qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe;
 	qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
+	/* Q full delta can be 1 since it is internal QP */
+	qp->qplib_qp.rq.q_full_delta = 1;
 
 	qp->qplib_qp.mtu = qp1_qp->mtu;
 
@@ -917,10 +1120,6 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 	qp->qplib_qp.sig_type = ((qp_init_attr->sq_sig_type ==
 				  IB_SIGNAL_ALL_WR) ? true : false);
 
-	entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
-	qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
-					dev_attr->max_qp_wqes + 1);
-
 	qp->qplib_qp.sq.max_sge = qp_init_attr->cap.max_send_sge;
 	if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
 		qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
@@ -959,6 +1158,9 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 		qp->qplib_qp.rq.max_wqe = min_t(u32, entries,
 						dev_attr->max_qp_wqes + 1);
 
+		qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
+						qp_init_attr->cap.max_recv_wr;
+
 		qp->qplib_qp.rq.max_sge = qp_init_attr->cap.max_recv_sge;
 		if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
 			qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
@@ -967,6 +1169,12 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 	qp->qplib_qp.mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
 
 	if (qp_init_attr->qp_type == IB_QPT_GSI) {
+		/* Allocate 1 more than what's provided */
+		entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
+		qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+						dev_attr->max_qp_wqes + 1);
+		qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
+						qp_init_attr->cap.max_send_wr;
 		qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
 		if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
 			qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
@@ -1006,6 +1214,22 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 		}
 
 	} else {
+		/* Allocate 128 + 1 more than what's provided */
+		entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr +
+					     BNXT_QPLIB_RESERVED_QP_WRS + 1);
+		qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+						dev_attr->max_qp_wqes +
+						BNXT_QPLIB_RESERVED_QP_WRS + 1);
+		qp->qplib_qp.sq.q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1;
+
+		/*
+		 * Reserving one slot for Phantom WQE. Application can
+		 * post one extra entry in this case. But allowing this to avoid
+		 * unexpected Queue full condition
+		 */
+
+		qp->qplib_qp.sq.q_full_delta -= 1;
+
 		qp->qplib_qp.max_rd_atomic = dev_attr->max_qp_rd_atom;
 		qp->qplib_qp.max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
 		if (udata) {
@@ -1025,6 +1249,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 
 	qp->ib_qp.qp_num = qp->qplib_qp.id;
 	spin_lock_init(&qp->sq_lock);
+	spin_lock_init(&qp->rq_lock);
 
 	if (udata) {
 		struct bnxt_re_qp_resp resp;
@@ -1129,48 +1354,6 @@ static enum ib_mtu __to_ib_mtu(u32 mtu)
 	}
 }
 
-static int __from_ib_access_flags(int iflags)
-{
-	int qflags = 0;
-
-	if (iflags & IB_ACCESS_LOCAL_WRITE)
-		qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
-	if (iflags & IB_ACCESS_REMOTE_READ)
-		qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
-	if (iflags & IB_ACCESS_REMOTE_WRITE)
-		qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
-	if (iflags & IB_ACCESS_REMOTE_ATOMIC)
-		qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
-	if (iflags & IB_ACCESS_MW_BIND)
-		qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
-	if (iflags & IB_ZERO_BASED)
-		qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
-	if (iflags & IB_ACCESS_ON_DEMAND)
-		qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
-	return qflags;
-};
-
-static enum ib_access_flags __to_ib_access_flags(int qflags)
-{
-	enum ib_access_flags iflags = 0;
-
-	if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
-		iflags |= IB_ACCESS_LOCAL_WRITE;
-	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
-		iflags |= IB_ACCESS_REMOTE_WRITE;
-	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
-		iflags |= IB_ACCESS_REMOTE_READ;
-	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
-		iflags |= IB_ACCESS_REMOTE_ATOMIC;
-	if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
-		iflags |= IB_ACCESS_MW_BIND;
-	if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
-		iflags |= IB_ZERO_BASED;
-	if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
-		iflags |= IB_ACCESS_ON_DEMAND;
-	return iflags;
-};
-
 static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
 				    struct bnxt_re_qp *qp1_qp,
 				    int qp_attr_mask)
@@ -1378,11 +1561,21 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 		entries = roundup_pow_of_two(qp_attr->cap.max_send_wr);
 		qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
 						dev_attr->max_qp_wqes + 1);
+		qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
+						qp_attr->cap.max_send_wr;
+		/*
+		 * Reserving one slot for Phantom WQE. Some application can
+		 * post one extra entry in this case. Allowing this to avoid
+		 * unexpected Queue full condition
+		 */
+		qp->qplib_qp.sq.q_full_delta -= 1;
 		qp->qplib_qp.sq.max_sge = qp_attr->cap.max_send_sge;
 		if (qp->qplib_qp.rq.max_wqe) {
 			entries = roundup_pow_of_two(qp_attr->cap.max_recv_wr);
 			qp->qplib_qp.rq.max_wqe =
 				min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+			qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
+						       qp_attr->cap.max_recv_wr;
 			qp->qplib_qp.rq.max_sge = qp_attr->cap.max_recv_sge;
 		} else {
 			/* SRQ was used prior, just ignore the RQ caps */
@@ -1883,6 +2076,22 @@ static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev,
 	return payload_sz;
 }
 
+static void bnxt_ud_qp_hw_stall_workaround(struct bnxt_re_qp *qp)
+{
+	if ((qp->ib_qp.qp_type == IB_QPT_UD ||
+	     qp->ib_qp.qp_type == IB_QPT_GSI ||
+	     qp->ib_qp.qp_type == IB_QPT_RAW_ETHERTYPE) &&
+	     qp->qplib_qp.wqe_cnt == BNXT_RE_UD_QP_HW_STALL) {
+		int qp_attr_mask;
+		struct ib_qp_attr qp_attr;
+
+		qp_attr_mask = IB_QP_STATE;
+		qp_attr.qp_state = IB_QPS_RTS;
+		bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, qp_attr_mask, NULL);
+		qp->qplib_qp.wqe_cnt = 0;
+	}
+}
+
 static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
 				       struct bnxt_re_qp *qp,
 				struct ib_send_wr *wr)
@@ -1928,6 +2137,7 @@ bad:
 		wr = wr->next;
 	}
 	bnxt_qplib_post_send_db(&qp->qplib_qp);
+	bnxt_ud_qp_hw_stall_workaround(qp);
 	spin_unlock_irqrestore(&qp->sq_lock, flags);
 	return rc;
 }
@@ -2024,6 +2234,7 @@ bad:
 		wr = wr->next;
 	}
 	bnxt_qplib_post_send_db(&qp->qplib_qp);
+	bnxt_ud_qp_hw_stall_workaround(qp);
 	spin_unlock_irqrestore(&qp->sq_lock, flags);
 
 	return rc;
@@ -2071,7 +2282,10 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
 	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
 	struct bnxt_qplib_swqe wqe;
 	int rc = 0, payload_sz = 0;
+	unsigned long flags;
+	u32 count = 0;
 
+	spin_lock_irqsave(&qp->rq_lock, flags);
 	while (wr) {
 		/* House keeping */
 		memset(&wqe, 0, sizeof(wqe));
@@ -2100,9 +2314,21 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
 			*bad_wr = wr;
 			break;
 		}
+
+		/* Ring DB if the RQEs posted reaches a threshold value */
+		if (++count >= BNXT_RE_RQ_WQE_THRESHOLD) {
+			bnxt_qplib_post_recv_db(&qp->qplib_qp);
+			count = 0;
+		}
+
 		wr = wr->next;
 	}
-	bnxt_qplib_post_recv_db(&qp->qplib_qp);
+
+	if (count)
+		bnxt_qplib_post_recv_db(&qp->qplib_qp);
+
+	spin_unlock_irqrestore(&qp->rq_lock, flags);
+
 	return rc;
 }
 
@@ -2643,12 +2869,36 @@ static void bnxt_re_process_res_ud_wc(struct ib_wc *wc,
 		wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
 }
 
+static int send_phantom_wqe(struct bnxt_re_qp *qp)
+{
+	struct bnxt_qplib_qp *lib_qp = &qp->qplib_qp;
+	unsigned long flags;
+	int rc = 0;
+
+	spin_lock_irqsave(&qp->sq_lock, flags);
+
+	rc = bnxt_re_bind_fence_mw(lib_qp);
+	if (!rc) {
+		lib_qp->sq.phantom_wqe_cnt++;
+		dev_dbg(&lib_qp->sq.hwq.pdev->dev,
+			"qp %#x sq->prod %#x sw_prod %#x phantom_wqe_cnt %d\n",
+			lib_qp->id, lib_qp->sq.hwq.prod,
+			HWQ_CMP(lib_qp->sq.hwq.prod, &lib_qp->sq.hwq),
+			lib_qp->sq.phantom_wqe_cnt);
+	}
+
+	spin_unlock_irqrestore(&qp->sq_lock, flags);
+	return rc;
+}
+
 int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
 {
 	struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
 	struct bnxt_re_qp *qp;
 	struct bnxt_qplib_cqe *cqe;
 	int i, ncqe, budget;
+	struct bnxt_qplib_q *sq;
+	struct bnxt_qplib_qp *lib_qp;
 	u32 tbl_idx;
 	struct bnxt_re_sqp_entries *sqp_entry = NULL;
 	unsigned long flags;
@@ -2661,7 +2911,21 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
 	}
 	cqe = &cq->cql[0];
 	while (budget) {
-		ncqe = bnxt_qplib_poll_cq(&cq->qplib_cq, cqe, budget);
+		lib_qp = NULL;
+		ncqe = bnxt_qplib_poll_cq(&cq->qplib_cq, cqe, budget, &lib_qp);
+		if (lib_qp) {
+			sq = &lib_qp->sq;
+			if (sq->send_phantom) {
+				qp = container_of(lib_qp,
+						  struct bnxt_re_qp, qplib_qp);
+				if (send_phantom_wqe(qp) == -ENOMEM)
+					dev_err(rdev_to_dev(cq->rdev),
+						"Phantom failed! Scheduled to send again\n");
+				else
+					sq->send_phantom = false;
+			}
+		}
+
 		if (!ncqe)
 			break;
 
@@ -2822,6 +3086,12 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
 	struct bnxt_re_dev *rdev = mr->rdev;
 	int rc;
 
+	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc);
+		return rc;
+	}
+
 	if (mr->npages && mr->pages) {
 		rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
 							&mr->qplib_frpl);
@@ -2829,8 +3099,6 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
 		mr->npages = 0;
 		mr->pages = NULL;
 	}
-	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
-
 	if (!IS_ERR_OR_NULL(mr->ib_umem))
 		ib_umem_release(mr->ib_umem);
 
@@ -2914,97 +3182,52 @@ fail:
 	return ERR_PTR(rc);
 }
 
-/* Fast Memory Regions */
-struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *ib_pd, int mr_access_flags,
-				 struct ib_fmr_attr *fmr_attr)
+struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+			       struct ib_udata *udata)
 {
 	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
 	struct bnxt_re_dev *rdev = pd->rdev;
-	struct bnxt_re_fmr *fmr;
+	struct bnxt_re_mw *mw;
 	int rc;
 
-	if (fmr_attr->max_pages > MAX_PBL_LVL_2_PGS ||
-	    fmr_attr->max_maps > rdev->dev_attr.max_map_per_fmr) {
-		dev_err(rdev_to_dev(rdev), "Allocate FMR exceeded Max limit");
+	mw = kzalloc(sizeof(*mw), GFP_KERNEL);
+	if (!mw)
 		return ERR_PTR(-ENOMEM);
-	}
-	fmr = kzalloc(sizeof(*fmr), GFP_KERNEL);
-	if (!fmr)
-		return ERR_PTR(-ENOMEM);
-
-	fmr->rdev = rdev;
-	fmr->qplib_fmr.pd = &pd->qplib_pd;
-	fmr->qplib_fmr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+	mw->rdev = rdev;
+	mw->qplib_mw.pd = &pd->qplib_pd;
 
-	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
-	if (rc)
+	mw->qplib_mw.type = (type == IB_MW_TYPE_1 ?
+			       CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1 :
+			       CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B);
+	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mw->qplib_mw);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Allocate MW failed!");
 		goto fail;
+	}
+	mw->ib_mw.rkey = mw->qplib_mw.rkey;
 
-	fmr->qplib_fmr.flags = __from_ib_access_flags(mr_access_flags);
-	fmr->ib_fmr.lkey = fmr->qplib_fmr.lkey;
-	fmr->ib_fmr.rkey = fmr->ib_fmr.lkey;
+	atomic_inc(&rdev->mw_count);
+	return &mw->ib_mw;
 
-	atomic_inc(&rdev->mr_count);
-	return &fmr->ib_fmr;
 fail:
-	kfree(fmr);
+	kfree(mw);
 	return ERR_PTR(rc);
 }
 
-int bnxt_re_map_phys_fmr(struct ib_fmr *ib_fmr, u64 *page_list, int list_len,
-			 u64 iova)
+int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
 {
-	struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
-					     ib_fmr);
-	struct bnxt_re_dev *rdev = fmr->rdev;
+	struct bnxt_re_mw *mw = container_of(ib_mw, struct bnxt_re_mw, ib_mw);
+	struct bnxt_re_dev *rdev = mw->rdev;
 	int rc;
 
-	fmr->qplib_fmr.va = iova;
-	fmr->qplib_fmr.total_size = list_len * PAGE_SIZE;
-
-	rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &fmr->qplib_fmr, page_list,
-			       list_len, true);
-	if (rc)
-		dev_err(rdev_to_dev(rdev), "Failed to map FMR for lkey = 0x%x!",
-			fmr->ib_fmr.lkey);
-	return rc;
-}
-
-int bnxt_re_unmap_fmr(struct list_head *fmr_list)
-{
-	struct bnxt_re_dev *rdev;
-	struct bnxt_re_fmr *fmr;
-	struct ib_fmr *ib_fmr;
-	int rc = 0;
-
-	/* Validate each FMRs inside the fmr_list */
-	list_for_each_entry(ib_fmr, fmr_list, list) {
-		fmr = container_of(ib_fmr, struct bnxt_re_fmr, ib_fmr);
-		rdev = fmr->rdev;
-
-		if (rdev) {
-			rc = bnxt_qplib_dereg_mrw(&rdev->qplib_res,
-						  &fmr->qplib_fmr, true);
-			if (rc)
-				break;
-		}
+	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mw->qplib_mw);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Free MW failed: %#x\n", rc);
+		return rc;
 	}
-	return rc;
-}
-
-int bnxt_re_dealloc_fmr(struct ib_fmr *ib_fmr)
-{
-	struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
-					       ib_fmr);
-	struct bnxt_re_dev *rdev = fmr->rdev;
-	int rc;
 
-	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
-	if (rc)
-		dev_err(rdev_to_dev(rdev), "Failed to free FMR");
-
-	kfree(fmr);
-	atomic_dec(&rdev->mr_count);
+	kfree(mw);
+	atomic_dec(&rdev->mw_count);
 	return rc;
 }
 
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 5c3d71765454..6c160f6a5398 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -44,11 +44,23 @@ struct bnxt_re_gid_ctx {
 	u32			refcnt;
 };
 
+#define BNXT_RE_FENCE_BYTES	64
+struct bnxt_re_fence_data {
+	u32 size;
+	u8 va[BNXT_RE_FENCE_BYTES];
+	dma_addr_t dma_addr;
+	struct bnxt_re_mr *mr;
+	struct ib_mw *mw;
+	struct bnxt_qplib_swqe bind_wqe;
+	u32 bind_rkey;
+};
+
 struct bnxt_re_pd {
 	struct bnxt_re_dev	*rdev;
 	struct ib_pd		ib_pd;
 	struct bnxt_qplib_pd	qplib_pd;
 	struct bnxt_qplib_dpi	dpi;
+	struct bnxt_re_fence_data fence;
 };
 
 struct bnxt_re_ah {
@@ -62,6 +74,7 @@ struct bnxt_re_qp {
 	struct bnxt_re_dev	*rdev;
 	struct ib_qp		ib_qp;
 	spinlock_t		sq_lock;	/* protect sq */
+	spinlock_t		rq_lock;	/* protect rq */
 	struct bnxt_qplib_qp	qplib_qp;
 	struct ib_umem		*sumem;
 	struct ib_umem		*rumem;
@@ -181,12 +194,9 @@ int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
 struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type,
 			       u32 max_num_sg);
 int bnxt_re_dereg_mr(struct ib_mr *mr);
-struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-				 struct ib_fmr_attr *fmr_attr);
-int bnxt_re_map_phys_fmr(struct ib_fmr *fmr, u64 *page_list, int list_len,
-			 u64 iova);
-int bnxt_re_unmap_fmr(struct list_head *fmr_list);
-int bnxt_re_dealloc_fmr(struct ib_fmr *fmr);
+struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+			       struct ib_udata *udata);
+int bnxt_re_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int mr_access_flags,
 				  struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 5d355401179b..1fce5e73216b 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -507,10 +507,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 	ibdev->dereg_mr			= bnxt_re_dereg_mr;
 	ibdev->alloc_mr			= bnxt_re_alloc_mr;
 	ibdev->map_mr_sg		= bnxt_re_map_mr_sg;
-	ibdev->alloc_fmr		= bnxt_re_alloc_fmr;
-	ibdev->map_phys_fmr		= bnxt_re_map_phys_fmr;
-	ibdev->unmap_fmr		= bnxt_re_unmap_fmr;
-	ibdev->dealloc_fmr		= bnxt_re_dealloc_fmr;
 
 	ibdev->reg_user_mr		= bnxt_re_reg_user_mr;
 	ibdev->alloc_ucontext		= bnxt_re_alloc_ucontext;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 43d08b5e9085..f05500bcdcf1 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -284,7 +284,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_create_qp1 req;
-	struct creq_create_qp1_resp *resp;
+	struct creq_create_qp1_resp resp;
 	struct bnxt_qplib_pbl *pbl;
 	struct bnxt_qplib_q *sq = &qp->sq;
 	struct bnxt_qplib_q *rq = &qp->rq;
@@ -394,31 +394,12 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
 	req.pd_id = cpu_to_le32(qp->pd->id);
 
-	resp = (struct creq_create_qp1_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&res->pdev->dev, "QPLIB: FP: CREATE_QP1 send failed");
-		rc = -EINVAL;
-		goto fail;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP1 timed out");
-		rc = -ETIMEDOUT;
-		goto fail;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP1 failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		rc = -EINVAL;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
 		goto fail;
-	}
-	qp->id = le32_to_cpu(resp->xid);
+
+	qp->id = le32_to_cpu(resp.xid);
 	qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
 	sq->flush_in_progress = false;
 	rq->flush_in_progress = false;
@@ -442,7 +423,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
 	struct cmdq_create_qp req;
-	struct creq_create_qp_resp *resp;
+	struct creq_create_qp_resp resp;
 	struct bnxt_qplib_pbl *pbl;
 	struct sq_psn_search **psn_search_ptr;
 	unsigned long int psn_search, poff = 0;
@@ -627,31 +608,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 	}
 	req.pd_id = cpu_to_le32(qp->pd->id);
 
-	resp = (struct creq_create_qp_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP send failed");
-		rc = -EINVAL;
-		goto fail;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP timed out");
-		rc = -ETIMEDOUT;
-		goto fail;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		rc = -EINVAL;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
 		goto fail;
-	}
-	qp->id = le32_to_cpu(resp->xid);
+
+	qp->id = le32_to_cpu(resp.xid);
 	qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
 	sq->flush_in_progress = false;
 	rq->flush_in_progress = false;
@@ -769,10 +731,11 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_modify_qp req;
-	struct creq_modify_qp_resp *resp;
+	struct creq_modify_qp_resp resp;
 	u16 cmd_flags = 0, pkey;
 	u32 temp32[4];
 	u32 bmask;
+	int rc;
 
 	RCFW_CMD_PREP(req, MODIFY_QP, cmd_flags);
 
@@ -862,27 +825,10 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
 	req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(qp->vlan_id);
 
-	resp = (struct creq_modify_qp_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
+		return rc;
 	qp->cur_qp_state = qp->state;
 	return 0;
 }
@@ -891,37 +837,26 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_query_qp req;
-	struct creq_query_qp_resp *resp;
+	struct creq_query_qp_resp resp;
+	struct bnxt_qplib_rcfw_sbuf *sbuf;
 	struct creq_query_qp_resp_sb *sb;
 	u16 cmd_flags = 0;
 	u32 temp32[4];
-	int i;
+	int i, rc = 0;
 
 	RCFW_CMD_PREP(req, QUERY_QP, cmd_flags);
 
+	sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+	if (!sbuf)
+		return -ENOMEM;
+	sb = sbuf->sb;
+
 	req.qp_cid = cpu_to_le32(qp->id);
 	req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
-	resp = (struct creq_query_qp_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     (void **)&sb, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  (void *)sbuf, 0);
+	if (rc)
+		goto bail;
 	/* Extract the context from the side buffer */
 	qp->state = sb->en_sqd_async_notify_state &
 			CREQ_QUERY_QP_RESP_SB_STATE_MASK;
@@ -976,7 +911,9 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 	qp->dest_qpn = le32_to_cpu(sb->dest_qp_id);
 	memcpy(qp->smac, sb->src_mac, 6);
 	qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id);
-	return 0;
+bail:
+	bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+	return rc;
 }
 
 static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
@@ -1021,34 +958,18 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_destroy_qp req;
-	struct creq_destroy_qp_resp *resp;
+	struct creq_destroy_qp_resp resp;
 	unsigned long flags;
 	u16 cmd_flags = 0;
+	int rc;
 
 	RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags);
 
 	req.qp_cid = cpu_to_le32(qp->id);
-	resp = (struct creq_destroy_qp_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
+		return rc;
 
 	/* Must walk the associated CQs to nullified the QP ptr */
 	spin_lock_irqsave(&qp->scq->hwq.lock, flags);
@@ -1162,8 +1083,12 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
 		rc = -EINVAL;
 		goto done;
 	}
-	if (HWQ_CMP((sq->hwq.prod + 1), &sq->hwq) ==
-	    HWQ_CMP(sq->hwq.cons, &sq->hwq)) {
+
+	if (bnxt_qplib_queue_full(sq)) {
+		dev_err(&sq->hwq.pdev->dev,
+			"QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x",
+			sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
+			sq->q_full_delta);
 		rc = -ENOMEM;
 		goto done;
 	}
@@ -1373,6 +1298,9 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
 	}
 
 	sq->hwq.prod++;
+
+	qp->wqe_cnt++;
+
 done:
 	return rc;
 }
@@ -1411,8 +1339,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
 		rc = -EINVAL;
 		goto done;
 	}
-	if (HWQ_CMP((rq->hwq.prod + 1), &rq->hwq) ==
-	    HWQ_CMP(rq->hwq.cons, &rq->hwq)) {
+	if (bnxt_qplib_queue_full(rq)) {
 		dev_err(&rq->hwq.pdev->dev,
 			"QPLIB: FP: QP (0x%x) RQ is full!", qp->id);
 		rc = -EINVAL;
@@ -1483,7 +1410,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_create_cq req;
-	struct creq_create_cq_resp *resp;
+	struct creq_create_cq_resp resp;
 	struct bnxt_qplib_pbl *pbl;
 	u16 cmd_flags = 0;
 	int rc;
@@ -1525,30 +1452,12 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 			(cq->cnq_hw_ring_id & CMDQ_CREATE_CQ_CNQ_ID_MASK) <<
 			 CMDQ_CREATE_CQ_CNQ_ID_SFT);
 
-	resp = (struct creq_create_cq_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ timed out");
-		rc = -ETIMEDOUT;
-		goto fail;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		rc = -EINVAL;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
 		goto fail;
-	}
-	cq->id = le32_to_cpu(resp->xid);
+
+	cq->id = le32_to_cpu(resp.xid);
 	cq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
 	cq->period = BNXT_QPLIB_QUEUE_START_PERIOD;
 	init_waitqueue_head(&cq->waitq);
@@ -1566,33 +1475,17 @@ int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_destroy_cq req;
-	struct creq_destroy_cq_resp *resp;
+	struct creq_destroy_cq_resp resp;
 	u16 cmd_flags = 0;
+	int rc;
 
 	RCFW_CMD_PREP(req, DESTROY_CQ, cmd_flags);
 
 	req.cq_cid = cpu_to_le32(cq->id);
-	resp = (struct creq_destroy_cq_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
+		return rc;
 	bnxt_qplib_free_hwq(res->pdev, &cq->hwq);
 	return 0;
 }
@@ -1664,14 +1557,113 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
 	return rc;
 }
 
+/* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive)
+ *       CQE is track from sw_cq_cons to max_element but valid only if VALID=1
+ */
+static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
+		     u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons)
+{
+	struct bnxt_qplib_q *sq = &qp->sq;
+	struct bnxt_qplib_swq *swq;
+	u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx;
+	struct cq_base *peek_hwcqe, **peek_hw_cqe_ptr;
+	struct cq_req *peek_req_hwcqe;
+	struct bnxt_qplib_qp *peek_qp;
+	struct bnxt_qplib_q *peek_sq;
+	int i, rc = 0;
+
+	/* Normal mode */
+	/* Check for the psn_search marking before completing */
+	swq = &sq->swq[sw_sq_cons];
+	if (swq->psn_search &&
+	    le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) {
+		/* Unmark */
+		swq->psn_search->flags_next_psn = cpu_to_le32
+			(le32_to_cpu(swq->psn_search->flags_next_psn)
+				     & ~0x80000000);
+		dev_dbg(&cq->hwq.pdev->dev,
+			"FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n",
+			cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+		sq->condition = true;
+		sq->send_phantom = true;
+
+		/* TODO: Only ARM if the previous SQE is ARMALL */
+		bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ_ARMALL);
+
+		rc = -EAGAIN;
+		goto out;
+	}
+	if (sq->condition) {
+		/* Peek at the completions */
+		peek_raw_cq_cons = cq->hwq.cons;
+		peek_sw_cq_cons = cq_cons;
+		i = cq->hwq.max_elements;
+		while (i--) {
+			peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq);
+			peek_hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
+			peek_hwcqe = &peek_hw_cqe_ptr[CQE_PG(peek_sw_cq_cons)]
+						     [CQE_IDX(peek_sw_cq_cons)];
+			/* If the next hwcqe is VALID */
+			if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons,
+					  cq->hwq.max_elements)) {
+				/* If the next hwcqe is a REQ */
+				if ((peek_hwcqe->cqe_type_toggle &
+				    CQ_BASE_CQE_TYPE_MASK) ==
+				    CQ_BASE_CQE_TYPE_REQ) {
+					peek_req_hwcqe = (struct cq_req *)
+							 peek_hwcqe;
+					peek_qp = (struct bnxt_qplib_qp *)
+						((unsigned long)
+						 le64_to_cpu
+						 (peek_req_hwcqe->qp_handle));
+					peek_sq = &peek_qp->sq;
+					peek_sq_cons_idx = HWQ_CMP(le16_to_cpu(
+						peek_req_hwcqe->sq_cons_idx) - 1
+						, &sq->hwq);
+					/* If the hwcqe's sq's wr_id matches */
+					if (peek_sq == sq &&
+					    sq->swq[peek_sq_cons_idx].wr_id ==
+					    BNXT_QPLIB_FENCE_WRID) {
+						/*
+						 *  Unbreak only if the phantom
+						 *  comes back
+						 */
+						dev_dbg(&cq->hwq.pdev->dev,
+							"FP:Got Phantom CQE");
+						sq->condition = false;
+						sq->single = true;
+						rc = 0;
+						goto out;
+					}
+				}
+				/* Valid but not the phantom, so keep looping */
+			} else {
+				/* Not valid yet, just exit and wait */
+				rc = -EINVAL;
+				goto out;
+			}
+			peek_sw_cq_cons++;
+			peek_raw_cq_cons++;
+		}
+		dev_err(&cq->hwq.pdev->dev,
+			"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x",
+			cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+		rc = -EINVAL;
+	}
+out:
+	return rc;
+}
+
 static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 				     struct cq_req *hwcqe,
-				     struct bnxt_qplib_cqe **pcqe, int *budget)
+				     struct bnxt_qplib_cqe **pcqe, int *budget,
+				     u32 cq_cons, struct bnxt_qplib_qp **lib_qp)
 {
 	struct bnxt_qplib_qp *qp;
 	struct bnxt_qplib_q *sq;
 	struct bnxt_qplib_cqe *cqe;
-	u32 sw_cons, cqe_cons;
+	u32 sw_sq_cons, cqe_sq_cons;
+	struct bnxt_qplib_swq *swq;
 	int rc = 0;
 
 	qp = (struct bnxt_qplib_qp *)((unsigned long)
@@ -1683,13 +1675,13 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 	}
 	sq = &qp->sq;
 
-	cqe_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
-	if (cqe_cons > sq->hwq.max_elements) {
+	cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
+	if (cqe_sq_cons > sq->hwq.max_elements) {
 		dev_err(&cq->hwq.pdev->dev,
 			"QPLIB: FP: CQ Process req reported ");
 		dev_err(&cq->hwq.pdev->dev,
 			"QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
-			cqe_cons, sq->hwq.max_elements);
+			cqe_sq_cons, sq->hwq.max_elements);
 		return -EINVAL;
 	}
 	/* If we were in the middle of flushing the SQ, continue */
@@ -1698,53 +1690,74 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 
 	/* Require to walk the sq's swq to fabricate CQEs for all previously
 	 * signaled SWQEs due to CQE aggregation from the current sq cons
-	 * to the cqe_cons
+	 * to the cqe_sq_cons
 	 */
 	cqe = *pcqe;
 	while (*budget) {
-		sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
-		if (sw_cons == cqe_cons)
+		sw_sq_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
+		if (sw_sq_cons == cqe_sq_cons)
+			/* Done */
 			break;
+
+		swq = &sq->swq[sw_sq_cons];
 		memset(cqe, 0, sizeof(*cqe));
 		cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
 		cqe->qp_handle = (u64)(unsigned long)qp;
 		cqe->src_qp = qp->id;
-		cqe->wr_id = sq->swq[sw_cons].wr_id;
-		cqe->type = sq->swq[sw_cons].type;
+		cqe->wr_id = swq->wr_id;
+		if (cqe->wr_id == BNXT_QPLIB_FENCE_WRID)
+			goto skip;
+		cqe->type = swq->type;
 
 		/* For the last CQE, check for status.  For errors, regardless
 		 * of the request being signaled or not, it must complete with
 		 * the hwcqe error status
 		 */
-		if (HWQ_CMP((sw_cons + 1), &sq->hwq) == cqe_cons &&
+		if (HWQ_CMP((sw_sq_cons + 1), &sq->hwq) == cqe_sq_cons &&
 		    hwcqe->status != CQ_REQ_STATUS_OK) {
 			cqe->status = hwcqe->status;
 			dev_err(&cq->hwq.pdev->dev,
 				"QPLIB: FP: CQ Processed Req ");
 			dev_err(&cq->hwq.pdev->dev,
 				"QPLIB: wr_id[%d] = 0x%llx with status 0x%x",
-				sw_cons, cqe->wr_id, cqe->status);
+				sw_sq_cons, cqe->wr_id, cqe->status);
 			cqe++;
 			(*budget)--;
 			sq->flush_in_progress = true;
 			/* Must block new posting of SQ and RQ */
 			qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+			sq->condition = false;
+			sq->single = false;
 		} else {
-			if (sq->swq[sw_cons].flags &
-			    SQ_SEND_FLAGS_SIGNAL_COMP) {
+			if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
+				/* Before we complete, do WA 9060 */
+				if (do_wa9060(qp, cq, cq_cons, sw_sq_cons,
+					      cqe_sq_cons)) {
+					*lib_qp = qp;
+					goto out;
+				}
 				cqe->status = CQ_REQ_STATUS_OK;
 				cqe++;
 				(*budget)--;
 			}
 		}
+skip:
 		sq->hwq.cons++;
+		if (sq->single)
+			break;
 	}
+out:
 	*pcqe = cqe;
-	if (!*budget && HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_cons) {
+	if (HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_sq_cons) {
 		/* Out of budget */
 		rc = -EAGAIN;
 		goto done;
 	}
+	/*
+	 * Back to normal completion mode only after it has completed all of
+	 * the WC for this CQE
+	 */
+	sq->single = false;
 	if (!sq->flush_in_progress)
 		goto done;
 flush:
@@ -2074,7 +2087,7 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
 }
 
 int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
-		       int num_cqes)
+		       int num_cqes, struct bnxt_qplib_qp **lib_qp)
 {
 	struct cq_base *hw_cqe, **hw_cqe_ptr;
 	unsigned long flags;
@@ -2099,7 +2112,8 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 		case CQ_BASE_CQE_TYPE_REQ:
 			rc = bnxt_qplib_cq_process_req(cq,
 						       (struct cq_req *)hw_cqe,
-						       &cqe, &budget);
+						       &cqe, &budget,
+						       sw_cons, lib_qp);
 			break;
 		case CQ_BASE_CQE_TYPE_RES_RC:
 			rc = bnxt_qplib_cq_process_res_rc(cq,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index f0150f8da1e3..36b7b7db0e3f 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -88,6 +88,7 @@ struct bnxt_qplib_swq {
 
 struct bnxt_qplib_swqe {
 	/* General */
+#define	BNXT_QPLIB_FENCE_WRID	0x46454E43	/* "FENC" */
 	u64				wr_id;
 	u8				reqs_type;
 	u8				type;
@@ -216,9 +217,16 @@ struct bnxt_qplib_q {
 	struct scatterlist		*sglist;
 	u32				nmap;
 	u32				max_wqe;
+	u16				q_full_delta;
 	u16				max_sge;
 	u32				psn;
 	bool				flush_in_progress;
+	bool				condition;
+	bool				single;
+	bool				send_phantom;
+	u32				phantom_wqe_cnt;
+	u32				phantom_cqe_cnt;
+	u32				next_cq_cons;
 };
 
 struct bnxt_qplib_qp {
@@ -242,6 +250,7 @@ struct bnxt_qplib_qp {
 	u8				timeout;
 	u8				retry_cnt;
 	u8				rnr_retry;
+	u64				wqe_cnt;
 	u32				min_rnr_timer;
 	u32				max_rd_atomic;
 	u32				max_dest_rd_atomic;
@@ -301,6 +310,13 @@ struct bnxt_qplib_qp {
 	(!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) ==		\
 	   !((raw_cons) & (cp_bit)))
 
+static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q)
+{
+	return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta),
+		       &qplib_q->hwq) == HWQ_CMP(qplib_q->hwq.cons,
+						 &qplib_q->hwq);
+}
+
 struct bnxt_qplib_cqe {
 	u8				status;
 	u8				type;
@@ -432,7 +448,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
 int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
-		       int num);
+		       int num, struct bnxt_qplib_qp **qp);
 void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
 void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
 int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 23fb7260662b..16e42754dbec 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -39,72 +39,55 @@
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/prefetch.h>
+#include <linux/delay.h>
+
 #include "roce_hsi.h"
 #include "qplib_res.h"
 #include "qplib_rcfw.h"
 static void bnxt_qplib_service_creq(unsigned long data);
 
 /* Hardware communication channel */
-int bnxt_qplib_rcfw_wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
 {
 	u16 cbit;
 	int rc;
 
-	cookie &= RCFW_MAX_COOKIE_VALUE;
 	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
-	if (!test_bit(cbit, rcfw->cmdq_bitmap))
-		dev_warn(&rcfw->pdev->dev,
-			 "QPLIB: CMD bit %d for cookie 0x%x is not set?",
-			 cbit, cookie);
-
 	rc = wait_event_timeout(rcfw->waitq,
 				!test_bit(cbit, rcfw->cmdq_bitmap),
 				msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS));
-	if (!rc) {
-		dev_warn(&rcfw->pdev->dev,
-			 "QPLIB: Bono Error: timeout %d msec, msg {0x%x}\n",
-			 RCFW_CMD_WAIT_TIME_MS, cookie);
-	}
-
-	return rc;
+	return rc ? 0 : -ETIMEDOUT;
 };
 
-int bnxt_qplib_rcfw_block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
 {
-	u32 count = -1;
+	u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT;
 	u16 cbit;
 
-	cookie &= RCFW_MAX_COOKIE_VALUE;
 	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
 	if (!test_bit(cbit, rcfw->cmdq_bitmap))
 		goto done;
 	do {
+		mdelay(1); /* 1m sec */
 		bnxt_qplib_service_creq((unsigned long)rcfw);
 	} while (test_bit(cbit, rcfw->cmdq_bitmap) && --count);
 done:
-	return count;
+	return count ? 0 : -ETIMEDOUT;
 };
 
-void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
-				   struct cmdq_base *req, void **crsbe,
-				   u8 is_block)
+static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
+			  struct creq_base *resp, void *sb, u8 is_block)
 {
-	struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
 	struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr;
 	struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
-	struct bnxt_qplib_hwq *crsb = &rcfw->crsb;
-	struct bnxt_qplib_crsqe *crsqe = NULL;
-	struct bnxt_qplib_crsbe **crsb_ptr;
+	struct bnxt_qplib_crsq *crsqe;
 	u32 sw_prod, cmdq_prod;
-	u8 retry_cnt = 0xFF;
-	dma_addr_t dma_addr;
 	unsigned long flags;
 	u32 size, opcode;
 	u16 cookie, cbit;
 	int pg, idx;
 	u8 *preq;
 
-retry:
 	opcode = req->opcode;
 	if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
 	    (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
@@ -112,63 +95,50 @@ retry:
 		dev_err(&rcfw->pdev->dev,
 			"QPLIB: RCFW not initialized, reject opcode 0x%x",
 			opcode);
-		return NULL;
+		return -EINVAL;
 	}
 
 	if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
 	    opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
 		dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
-		return NULL;
+		return -EINVAL;
 	}
 
 	/* Cmdq are in 16-byte units, each request can consume 1 or more
 	 * cmdqe
 	 */
 	spin_lock_irqsave(&cmdq->lock, flags);
-	if (req->cmd_size > cmdq->max_elements -
-	    ((HWQ_CMP(cmdq->prod, cmdq) - HWQ_CMP(cmdq->cons, cmdq)) &
-	     (cmdq->max_elements - 1))) {
+	if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
 		dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!");
 		spin_unlock_irqrestore(&cmdq->lock, flags);
-
-		if (!retry_cnt--)
-			return NULL;
-		goto retry;
+		return -EAGAIN;
 	}
 
-	retry_cnt = 0xFF;
 
-	cookie = atomic_inc_return(&rcfw->seq_num) & RCFW_MAX_COOKIE_VALUE;
+	cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE;
 	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
 	if (is_block)
 		cookie |= RCFW_CMD_IS_BLOCKING;
+
+	set_bit(cbit, rcfw->cmdq_bitmap);
 	req->cookie = cpu_to_le16(cookie);
-	if (test_and_set_bit(cbit, rcfw->cmdq_bitmap)) {
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: RCFW MAX outstanding cmd reached!");
-		atomic_dec(&rcfw->seq_num);
+	crsqe = &rcfw->crsqe_tbl[cbit];
+	if (crsqe->resp) {
 		spin_unlock_irqrestore(&cmdq->lock, flags);
-
-		if (!retry_cnt--)
-			return NULL;
-		goto retry;
+		return -EBUSY;
 	}
-	/* Reserve a resp buffer slot if requested */
-	if (req->resp_size && crsbe) {
-		spin_lock(&crsb->lock);
-		sw_prod = HWQ_CMP(crsb->prod, crsb);
-		crsb_ptr = (struct bnxt_qplib_crsbe **)crsb->pbl_ptr;
-		*crsbe = (void *)&crsb_ptr[get_crsb_pg(sw_prod)]
-					  [get_crsb_idx(sw_prod)];
-		bnxt_qplib_crsb_dma_next(crsb->pbl_dma_ptr, sw_prod, &dma_addr);
-		req->resp_addr = cpu_to_le64(dma_addr);
-		crsb->prod++;
-		spin_unlock(&crsb->lock);
-
-		req->resp_size = (sizeof(struct bnxt_qplib_crsbe) +
-				  BNXT_QPLIB_CMDQE_UNITS - 1) /
-				 BNXT_QPLIB_CMDQE_UNITS;
+	memset(resp, 0, sizeof(*resp));
+	crsqe->resp = (struct creq_qp_event *)resp;
+	crsqe->resp->cookie = req->cookie;
+	crsqe->req_size = req->cmd_size;
+	if (req->resp_size && sb) {
+		struct bnxt_qplib_rcfw_sbuf *sbuf = sb;
+
+		req->resp_addr = cpu_to_le64(sbuf->dma_addr);
+		req->resp_size = (sbuf->size + BNXT_QPLIB_CMDQE_UNITS - 1) /
+				  BNXT_QPLIB_CMDQE_UNITS;
 	}
+
 	cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr;
 	preq = (u8 *)req;
 	size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS;
@@ -190,23 +160,24 @@ retry:
 		preq += min_t(u32, size, sizeof(*cmdqe));
 		size -= min_t(u32, size, sizeof(*cmdqe));
 		cmdq->prod++;
+		rcfw->seq_num++;
 	} while (size > 0);
 
+	rcfw->seq_num++;
+
 	cmdq_prod = cmdq->prod;
 	if (rcfw->flags & FIRMWARE_FIRST_FLAG) {
-		/* The very first doorbell write is required to set this flag
-		 * which prompts the FW to reset its internal pointers
+		/* The very first doorbell write
+		 * is required to set this flag
+		 * which prompts the FW to reset
+		 * its internal pointers
 		 */
 		cmdq_prod |= FIRMWARE_FIRST_FLAG;
 		rcfw->flags &= ~FIRMWARE_FIRST_FLAG;
 	}
-	sw_prod = HWQ_CMP(crsq->prod, crsq);
-	crsqe = &crsq->crsq[sw_prod];
-	memset(crsqe, 0, sizeof(*crsqe));
-	crsq->prod++;
-	crsqe->req_size = req->cmd_size;
 
 	/* ring CMDQ DB */
+	wmb();
 	writel(cmdq_prod, rcfw->cmdq_bar_reg_iomem +
 	       rcfw->cmdq_bar_reg_prod_off);
 	writel(RCFW_CMDQ_TRIG_VAL, rcfw->cmdq_bar_reg_iomem +
@@ -214,9 +185,56 @@ retry:
 done:
 	spin_unlock_irqrestore(&cmdq->lock, flags);
 	/* Return the CREQ response pointer */
-	return crsqe ? &crsqe->qp_event : NULL;
+	return 0;
 }
 
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+				 struct cmdq_base *req,
+				 struct creq_base *resp,
+				 void *sb, u8 is_block)
+{
+	struct creq_qp_event *evnt = (struct creq_qp_event *)resp;
+	u16 cookie;
+	u8 opcode, retry_cnt = 0xFF;
+	int rc = 0;
+
+	do {
+		opcode = req->opcode;
+		rc = __send_message(rcfw, req, resp, sb, is_block);
+		cookie = le16_to_cpu(req->cookie) & RCFW_MAX_COOKIE_VALUE;
+		if (!rc)
+			break;
+
+		if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
+			/* send failed */
+			dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed",
+				cookie, opcode);
+			return rc;
+		}
+		is_block ? mdelay(1) : usleep_range(500, 1000);
+
+	} while (retry_cnt--);
+
+	if (is_block)
+		rc = __block_for_resp(rcfw, cookie);
+	else
+		rc = __wait_for_resp(rcfw, cookie);
+	if (rc) {
+		/* timed out */
+		dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec",
+			cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
+		return rc;
+	}
+
+	if (evnt->status) {
+		/* failed with status */
+		dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x",
+			cookie, opcode, evnt->status);
+		rc = -EFAULT;
+	}
+
+	return rc;
+}
 /* Completions */
 static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
 					 struct creq_func_event *func_event)
@@ -260,12 +278,12 @@ static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
 static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
 				       struct creq_qp_event *qp_event)
 {
-	struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
 	struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
-	struct bnxt_qplib_crsqe *crsqe;
-	u16 cbit, cookie, blocked = 0;
+	struct bnxt_qplib_crsq *crsqe;
 	unsigned long flags;
-	u32 sw_cons;
+	u16 cbit, blocked = 0;
+	u16 cookie;
+	__le16  mcookie;
 
 	switch (qp_event->event) {
 	case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
@@ -275,24 +293,31 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
 	default:
 		/* Command Response */
 		spin_lock_irqsave(&cmdq->lock, flags);
-		sw_cons = HWQ_CMP(crsq->cons, crsq);
-		crsqe = &crsq->crsq[sw_cons];
-		crsq->cons++;
-		memcpy(&crsqe->qp_event, qp_event, sizeof(crsqe->qp_event));
-
-		cookie = le16_to_cpu(crsqe->qp_event.cookie);
+		cookie = le16_to_cpu(qp_event->cookie);
+		mcookie = qp_event->cookie;
 		blocked = cookie & RCFW_CMD_IS_BLOCKING;
 		cookie &= RCFW_MAX_COOKIE_VALUE;
 		cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+		crsqe = &rcfw->crsqe_tbl[cbit];
+		if (crsqe->resp &&
+		    crsqe->resp->cookie  == mcookie) {
+			memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+			crsqe->resp = NULL;
+		} else {
+			dev_err(&rcfw->pdev->dev,
+				"QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x",
+				crsqe->resp ? "mismatch" : "collision",
+				crsqe->resp ? crsqe->resp->cookie : 0, mcookie);
+		}
 		if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
 			dev_warn(&rcfw->pdev->dev,
 				 "QPLIB: CMD bit %d was not requested", cbit);
-
 		cmdq->cons += crsqe->req_size;
-		spin_unlock_irqrestore(&cmdq->lock, flags);
+		crsqe->req_size = 0;
+
 		if (!blocked)
 			wake_up(&rcfw->waitq);
-		break;
+		spin_unlock_irqrestore(&cmdq->lock, flags);
 	}
 	return 0;
 }
@@ -305,12 +330,12 @@ static void bnxt_qplib_service_creq(unsigned long data)
 	struct creq_base *creqe, **creq_ptr;
 	u32 sw_cons, raw_cons;
 	unsigned long flags;
-	u32 type;
+	u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
 
-	/* Service the CREQ until empty */
+	/* Service the CREQ until budget is over */
 	spin_lock_irqsave(&creq->lock, flags);
 	raw_cons = creq->cons;
-	while (1) {
+	while (budget > 0) {
 		sw_cons = HWQ_CMP(raw_cons, creq);
 		creq_ptr = (struct creq_base **)creq->pbl_ptr;
 		creqe = &creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)];
@@ -320,15 +345,9 @@ static void bnxt_qplib_service_creq(unsigned long data)
 		type = creqe->type & CREQ_BASE_TYPE_MASK;
 		switch (type) {
 		case CREQ_BASE_TYPE_QP_EVENT:
-			if (!bnxt_qplib_process_qp_event
-			    (rcfw, (struct creq_qp_event *)creqe))
-				rcfw->creq_qp_event_processed++;
-			else {
-				dev_warn(&rcfw->pdev->dev, "QPLIB: crsqe with");
-				dev_warn(&rcfw->pdev->dev,
-					 "QPLIB: type = 0x%x not handled",
-					 type);
-			}
+			bnxt_qplib_process_qp_event
+				(rcfw, (struct creq_qp_event *)creqe);
+			rcfw->creq_qp_event_processed++;
 			break;
 		case CREQ_BASE_TYPE_FUNC_EVENT:
 			if (!bnxt_qplib_process_func_event
@@ -346,7 +365,9 @@ static void bnxt_qplib_service_creq(unsigned long data)
 			break;
 		}
 		raw_cons++;
+		budget--;
 	}
+
 	if (creq->cons != raw_cons) {
 		creq->cons = raw_cons;
 		CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, raw_cons,
@@ -375,23 +396,16 @@ static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
 /* RCFW */
 int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw)
 {
-	struct creq_deinitialize_fw_resp *resp;
 	struct cmdq_deinitialize_fw req;
+	struct creq_deinitialize_fw_resp resp;
 	u16 cmd_flags = 0;
+	int rc;
 
 	RCFW_CMD_PREP(req, DEINITIALIZE_FW, cmd_flags);
-	resp = (struct creq_deinitialize_fw_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp)
-		return -EINVAL;
-
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie)))
-		return -ETIMEDOUT;
-
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie))
-		return -EFAULT;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 0);
+	if (rc)
+		return rc;
 
 	clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
 	return 0;
@@ -417,9 +431,10 @@ static int __get_pbl_pg_idx(struct bnxt_qplib_pbl *pbl)
 int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
 			 struct bnxt_qplib_ctx *ctx, int is_virtfn)
 {
-	struct creq_initialize_fw_resp *resp;
 	struct cmdq_initialize_fw req;
+	struct creq_initialize_fw_resp resp;
 	u16 cmd_flags = 0, level;
+	int rc;
 
 	RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags);
 
@@ -482,37 +497,19 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
 
 skip_ctx_setup:
 	req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
-	resp = (struct creq_initialize_fw_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: RCFW: INITIALIZE_FW send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: RCFW: INITIALIZE_FW timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: RCFW: INITIALIZE_FW failed");
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 0);
+	if (rc)
+		return rc;
 	set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
 	return 0;
 }
 
 void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
 {
-	bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->crsb);
-	kfree(rcfw->crsq.crsq);
+	kfree(rcfw->crsqe_tbl);
 	bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->cmdq);
 	bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->creq);
-
 	rcfw->pdev = NULL;
 }
 
@@ -539,21 +536,11 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
 		goto fail;
 	}
 
-	rcfw->crsq.max_elements = rcfw->cmdq.max_elements;
-	rcfw->crsq.crsq = kcalloc(rcfw->crsq.max_elements,
-				  sizeof(*rcfw->crsq.crsq), GFP_KERNEL);
-	if (!rcfw->crsq.crsq)
+	rcfw->crsqe_tbl = kcalloc(rcfw->cmdq.max_elements,
+				  sizeof(*rcfw->crsqe_tbl), GFP_KERNEL);
+	if (!rcfw->crsqe_tbl)
 		goto fail;
 
-	rcfw->crsb.max_elements = BNXT_QPLIB_CRSBE_MAX_CNT;
-	if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->crsb, NULL, 0,
-				      &rcfw->crsb.max_elements,
-				      BNXT_QPLIB_CRSBE_UNITS, 0, PAGE_SIZE,
-				      HWQ_TYPE_CTX)) {
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: HW channel CRSB allocation failed");
-		goto fail;
-	}
 	return 0;
 
 fail:
@@ -606,7 +593,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 	int rc;
 
 	/* General */
-	atomic_set(&rcfw->seq_num, 0);
+	rcfw->seq_num = 0;
 	rcfw->flags = FIRMWARE_FIRST_FLAG;
 	bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD *
 				  sizeof(unsigned long));
@@ -636,10 +623,6 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 
 	rcfw->cmdq_bar_reg_trig_off = RCFW_COMM_TRIG_OFFSET;
 
-	/* CRSQ */
-	rcfw->crsq.prod = 0;
-	rcfw->crsq.cons = 0;
-
 	/* CREQ */
 	rcfw->creq_bar_reg = RCFW_COMM_CONS_PCI_BAR_REGION;
 	res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
@@ -692,3 +675,34 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 	__iowrite32_copy(rcfw->cmdq_bar_reg_iomem, &init, sizeof(init) / 4);
 	return 0;
 }
+
+struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
+		struct bnxt_qplib_rcfw *rcfw,
+		u32 size)
+{
+	struct bnxt_qplib_rcfw_sbuf *sbuf;
+
+	sbuf = kzalloc(sizeof(*sbuf), GFP_ATOMIC);
+	if (!sbuf)
+		return NULL;
+
+	sbuf->size = size;
+	sbuf->sb = dma_zalloc_coherent(&rcfw->pdev->dev, sbuf->size,
+				       &sbuf->dma_addr, GFP_ATOMIC);
+	if (!sbuf->sb)
+		goto bail;
+
+	return sbuf;
+bail:
+	kfree(sbuf);
+	return NULL;
+}
+
+void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
+			       struct bnxt_qplib_rcfw_sbuf *sbuf)
+{
+	if (sbuf->sb)
+		dma_free_coherent(&rcfw->pdev->dev, sbuf->size,
+				  sbuf->sb, sbuf->dma_addr);
+	kfree(sbuf);
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index d3567d75bf58..09ce121770cd 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -73,6 +73,7 @@
 #define RCFW_MAX_OUTSTANDING_CMD	BNXT_QPLIB_CMDQE_MAX_CNT
 #define RCFW_MAX_COOKIE_VALUE		0x7FFF
 #define RCFW_CMD_IS_BLOCKING		0x8000
+#define RCFW_BLOCKED_CMD_WAIT_COUNT	0x4E20
 
 /* Cmdq contains a fix number of a 16-Byte slots */
 struct bnxt_qplib_cmdqe {
@@ -94,32 +95,6 @@ struct bnxt_qplib_crsbe {
 	u8			data[1024];
 };
 
-/* CRSQ SB */
-#define BNXT_QPLIB_CRSBE_MAX_CNT	4
-#define BNXT_QPLIB_CRSBE_UNITS		sizeof(struct bnxt_qplib_crsbe)
-#define BNXT_QPLIB_CRSBE_CNT_PER_PG	(PAGE_SIZE / BNXT_QPLIB_CRSBE_UNITS)
-
-#define MAX_CRSB_IDX			(BNXT_QPLIB_CRSBE_MAX_CNT - 1)
-#define MAX_CRSB_IDX_PER_PG		(BNXT_QPLIB_CRSBE_CNT_PER_PG - 1)
-
-static inline u32 get_crsb_pg(u32 val)
-{
-	return (val & ~MAX_CRSB_IDX_PER_PG) / BNXT_QPLIB_CRSBE_CNT_PER_PG;
-}
-
-static inline u32 get_crsb_idx(u32 val)
-{
-	return val & MAX_CRSB_IDX_PER_PG;
-}
-
-static inline void bnxt_qplib_crsb_dma_next(dma_addr_t *pg_map_arr,
-					    u32 prod, dma_addr_t *dma_addr)
-{
-		*dma_addr = pg_map_arr[(prod) / BNXT_QPLIB_CRSBE_CNT_PER_PG];
-		*dma_addr += ((prod) % BNXT_QPLIB_CRSBE_CNT_PER_PG) *
-			      BNXT_QPLIB_CRSBE_UNITS;
-}
-
 /* CREQ */
 /* Allocate 1 per QP for async error notification for now */
 #define BNXT_QPLIB_CREQE_MAX_CNT	(64 * 1024)
@@ -158,17 +133,19 @@ static inline u32 get_creq_idx(u32 val)
 #define CREQ_DB(db, raw_cons, cp_bit)				\
 	writel(CREQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
 
+#define CREQ_ENTRY_POLL_BUDGET		0x100
+
 /* HWQ */
-struct bnxt_qplib_crsqe {
-	struct creq_qp_event	qp_event;
+
+struct bnxt_qplib_crsq {
+	struct creq_qp_event	*resp;
 	u32			req_size;
 };
 
-struct bnxt_qplib_crsq {
-	struct bnxt_qplib_crsqe	*crsq;
-	u32			prod;
-	u32			cons;
-	u32			max_elements;
+struct bnxt_qplib_rcfw_sbuf {
+	void *sb;
+	dma_addr_t dma_addr;
+	u32 size;
 };
 
 /* RCFW Communication Channels */
@@ -185,7 +162,7 @@ struct bnxt_qplib_rcfw {
 	wait_queue_head_t	waitq;
 	int			(*aeq_handler)(struct bnxt_qplib_rcfw *,
 					       struct creq_func_event *);
-	atomic_t		seq_num;
+	u32			seq_num;
 
 	/* Bar region info */
 	void __iomem		*cmdq_bar_reg_iomem;
@@ -203,8 +180,7 @@ struct bnxt_qplib_rcfw {
 
 	/* Actual Cmd and Resp Queues */
 	struct bnxt_qplib_hwq	cmdq;
-	struct bnxt_qplib_crsq	crsq;
-	struct bnxt_qplib_hwq	crsb;
+	struct bnxt_qplib_crsq	*crsqe_tbl;
 };
 
 void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
@@ -219,11 +195,14 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 					(struct bnxt_qplib_rcfw *,
 					 struct creq_func_event *));
 
-int bnxt_qplib_rcfw_block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie);
-int bnxt_qplib_rcfw_wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie);
-void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
-				   struct cmdq_base *req, void **crsbe,
-				   u8 is_block);
+struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
+				struct bnxt_qplib_rcfw *rcfw,
+				u32 size);
+void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
+			       struct bnxt_qplib_rcfw_sbuf *sbuf);
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+				 struct cmdq_base *req, struct creq_base *resp,
+				 void *sbuf, u8 is_block);
 
 int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw);
 int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 6277d802ca4b..2e4855509719 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -48,6 +48,10 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
 
 #define HWQ_CMP(idx, hwq)	((idx) & ((hwq)->max_elements - 1))
 
+#define HWQ_FREE_SLOTS(hwq)	(hwq->max_elements - \
+				((HWQ_CMP(hwq->prod, hwq)\
+				- HWQ_CMP(hwq->cons, hwq))\
+				& (hwq->max_elements - 1)))
 enum bnxt_qplib_hwq_type {
 	HWQ_TYPE_CTX,
 	HWQ_TYPE_QUEUE,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 7b31eccedf11..fde18cf0e406 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -55,37 +55,30 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 			    struct bnxt_qplib_dev_attr *attr)
 {
 	struct cmdq_query_func req;
-	struct creq_query_func_resp *resp;
+	struct creq_query_func_resp resp;
+	struct bnxt_qplib_rcfw_sbuf *sbuf;
 	struct creq_query_func_resp_sb *sb;
 	u16 cmd_flags = 0;
 	u32 temp;
 	u8 *tqm_alloc;
-	int i;
+	int i, rc = 0;
 
 	RCFW_CMD_PREP(req, QUERY_FUNC, cmd_flags);
 
-	req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
-	resp = (struct creq_query_func_resp *)
-		bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void **)&sb,
-					     0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC failed ");
+	sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+	if (!sbuf) {
 		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
+			"QPLIB: SP: QUERY_FUNC alloc side buffer failed");
+		return -ENOMEM;
 	}
+
+	sb = sbuf->sb;
+	req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  (void *)sbuf, 0);
+	if (rc)
+		goto bail;
+
 	/* Extract the context from the side buffer */
 	attr->max_qp = le32_to_cpu(sb->max_qp);
 	attr->max_qp_rd_atom =
@@ -95,6 +88,11 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 		sb->max_qp_init_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
 		BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom;
 	attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr);
+	/*
+	 * 128 WQEs needs to be reserved for the HW (8916). Prevent
+	 * reporting the max number
+	 */
+	attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS;
 	attr->max_qp_sges = sb->max_sge;
 	attr->max_cq = le32_to_cpu(sb->max_cq);
 	attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
@@ -130,7 +128,10 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 		attr->tqm_alloc_reqs[i * 4 + 2] = *(++tqm_alloc);
 		attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
 	}
-	return 0;
+
+bail:
+	bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+	return rc;
 }
 
 /* SGID */
@@ -178,8 +179,9 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 	/* Remove GID from the SGID table */
 	if (update) {
 		struct cmdq_delete_gid req;
-		struct creq_delete_gid_resp *resp;
+		struct creq_delete_gid_resp resp;
 		u16 cmd_flags = 0;
+		int rc;
 
 		RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
 		if (sgid_tbl->hw_id[index] == 0xFFFF) {
@@ -188,31 +190,10 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 			return -EINVAL;
 		}
 		req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
-		resp = (struct creq_delete_gid_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, NULL,
-						     0);
-		if (!resp) {
-			dev_err(&res->pdev->dev,
-				"QPLIB: SP: DELETE_GID send failed");
-			return -EINVAL;
-		}
-		if (!bnxt_qplib_rcfw_wait_for_resp(rcfw,
-						   le16_to_cpu(req.cookie))) {
-			/* Cmd timed out */
-			dev_err(&res->pdev->dev,
-				"QPLIB: SP: DELETE_GID timed out");
-			return -ETIMEDOUT;
-		}
-		if (resp->status ||
-		    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-			dev_err(&res->pdev->dev,
-				"QPLIB: SP: DELETE_GID failed ");
-			dev_err(&res->pdev->dev,
-				"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-				resp->status, le16_to_cpu(req.cookie),
-				le16_to_cpu(resp->cookie));
-			return -EINVAL;
-		}
+		rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						  (void *)&resp, NULL, 0);
+		if (rc)
+			return rc;
 	}
 	memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
 	       sizeof(bnxt_qplib_gid_zero));
@@ -234,7 +215,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 						   struct bnxt_qplib_res,
 						   sgid_tbl);
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
-	int i, free_idx, rc = 0;
+	int i, free_idx;
 
 	if (!sgid_tbl) {
 		dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
@@ -266,10 +247,11 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 	}
 	if (update) {
 		struct cmdq_add_gid req;
-		struct creq_add_gid_resp *resp;
+		struct creq_add_gid_resp resp;
 		u16 cmd_flags = 0;
 		u32 temp32[4];
 		u16 temp16[3];
+		int rc;
 
 		RCFW_CMD_PREP(req, ADD_GID, cmd_flags);
 
@@ -290,31 +272,11 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 		req.src_mac[1] = cpu_to_be16(temp16[1]);
 		req.src_mac[2] = cpu_to_be16(temp16[2]);
 
-		resp = (struct creq_add_gid_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-		if (!resp) {
-			dev_err(&res->pdev->dev,
-				"QPLIB: SP: ADD_GID send failed");
-			return -EINVAL;
-		}
-		if (!bnxt_qplib_rcfw_wait_for_resp(rcfw,
-						   le16_to_cpu(req.cookie))) {
-			/* Cmd timed out */
-			dev_err(&res->pdev->dev,
-				"QPIB: SP: ADD_GID timed out");
-			return -ETIMEDOUT;
-		}
-		if (resp->status ||
-		    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-			dev_err(&res->pdev->dev, "QPLIB: SP: ADD_GID failed ");
-			dev_err(&res->pdev->dev,
-				"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-				resp->status, le16_to_cpu(req.cookie),
-				le16_to_cpu(resp->cookie));
-			return -EINVAL;
-		}
-		sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp->xid);
+		rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						  (void *)&resp, NULL, 0);
+		if (rc)
+			return rc;
+		sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp.xid);
 	}
 	/* Add GID to the sgid_tbl */
 	memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid));
@@ -325,7 +287,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 
 	*index = free_idx;
 	/* unlock */
-	return rc;
+	return 0;
 }
 
 /* pkeys */
@@ -422,10 +384,11 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_create_ah req;
-	struct creq_create_ah_resp *resp;
+	struct creq_create_ah_resp resp;
 	u16 cmd_flags = 0;
 	u32 temp32[4];
 	u16 temp16[3];
+	int rc;
 
 	RCFW_CMD_PREP(req, CREATE_AH, cmd_flags);
 
@@ -450,28 +413,12 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
 	req.dest_mac[1] = cpu_to_le16(temp16[1]);
 	req.dest_mac[2] = cpu_to_le16(temp16[2]);
 
-	resp = (struct creq_create_ah_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 1);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
-	ah->id = le32_to_cpu(resp->xid);
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 1);
+	if (rc)
+		return rc;
+
+	ah->id = le32_to_cpu(resp.xid);
 	return 0;
 }
 
@@ -479,35 +426,19 @@ int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_destroy_ah req;
-	struct creq_destroy_ah_resp *resp;
+	struct creq_destroy_ah_resp resp;
 	u16 cmd_flags = 0;
+	int rc;
 
 	/* Clean up the AH table in the device */
 	RCFW_CMD_PREP(req, DESTROY_AH, cmd_flags);
 
 	req.ah_cid = cpu_to_le32(ah->id);
 
-	resp = (struct creq_destroy_ah_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 1);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 1);
+	if (rc)
+		return rc;
 	return 0;
 }
 
@@ -516,8 +447,9 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_deallocate_key req;
-	struct creq_deallocate_key_resp *resp;
+	struct creq_deallocate_key_resp resp;
 	u16 cmd_flags = 0;
+	int rc;
 
 	if (mrw->lkey == 0xFFFFFFFF) {
 		dev_info(&res->pdev->dev,
@@ -536,27 +468,11 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 	else
 		req.key = cpu_to_le32(mrw->lkey);
 
-	resp = (struct creq_deallocate_key_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR failed ");
-		dev_err(&res->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 0);
+	if (rc)
+		return rc;
+
 	/* Free the qplib's MRW memory */
 	if (mrw->hwq.max_elements)
 		bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
@@ -568,9 +484,10 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_allocate_mrw req;
-	struct creq_allocate_mrw_resp *resp;
+	struct creq_allocate_mrw_resp resp;
 	u16 cmd_flags = 0;
 	unsigned long tmp;
+	int rc;
 
 	RCFW_CMD_PREP(req, ALLOCATE_MRW, cmd_flags);
 
@@ -584,33 +501,17 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 	tmp = (unsigned long)mrw;
 	req.mrw_handle = cpu_to_le64(tmp);
 
-	resp = (struct creq_allocate_mrw_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
+		return rc;
+
 	if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1)  ||
 	    (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
 	    (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
-		mrw->rkey = le32_to_cpu(resp->xid);
+		mrw->rkey = le32_to_cpu(resp.xid);
 	else
-		mrw->lkey = le32_to_cpu(resp->xid);
+		mrw->lkey = le32_to_cpu(resp.xid);
 	return 0;
 }
 
@@ -619,40 +520,17 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_deregister_mr req;
-	struct creq_deregister_mr_resp *resp;
+	struct creq_deregister_mr_resp resp;
 	u16 cmd_flags = 0;
 	int rc;
 
 	RCFW_CMD_PREP(req, DEREGISTER_MR, cmd_flags);
 
 	req.lkey = cpu_to_le32(mrw->lkey);
-	resp = (struct creq_deregister_mr_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, block);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR send failed");
-		return -EINVAL;
-	}
-	if (block)
-		rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
-						    le16_to_cpu(req.cookie));
-	else
-		rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
-						   le16_to_cpu(req.cookie));
-	if (!rc) {
-		/* Cmd timed out */
-		dev_err(&res->pdev->dev, "QPLIB: SP: DEREG_MR timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, block);
+	if (rc)
+		return rc;
 
 	/* Free the qplib's MR memory */
 	if (mrw->hwq.max_elements) {
@@ -669,7 +547,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_register_mr req;
-	struct creq_register_mr_resp *resp;
+	struct creq_register_mr_resp resp;
 	u16 cmd_flags = 0, level;
 	int pg_ptrs, pages, i, rc;
 	dma_addr_t **pbl_ptr;
@@ -730,36 +608,11 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
 	req.key = cpu_to_le32(mr->lkey);
 	req.mr_size = cpu_to_le64(mr->total_size);
 
-	resp = (struct creq_register_mr_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, block);
-	if (!resp) {
-		dev_err(&res->pdev->dev, "SP: REG_MR send failed");
-		rc = -EINVAL;
-		goto fail;
-	}
-	if (block)
-		rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
-						    le16_to_cpu(req.cookie));
-	else
-		rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
-						   le16_to_cpu(req.cookie));
-	if (!rc) {
-		/* Cmd timed out */
-		dev_err(&res->pdev->dev, "SP: REG_MR timed out");
-		rc = -ETIMEDOUT;
-		goto fail;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: REG_MR failed ");
-		dev_err(&res->pdev->dev,
-			"QPLIB: SP: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		rc = -EINVAL;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, block);
+	if (rc)
 		goto fail;
-	}
+
 	return 0;
 
 fail:
@@ -804,35 +657,15 @@ int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_map_tc_to_cos req;
-	struct creq_map_tc_to_cos_resp *resp;
+	struct creq_map_tc_to_cos_resp resp;
 	u16 cmd_flags = 0;
-	int tleft;
+	int rc = 0;
 
 	RCFW_CMD_PREP(req, MAP_TC_TO_COS, cmd_flags);
 	req.cos0 = cpu_to_le16(cids[0]);
 	req.cos1 = cpu_to_le16(cids[1]);
 
-	resp = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, NULL, 0);
-	if (!resp) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS send failed");
-		return -EINVAL;
-	}
-
-	tleft = bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie));
-	if (!tleft) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS timed out");
-		return -ETIMEDOUT;
-	}
-
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS failed ");
-		dev_err(&res->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
-
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
 	return 0;
 }
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 1442a617e968..a543f959098b 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -40,6 +40,8 @@
 #ifndef __BNXT_QPLIB_SP_H__
 #define __BNXT_QPLIB_SP_H__
 
+#define BNXT_QPLIB_RESERVED_QP_WRS	128
+
 struct bnxt_qplib_dev_attr {
 	char				fw_ver[32];
 	u16				max_sgid;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index b6fe45924c6e..0910faf3587b 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -488,6 +488,7 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
 
 	ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
 	release_ep_resources(ep);
+	kfree_skb(skb);
 	return 0;
 }
 
@@ -498,6 +499,7 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
 	ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
 	c4iw_put_ep(&ep->parent_ep->com);
 	release_ep_resources(ep);
+	kfree_skb(skb);
 	return 0;
 }
 
@@ -569,11 +571,13 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb)
 
 	pr_debug("%s rdev %p\n", __func__, rdev);
 	req->cmd = CPL_ABORT_NO_RST;
+	skb_get(skb);
 	ret = c4iw_ofld_send(rdev, skb);
 	if (ret) {
 		__state_set(&ep->com, DEAD);
 		queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
-	}
+	} else
+		kfree_skb(skb);
 }
 
 static int send_flowc(struct c4iw_ep *ep)
@@ -2517,7 +2521,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 		goto reject;
 	}
 
-	hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) +
+	hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) +
+	       sizeof(struct tcphdr) +
 	       ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
 	if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
 		child_ep->mtu = peer_mss + hdrs;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 329fb65e8fb0..ae0b79aeea2e 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -767,7 +767,7 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
 		kfree(entry);
 	}
 
-	list_for_each_safe(pos, nxt, &uctx->qpids) {
+	list_for_each_safe(pos, nxt, &uctx->cqids) {
 		entry = list_entry(pos, struct c4iw_qid_list, entry);
 		list_del_init(&entry->entry);
 		kfree(entry);
@@ -880,13 +880,15 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 	rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free");
 	if (!rdev->free_workq) {
 		err = -ENOMEM;
-		goto err_free_status_page;
+		goto err_free_status_page_and_wr_log;
 	}
 
 	rdev->status_page->db_off = 0;
 
 	return 0;
-err_free_status_page:
+err_free_status_page_and_wr_log:
+	if (c4iw_wr_log && rdev->wr_log)
+		kfree(rdev->wr_log);
 	free_page((unsigned long)rdev->status_page);
 destroy_ocqp_pool:
 	c4iw_ocqp_pool_destroy(rdev);
@@ -903,9 +905,11 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 {
 	destroy_workqueue(rdev->free_workq);
 	kfree(rdev->wr_log);
+	c4iw_release_dev_ucontext(rdev, &rdev->uctx);
 	free_page((unsigned long)rdev->status_page);
 	c4iw_pblpool_destroy(rdev);
 	c4iw_rqtpool_destroy(rdev);
+	c4iw_ocqp_pool_destroy(rdev);
 	c4iw_destroy_resource(&rdev->resource);
 }
 
@@ -971,7 +975,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 		 devp->rdev.lldi.sge_egrstatuspagesize);
 
 	devp->rdev.hw_queue.t4_eq_status_entries =
-		devp->rdev.lldi.sge_ingpadboundary > 64 ? 2 : 1;
+		devp->rdev.lldi.sge_egrstatuspagesize / 64;
 	devp->rdev.hw_queue.t4_max_eq_size = 65520;
 	devp->rdev.hw_queue.t4_max_iq_size = 65520;
 	devp->rdev.hw_queue.t4_max_rq_size = 8192 -
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 5d6b1eeaa9a0..2ba00b89df6a 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -6312,25 +6312,38 @@ static void handle_8051_request(struct hfi1_pportdata *ppd)
 	}
 }
 
-static void write_global_credit(struct hfi1_devdata *dd,
-				u8 vau, u16 total, u16 shared)
+/*
+ * Set up allocation unit vaulue.
+ */
+void set_up_vau(struct hfi1_devdata *dd, u8 vau)
 {
-	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
-		  ((u64)total <<
-		   SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) |
-		  ((u64)shared <<
-		   SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) |
-		  ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
+	u64 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
+
+	/* do not modify other values in the register */
+	reg &= ~SEND_CM_GLOBAL_CREDIT_AU_SMASK;
+	reg |= (u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT;
+	write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
 }
 
 /*
  * Set up initial VL15 credits of the remote.  Assumes the rest of
- * the CM credit registers are zero from a previous global or credit reset .
+ * the CM credit registers are zero from a previous global or credit reset.
+ * Shared limit for VL15 will always be 0.
  */
-void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
+void set_up_vl15(struct hfi1_devdata *dd, u16 vl15buf)
 {
-	/* leave shared count at zero for both global and VL15 */
-	write_global_credit(dd, vau, vl15buf, 0);
+	u64 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
+
+	/* set initial values for total and shared credit limit */
+	reg &= ~(SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK |
+		 SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK);
+
+	/*
+	 * Set total limit to be equal to VL15 credits.
+	 * Leave shared limit at 0.
+	 */
+	reg |= (u64)vl15buf << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT;
+	write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
 
 	write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
 		  << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
@@ -6348,9 +6361,11 @@ void reset_link_credits(struct hfi1_devdata *dd)
 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
 		write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0);
 	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
-	write_global_credit(dd, 0, 0, 0);
+	write_csr(dd, SEND_CM_GLOBAL_CREDIT, 0);
 	/* reset the CM block */
 	pio_send_control(dd, PSC_CM_RESET);
+	/* reset cached value */
+	dd->vl15buf_cached = 0;
 }
 
 /* convert a vCU to a CU */
@@ -6839,24 +6854,35 @@ void handle_link_up(struct work_struct *work)
 {
 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
 						  link_up_work);
+	struct hfi1_devdata *dd = ppd->dd;
+
 	set_link_state(ppd, HLS_UP_INIT);
 
 	/* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
-	read_ltp_rtt(ppd->dd);
+	read_ltp_rtt(dd);
 	/*
 	 * OPA specifies that certain counters are cleared on a transition
 	 * to link up, so do that.
 	 */
-	clear_linkup_counters(ppd->dd);
+	clear_linkup_counters(dd);
 	/*
 	 * And (re)set link up default values.
 	 */
 	set_linkup_defaults(ppd);
 
+	/*
+	 * Set VL15 credits. Use cached value from verify cap interrupt.
+	 * In case of quick linkup or simulator, vl15 value will be set by
+	 * handle_linkup_change. VerifyCap interrupt handler will not be
+	 * called in those scenarios.
+	 */
+	if (!(quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR))
+		set_up_vl15(dd, dd->vl15buf_cached);
+
 	/* enforce link speed enabled */
 	if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
 		/* oops - current speed is not enabled, bounce */
-		dd_dev_err(ppd->dd,
+		dd_dev_err(dd,
 			   "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
 			   ppd->link_speed_active, ppd->link_speed_enabled);
 		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
@@ -7357,7 +7383,14 @@ void handle_verify_cap(struct work_struct *work)
 	 */
 	if (vau == 0)
 		vau = 1;
-	set_up_vl15(dd, vau, vl15buf);
+	set_up_vau(dd, vau);
+
+	/*
+	 * Set VL15 credits to 0 in global credit register. Cache remote VL15
+	 * credits value and wait for link-up interrupt ot set it.
+	 */
+	set_up_vl15(dd, 0);
+	dd->vl15buf_cached = vl15buf;
 
 	/* set up the LCB CRC mode */
 	crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc;
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 5bfa839d1c48..793514f1d15f 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -839,7 +839,9 @@
 #define SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK 0x8ull
 #define SEND_CM_CTRL_RESETCSR 0x0000000000000020ull
 #define SEND_CM_GLOBAL_CREDIT (TXE + 0x000000000508)
+#define SEND_CM_GLOBAL_CREDIT_AU_MASK 0x7ull
 #define SEND_CM_GLOBAL_CREDIT_AU_SHIFT 16
+#define SEND_CM_GLOBAL_CREDIT_AU_SMASK 0x70000ull
 #define SEND_CM_GLOBAL_CREDIT_RESETCSR 0x0000094000030000ull
 #define SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK 0xFFFFull
 #define SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT 0
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index da322e6668cc..414a04a481c2 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1045,6 +1045,14 @@ struct hfi1_devdata {
 	/* initial vl15 credits to use */
 	u16 vl15_init;
 
+	/*
+	 * Cached value for vl15buf, read during verify cap interrupt. VL15
+	 * credits are to be kept at 0 and set when handling the link-up
+	 * interrupt. This removes the possibility of receiving VL15 MAD
+	 * packets before this HFI is ready.
+	 */
+	u16 vl15buf_cached;
+
 	/* Misc small ints */
 	u8 n_krcv_queues;
 	u8 qos_shift;
@@ -1598,7 +1606,8 @@ int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encode);
 int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t);
 int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t);
 
-void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf);
+void set_up_vau(struct hfi1_devdata *dd, u8 vau);
+void set_up_vl15(struct hfi1_devdata *dd, u16 vl15buf);
 void reset_link_credits(struct hfi1_devdata *dd);
 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);
 
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index ba265d0ae93b..04a5082d5ac5 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -130,7 +130,8 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
 		 * the remote values.  Both sides must be using the values.
 		 */
 		if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
-			set_up_vl15(dd, dd->vau, dd->vl15_init);
+			set_up_vau(dd, dd->vau);
+			set_up_vl15(dd, dd->vl15_init);
 			assign_remote_cm_au_table(dd, dd->vcu);
 		}
 
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 93faf86d54b6..6a9f6f9819e1 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -207,8 +207,8 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
 	/*
 	 * Save BARs and command to rewrite after device reset.
 	 */
-	dd->pcibar0 = addr;
-	dd->pcibar1 = addr >> 32;
+	pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0);
+	pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1);
 	pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom);
 	pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command);
 	pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 069bdaf061ab..1080778a1f7c 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -2159,8 +2159,11 @@ send_last:
 		ret = hfi1_rvt_get_rwqe(qp, 1);
 		if (ret < 0)
 			goto nack_op_err;
-		if (!ret)
+		if (!ret) {
+			/* peer will send again */
+			rvt_put_ss(&qp->r_sge);
 			goto rnr_nak;
+		}
 		wc.ex.imm_data = ohdr->u.rc.imm_data;
 		wc.wc_flags = IB_WC_WITH_IMM;
 		goto send_last;
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 50d140d25e38..2f3bbcac1e34 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -196,7 +196,8 @@ static const struct sysfs_ops port_cc_sysfs_ops = {
 };
 
 static struct attribute *port_cc_default_attributes[] = {
-	&cc_prescan_attr.attr
+	&cc_prescan_attr.attr,
+	NULL
 };
 
 static struct kobj_type port_cc_ktype = {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index f3bc01bce483..6ae98aa7f74e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -784,7 +784,6 @@ static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node,
 	}
 
 	ctrl_ird |= IETF_PEER_TO_PEER;
-	ctrl_ird |= IETF_FLPDU_ZERO_LEN;
 
 	switch (mpa_key) {
 	case MPA_KEY_REQUEST:
@@ -2446,8 +2445,8 @@ static void i40iw_handle_rcv_mpa(struct i40iw_cm_node *cm_node,
 		} else {
 			type = I40IW_CM_EVENT_CONNECTED;
 			cm_node->state = I40IW_CM_STATE_OFFLOADED;
-			i40iw_send_ack(cm_node);
 		}
+		i40iw_send_ack(cm_node);
 		break;
 	default:
 		pr_err("%s wrong cm_node state =%d\n", __func__, cm_node->state);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index f82483b3d1e7..a027e2072477 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -285,28 +285,20 @@ void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2pa
 	struct i40iw_sc_dev *dev = vsi->dev;
 	struct i40iw_sc_qp *qp = NULL;
 	bool qs_handle_change = false;
-	bool mss_change = false;
 	unsigned long flags;
 	u16 qs_handle;
 	int i;
 
-	if (vsi->mss != l2params->mss) {
-		mss_change = true;
-		vsi->mss = l2params->mss;
-	}
+	vsi->mss = l2params->mss;
 
 	i40iw_fill_qos_list(l2params->qs_handle_list);
 	for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
 		qs_handle = l2params->qs_handle_list[i];
 		if (vsi->qos[i].qs_handle != qs_handle)
 			qs_handle_change = true;
-		else if (!mss_change)
-			continue;       /* no MSS nor qs handle change */
 		spin_lock_irqsave(&vsi->qos[i].lock, flags);
 		qp = i40iw_get_qp(&vsi->qos[i].qplist, qp);
 		while (qp) {
-			if (mss_change)
-				i40iw_qp_mss_modify(dev, qp);
 			if (qs_handle_change) {
 				qp->qs_handle = qs_handle;
 				/* issue cqp suspend command */
@@ -2395,7 +2387,6 @@ static enum i40iw_status_code i40iw_sc_qp_modify(
 
 	set_64bit_val(wqe,
 		      8,
-		      LS_64(info->new_mss, I40IW_CQPSQ_QP_NEWMSS) |
 		      LS_64(term_len, I40IW_CQPSQ_QP_TERMLEN));
 
 	set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
@@ -2410,7 +2401,6 @@ static enum i40iw_status_code i40iw_sc_qp_modify(
 		 LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
 		 LS_64(info->force_loopback, I40IW_CQPSQ_QP_FORCELOOPBACK) |
 		 LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
-		 LS_64(info->mss_change, I40IW_CQPSQ_QP_MSSCHANGE) |
 		 LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) |
 		 LS_64(info->remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) |
 		 LS_64(term_actions, I40IW_CQPSQ_QP_TERMACT) |
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 2728af3103ce..e0f47cc2effc 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -1319,13 +1319,13 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
 	status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_QUERY_FPM_BUF_SIZE,
 				       I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK);
 	if (status)
-		goto exit;
+		goto error;
 	info.fpm_query_buf_pa = mem.pa;
 	info.fpm_query_buf = mem.va;
 	status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_COMMIT_FPM_BUF_SIZE,
 				       I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK);
 	if (status)
-		goto exit;
+		goto error;
 	info.fpm_commit_buf_pa = mem.pa;
 	info.fpm_commit_buf = mem.va;
 	info.hmc_fn_id = ldev->fid;
@@ -1347,11 +1347,9 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
 	info.exception_lan_queue = 1;
 	info.vchnl_send = i40iw_virtchnl_send;
 	status = i40iw_device_init(&iwdev->sc_dev, &info);
-exit:
-	if (status) {
-		kfree(iwdev->hmc_info_mem);
-		iwdev->hmc_info_mem = NULL;
-	}
+
+	if (status)
+		goto error;
 	memset(&vsi_info, 0, sizeof(vsi_info));
 	vsi_info.dev = &iwdev->sc_dev;
 	vsi_info.back_vsi = (void *)iwdev;
@@ -1362,11 +1360,19 @@ exit:
 		memset(&stats_info, 0, sizeof(stats_info));
 		stats_info.fcn_id = ldev->fid;
 		stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL);
+		if (!stats_info.pestat) {
+			status = I40IW_ERR_NO_MEMORY;
+			goto error;
+		}
 		stats_info.stats_initialize = true;
 		if (stats_info.pestat)
 			i40iw_vsi_stats_init(&iwdev->vsi, &stats_info);
 	}
 	return status;
+error:
+	kfree(iwdev->hmc_info_mem);
+	iwdev->hmc_info_mem = NULL;
+	return status;
 }
 
 /**
@@ -1933,7 +1939,7 @@ static int i40iw_virtchnl_receive(struct i40e_info *ldev,
 bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev)
 {
 	struct i40iw_device *iwdev;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	iwdev = dev->back_dev;
 
diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
index aa66c1c63dfa..f27be3e7830b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
@@ -199,7 +199,6 @@ void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
 			    struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx);
 void *i40iw_remove_head(struct list_head *list);
 void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend);
-void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
 
 void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len);
 void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index 7b76259752b0..959ec81fba99 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -541,7 +541,6 @@ struct i40iw_create_qp_info {
 struct i40iw_modify_qp_info {
 	u64 rx_win0;
 	u64 rx_win1;
-	u16 new_mss;
 	u8 next_iwarp_state;
 	u8 termlen;
 	bool ord_valid;
@@ -554,7 +553,6 @@ struct i40iw_modify_qp_info {
 	bool dont_send_term;
 	bool dont_send_fin;
 	bool cached_var_valid;
-	bool mss_change;
 	bool force_loopback;
 };
 
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 409a3781e735..56d986924a4c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -757,23 +757,6 @@ void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, b
 }
 
 /**
- * i40iw_qp_mss_modify - modify mss for qp
- * @dev: hardware control device structure
- * @qp: hardware control qp
- */
-void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
-{
-	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-	struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
-	struct i40iw_modify_qp_info info;
-
-	memset(&info, 0, sizeof(info));
-	info.mss_change = true;
-	info.new_mss = qp->vsi->mss;
-	i40iw_hw_modify_qp(iwdev, iwqp, &info, false);
-}
-
-/**
  * i40iw_term_modify_qp - modify qp for term message
  * @qp: hardware control qp
  * @next_state: qp's next state
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
index f4d13683a403..48fd327f876b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
@@ -443,10 +443,7 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
 	if (!dev->vchnl_up)
 		return I40IW_ERR_NOT_READY;
 	if (vchnl_msg->iw_op_code == I40IW_VCHNL_OP_GET_VER) {
-		if (vchnl_msg->iw_op_ver != I40IW_VCHNL_OP_GET_VER_V0)
-			vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
-		else
-			vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
+		vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
 		return I40IW_SUCCESS;
 	}
 	for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index b4694717f6f3..21d31cb1325f 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1578,6 +1578,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
 	if (port < 0)
 		return;
 	ah.av.ib.port_pd = cpu_to_be32(port << 24 | (be32_to_cpu(ah.av.ib.port_pd) & 0xffffff));
+	ah.ibah.type = rdma_ah_find_type(&dev->ib_dev, port);
 
 	mlx4_ib_query_ah(&ah.ibah, &ah_attr);
 	if (rdma_ah_get_ah_flags(&ah_attr) & IB_AH_GRH)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d45772da0963..9ecc089d4529 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2979,6 +2979,18 @@ error_0:
 	return ret;
 }
 
+static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
+{
+	switch (umr_fence_cap) {
+	case MLX5_CAP_UMR_FENCE_NONE:
+		return MLX5_FENCE_MODE_NONE;
+	case MLX5_CAP_UMR_FENCE_SMALL:
+		return MLX5_FENCE_MODE_INITIATOR_SMALL;
+	default:
+		return MLX5_FENCE_MODE_STRONG_ORDERING;
+	}
+}
+
 static int create_dev_resources(struct mlx5_ib_resources *devr)
 {
 	struct ib_srq_init_attr attr;
@@ -3680,8 +3692,10 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;
 	dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
 	dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;
-	dev->ib_dev.alloc_rdma_netdev	= mlx5_ib_alloc_rdma_netdev;
-	dev->ib_dev.free_rdma_netdev	= mlx5_ib_free_rdma_netdev;
+	if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) {
+		dev->ib_dev.alloc_rdma_netdev	= mlx5_ib_alloc_rdma_netdev;
+		dev->ib_dev.free_rdma_netdev	= mlx5_ib_free_rdma_netdev;
+	}
 	if (mlx5_core_is_pf(mdev)) {
 		dev->ib_dev.get_vf_config	= mlx5_ib_get_vf_config;
 		dev->ib_dev.set_vf_link_state	= mlx5_ib_set_vf_link_state;
@@ -3693,6 +3707,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 
 	mlx5_ib_internal_fill_odp_caps(dev);
 
+	dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
+
 	if (MLX5_CAP_GEN(mdev, imaicl)) {
 		dev->ib_dev.alloc_mw		= mlx5_ib_alloc_mw;
 		dev->ib_dev.dealloc_mw		= mlx5_ib_dealloc_mw;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 38c877bc45e5..bdcf25410c99 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -349,7 +349,7 @@ struct mlx5_ib_qp {
 	struct mlx5_ib_wq	rq;
 
 	u8			sq_signal_bits;
-	u8			fm_cache;
+	u8			next_fence;
 	struct mlx5_ib_wq	sq;
 
 	/* serialize qp state modifications
@@ -654,6 +654,7 @@ struct mlx5_ib_dev {
 	struct mlx5_ib_port	*port;
 	struct mlx5_sq_bfreg     bfreg;
 	struct mlx5_sq_bfreg     fp_bfreg;
+	u8				umr_fence;
 };
 
 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 93959e1e43a3..ebb6768684de 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3738,24 +3738,6 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
 	}
 }
 
-static u8 get_fence(u8 fence, struct ib_send_wr *wr)
-{
-	if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
-		     wr->send_flags & IB_SEND_FENCE))
-		return MLX5_FENCE_MODE_STRONG_ORDERING;
-
-	if (unlikely(fence)) {
-		if (wr->send_flags & IB_SEND_FENCE)
-			return MLX5_FENCE_MODE_SMALL_AND_FENCE;
-		else
-			return fence;
-	} else if (unlikely(wr->send_flags & IB_SEND_FENCE)) {
-		return MLX5_FENCE_MODE_FENCE;
-	}
-
-	return 0;
-}
-
 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 		     struct mlx5_wqe_ctrl_seg **ctrl,
 		     struct ib_send_wr *wr, unsigned *idx,
@@ -3784,8 +3766,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 static void finish_wqe(struct mlx5_ib_qp *qp,
 		       struct mlx5_wqe_ctrl_seg *ctrl,
 		       u8 size, unsigned idx, u64 wr_id,
-		       int nreq, u8 fence, u8 next_fence,
-		       u32 mlx5_opcode)
+		       int nreq, u8 fence, u32 mlx5_opcode)
 {
 	u8 opmod = 0;
 
@@ -3793,7 +3774,6 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
 					     mlx5_opcode | ((u32)opmod << 24));
 	ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
 	ctrl->fm_ce_se |= fence;
-	qp->fm_cache = next_fence;
 	if (unlikely(qp->wq_sig))
 		ctrl->signature = wq_sig(ctrl);
 
@@ -3853,7 +3833,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			goto out;
 		}
 
-		fence = qp->fm_cache;
 		num_sge = wr->num_sge;
 		if (unlikely(num_sge > qp->sq.max_gs)) {
 			mlx5_ib_warn(dev, "\n");
@@ -3870,6 +3849,19 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			goto out;
 		}
 
+		if (wr->opcode == IB_WR_LOCAL_INV ||
+		    wr->opcode == IB_WR_REG_MR) {
+			fence = dev->umr_fence;
+			next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+		} else if (wr->send_flags & IB_SEND_FENCE) {
+			if (qp->next_fence)
+				fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
+			else
+				fence = MLX5_FENCE_MODE_FENCE;
+		} else {
+			fence = qp->next_fence;
+		}
+
 		switch (ibqp->qp_type) {
 		case IB_QPT_XRC_INI:
 			xrc = seg;
@@ -3896,7 +3888,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				goto out;
 
 			case IB_WR_LOCAL_INV:
-				next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
 				qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
 				ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
 				set_linv_wr(qp, &seg, &size);
@@ -3904,7 +3895,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				break;
 
 			case IB_WR_REG_MR:
-				next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
 				qp->sq.wr_data[idx] = IB_WR_REG_MR;
 				ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
 				err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
@@ -3927,9 +3917,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 					goto out;
 				}
 
-				finish_wqe(qp, ctrl, size, idx, wr->wr_id,
-					   nreq, get_fence(fence, wr),
-					   next_fence, MLX5_OPCODE_UMR);
+				finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+					   fence, MLX5_OPCODE_UMR);
 				/*
 				 * SET_PSV WQEs are not signaled and solicited
 				 * on error
@@ -3954,9 +3943,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 					goto out;
 				}
 
-				finish_wqe(qp, ctrl, size, idx, wr->wr_id,
-					   nreq, get_fence(fence, wr),
-					   next_fence, MLX5_OPCODE_SET_PSV);
+				finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+					   fence, MLX5_OPCODE_SET_PSV);
 				err = begin_wqe(qp, &seg, &ctrl, wr,
 						&idx, &size, nreq);
 				if (err) {
@@ -3966,7 +3954,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 					goto out;
 				}
 
-				next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
 				err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire,
 						 mr->sig->psv_wire.psv_idx, &seg,
 						 &size);
@@ -3976,9 +3963,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 					goto out;
 				}
 
-				finish_wqe(qp, ctrl, size, idx, wr->wr_id,
-					   nreq, get_fence(fence, wr),
-					   next_fence, MLX5_OPCODE_SET_PSV);
+				finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+					   fence, MLX5_OPCODE_SET_PSV);
+				qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
 				num_sge = 0;
 				goto skip_psv;
 
@@ -4089,8 +4076,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			}
 		}
 
-		finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
-			   get_fence(fence, wr), next_fence,
+		qp->next_fence = next_fence;
+		finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence,
 			   mlx5_ib_opcode[wr->opcode]);
 skip_psv:
 		if (0)
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index fb983df7c157..30b256a2c54e 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -610,7 +610,6 @@ static void build_mpa_v2(struct nes_cm_node *cm_node,
 		ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD;
 	}
 	ctrl_ird |= IETF_PEER_TO_PEER;
-	ctrl_ird |= IETF_FLPDU_ZERO_LEN;
 
 	switch (mpa_key) {
 	case MPA_KEY_REQUEST:
@@ -1826,7 +1825,7 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb)
 			type = NES_CM_EVENT_CONNECTED;
 			cm_node->state = NES_CM_STATE_TSA;
 		}
-
+		send_ack(cm_node, NULL);
 		break;
 	default:
 		WARN_ON(1);
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index aa08c76a4245..d961f79b317c 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -58,7 +58,10 @@
 #define QEDR_MSG_QP   "  QP"
 #define QEDR_MSG_GSI  " GSI"
 
-#define QEDR_CQ_MAGIC_NUMBER   (0x11223344)
+#define QEDR_CQ_MAGIC_NUMBER	(0x11223344)
+
+#define FW_PAGE_SIZE		(RDMA_RING_PAGE_SIZE)
+#define FW_PAGE_SHIFT		(12)
 
 struct qedr_dev;
 
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c
index 3d7705cec770..d86dbe814d98 100644
--- a/drivers/infiniband/hw/qedr/qedr_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_cm.c
@@ -270,11 +270,13 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
 		return rc;
 	}
 
-	vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
-	if (vlan_id < VLAN_CFI_MASK)
-		has_vlan = true;
-	if (sgid_attr.ndev)
+	if (sgid_attr.ndev) {
+		vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
+		if (vlan_id < VLAN_CFI_MASK)
+			has_vlan = true;
+
 		dev_put(sgid_attr.ndev);
+	}
 
 	if (!memcmp(&sgid, &zgid, sizeof(sgid))) {
 		DP_ERR(dev, "gsi post send: GID not found GID index %d\n",
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 17685cfea6a2..d6723c365c7f 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -653,14 +653,15 @@ static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
 
 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
 			       struct qedr_pbl *pbl,
-			       struct qedr_pbl_info *pbl_info)
+			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
 {
 	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+	u32 fw_pg_cnt, fw_pg_per_umem_pg;
 	struct qedr_pbl *pbl_tbl;
 	struct scatterlist *sg;
 	struct regpair *pbe;
+	u64 pg_addr;
 	int entry;
-	u32 addr;
 
 	if (!pbl_info->num_pbes)
 		return;
@@ -683,29 +684,35 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
 
 	shift = umem->page_shift;
 
+	fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
+
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
 		pages = sg_dma_len(sg) >> shift;
+		pg_addr = sg_dma_address(sg);
 		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
-			/* store the page address in pbe */
-			pbe->lo = cpu_to_le32(sg_dma_address(sg) +
-					      (pg_cnt << shift));
-			addr = upper_32_bits(sg_dma_address(sg) +
-					     (pg_cnt << shift));
-			pbe->hi = cpu_to_le32(addr);
-			pbe_cnt++;
-			total_num_pbes++;
-			pbe++;
-
-			if (total_num_pbes == pbl_info->num_pbes)
-				return;
-
-			/* If the given pbl is full storing the pbes,
-			 * move to next pbl.
-			 */
-			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
-				pbl_tbl++;
-				pbe = (struct regpair *)pbl_tbl->va;
-				pbe_cnt = 0;
+			for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
+				pbe->lo = cpu_to_le32(pg_addr);
+				pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
+
+				pg_addr += BIT(pg_shift);
+				pbe_cnt++;
+				total_num_pbes++;
+				pbe++;
+
+				if (total_num_pbes == pbl_info->num_pbes)
+					return;
+
+				/* If the given pbl is full storing the pbes,
+				 * move to next pbl.
+				 */
+				if (pbe_cnt ==
+				    (pbl_info->pbl_size / sizeof(u64))) {
+					pbl_tbl++;
+					pbe = (struct regpair *)pbl_tbl->va;
+					pbe_cnt = 0;
+				}
+
+				fw_pg_cnt++;
 			}
 		}
 	}
@@ -754,7 +761,7 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
 				       u64 buf_addr, size_t buf_len,
 				       int access, int dmasync)
 {
-	int page_cnt;
+	u32 fw_pages;
 	int rc;
 
 	q->buf_addr = buf_addr;
@@ -766,8 +773,10 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
 		return PTR_ERR(q->umem);
 	}
 
-	page_cnt = ib_umem_page_count(q->umem);
-	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0);
+	fw_pages = ib_umem_page_count(q->umem) <<
+	    (q->umem->page_shift - FW_PAGE_SHIFT);
+
+	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
 	if (rc)
 		goto err0;
 
@@ -777,7 +786,8 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
 		goto err0;
 	}
 
-	qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);
+		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
+				   FW_PAGE_SHIFT);
 
 	return 0;
 
@@ -2226,7 +2236,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
 		goto err1;
 
 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
-			   &mr->info.pbl_info);
+			   &mr->info.pbl_info, mr->umem->page_shift);
 
 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
 	if (rc) {
@@ -3209,6 +3219,10 @@ static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
 		case IB_WC_REG_MR:
 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
 			break;
+		case IB_WC_RDMA_READ:
+		case IB_WC_SEND:
+			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
+			break;
 		default:
 			break;
 		}
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index fc8b88514da5..4ddbcac5eabe 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -1956,8 +1956,10 @@ send_last:
 		ret = qib_get_rwqe(qp, 1);
 		if (ret < 0)
 			goto nack_op_err;
-		if (!ret)
+		if (!ret) {
+			rvt_put_ss(&qp->r_sge);
 			goto rnr_nak;
+		}
 		wc.ex.imm_data = ohdr->u.rc.imm_data;
 		hdrsize += 4;
 		wc.wc_flags = IB_WC_WITH_IMM;
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index ecdba2fce083..1ac5b8551a4d 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -68,6 +68,7 @@
 static inline u32 rxe_crc32(struct rxe_dev *rxe,
 			    u32 crc, void *next, size_t len)
 {
+	u32 retval;
 	int err;
 
 	SHASH_DESC_ON_STACK(shash, rxe->tfm);
@@ -81,7 +82,9 @@ static inline u32 rxe_crc32(struct rxe_dev *rxe,
 		return crc32_le(crc, next, len);
 	}
 
-	return *(u32 *)shash_desc_ctx(shash);
+	retval = *(u32 *)shash_desc_ctx(shash);
+	barrier_data(shash_desc_ctx(shash));
+	return retval;
 }
 
 int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 83d709e74dfb..073e66783f1d 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -740,13 +740,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
 
 		sge = ibwr->sg_list;
 		for (i = 0; i < num_sge; i++, sge++) {
-			if (qp->is_user && copy_from_user(p, (__user void *)
-					    (uintptr_t)sge->addr, sge->length))
-				return -EFAULT;
-
-			else if (!qp->is_user)
-				memcpy(p, (void *)(uintptr_t)sge->addr,
-				       sge->length);
+			memcpy(p, (void *)(uintptr_t)sge->addr,
+					sge->length);
 
 			p += sge->length;
 		}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 874b24366e4d..7871379342f4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -178,7 +178,7 @@ static inline int ib_speed_enum_to_int(int speed)
 static int ipoib_get_link_ksettings(struct net_device *netdev,
 				    struct ethtool_link_ksettings *cmd)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(netdev);
+	struct ipoib_dev_priv *priv = ipoib_priv(netdev);
 	struct ib_port_attr attr;
 	int ret, speed, width;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 0060b2f9f659..efe7402f4885 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -863,7 +863,6 @@ dev_stop:
 	set_bit(IPOIB_STOP_REAPER, &priv->flags);
 	cancel_delayed_work(&priv->ah_reap_task);
 	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
-	napi_enable(&priv->napi);
 	ipoib_ib_dev_stop(dev);
 	return -1;
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 2869d1adb1de..1015a63de6ae 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1590,12 +1590,14 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
 	wait_for_completion(&priv->ntbl.deleted);
 }
 
-void ipoib_dev_uninit_default(struct net_device *dev)
+static void ipoib_dev_uninit_default(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	ipoib_transport_dev_cleanup(dev);
 
+	netif_napi_del(&priv->napi);
+
 	ipoib_cm_dev_cleanup(dev);
 
 	kfree(priv->rx_ring);
@@ -1649,6 +1651,7 @@ out_rx_ring_cleanup:
 	kfree(priv->rx_ring);
 
 out:
+	netif_napi_del(&priv->napi);
 	return -ENOMEM;
 }
 
@@ -2237,6 +2240,7 @@ event_failed:
 
 device_init_failed:
 	free_netdev(priv->dev);
+	kfree(priv);
 
 alloc_mem_failed:
 	return ERR_PTR(result);
@@ -2277,7 +2281,7 @@ static void ipoib_add_one(struct ib_device *device)
 
 static void ipoib_remove_one(struct ib_device *device, void *client_data)
 {
-	struct ipoib_dev_priv *priv, *tmp;
+	struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
 	struct list_head *dev_list = client_data;
 
 	if (!dev_list)
@@ -2300,7 +2304,14 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
 		flush_workqueue(priv->wq);
 
 		unregister_netdev(priv->dev);
-		free_netdev(priv->dev);
+		if (device->free_rdma_netdev)
+			device->free_rdma_netdev(priv->dev);
+		else
+			free_netdev(priv->dev);
+
+		list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list)
+			kfree(cpriv);
+
 		kfree(priv);
 	}
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 36dc4fcaa3cd..081b33deff1b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -133,13 +133,13 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 	snprintf(intf_name, sizeof intf_name, "%s.%04x",
 		 ppriv->dev->name, pkey);
 
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
 	if (!priv)
 		return -ENOMEM;
 
-	if (!rtnl_trylock())
-		return restart_syscall();
-
 	down_write(&ppriv->vlan_rwsem);
 
 	/*
@@ -167,8 +167,10 @@ out:
 
 	rtnl_unlock();
 
-	if (result)
+	if (result) {
 		free_netdev(priv->dev);
+		kfree(priv);
+	}
 
 	return result;
 }
@@ -209,6 +211,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 
 	if (dev) {
 		free_netdev(dev);
+		kfree(priv);
 		return 0;
 	}
 
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index def723a5df29..2354c742caa1 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -320,7 +320,7 @@ static int srp_new_cm_id(struct srp_rdma_ch *ch)
 	ch->path.sgid = target->sgid;
 	ch->path.dgid = target->orig_dgid;
 	ch->path.pkey = target->pkey;
-	sa_path_set_service_id(&ch->path, target->service_id);
+	ch->path.service_id = target->service_id;
 
 	return 0;
 }
@@ -575,7 +575,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
 	return 0;
 
 err_qp:
-	srp_destroy_qp(ch, qp);
+	ib_destroy_qp(qp);
 
 err_send_cq:
 	ib_free_cq(send_cq);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 7d6c199de2d6..1ced0731c140 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -2302,12 +2302,8 @@ static void srpt_queue_response(struct se_cmd *cmd)
 	}
 	spin_unlock_irqrestore(&ioctx->spinlock, flags);
 
-	if (unlikely(transport_check_aborted_status(&ioctx->cmd, false)
-		     || WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) {
-		atomic_inc(&ch->req_lim_delta);
-		srpt_abort_cmd(ioctx);
+	if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT)))
 		return;
-	}
 
 	/* For read commands, transfer the data to the initiator. */
 	if (ioctx->cmd.data_direction == DMA_FROM_DEVICE &&
@@ -2689,7 +2685,8 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
 	struct srpt_rdma_ch *ch = ioctx->ch;
 	unsigned long flags;
 
-	WARN_ON(ioctx->state != SRPT_STATE_DONE);
+	WARN_ON_ONCE(ioctx->state != SRPT_STATE_DONE &&
+		     !(ioctx->cmd.transport_state & CMD_T_ABORTED));
 
 	if (ioctx->n_rw_ctx) {
 		srpt_free_rw_ctxs(ch, ioctx);
diff --git a/drivers/input/joystick/Kconfig b/drivers/input/joystick/Kconfig
index 4215b5382092..f3c2f6ea8b44 100644
--- a/drivers/input/joystick/Kconfig
+++ b/drivers/input/joystick/Kconfig
@@ -330,4 +330,25 @@ config JOYSTICK_MAPLE
 	  To compile this as a module choose M here: the module will be called
 	  maplecontrol.
 
+config JOYSTICK_PSXPAD_SPI
+	tristate "PlayStation 1/2 joypads via SPI interface"
+	depends on SPI
+	select INPUT_POLLDEV
+	help
+	  Say Y here if you wish to connect PlayStation 1/2 joypads
+	  via SPI interface.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called psxpad-spi.
+
+config JOYSTICK_PSXPAD_SPI_FF
+	bool "PlayStation 1/2 joypads force feedback (rumble) support"
+	depends on JOYSTICK_PSXPAD_SPI
+	select INPUT_FF_MEMLESS
+	help
+	  Say Y here if you want to take advantage of PlayStation 1/2
+	  joypads rumble features.
+
+	  To drive rumble motor a dedicated power supply is required.
+
 endif
diff --git a/drivers/input/joystick/Makefile b/drivers/input/joystick/Makefile
index 92dc0de9dfed..496fd56b3f1b 100644
--- a/drivers/input/joystick/Makefile
+++ b/drivers/input/joystick/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_JOYSTICK_INTERACT)		+= interact.o
 obj-$(CONFIG_JOYSTICK_JOYDUMP)		+= joydump.o
 obj-$(CONFIG_JOYSTICK_MAGELLAN)		+= magellan.o
 obj-$(CONFIG_JOYSTICK_MAPLE)		+= maplecontrol.o
+obj-$(CONFIG_JOYSTICK_PSXPAD_SPI)	+= psxpad-spi.o
 obj-$(CONFIG_JOYSTICK_SIDEWINDER)	+= sidewinder.o
 obj-$(CONFIG_JOYSTICK_SPACEBALL)	+= spaceball.o
 obj-$(CONFIG_JOYSTICK_SPACEORB)		+= spaceorb.o
diff --git a/drivers/input/joystick/psxpad-spi.c b/drivers/input/joystick/psxpad-spi.c
new file mode 100644
index 000000000000..28b473f6cbb6
--- /dev/null
+++ b/drivers/input/joystick/psxpad-spi.c
@@ -0,0 +1,401 @@
+/*
+ * PlayStation 1/2 joypads via SPI interface Driver
+ *
+ * Copyright (C) 2017 Tomohiro Yoshidomi <sylph23k@gmail.com>
+ * Licensed under the GPL-2 or later.
+ *
+ * PlayStation 1/2 joypad's plug (not socket)
+ *  123 456 789
+ * (...|...|...)
+ *
+ * 1: DAT -> MISO (pullup with 1k owm to 3.3V)
+ * 2: CMD -> MOSI
+ * 3: 9V (for motor, if not use N.C.)
+ * 4: GND
+ * 5: 3.3V
+ * 6: Attention -> CS(SS)
+ * 7: SCK -> SCK
+ * 8: N.C.
+ * 9: ACK -> N.C.
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/input.h>
+#include <linux/input-polldev.h>
+#include <linux/module.h>
+#include <linux/spi/spi.h>
+#include <linux/types.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+
+#define REVERSE_BIT(x) ((((x) & 0x80) >> 7) | (((x) & 0x40) >> 5) | \
+	(((x) & 0x20) >> 3) | (((x) & 0x10) >> 1) | (((x) & 0x08) << 1) | \
+	(((x) & 0x04) << 3) | (((x) & 0x02) << 5) | (((x) & 0x01) << 7))
+
+/* PlayStation 1/2 joypad command and response are LSBFIRST. */
+
+/*
+ *	0x01, 0x42, 0x00, 0x00, 0x00,
+ *	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ *	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ */
+static const u8 PSX_CMD_POLL[] = {
+	0x80, 0x42, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+/*	0x01, 0x43, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 */
+static const u8 PSX_CMD_ENTER_CFG[] = {
+	0x80, 0xC2, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+/*	0x01, 0x43, 0x00, 0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A */
+static const u8 PSX_CMD_EXIT_CFG[] = {
+	0x80, 0xC2, 0x00, 0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A
+};
+/*	0x01, 0x4D, 0x00, 0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF */
+static const u8 PSX_CMD_ENABLE_MOTOR[]	= {
+	0x80, 0xB2, 0x00, 0x00, 0x80, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+struct psxpad {
+	struct spi_device *spi;
+	struct input_polled_dev *pdev;
+	char phys[0x20];
+	bool motor1enable;
+	bool motor2enable;
+	u8 motor1level;
+	u8 motor2level;
+	u8 sendbuf[0x20] ____cacheline_aligned;
+	u8 response[sizeof(PSX_CMD_POLL)] ____cacheline_aligned;
+};
+
+static int psxpad_command(struct psxpad *pad, const u8 sendcmdlen)
+{
+	struct spi_transfer xfers = {
+		.tx_buf		= pad->sendbuf,
+		.rx_buf		= pad->response,
+		.len		= sendcmdlen,
+	};
+	int err;
+
+	err = spi_sync_transfer(pad->spi, &xfers, 1);
+	if (err) {
+		dev_err(&pad->spi->dev,
+			"%s: failed to SPI xfers mode: %d\n",
+			__func__, err);
+		return err;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_JOYSTICK_PSXPAD_SPI_FF
+static void psxpad_control_motor(struct psxpad *pad,
+				 bool motor1enable, bool motor2enable)
+{
+	int err;
+
+	pad->motor1enable = motor1enable;
+	pad->motor2enable = motor2enable;
+
+	memcpy(pad->sendbuf, PSX_CMD_ENTER_CFG, sizeof(PSX_CMD_ENTER_CFG));
+	err = psxpad_command(pad, sizeof(PSX_CMD_ENTER_CFG));
+	if (err) {
+		dev_err(&pad->spi->dev,
+			"%s: failed to enter config mode: %d\n",
+			__func__, err);
+		return;
+	}
+
+	memcpy(pad->sendbuf, PSX_CMD_ENABLE_MOTOR,
+	       sizeof(PSX_CMD_ENABLE_MOTOR));
+	pad->sendbuf[3] = pad->motor1enable ? 0x00 : 0xFF;
+	pad->sendbuf[4] = pad->motor2enable ? 0x80 : 0xFF;
+	err = psxpad_command(pad, sizeof(PSX_CMD_ENABLE_MOTOR));
+	if (err) {
+		dev_err(&pad->spi->dev,
+			"%s: failed to enable motor mode: %d\n",
+			__func__, err);
+		return;
+	}
+
+	memcpy(pad->sendbuf, PSX_CMD_EXIT_CFG, sizeof(PSX_CMD_EXIT_CFG));
+	err = psxpad_command(pad, sizeof(PSX_CMD_EXIT_CFG));
+	if (err) {
+		dev_err(&pad->spi->dev,
+			"%s: failed to exit config mode: %d\n",
+			__func__, err);
+		return;
+	}
+}
+
+static void psxpad_set_motor_level(struct psxpad *pad,
+				   u8 motor1level, u8 motor2level)
+{
+	pad->motor1level = motor1level ? 0xFF : 0x00;
+	pad->motor2level = REVERSE_BIT(motor2level);
+}
+
+static int psxpad_spi_play_effect(struct input_dev *idev,
+				  void *data, struct ff_effect *effect)
+{
+	struct input_polled_dev *pdev = input_get_drvdata(idev);
+	struct psxpad *pad = pdev->private;
+
+	switch (effect->type) {
+	case FF_RUMBLE:
+		psxpad_set_motor_level(pad,
+			(effect->u.rumble.weak_magnitude >> 8) & 0xFFU,
+			(effect->u.rumble.strong_magnitude >> 8) & 0xFFU);
+		break;
+	}
+
+	return 0;
+}
+
+static int psxpad_spi_init_ff(struct psxpad *pad)
+{
+	int err;
+
+	input_set_capability(pad->pdev->input, EV_FF, FF_RUMBLE);
+
+	err = input_ff_create_memless(pad->pdev->input, NULL,
+				      psxpad_spi_play_effect);
+	if (err) {
+		dev_err(&pad->spi->dev,
+			"input_ff_create_memless() failed: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+#else	/* CONFIG_JOYSTICK_PSXPAD_SPI_FF */
+
+static void psxpad_control_motor(struct psxpad *pad,
+				 bool motor1enable, bool motor2enable)
+{
+}
+
+static void psxpad_set_motor_level(struct psxpad *pad,
+				   u8 motor1level, u8 motor2level)
+{
+}
+
+static inline int psxpad_spi_init_ff(struct psxpad *pad)
+{
+	return 0;
+}
+#endif	/* CONFIG_JOYSTICK_PSXPAD_SPI_FF */
+
+static void psxpad_spi_poll_open(struct input_polled_dev *pdev)
+{
+	struct psxpad *pad = pdev->private;
+
+	pm_runtime_get_sync(&pad->spi->dev);
+}
+
+static void psxpad_spi_poll_close(struct input_polled_dev *pdev)
+{
+	struct psxpad *pad = pdev->private;
+
+	pm_runtime_put_sync(&pad->spi->dev);
+}
+
+static void psxpad_spi_poll(struct input_polled_dev *pdev)
+{
+	struct psxpad *pad = pdev->private;
+	struct input_dev *input = pdev->input;
+	u8 b_rsp3, b_rsp4;
+	int err;
+
+	psxpad_control_motor(pad, true, true);
+
+	memcpy(pad->sendbuf, PSX_CMD_POLL, sizeof(PSX_CMD_POLL));
+	pad->sendbuf[3] = pad->motor1enable ? pad->motor1level : 0x00;
+	pad->sendbuf[4] = pad->motor2enable ? pad->motor2level : 0x00;
+	err = psxpad_command(pad, sizeof(PSX_CMD_POLL));
+	if (err) {
+		dev_err(&pad->spi->dev,
+			"%s: poll command failed mode: %d\n", __func__, err);
+		return;
+	}
+
+	switch (pad->response[1]) {
+	case 0xCE:	/* 0x73 : analog 1 */
+		/* button data is inverted */
+		b_rsp3 = ~pad->response[3];
+		b_rsp4 = ~pad->response[4];
+
+		input_report_abs(input, ABS_X, REVERSE_BIT(pad->response[7]));
+		input_report_abs(input, ABS_Y, REVERSE_BIT(pad->response[8]));
+		input_report_abs(input, ABS_RX, REVERSE_BIT(pad->response[5]));
+		input_report_abs(input, ABS_RY, REVERSE_BIT(pad->response[6]));
+		input_report_key(input, BTN_DPAD_UP, b_rsp3 & BIT(3));
+		input_report_key(input, BTN_DPAD_DOWN, b_rsp3 & BIT(1));
+		input_report_key(input, BTN_DPAD_LEFT, b_rsp3 & BIT(0));
+		input_report_key(input, BTN_DPAD_RIGHT, b_rsp3 & BIT(2));
+		input_report_key(input, BTN_X, b_rsp4 & BIT(3));
+		input_report_key(input, BTN_A, b_rsp4 & BIT(2));
+		input_report_key(input, BTN_B, b_rsp4 & BIT(1));
+		input_report_key(input, BTN_Y, b_rsp4 & BIT(0));
+		input_report_key(input, BTN_TL, b_rsp4 & BIT(5));
+		input_report_key(input, BTN_TR, b_rsp4 & BIT(4));
+		input_report_key(input, BTN_TL2, b_rsp4 & BIT(7));
+		input_report_key(input, BTN_TR2, b_rsp4 & BIT(6));
+		input_report_key(input, BTN_THUMBL, b_rsp3 & BIT(6));
+		input_report_key(input, BTN_THUMBR, b_rsp3 & BIT(5));
+		input_report_key(input, BTN_SELECT, b_rsp3 & BIT(7));
+		input_report_key(input, BTN_START, b_rsp3 & BIT(4));
+		break;
+
+	case 0x82:	/* 0x41 : digital */
+		/* button data is inverted */
+		b_rsp3 = ~pad->response[3];
+		b_rsp4 = ~pad->response[4];
+
+		input_report_abs(input, ABS_X, 0x80);
+		input_report_abs(input, ABS_Y, 0x80);
+		input_report_abs(input, ABS_RX, 0x80);
+		input_report_abs(input, ABS_RY, 0x80);
+		input_report_key(input, BTN_DPAD_UP, b_rsp3 & BIT(3));
+		input_report_key(input, BTN_DPAD_DOWN, b_rsp3 & BIT(1));
+		input_report_key(input, BTN_DPAD_LEFT, b_rsp3 & BIT(0));
+		input_report_key(input, BTN_DPAD_RIGHT, b_rsp3 & BIT(2));
+		input_report_key(input, BTN_X, b_rsp4 & BIT(3));
+		input_report_key(input, BTN_A, b_rsp4 & BIT(2));
+		input_report_key(input, BTN_B, b_rsp4 & BIT(1));
+		input_report_key(input, BTN_Y, b_rsp4 & BIT(0));
+		input_report_key(input, BTN_TL, b_rsp4 & BIT(5));
+		input_report_key(input, BTN_TR, b_rsp4 & BIT(4));
+		input_report_key(input, BTN_TL2, b_rsp4 & BIT(7));
+		input_report_key(input, BTN_TR2, b_rsp4 & BIT(6));
+		input_report_key(input, BTN_THUMBL, false);
+		input_report_key(input, BTN_THUMBR, false);
+		input_report_key(input, BTN_SELECT, b_rsp3 & BIT(7));
+		input_report_key(input, BTN_START, b_rsp3 & BIT(4));
+		break;
+	}
+
+	input_sync(input);
+}
+
+static int psxpad_spi_probe(struct spi_device *spi)
+{
+	struct psxpad *pad;
+	struct input_polled_dev *pdev;
+	struct input_dev *idev;
+	int err;
+
+	pad = devm_kzalloc(&spi->dev, sizeof(struct psxpad), GFP_KERNEL);
+	if (!pad)
+		return -ENOMEM;
+
+	pdev = input_allocate_polled_device();
+	if (!pdev) {
+		dev_err(&spi->dev, "failed to allocate input device\n");
+		return -ENOMEM;
+	}
+
+	/* input poll device settings */
+	pad->pdev = pdev;
+	pad->spi = spi;
+
+	pdev->private = pad;
+	pdev->open = psxpad_spi_poll_open;
+	pdev->close = psxpad_spi_poll_close;
+	pdev->poll = psxpad_spi_poll;
+	/* poll interval is about 60fps */
+	pdev->poll_interval = 16;
+	pdev->poll_interval_min = 8;
+	pdev->poll_interval_max = 32;
+
+	/* input device settings */
+	idev = pdev->input;
+	idev->name = "PlayStation 1/2 joypad";
+	snprintf(pad->phys, sizeof(pad->phys), "%s/input", dev_name(&spi->dev));
+	idev->id.bustype = BUS_SPI;
+
+	/* key/value map settings */
+	input_set_abs_params(idev, ABS_X, 0, 255, 0, 0);
+	input_set_abs_params(idev, ABS_Y, 0, 255, 0, 0);
+	input_set_abs_params(idev, ABS_RX, 0, 255, 0, 0);
+	input_set_abs_params(idev, ABS_RY, 0, 255, 0, 0);
+	input_set_capability(idev, EV_KEY, BTN_DPAD_UP);
+	input_set_capability(idev, EV_KEY, BTN_DPAD_DOWN);
+	input_set_capability(idev, EV_KEY, BTN_DPAD_LEFT);
+	input_set_capability(idev, EV_KEY, BTN_DPAD_RIGHT);
+	input_set_capability(idev, EV_KEY, BTN_A);
+	input_set_capability(idev, EV_KEY, BTN_B);
+	input_set_capability(idev, EV_KEY, BTN_X);
+	input_set_capability(idev, EV_KEY, BTN_Y);
+	input_set_capability(idev, EV_KEY, BTN_TL);
+	input_set_capability(idev, EV_KEY, BTN_TR);
+	input_set_capability(idev, EV_KEY, BTN_TL2);
+	input_set_capability(idev, EV_KEY, BTN_TR2);
+	input_set_capability(idev, EV_KEY, BTN_THUMBL);
+	input_set_capability(idev, EV_KEY, BTN_THUMBR);
+	input_set_capability(idev, EV_KEY, BTN_SELECT);
+	input_set_capability(idev, EV_KEY, BTN_START);
+
+	err = psxpad_spi_init_ff(pad);
+	if (err)
+		return err;
+
+	/* SPI settings */
+	spi->mode = SPI_MODE_3;
+	spi->bits_per_word = 8;
+	/* (PlayStation 1/2 joypad might be possible works 250kHz/500kHz) */
+	spi->master->min_speed_hz = 125000;
+	spi->master->max_speed_hz = 125000;
+	spi_setup(spi);
+
+	/* pad settings */
+	psxpad_set_motor_level(pad, 0, 0);
+
+	/* register input poll device */
+	err = input_register_polled_device(pdev);
+	if (err) {
+		dev_err(&spi->dev,
+			"failed to register input poll device: %d\n", err);
+		return err;
+	}
+
+	pm_runtime_enable(&spi->dev);
+
+	return 0;
+}
+
+static int __maybe_unused psxpad_spi_suspend(struct device *dev)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	struct psxpad *pad = spi_get_drvdata(spi);
+
+	psxpad_set_motor_level(pad, 0, 0);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(psxpad_spi_pm, psxpad_spi_suspend, NULL);
+
+static const struct spi_device_id psxpad_spi_id[] = {
+	{ "psxpad-spi", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, psxpad_spi_id);
+
+static struct spi_driver psxpad_spi_driver = {
+	.driver = {
+		.name = "psxpad-spi",
+		.pm = &psxpad_spi_pm,
+	},
+	.id_table = psxpad_spi_id,
+	.probe   = psxpad_spi_probe,
+};
+
+module_spi_driver(psxpad_spi_driver);
+
+MODULE_AUTHOR("Tomohiro Yoshidomi <sylph23k@gmail.com>");
+MODULE_DESCRIPTION("PlayStation 1/2 joypads via SPI interface Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index df83fdc6c0e7..def96cd2479b 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -126,25 +126,26 @@ static const struct xpad_device {
 	u8 mapping;
 	u8 xtype;
 } xpad_device[] = {
+	{ 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX },
+	{ 0x044f, 0xb326, "Thrustmaster Gamepad GP XID", 0, XTYPE_XBOX360 },
 	{ 0x045e, 0x0202, "Microsoft X-Box pad v1 (US)", 0, XTYPE_XBOX },
 	{ 0x045e, 0x0285, "Microsoft X-Box pad (Japan)", 0, XTYPE_XBOX },
 	{ 0x045e, 0x0287, "Microsoft Xbox Controller S", 0, XTYPE_XBOX },
 	{ 0x045e, 0x0289, "Microsoft X-Box pad v2 (US)", 0, XTYPE_XBOX },
 	{ 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 },
+	{ 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
 	{ 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE },
 	{ 0x045e, 0x02dd, "Microsoft X-Box One pad (Firmware 2015)", 0, XTYPE_XBOXONE },
 	{ 0x045e, 0x02e3, "Microsoft X-Box One Elite pad", 0, XTYPE_XBOXONE },
 	{ 0x045e, 0x02ea, "Microsoft X-Box One S pad", 0, XTYPE_XBOXONE },
-	{ 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
 	{ 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
-	{ 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX },
-	{ 0x044f, 0xb326, "Thrustmaster Gamepad GP XID", 0, XTYPE_XBOX360 },
 	{ 0x046d, 0xc21d, "Logitech Gamepad F310", 0, XTYPE_XBOX360 },
 	{ 0x046d, 0xc21e, "Logitech Gamepad F510", 0, XTYPE_XBOX360 },
 	{ 0x046d, 0xc21f, "Logitech Gamepad F710", 0, XTYPE_XBOX360 },
 	{ 0x046d, 0xc242, "Logitech Chillstream Controller", 0, XTYPE_XBOX360 },
 	{ 0x046d, 0xca84, "Logitech Xbox Cordless Controller", 0, XTYPE_XBOX },
 	{ 0x046d, 0xca88, "Logitech Compact Controller for Xbox", 0, XTYPE_XBOX },
+	{ 0x056e, 0x2004, "Elecom JC-U3613M", 0, XTYPE_XBOX360 },
 	{ 0x05fd, 0x1007, "Mad Catz Controller (unverified)", 0, XTYPE_XBOX },
 	{ 0x05fd, 0x107a, "InterAct 'PowerPad Pro' X-Box pad (Germany)", 0, XTYPE_XBOX },
 	{ 0x0738, 0x4516, "Mad Catz Control Pad", 0, XTYPE_XBOX },
@@ -179,13 +180,15 @@ static const struct xpad_device {
 	{ 0x0e6f, 0x0006, "Edge wireless Controller", 0, XTYPE_XBOX },
 	{ 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x0113, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 },
+	{ 0x0e6f, 0x011f, "Rock Candy Gamepad Wired Controller", 0, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x0139, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE },
+	{ 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE },
 	{ 0x0e6f, 0x0201, "Pelican PL-3601 'TSZ' Wired Xbox 360 Controller", 0, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x0213, "Afterglow Gamepad for Xbox 360", 0, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x021f, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 },
-	{ 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE },
 	{ 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 },
+	{ 0x0e6f, 0x0413, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 },
 	{ 0x0e8f, 0x0201, "SmartJoy Frag Xpad/PS2 adaptor", 0, XTYPE_XBOX },
 	{ 0x0e8f, 0x3008, "Generic xbox control (dealextreme)", 0, XTYPE_XBOX },
 	{ 0x0f0d, 0x000a, "Hori Co. DOA4 FightStick", 0, XTYPE_XBOX360 },
@@ -209,27 +212,38 @@ static const struct xpad_device {
 	{ 0x162e, 0xbeef, "Joytech Neo-Se Take2", 0, XTYPE_XBOX360 },
 	{ 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 },
 	{ 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 },
-	{ 0x24c6, 0x542a, "Xbox ONE spectra", 0, XTYPE_XBOXONE },
-	{ 0x24c6, 0x5d04, "Razer Sabertooth", 0, XTYPE_XBOX360 },
+	{ 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf016, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 },
+	{ 0x1bad, 0xf018, "Mad Catz Street Fighter IV SE Fighting Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
+	{ 0x1bad, 0xf019, "Mad Catz Brawlstick for Xbox 360", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
+	{ 0x1bad, 0xf021, "Mad Cats Ghost Recon FS GamePad", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf023, "MLG Pro Circuit Controller (Xbox)", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf028, "Street Fighter IV FightPad", 0, XTYPE_XBOX360 },
+	{ 0x1bad, 0xf02e, "Mad Catz Fightpad", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf038, "Street Fighter IV FightStick TE", 0, XTYPE_XBOX360 },
+	{ 0x1bad, 0xf03a, "Mad Catz SFxT Fightstick Pro", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf900, "Harmonix Xbox 360 Controller", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf901, "Gamestop Xbox 360 Controller", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf903, "Tron Xbox 360 controller", 0, XTYPE_XBOX360 },
+	{ 0x1bad, 0xfa01, "MadCatz GamePad", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5000, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5303, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 },
+	{ 0x24c6, 0x531a, "PowerA Pro Ex", 0, XTYPE_XBOX360 },
+	{ 0x24c6, 0x5397, "FUS1ON Tournament Controller", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x541a, "PowerA Xbox One Mini Wired Controller", 0, XTYPE_XBOXONE },
+	{ 0x24c6, 0x542a, "Xbox ONE spectra", 0, XTYPE_XBOXONE },
 	{ 0x24c6, 0x543a, "PowerA Xbox One wired controller", 0, XTYPE_XBOXONE },
 	{ 0x24c6, 0x5500, "Hori XBOX 360 EX 2 with Turbo", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5501, "Hori Real Arcade Pro VX-SA", 0, XTYPE_XBOX360 },
+	{ 0x24c6, 0x5503, "Hori Fighting Edge", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5506, "Hori SOULCALIBUR V Stick", 0, XTYPE_XBOX360 },
+	{ 0x24c6, 0x550d, "Hori GEM Xbox controller", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5b02, "Thrustmaster, Inc. GPX Controller", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 },
+	{ 0x24c6, 0x5d04, "Razer Sabertooth", 0, XTYPE_XBOX360 },
 	{ 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX },
 	{ 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN }
 };
@@ -316,6 +330,7 @@ static struct usb_device_id xpad_table[] = {
 	XPAD_XBOX360_VENDOR(0x045e),		/* Microsoft X-Box 360 controllers */
 	XPAD_XBOXONE_VENDOR(0x045e),		/* Microsoft X-Box One controllers */
 	XPAD_XBOX360_VENDOR(0x046d),		/* Logitech X-Box 360 style controllers */
+	XPAD_XBOX360_VENDOR(0x056e),		/* Elecom JC-U3613M */
 	XPAD_XBOX360_VENDOR(0x0738),		/* Mad Catz X-Box 360 controllers */
 	{ USB_DEVICE(0x0738, 0x4540) },		/* Mad Catz Beat Pad */
 	XPAD_XBOXONE_VENDOR(0x0738),		/* Mad Catz FightStick TE 2 */
diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c
index c7a8120b13c0..79eb29550c34 100644
--- a/drivers/input/keyboard/cros_ec_keyb.c
+++ b/drivers/input/keyboard/cros_ec_keyb.c
@@ -660,7 +660,7 @@ static const struct of_device_id cros_ec_keyb_of_match[] = {
 MODULE_DEVICE_TABLE(of, cros_ec_keyb_of_match);
 #endif
 
-static const SIMPLE_DEV_PM_OPS(cros_ec_keyb_pm_ops, NULL, cros_ec_keyb_resume);
+static SIMPLE_DEV_PM_OPS(cros_ec_keyb_pm_ops, NULL, cros_ec_keyb_resume);
 
 static struct platform_driver cros_ec_keyb_driver = {
 	.probe = cros_ec_keyb_probe,
diff --git a/drivers/input/keyboard/tm2-touchkey.c b/drivers/input/keyboard/tm2-touchkey.c
index 485900f953e0..abc266e40e17 100644
--- a/drivers/input/keyboard/tm2-touchkey.c
+++ b/drivers/input/keyboard/tm2-touchkey.c
@@ -213,7 +213,7 @@ static int tm2_touchkey_probe(struct i2c_client *client,
 	/* led device */
 	touchkey->led_dev.name = TM2_TOUCHKEY_DEV_NAME;
 	touchkey->led_dev.brightness = LED_FULL;
-	touchkey->led_dev.max_brightness = LED_FULL;
+	touchkey->led_dev.max_brightness = LED_ON;
 	touchkey->led_dev.brightness_set = tm2_touchkey_led_brightness_set;
 
 	error = devm_led_classdev_register(&client->dev, &touchkey->led_dev);
diff --git a/drivers/input/misc/axp20x-pek.c b/drivers/input/misc/axp20x-pek.c
index f11807db6979..400869e61a06 100644
--- a/drivers/input/misc/axp20x-pek.c
+++ b/drivers/input/misc/axp20x-pek.c
@@ -256,6 +256,42 @@ static int axp20x_pek_probe_input_device(struct axp20x_pek *axp20x_pek,
 	return 0;
 }
 
+#ifdef CONFIG_ACPI
+static bool axp20x_pek_should_register_input(struct axp20x_pek *axp20x_pek,
+					     struct platform_device *pdev)
+{
+	unsigned long long hrv = 0;
+	acpi_status status;
+
+	if (IS_ENABLED(CONFIG_INPUT_SOC_BUTTON_ARRAY) &&
+	    axp20x_pek->axp20x->variant == AXP288_ID) {
+		status = acpi_evaluate_integer(ACPI_HANDLE(pdev->dev.parent),
+					       "_HRV", NULL, &hrv);
+		if (ACPI_FAILURE(status))
+			dev_err(&pdev->dev, "Failed to get PMIC hardware revision\n");
+
+		/*
+		 * On Cherry Trail platforms (hrv == 3), do not register the
+		 * input device if there is an "INTCFD9" or "ACPI0011" gpio
+		 * button ACPI device, as that handles the power button too,
+		 * and otherwise we end up reporting all presses twice.
+		 */
+		if (hrv == 3 && (acpi_dev_present("INTCFD9", NULL, -1) ||
+				 acpi_dev_present("ACPI0011", NULL, -1)))
+			return false;
+
+	}
+
+	return true;
+}
+#else
+static bool axp20x_pek_should_register_input(struct axp20x_pek *axp20x_pek,
+					     struct platform_device *pdev)
+{
+	return true;
+}
+#endif
+
 static int axp20x_pek_probe(struct platform_device *pdev)
 {
 	struct axp20x_pek *axp20x_pek;
@@ -268,13 +304,7 @@ static int axp20x_pek_probe(struct platform_device *pdev)
 
 	axp20x_pek->axp20x = dev_get_drvdata(pdev->dev.parent);
 
-	/*
-	 * Do not register the input device if there is an "INTCFD9"
-	 * gpio button ACPI device, that handles the power button too,
-	 * and otherwise we end up reporting all presses twice.
-	 */
-	if (!acpi_dev_found("INTCFD9") ||
-	    !IS_ENABLED(CONFIG_INPUT_SOC_BUTTON_ARRAY)) {
+	if (axp20x_pek_should_register_input(axp20x_pek, pdev)) {
 		error = axp20x_pek_probe_input_device(axp20x_pek, pdev);
 		if (error)
 			return error;
diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c
index e37d37273182..f600f3a7a3c6 100644
--- a/drivers/input/misc/soc_button_array.c
+++ b/drivers/input/misc/soc_button_array.c
@@ -248,7 +248,8 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev)
 
 	if (!btns_desc) {
 		dev_err(dev, "ACPI Button Descriptors not found\n");
-		return ERR_PTR(-ENODEV);
+		button_info = ERR_PTR(-ENODEV);
+		goto out;
 	}
 
 	/* The first package describes the collection */
@@ -264,24 +265,31 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev)
 	}
 	if (collection_uid == -1) {
 		dev_err(dev, "Invalid Button Collection Descriptor\n");
-		return ERR_PTR(-ENODEV);
+		button_info = ERR_PTR(-ENODEV);
+		goto out;
 	}
 
 	/* There are package.count - 1 buttons + 1 terminating empty entry */
 	button_info = devm_kcalloc(dev, btns_desc->package.count,
 				   sizeof(*button_info), GFP_KERNEL);
-	if (!button_info)
-		return ERR_PTR(-ENOMEM);
+	if (!button_info) {
+		button_info = ERR_PTR(-ENOMEM);
+		goto out;
+	}
 
 	/* Parse the button descriptors */
 	for (i = 1, btn = 0; i < btns_desc->package.count; i++, btn++) {
 		if (soc_button_parse_btn_desc(dev,
 					      &btns_desc->package.elements[i],
 					      collection_uid,
-					      &button_info[btn]))
-			return ERR_PTR(-ENODEV);
+					      &button_info[btn])) {
+			button_info = ERR_PTR(-ENODEV);
+			goto out;
+		}
 	}
 
+out:
+	kfree(buf.pointer);
 	return button_info;
 }
 
diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c
index a679e56c44cd..f431da07f861 100644
--- a/drivers/input/mouse/elan_i2c_i2c.c
+++ b/drivers/input/mouse/elan_i2c_i2c.c
@@ -554,32 +554,34 @@ static int elan_i2c_finish_fw_update(struct i2c_client *client,
 				     struct completion *completion)
 {
 	struct device *dev = &client->dev;
-	long ret;
 	int error;
 	int len;
-	u8 buffer[ETP_I2C_INF_LENGTH];
+	u8 buffer[ETP_I2C_REPORT_LEN];
+
+	len = i2c_master_recv(client, buffer, ETP_I2C_REPORT_LEN);
+	if (len != ETP_I2C_REPORT_LEN) {
+		error = len < 0 ? len : -EIO;
+		dev_warn(dev, "failed to read I2C data after FW WDT reset: %d (%d)\n",
+			error, len);
+	}
 
 	reinit_completion(completion);
 	enable_irq(client->irq);
 
 	error = elan_i2c_write_cmd(client, ETP_I2C_STAND_CMD, ETP_I2C_RESET);
-	if (!error)
-		ret = wait_for_completion_interruptible_timeout(completion,
-							msecs_to_jiffies(300));
-	disable_irq(client->irq);
-
 	if (error) {
 		dev_err(dev, "device reset failed: %d\n", error);
-		return error;
-	} else if (ret == 0) {
+	} else if (!wait_for_completion_timeout(completion,
+						msecs_to_jiffies(300))) {
 		dev_err(dev, "timeout waiting for device reset\n");
-		return -ETIMEDOUT;
-	} else if (ret < 0) {
-		error = ret;
-		dev_err(dev, "error waiting for device reset: %d\n", error);
-		return error;
+		error = -ETIMEDOUT;
 	}
 
+	disable_irq(client->irq);
+
+	if (error)
+		return error;
+
 	len = i2c_master_recv(client, buffer, ETP_I2C_INF_LENGTH);
 	if (len != ETP_I2C_INF_LENGTH) {
 		error = len < 0 ? len : -EIO;
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index e73d968023f7..f1fa1f172107 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1118,8 +1118,10 @@ static int elantech_get_resolution_v4(struct psmouse *psmouse,
  * Asus UX32VD             0x361f02        00, 15, 0e      clickpad
  * Avatar AVIU-145A2       0x361f00        ?               clickpad
  * Fujitsu LIFEBOOK E544   0x470f00        d0, 12, 09      2 hw buttons
+ * Fujitsu LIFEBOOK E546   0x470f00        50, 12, 09      2 hw buttons
  * Fujitsu LIFEBOOK E547   0x470f00        50, 12, 09      2 hw buttons
  * Fujitsu LIFEBOOK E554   0x570f01        40, 14, 0c      2 hw buttons
+ * Fujitsu LIFEBOOK E557   0x570f01        40, 14, 0c      2 hw buttons
  * Fujitsu T725            0x470f01        05, 12, 09      2 hw buttons
  * Fujitsu H730            0x570f00        c0, 14, 0c      3 hw buttons (**)
  * Gigabyte U2442          0x450f01        58, 17, 0c      2 hw buttons
@@ -1525,6 +1527,13 @@ static const struct dmi_system_id elantech_dmi_force_crc_enabled[] = {
 		},
 	},
 	{
+		/* Fujitsu LIFEBOOK E546  does not work with crc_enabled == 0 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E546"),
+		},
+	},
+	{
 		/* Fujitsu LIFEBOOK E547 does not work with crc_enabled == 0 */
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
@@ -1546,6 +1555,13 @@ static const struct dmi_system_id elantech_dmi_force_crc_enabled[] = {
 		},
 	},
 	{
+		/* Fujitsu LIFEBOOK E557 does not work with crc_enabled == 0 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E557"),
+		},
+	},
+	{
 		/* Fujitsu LIFEBOOK U745 does not work with crc_enabled == 0 */
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
diff --git a/drivers/input/mouse/inport.c b/drivers/input/mouse/inport.c
index 3827a22362de..9ce71dfa0de1 100644
--- a/drivers/input/mouse/inport.c
+++ b/drivers/input/mouse/inport.c
@@ -78,7 +78,7 @@ MODULE_LICENSE("GPL");
 #define INPORT_IRQ		5
 
 static int inport_irq = INPORT_IRQ;
-module_param_named(irq, inport_irq, uint, 0);
+module_param_hw_named(irq, inport_irq, uint, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ number (5=default)");
 
 static struct input_dev *inport_dev;
diff --git a/drivers/input/mouse/logibm.c b/drivers/input/mouse/logibm.c
index e2413113df22..6f165e053f4d 100644
--- a/drivers/input/mouse/logibm.c
+++ b/drivers/input/mouse/logibm.c
@@ -69,7 +69,7 @@ MODULE_LICENSE("GPL");
 #define LOGIBM_IRQ		5
 
 static int logibm_irq = LOGIBM_IRQ;
-module_param_named(irq, logibm_irq, uint, 0);
+module_param_hw_named(irq, logibm_irq, uint, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ number (5=default)");
 
 static struct input_dev *logibm_dev;
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 131df9d3660f..16c30460ef04 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -176,6 +176,12 @@ static const char * const smbus_pnp_ids[] = {
 	NULL
 };
 
+static const char * const forcepad_pnp_ids[] = {
+	"SYN300D",
+	"SYN3014",
+	NULL
+};
+
 /*
  * Send a command to the synpatics touchpad by special commands
  */
@@ -397,6 +403,8 @@ static int synaptics_query_hardware(struct psmouse *psmouse,
 {
 	int error;
 
+	memset(info, 0, sizeof(*info));
+
 	error = synaptics_identify(psmouse, info);
 	if (error)
 		return error;
@@ -480,13 +488,6 @@ static const struct min_max_quirk min_max_pnpid_table[] = {
 	{ }
 };
 
-/* This list has been kindly provided by Synaptics. */
-static const char * const forcepad_pnp_ids[] = {
-	"SYN300D",
-	"SYN3014",
-	NULL
-};
-
 /*****************************************************************************
  *	Synaptics communications functions
  ****************************************************************************/
@@ -1687,7 +1688,8 @@ enum {
 	SYNAPTICS_INTERTOUCH_ON,
 };
 
-static int synaptics_intertouch = SYNAPTICS_INTERTOUCH_NOT_SET;
+static int synaptics_intertouch = IS_ENABLED(CONFIG_RMI4_SMB) ?
+		SYNAPTICS_INTERTOUCH_NOT_SET : SYNAPTICS_INTERTOUCH_OFF;
 module_param_named(synaptics_intertouch, synaptics_intertouch, int, 0644);
 MODULE_PARM_DESC(synaptics_intertouch, "Use a secondary bus for the Synaptics device.");
 
@@ -1737,8 +1739,16 @@ static int synaptics_setup_intertouch(struct psmouse *psmouse,
 
 	if (synaptics_intertouch == SYNAPTICS_INTERTOUCH_NOT_SET) {
 		if (!psmouse_matches_pnp_id(psmouse, topbuttonpad_pnp_ids) &&
-		    !psmouse_matches_pnp_id(psmouse, smbus_pnp_ids))
+		    !psmouse_matches_pnp_id(psmouse, smbus_pnp_ids)) {
+
+			if (!psmouse_matches_pnp_id(psmouse, forcepad_pnp_ids))
+				psmouse_info(psmouse,
+					     "Your touchpad (%s) says it can support a different bus. "
+					     "If i2c-hid and hid-rmi are not used, you might want to try setting psmouse.synaptics_intertouch to 1 and report this to linux-input@vger.kernel.org.\n",
+					     psmouse->ps2dev.serio->firmware_id);
+
 			return -ENXIO;
+		}
 	}
 
 	psmouse_info(psmouse, "Trying to set up SMBus access\n");
@@ -1810,6 +1820,15 @@ int synaptics_init(struct psmouse *psmouse)
 	}
 
 	if (SYN_CAP_INTERTOUCH(info.ext_cap_0c)) {
+		if ((!IS_ENABLED(CONFIG_RMI4_SMB) ||
+		     !IS_ENABLED(CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS)) &&
+		    /* Forcepads need F21, which is not ready */
+		    !psmouse_matches_pnp_id(psmouse, forcepad_pnp_ids)) {
+			psmouse_warn(psmouse,
+				     "The touchpad can support a better bus than the too old PS/2 protocol. "
+				     "Make sure MOUSE_PS2_SYNAPTICS_SMBUS and RMI4_SMB are enabled to get a better touchpad experience.\n");
+		}
+
 		error = synaptics_setup_intertouch(psmouse, &info, true);
 		if (!error)
 			return PSMOUSE_SYNAPTICS_SMBUS;
diff --git a/drivers/input/rmi4/rmi_f03.c b/drivers/input/rmi4/rmi_f03.c
index 77dad045a468..ad71a5e768dc 100644
--- a/drivers/input/rmi4/rmi_f03.c
+++ b/drivers/input/rmi4/rmi_f03.c
@@ -146,7 +146,7 @@ static int rmi_f03_register_pt(struct f03_data *f03)
 	if (!serio)
 		return -ENOMEM;
 
-	serio->id.type = SERIO_8042;
+	serio->id.type = SERIO_PS_PSTHRU;
 	serio->write = rmi_f03_pt_write;
 	serio->port_data = f03;
 
diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c
index dea63e2db3e6..f5206e2c767e 100644
--- a/drivers/input/rmi4/rmi_f54.c
+++ b/drivers/input/rmi4/rmi_f54.c
@@ -31,9 +31,6 @@
 #define F54_GET_REPORT          1
 #define F54_FORCE_CAL           2
 
-/* Fixed sizes of reports */
-#define F54_QUERY_LEN			27
-
 /* F54 capabilities */
 #define F54_CAP_BASELINE	(1 << 2)
 #define F54_CAP_IMAGE8		(1 << 3)
@@ -95,7 +92,6 @@ struct rmi_f54_reports {
 struct f54_data {
 	struct rmi_function *fn;
 
-	u8 qry[F54_QUERY_LEN];
 	u8 num_rx_electrodes;
 	u8 num_tx_electrodes;
 	u8 capabilities;
@@ -632,22 +628,23 @@ static int rmi_f54_detect(struct rmi_function *fn)
 {
 	int error;
 	struct f54_data *f54;
+	u8 buf[6];
 
 	f54 = dev_get_drvdata(&fn->dev);
 
 	error = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr,
-			       &f54->qry, sizeof(f54->qry));
+			       buf, sizeof(buf));
 	if (error) {
 		dev_err(&fn->dev, "%s: Failed to query F54 properties\n",
 			__func__);
 		return error;
 	}
 
-	f54->num_rx_electrodes = f54->qry[0];
-	f54->num_tx_electrodes = f54->qry[1];
-	f54->capabilities = f54->qry[2];
-	f54->clock_rate = f54->qry[3] | (f54->qry[4] << 8);
-	f54->family = f54->qry[5];
+	f54->num_rx_electrodes = buf[0];
+	f54->num_tx_electrodes = buf[1];
+	f54->capabilities = buf[2];
+	f54->clock_rate = buf[3] | (buf[4] << 8);
+	f54->family = buf[5];
 
 	rmi_dbg(RMI_DEBUG_FN, &fn->dev, "F54 num_rx_electrodes: %d\n",
 		f54->num_rx_electrodes);
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 09720d950686..f932a83b4990 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -723,6 +723,13 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"),
 		},
 	},
+	{
+		/* Fujitsu UH554 laptop */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK UH544"),
+		},
+	},
 	{ }
 };
 
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 2302aef2b2d4..dd042a9b0aaa 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -350,6 +350,7 @@ static bool mxt_object_readable(unsigned int type)
 	case MXT_TOUCH_KEYARRAY_T15:
 	case MXT_TOUCH_PROXIMITY_T23:
 	case MXT_TOUCH_PROXKEY_T52:
+	case MXT_TOUCH_MULTITOUCHSCREEN_T100:
 	case MXT_PROCI_GRIPFACE_T20:
 	case MXT_PROCG_NOISE_T22:
 	case MXT_PROCI_ONETOUCH_T24:
diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
index 8cf8d8d5d4ef..f872817e81e4 100644
--- a/drivers/input/touchscreen/edt-ft5x06.c
+++ b/drivers/input/touchscreen/edt-ft5x06.c
@@ -471,7 +471,7 @@ static EDT_ATTR(gain, S_IWUSR | S_IRUGO, WORK_REGISTER_GAIN,
 static EDT_ATTR(offset, S_IWUSR | S_IRUGO, WORK_REGISTER_OFFSET,
 		M09_REGISTER_OFFSET, 0, 31);
 static EDT_ATTR(threshold, S_IWUSR | S_IRUGO, WORK_REGISTER_THRESHOLD,
-		M09_REGISTER_THRESHOLD, 20, 80);
+		M09_REGISTER_THRESHOLD, 0, 80);
 static EDT_ATTR(report_rate, S_IWUSR | S_IRUGO, WORK_REGISTER_REPORT_RATE,
 		NO_REGISTER, 3, 14);
 
diff --git a/drivers/input/touchscreen/mk712.c b/drivers/input/touchscreen/mk712.c
index 36e57deacd03..bd5352824f77 100644
--- a/drivers/input/touchscreen/mk712.c
+++ b/drivers/input/touchscreen/mk712.c
@@ -50,11 +50,11 @@ MODULE_DESCRIPTION("ICS MicroClock MK712 TouchScreen driver");
 MODULE_LICENSE("GPL");
 
 static unsigned int mk712_io = 0x260;	/* Also 0x200, 0x208, 0x300 */
-module_param_named(io, mk712_io, uint, 0);
+module_param_hw_named(io, mk712_io, uint, ioport, 0);
 MODULE_PARM_DESC(io, "I/O base address of MK712 touchscreen controller");
 
 static unsigned int mk712_irq = 10;	/* Also 12, 14, 15 */
-module_param_named(irq, mk712_irq, uint, 0);
+module_param_hw_named(irq, mk712_irq, uint, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ of MK712 touchscreen controller");
 
 /* eight 8-bit registers */
diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c
index 813dd68a5c82..0dbcf105f7db 100644
--- a/drivers/input/touchscreen/silead.c
+++ b/drivers/input/touchscreen/silead.c
@@ -526,6 +526,7 @@ static int __maybe_unused silead_ts_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 
+	disable_irq(client->irq);
 	silead_ts_set_power(client, SILEAD_POWER_OFF);
 	return 0;
 }
@@ -551,6 +552,8 @@ static int __maybe_unused silead_ts_resume(struct device *dev)
 		return -ENODEV;
 	}
 
+	enable_irq(client->irq);
+
 	return 0;
 }
 
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 63cacf5d6cf2..0f1219fa8561 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3879,11 +3879,9 @@ static void irte_ga_prepare(void *entry,
 			    u8 vector, u32 dest_apicid, int devid)
 {
 	struct irte_ga *irte = (struct irte_ga *) entry;
-	struct iommu_dev_data *dev_data = search_dev_data(devid);
 
 	irte->lo.val                      = 0;
 	irte->hi.val                      = 0;
-	irte->lo.fields_remap.guest_mode  = dev_data ? dev_data->use_vapic : 0;
 	irte->lo.fields_remap.int_type    = delivery_mode;
 	irte->lo.fields_remap.dm          = dest_mode;
 	irte->hi.fields.vector            = vector;
@@ -3939,10 +3937,10 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index,
 	struct irte_ga *irte = (struct irte_ga *) entry;
 	struct iommu_dev_data *dev_data = search_dev_data(devid);
 
-	if (!dev_data || !dev_data->use_vapic) {
+	if (!dev_data || !dev_data->use_vapic ||
+	    !irte->lo.fields_remap.guest_mode) {
 		irte->hi.fields.vector = vector;
 		irte->lo.fields_remap.destination = dest_apicid;
-		irte->lo.fields_remap.guest_mode = 0;
 		modify_irte_ga(devid, index, irte, NULL);
 	}
 }
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 8348f366ddd1..62618e77bedc 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -396,13 +396,13 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 		dma_addr_t iova, size_t size)
 {
 	struct iova_domain *iovad = &cookie->iovad;
-	unsigned long shift = iova_shift(iovad);
 
 	/* The MSI case is only ever cleaning up its most recent allocation */
 	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
 		cookie->msi_iova -= size;
 	else
-		free_iova_fast(iovad, iova >> shift, size >> shift);
+		free_iova_fast(iovad, iova_pfn(iovad, iova),
+				size >> iova_shift(iovad));
 }
 
 static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
@@ -617,11 +617,14 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
-	struct iova_domain *iovad = &cookie->iovad;
-	size_t iova_off = iova_offset(iovad, phys);
+	size_t iova_off = 0;
 	dma_addr_t iova;
 
-	size = iova_align(iovad, size + iova_off);
+	if (cookie->type == IOMMU_DMA_IOVA_COOKIE) {
+		iova_off = iova_offset(&cookie->iovad, phys);
+		size = iova_align(&cookie->iovad, size + iova_off);
+	}
+
 	iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
 	if (!iova)
 		return DMA_ERROR_CODE;
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index cbf7763d8091..c8b0329c85d2 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1808,10 +1808,9 @@ IOMMU_INIT_POST(detect_intel_iommu);
  * for Directed-IO Architecture Specifiction, Rev 2.2, Section 8.8
  * "Remapping Hardware Unit Hot Plug".
  */
-static u8 dmar_hp_uuid[] = {
-	/* 0000 */    0xA6, 0xA3, 0xC1, 0xD8, 0x9B, 0xBE, 0x9B, 0x4C,
-	/* 0008 */    0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF
-};
+static guid_t dmar_hp_guid =
+	GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
+		  0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
 
 /*
  * Currently there's only one revision and BIOS will not check the revision id,
@@ -1824,7 +1823,7 @@ static u8 dmar_hp_uuid[] = {
 
 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
 {
-	return acpi_check_dsm(handle, dmar_hp_uuid, DMAR_DSM_REV_ID, 1 << func);
+	return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
 }
 
 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
@@ -1843,7 +1842,7 @@ static int dmar_walk_dsm_resource(acpi_handle handle, int func,
 	if (!dmar_detect_dsm(handle, func))
 		return 0;
 
-	obj = acpi_evaluate_dsm_typed(handle, dmar_hp_uuid, DMAR_DSM_REV_ID,
+	obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
 				      func, NULL, ACPI_TYPE_BUFFER);
 	if (!obj)
 		return -ENODEV;
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 90ab0115d78e..8500deda9175 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2055,11 +2055,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	if (context_copied(context)) {
 		u16 did_old = context_domain_id(context);
 
-		if (did_old >= 0 && did_old < cap_ndoms(iommu->cap))
+		if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) {
 			iommu->flush.flush_context(iommu, did_old,
 						   (((u16)bus) << 8) | devfn,
 						   DMA_CCMD_MASK_NOBIT,
 						   DMA_CCMD_DEVICE_INVL);
+			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
+						 DMA_TLB_DSI_FLUSH);
+		}
 	}
 
 	pgd = domain->pgd;
@@ -4312,7 +4315,7 @@ int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
 	struct acpi_dmar_atsr *atsr;
 	struct dmar_atsr_unit *atsru;
 
-	if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
+	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
 		return 0;
 
 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
@@ -4562,7 +4565,7 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
 	struct acpi_dmar_atsr *atsr;
 	struct acpi_dmar_reserved_memory *rmrr;
 
-	if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
+	if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
 		return 0;
 
 	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index a27ef570c328..bc1efbfb9ddf 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -18,6 +18,7 @@
 #include <linux/clk.h>
 #include <linux/component.h>
 #include <linux/device.h>
+#include <linux/dma-mapping.h>
 #include <linux/dma-iommu.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 9f44ee8ea1bc..8cb60829a7a1 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -103,7 +103,7 @@ static bool of_iommu_driver_present(struct device_node *np)
 	 * it never will be. We don't want to defer indefinitely, nor attempt
 	 * to dereference __iommu_of_table after it's been freed.
 	 */
-	if (system_state > SYSTEM_BOOTING)
+	if (system_state >= SYSTEM_RUNNING)
 		return false;
 
 	return of_match_node(&__iommu_of_table, np);
@@ -118,6 +118,7 @@ static const struct iommu_ops
 
 	ops = iommu_ops_from_fwnode(fwnode);
 	if ((ops && !ops->of_xlate) ||
+	    !of_device_is_available(iommu_spec->np) ||
 	    (!ops && !of_iommu_driver_present(iommu_spec->np)))
 		return NULL;
 
@@ -236,6 +237,12 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
 			ops = ERR_PTR(err);
 	}
 
+	/* Ignore all other errors apart from EPROBE_DEFER */
+	if (IS_ERR(ops) && (PTR_ERR(ops) != -EPROBE_DEFER)) {
+		dev_dbg(dev, "Adding to IOMMU failed: %ld\n", PTR_ERR(ops));
+		ops = NULL;
+	}
+
 	return ops;
 }
 
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 595d0c95563b..478f8ace2664 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -134,7 +134,9 @@ config IMGPDC_IRQ
 config IRQ_MIPS_CPU
 	bool
 	select GENERIC_IRQ_CHIP
+	select GENERIC_IRQ_IPI if SYS_SUPPORTS_MULTITHREADING
 	select IRQ_DOMAIN
+	select IRQ_DOMAIN_HIERARCHY if GENERIC_IRQ_IPI
 
 config CLPS711X_IRQCHIP
 	bool
diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index d2306c821ebb..31d6b5a582d2 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -106,10 +106,7 @@ static inline void get_mbigen_type_reg(irq_hw_number_t hwirq,
 static inline void get_mbigen_clear_reg(irq_hw_number_t hwirq,
 					u32 *mask, u32 *addr)
 {
-	unsigned int ofst;
-
-	hwirq -= RESERVED_IRQ_PER_MBIGEN_CHIP;
-	ofst = hwirq / 32 * 4;
+	unsigned int ofst = (hwirq / 32) * 4;
 
 	*mask = 1 << (hwirq % 32);
 	*addr = ofst + REG_MBIGEN_CLEAR_OFFSET;
@@ -337,9 +334,15 @@ static int mbigen_device_probe(struct platform_device *pdev)
 	mgn_chip->pdev = pdev;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	mgn_chip->base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(mgn_chip->base))
-		return PTR_ERR(mgn_chip->base);
+	if (!res)
+		return -EINVAL;
+
+	mgn_chip->base = devm_ioremap(&pdev->dev, res->start,
+				      resource_size(res));
+	if (!mgn_chip->base) {
+		dev_err(&pdev->dev, "failed to ioremap %pR\n", res);
+		return -ENOMEM;
+	}
 
 	if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node)
 		err = mbigen_of_create_domain(pdev, mgn_chip);
diff --git a/drivers/irqchip/irq-mips-cpu.c b/drivers/irqchip/irq-mips-cpu.c
index 8c504f562e9d..b247f3c743ac 100644
--- a/drivers/irqchip/irq-mips-cpu.c
+++ b/drivers/irqchip/irq-mips-cpu.c
@@ -17,15 +17,14 @@
 /*
  * Almost all MIPS CPUs define 8 interrupt sources.  They are typically
  * level triggered (i.e., cannot be cleared from CPU; must be cleared from
- * device).  The first two are software interrupts which we don't really
- * use or support.  The last one is usually the CPU timer interrupt if
- * counter register is present or, for CPUs with an external FPU, by
- * convention it's the FPU exception interrupt.
+ * device).
  *
- * Don't even think about using this on SMP.  You have been warned.
+ * The first two are software interrupts (i.e. not exposed as pins) which
+ * may be used for IPIs in multi-threaded single-core systems.
  *
- * This file exports one global function:
- *	void mips_cpu_irq_init(void);
+ * The last one is usually the CPU timer interrupt if the counter register
+ * is present, or for old CPUs with an external FPU by convention it's the
+ * FPU exception interrupt.
  */
 #include <linux/init.h>
 #include <linux/interrupt.h>
@@ -39,15 +38,18 @@
 #include <asm/mipsmtregs.h>
 #include <asm/setup.h>
 
+static struct irq_domain *irq_domain;
+static struct irq_domain *ipi_domain;
+
 static inline void unmask_mips_irq(struct irq_data *d)
 {
-	set_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
+	set_c0_status(IE_SW0 << d->hwirq);
 	irq_enable_hazard();
 }
 
 static inline void mask_mips_irq(struct irq_data *d)
 {
-	clear_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
+	clear_c0_status(IE_SW0 << d->hwirq);
 	irq_disable_hazard();
 }
 
@@ -70,7 +72,7 @@ static unsigned int mips_mt_cpu_irq_startup(struct irq_data *d)
 {
 	unsigned int vpflags = dvpe();
 
-	clear_c0_cause(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
+	clear_c0_cause(C_SW0 << d->hwirq);
 	evpe(vpflags);
 	unmask_mips_irq(d);
 	return 0;
@@ -83,11 +85,34 @@ static unsigned int mips_mt_cpu_irq_startup(struct irq_data *d)
 static void mips_mt_cpu_irq_ack(struct irq_data *d)
 {
 	unsigned int vpflags = dvpe();
-	clear_c0_cause(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
+	clear_c0_cause(C_SW0 << d->hwirq);
 	evpe(vpflags);
 	mask_mips_irq(d);
 }
 
+#ifdef CONFIG_GENERIC_IRQ_IPI
+
+static void mips_mt_send_ipi(struct irq_data *d, unsigned int cpu)
+{
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	unsigned long flags;
+	int vpflags;
+
+	local_irq_save(flags);
+
+	/* We can only send IPIs to VPEs within the local core */
+	WARN_ON(cpu_data[cpu].core != current_cpu_data.core);
+
+	vpflags = dvpe();
+	settc(cpu_vpe_id(&cpu_data[cpu]));
+	write_vpe_c0_cause(read_vpe_c0_cause() | (C_SW0 << hwirq));
+	evpe(vpflags);
+
+	local_irq_restore(flags);
+}
+
+#endif /* CONFIG_GENERIC_IRQ_IPI */
+
 static struct irq_chip mips_mt_cpu_irq_controller = {
 	.name		= "MIPS",
 	.irq_startup	= mips_mt_cpu_irq_startup,
@@ -98,11 +123,15 @@ static struct irq_chip mips_mt_cpu_irq_controller = {
 	.irq_eoi	= unmask_mips_irq,
 	.irq_disable	= mask_mips_irq,
 	.irq_enable	= unmask_mips_irq,
+#ifdef CONFIG_GENERIC_IRQ_IPI
+	.ipi_send_single = mips_mt_send_ipi,
+#endif
 };
 
 asmlinkage void __weak plat_irq_dispatch(void)
 {
 	unsigned long pending = read_c0_cause() & read_c0_status() & ST0_IM;
+	unsigned int virq;
 	int irq;
 
 	if (!pending) {
@@ -113,7 +142,11 @@ asmlinkage void __weak plat_irq_dispatch(void)
 	pending >>= CAUSEB_IP;
 	while (pending) {
 		irq = fls(pending) - 1;
-		do_IRQ(MIPS_CPU_IRQ_BASE + irq);
+		if (IS_ENABLED(CONFIG_GENERIC_IRQ_IPI) && irq < 2)
+			virq = irq_linear_revmap(ipi_domain, irq);
+		else
+			virq = irq_linear_revmap(irq_domain, irq);
+		do_IRQ(virq);
 		pending &= ~BIT(irq);
 	}
 }
@@ -143,18 +176,97 @@ static const struct irq_domain_ops mips_cpu_intc_irq_domain_ops = {
 	.xlate = irq_domain_xlate_onecell,
 };
 
-static void __init __mips_cpu_irq_init(struct device_node *of_node)
+#ifdef CONFIG_GENERIC_IRQ_IPI
+
+struct cpu_ipi_domain_state {
+	DECLARE_BITMAP(allocated, 2);
+};
+
+static int mips_cpu_ipi_alloc(struct irq_domain *domain, unsigned int virq,
+			      unsigned int nr_irqs, void *arg)
 {
-	struct irq_domain *domain;
+	struct cpu_ipi_domain_state *state = domain->host_data;
+	unsigned int i, hwirq;
+	int ret;
 
+	for (i = 0; i < nr_irqs; i++) {
+		hwirq = find_first_zero_bit(state->allocated, 2);
+		if (hwirq == 2)
+			return -EBUSY;
+		bitmap_set(state->allocated, hwirq, 1);
+
+		ret = irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq,
+						    &mips_mt_cpu_irq_controller,
+						    NULL);
+		if (ret)
+			return ret;
+
+		ret = irq_set_irq_type(virq + i, IRQ_TYPE_LEVEL_HIGH);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int mips_cpu_ipi_match(struct irq_domain *d, struct device_node *node,
+			      enum irq_domain_bus_token bus_token)
+{
+	bool is_ipi;
+
+	switch (bus_token) {
+	case DOMAIN_BUS_IPI:
+		is_ipi = d->bus_token == bus_token;
+		return (!node || (to_of_node(d->fwnode) == node)) && is_ipi;
+	default:
+		return 0;
+	}
+}
+
+static const struct irq_domain_ops mips_cpu_ipi_chip_ops = {
+	.alloc	= mips_cpu_ipi_alloc,
+	.match	= mips_cpu_ipi_match,
+};
+
+static void mips_cpu_register_ipi_domain(struct device_node *of_node)
+{
+	struct cpu_ipi_domain_state *ipi_domain_state;
+
+	ipi_domain_state = kzalloc(sizeof(*ipi_domain_state), GFP_KERNEL);
+	ipi_domain = irq_domain_add_hierarchy(irq_domain,
+					      IRQ_DOMAIN_FLAG_IPI_SINGLE,
+					      2, of_node,
+					      &mips_cpu_ipi_chip_ops,
+					      ipi_domain_state);
+	if (!ipi_domain)
+		panic("Failed to add MIPS CPU IPI domain");
+	ipi_domain->bus_token = DOMAIN_BUS_IPI;
+}
+
+#else /* !CONFIG_GENERIC_IRQ_IPI */
+
+static inline void mips_cpu_register_ipi_domain(struct device_node *of_node) {}
+
+#endif /* !CONFIG_GENERIC_IRQ_IPI */
+
+static void __init __mips_cpu_irq_init(struct device_node *of_node)
+{
 	/* Mask interrupts. */
 	clear_c0_status(ST0_IM);
 	clear_c0_cause(CAUSEF_IP);
 
-	domain = irq_domain_add_legacy(of_node, 8, MIPS_CPU_IRQ_BASE, 0,
-				       &mips_cpu_intc_irq_domain_ops, NULL);
-	if (!domain)
+	irq_domain = irq_domain_add_legacy(of_node, 8, MIPS_CPU_IRQ_BASE, 0,
+					   &mips_cpu_intc_irq_domain_ops,
+					   NULL);
+	if (!irq_domain)
 		panic("Failed to add irqdomain for MIPS CPU");
+
+	/*
+	 * Only proceed to register the software interrupt IPI implementation
+	 * for CPUs which implement the MIPS MT (multi-threading) ASE.
+	 */
+	if (cpu_has_mipsmt)
+		mips_cpu_register_ipi_domain(of_node);
 }
 
 void __init mips_cpu_irq_init(void)
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index eb7fbe159963..929f8558bf1c 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -140,7 +140,7 @@ static inline void gic_map_to_vpe(unsigned int intr, unsigned int vpe)
 }
 
 #ifdef CONFIG_CLKSRC_MIPS_GIC
-u64 gic_read_count(void)
+u64 notrace gic_read_count(void)
 {
 	unsigned int hi, hi2, lo;
 
@@ -167,7 +167,7 @@ unsigned int gic_get_count_width(void)
 	return bits;
 }
 
-void gic_write_compare(u64 cnt)
+void notrace gic_write_compare(u64 cnt)
 {
 	if (mips_cm_is64) {
 		gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_COMPARE), cnt);
@@ -179,7 +179,7 @@ void gic_write_compare(u64 cnt)
 	}
 }
 
-void gic_write_cpu_compare(u64 cnt, int cpu)
+void notrace gic_write_cpu_compare(u64 cnt, int cpu)
 {
 	unsigned long flags;
 
diff --git a/drivers/irqchip/irq-xtensa-mx.c b/drivers/irqchip/irq-xtensa-mx.c
index bb3ac5fe5846..72a391e01011 100644
--- a/drivers/irqchip/irq-xtensa-mx.c
+++ b/drivers/irqchip/irq-xtensa-mx.c
@@ -142,7 +142,7 @@ static struct irq_chip xtensa_mx_irq_chip = {
 int __init xtensa_mx_init_legacy(struct device_node *interrupt_parent)
 {
 	struct irq_domain *root_domain =
-		irq_domain_add_legacy(NULL, NR_IRQS, 0, 0,
+		irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0,
 				&xtensa_mx_irq_domain_ops,
 				&xtensa_mx_irq_chip);
 	irq_set_default_host(root_domain);
diff --git a/drivers/irqchip/irq-xtensa-pic.c b/drivers/irqchip/irq-xtensa-pic.c
index 472ae1770964..f728755fa292 100644
--- a/drivers/irqchip/irq-xtensa-pic.c
+++ b/drivers/irqchip/irq-xtensa-pic.c
@@ -89,7 +89,7 @@ static struct irq_chip xtensa_irq_chip = {
 int __init xtensa_pic_init_legacy(struct device_node *interrupt_parent)
 {
 	struct irq_domain *root_domain =
-		irq_domain_add_legacy(NULL, NR_IRQS, 0, 0,
+		irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0,
 				&xtensa_irq_domain_ops, &xtensa_irq_chip);
 	irq_set_default_host(root_domain);
 	return 0;
diff --git a/drivers/isdn/hardware/avm/b1isa.c b/drivers/isdn/hardware/avm/b1isa.c
index 31ef8130a87f..54e871a47387 100644
--- a/drivers/isdn/hardware/avm/b1isa.c
+++ b/drivers/isdn/hardware/avm/b1isa.c
@@ -169,8 +169,8 @@ static struct pci_dev isa_dev[MAX_CARDS];
 static int io[MAX_CARDS];
 static int irq[MAX_CARDS];
 
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(io, "I/O base address(es)");
 MODULE_PARM_DESC(irq, "IRQ number(s) (assigned)");
 
diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c
index 72ef18853951..9516203c735f 100644
--- a/drivers/isdn/hardware/avm/t1isa.c
+++ b/drivers/isdn/hardware/avm/t1isa.c
@@ -516,8 +516,8 @@ static int io[MAX_CARDS];
 static int irq[MAX_CARDS];
 static int cardnr[MAX_CARDS];
 
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_array(cardnr, int, NULL, 0);
 MODULE_PARM_DESC(io, "I/O base address(es)");
 MODULE_PARM_DESC(irq, "IRQ number(s) (assigned)");
diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c
index 2d12c6ceeb89..c7d68675b028 100644
--- a/drivers/isdn/hisax/config.c
+++ b/drivers/isdn/hisax/config.c
@@ -350,13 +350,13 @@ MODULE_AUTHOR("Karsten Keil");
 MODULE_LICENSE("GPL");
 module_param_array(type, int, NULL, 0);
 module_param_array(protocol, int, NULL, 0);
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
-module_param_array(mem, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
+module_param_hw_array(mem, int, iomem, NULL, 0);
 module_param(id, charp, 0);
 #ifdef IO0_IO1
-module_param_array(io0, int, NULL, 0);
-module_param_array(io1, int, NULL, 0);
+module_param_hw_array(io0, int, ioport, NULL, 0);
+module_param_hw_array(io1, int, ioport, NULL, 0);
 #endif
 #endif /* MODULE */
 
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index d07dd5196ffc..8aa158a09180 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -2364,7 +2364,7 @@ static struct ippp_ccp_reset_state *isdn_ppp_ccp_reset_alloc_state(struct ippp_s
 		       id);
 		return NULL;
 	} else {
-		rs = kzalloc(sizeof(struct ippp_ccp_reset_state), GFP_KERNEL);
+		rs = kzalloc(sizeof(struct ippp_ccp_reset_state), GFP_ATOMIC);
 		if (!rs)
 			return NULL;
 		rs->state = CCPResetIdle;
diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c
index 8b7faea2ddf8..422dced7c90a 100644
--- a/drivers/isdn/mISDN/stack.c
+++ b/drivers/isdn/mISDN/stack.c
@@ -75,7 +75,7 @@ send_socklist(struct mISDN_sock_list *sl, struct sk_buff *skb)
 		if (sk->sk_state != MISDN_BOUND)
 			continue;
 		if (!cskb)
-			cskb = skb_copy(skb, GFP_KERNEL);
+			cskb = skb_copy(skb, GFP_ATOMIC);
 		if (!cskb) {
 			printk(KERN_WARNING "%s no skb\n", __func__);
 			break;
diff --git a/drivers/leds/leds-bcm6328.c b/drivers/leds/leds-bcm6328.c
index 1548259297c1..2cfd9389ee96 100644
--- a/drivers/leds/leds-bcm6328.c
+++ b/drivers/leds/leds-bcm6328.c
@@ -242,7 +242,7 @@ static int bcm6328_hwled(struct device *dev, struct device_node *nc, u32 reg,
 
 		spin_lock_irqsave(lock, flags);
 		val = bcm6328_led_read(addr);
-		val |= (BIT(reg) << (((sel % 4) * 4) + 16));
+		val |= (BIT(reg % 4) << (((sel % 4) * 4) + 16));
 		bcm6328_led_write(addr, val);
 		spin_unlock_irqrestore(lock, flags);
 	}
@@ -269,7 +269,7 @@ static int bcm6328_hwled(struct device *dev, struct device_node *nc, u32 reg,
 
 		spin_lock_irqsave(lock, flags);
 		val = bcm6328_led_read(addr);
-		val |= (BIT(reg) << ((sel % 4) * 4));
+		val |= (BIT(reg % 4) << ((sel % 4) * 4));
 		bcm6328_led_write(addr, val);
 		spin_unlock_irqrestore(lock, flags);
 	}
diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c
index 78a7ce816a47..9a873118ea5f 100644
--- a/drivers/leds/leds-pca955x.c
+++ b/drivers/leds/leds-pca955x.c
@@ -285,7 +285,7 @@ static int pca955x_probe(struct i2c_client *client,
 			"slave address 0x%02x\n",
 			client->name, chip->bits, client->addr);
 
-	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C))
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
 		return -EIO;
 
 	if (pdata) {
diff --git a/drivers/leds/trigger/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c
index afa3b4099214..e95ea65380c8 100644
--- a/drivers/leds/trigger/ledtrig-heartbeat.c
+++ b/drivers/leds/trigger/ledtrig-heartbeat.c
@@ -20,7 +20,6 @@
 #include <linux/sched/loadavg.h>
 #include <linux/leds.h>
 #include <linux/reboot.h>
-#include <linux/suspend.h>
 #include "../leds.h"
 
 static int panic_heartbeats;
@@ -163,30 +162,6 @@ static struct led_trigger heartbeat_led_trigger = {
 	.deactivate = heartbeat_trig_deactivate,
 };
 
-static int heartbeat_pm_notifier(struct notifier_block *nb,
-				 unsigned long pm_event, void *unused)
-{
-	int rc;
-
-	switch (pm_event) {
-	case PM_SUSPEND_PREPARE:
-	case PM_HIBERNATION_PREPARE:
-	case PM_RESTORE_PREPARE:
-		led_trigger_unregister(&heartbeat_led_trigger);
-		break;
-	case PM_POST_SUSPEND:
-	case PM_POST_HIBERNATION:
-	case PM_POST_RESTORE:
-		rc = led_trigger_register(&heartbeat_led_trigger);
-		if (rc)
-			pr_err("could not re-register heartbeat trigger\n");
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_DONE;
-}
-
 static int heartbeat_reboot_notifier(struct notifier_block *nb,
 				     unsigned long code, void *unused)
 {
@@ -201,10 +176,6 @@ static int heartbeat_panic_notifier(struct notifier_block *nb,
 	return NOTIFY_DONE;
 }
 
-static struct notifier_block heartbeat_pm_nb = {
-	.notifier_call = heartbeat_pm_notifier,
-};
-
 static struct notifier_block heartbeat_reboot_nb = {
 	.notifier_call = heartbeat_reboot_notifier,
 };
@@ -221,14 +192,12 @@ static int __init heartbeat_trig_init(void)
 		atomic_notifier_chain_register(&panic_notifier_list,
 					       &heartbeat_panic_nb);
 		register_reboot_notifier(&heartbeat_reboot_nb);
-		register_pm_notifier(&heartbeat_pm_nb);
 	}
 	return rc;
 }
 
 static void __exit heartbeat_trig_exit(void)
 {
-	unregister_pm_notifier(&heartbeat_pm_nb);
 	unregister_reboot_notifier(&heartbeat_reboot_nb);
 	atomic_notifier_chain_unregister(&panic_notifier_list,
 					 &heartbeat_panic_nb);
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 6a4aa608ad95..ddae430b6eae 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -252,8 +252,9 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	}
 	mutex_unlock(&dev->mlock);
 
-	if (nvm_reserve_luns(dev, s->lun_begin, s->lun_end))
-		return -ENOMEM;
+	ret = nvm_reserve_luns(dev, s->lun_begin, s->lun_end);
+	if (ret)
+		return ret;
 
 	t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
 	if (!t) {
@@ -640,6 +641,7 @@ EXPORT_SYMBOL(nvm_max_phys_sects);
 int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 {
 	struct nvm_dev *dev = tgt_dev->parent;
+	int ret;
 
 	if (!dev->ops->submit_io)
 		return -ENODEV;
@@ -647,7 +649,12 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 	nvm_rq_tgt_to_dev(tgt_dev, rqd);
 
 	rqd->dev = tgt_dev;
-	return dev->ops->submit_io(dev, rqd);
+
+	/* In case of error, fail with right address format */
+	ret = dev->ops->submit_io(dev, rqd);
+	if (ret)
+		nvm_rq_dev_to_tgt(tgt_dev, rqd);
+	return ret;
 }
 EXPORT_SYMBOL(nvm_submit_io);
 
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 59bcea88db84..024a8fc93069 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -31,9 +31,13 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
 	 */
 retry:
 	ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
-	if (ret == NVM_IO_REQUEUE) {
+	switch (ret) {
+	case NVM_IO_REQUEUE:
 		io_schedule();
 		goto retry;
+	case NVM_IO_ERR:
+		pblk_pipeline_stop(pblk);
+		goto out;
 	}
 
 	if (unlikely(!bio_has_data(bio)))
@@ -58,6 +62,8 @@ retry:
 	atomic_long_add(nr_entries, &pblk->req_writes);
 #endif
 
+	pblk_rl_inserted(&pblk->rl, nr_entries);
+
 out:
 	pblk_write_should_kick(pblk);
 	return ret;
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 5e44768ccffa..11fe0c5b2a9c 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -17,7 +17,6 @@
  */
 
 #include "pblk.h"
-#include <linux/time.h>
 
 static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
 			 struct ppa_addr *ppa)
@@ -34,7 +33,7 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
 		pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
 							line->id, pos);
 
-	pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
+	pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, pblk->bb_wq);
 }
 
 static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
@@ -54,6 +53,8 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
 		*ppa = rqd->ppa_addr;
 		pblk_mark_bb(pblk, line, ppa);
 	}
+
+	atomic_dec(&pblk->inflight_io);
 }
 
 /* Erase completion assumes that only one block is erased at the time */
@@ -61,13 +62,12 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
 {
 	struct pblk *pblk = rqd->private;
 
-	up(&pblk->erase_sem);
 	__pblk_end_io_erase(pblk, rqd);
-	mempool_free(rqd, pblk->r_rq_pool);
+	mempool_free(rqd, pblk->g_rq_pool);
 }
 
-static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
-				  u64 paddr)
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+			   u64 paddr)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct list_head *move_list = NULL;
@@ -88,7 +88,7 @@ static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
 		spin_unlock(&line->lock);
 		return;
 	}
-	line->vsc--;
+	le32_add_cpu(line->vsc, -1);
 
 	if (line->state == PBLK_LINESTATE_CLOSED)
 		move_list = pblk_line_gc_list(pblk, line);
@@ -130,18 +130,6 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
 	__pblk_map_invalidate(pblk, line, paddr);
 }
 
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
-			     u64 paddr)
-{
-	__pblk_map_invalidate(pblk, line, paddr);
-
-	pblk_rb_sync_init(&pblk->rwb, NULL);
-	line->left_ssecs--;
-	if (!line->left_ssecs)
-		pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
-	pblk_rb_sync_end(&pblk->rwb, NULL);
-}
-
 static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
 				  unsigned int nr_secs)
 {
@@ -172,8 +160,8 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
 		pool = pblk->w_rq_pool;
 		rq_size = pblk_w_rq_size;
 	} else {
-		pool = pblk->r_rq_pool;
-		rq_size = pblk_r_rq_size;
+		pool = pblk->g_rq_pool;
+		rq_size = pblk_g_rq_size;
 	}
 
 	rqd = mempool_alloc(pool, GFP_KERNEL);
@@ -189,7 +177,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
 	if (rw == WRITE)
 		pool = pblk->w_rq_pool;
 	else
-		pool = pblk->r_rq_pool;
+		pool = pblk->g_rq_pool;
 
 	mempool_free(rqd, pool);
 }
@@ -271,35 +259,26 @@ void pblk_end_io_sync(struct nvm_rq *rqd)
 	complete(waiting);
 }
 
-void pblk_flush_writer(struct pblk *pblk)
+void pblk_wait_for_meta(struct pblk *pblk)
 {
-	struct bio *bio;
-	int ret;
-	DECLARE_COMPLETION_ONSTACK(wait);
-
-	bio = bio_alloc(GFP_KERNEL, 1);
-	if (!bio)
-		return;
-
-	bio->bi_iter.bi_sector = 0; /* internal bio */
-	bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH);
-	bio->bi_private = &wait;
-	bio->bi_end_io = pblk_end_bio_sync;
+	do {
+		if (!atomic_read(&pblk->inflight_io))
+			break;
 
-	ret = pblk_write_to_cache(pblk, bio, 0);
-	if (ret == NVM_IO_OK) {
-		if (!wait_for_completion_io_timeout(&wait,
-				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
-			pr_err("pblk: flush cache timed out\n");
-		}
-	} else if (ret != NVM_IO_DONE) {
-		pr_err("pblk: tear down bio failed\n");
-	}
+		schedule();
+	} while (1);
+}
 
-	if (bio->bi_error)
-		pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error);
+static void pblk_flush_writer(struct pblk *pblk)
+{
+	pblk_rb_flush(&pblk->rwb);
+	do {
+		if (!pblk_rb_sync_count(&pblk->rwb))
+			break;
 
-	bio_put(bio);
+		pblk_write_kick(pblk);
+		schedule();
+	} while (1);
 }
 
 struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
@@ -307,28 +286,31 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct list_head *move_list = NULL;
+	int vsc = le32_to_cpu(*line->vsc);
 
-	if (!line->vsc) {
+	lockdep_assert_held(&line->lock);
+
+	if (!vsc) {
 		if (line->gc_group != PBLK_LINEGC_FULL) {
 			line->gc_group = PBLK_LINEGC_FULL;
 			move_list = &l_mg->gc_full_list;
 		}
-	} else if (line->vsc < lm->mid_thrs) {
+	} else if (vsc < lm->high_thrs) {
 		if (line->gc_group != PBLK_LINEGC_HIGH) {
 			line->gc_group = PBLK_LINEGC_HIGH;
 			move_list = &l_mg->gc_high_list;
 		}
-	} else if (line->vsc < lm->high_thrs) {
+	} else if (vsc < lm->mid_thrs) {
 		if (line->gc_group != PBLK_LINEGC_MID) {
 			line->gc_group = PBLK_LINEGC_MID;
 			move_list = &l_mg->gc_mid_list;
 		}
-	} else if (line->vsc < line->sec_in_line) {
+	} else if (vsc < line->sec_in_line) {
 		if (line->gc_group != PBLK_LINEGC_LOW) {
 			line->gc_group = PBLK_LINEGC_LOW;
 			move_list = &l_mg->gc_low_list;
 		}
-	} else if (line->vsc == line->sec_in_line) {
+	} else if (vsc == line->sec_in_line) {
 		if (line->gc_group != PBLK_LINEGC_EMPTY) {
 			line->gc_group = PBLK_LINEGC_EMPTY;
 			move_list = &l_mg->gc_empty_list;
@@ -338,7 +320,7 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
 		line->gc_group = PBLK_LINEGC_NONE;
 		move_list =  &l_mg->corrupt_list;
 		pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
-						line->id, line->vsc,
+						line->id, vsc,
 						line->sec_in_line,
 						lm->high_thrs, lm->mid_thrs);
 	}
@@ -397,6 +379,11 @@ void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
 #endif
 }
 
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
+{
+	pblk->sec_per_write = sec_per_write;
+}
+
 int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
@@ -431,21 +418,23 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
 		}
 	}
 #endif
+
+	atomic_inc(&pblk->inflight_io);
+
 	return nvm_submit_io(dev, rqd);
 }
 
 struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
 			      unsigned int nr_secs, unsigned int len,
-			      gfp_t gfp_mask)
+			      int alloc_type, gfp_t gfp_mask)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
-	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	void *kaddr = data;
 	struct page *page;
 	struct bio *bio;
 	int i, ret;
 
-	if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
+	if (alloc_type == PBLK_KMALLOC_META)
 		return bio_map_kern(dev->q, kaddr, len, gfp_mask);
 
 	bio = bio_kmalloc(gfp_mask, nr_secs);
@@ -478,7 +467,7 @@ out:
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
 		   unsigned long secs_to_flush)
 {
-	int max = pblk->max_write_pgs;
+	int max = pblk->sec_per_write;
 	int min = pblk->min_write_pgs;
 	int secs_to_sync = 0;
 
@@ -492,12 +481,26 @@ int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
 	return secs_to_sync;
 }
 
-static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line,
-			     int nr_secs)
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
+{
+	u64 addr;
+	int i;
+
+	addr = find_next_zero_bit(line->map_bitmap,
+					pblk->lm.sec_per_line, line->cur_sec);
+	line->cur_sec = addr - nr_secs;
+
+	for (i = 0; i < nr_secs; i++, line->cur_sec--)
+		WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
+}
+
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
 {
 	u64 addr;
 	int i;
 
+	lockdep_assert_held(&line->lock);
+
 	/* logic error: ppa out-of-bounds. Prevent generating bad address */
 	if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
 		WARN(1, "pblk: page allocation out of bounds\n");
@@ -528,27 +531,38 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
 	return addr;
 }
 
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
+{
+	u64 paddr;
+
+	spin_lock(&line->lock);
+	paddr = find_next_zero_bit(line->map_bitmap,
+					pblk->lm.sec_per_line, line->cur_sec);
+	spin_unlock(&line->lock);
+
+	return paddr;
+}
+
 /*
  * Submit emeta to one LUN in the raid line at the time to avoid a deadlock when
  * taking the per LUN semaphore.
  */
 static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
-				     u64 paddr, int dir)
+				     void *emeta_buf, u64 paddr, int dir)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line_meta *lm = &pblk->lm;
+	void *ppa_list, *meta_list;
 	struct bio *bio;
 	struct nvm_rq rqd;
-	struct ppa_addr *ppa_list;
-	dma_addr_t dma_ppa_list;
-	void *emeta = line->emeta;
+	dma_addr_t dma_ppa_list, dma_meta_list;
 	int min = pblk->min_write_pgs;
-	int left_ppas = lm->emeta_sec;
+	int left_ppas = lm->emeta_sec[0];
 	int id = line->id;
 	int rq_ppas, rq_len;
 	int cmd_op, bio_op;
-	int flags;
 	int i, j;
 	int ret;
 	DECLARE_COMPLETION_ONSTACK(wait);
@@ -556,25 +570,28 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
 	if (dir == WRITE) {
 		bio_op = REQ_OP_WRITE;
 		cmd_op = NVM_OP_PWRITE;
-		flags = pblk_set_progr_mode(pblk, WRITE);
 	} else if (dir == READ) {
 		bio_op = REQ_OP_READ;
 		cmd_op = NVM_OP_PREAD;
-		flags = pblk_set_read_mode(pblk);
 	} else
 		return -EINVAL;
 
-	ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list);
-	if (!ppa_list)
+	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+							&dma_meta_list);
+	if (!meta_list)
 		return -ENOMEM;
 
+	ppa_list = meta_list + pblk_dma_meta_size;
+	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
 next_rq:
 	memset(&rqd, 0, sizeof(struct nvm_rq));
 
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 	rq_len = rq_ppas * geo->sec_size;
 
-	bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL);
+	bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
+					l_mg->emeta_alloc_type, GFP_KERNEL);
 	if (IS_ERR(bio)) {
 		ret = PTR_ERR(bio);
 		goto free_rqd_dma;
@@ -584,27 +601,38 @@ next_rq:
 	bio_set_op_attrs(bio, bio_op, 0);
 
 	rqd.bio = bio;
-	rqd.opcode = cmd_op;
-	rqd.flags = flags;
-	rqd.nr_ppas = rq_ppas;
+	rqd.meta_list = meta_list;
 	rqd.ppa_list = ppa_list;
+	rqd.dma_meta_list = dma_meta_list;
 	rqd.dma_ppa_list = dma_ppa_list;
+	rqd.opcode = cmd_op;
+	rqd.nr_ppas = rq_ppas;
 	rqd.end_io = pblk_end_io_sync;
 	rqd.private = &wait;
 
 	if (dir == WRITE) {
+		struct pblk_sec_meta *meta_list = rqd.meta_list;
+
+		rqd.flags = pblk_set_progr_mode(pblk, WRITE);
 		for (i = 0; i < rqd.nr_ppas; ) {
 			spin_lock(&line->lock);
 			paddr = __pblk_alloc_page(pblk, line, min);
 			spin_unlock(&line->lock);
-			for (j = 0; j < min; j++, i++, paddr++)
+			for (j = 0; j < min; j++, i++, paddr++) {
+				meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
 				rqd.ppa_list[i] =
 					addr_to_gen_ppa(pblk, paddr, id);
+			}
 		}
 	} else {
 		for (i = 0; i < rqd.nr_ppas; ) {
 			struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
 			int pos = pblk_dev_ppa_to_pos(geo, ppa);
+			int read_type = PBLK_READ_RANDOM;
+
+			if (pblk_io_aligned(pblk, rq_ppas))
+				read_type = PBLK_READ_SEQUENTIAL;
+			rqd.flags = pblk_set_read_mode(pblk, read_type);
 
 			while (test_bit(pos, line->blk_bitmap)) {
 				paddr += min;
@@ -645,9 +673,11 @@ next_rq:
 				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
 		pr_err("pblk: emeta I/O timed out\n");
 	}
+	atomic_dec(&pblk->inflight_io);
 	reinit_completion(&wait);
 
-	bio_put(bio);
+	if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
+		bio_put(bio);
 
 	if (rqd.error) {
 		if (dir == WRITE)
@@ -656,12 +686,12 @@ next_rq:
 			pblk_log_read_err(pblk, &rqd);
 	}
 
-	emeta += rq_len;
+	emeta_buf += rq_len;
 	left_ppas -= rq_ppas;
 	if (left_ppas)
 		goto next_rq;
 free_rqd_dma:
-	nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list);
+	nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
 	return ret;
 }
 
@@ -697,21 +727,24 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
 		bio_op = REQ_OP_WRITE;
 		cmd_op = NVM_OP_PWRITE;
 		flags = pblk_set_progr_mode(pblk, WRITE);
-		lba_list = pblk_line_emeta_to_lbas(line->emeta);
+		lba_list = emeta_to_lbas(pblk, line->emeta->buf);
 	} else if (dir == READ) {
 		bio_op = REQ_OP_READ;
 		cmd_op = NVM_OP_PREAD;
-		flags = pblk_set_read_mode(pblk);
+		flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
 	} else
 		return -EINVAL;
 
 	memset(&rqd, 0, sizeof(struct nvm_rq));
 
-	rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
-							&rqd.dma_ppa_list);
-	if (!rqd.ppa_list)
+	rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+							&rqd.dma_meta_list);
+	if (!rqd.meta_list)
 		return -ENOMEM;
 
+	rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+	rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
+
 	bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
 	if (IS_ERR(bio)) {
 		ret = PTR_ERR(bio);
@@ -729,9 +762,15 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
 	rqd.private = &wait;
 
 	for (i = 0; i < lm->smeta_sec; i++, paddr++) {
+		struct pblk_sec_meta *meta_list = rqd.meta_list;
+
 		rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
-		if (dir == WRITE)
-			lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
+
+		if (dir == WRITE) {
+			__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+			meta_list[i].lba = lba_list[paddr] = addr_empty;
+		}
 	}
 
 	/*
@@ -750,6 +789,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
 				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
 		pr_err("pblk: smeta I/O timed out\n");
 	}
+	atomic_dec(&pblk->inflight_io);
 
 	if (rqd.error) {
 		if (dir == WRITE)
@@ -759,7 +799,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
 	}
 
 free_ppa_list:
-	nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+	nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
 
 	return ret;
 }
@@ -771,9 +811,11 @@ int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
 	return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
 }
 
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line)
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+			 void *emeta_buf)
 {
-	return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ);
+	return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
+						line->emeta_ssec, READ);
 }
 
 static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -789,7 +831,7 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
 static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
 {
 	struct nvm_rq rqd;
-	int ret;
+	int ret = 0;
 	DECLARE_COMPLETION_ONSTACK(wait);
 
 	memset(&rqd, 0, sizeof(struct nvm_rq));
@@ -824,14 +866,14 @@ out:
 	rqd.private = pblk;
 	__pblk_end_io_erase(pblk, &rqd);
 
-	return 0;
+	return ret;
 }
 
 int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
 {
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct ppa_addr ppa;
-	int bit = -1;
+	int ret, bit = -1;
 
 	/* Erase only good blocks, one at a time */
 	do {
@@ -850,27 +892,59 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
 		WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
 		spin_unlock(&line->lock);
 
-		if (pblk_blk_erase_sync(pblk, ppa)) {
+		ret = pblk_blk_erase_sync(pblk, ppa);
+		if (ret) {
 			pr_err("pblk: failed to erase line %d\n", line->id);
-			return -ENOMEM;
+			return ret;
 		}
 	} while (1);
 
 	return 0;
 }
 
+static void pblk_line_setup_metadata(struct pblk_line *line,
+				     struct pblk_line_mgmt *l_mg,
+				     struct pblk_line_meta *lm)
+{
+	int meta_line;
+
+	lockdep_assert_held(&l_mg->free_lock);
+
+retry_meta:
+	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+	if (meta_line == PBLK_DATA_LINES) {
+		spin_unlock(&l_mg->free_lock);
+		io_schedule();
+		spin_lock(&l_mg->free_lock);
+		goto retry_meta;
+	}
+
+	set_bit(meta_line, &l_mg->meta_bitmap);
+	line->meta_line = meta_line;
+
+	line->smeta = l_mg->sline_meta[meta_line];
+	line->emeta = l_mg->eline_meta[meta_line];
+
+	memset(line->smeta, 0, lm->smeta_len);
+	memset(line->emeta->buf, 0, lm->emeta_len[0]);
+
+	line->emeta->mem = 0;
+	atomic_set(&line->emeta->sync, 0);
+}
+
 /* For now lines are always assumed full lines. Thus, smeta former and current
  * lun bitmaps are omitted.
  */
-static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
+static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
 				  struct pblk_line *cur)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-	struct line_smeta *smeta = line->smeta;
-	struct line_emeta *emeta = line->emeta;
+	struct pblk_emeta *emeta = line->emeta;
+	struct line_emeta *emeta_buf = emeta->buf;
+	struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
 	int nr_blk_line;
 
 	/* After erasing the line, new bad blocks might appear and we risk
@@ -893,42 +967,44 @@ static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
 	}
 
 	/* Run-time metadata */
-	line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta);
+	line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);
 
 	/* Mark LUNs allocated in this line (all for now) */
 	bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
 
-	smeta->header.identifier = cpu_to_le32(PBLK_MAGIC);
-	memcpy(smeta->header.uuid, pblk->instance_uuid, 16);
-	smeta->header.id = cpu_to_le32(line->id);
-	smeta->header.type = cpu_to_le16(line->type);
-	smeta->header.version = cpu_to_le16(1);
+	smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
+	memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
+	smeta_buf->header.id = cpu_to_le32(line->id);
+	smeta_buf->header.type = cpu_to_le16(line->type);
+	smeta_buf->header.version = cpu_to_le16(1);
 
 	/* Start metadata */
-	smeta->seq_nr = cpu_to_le64(line->seq_nr);
-	smeta->window_wr_lun = cpu_to_le32(geo->nr_luns);
+	smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+	smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);
 
 	/* Fill metadata among lines */
 	if (cur) {
 		memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
-		smeta->prev_id = cpu_to_le32(cur->id);
-		cur->emeta->next_id = cpu_to_le32(line->id);
+		smeta_buf->prev_id = cpu_to_le32(cur->id);
+		cur->emeta->buf->next_id = cpu_to_le32(line->id);
 	} else {
-		smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
+		smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
 	}
 
 	/* All smeta must be set at this point */
-	smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta));
-	smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta));
+	smeta_buf->header.crc = cpu_to_le32(
+			pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
+	smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));
 
 	/* End metadata */
-	memcpy(&emeta->header, &smeta->header, sizeof(struct line_header));
-	emeta->seq_nr = cpu_to_le64(line->seq_nr);
-	emeta->nr_lbas = cpu_to_le64(line->sec_in_line);
-	emeta->nr_valid_lbas = cpu_to_le64(0);
-	emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
-	emeta->crc = cpu_to_le32(0);
-	emeta->prev_id = smeta->prev_id;
+	memcpy(&emeta_buf->header, &smeta_buf->header,
+						sizeof(struct line_header));
+	emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+	emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
+	emeta_buf->nr_valid_lbas = cpu_to_le64(0);
+	emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
+	emeta_buf->crc = cpu_to_le32(0);
+	emeta_buf->prev_id = smeta_buf->prev_id;
 
 	return 1;
 }
@@ -965,7 +1041,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
 	/* Mark smeta metadata sectors as bad sectors */
 	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
 	off = bit * geo->sec_per_pl;
-retry_smeta:
 	bitmap_set(line->map_bitmap, off, lm->smeta_sec);
 	line->sec_in_line -= lm->smeta_sec;
 	line->smeta_ssec = off;
@@ -973,8 +1048,7 @@ retry_smeta:
 
 	if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
 		pr_debug("pblk: line smeta I/O failed. Retry\n");
-		off += geo->sec_per_pl;
-		goto retry_smeta;
+		return 1;
 	}
 
 	bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
@@ -983,8 +1057,8 @@ retry_smeta:
 	 * blocks to make sure that there are enough sectors to store emeta
 	 */
 	bit = lm->sec_per_line;
-	off = lm->sec_per_line - lm->emeta_sec;
-	bitmap_set(line->invalid_bitmap, off, lm->emeta_sec);
+	off = lm->sec_per_line - lm->emeta_sec[0];
+	bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
 	while (nr_bb) {
 		off -= geo->sec_per_pl;
 		if (!test_bit(off, line->invalid_bitmap)) {
@@ -993,9 +1067,11 @@ retry_smeta:
 		}
 	}
 
-	line->sec_in_line -= lm->emeta_sec;
+	line->sec_in_line -= lm->emeta_sec[0];
 	line->emeta_ssec = off;
-	line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line;
+	line->nr_valid_lbas = 0;
+	line->left_msecs = line->sec_in_line;
+	*line->vsc = cpu_to_le32(line->sec_in_line);
 
 	if (lm->sec_per_line - line->sec_in_line !=
 		bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
@@ -1034,14 +1110,20 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
 
 	spin_lock(&line->lock);
 	if (line->state != PBLK_LINESTATE_FREE) {
+		mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+		mempool_free(line->map_bitmap, pblk->line_meta_pool);
 		spin_unlock(&line->lock);
-		WARN(1, "pblk: corrupted line state\n");
-		return -EINTR;
+		WARN(1, "pblk: corrupted line %d, state %d\n",
+							line->id, line->state);
+		return -EAGAIN;
 	}
+
 	line->state = PBLK_LINESTATE_OPEN;
 
 	atomic_set(&line->left_eblks, blk_in_line);
 	atomic_set(&line->left_seblks, blk_in_line);
+
+	line->meta_distance = lm->meta_distance;
 	spin_unlock(&line->lock);
 
 	/* Bad blocks do not need to be erased */
@@ -1091,15 +1173,15 @@ struct pblk_line *pblk_line_get(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line_meta *lm = &pblk->lm;
-	struct pblk_line *line = NULL;
-	int bit;
+	struct pblk_line *line;
+	int ret, bit;
 
 	lockdep_assert_held(&l_mg->free_lock);
 
-retry_get:
+retry:
 	if (list_empty(&l_mg->free_list)) {
 		pr_err("pblk: no free lines\n");
-		goto out;
+		return NULL;
 	}
 
 	line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
@@ -1115,16 +1197,22 @@ retry_get:
 		list_add_tail(&line->list, &l_mg->bad_list);
 
 		pr_debug("pblk: line %d is bad\n", line->id);
-		goto retry_get;
+		goto retry;
 	}
 
-	if (pblk_line_prepare(pblk, line)) {
-		pr_err("pblk: failed to prepare line %d\n", line->id);
-		list_add(&line->list, &l_mg->free_list);
-		return NULL;
+	ret = pblk_line_prepare(pblk, line);
+	if (ret) {
+		if (ret == -EAGAIN) {
+			list_add(&line->list, &l_mg->corrupt_list);
+			goto retry;
+		} else {
+			pr_err("pblk: failed to prepare line %d\n", line->id);
+			list_add(&line->list, &l_mg->free_list);
+			l_mg->nr_free_lines++;
+			return NULL;
+		}
 	}
 
-out:
 	return line;
 }
 
@@ -1134,6 +1222,7 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line *retry_line;
 
+retry:
 	spin_lock(&l_mg->free_lock);
 	retry_line = pblk_line_get(pblk);
 	if (!retry_line) {
@@ -1150,23 +1239,25 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
 	l_mg->data_line = retry_line;
 	spin_unlock(&l_mg->free_lock);
 
-	if (pblk_line_erase(pblk, retry_line)) {
-		spin_lock(&l_mg->free_lock);
-		l_mg->data_line = NULL;
-		spin_unlock(&l_mg->free_lock);
-		return NULL;
-	}
-
 	pblk_rl_free_lines_dec(&pblk->rl, retry_line);
 
+	if (pblk_line_erase(pblk, retry_line))
+		goto retry;
+
 	return retry_line;
 }
 
+static void pblk_set_space_limit(struct pblk *pblk)
+{
+	struct pblk_rl *rl = &pblk->rl;
+
+	atomic_set(&rl->rb_space, 0);
+}
+
 struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line *line;
-	int meta_line;
 	int is_next = 0;
 
 	spin_lock(&l_mg->free_lock);
@@ -1180,30 +1271,37 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
 	line->type = PBLK_LINETYPE_DATA;
 	l_mg->data_line = line;
 
-	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
-	set_bit(meta_line, &l_mg->meta_bitmap);
-	line->smeta = l_mg->sline_meta[meta_line].meta;
-	line->emeta = l_mg->eline_meta[meta_line].meta;
-	line->meta_line = meta_line;
+	pblk_line_setup_metadata(line, l_mg, &pblk->lm);
 
 	/* Allocate next line for preparation */
 	l_mg->data_next = pblk_line_get(pblk);
-	if (l_mg->data_next) {
+	if (!l_mg->data_next) {
+		/* If we cannot get a new line, we need to stop the pipeline.
+		 * Only allow as many writes in as we can store safely and then
+		 * fail gracefully
+		 */
+		pblk_set_space_limit(pblk);
+
+		l_mg->data_next = NULL;
+	} else {
 		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
 		l_mg->data_next->type = PBLK_LINETYPE_DATA;
 		is_next = 1;
 	}
 	spin_unlock(&l_mg->free_lock);
 
+	if (pblk_line_erase(pblk, line)) {
+		line = pblk_line_retry(pblk, line);
+		if (!line)
+			return NULL;
+	}
+
 	pblk_rl_free_lines_dec(&pblk->rl, line);
 	if (is_next)
 		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
 
-	if (pblk_line_erase(pblk, line))
-		return NULL;
-
 retry_setup:
-	if (!pblk_line_set_metadata(pblk, line, NULL)) {
+	if (!pblk_line_init_metadata(pblk, line, NULL)) {
 		line = pblk_line_retry(pblk, line);
 		if (!line)
 			return NULL;
@@ -1222,69 +1320,89 @@ retry_setup:
 	return line;
 }
 
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
+static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
+{
+	lockdep_assert_held(&pblk->l_mg.free_lock);
+
+	pblk_set_space_limit(pblk);
+	pblk->state = PBLK_STATE_STOPPING;
+}
+
+void pblk_pipeline_stop(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	int ret;
+
+	spin_lock(&l_mg->free_lock);
+	if (pblk->state == PBLK_STATE_RECOVERING ||
+					pblk->state == PBLK_STATE_STOPPED) {
+		spin_unlock(&l_mg->free_lock);
+		return;
+	}
+	pblk->state = PBLK_STATE_RECOVERING;
+	spin_unlock(&l_mg->free_lock);
+
+	pblk_flush_writer(pblk);
+	pblk_wait_for_meta(pblk);
+
+	ret = pblk_recov_pad(pblk);
+	if (ret) {
+		pr_err("pblk: could not close data on teardown(%d)\n", ret);
+		return;
+	}
+
+	flush_workqueue(pblk->bb_wq);
+	pblk_line_close_meta_sync(pblk);
+
+	spin_lock(&l_mg->free_lock);
+	pblk->state = PBLK_STATE_STOPPED;
+	l_mg->data_line = NULL;
+	l_mg->data_next = NULL;
+	spin_unlock(&l_mg->free_lock);
+}
+
+void pblk_line_replace_data(struct pblk *pblk)
 {
-	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line *cur, *new;
 	unsigned int left_seblks;
-	int meta_line;
 	int is_next = 0;
 
 	cur = l_mg->data_line;
 	new = l_mg->data_next;
 	if (!new)
-		return NULL;
+		return;
 	l_mg->data_line = new;
 
-retry_line:
+	spin_lock(&l_mg->free_lock);
+	if (pblk->state != PBLK_STATE_RUNNING) {
+		l_mg->data_line = NULL;
+		l_mg->data_next = NULL;
+		spin_unlock(&l_mg->free_lock);
+		return;
+	}
+
+	pblk_line_setup_metadata(new, l_mg, &pblk->lm);
+	spin_unlock(&l_mg->free_lock);
+
+retry_erase:
 	left_seblks = atomic_read(&new->left_seblks);
 	if (left_seblks) {
 		/* If line is not fully erased, erase it */
 		if (atomic_read(&new->left_eblks)) {
 			if (pblk_line_erase(pblk, new))
-				return NULL;
+				return;
 		} else {
 			io_schedule();
 		}
-		goto retry_line;
+		goto retry_erase;
 	}
 
-	spin_lock(&l_mg->free_lock);
-	/* Allocate next line for preparation */
-	l_mg->data_next = pblk_line_get(pblk);
-	if (l_mg->data_next) {
-		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
-		l_mg->data_next->type = PBLK_LINETYPE_DATA;
-		is_next = 1;
-	}
-
-retry_meta:
-	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
-	if (meta_line == PBLK_DATA_LINES) {
-		spin_unlock(&l_mg->free_lock);
-		io_schedule();
-		spin_lock(&l_mg->free_lock);
-		goto retry_meta;
-	}
-
-	set_bit(meta_line, &l_mg->meta_bitmap);
-	new->smeta = l_mg->sline_meta[meta_line].meta;
-	new->emeta = l_mg->eline_meta[meta_line].meta;
-	new->meta_line = meta_line;
-
-	memset(new->smeta, 0, lm->smeta_len);
-	memset(new->emeta, 0, lm->emeta_len);
-	spin_unlock(&l_mg->free_lock);
-
-	if (is_next)
-		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
-
 retry_setup:
-	if (!pblk_line_set_metadata(pblk, new, cur)) {
+	if (!pblk_line_init_metadata(pblk, new, cur)) {
 		new = pblk_line_retry(pblk, new);
 		if (!new)
-			return NULL;
+			return;
 
 		goto retry_setup;
 	}
@@ -1292,12 +1410,30 @@ retry_setup:
 	if (!pblk_line_init_bb(pblk, new, 1)) {
 		new = pblk_line_retry(pblk, new);
 		if (!new)
-			return NULL;
+			return;
 
 		goto retry_setup;
 	}
 
-	return new;
+	/* Allocate next line for preparation */
+	spin_lock(&l_mg->free_lock);
+	l_mg->data_next = pblk_line_get(pblk);
+	if (!l_mg->data_next) {
+		/* If we cannot get a new line, we need to stop the pipeline.
+		 * Only allow as many writes in as we can store safely and then
+		 * fail gracefully
+		 */
+		pblk_stop_writes(pblk, new);
+		l_mg->data_next = NULL;
+	} else {
+		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+		l_mg->data_next->type = PBLK_LINETYPE_DATA;
+		is_next = 1;
+	}
+	spin_unlock(&l_mg->free_lock);
+
+	if (is_next)
+		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
 }
 
 void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
@@ -1307,6 +1443,8 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
 	if (line->invalid_bitmap)
 		mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
 
+	*line->vsc = cpu_to_le32(EMPTY_ENTRY);
+
 	line->map_bitmap = NULL;
 	line->invalid_bitmap = NULL;
 	line->smeta = NULL;
@@ -1339,8 +1477,8 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
 	struct nvm_rq *rqd;
 	int err;
 
-	rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL);
-	memset(rqd, 0, pblk_r_rq_size);
+	rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL);
+	memset(rqd, 0, pblk_g_rq_size);
 
 	pblk_setup_e_rq(pblk, rqd, ppa);
 
@@ -1368,7 +1506,8 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk)
 	return pblk->l_mg.data_line;
 }
 
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk)
+/* For now, always erase next line */
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
 {
 	return pblk->l_mg.data_next;
 }
@@ -1378,18 +1517,58 @@ int pblk_line_is_full(struct pblk_line *line)
 	return (line->left_msecs == 0);
 }
 
+void pblk_line_close_meta_sync(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line *line, *tline;
+	LIST_HEAD(list);
+
+	spin_lock(&l_mg->close_lock);
+	if (list_empty(&l_mg->emeta_list)) {
+		spin_unlock(&l_mg->close_lock);
+		return;
+	}
+
+	list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev);
+	spin_unlock(&l_mg->close_lock);
+
+	list_for_each_entry_safe(line, tline, &list, list) {
+		struct pblk_emeta *emeta = line->emeta;
+
+		while (emeta->mem < lm->emeta_len[0]) {
+			int ret;
+
+			ret = pblk_submit_meta_io(pblk, line);
+			if (ret) {
+				pr_err("pblk: sync meta line %d failed (%d)\n",
+							line->id, ret);
+				return;
+			}
+		}
+	}
+
+	pblk_wait_for_meta(pblk);
+	flush_workqueue(pblk->close_wq);
+}
+
+static void pblk_line_should_sync_meta(struct pblk *pblk)
+{
+	if (pblk_rl_is_limit(&pblk->rl))
+		pblk_line_close_meta_sync(pblk);
+}
+
 void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct list_head *move_list;
 
-	line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta));
-
-	if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
-		pr_err("pblk: line %d close I/O failed\n", line->id);
+#ifdef CONFIG_NVM_DEBUG
+	struct pblk_line_meta *lm = &pblk->lm;
 
-	WARN(!bitmap_full(line->map_bitmap, line->sec_in_line),
+	WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
 				"pblk: corrupt closed line %d\n", line->id);
+#endif
 
 	spin_lock(&l_mg->free_lock);
 	WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
@@ -1410,6 +1589,31 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
 
 	spin_unlock(&line->lock);
 	spin_unlock(&l_mg->gc_lock);
+
+	pblk_gc_should_kick(pblk);
+}
+
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_emeta *emeta = line->emeta;
+	struct line_emeta *emeta_buf = emeta->buf;
+
+	/* No need for exact vsc value; avoid a big line lock and take aprox. */
+	memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
+	memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
+
+	emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
+	emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
+
+	spin_lock(&l_mg->close_lock);
+	spin_lock(&line->lock);
+	list_add_tail(&line->list, &l_mg->emeta_list);
+	spin_unlock(&line->lock);
+	spin_unlock(&l_mg->close_lock);
+
+	pblk_line_should_sync_meta(pblk);
 }
 
 void pblk_line_close_ws(struct work_struct *work)
@@ -1449,7 +1653,8 @@ void pblk_line_mark_bb(struct work_struct *work)
 }
 
 void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
-		      void (*work)(struct work_struct *))
+		      void (*work)(struct work_struct *),
+		      struct workqueue_struct *wq)
 {
 	struct pblk_line_ws *line_ws;
 
@@ -1462,7 +1667,7 @@ void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
 	line_ws->priv = priv;
 
 	INIT_WORK(&line_ws->ws, work);
-	queue_work(pblk->kw_wq, &line_ws->ws);
+	queue_work(wq, &line_ws->ws);
 }
 
 void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -1471,7 +1676,7 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct pblk_lun *rlun;
-	int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun;
+	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
 	int ret;
 
 	/*
@@ -1488,10 +1693,10 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 	/* If the LUN has been locked for this same request, do no attempt to
 	 * lock it again
 	 */
-	if (test_and_set_bit(lun_id, lun_bitmap))
+	if (test_and_set_bit(pos, lun_bitmap))
 		return;
 
-	rlun = &pblk->luns[lun_id];
+	rlun = &pblk->luns[pos];
 	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
 	if (ret) {
 		switch (ret) {
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index eaf479c6b63c..6090d28f7995 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -20,8 +20,7 @@
 
 static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
 {
-	kfree(gc_rq->data);
-	kfree(gc_rq->lba_list);
+	vfree(gc_rq->data);
 	kfree(gc_rq);
 }
 
@@ -37,10 +36,8 @@ static int pblk_gc_write(struct pblk *pblk)
 		return 1;
 	}
 
-	list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
-		list_move_tail(&gc_rq->list, &w_list);
-		gc->w_entries--;
-	}
+	list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
+	gc->w_entries = 0;
 	spin_unlock(&gc->w_lock);
 
 	list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
@@ -48,9 +45,8 @@ static int pblk_gc_write(struct pblk *pblk)
 				gc_rq->nr_secs, gc_rq->secs_to_gc,
 				gc_rq->line, PBLK_IOTYPE_GC);
 
-		kref_put(&gc_rq->line->ref, pblk_line_put);
-
 		list_del(&gc_rq->list);
+		kref_put(&gc_rq->line->ref, pblk_line_put);
 		pblk_gc_free_gc_rq(gc_rq);
 	}
 
@@ -66,52 +62,41 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
  * Responsible for managing all memory related to a gc request. Also in case of
  * failure
  */
-static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
-				   u64 *lba_list, unsigned int nr_secs)
+static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct pblk_gc *gc = &pblk->gc;
-	struct pblk_gc_rq *gc_rq;
+	struct pblk_line *line = gc_rq->line;
 	void *data;
 	unsigned int secs_to_gc;
-	int ret = NVM_IO_OK;
+	int ret = 0;
 
-	data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
+	data = vmalloc(gc_rq->nr_secs * geo->sec_size);
 	if (!data) {
-		ret = NVM_IO_ERR;
-		goto free_lba_list;
+		ret = -ENOMEM;
+		goto out;
 	}
 
 	/* Read from GC victim block */
-	if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
+	if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
 							&secs_to_gc, line)) {
-		ret = NVM_IO_ERR;
+		ret = -EFAULT;
 		goto free_data;
 	}
 
 	if (!secs_to_gc)
-		goto free_data;
-
-	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
-	if (!gc_rq) {
-		ret = NVM_IO_ERR;
-		goto free_data;
-	}
+		goto free_rq;
 
-	gc_rq->line = line;
 	gc_rq->data = data;
-	gc_rq->lba_list = lba_list;
-	gc_rq->nr_secs = nr_secs;
 	gc_rq->secs_to_gc = secs_to_gc;
 
-	kref_get(&line->ref);
-
 retry:
 	spin_lock(&gc->w_lock);
-	if (gc->w_entries > 256) {
+	if (gc->w_entries >= PBLK_GC_W_QD) {
 		spin_unlock(&gc->w_lock);
-		usleep_range(256, 1024);
+		pblk_gc_writer_kick(&pblk->gc);
+		usleep_range(128, 256);
 		goto retry;
 	}
 	gc->w_entries++;
@@ -120,13 +105,14 @@ retry:
 
 	pblk_gc_writer_kick(&pblk->gc);
 
-	return NVM_IO_OK;
+	return 0;
 
+free_rq:
+	kfree(gc_rq);
 free_data:
-	kfree(data);
-free_lba_list:
-	kfree(lba_list);
-
+	vfree(data);
+out:
+	kref_put(&line->ref, pblk_line_put);
 	return ret;
 }
 
@@ -150,140 +136,206 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
 
 static void pblk_gc_line_ws(struct work_struct *work)
 {
+	struct pblk_line_ws *line_rq_ws = container_of(work,
+						struct pblk_line_ws, ws);
+	struct pblk *pblk = line_rq_ws->pblk;
+	struct pblk_gc *gc = &pblk->gc;
+	struct pblk_line *line = line_rq_ws->line;
+	struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
+
+	up(&gc->gc_sem);
+
+	if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
+		pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
+						line->id, *line->vsc,
+						gc_rq->nr_secs);
+	}
+
+	mempool_free(line_rq_ws, pblk->line_ws_pool);
+}
+
+static void pblk_gc_line_prepare_ws(struct work_struct *work)
+{
 	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
 									ws);
 	struct pblk *pblk = line_ws->pblk;
-	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line *line = line_ws->line;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line_meta *lm = &pblk->lm;
-	__le64 *lba_list = line_ws->priv;
-	u64 *gc_list;
-	int sec_left;
-	int nr_ppas, bit;
-	int put_line = 1;
+	struct pblk_gc *gc = &pblk->gc;
+	struct line_emeta *emeta_buf;
+	struct pblk_line_ws *line_rq_ws;
+	struct pblk_gc_rq *gc_rq;
+	__le64 *lba_list;
+	int sec_left, nr_secs, bit;
+	int ret;
 
-	pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
+	emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
+								GFP_KERNEL);
+	if (!emeta_buf) {
+		pr_err("pblk: cannot use GC emeta\n");
+		return;
+	}
 
-	spin_lock(&line->lock);
-	sec_left = line->vsc;
-	if (!sec_left) {
-		/* Lines are erased before being used (l_mg->data_/log_next) */
-		spin_unlock(&line->lock);
-		goto out;
+	ret = pblk_line_read_emeta(pblk, line, emeta_buf);
+	if (ret) {
+		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
+		goto fail_free_emeta;
+	}
+
+	/* If this read fails, it means that emeta is corrupted. For now, leave
+	 * the line untouched. TODO: Implement a recovery routine that scans and
+	 * moves all sectors on the line.
+	 */
+	lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
+	if (!lba_list) {
+		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
+		goto fail_free_emeta;
 	}
-	spin_unlock(&line->lock);
 
+	sec_left = pblk_line_vsc(line);
 	if (sec_left < 0) {
 		pr_err("pblk: corrupted GC line (%d)\n", line->id);
-		put_line = 0;
-		pblk_put_line_back(pblk, line);
-		goto out;
+		goto fail_free_emeta;
 	}
 
 	bit = -1;
 next_rq:
-	gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
-	if (!gc_list) {
-		put_line = 0;
-		pblk_put_line_back(pblk, line);
-		goto out;
-	}
+	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
+	if (!gc_rq)
+		goto fail_free_emeta;
 
-	nr_ppas = 0;
+	nr_secs = 0;
 	do {
 		bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
 								bit + 1);
 		if (bit > line->emeta_ssec)
 			break;
 
-		gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
-	} while (nr_ppas < pblk->max_write_pgs);
+		gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
+	} while (nr_secs < pblk->max_write_pgs);
 
-	if (unlikely(!nr_ppas)) {
-		kfree(gc_list);
+	if (unlikely(!nr_secs)) {
+		kfree(gc_rq);
 		goto out;
 	}
 
-	if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
-		pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
-						line->id, line->vsc,
-						nr_ppas, nr_ppas);
-		put_line = 0;
-		pblk_put_line_back(pblk, line);
-		goto out;
-	}
+	gc_rq->nr_secs = nr_secs;
+	gc_rq->line = line;
+
+	line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+	if (!line_rq_ws)
+		goto fail_free_gc_rq;
 
-	sec_left -= nr_ppas;
+	line_rq_ws->pblk = pblk;
+	line_rq_ws->line = line;
+	line_rq_ws->priv = gc_rq;
+
+	down(&gc->gc_sem);
+	kref_get(&line->ref);
+
+	INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
+	queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);
+
+	sec_left -= nr_secs;
 	if (sec_left > 0)
 		goto next_rq;
 
 out:
-	pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
+	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
 	mempool_free(line_ws, pblk->line_ws_pool);
-	atomic_dec(&pblk->gc.inflight_gc);
-	if (put_line)
-		kref_put(&line->ref, pblk_line_put);
+
+	kref_put(&line->ref, pblk_line_put);
+	atomic_dec(&gc->inflight_gc);
+
+	return;
+
+fail_free_gc_rq:
+	kfree(gc_rq);
+fail_free_emeta:
+	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+	pblk_put_line_back(pblk, line);
+	kref_put(&line->ref, pblk_line_put);
+	mempool_free(line_ws, pblk->line_ws_pool);
+	atomic_dec(&gc->inflight_gc);
+
+	pr_err("pblk: Failed to GC line %d\n", line->id);
 }
 
 static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
 {
-	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_gc *gc = &pblk->gc;
 	struct pblk_line_ws *line_ws;
-	__le64 *lba_list;
-	int ret;
 
-	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
-	line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
-								GFP_KERNEL);
-	if (!line->emeta) {
-		pr_err("pblk: cannot use GC emeta\n");
-		goto fail_free_ws;
-	}
-
-	ret = pblk_line_read_emeta(pblk, line);
-	if (ret) {
-		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
-		goto fail_free_emeta;
-	}
+	pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
 
-	/* If this read fails, it means that emeta is corrupted. For now, leave
-	 * the line untouched. TODO: Implement a recovery routine that scans and
-	 * moves all sectors on the line.
-	 */
-	lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
-	if (!lba_list) {
-		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
-		goto fail_free_emeta;
-	}
+	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+	if (!line_ws)
+		return -ENOMEM;
 
 	line_ws->pblk = pblk;
 	line_ws->line = line;
-	line_ws->priv = lba_list;
 
-	INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
-	queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
+	INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
+	queue_work(gc->gc_reader_wq, &line_ws->ws);
 
 	return 0;
+}
 
-fail_free_emeta:
-	pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
-fail_free_ws:
-	mempool_free(line_ws, pblk->line_ws_pool);
-	pblk_put_line_back(pblk, line);
+static int pblk_gc_read(struct pblk *pblk)
+{
+	struct pblk_gc *gc = &pblk->gc;
+	struct pblk_line *line;
+
+	spin_lock(&gc->r_lock);
+	if (list_empty(&gc->r_list)) {
+		spin_unlock(&gc->r_lock);
+		return 1;
+	}
+
+	line = list_first_entry(&gc->r_list, struct pblk_line, list);
+	list_del(&line->list);
+	spin_unlock(&gc->r_lock);
+
+	pblk_gc_kick(pblk);
 
-	return 1;
+	if (pblk_gc_line(pblk, line))
+		pr_err("pblk: failed to GC line %d\n", line->id);
+
+	return 0;
 }
 
-static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
+static void pblk_gc_reader_kick(struct pblk_gc *gc)
 {
-	struct pblk_line *line, *tline;
+	wake_up_process(gc->gc_reader_ts);
+}
 
-	list_for_each_entry_safe(line, tline, gc_list, list) {
-		if (pblk_gc_line(pblk, line))
-			pr_err("pblk: failed to GC line %d\n", line->id);
-		list_del(&line->list);
+static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
+						 struct list_head *group_list)
+{
+	struct pblk_line *line, *victim;
+	int line_vsc, victim_vsc;
+
+	victim = list_first_entry(group_list, struct pblk_line, list);
+	list_for_each_entry(line, group_list, list) {
+		line_vsc = le32_to_cpu(*line->vsc);
+		victim_vsc = le32_to_cpu(*victim->vsc);
+		if (line_vsc < victim_vsc)
+			victim = line;
 	}
+
+	return victim;
+}
+
+static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
+{
+	unsigned int nr_blocks_free, nr_blocks_need;
+
+	nr_blocks_need = pblk_rl_high_thrs(rl);
+	nr_blocks_free = pblk_rl_nr_free_blks(rl);
+
+	/* This is not critical, no need to take lock here */
+	return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
 }
 
 /*
@@ -296,71 +348,83 @@ static void pblk_gc_run(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_gc *gc = &pblk->gc;
-	struct pblk_line *line, *tline;
-	unsigned int nr_blocks_free, nr_blocks_need;
+	struct pblk_line *line;
 	struct list_head *group_list;
-	int run_gc, gc_group = 0;
-	int prev_gc = 0;
-	int inflight_gc = atomic_read(&gc->inflight_gc);
-	LIST_HEAD(gc_list);
+	bool run_gc;
+	int inflight_gc, gc_group = 0, prev_group = 0;
+
+	do {
+		spin_lock(&l_mg->gc_lock);
+		if (list_empty(&l_mg->gc_full_list)) {
+			spin_unlock(&l_mg->gc_lock);
+			break;
+		}
+
+		line = list_first_entry(&l_mg->gc_full_list,
+							struct pblk_line, list);
 
-	spin_lock(&l_mg->gc_lock);
-	list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
 		spin_lock(&line->lock);
 		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
 		line->state = PBLK_LINESTATE_GC;
 		spin_unlock(&line->lock);
 
 		list_del(&line->list);
+		spin_unlock(&l_mg->gc_lock);
+
 		kref_put(&line->ref, pblk_line_put);
-	}
-	spin_unlock(&l_mg->gc_lock);
+	} while (1);
 
-	nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
-	nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
-	run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
+	run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+	if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
+		return;
 
 next_gc_group:
 	group_list = l_mg->gc_lists[gc_group++];
-	spin_lock(&l_mg->gc_lock);
-	while (run_gc && !list_empty(group_list)) {
-		/* No need to queue up more GC lines than we can handle */
-		if (!run_gc || inflight_gc > gc->gc_jobs_active) {
+
+	do {
+		spin_lock(&l_mg->gc_lock);
+		if (list_empty(group_list)) {
 			spin_unlock(&l_mg->gc_lock);
-			pblk_gc_lines(pblk, &gc_list);
-			return;
+			break;
 		}
 
-		line = list_first_entry(group_list, struct pblk_line, list);
-		nr_blocks_free += atomic_read(&line->blk_in_line);
+		line = pblk_gc_get_victim_line(pblk, group_list);
 
 		spin_lock(&line->lock);
 		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
 		line->state = PBLK_LINESTATE_GC;
-		list_move_tail(&line->list, &gc_list);
-		atomic_inc(&gc->inflight_gc);
-		inflight_gc++;
 		spin_unlock(&line->lock);
 
-		prev_gc = 1;
-		run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
-	}
-	spin_unlock(&l_mg->gc_lock);
+		list_del(&line->list);
+		spin_unlock(&l_mg->gc_lock);
+
+		spin_lock(&gc->r_lock);
+		list_add_tail(&line->list, &gc->r_list);
+		spin_unlock(&gc->r_lock);
 
-	pblk_gc_lines(pblk, &gc_list);
+		inflight_gc = atomic_inc_return(&gc->inflight_gc);
+		pblk_gc_reader_kick(gc);
 
-	if (!prev_gc && pblk->rl.rb_state > gc_group &&
-						gc_group < PBLK_NR_GC_LISTS)
+		prev_group = 1;
+
+		/* No need to queue up more GC lines than we can handle */
+		run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+		if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
+			break;
+	} while (1);
+
+	if (!prev_group && pblk->rl.rb_state > gc_group &&
+						gc_group < PBLK_GC_NR_LISTS)
 		goto next_gc_group;
 }
 
-
-static void pblk_gc_kick(struct pblk *pblk)
+void pblk_gc_kick(struct pblk *pblk)
 {
 	struct pblk_gc *gc = &pblk->gc;
 
 	wake_up_process(gc->gc_ts);
 	pblk_gc_writer_kick(gc);
+	pblk_gc_reader_kick(gc);
 	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
 }
 
@@ -398,42 +462,34 @@ static int pblk_gc_writer_ts(void *data)
 	return 0;
 }
 
-static void pblk_gc_start(struct pblk *pblk)
+static int pblk_gc_reader_ts(void *data)
 {
-	pblk->gc.gc_active = 1;
+	struct pblk *pblk = data;
 
-	pr_debug("pblk: gc start\n");
+	while (!kthread_should_stop()) {
+		if (!pblk_gc_read(pblk))
+			continue;
+		set_current_state(TASK_INTERRUPTIBLE);
+		io_schedule();
+	}
+
+	return 0;
 }
 
-int pblk_gc_status(struct pblk *pblk)
+static void pblk_gc_start(struct pblk *pblk)
 {
-	struct pblk_gc *gc = &pblk->gc;
-	int ret;
-
-	spin_lock(&gc->lock);
-	ret = gc->gc_active;
-	spin_unlock(&gc->lock);
-
-	return ret;
+	pblk->gc.gc_active = 1;
+	pr_debug("pblk: gc start\n");
 }
 
-static void __pblk_gc_should_start(struct pblk *pblk)
+void pblk_gc_should_start(struct pblk *pblk)
 {
 	struct pblk_gc *gc = &pblk->gc;
 
-	lockdep_assert_held(&gc->lock);
-
 	if (gc->gc_enabled && !gc->gc_active)
 		pblk_gc_start(pblk);
-}
 
-void pblk_gc_should_start(struct pblk *pblk)
-{
-	struct pblk_gc *gc = &pblk->gc;
-
-	spin_lock(&gc->lock);
-	__pblk_gc_should_start(pblk);
-	spin_unlock(&gc->lock);
+	pblk_gc_kick(pblk);
 }
 
 /*
@@ -442,10 +498,7 @@ void pblk_gc_should_start(struct pblk *pblk)
  */
 static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
 {
-	spin_lock(&pblk->gc.lock);
 	pblk->gc.gc_active = 0;
-	spin_unlock(&pblk->gc.lock);
-
 	pr_debug("pblk: gc stop\n");
 }
 
@@ -468,20 +521,25 @@ void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
 	spin_unlock(&gc->lock);
 }
 
-void pblk_gc_sysfs_force(struct pblk *pblk, int force)
+int pblk_gc_sysfs_force(struct pblk *pblk, int force)
 {
 	struct pblk_gc *gc = &pblk->gc;
-	int rsv = 0;
+
+	if (force < 0 || force > 1)
+		return -EINVAL;
 
 	spin_lock(&gc->lock);
-	if (force) {
-		gc->gc_enabled = 1;
-		rsv = 64;
-	}
-	pblk_rl_set_gc_rsc(&pblk->rl, rsv);
 	gc->gc_forced = force;
-	__pblk_gc_should_start(pblk);
+
+	if (force)
+		gc->gc_enabled = 1;
+	else
+		gc->gc_enabled = 0;
 	spin_unlock(&gc->lock);
+
+	pblk_gc_should_start(pblk);
+
+	return 0;
 }
 
 int pblk_gc_init(struct pblk *pblk)
@@ -503,30 +561,58 @@ int pblk_gc_init(struct pblk *pblk)
 		goto fail_free_main_kthread;
 	}
 
+	gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
+							"pblk-gc-reader-ts");
+	if (IS_ERR(gc->gc_reader_ts)) {
+		pr_err("pblk: could not allocate GC reader kthread\n");
+		ret = PTR_ERR(gc->gc_reader_ts);
+		goto fail_free_writer_kthread;
+	}
+
 	setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
 	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
 
 	gc->gc_active = 0;
 	gc->gc_forced = 0;
 	gc->gc_enabled = 1;
-	gc->gc_jobs_active = 8;
 	gc->w_entries = 0;
 	atomic_set(&gc->inflight_gc, 0);
 
-	gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
-			WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
+	/* Workqueue that reads valid sectors from a line and submit them to the
+	 * GC writer to be recycled.
+	 */
+	gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
+			WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
+	if (!gc->gc_line_reader_wq) {
+		pr_err("pblk: could not allocate GC line reader workqueue\n");
+		ret = -ENOMEM;
+		goto fail_free_reader_kthread;
+	}
+
+	/* Workqueue that prepare lines for GC */
+	gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
+					WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
 	if (!gc->gc_reader_wq) {
 		pr_err("pblk: could not allocate GC reader workqueue\n");
 		ret = -ENOMEM;
-		goto fail_free_writer_kthread;
+		goto fail_free_reader_line_wq;
 	}
 
 	spin_lock_init(&gc->lock);
 	spin_lock_init(&gc->w_lock);
+	spin_lock_init(&gc->r_lock);
+
+	sema_init(&gc->gc_sem, 128);
+
 	INIT_LIST_HEAD(&gc->w_list);
+	INIT_LIST_HEAD(&gc->r_list);
 
 	return 0;
 
+fail_free_reader_line_wq:
+	destroy_workqueue(gc->gc_line_reader_wq);
+fail_free_reader_kthread:
+	kthread_stop(gc->gc_reader_ts);
 fail_free_writer_kthread:
 	kthread_stop(gc->gc_writer_ts);
 fail_free_main_kthread:
@@ -540,6 +626,7 @@ void pblk_gc_exit(struct pblk *pblk)
 	struct pblk_gc *gc = &pblk->gc;
 
 	flush_workqueue(gc->gc_reader_wq);
+	flush_workqueue(gc->gc_line_reader_wq);
 
 	del_timer(&gc->gc_timer);
 	pblk_gc_stop(pblk, 1);
@@ -547,9 +634,15 @@ void pblk_gc_exit(struct pblk *pblk)
 	if (gc->gc_ts)
 		kthread_stop(gc->gc_ts);
 
-	if (pblk->gc.gc_reader_wq)
-		destroy_workqueue(pblk->gc.gc_reader_wq);
+	if (gc->gc_reader_wq)
+		destroy_workqueue(gc->gc_reader_wq);
+
+	if (gc->gc_line_reader_wq)
+		destroy_workqueue(gc->gc_line_reader_wq);
 
 	if (gc->gc_writer_ts)
 		kthread_stop(gc->gc_writer_ts);
+
+	if (gc->gc_reader_ts)
+		kthread_stop(gc->gc_reader_ts);
 }
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index ae8cd6d5af8b..1b0f61233c21 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -20,9 +20,10 @@
 
 #include "pblk.h"
 
-static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache,
-					*pblk_w_rq_cache, *pblk_line_meta_cache;
+static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
+				*pblk_w_rq_cache, *pblk_line_meta_cache;
 static DECLARE_RWSEM(pblk_lock);
+struct bio_set *pblk_bio_set;
 
 static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
 			  struct bio *bio)
@@ -33,7 +34,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
 	 * constraint. Writes can be of arbitrary size.
 	 */
 	if (bio_data_dir(bio) == READ) {
-		blk_queue_split(q, &bio, q->bio_split);
+		blk_queue_split(q, &bio);
 		ret = pblk_submit_read(pblk, bio);
 		if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
 			bio_put(bio);
@@ -46,7 +47,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
 	 * available for user I/O.
 	 */
 	if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
-		blk_queue_split(q, &bio, q->bio_split);
+		blk_queue_split(q, &bio);
 
 	return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
 }
@@ -199,9 +200,9 @@ static int pblk_init_global_caches(struct pblk *pblk)
 		return -ENOMEM;
 	}
 
-	pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size,
+	pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
 				0, 0, NULL);
-	if (!pblk_r_rq_cache) {
+	if (!pblk_g_rq_cache) {
 		kmem_cache_destroy(pblk_blk_ws_cache);
 		kmem_cache_destroy(pblk_rec_cache);
 		up_write(&pblk_lock);
@@ -213,7 +214,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
 	if (!pblk_w_rq_cache) {
 		kmem_cache_destroy(pblk_blk_ws_cache);
 		kmem_cache_destroy(pblk_rec_cache);
-		kmem_cache_destroy(pblk_r_rq_cache);
+		kmem_cache_destroy(pblk_g_rq_cache);
 		up_write(&pblk_lock);
 		return -ENOMEM;
 	}
@@ -225,7 +226,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
 	if (!pblk_line_meta_cache) {
 		kmem_cache_destroy(pblk_blk_ws_cache);
 		kmem_cache_destroy(pblk_rec_cache);
-		kmem_cache_destroy(pblk_r_rq_cache);
+		kmem_cache_destroy(pblk_g_rq_cache);
 		kmem_cache_destroy(pblk_w_rq_cache);
 		up_write(&pblk_lock);
 		return -ENOMEM;
@@ -239,27 +240,10 @@ static int pblk_core_init(struct pblk *pblk)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	int max_write_ppas;
-	int mod;
 
-	pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
-	max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
-	pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
-				max_write_ppas : nvm_max_phys_sects(dev);
 	pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
 						geo->nr_planes * geo->nr_luns;
 
-	if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
-		pr_err("pblk: cannot support device max_phys_sect\n");
-		return -EINVAL;
-	}
-
-	div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
-	if (mod) {
-		pr_err("pblk: bad configuration of sectors/pages\n");
-		return -EINVAL;
-	}
-
 	if (pblk_init_global_caches(pblk))
 		return -ENOMEM;
 
@@ -267,7 +251,7 @@ static int pblk_core_init(struct pblk *pblk)
 	if (!pblk->page_pool)
 		return -ENOMEM;
 
-	pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns,
+	pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE,
 							pblk_blk_ws_cache);
 	if (!pblk->line_ws_pool)
 		goto free_page_pool;
@@ -276,41 +260,51 @@ static int pblk_core_init(struct pblk *pblk)
 	if (!pblk->rec_pool)
 		goto free_blk_ws_pool;
 
-	pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache);
-	if (!pblk->r_rq_pool)
+	pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE,
+							pblk_g_rq_cache);
+	if (!pblk->g_rq_pool)
 		goto free_rec_pool;
 
-	pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache);
+	pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2,
+							pblk_w_rq_cache);
 	if (!pblk->w_rq_pool)
-		goto free_r_rq_pool;
+		goto free_g_rq_pool;
 
 	pblk->line_meta_pool =
-			mempool_create_slab_pool(16, pblk_line_meta_cache);
+			mempool_create_slab_pool(PBLK_META_POOL_SIZE,
+							pblk_line_meta_cache);
 	if (!pblk->line_meta_pool)
 		goto free_w_rq_pool;
 
-	pblk->kw_wq = alloc_workqueue("pblk-aux-wq",
-					WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
-	if (!pblk->kw_wq)
+	pblk->close_wq = alloc_workqueue("pblk-close-wq",
+			WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
+	if (!pblk->close_wq)
 		goto free_line_meta_pool;
 
+	pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
+			WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+	if (!pblk->bb_wq)
+		goto free_close_wq;
+
 	if (pblk_set_ppaf(pblk))
-		goto free_kw_wq;
+		goto free_bb_wq;
 
 	if (pblk_rwb_init(pblk))
-		goto free_kw_wq;
+		goto free_bb_wq;
 
 	INIT_LIST_HEAD(&pblk->compl_list);
 	return 0;
 
-free_kw_wq:
-	destroy_workqueue(pblk->kw_wq);
+free_bb_wq:
+	destroy_workqueue(pblk->bb_wq);
+free_close_wq:
+	destroy_workqueue(pblk->close_wq);
 free_line_meta_pool:
 	mempool_destroy(pblk->line_meta_pool);
 free_w_rq_pool:
 	mempool_destroy(pblk->w_rq_pool);
-free_r_rq_pool:
-	mempool_destroy(pblk->r_rq_pool);
+free_g_rq_pool:
+	mempool_destroy(pblk->g_rq_pool);
 free_rec_pool:
 	mempool_destroy(pblk->rec_pool);
 free_blk_ws_pool:
@@ -322,19 +316,22 @@ free_page_pool:
 
 static void pblk_core_free(struct pblk *pblk)
 {
-	if (pblk->kw_wq)
-		destroy_workqueue(pblk->kw_wq);
+	if (pblk->close_wq)
+		destroy_workqueue(pblk->close_wq);
+
+	if (pblk->bb_wq)
+		destroy_workqueue(pblk->bb_wq);
 
 	mempool_destroy(pblk->page_pool);
 	mempool_destroy(pblk->line_ws_pool);
 	mempool_destroy(pblk->rec_pool);
-	mempool_destroy(pblk->r_rq_pool);
+	mempool_destroy(pblk->g_rq_pool);
 	mempool_destroy(pblk->w_rq_pool);
 	mempool_destroy(pblk->line_meta_pool);
 
 	kmem_cache_destroy(pblk_blk_ws_cache);
 	kmem_cache_destroy(pblk_rec_cache);
-	kmem_cache_destroy(pblk_r_rq_cache);
+	kmem_cache_destroy(pblk_g_rq_cache);
 	kmem_cache_destroy(pblk_w_rq_cache);
 	kmem_cache_destroy(pblk_line_meta_cache);
 }
@@ -344,6 +341,12 @@ static void pblk_luns_free(struct pblk *pblk)
 	kfree(pblk->luns);
 }
 
+static void pblk_free_line_bitmaps(struct pblk_line *line)
+{
+	kfree(line->blk_bitmap);
+	kfree(line->erase_bitmap);
+}
+
 static void pblk_lines_free(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -355,8 +358,7 @@ static void pblk_lines_free(struct pblk *pblk)
 		line = &pblk->lines[i];
 
 		pblk_line_free(pblk, line);
-		kfree(line->blk_bitmap);
-		kfree(line->erase_bitmap);
+		pblk_free_line_bitmaps(line);
 	}
 	spin_unlock(&l_mg->free_lock);
 }
@@ -368,11 +370,15 @@ static void pblk_line_meta_free(struct pblk *pblk)
 
 	kfree(l_mg->bb_template);
 	kfree(l_mg->bb_aux);
+	kfree(l_mg->vsc_list);
 
+	spin_lock(&l_mg->free_lock);
 	for (i = 0; i < PBLK_DATA_LINES; i++) {
-		pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
-		pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
+		kfree(l_mg->sline_meta[i]);
+		pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
+		kfree(l_mg->eline_meta[i]);
 	}
+	spin_unlock(&l_mg->free_lock);
 
 	kfree(pblk->lines);
 }
@@ -411,13 +417,31 @@ out:
 	return ret;
 }
 
-static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
+static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
+			int blk_per_line)
 {
-	struct pblk_line_meta *lm = &pblk->lm;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
 	struct pblk_lun *rlun;
 	int bb_cnt = 0;
 	int i;
 
+	for (i = 0; i < blk_per_line; i++) {
+		rlun = &pblk->luns[i];
+		if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
+			continue;
+
+		set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
+		bb_cnt++;
+	}
+
+	return bb_cnt;
+}
+
+static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+
 	line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
 	if (!line->blk_bitmap)
 		return -ENOMEM;
@@ -428,16 +452,7 @@ static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < lm->blk_per_line; i++) {
-		rlun = &pblk->luns[i];
-		if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
-			continue;
-
-		set_bit(i, line->blk_bitmap);
-		bb_cnt++;
-	}
-
-	return bb_cnt;
+	return 0;
 }
 
 static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
@@ -505,12 +520,32 @@ static int pblk_lines_configure(struct pblk *pblk, int flags)
 }
 
 /* See comment over struct line_emeta definition */
-static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm)
+static unsigned int calc_emeta_len(struct pblk *pblk)
 {
-	return (sizeof(struct line_emeta) +
-			((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) +
-			(pblk->l_mg.nr_lines * sizeof(u32)) +
-			lm->blk_bitmap_len);
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+
+	/* Round to sector size so that lba_list starts on its own sector */
+	lm->emeta_sec[1] = DIV_ROUND_UP(
+			sizeof(struct line_emeta) + lm->blk_bitmap_len,
+			geo->sec_size);
+	lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;
+
+	/* Round to sector size so that vsc_list starts on its own sector */
+	lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
+	lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
+			geo->sec_size);
+	lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;
+
+	lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
+			geo->sec_size);
+	lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;
+
+	lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
+
+	return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
 }
 
 static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
@@ -534,6 +569,78 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
 	atomic_set(&pblk->rl.free_blocks, nr_free_blks);
 }
 
+static int pblk_lines_alloc_metadata(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	int i;
+
+	/* smeta is always small enough to fit on a kmalloc memory allocation,
+	 * emeta depends on the number of LUNs allocated to the pblk instance
+	 */
+	for (i = 0; i < PBLK_DATA_LINES; i++) {
+		l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
+		if (!l_mg->sline_meta[i])
+			goto fail_free_smeta;
+	}
+
+	/* emeta allocates three different buffers for managing metadata with
+	 * in-memory and in-media layouts
+	 */
+	for (i = 0; i < PBLK_DATA_LINES; i++) {
+		struct pblk_emeta *emeta;
+
+		emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
+		if (!emeta)
+			goto fail_free_emeta;
+
+		if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
+			l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
+
+			emeta->buf = vmalloc(lm->emeta_len[0]);
+			if (!emeta->buf) {
+				kfree(emeta);
+				goto fail_free_emeta;
+			}
+
+			emeta->nr_entries = lm->emeta_sec[0];
+			l_mg->eline_meta[i] = emeta;
+		} else {
+			l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
+
+			emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
+			if (!emeta->buf) {
+				kfree(emeta);
+				goto fail_free_emeta;
+			}
+
+			emeta->nr_entries = lm->emeta_sec[0];
+			l_mg->eline_meta[i] = emeta;
+		}
+	}
+
+	l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
+	if (!l_mg->vsc_list)
+		goto fail_free_emeta;
+
+	for (i = 0; i < l_mg->nr_lines; i++)
+		l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
+
+	return 0;
+
+fail_free_emeta:
+	while (--i >= 0) {
+		vfree(l_mg->eline_meta[i]->buf);
+		kfree(l_mg->eline_meta[i]);
+	}
+
+fail_free_smeta:
+	for (i = 0; i < PBLK_DATA_LINES; i++)
+		kfree(l_mg->sline_meta[i]);
+
+	return -ENOMEM;
+}
+
 static int pblk_lines_init(struct pblk *pblk)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
@@ -542,10 +649,32 @@ static int pblk_lines_init(struct pblk *pblk)
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_line *line;
 	unsigned int smeta_len, emeta_len;
-	long nr_bad_blks, nr_meta_blks, nr_free_blks;
-	int bb_distance;
-	int i;
-	int ret;
+	long nr_bad_blks, nr_free_blks;
+	int bb_distance, max_write_ppas, mod;
+	int i, ret;
+
+	pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
+	max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
+	pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
+				max_write_ppas : nvm_max_phys_sects(dev);
+	pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
+
+	if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
+		pr_err("pblk: cannot support device max_phys_sect\n");
+		return -EINVAL;
+	}
+
+	div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
+	if (mod) {
+		pr_err("pblk: bad configuration of sectors/pages\n");
+		return -EINVAL;
+	}
+
+	l_mg->nr_lines = geo->blks_per_lun;
+	l_mg->log_line = l_mg->data_line = NULL;
+	l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
+	l_mg->nr_free_lines = 0;
+	bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
 
 	lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
 	lm->blk_per_line = geo->nr_luns;
@@ -554,20 +683,17 @@ static int pblk_lines_init(struct pblk *pblk)
 	lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
 	lm->high_thrs = lm->sec_per_line / 2;
 	lm->mid_thrs = lm->sec_per_line / 4;
+	lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;
 
 	/* Calculate necessary pages for smeta. See comment over struct
 	 * line_smeta definition
 	 */
-	lm->smeta_len = sizeof(struct line_smeta) +
-				PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
-
 	i = 1;
 add_smeta_page:
 	lm->smeta_sec = i * geo->sec_per_pl;
 	lm->smeta_len = lm->smeta_sec * geo->sec_size;
 
-	smeta_len = sizeof(struct line_smeta) +
-				PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
+	smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
 	if (smeta_len > lm->smeta_len) {
 		i++;
 		goto add_smeta_page;
@@ -578,66 +704,28 @@ add_smeta_page:
 	 */
 	i = 1;
 add_emeta_page:
-	lm->emeta_sec = i * geo->sec_per_pl;
-	lm->emeta_len = lm->emeta_sec * geo->sec_size;
+	lm->emeta_sec[0] = i * geo->sec_per_pl;
+	lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;
 
-	emeta_len = calc_emeta_len(pblk, lm);
-	if (emeta_len > lm->emeta_len) {
+	emeta_len = calc_emeta_len(pblk);
+	if (emeta_len > lm->emeta_len[0]) {
 		i++;
 		goto add_emeta_page;
 	}
-	lm->emeta_bb = geo->nr_luns - i;
-
-	nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
-				(geo->sec_per_blk / 2)) / geo->sec_per_blk;
-	lm->min_blk_line = nr_meta_blks + 1;
-
-	l_mg->nr_lines = geo->blks_per_lun;
-	l_mg->log_line = l_mg->data_line = NULL;
-	l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
-	l_mg->nr_free_lines = 0;
-	bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
 
-	/* smeta is always small enough to fit on a kmalloc memory allocation,
-	 * emeta depends on the number of LUNs allocated to the pblk instance
-	 */
-	l_mg->smeta_alloc_type = PBLK_KMALLOC_META;
-	for (i = 0; i < PBLK_DATA_LINES; i++) {
-		l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL);
-		if (!l_mg->sline_meta[i].meta)
-			while (--i >= 0) {
-				kfree(l_mg->sline_meta[i].meta);
-				ret = -ENOMEM;
-				goto fail;
-			}
+	lm->emeta_bb = geo->nr_luns - i;
+	lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
+							geo->sec_per_blk);
+	if (lm->min_blk_line > lm->blk_per_line) {
+		pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
+							lm->blk_per_line);
+		ret = -EINVAL;
+		goto fail;
 	}
 
-	if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) {
-		l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
-
-		for (i = 0; i < PBLK_DATA_LINES; i++) {
-			l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len);
-			if (!l_mg->eline_meta[i].meta)
-				while (--i >= 0) {
-					vfree(l_mg->eline_meta[i].meta);
-					ret = -ENOMEM;
-					goto fail;
-				}
-		}
-	} else {
-		l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
-
-		for (i = 0; i < PBLK_DATA_LINES; i++) {
-			l_mg->eline_meta[i].meta =
-					kmalloc(lm->emeta_len, GFP_KERNEL);
-			if (!l_mg->eline_meta[i].meta)
-				while (--i >= 0) {
-					kfree(l_mg->eline_meta[i].meta);
-					ret = -ENOMEM;
-					goto fail;
-				}
-		}
-	}
+	ret = pblk_lines_alloc_metadata(pblk);
+	if (ret)
+		goto fail;
 
 	l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
 	if (!l_mg->bb_template) {
@@ -664,11 +752,14 @@ add_emeta_page:
 	INIT_LIST_HEAD(&l_mg->gc_low_list);
 	INIT_LIST_HEAD(&l_mg->gc_empty_list);
 
+	INIT_LIST_HEAD(&l_mg->emeta_list);
+
 	l_mg->gc_lists[0] = &l_mg->gc_high_list;
 	l_mg->gc_lists[1] = &l_mg->gc_mid_list;
 	l_mg->gc_lists[2] = &l_mg->gc_low_list;
 
 	spin_lock_init(&l_mg->free_lock);
+	spin_lock_init(&l_mg->close_lock);
 	spin_lock_init(&l_mg->gc_lock);
 
 	pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
@@ -689,10 +780,16 @@ add_emeta_page:
 		line->type = PBLK_LINETYPE_FREE;
 		line->state = PBLK_LINESTATE_FREE;
 		line->gc_group = PBLK_LINEGC_NONE;
+		line->vsc = &l_mg->vsc_list[i];
 		spin_lock_init(&line->lock);
 
-		nr_bad_blks = pblk_bb_line(pblk, line);
+		ret = pblk_alloc_line_bitmaps(pblk, line);
+		if (ret)
+			goto fail_free_lines;
+
+		nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
 		if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
+			pblk_free_line_bitmaps(line);
 			ret = -EINVAL;
 			goto fail_free_lines;
 		}
@@ -713,24 +810,20 @@ add_emeta_page:
 
 	pblk_set_provision(pblk, nr_free_blks);
 
-	sema_init(&pblk->erase_sem, 1);
-
 	/* Cleanup per-LUN bad block lists - managed within lines on run-time */
 	for (i = 0; i < geo->nr_luns; i++)
 		kfree(pblk->luns[i].bb_list);
 
 	return 0;
 fail_free_lines:
-	kfree(pblk->lines);
+	while (--i >= 0)
+		pblk_free_line_bitmaps(&pblk->lines[i]);
 fail_free_bb_aux:
 	kfree(l_mg->bb_aux);
 fail_free_bb_template:
 	kfree(l_mg->bb_template);
 fail_free_meta:
-	for (i = 0; i < PBLK_DATA_LINES; i++) {
-		pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
-		pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
-	}
+	pblk_line_meta_free(pblk);
 fail:
 	for (i = 0; i < geo->nr_luns; i++)
 		kfree(pblk->luns[i].bb_list);
@@ -754,6 +847,15 @@ static int pblk_writer_init(struct pblk *pblk)
 
 static void pblk_writer_stop(struct pblk *pblk)
 {
+	/* The pipeline must be stopped and the write buffer emptied before the
+	 * write thread is stopped
+	 */
+	WARN(pblk_rb_read_count(&pblk->rwb),
+			"Stopping not fully persisted write buffer\n");
+
+	WARN(pblk_rb_sync_count(&pblk->rwb),
+			"Stopping not fully synced write buffer\n");
+
 	if (pblk->writer_ts)
 		kthread_stop(pblk->writer_ts);
 	del_timer(&pblk->wtimer);
@@ -772,10 +874,9 @@ static void pblk_free(struct pblk *pblk)
 
 static void pblk_tear_down(struct pblk *pblk)
 {
-	pblk_flush_writer(pblk);
+	pblk_pipeline_stop(pblk);
 	pblk_writer_stop(pblk);
 	pblk_rb_sync_l2p(&pblk->rwb);
-	pblk_recov_pad(pblk);
 	pblk_rwb_free(pblk);
 	pblk_rl_free(&pblk->rl);
 
@@ -821,6 +922,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
 
 	pblk->dev = dev;
 	pblk->disk = tdisk;
+	pblk->state = PBLK_STATE_RUNNING;
 
 	spin_lock_init(&pblk->trans_lock);
 	spin_lock_init(&pblk->lock);
@@ -836,8 +938,8 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
 	atomic_long_set(&pblk->req_writes, 0);
 	atomic_long_set(&pblk->sub_writes, 0);
 	atomic_long_set(&pblk->sync_writes, 0);
-	atomic_long_set(&pblk->compl_writes, 0);
 	atomic_long_set(&pblk->inflight_reads, 0);
+	atomic_long_set(&pblk->cache_reads, 0);
 	atomic_long_set(&pblk->sync_reads, 0);
 	atomic_long_set(&pblk->recov_writes, 0);
 	atomic_long_set(&pblk->recov_writes, 0);
@@ -946,11 +1048,20 @@ static struct nvm_tgt_type tt_pblk = {
 
 static int __init pblk_module_init(void)
 {
-	return nvm_register_tgt_type(&tt_pblk);
+	int ret;
+
+	pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
+	if (!pblk_bio_set)
+		return -ENOMEM;
+	ret = nvm_register_tgt_type(&tt_pblk);
+	if (ret)
+		bioset_free(pblk_bio_set);
+	return ret;
 }
 
 static void pblk_module_exit(void)
 {
+	bioset_free(pblk_bio_set);
 	nvm_unregister_tgt_type(&tt_pblk);
 }
 
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 17c16955284d..fddb924f6dde 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -25,9 +25,9 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
 			       unsigned int valid_secs)
 {
 	struct pblk_line *line = pblk_line_get_data(pblk);
-	struct line_emeta *emeta = line->emeta;
+	struct pblk_emeta *emeta = line->emeta;
 	struct pblk_w_ctx *w_ctx;
-	__le64 *lba_list = pblk_line_emeta_to_lbas(emeta);
+	__le64 *lba_list = emeta_to_lbas(pblk, emeta->buf);
 	u64 paddr;
 	int nr_secs = pblk->min_write_pgs;
 	int i;
@@ -51,18 +51,20 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
 			w_ctx->ppa = ppa_list[i];
 			meta_list[i].lba = cpu_to_le64(w_ctx->lba);
 			lba_list[paddr] = cpu_to_le64(w_ctx->lba);
-			le64_add_cpu(&line->emeta->nr_valid_lbas, 1);
+			line->nr_valid_lbas++;
 		} else {
-			meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
-			lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
-			pblk_map_pad_invalidate(pblk, line, paddr);
+			__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+			lba_list[paddr] = meta_list[i].lba = addr_empty;
+			__pblk_map_invalidate(pblk, line, paddr);
 		}
 	}
 
 	if (pblk_line_is_full(line)) {
-		line = pblk_line_replace_data(pblk);
-		if (!line)
-			return;
+		struct pblk_line *prev_line = line;
+
+		pblk_line_replace_data(pblk);
+		pblk_line_close_meta(pblk, prev_line);
 	}
 
 	pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
@@ -91,8 +93,9 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	struct pblk_line *e_line = pblk_line_get_data_next(pblk);
+	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	struct pblk_line *e_line, *d_line;
 	unsigned int map_secs;
 	int min = pblk->min_write_pgs;
 	int i, erase_lun;
@@ -102,35 +105,63 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
 		pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
 					lun_bitmap, &meta_list[i], map_secs);
 
-		erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls +
-							rqd->ppa_list[i].g.ch;
+		erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
 
-		if (!test_bit(erase_lun, e_line->erase_bitmap)) {
-			if (down_trylock(&pblk->erase_sem))
-				continue;
+		/* line can change after page map. We might also be writing the
+		 * last line.
+		 */
+		e_line = pblk_line_get_erase(pblk);
+		if (!e_line)
+			return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
+							valid_secs, i + min);
 
+		spin_lock(&e_line->lock);
+		if (!test_bit(erase_lun, e_line->erase_bitmap)) {
 			set_bit(erase_lun, e_line->erase_bitmap);
 			atomic_dec(&e_line->left_eblks);
+
 			*erase_ppa = rqd->ppa_list[i];
 			erase_ppa->g.blk = e_line->id;
 
+			spin_unlock(&e_line->lock);
+
 			/* Avoid evaluating e_line->left_eblks */
 			return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
 							valid_secs, i + min);
 		}
+		spin_unlock(&e_line->lock);
 	}
 
-	/* Erase blocks that are bad in this line but might not be in next */
-	if (unlikely(ppa_empty(*erase_ppa))) {
-		struct pblk_line_meta *lm = &pblk->lm;
+	d_line = pblk_line_get_data(pblk);
+
+	/* line can change after page map. We might also be writing the
+	 * last line.
+	 */
+	e_line = pblk_line_get_erase(pblk);
+	if (!e_line)
+		return;
 
-		i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line);
-		if (i == lm->blk_per_line)
+	/* Erase blocks that are bad in this line but might not be in next */
+	if (unlikely(ppa_empty(*erase_ppa)) &&
+			bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
+		int bit = -1;
+
+retry:
+		bit = find_next_bit(d_line->blk_bitmap,
+						lm->blk_per_line, bit + 1);
+		if (bit >= lm->blk_per_line)
 			return;
 
-		set_bit(i, e_line->erase_bitmap);
+		spin_lock(&e_line->lock);
+		if (test_bit(bit, e_line->erase_bitmap)) {
+			spin_unlock(&e_line->lock);
+			goto retry;
+		}
+		spin_unlock(&e_line->lock);
+
+		set_bit(bit, e_line->erase_bitmap);
 		atomic_dec(&e_line->left_eblks);
-		*erase_ppa = pblk->luns[i].bppa; /* set ch and lun */
+		*erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
 		erase_ppa->g.blk = e_line->id;
 	}
 }
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index 045384ddc1f9..5ecc154f6831 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -150,6 +150,7 @@ try:
 	/* Release flags on context. Protect from writes and reads */
 	smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
 	pblk_ppa_set_empty(&w_ctx->ppa);
+	w_ctx->lba = ADDR_EMPTY;
 }
 
 #define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
@@ -180,6 +181,14 @@ unsigned int pblk_rb_read_count(struct pblk_rb *rb)
 	return pblk_rb_ring_count(mem, subm, rb->nr_entries);
 }
 
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
+{
+	unsigned int mem = READ_ONCE(rb->mem);
+	unsigned int sync = READ_ONCE(rb->sync);
+
+	return pblk_rb_ring_count(mem, sync, rb->nr_entries);
+}
+
 unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
 {
 	unsigned int subm;
@@ -199,12 +208,22 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
 	struct pblk_line *line;
 	struct pblk_rb_entry *entry;
 	struct pblk_w_ctx *w_ctx;
+	unsigned int user_io = 0, gc_io = 0;
 	unsigned int i;
+	int flags;
 
 	for (i = 0; i < to_update; i++) {
 		entry = &rb->entries[*l2p_upd];
 		w_ctx = &entry->w_ctx;
 
+		flags = READ_ONCE(entry->w_ctx.flags);
+		if (flags & PBLK_IOTYPE_USER)
+			user_io++;
+		else if (flags & PBLK_IOTYPE_GC)
+			gc_io++;
+		else
+			WARN(1, "pblk: unknown IO type\n");
+
 		pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
 							entry->cacheline);
 
@@ -214,6 +233,8 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
 		*l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
 	}
 
+	pblk_rl_out(&pblk->rl, user_io, gc_io);
+
 	return 0;
 }
 
@@ -357,6 +378,9 @@ static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio,
 	/* Protect syncs */
 	smp_store_release(&rb->sync_point, sync_point);
 
+	if (!bio)
+		return 0;
+
 	spin_lock_irq(&rb->s_lock);
 	bio_list_add(&entry->w_ctx.bios, bio);
 	spin_unlock_irq(&rb->s_lock);
@@ -395,6 +419,17 @@ static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
 	return 1;
 }
 
+void pblk_rb_flush(struct pblk_rb *rb)
+{
+	struct pblk *pblk = container_of(rb, struct pblk, rwb);
+	unsigned int mem = READ_ONCE(rb->mem);
+
+	if (pblk_rb_sync_point_set(rb, NULL, mem))
+		return;
+
+	pblk_write_should_kick(pblk);
+}
+
 static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
 				   unsigned int *pos, struct bio *bio,
 				   int *io_ret)
@@ -431,15 +466,16 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
 			   unsigned int nr_entries, unsigned int *pos)
 {
 	struct pblk *pblk = container_of(rb, struct pblk, rwb);
-	int flush_done;
+	int io_ret;
 
 	spin_lock(&rb->w_lock);
-	if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) {
+	io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
+	if (io_ret) {
 		spin_unlock(&rb->w_lock);
-		return NVM_IO_REQUEUE;
+		return io_ret;
 	}
 
-	if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) {
+	if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
 		spin_unlock(&rb->w_lock);
 		return NVM_IO_REQUEUE;
 	}
@@ -447,7 +483,7 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
 	pblk_rl_user_in(&pblk->rl, nr_entries);
 	spin_unlock(&rb->w_lock);
 
-	return flush_done;
+	return io_ret;
 }
 
 /*
@@ -521,20 +557,18 @@ out:
  * This function is used by the write thread to form the write bio that will
  * persist data on the write buffer to the media.
  */
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
-				 struct pblk_c_ctx *c_ctx,
-				 unsigned int pos,
-				 unsigned int nr_entries,
-				 unsigned int count)
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+				 struct bio *bio, unsigned int pos,
+				 unsigned int nr_entries, unsigned int count)
 {
 	struct pblk *pblk = container_of(rb, struct pblk, rwb);
+	struct request_queue *q = pblk->dev->q;
+	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
 	struct pblk_rb_entry *entry;
 	struct page *page;
-	unsigned int pad = 0, read = 0, to_read = nr_entries;
-	unsigned int user_io = 0, gc_io = 0;
+	unsigned int pad = 0, to_read = nr_entries;
 	unsigned int i;
 	int flags;
-	int ret;
 
 	if (count < nr_entries) {
 		pad = nr_entries - count;
@@ -553,15 +587,10 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
 		 */
 try:
 		flags = READ_ONCE(entry->w_ctx.flags);
-		if (!(flags & PBLK_WRITTEN_DATA))
+		if (!(flags & PBLK_WRITTEN_DATA)) {
+			io_schedule();
 			goto try;
-
-		if (flags & PBLK_IOTYPE_USER)
-			user_io++;
-		else if (flags & PBLK_IOTYPE_GC)
-			gc_io++;
-		else
-			WARN(1, "pblk: unknown IO type\n");
+		}
 
 		page = virt_to_page(entry->data);
 		if (!page) {
@@ -570,17 +599,17 @@ try:
 			flags |= PBLK_SUBMITTED_ENTRY;
 			/* Release flags on context. Protect from writes */
 			smp_store_release(&entry->w_ctx.flags, flags);
-			goto out;
+			return NVM_IO_ERR;
 		}
 
-		ret = bio_add_page(bio, page, rb->seg_size, 0);
-		if (ret != rb->seg_size) {
+		if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
+								rb->seg_size) {
 			pr_err("pblk: could not add page to write bio\n");
 			flags &= ~PBLK_WRITTEN_DATA;
 			flags |= PBLK_SUBMITTED_ENTRY;
 			/* Release flags on context. Protect from writes */
 			smp_store_release(&entry->w_ctx.flags, flags);
-			goto out;
+			return NVM_IO_ERR;
 		}
 
 		if (flags & PBLK_FLUSH_ENTRY) {
@@ -607,14 +636,19 @@ try:
 		pos = (pos + 1) & (rb->nr_entries - 1);
 	}
 
-	read = to_read;
-	pblk_rl_out(&pblk->rl, user_io, gc_io);
+	if (pad) {
+		if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
+			pr_err("pblk: could not pad page in write bio\n");
+			return NVM_IO_ERR;
+		}
+	}
+
 #ifdef CONFIG_NVM_DEBUG
 	atomic_long_add(pad, &((struct pblk *)
 			(container_of(rb, struct pblk, rwb)))->padded_writes);
 #endif
-out:
-	return read;
+
+	return NVM_IO_OK;
 }
 
 /*
@@ -623,15 +657,17 @@ out:
  * be directed to disk.
  */
 int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
-			u64 pos, int bio_iter)
+			struct ppa_addr ppa, int bio_iter)
 {
+	struct pblk *pblk = container_of(rb, struct pblk, rwb);
 	struct pblk_rb_entry *entry;
 	struct pblk_w_ctx *w_ctx;
+	struct ppa_addr l2p_ppa;
+	u64 pos = pblk_addr_to_cacheline(ppa);
 	void *data;
 	int flags;
 	int ret = 1;
 
-	spin_lock(&rb->w_lock);
 
 #ifdef CONFIG_NVM_DEBUG
 	/* Caller must ensure that the access will not cause an overflow */
@@ -641,8 +677,14 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
 	w_ctx = &entry->w_ctx;
 	flags = READ_ONCE(w_ctx->flags);
 
+	spin_lock(&rb->w_lock);
+	spin_lock(&pblk->trans_lock);
+	l2p_ppa = pblk_trans_map_get(pblk, lba);
+	spin_unlock(&pblk->trans_lock);
+
 	/* Check if the entry has been overwritten or is scheduled to be */
-	if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) {
+	if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
+						flags & PBLK_WRITABLE_ENTRY) {
 		ret = 0;
 		goto out;
 	}
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 4a12f14d78c6..4e5c48f3de62 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -34,8 +34,7 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
 	BUG_ON(!pblk_addr_in_cache(ppa));
 #endif
 
-	return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba,
-					pblk_addr_to_cacheline(ppa), bio_iter);
+	return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, bio_iter);
 }
 
 static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -76,6 +75,9 @@ retry:
 			}
 			WARN_ON(test_and_set_bit(i, read_bitmap));
 			advanced_bio = 1;
+#ifdef CONFIG_NVM_DEBUG
+			atomic_long_inc(&pblk->cache_reads);
+#endif
 		} else {
 			/* Read from media non-cached sectors */
 			rqd->ppa_list[j++] = p;
@@ -85,6 +87,11 @@ retry:
 			bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
 	}
 
+	if (pblk_io_aligned(pblk, nr_secs))
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+	else
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
 #ifdef CONFIG_NVM_DEBUG
 	atomic_long_add(nr_secs, &pblk->inflight_reads);
 #endif
@@ -94,8 +101,6 @@ static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
 {
 	int err;
 
-	rqd->flags = pblk_set_read_mode(pblk);
-
 	err = pblk_submit_io(pblk, rqd);
 	if (err)
 		return NVM_IO_ERR;
@@ -107,27 +112,27 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
 {
 	struct pblk *pblk = rqd->private;
 	struct nvm_tgt_dev *dev = pblk->dev;
-	struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
 	struct bio *bio = rqd->bio;
 
 	if (rqd->error)
 		pblk_log_read_err(pblk, rqd);
 #ifdef CONFIG_NVM_DEBUG
 	else
-		WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n");
+		WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
 #endif
 
-	if (rqd->nr_ppas > 1)
-		nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
+	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 
 	bio_put(bio);
-	if (r_ctx->orig_bio) {
+	if (r_ctx->private) {
+		struct bio *orig_bio = r_ctx->private;
+
 #ifdef CONFIG_NVM_DEBUG
-		WARN_ONCE(r_ctx->orig_bio->bi_error,
-						"pblk: corrupted read bio\n");
+		WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n");
 #endif
-		bio_endio(r_ctx->orig_bio);
-		bio_put(r_ctx->orig_bio);
+		bio_endio(orig_bio);
+		bio_put(orig_bio);
 	}
 
 #ifdef CONFIG_NVM_DEBUG
@@ -136,6 +141,7 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
 #endif
 
 	pblk_free_rqd(pblk, rqd, READ);
+	atomic_dec(&pblk->inflight_io);
 }
 
 static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
@@ -173,6 +179,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 
 	rqd->bio = new_bio;
 	rqd->nr_ppas = nr_holes;
+	rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
 	rqd->end_io = NULL;
 
 	if (unlikely(nr_secs > 1 && nr_holes == 1)) {
@@ -280,9 +287,14 @@ retry:
 			goto retry;
 		}
 		WARN_ON(test_and_set_bit(0, read_bitmap));
+#ifdef CONFIG_NVM_DEBUG
+			atomic_long_inc(&pblk->cache_reads);
+#endif
 	} else {
 		rqd->ppa_addr = ppa;
 	}
+
+	rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
 }
 
 int pblk_submit_read(struct pblk *pblk, struct bio *bio)
@@ -316,13 +328,16 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 	 */
 	bio_init_idx = pblk_get_bi_idx(bio);
 
+	rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+							&rqd->dma_meta_list);
+	if (!rqd->meta_list) {
+		pr_err("pblk: not able to allocate ppa list\n");
+		goto fail_rqd_free;
+	}
+
 	if (nr_secs > 1) {
-		rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
-						&rqd->dma_ppa_list);
-		if (!rqd->ppa_list) {
-			pr_err("pblk: not able to allocate ppa list\n");
-			goto fail_rqd_free;
-		}
+		rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
+		rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
 
 		pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
 	} else {
@@ -332,6 +347,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 	bio_get(bio);
 	if (bitmap_full(&read_bitmap, nr_secs)) {
 		bio_endio(bio);
+		atomic_inc(&pblk->inflight_io);
 		pblk_end_io_read(rqd);
 		return NVM_IO_OK;
 	}
@@ -339,17 +355,17 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 	/* All sectors are to be read from the device */
 	if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
 		struct bio *int_bio = NULL;
-		struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+		struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
 
 		/* Clone read bio to deal with read errors internally */
-		int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
+		int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
 		if (!int_bio) {
 			pr_err("pblk: could not clone read bio\n");
 			return NVM_IO_ERR;
 		}
 
 		rqd->bio = int_bio;
-		r_ctx->orig_bio = bio;
+		r_ctx->private = bio;
 
 		ret = pblk_submit_read_io(pblk, rqd);
 		if (ret) {
@@ -445,7 +461,6 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	struct request_queue *q = dev->q;
 	struct bio *bio;
 	struct nvm_rq rqd;
 	int ret, data_len;
@@ -453,22 +468,19 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 
 	memset(&rqd, 0, sizeof(struct nvm_rq));
 
+	rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+							&rqd.dma_meta_list);
+	if (!rqd.meta_list)
+		return NVM_IO_ERR;
+
 	if (nr_secs > 1) {
-		rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
-							&rqd.dma_ppa_list);
-		if (!rqd.ppa_list)
-			return NVM_IO_ERR;
+		rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+		rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
 
 		*secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
 								nr_secs);
-		if (*secs_to_gc == 1) {
-			struct ppa_addr ppa;
-
-			ppa = rqd.ppa_list[0];
-			nvm_dev_dma_free(dev->parent, rqd.ppa_list,
-							rqd.dma_ppa_list);
-			rqd.ppa_addr = ppa;
-		}
+		if (*secs_to_gc == 1)
+			rqd.ppa_addr = rqd.ppa_list[0];
 	} else {
 		*secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
 	}
@@ -477,7 +489,8 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 		goto out;
 
 	data_len = (*secs_to_gc) * geo->sec_size;
-	bio = bio_map_kern(q, data, data_len, GFP_KERNEL);
+	bio = pblk_bio_map_addr(pblk, data, *secs_to_gc, data_len,
+						PBLK_KMALLOC_META, GFP_KERNEL);
 	if (IS_ERR(bio)) {
 		pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
 		goto err_free_dma;
@@ -490,6 +503,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 	rqd.end_io = pblk_end_io_sync;
 	rqd.private = &wait;
 	rqd.nr_ppas = *secs_to_gc;
+	rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
 	rqd.bio = bio;
 
 	ret = pblk_submit_read_io(pblk, &rqd);
@@ -503,6 +517,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
 		pr_err("pblk: GC read I/O timed out\n");
 	}
+	atomic_dec(&pblk->inflight_io);
 
 	if (rqd.error) {
 		atomic_long_inc(&pblk->read_failed_gc);
@@ -518,12 +533,10 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 #endif
 
 out:
-	if (rqd.nr_ppas > 1)
-		nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+	nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
 	return NVM_IO_OK;
 
 err_free_dma:
-	if (rqd.nr_ppas > 1)
-		nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+	nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
 	return NVM_IO_ERR;
 }
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index f8f85087cd3c..0e48d3e4e143 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -120,18 +120,18 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
 	return 0;
 }
 
-__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta)
+__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta_buf)
 {
 	u32 crc;
 
-	crc = pblk_calc_emeta_crc(pblk, emeta);
-	if (le32_to_cpu(emeta->crc) != crc)
+	crc = pblk_calc_emeta_crc(pblk, emeta_buf);
+	if (le32_to_cpu(emeta_buf->crc) != crc)
 		return NULL;
 
-	if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC)
+	if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
 		return NULL;
 
-	return pblk_line_emeta_to_lbas(emeta);
+	return emeta_to_lbas(pblk, emeta_buf);
 }
 
 static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
@@ -139,19 +139,20 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct pblk_line_meta *lm = &pblk->lm;
-	struct line_emeta *emeta = line->emeta;
+	struct pblk_emeta *emeta = line->emeta;
+	struct line_emeta *emeta_buf = emeta->buf;
 	__le64 *lba_list;
 	int data_start;
 	int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
 	int i;
 
-	lba_list = pblk_recov_get_lba_list(pblk, emeta);
+	lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
 	if (!lba_list)
 		return 1;
 
 	data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
-	nr_data_lbas = lm->sec_per_line - lm->emeta_sec;
-	nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas);
+	nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0];
+	nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
 
 	for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
 		struct ppa_addr ppa;
@@ -169,7 +170,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
 			if (test_and_set_bit(i, line->invalid_bitmap))
 				WARN_ONCE(1, "pblk: rec. double invalidate:\n");
 			else
-				line->vsc--;
+				le32_add_cpu(line->vsc, -1);
 			spin_unlock(&line->lock);
 
 			continue;
@@ -181,7 +182,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
 
 	if (nr_valid_lbas != nr_lbas)
 		pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n",
-				line->id, line->emeta->nr_valid_lbas, nr_lbas);
+				line->id, emeta_buf->nr_valid_lbas, nr_lbas);
 
 	line->left_msecs = 0;
 
@@ -195,7 +196,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
 	struct pblk_line_meta *lm = &pblk->lm;
 	int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
 
-	return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec -
+	return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
 				nr_bb * geo->sec_per_blk;
 }
 
@@ -240,7 +241,7 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
 	r_ptr_int = r_ptr;
 
 next_read_rq:
-	memset(rqd, 0, pblk_r_rq_size);
+	memset(rqd, 0, pblk_g_rq_size);
 
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 	if (!rq_ppas)
@@ -256,7 +257,6 @@ next_read_rq:
 
 	rqd->bio = bio;
 	rqd->opcode = NVM_OP_PREAD;
-	rqd->flags = pblk_set_read_mode(pblk);
 	rqd->meta_list = meta_list;
 	rqd->nr_ppas = rq_ppas;
 	rqd->ppa_list = ppa_list;
@@ -265,6 +265,11 @@ next_read_rq:
 	rqd->end_io = pblk_end_io_sync;
 	rqd->private = &wait;
 
+	if (pblk_io_aligned(pblk, rq_ppas))
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+	else
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
 		int pos;
@@ -295,7 +300,7 @@ next_read_rq:
 		pr_err("pblk: L2P recovery read timed out\n");
 		return -EINTR;
 	}
-
+	atomic_dec(&pblk->inflight_io);
 	reinit_completion(&wait);
 
 	/* At this point, the read should not fail. If it does, it is a problem
@@ -322,47 +327,94 @@ next_read_rq:
 	return 0;
 }
 
+static void pblk_recov_complete(struct kref *ref)
+{
+	struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
+
+	complete(&pad_rq->wait);
+}
+
+static void pblk_end_io_recov(struct nvm_rq *rqd)
+{
+	struct pblk_pad_rq *pad_rq = rqd->private;
+	struct pblk *pblk = pad_rq->pblk;
+	struct nvm_tgt_dev *dev = pblk->dev;
+
+	kref_put(&pad_rq->ref, pblk_recov_complete);
+	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
+	pblk_free_rqd(pblk, rqd, WRITE);
+}
+
 static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
-			      struct pblk_recov_alloc p, int left_ppas)
+			      int left_ppas)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr *ppa_list;
 	struct pblk_sec_meta *meta_list;
+	struct pblk_pad_rq *pad_rq;
 	struct nvm_rq *rqd;
 	struct bio *bio;
 	void *data;
 	dma_addr_t dma_ppa_list, dma_meta_list;
-	__le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta);
+	__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
 	u64 w_ptr = line->cur_sec;
-	int left_line_ppas = line->left_msecs;
-	int rq_ppas, rq_len;
+	int left_line_ppas, rq_ppas, rq_len;
 	int i, j;
 	int ret = 0;
-	DECLARE_COMPLETION_ONSTACK(wait);
 
-	ppa_list = p.ppa_list;
-	meta_list = p.meta_list;
-	rqd = p.rqd;
-	data = p.data;
-	dma_ppa_list = p.dma_ppa_list;
-	dma_meta_list = p.dma_meta_list;
+	spin_lock(&line->lock);
+	left_line_ppas = line->left_msecs;
+	spin_unlock(&line->lock);
+
+	pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
+	if (!pad_rq)
+		return -ENOMEM;
+
+	data = vzalloc(pblk->max_write_pgs * geo->sec_size);
+	if (!data) {
+		ret = -ENOMEM;
+		goto free_rq;
+	}
+
+	pad_rq->pblk = pblk;
+	init_completion(&pad_rq->wait);
+	kref_init(&pad_rq->ref);
 
 next_pad_rq:
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
-	if (!rq_ppas)
-		rq_ppas = pblk->min_write_pgs;
+	if (rq_ppas < pblk->min_write_pgs) {
+		pr_err("pblk: corrupted pad line %d\n", line->id);
+		goto free_rq;
+	}
+
 	rq_len = rq_ppas * geo->sec_size;
 
+	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
+	if (!meta_list) {
+		ret = -ENOMEM;
+		goto free_data;
+	}
+
+	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
+	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
+	rqd = pblk_alloc_rqd(pblk, WRITE);
+	if (IS_ERR(rqd)) {
+		ret = PTR_ERR(rqd);
+		goto fail_free_meta;
+	}
+	memset(rqd, 0, pblk_w_rq_size);
+
 	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
+	if (IS_ERR(bio)) {
+		ret = PTR_ERR(bio);
+		goto fail_free_rqd;
+	}
 
 	bio->bi_iter.bi_sector = 0; /* internal bio */
 	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
-	memset(rqd, 0, pblk_r_rq_size);
-
 	rqd->bio = bio;
 	rqd->opcode = NVM_OP_PWRITE;
 	rqd->flags = pblk_set_progr_mode(pblk, WRITE);
@@ -371,8 +423,8 @@ next_pad_rq:
 	rqd->ppa_list = ppa_list;
 	rqd->dma_ppa_list = dma_ppa_list;
 	rqd->dma_meta_list = dma_meta_list;
-	rqd->end_io = pblk_end_io_sync;
-	rqd->private = &wait;
+	rqd->end_io = pblk_end_io_recov;
+	rqd->private = pad_rq;
 
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
@@ -390,34 +442,51 @@ next_pad_rq:
 
 		for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
 			struct ppa_addr dev_ppa;
+			__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
 
 			dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
 
 			pblk_map_invalidate(pblk, dev_ppa);
-			meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
-			lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY);
+			lba_list[w_ptr] = meta_list[i].lba = addr_empty;
 			rqd->ppa_list[i] = dev_ppa;
 		}
 	}
 
+	kref_get(&pad_rq->ref);
+
 	ret = pblk_submit_io(pblk, rqd);
 	if (ret) {
 		pr_err("pblk: I/O submission failed: %d\n", ret);
-		return ret;
+		goto free_data;
 	}
 
-	if (!wait_for_completion_io_timeout(&wait,
-				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
-		pr_err("pblk: L2P recovery write timed out\n");
-	}
-	reinit_completion(&wait);
+	atomic_dec(&pblk->inflight_io);
 
 	left_line_ppas -= rq_ppas;
 	left_ppas -= rq_ppas;
-	if (left_ppas > 0 && left_line_ppas)
+	if (left_ppas && left_line_ppas)
 		goto next_pad_rq;
 
-	return 0;
+	kref_put(&pad_rq->ref, pblk_recov_complete);
+
+	if (!wait_for_completion_io_timeout(&pad_rq->wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: pad write timed out\n");
+		ret = -ETIME;
+	}
+
+free_rq:
+	kfree(pad_rq);
+free_data:
+	vfree(data);
+	return ret;
+
+fail_free_rqd:
+	pblk_free_rqd(pblk, rqd, WRITE);
+fail_free_meta:
+	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+	kfree(pad_rq);
+	return ret;
 }
 
 /* When this function is called, it means that not all upper pages have been
@@ -456,7 +525,7 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
 	rec_round = 0;
 
 next_rq:
-	memset(rqd, 0, pblk_r_rq_size);
+	memset(rqd, 0, pblk_g_rq_size);
 
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 	if (!rq_ppas)
@@ -472,7 +541,6 @@ next_rq:
 
 	rqd->bio = bio;
 	rqd->opcode = NVM_OP_PREAD;
-	rqd->flags = pblk_set_read_mode(pblk);
 	rqd->meta_list = meta_list;
 	rqd->nr_ppas = rq_ppas;
 	rqd->ppa_list = ppa_list;
@@ -481,6 +549,11 @@ next_rq:
 	rqd->end_io = pblk_end_io_sync;
 	rqd->private = &wait;
 
+	if (pblk_io_aligned(pblk, rq_ppas))
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+	else
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
 		int pos;
@@ -510,6 +583,7 @@ next_rq:
 				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
 		pr_err("pblk: L2P recovery read timed out\n");
 	}
+	atomic_dec(&pblk->inflight_io);
 	reinit_completion(&wait);
 
 	/* This should not happen since the read failed during normal recovery,
@@ -544,7 +618,7 @@ next_rq:
 		if (pad_secs > line->left_msecs)
 			pad_secs = line->left_msecs;
 
-		ret = pblk_recov_pad_oob(pblk, line, p, pad_secs);
+		ret = pblk_recov_pad_oob(pblk, line, pad_secs);
 		if (ret)
 			pr_err("pblk: OOB padding failed (err:%d)\n", ret);
 
@@ -552,7 +626,6 @@ next_rq:
 		if (ret)
 			pr_err("pblk: OOB read failed (err:%d)\n", ret);
 
-		line->left_ssecs = line->left_msecs;
 		left_ppas = 0;
 	}
 
@@ -591,7 +664,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 	*done = 1;
 
 next_rq:
-	memset(rqd, 0, pblk_r_rq_size);
+	memset(rqd, 0, pblk_g_rq_size);
 
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 	if (!rq_ppas)
@@ -607,7 +680,6 @@ next_rq:
 
 	rqd->bio = bio;
 	rqd->opcode = NVM_OP_PREAD;
-	rqd->flags = pblk_set_read_mode(pblk);
 	rqd->meta_list = meta_list;
 	rqd->nr_ppas = rq_ppas;
 	rqd->ppa_list = ppa_list;
@@ -616,6 +688,11 @@ next_rq:
 	rqd->end_io = pblk_end_io_sync;
 	rqd->private = &wait;
 
+	if (pblk_io_aligned(pblk, rq_ppas))
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+	else
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
 		int pos;
@@ -646,6 +723,7 @@ next_rq:
 				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
 		pr_err("pblk: L2P recovery read timed out\n");
 	}
+	atomic_dec(&pblk->inflight_io);
 	reinit_completion(&wait);
 
 	/* Reached the end of the written line */
@@ -658,7 +736,6 @@ next_rq:
 		/* Roll back failed sectors */
 		line->cur_sec -= nr_error_bits;
 		line->left_msecs += nr_error_bits;
-		line->left_ssecs = line->left_msecs;
 		bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
 
 		left_ppas = 0;
@@ -770,8 +847,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line *line, *tline, *data_line = NULL;
-	struct line_smeta *smeta;
-	struct line_emeta *emeta;
+	struct pblk_smeta *smeta;
+	struct pblk_emeta *emeta;
+	struct line_smeta *smeta_buf;
 	int found_lines = 0, recovered_lines = 0, open_lines = 0;
 	int is_next = 0;
 	int meta_line;
@@ -784,8 +862,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 	spin_lock(&l_mg->free_lock);
 	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
 	set_bit(meta_line, &l_mg->meta_bitmap);
-	smeta = l_mg->sline_meta[meta_line].meta;
-	emeta = l_mg->eline_meta[meta_line].meta;
+	smeta = l_mg->sline_meta[meta_line];
+	emeta = l_mg->eline_meta[meta_line];
+	smeta_buf = (struct line_smeta *)smeta;
 	spin_unlock(&l_mg->free_lock);
 
 	/* Order data lines using their sequence number */
@@ -796,33 +875,33 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 
 		memset(smeta, 0, lm->smeta_len);
 		line->smeta = smeta;
-		line->lun_bitmap = ((void *)(smeta)) +
+		line->lun_bitmap = ((void *)(smeta_buf)) +
 						sizeof(struct line_smeta);
 
 		/* Lines that cannot be read are assumed as not written here */
 		if (pblk_line_read_smeta(pblk, line))
 			continue;
 
-		crc = pblk_calc_smeta_crc(pblk, smeta);
-		if (le32_to_cpu(smeta->crc) != crc)
+		crc = pblk_calc_smeta_crc(pblk, smeta_buf);
+		if (le32_to_cpu(smeta_buf->crc) != crc)
 			continue;
 
-		if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC)
+		if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
 			continue;
 
-		if (le16_to_cpu(smeta->header.version) != 1) {
+		if (le16_to_cpu(smeta_buf->header.version) != 1) {
 			pr_err("pblk: found incompatible line version %u\n",
-					smeta->header.version);
+					smeta_buf->header.version);
 			return ERR_PTR(-EINVAL);
 		}
 
 		/* The first valid instance uuid is used for initialization */
 		if (!valid_uuid) {
-			memcpy(pblk->instance_uuid, smeta->header.uuid, 16);
+			memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
 			valid_uuid = 1;
 		}
 
-		if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) {
+		if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
 			pr_debug("pblk: ignore line %u due to uuid mismatch\n",
 					i);
 			continue;
@@ -830,9 +909,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 
 		/* Update line metadata */
 		spin_lock(&line->lock);
-		line->id = le32_to_cpu(line->smeta->header.id);
-		line->type = le16_to_cpu(line->smeta->header.type);
-		line->seq_nr = le64_to_cpu(line->smeta->seq_nr);
+		line->id = le32_to_cpu(smeta_buf->header.id);
+		line->type = le16_to_cpu(smeta_buf->header.type);
+		line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
 		spin_unlock(&line->lock);
 
 		/* Update general metadata */
@@ -848,7 +927,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 		pblk_recov_line_add_ordered(&recov_list, line);
 		found_lines++;
 		pr_debug("pblk: recovering data line %d, seq:%llu\n",
-						line->id, smeta->seq_nr);
+						line->id, smeta_buf->seq_nr);
 	}
 
 	if (!found_lines) {
@@ -868,15 +947,15 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 
 		recovered_lines++;
 		/* Calculate where emeta starts based on the line bb */
-		off = lm->sec_per_line - lm->emeta_sec;
+		off = lm->sec_per_line - lm->emeta_sec[0];
 		nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
 		off -= nr_bb * geo->sec_per_pl;
 
-		memset(emeta, 0, lm->emeta_len);
-		line->emeta = emeta;
 		line->emeta_ssec = off;
+		line->emeta = emeta;
+		memset(line->emeta->buf, 0, lm->emeta_len[0]);
 
-		if (pblk_line_read_emeta(pblk, line)) {
+		if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
 			pblk_recov_l2p_from_oob(pblk, line);
 			goto next;
 		}
@@ -941,58 +1020,26 @@ out:
 }
 
 /*
- * Pad until smeta can be read on current data line
+ * Pad current line
  */
-void pblk_recov_pad(struct pblk *pblk)
+int pblk_recov_pad(struct pblk *pblk)
 {
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
 	struct pblk_line *line;
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-	struct nvm_rq *rqd;
-	struct pblk_recov_alloc p;
-	struct ppa_addr *ppa_list;
-	struct pblk_sec_meta *meta_list;
-	void *data;
-	dma_addr_t dma_ppa_list, dma_meta_list;
+	int left_msecs;
+	int ret = 0;
 
 	spin_lock(&l_mg->free_lock);
 	line = l_mg->data_line;
+	left_msecs = line->left_msecs;
 	spin_unlock(&l_mg->free_lock);
 
-	rqd = pblk_alloc_rqd(pblk, READ);
-	if (IS_ERR(rqd))
-		return;
-
-	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
-	if (!meta_list)
-		goto free_rqd;
-
-	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
-	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
-
-	data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
-	if (!data)
-		goto free_meta_list;
-
-	p.ppa_list = ppa_list;
-	p.meta_list = meta_list;
-	p.rqd = rqd;
-	p.data = data;
-	p.dma_ppa_list = dma_ppa_list;
-	p.dma_meta_list = dma_meta_list;
-
-	if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) {
-		pr_err("pblk: Tear down padding failed\n");
-		goto free_data;
+	ret = pblk_recov_pad_oob(pblk, line, left_msecs);
+	if (ret) {
+		pr_err("pblk: Tear down padding failed (%d)\n", ret);
+		return ret;
 	}
 
-	pblk_line_close(pblk, line);
-
-free_data:
-	kfree(data);
-free_meta_list:
-	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
-free_rqd:
-	pblk_free_rqd(pblk, rqd, READ);
+	pblk_line_close_meta(pblk, line);
+	return ret;
 }
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index ab7cbb144f3f..2e6a5361baf0 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -23,11 +23,35 @@ static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
 	mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
 }
 
+int pblk_rl_is_limit(struct pblk_rl *rl)
+{
+	int rb_space;
+
+	rb_space = atomic_read(&rl->rb_space);
+
+	return (rb_space == 0);
+}
+
 int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
 {
 	int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
+	int rb_space = atomic_read(&rl->rb_space);
 
-	return (!(rb_user_cnt + nr_entries > rl->rb_user_max));
+	if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
+		return NVM_IO_ERR;
+
+	if (rb_user_cnt >= rl->rb_user_max)
+		return NVM_IO_REQUEUE;
+
+	return NVM_IO_OK;
+}
+
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
+{
+	int rb_space = atomic_read(&rl->rb_space);
+
+	if (unlikely(rb_space >= 0))
+		atomic_sub(nr_entries, &rl->rb_space);
 }
 
 int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
@@ -37,7 +61,7 @@ int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
 
 	/* If there is no user I/O let GC take over space on the write buffer */
 	rb_user_active = READ_ONCE(rl->rb_user_active);
-	return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active));
+	return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
 }
 
 void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
@@ -77,33 +101,32 @@ static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max)
 	unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
 
 	if (free_blocks >= rl->high) {
-		rl->rb_user_max = max - rl->rb_gc_rsv;
-		rl->rb_gc_max = rl->rb_gc_rsv;
+		rl->rb_user_max = max;
+		rl->rb_gc_max = 0;
 		rl->rb_state = PBLK_RL_HIGH;
 	} else if (free_blocks < rl->high) {
 		int shift = rl->high_pw - rl->rb_windows_pw;
 		int user_windows = free_blocks >> shift;
 		int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
-		int gc_max;
 
 		rl->rb_user_max = user_max;
-		gc_max = max - rl->rb_user_max;
-		rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv);
-
-		if (free_blocks > rl->low)
-			rl->rb_state = PBLK_RL_MID;
-		else
-			rl->rb_state = PBLK_RL_LOW;
+		rl->rb_gc_max = max - user_max;
+
+		if (free_blocks <= rl->rsv_blocks) {
+			rl->rb_user_max = 0;
+			rl->rb_gc_max = max;
+		}
+
+		/* In the worst case, we will need to GC lines in the low list
+		 * (high valid sector count). If there are lines to GC on high
+		 * or mid lists, these will be prioritized
+		 */
+		rl->rb_state = PBLK_RL_LOW;
 	}
 
 	return rl->rb_state;
 }
 
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
-{
-	rl->rb_gc_rsv = rl->rb_gc_max = rsv;
-}
-
 void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
 {
 	struct pblk *pblk = container_of(rl, struct pblk, rl);
@@ -122,11 +145,15 @@ void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
 
 void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
 {
-	struct pblk *pblk = container_of(rl, struct pblk, rl);
 	int blk_in_line = atomic_read(&line->blk_in_line);
-	int ret;
 
 	atomic_sub(blk_in_line, &rl->free_blocks);
+}
+
+void pblk_gc_should_kick(struct pblk *pblk)
+{
+	struct pblk_rl *rl = &pblk->rl;
+	int ret;
 
 	/* Rates will not change that often - no need to lock update */
 	ret = pblk_rl_update_rates(rl, rl->rb_budget);
@@ -136,11 +163,16 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
 		pblk_gc_should_stop(pblk);
 }
 
-int pblk_rl_gc_thrs(struct pblk_rl *rl)
+int pblk_rl_high_thrs(struct pblk_rl *rl)
 {
 	return rl->high;
 }
 
+int pblk_rl_low_thrs(struct pblk_rl *rl)
+{
+	return rl->low;
+}
+
 int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
 {
 	return rl->rb_user_max;
@@ -161,24 +193,36 @@ void pblk_rl_free(struct pblk_rl *rl)
 
 void pblk_rl_init(struct pblk_rl *rl, int budget)
 {
+	struct pblk *pblk = container_of(rl, struct pblk, rl);
+	struct pblk_line_meta *lm = &pblk->lm;
+	int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
 	unsigned int rb_windows;
 
 	rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
-	rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
 	rl->high_pw = get_count_order(rl->high);
 
+	rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
+	if (rl->low < min_blocks)
+		rl->low = min_blocks;
+
+	rl->rsv_blocks = min_blocks;
+
 	/* This will always be a power-of-2 */
 	rb_windows = budget / PBLK_MAX_REQ_ADDRS;
-	rl->rb_windows_pw = get_count_order(rb_windows) + 1;
+	rl->rb_windows_pw = get_count_order(rb_windows);
 
 	/* To start with, all buffer is available to user I/O writers */
 	rl->rb_budget = budget;
 	rl->rb_user_max = budget;
-	atomic_set(&rl->rb_user_cnt, 0);
 	rl->rb_gc_max = 0;
 	rl->rb_state = PBLK_RL_HIGH;
+
+	atomic_set(&rl->rb_user_cnt, 0);
 	atomic_set(&rl->rb_gc_cnt, 0);
+	atomic_set(&rl->rb_space, -1);
 
 	setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
+
 	rl->rb_user_active = 0;
+	rl->rb_gc_active = 0;
 }
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index f0af1d1ceeff..95fb434e2f01 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -49,30 +49,26 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
 
 static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
 {
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
 	int free_blocks, total_blocks;
 	int rb_user_max, rb_user_cnt;
-	int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state;
+	int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
 
 	free_blocks = atomic_read(&pblk->rl.free_blocks);
 	rb_user_max = pblk->rl.rb_user_max;
 	rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
 	rb_gc_max = pblk->rl.rb_gc_max;
-	rb_gc_rsv = pblk->rl.rb_gc_rsv;
 	rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
 	rb_budget = pblk->rl.rb_budget;
 	rb_state = pblk->rl.rb_state;
 
-	total_blocks = geo->blks_per_lun * geo->nr_luns;
+	total_blocks = pblk->rl.total_blocks;
 
 	return snprintf(page, PAGE_SIZE,
-		"u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
+		"u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
 				rb_user_cnt,
 				rb_user_max,
 				rb_gc_cnt,
 				rb_gc_max,
-				rb_gc_rsv,
 				rb_state,
 				rb_budget,
 				pblk->rl.low,
@@ -150,11 +146,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 	ssize_t sz = 0;
 	int nr_free_lines;
 	int cur_data, cur_log;
-	int free_line_cnt = 0, closed_line_cnt = 0;
+	int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
 	int d_line_cnt = 0, l_line_cnt = 0;
 	int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
-	int free = 0, bad = 0, cor = 0;
-	int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
+	int bad = 0, cor = 0;
+	int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
 	int map_weight = 0, meta_weight = 0;
 
 	spin_lock(&l_mg->free_lock);
@@ -166,6 +162,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 		free_line_cnt++;
 	spin_unlock(&l_mg->free_lock);
 
+	spin_lock(&l_mg->close_lock);
+	list_for_each_entry(line, &l_mg->emeta_list, list)
+		emeta_line_cnt++;
+	spin_unlock(&l_mg->close_lock);
+
 	spin_lock(&l_mg->gc_lock);
 	list_for_each_entry(line, &l_mg->gc_full_list, list) {
 		if (line->type == PBLK_LINETYPE_DATA)
@@ -212,8 +213,6 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 		gc_empty++;
 	}
 
-	list_for_each_entry(line, &l_mg->free_list, list)
-		free++;
 	list_for_each_entry(line, &l_mg->bad_list, list)
 		bad++;
 	list_for_each_entry(line, &l_mg->corrupt_list, list)
@@ -224,8 +223,7 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 	if (l_mg->data_line) {
 		cur_sec = l_mg->data_line->cur_sec;
 		msecs = l_mg->data_line->left_msecs;
-		ssecs = l_mg->data_line->left_ssecs;
-		vsc = l_mg->data_line->vsc;
+		vsc = le32_to_cpu(*l_mg->data_line->vsc);
 		sec_in_line = l_mg->data_line->sec_in_line;
 		meta_weight = bitmap_weight(&l_mg->meta_bitmap,
 							PBLK_DATA_LINES);
@@ -235,17 +233,20 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 	spin_unlock(&l_mg->free_lock);
 
 	if (nr_free_lines != free_line_cnt)
-		pr_err("pblk: corrupted free line list\n");
+		pr_err("pblk: corrupted free line list:%d/%d\n",
+						nr_free_lines, free_line_cnt);
 
 	sz = snprintf(page, PAGE_SIZE - sz,
 		"line: nluns:%d, nblks:%d, nsecs:%d\n",
 		geo->nr_luns, lm->blk_per_line, lm->sec_per_line);
 
 	sz += snprintf(page + sz, PAGE_SIZE - sz,
-		"lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n",
+		"lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
 					cur_data, cur_log,
-					free, nr_free_lines, bad, cor,
+					nr_free_lines,
+					emeta_line_cnt, meta_weight,
 					closed_line_cnt,
+					bad, cor,
 					d_line_cnt, l_line_cnt,
 					l_mg->nr_lines);
 
@@ -255,9 +256,10 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 			atomic_read(&pblk->gc.inflight_gc));
 
 	sz += snprintf(page + sz, PAGE_SIZE - sz,
-		"data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
-			cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line,
-			map_weight, lm->sec_per_line, meta_weight);
+		"data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
+			cur_data, cur_sec, msecs, vsc, sec_in_line,
+			map_weight, lm->sec_per_line,
+			atomic_read(&pblk->inflight_io));
 
 	return sz;
 }
@@ -274,7 +276,7 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
 					lm->smeta_len, lm->smeta_sec);
 	sz += snprintf(page + sz, PAGE_SIZE - sz,
 				"emeta - len:%d, sec:%d, bb_start:%d\n",
-					lm->emeta_len, lm->emeta_sec,
+					lm->emeta_len[0], lm->emeta_sec[0],
 					lm->emeta_bb);
 	sz += snprintf(page + sz, PAGE_SIZE - sz,
 				"bitmap lengths: sec:%d, blk:%d, lun:%d\n",
@@ -290,6 +292,11 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
 	return sz;
 }
 
+static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
+}
+
 #ifdef CONFIG_NVM_DEBUG
 static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
 {
@@ -303,52 +310,51 @@ static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
 			atomic_long_read(&pblk->padded_wb),
 			atomic_long_read(&pblk->sub_writes),
 			atomic_long_read(&pblk->sync_writes),
-			atomic_long_read(&pblk->compl_writes),
 			atomic_long_read(&pblk->recov_writes),
 			atomic_long_read(&pblk->recov_gc_writes),
 			atomic_long_read(&pblk->recov_gc_reads),
+			atomic_long_read(&pblk->cache_reads),
 			atomic_long_read(&pblk->sync_reads));
 }
 #endif
 
-static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page,
-				     size_t len)
+static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
+				   size_t len)
 {
-	struct pblk_gc *gc = &pblk->gc;
 	size_t c_len;
-	int value;
+	int force;
 
 	c_len = strcspn(page, "\n");
 	if (c_len >= len)
 		return -EINVAL;
 
-	if (kstrtouint(page, 0, &value))
+	if (kstrtouint(page, 0, &force))
 		return -EINVAL;
 
-	spin_lock(&gc->lock);
-	pblk_rl_set_gc_rsc(&pblk->rl, value);
-	spin_unlock(&gc->lock);
+	pblk_gc_sysfs_force(pblk, force);
 
 	return len;
 }
 
-static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
-				   size_t len)
+static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
+					     const char *page, size_t len)
 {
 	size_t c_len;
-	int force;
+	int sec_per_write;
 
 	c_len = strcspn(page, "\n");
 	if (c_len >= len)
 		return -EINVAL;
 
-	if (kstrtouint(page, 0, &force))
+	if (kstrtouint(page, 0, &sec_per_write))
 		return -EINVAL;
 
-	if (force < 0 || force > 1)
+	if (sec_per_write < pblk->min_write_pgs
+				|| sec_per_write > pblk->max_write_pgs
+				|| sec_per_write % pblk->min_write_pgs != 0)
 		return -EINVAL;
 
-	pblk_gc_sysfs_force(pblk, force);
+	pblk_set_sec_per_write(pblk, sec_per_write);
 
 	return len;
 }
@@ -398,9 +404,9 @@ static struct attribute sys_gc_force = {
 	.mode = 0200,
 };
 
-static struct attribute sys_gc_rl_max = {
-	.name = "gc_rl_max",
-	.mode = 0200,
+static struct attribute sys_max_sec_per_write = {
+	.name = "max_sec_per_write",
+	.mode = 0644,
 };
 
 #ifdef CONFIG_NVM_DEBUG
@@ -416,7 +422,7 @@ static struct attribute *pblk_attrs[] = {
 	&sys_errors_attr,
 	&sys_gc_state,
 	&sys_gc_force,
-	&sys_gc_rl_max,
+	&sys_max_sec_per_write,
 	&sys_rb_attr,
 	&sys_stats_ppaf_attr,
 	&sys_lines_attr,
@@ -448,6 +454,8 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
 		return pblk_sysfs_lines(pblk, buf);
 	else if (strcmp(attr->name, "lines_info") == 0)
 		return pblk_sysfs_lines_info(pblk, buf);
+	else if (strcmp(attr->name, "max_sec_per_write") == 0)
+		return pblk_sysfs_get_sec_per_write(pblk, buf);
 #ifdef CONFIG_NVM_DEBUG
 	else if (strcmp(attr->name, "stats") == 0)
 		return pblk_sysfs_stats_debug(pblk, buf);
@@ -460,10 +468,10 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
 {
 	struct pblk *pblk = container_of(kobj, struct pblk, kobj);
 
-	if (strcmp(attr->name, "gc_rl_max") == 0)
-		return pblk_sysfs_rate_store(pblk, buf, len);
-	else if (strcmp(attr->name, "gc_force") == 0)
+	if (strcmp(attr->name, "gc_force") == 0)
 		return pblk_sysfs_gc_force(pblk, buf, len);
+	else if (strcmp(attr->name, "max_sec_per_write") == 0)
+		return pblk_sysfs_set_sec_per_write(pblk, buf, len);
 
 	return 0;
 }
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index aef6fd7c4a0c..d62a8f4faaf4 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -17,18 +17,6 @@
 
 #include "pblk.h"
 
-static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line)
-{
-#ifdef CONFIG_NVM_DEBUG
-	atomic_long_inc(&pblk->sync_writes);
-#endif
-
-	/* Counter protected by rb sync lock */
-	line->left_ssecs--;
-	if (!line->left_ssecs)
-		pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
-}
-
 static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
 				    struct pblk_c_ctx *c_ctx)
 {
@@ -39,21 +27,14 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
 
 	for (i = 0; i < c_ctx->nr_valid; i++) {
 		struct pblk_w_ctx *w_ctx;
-		struct ppa_addr p;
-		struct pblk_line *line;
 
 		w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i);
-
-		p = rqd->ppa_list[i];
-		line = &pblk->lines[pblk_dev_ppa_to_line(p)];
-		pblk_sync_line(pblk, line);
-
 		while ((original_bio = bio_list_pop(&w_ctx->bios)))
 			bio_endio(original_bio);
 	}
 
 #ifdef CONFIG_NVM_DEBUG
-	atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes);
+	atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes);
 #endif
 
 	ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
@@ -169,7 +150,7 @@ static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
 	}
 
 	INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
-	queue_work(pblk->kw_wq, &recovery->ws_rec);
+	queue_work(pblk->close_wq, &recovery->ws_rec);
 
 out:
 	pblk_complete_write(pblk, rqd, c_ctx);
@@ -186,14 +167,50 @@ static void pblk_end_io_write(struct nvm_rq *rqd)
 	}
 #ifdef CONFIG_NVM_DEBUG
 	else
-		WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n");
+		WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
 #endif
 
 	pblk_complete_write(pblk, rqd, c_ctx);
+	atomic_dec(&pblk->inflight_io);
+}
+
+static void pblk_end_io_write_meta(struct nvm_rq *rqd)
+{
+	struct pblk *pblk = rqd->private;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
+	struct pblk_line *line = m_ctx->private;
+	struct pblk_emeta *emeta = line->emeta;
+	int pos = pblk_ppa_to_pos(geo, rqd->ppa_list[0]);
+	struct pblk_lun *rlun = &pblk->luns[pos];
+	int sync;
+
+	up(&rlun->wr_sem);
+
+	if (rqd->error) {
+		pblk_log_write_err(pblk, rqd);
+		pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
+	}
+#ifdef CONFIG_NVM_DEBUG
+	else
+		WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
+#endif
+
+	sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
+	if (sync == emeta->nr_entries)
+		pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws,
+								pblk->close_wq);
+
+	bio_put(rqd->bio);
+	pblk_free_rqd(pblk, rqd, READ);
+
+	atomic_dec(&pblk->inflight_io);
 }
 
 static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
-			   unsigned int nr_secs)
+			   unsigned int nr_secs,
+			   nvm_end_io_fn(*end_io))
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 
@@ -202,7 +219,7 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	rqd->nr_ppas = nr_secs;
 	rqd->flags = pblk_set_progr_mode(pblk, WRITE);
 	rqd->private = pblk;
-	rqd->end_io = pblk_end_io_write;
+	rqd->end_io = end_io;
 
 	rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
 							&rqd->dma_meta_list);
@@ -219,11 +236,10 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 }
 
 static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
-			   struct pblk_c_ctx *c_ctx)
+			   struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
 {
 	struct pblk_line_meta *lm = &pblk->lm;
-	struct pblk_line *e_line = pblk_line_get_data_next(pblk);
-	struct ppa_addr erase_ppa;
+	struct pblk_line *e_line = pblk_line_get_erase(pblk);
 	unsigned int valid = c_ctx->nr_valid;
 	unsigned int padded = c_ctx->nr_padded;
 	unsigned int nr_secs = valid + padded;
@@ -231,40 +247,23 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	int ret = 0;
 
 	lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
-	if (!lun_bitmap) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	if (!lun_bitmap)
+		return -ENOMEM;
 	c_ctx->lun_bitmap = lun_bitmap;
 
-	ret = pblk_alloc_w_rq(pblk, rqd, nr_secs);
+	ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write);
 	if (ret) {
 		kfree(lun_bitmap);
-		goto out;
+		return ret;
 	}
 
-	ppa_set_empty(&erase_ppa);
 	if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
 		pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
 	else
 		pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
-							valid, &erase_ppa);
-
-out:
-	if (unlikely(e_line && !ppa_empty(erase_ppa))) {
-		if (pblk_blk_erase_async(pblk, erase_ppa)) {
-			struct nvm_tgt_dev *dev = pblk->dev;
-			struct nvm_geo *geo = &dev->geo;
-			int bit;
-
-			atomic_inc(&e_line->left_eblks);
-			bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
-			WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
-			up(&pblk->erase_sem);
-		}
-	}
+							valid, erase_ppa);
 
-	return ret;
+	return 0;
 }
 
 int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -280,7 +279,7 @@ int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
 
 	c_ctx->lun_bitmap = lun_bitmap;
 
-	ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas);
+	ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write);
 	if (ret)
 		return ret;
 
@@ -311,16 +310,237 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
 	return secs_to_sync;
 }
 
+static inline int pblk_valid_meta_ppa(struct pblk *pblk,
+				      struct pblk_line *meta_line,
+				      struct ppa_addr *ppa_list, int nr_ppas)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line *data_line;
+	struct ppa_addr ppa, ppa_opt;
+	u64 paddr;
+	int i;
+
+	data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
+	paddr = pblk_lookup_page(pblk, meta_line);
+	ppa = addr_to_gen_ppa(pblk, paddr, 0);
+
+	if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
+		return 1;
+
+	/* Schedule a metadata I/O that is half the distance from the data I/O
+	 * with regards to the number of LUNs forming the pblk instance. This
+	 * balances LUN conflicts across every I/O.
+	 *
+	 * When the LUN configuration changes (e.g., due to GC), this distance
+	 * can align, which would result on a LUN deadlock. In this case, modify
+	 * the distance to not be optimal, but allow metadata I/Os to succeed.
+	 */
+	ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
+	if (unlikely(ppa_opt.ppa == ppa.ppa)) {
+		data_line->meta_distance--;
+		return 0;
+	}
+
+	for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+		if (ppa_list[i].g.ch == ppa_opt.g.ch &&
+					ppa_list[i].g.lun == ppa_opt.g.lun)
+			return 1;
+
+	if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
+		for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+			if (ppa_list[i].g.ch == ppa.g.ch &&
+						ppa_list[i].g.lun == ppa.g.lun)
+				return 0;
+
+		return 1;
+	}
+
+	return 0;
+}
+
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_emeta *emeta = meta_line->emeta;
+	struct pblk_g_ctx *m_ctx;
+	struct pblk_lun *rlun;
+	struct bio *bio;
+	struct nvm_rq *rqd;
+	void *data;
+	u64 paddr;
+	int rq_ppas = pblk->min_write_pgs;
+	int id = meta_line->id;
+	int rq_len;
+	int i, j;
+	int ret;
+
+	rqd = pblk_alloc_rqd(pblk, READ);
+	if (IS_ERR(rqd)) {
+		pr_err("pblk: cannot allocate write req.\n");
+		return PTR_ERR(rqd);
+	}
+	m_ctx = nvm_rq_to_pdu(rqd);
+	m_ctx->private = meta_line;
+
+	rq_len = rq_ppas * geo->sec_size;
+	data = ((void *)emeta->buf) + emeta->mem;
+
+	bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
+					l_mg->emeta_alloc_type, GFP_KERNEL);
+	if (IS_ERR(bio)) {
+		ret = PTR_ERR(bio);
+		goto fail_free_rqd;
+	}
+	bio->bi_iter.bi_sector = 0; /* internal bio */
+	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+	rqd->bio = bio;
+
+	ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
+	if (ret)
+		goto fail_free_bio;
+
+	for (i = 0; i < rqd->nr_ppas; ) {
+		spin_lock(&meta_line->lock);
+		paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
+		spin_unlock(&meta_line->lock);
+		for (j = 0; j < rq_ppas; j++, i++, paddr++)
+			rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
+	}
+
+	rlun = &pblk->luns[pblk_ppa_to_pos(geo, rqd->ppa_list[0])];
+	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+	if (ret) {
+		pr_err("pblk: lun semaphore timed out (%d)\n", ret);
+		goto fail_free_bio;
+	}
+
+	emeta->mem += rq_len;
+	if (emeta->mem >= lm->emeta_len[0]) {
+		spin_lock(&l_mg->close_lock);
+		list_del(&meta_line->list);
+		WARN(!bitmap_full(meta_line->map_bitmap, lm->sec_per_line),
+				"pblk: corrupt meta line %d\n", meta_line->id);
+		spin_unlock(&l_mg->close_lock);
+	}
+
+	ret = pblk_submit_io(pblk, rqd);
+	if (ret) {
+		pr_err("pblk: emeta I/O submission failed: %d\n", ret);
+		goto fail_rollback;
+	}
+
+	return NVM_IO_OK;
+
+fail_rollback:
+	spin_lock(&l_mg->close_lock);
+	pblk_dealloc_page(pblk, meta_line, rq_ppas);
+	list_add(&meta_line->list, &meta_line->list);
+	spin_unlock(&l_mg->close_lock);
+fail_free_bio:
+	if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
+		bio_put(bio);
+fail_free_rqd:
+	pblk_free_rqd(pblk, rqd, READ);
+	return ret;
+}
+
+static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
+			       int prev_n)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line *meta_line;
+
+	spin_lock(&l_mg->close_lock);
+retry:
+	if (list_empty(&l_mg->emeta_list)) {
+		spin_unlock(&l_mg->close_lock);
+		return 0;
+	}
+	meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
+	if (bitmap_full(meta_line->map_bitmap, lm->sec_per_line))
+		goto retry;
+	spin_unlock(&l_mg->close_lock);
+
+	if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
+		return 0;
+
+	return pblk_submit_meta_io(pblk, meta_line);
+}
+
+static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+	struct ppa_addr erase_ppa;
+	int err;
+
+	ppa_set_empty(&erase_ppa);
+
+	/* Assign lbas to ppas and populate request structure */
+	err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
+	if (err) {
+		pr_err("pblk: could not setup write request: %d\n", err);
+		return NVM_IO_ERR;
+	}
+
+	if (likely(ppa_empty(erase_ppa))) {
+		/* Submit metadata write for previous data line */
+		err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
+		if (err) {
+			pr_err("pblk: metadata I/O submission failed: %d", err);
+			return NVM_IO_ERR;
+		}
+
+		/* Submit data write for current data line */
+		err = pblk_submit_io(pblk, rqd);
+		if (err) {
+			pr_err("pblk: data I/O submission failed: %d\n", err);
+			return NVM_IO_ERR;
+		}
+	} else {
+		/* Submit data write for current data line */
+		err = pblk_submit_io(pblk, rqd);
+		if (err) {
+			pr_err("pblk: data I/O submission failed: %d\n", err);
+			return NVM_IO_ERR;
+		}
+
+		/* Submit available erase for next data line */
+		if (pblk_blk_erase_async(pblk, erase_ppa)) {
+			struct pblk_line *e_line = pblk_line_get_erase(pblk);
+			struct nvm_tgt_dev *dev = pblk->dev;
+			struct nvm_geo *geo = &dev->geo;
+			int bit;
+
+			atomic_inc(&e_line->left_eblks);
+			bit = pblk_ppa_to_pos(geo, erase_ppa);
+			WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
+		}
+	}
+
+	return NVM_IO_OK;
+}
+
+static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+	struct bio *bio = rqd->bio;
+
+	if (c_ctx->nr_padded)
+		pblk_bio_free_pages(pblk, bio, rqd->nr_ppas, c_ctx->nr_padded);
+}
+
 static int pblk_submit_write(struct pblk *pblk)
 {
 	struct bio *bio;
 	struct nvm_rq *rqd;
-	struct pblk_c_ctx *c_ctx;
-	unsigned int pgs_read;
 	unsigned int secs_avail, secs_to_sync, secs_to_com;
 	unsigned int secs_to_flush;
 	unsigned long pos;
-	int err;
 
 	/* If there are no sectors in the cache, flushes (bios without data)
 	 * will be cleared on the cache threads
@@ -338,7 +558,6 @@ static int pblk_submit_write(struct pblk *pblk)
 		pr_err("pblk: cannot allocate write req.\n");
 		return 1;
 	}
-	c_ctx = nvm_rq_to_pdu(rqd);
 
 	bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
 	if (!bio) {
@@ -358,29 +577,14 @@ static int pblk_submit_write(struct pblk *pblk)
 	secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
 	pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
 
-	pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos,
-						secs_to_sync, secs_avail);
-	if (!pgs_read) {
+	if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync,
+								secs_avail)) {
 		pr_err("pblk: corrupted write bio\n");
 		goto fail_put_bio;
 	}
 
-	if (c_ctx->nr_padded)
-		if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
-			goto fail_put_bio;
-
-	/* Assign lbas to ppas and populate request structure */
-	err = pblk_setup_w_rq(pblk, rqd, c_ctx);
-	if (err) {
-		pr_err("pblk: could not setup write request\n");
-		goto fail_free_bio;
-	}
-
-	err = pblk_submit_io(pblk, rqd);
-	if (err) {
-		pr_err("pblk: I/O submission failed: %d\n", err);
+	if (pblk_submit_io_set(pblk, rqd))
 		goto fail_free_bio;
-	}
 
 #ifdef CONFIG_NVM_DEBUG
 	atomic_long_add(secs_to_sync, &pblk->sub_writes);
@@ -389,8 +593,7 @@ static int pblk_submit_write(struct pblk *pblk)
 	return 0;
 
 fail_free_bio:
-	if (c_ctx->nr_padded)
-		pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
+	pblk_free_write_rqd(pblk, rqd);
 fail_put_bio:
 	bio_put(bio);
 fail_free_rqd:
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 99f3186b5288..15931381348c 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -40,6 +40,12 @@
 #define PBLK_MAX_REQ_ADDRS (64)
 #define PBLK_MAX_REQ_ADDRS_PW (6)
 
+#define PBLK_WS_POOL_SIZE (128)
+#define PBLK_META_POOL_SIZE (128)
+#define PBLK_READ_REQ_POOL_SIZE (1024)
+
+#define PBLK_NR_CLOSE_JOBS (4)
+
 #define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
 
 #define PBLK_COMMAND_TIMEOUT_MS 30000
@@ -72,11 +78,15 @@ enum {
 	PBLK_BLK_ST_CLOSED =	0x2,
 };
 
+struct pblk_sec_meta {
+	u64 reserved;
+	__le64 lba;
+};
+
 /* The number of GC lists and the rate-limiter states go together. This way the
  * rate-limiter can dictate how much GC is needed based on resource utilization.
  */
-#define PBLK_NR_GC_LISTS 3
-#define PBLK_MAX_GC_JOBS 32
+#define PBLK_GC_NR_LISTS 3
 
 enum {
 	PBLK_RL_HIGH = 1,
@@ -84,14 +94,9 @@ enum {
 	PBLK_RL_LOW = 3,
 };
 
-struct pblk_sec_meta {
-	u64 reserved;
-	__le64 lba;
-};
-
 #define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
 
-/* write completion context */
+/* write buffer completion context */
 struct pblk_c_ctx {
 	struct list_head list;		/* Head for out-of-order completion */
 
@@ -101,9 +106,16 @@ struct pblk_c_ctx {
 	unsigned int nr_padded;
 };
 
-/* Read context */
-struct pblk_r_ctx {
-	struct bio *orig_bio;
+/* generic context */
+struct pblk_g_ctx {
+	void *private;
+};
+
+/* Pad context */
+struct pblk_pad_rq {
+	struct pblk *pblk;
+	struct completion wait;
+	struct kref ref;
 };
 
 /* Recovery context */
@@ -195,29 +207,39 @@ struct pblk_lun {
 struct pblk_gc_rq {
 	struct pblk_line *line;
 	void *data;
-	u64 *lba_list;
+	u64 lba_list[PBLK_MAX_REQ_ADDRS];
 	int nr_secs;
 	int secs_to_gc;
 	struct list_head list;
 };
 
 struct pblk_gc {
+	/* These states are not protected by a lock since (i) they are in the
+	 * fast path, and (ii) they are not critical.
+	 */
 	int gc_active;
 	int gc_enabled;
 	int gc_forced;
-	int gc_jobs_active;
-	atomic_t inflight_gc;
 
 	struct task_struct *gc_ts;
 	struct task_struct *gc_writer_ts;
+	struct task_struct *gc_reader_ts;
+
+	struct workqueue_struct *gc_line_reader_wq;
 	struct workqueue_struct *gc_reader_wq;
+
 	struct timer_list gc_timer;
 
+	struct semaphore gc_sem;
+	atomic_t inflight_gc;
 	int w_entries;
+
 	struct list_head w_list;
+	struct list_head r_list;
 
 	spinlock_t lock;
 	spinlock_t w_lock;
+	spinlock_t r_lock;
 };
 
 struct pblk_rl {
@@ -229,10 +251,8 @@ struct pblk_rl {
 				 */
 	unsigned int high_pw;	/* High rounded up as a power of 2 */
 
-#define PBLK_USER_HIGH_THRS 2	/* Begin write limit at 50 percent
-				 * available blks
-				 */
-#define PBLK_USER_LOW_THRS 20	/* Aggressive GC at 5% available blocks */
+#define PBLK_USER_HIGH_THRS 8	/* Begin write limit at 12% available blks */
+#define PBLK_USER_LOW_THRS 10	/* Aggressive GC at 10% available blocks */
 
 	int rb_windows_pw;	/* Number of rate windows in the write buffer
 				 * given as a power-of-2. This guarantees that
@@ -244,13 +264,19 @@ struct pblk_rl {
 				 */
 	int rb_budget;		/* Total number of entries available for I/O */
 	int rb_user_max;	/* Max buffer entries available for user I/O */
-	atomic_t rb_user_cnt;	/* User I/O buffer counter */
 	int rb_gc_max;		/* Max buffer entries available for GC I/O */
 	int rb_gc_rsv;		/* Reserved buffer entries for GC I/O */
 	int rb_state;		/* Rate-limiter current state */
+
+	atomic_t rb_user_cnt;	/* User I/O buffer counter */
 	atomic_t rb_gc_cnt;	/* GC I/O buffer counter */
+	atomic_t rb_space;	/* Space limit in case of reaching capacity */
+
+	int rsv_blocks;		/* Reserved blocks for GC */
 
 	int rb_user_active;
+	int rb_gc_active;
+
 	struct timer_list u_timer;
 
 	unsigned long long nr_secs;
@@ -258,8 +284,6 @@ struct pblk_rl {
 	atomic_t free_blocks;
 };
 
-#define PBLK_LINE_NR_LUN_BITMAP 2
-#define PBLK_LINE_NR_SEC_BITMAP 2
 #define PBLK_LINE_EMPTY (~0U)
 
 enum {
@@ -310,16 +334,19 @@ struct line_smeta {
 	__le32 window_wr_lun;	/* Number of parallel LUNs to write */
 
 	__le32 rsvd[2];
+
+	__le64 lun_bitmap[];
 };
 
 /*
- * Metadata Layout:
- *	1. struct pblk_emeta
- *	2. nr_lbas u64 forming lba list
- *	3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line)
- *	4. nr_luns bits (u64 format) forming line bad block bitmap
- *
- *	3. and 4. will be part of FTL log
+ * Metadata layout in media:
+ *	First sector:
+ *		1. struct line_emeta
+ *		2. bad block bitmap (u64 * window_wr_lun)
+ *	Mid sectors (start at lbas_sector):
+ *		3. nr_lbas (u64) forming lba list
+ *	Last sectors (start at vsc_sector):
+ *		4. u32 valid sector count (vsc) for all lines (~0U: free line)
  */
 struct line_emeta {
 	struct line_header header;
@@ -339,6 +366,23 @@ struct line_emeta {
 	__le32 next_id;		/* Line id for next line */
 	__le64 nr_lbas;		/* Number of lbas mapped in line */
 	__le64 nr_valid_lbas;	/* Number of valid lbas mapped in line */
+	__le64 bb_bitmap[];	/* Updated bad block bitmap for line */
+};
+
+struct pblk_emeta {
+	struct line_emeta *buf;		/* emeta buffer in media format */
+	int mem;			/* Write offset - points to next
+					 * writable entry in memory
+					 */
+	atomic_t sync;			/* Synced - backpointer that signals the
+					 * last entry that has been successfully
+					 * persisted to media
+					 */
+	unsigned int nr_entries;	/* Number of emeta entries */
+};
+
+struct pblk_smeta {
+	struct line_smeta *buf;		/* smeta buffer in persistent format */
 };
 
 struct pblk_line {
@@ -355,9 +399,12 @@ struct pblk_line {
 
 	unsigned long *lun_bitmap;	/* Bitmap for LUNs mapped in line */
 
-	struct line_smeta *smeta;	/* Start metadata */
-	struct line_emeta *emeta;	/* End metadata */
+	struct pblk_smeta *smeta;	/* Start metadata */
+	struct pblk_emeta *emeta;	/* End medatada */
+
 	int meta_line;			/* Metadata line id */
+	int meta_distance;		/* Distance between data and metadata */
+
 	u64 smeta_ssec;			/* Sector where smeta starts */
 	u64 emeta_ssec;			/* Sector where emeta starts */
 
@@ -374,9 +421,10 @@ struct pblk_line {
 	atomic_t left_seblks;		/* Blocks left for sync erasing */
 
 	int left_msecs;			/* Sectors left for mapping */
-	int left_ssecs;			/* Sectors left to sync */
 	unsigned int cur_sec;		/* Sector map pointer */
-	unsigned int vsc;		/* Valid sector count in line */
+	unsigned int nr_valid_lbas;	/* Number of valid lbas in line */
+
+	__le32 *vsc;			/* Valid sector count in line */
 
 	struct kref ref;		/* Write buffer L2P references */
 
@@ -385,13 +433,15 @@ struct pblk_line {
 
 #define PBLK_DATA_LINES 4
 
-enum{
+enum {
 	PBLK_KMALLOC_META = 1,
 	PBLK_VMALLOC_META = 2,
 };
 
-struct pblk_line_metadata {
-	void *meta;
+enum {
+	PBLK_EMETA_TYPE_HEADER = 1,	/* struct line_emeta first sector */
+	PBLK_EMETA_TYPE_LLBA = 2,	/* lba list - type: __le64 */
+	PBLK_EMETA_TYPE_VSC = 3,	/* vsc list - type: __le32 */
 };
 
 struct pblk_line_mgmt {
@@ -404,7 +454,7 @@ struct pblk_line_mgmt {
 	struct list_head bad_list;	/* Full lines bad */
 
 	/* GC lists - use gc_lock */
-	struct list_head *gc_lists[PBLK_NR_GC_LISTS];
+	struct list_head *gc_lists[PBLK_GC_NR_LISTS];
 	struct list_head gc_high_list;	/* Full lines ready to GC, high isc */
 	struct list_head gc_mid_list;	/* Full lines ready to GC, mid isc */
 	struct list_head gc_low_list;	/* Full lines ready to GC, low isc */
@@ -417,13 +467,16 @@ struct pblk_line_mgmt {
 	struct pblk_line *log_next;	/* Next FTL log line */
 	struct pblk_line *data_next;	/* Next data line */
 
+	struct list_head emeta_list;	/* Lines queued to schedule emeta */
+
+	__le32 *vsc_list;		/* Valid sector counts for all lines */
+
 	/* Metadata allocation type: VMALLOC | KMALLOC */
-	int smeta_alloc_type;
 	int emeta_alloc_type;
 
 	/* Pre-allocated metadata for data lines */
-	struct pblk_line_metadata sline_meta[PBLK_DATA_LINES];
-	struct pblk_line_metadata eline_meta[PBLK_DATA_LINES];
+	struct pblk_smeta *sline_meta[PBLK_DATA_LINES];
+	struct pblk_emeta *eline_meta[PBLK_DATA_LINES];
 	unsigned long meta_bitmap;
 
 	/* Helpers for fast bitmap calculations */
@@ -434,25 +487,40 @@ struct pblk_line_mgmt {
 	unsigned long l_seq_nr;		/* Log line unique sequence number */
 
 	spinlock_t free_lock;
+	spinlock_t close_lock;
 	spinlock_t gc_lock;
 };
 
 struct pblk_line_meta {
 	unsigned int smeta_len;		/* Total length for smeta */
-	unsigned int smeta_sec;		/* Sectors needed for smeta*/
-	unsigned int emeta_len;		/* Total length for emeta */
-	unsigned int emeta_sec;		/* Sectors needed for emeta*/
+	unsigned int smeta_sec;		/* Sectors needed for smeta */
+
+	unsigned int emeta_len[4];	/* Lengths for emeta:
+					 *  [0]: Total length
+					 *  [1]: struct line_emeta length
+					 *  [2]: L2P portion length
+					 *  [3]: vsc list length
+					 */
+	unsigned int emeta_sec[4];	/* Sectors needed for emeta. Same layout
+					 * as emeta_len
+					 */
+
 	unsigned int emeta_bb;		/* Boundary for bb that affects emeta */
+
+	unsigned int vsc_list_len;	/* Length for vsc list */
 	unsigned int sec_bitmap_len;	/* Length for sector bitmap in line */
 	unsigned int blk_bitmap_len;	/* Length for block bitmap in line */
 	unsigned int lun_bitmap_len;	/* Length for lun bitmap in line */
 
 	unsigned int blk_per_line;	/* Number of blocks in a full line */
 	unsigned int sec_per_line;	/* Number of sectors in a line */
+	unsigned int dsec_per_line;	/* Number of data sectors in a line */
 	unsigned int min_blk_line;	/* Min. number of good blocks in line */
 
 	unsigned int mid_thrs;		/* Threshold for GC mid list */
 	unsigned int high_thrs;		/* Threshold for GC high list */
+
+	unsigned int meta_distance;	/* Distance between data and metadata */
 };
 
 struct pblk_addr_format {
@@ -470,6 +538,13 @@ struct pblk_addr_format {
 	u8	sec_offset;
 };
 
+enum {
+	PBLK_STATE_RUNNING = 0,
+	PBLK_STATE_STOPPING = 1,
+	PBLK_STATE_RECOVERING = 2,
+	PBLK_STATE_STOPPED = 3,
+};
+
 struct pblk {
 	struct nvm_tgt_dev *dev;
 	struct gendisk *disk;
@@ -487,6 +562,8 @@ struct pblk {
 
 	struct pblk_rb rwb;
 
+	int state;			/* pblk line state */
+
 	int min_write_pgs; /* Minimum amount of pages required by controller */
 	int max_write_pgs; /* Maximum amount of pages supported by controller */
 	int pgs_in_buffer; /* Number of pages that need to be held in buffer to
@@ -499,7 +576,7 @@ struct pblk {
 	/* pblk provisioning values. Used by rate limiter */
 	struct pblk_rl rl;
 
-	struct semaphore erase_sem;
+	int sec_per_write;
 
 	unsigned char instance_uuid[16];
 #ifdef CONFIG_NVM_DEBUG
@@ -511,8 +588,8 @@ struct pblk {
 	atomic_long_t req_writes;	/* Sectors stored on write buffer */
 	atomic_long_t sub_writes;	/* Sectors submitted from buffer */
 	atomic_long_t sync_writes;	/* Sectors synced to media */
-	atomic_long_t compl_writes;	/* Sectors completed in write bio */
 	atomic_long_t inflight_reads;	/* Inflight sector read requests */
+	atomic_long_t cache_reads;	/* Read requests that hit the cache */
 	atomic_long_t sync_reads;	/* Completed sector read requests */
 	atomic_long_t recov_writes;	/* Sectors submitted from recovery */
 	atomic_long_t recov_gc_writes;	/* Sectors submitted from write GC */
@@ -528,6 +605,8 @@ struct pblk {
 	atomic_long_t write_failed;
 	atomic_long_t erase_failed;
 
+	atomic_t inflight_io;		/* General inflight I/O counter */
+
 	struct task_struct *writer_ts;
 
 	/* Simple translation map of logical addresses to physical addresses.
@@ -542,11 +621,13 @@ struct pblk {
 	mempool_t *page_pool;
 	mempool_t *line_ws_pool;
 	mempool_t *rec_pool;
-	mempool_t *r_rq_pool;
+	mempool_t *g_rq_pool;
 	mempool_t *w_rq_pool;
 	mempool_t *line_meta_pool;
 
-	struct workqueue_struct *kw_wq;
+	struct workqueue_struct *close_wq;
+	struct workqueue_struct *bb_wq;
+
 	struct timer_list wtimer;
 
 	struct pblk_gc gc;
@@ -559,7 +640,7 @@ struct pblk_line_ws {
 	struct work_struct ws;
 };
 
-#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx))
+#define pblk_g_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_g_ctx))
 #define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
 
 /*
@@ -579,18 +660,17 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
 			    struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
 			    unsigned int pos);
 struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
+void pblk_rb_flush(struct pblk_rb *rb);
 
 void pblk_rb_sync_l2p(struct pblk_rb *rb);
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
-				 struct pblk_c_ctx *c_ctx,
-				 unsigned int pos,
-				 unsigned int nr_entries,
-				 unsigned int count);
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+				 struct bio *bio, unsigned int pos,
+				 unsigned int nr_entries, unsigned int count);
 unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
 				      struct list_head *list,
 				      unsigned int max);
 int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
-			u64 pos, int bio_iter);
+			struct ppa_addr ppa, int bio_iter);
 unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
 
 unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
@@ -601,6 +681,7 @@ void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
 unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb);
 
 unsigned int pblk_rb_read_count(struct pblk_rb *rb);
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb);
 unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
 
 int pblk_rb_tear_down_check(struct pblk_rb *rb);
@@ -612,40 +693,50 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
  * pblk core
  */
 struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw);
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
 int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
 			struct pblk_c_ctx *c_ctx);
 void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw);
-void pblk_flush_writer(struct pblk *pblk);
+void pblk_wait_for_meta(struct pblk *pblk);
 struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba);
 void pblk_discard(struct pblk *pblk, struct bio *bio);
 void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
 void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
 int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
 struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
 			      unsigned int nr_secs, unsigned int len,
-			      gfp_t gfp_mask);
+			      int alloc_type, gfp_t gfp_mask);
 struct pblk_line *pblk_line_get(struct pblk *pblk);
 struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
+void pblk_line_replace_data(struct pblk *pblk);
 int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
 void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
 struct pblk_line *pblk_line_get_data(struct pblk *pblk);
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk);
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
 int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
 int pblk_line_is_full(struct pblk_line *line);
 void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
-void pblk_line_close_ws(struct work_struct *work);
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
 void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_close_meta_sync(struct pblk *pblk);
+void pblk_line_close_ws(struct work_struct *work);
+void pblk_pipeline_stop(struct pblk *pblk);
 void pblk_line_mark_bb(struct work_struct *work);
 void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
-		      void (*work)(struct work_struct *));
+		      void (*work)(struct work_struct *),
+		      struct workqueue_struct *wq);
 u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
 int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line);
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line);
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+			 void *emeta_buf);
 int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
 void pblk_line_put(struct kref *ref);
 struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line);
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
 		   unsigned long secs_to_flush);
 void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -656,11 +747,11 @@ void pblk_end_bio_sync(struct bio *bio);
 void pblk_end_io_sync(struct nvm_rq *rqd);
 int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
 		       int nr_pages);
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
-			     u64 paddr);
 void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
 			 int nr_pages);
 void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+			   u64 paddr);
 void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
 void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
 			   struct ppa_addr ppa);
@@ -702,6 +793,7 @@ void pblk_write_should_kick(struct pblk *pblk);
 /*
  * pblk read path
  */
+extern struct bio_set *pblk_bio_set;
 int pblk_submit_read(struct pblk *pblk, struct bio *bio);
 int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 			unsigned int nr_secs, unsigned int *secs_to_gc,
@@ -711,7 +803,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
  */
 void pblk_submit_rec(struct work_struct *work);
 struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
-void pblk_recov_pad(struct pblk *pblk);
+int pblk_recov_pad(struct pblk *pblk);
 __le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta);
 int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
 			struct pblk_rec_ctx *recovery, u64 *comp_bits,
@@ -720,33 +812,40 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
 /*
  * pblk gc
  */
-#define PBLK_GC_TRIES 3
+#define PBLK_GC_MAX_READERS 8	/* Max number of outstanding GC reader jobs */
+#define PBLK_GC_W_QD 128	/* Queue depth for inflight GC write I/Os */
+#define PBLK_GC_L_QD 4		/* Queue depth for inflight GC lines */
+#define PBLK_GC_RSV_LINE 1	/* Reserved lines for GC */
 
 int pblk_gc_init(struct pblk *pblk);
 void pblk_gc_exit(struct pblk *pblk);
 void pblk_gc_should_start(struct pblk *pblk);
 void pblk_gc_should_stop(struct pblk *pblk);
-int pblk_gc_status(struct pblk *pblk);
+void pblk_gc_should_kick(struct pblk *pblk);
+void pblk_gc_kick(struct pblk *pblk);
 void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
 			      int *gc_active);
-void pblk_gc_sysfs_force(struct pblk *pblk, int force);
+int pblk_gc_sysfs_force(struct pblk *pblk, int force);
 
 /*
  * pblk rate limiter
  */
 void pblk_rl_init(struct pblk_rl *rl, int budget);
 void pblk_rl_free(struct pblk_rl *rl);
-int pblk_rl_gc_thrs(struct pblk_rl *rl);
+int pblk_rl_high_thrs(struct pblk_rl *rl);
+int pblk_rl_low_thrs(struct pblk_rl *rl);
 unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
 int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
 int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
 int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
 void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
 void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
+void pblk_rl_set_space_limit(struct pblk_rl *rl, int entries_left);
+int pblk_rl_is_limit(struct pblk_rl *rl);
 
 /*
  * pblk sysfs
@@ -774,9 +873,30 @@ static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx)
 	return c_ctx - sizeof(struct nvm_rq);
 }
 
-static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta)
+static inline void *emeta_to_bb(struct line_emeta *emeta)
+{
+	return emeta->bb_bitmap;
+}
+
+static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
+{
+	return ((void *)emeta + pblk->lm.emeta_len[1]);
+}
+
+static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta)
 {
-	return (emeta) + 1;
+	return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]);
+}
+
+static inline int pblk_line_vsc(struct pblk_line *line)
+{
+	int vsc;
+
+	spin_lock(&line->lock);
+	vsc = le32_to_cpu(*line->vsc);
+	spin_unlock(&line->lock);
+
+	return vsc;
 }
 
 #define NVM_MEM_PAGE_WRITE (8)
@@ -917,6 +1037,14 @@ static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr)
 	ppa_addr->ppa = ADDR_EMPTY;
 }
 
+static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa)
+{
+	if (lppa.ppa == rppa.ppa)
+		return true;
+
+	return false;
+}
+
 static inline int pblk_addr_in_cache(struct ppa_addr ppa)
 {
 	return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached);
@@ -964,11 +1092,11 @@ static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr,
 }
 
 static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
-					    struct line_smeta *smeta)
+					    struct line_header *header)
 {
 	u32 crc = ~(u32)0;
 
-	crc = crc32_le(crc, (unsigned char *)smeta + sizeof(crc),
+	crc = crc32_le(crc, (unsigned char *)header + sizeof(crc),
 				sizeof(struct line_header) - sizeof(crc));
 
 	return crc;
@@ -996,7 +1124,7 @@ static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
 
 	crc = crc32_le(crc, (unsigned char *)emeta +
 				sizeof(struct line_header) + sizeof(crc),
-				lm->emeta_len -
+				lm->emeta_len[0] -
 				sizeof(struct line_header) - sizeof(crc));
 
 	return crc;
@@ -1016,9 +1144,27 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
 	return flags;
 }
 
-static inline int pblk_set_read_mode(struct pblk *pblk)
+enum {
+	PBLK_READ_RANDOM	= 0,
+	PBLK_READ_SEQUENTIAL	= 1,
+};
+
+static inline int pblk_set_read_mode(struct pblk *pblk, int type)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	int flags;
+
+	flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+	if (type == PBLK_READ_SEQUENTIAL)
+		flags |= geo->plane_mode >> 1;
+
+	return flags;
+}
+
+static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
 {
-	return NVM_IO_SNGL_ACCESS | NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+	return !(nr_secs % pblk->min_write_pgs);
 }
 
 #ifdef CONFIG_NVM_DEBUG
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index cf0e28a0ff61..267f01ae87e4 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -279,8 +279,8 @@ static void rrpc_end_sync_bio(struct bio *bio)
 {
 	struct completion *waiting = bio->bi_private;
 
-	if (bio->bi_error)
-		pr_err("nvm: gc request failed (%u).\n", bio->bi_error);
+	if (bio->bi_status)
+		pr_err("nvm: gc request failed (%u).\n", bio->bi_status);
 
 	complete(waiting);
 }
@@ -359,7 +359,7 @@ try:
 			goto finished;
 		}
 		wait_for_completion_io(&wait);
-		if (bio->bi_error) {
+		if (bio->bi_status) {
 			rrpc_inflight_laddr_release(rrpc, rqd);
 			goto finished;
 		}
@@ -385,7 +385,7 @@ try:
 		wait_for_completion_io(&wait);
 
 		rrpc_inflight_laddr_release(rrpc, rqd);
-		if (bio->bi_error)
+		if (bio->bi_status)
 			goto finished;
 
 		bio_reset(bio);
@@ -994,7 +994,7 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
 	struct nvm_rq *rqd;
 	int err;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (bio_op(bio) == REQ_OP_DISCARD) {
 		rrpc_discard(rrpc, bio);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index c3ea03c9a1a8..dee542fff68e 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -849,10 +849,11 @@ static inline void wake_up_allocators(struct cache_set *c)
 
 /* Forward declarations */
 
-void bch_count_io_errors(struct cache *, int, const char *);
+void bch_count_io_errors(struct cache *, blk_status_t, const char *);
 void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
-			      int, const char *);
-void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
+			      blk_status_t, const char *);
+void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
+		const char *);
 void bch_bbio_free(struct bio *, struct cache_set *);
 struct bio *bch_bbio_alloc(struct cache_set *);
 
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 450d0e848ae4..866dcf78ff8e 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -307,7 +307,7 @@ static void bch_btree_node_read(struct btree *b)
 	bch_submit_bbio(bio, b->c, &b->key, 0);
 	closure_sync(&cl);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		set_btree_node_io_error(b);
 
 	bch_bbio_free(bio, b->c);
@@ -374,10 +374,10 @@ static void btree_node_write_endio(struct bio *bio)
 	struct closure *cl = bio->bi_private;
 	struct btree *b = container_of(cl, struct btree, io);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		set_btree_node_io_error(b);
 
-	bch_bbio_count_io_errors(b->c, bio, bio->bi_error, "writing btree");
+	bch_bbio_count_io_errors(b->c, bio, bio->bi_status, "writing btree");
 	closure_put(cl);
 }
 
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 9b80417cd547..73da1f5626cb 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -207,7 +207,7 @@ void bkey_put(struct cache_set *c, struct bkey *k);
 
 struct btree_op {
 	/* for waiting on btree reserve in btree_split() */
-	wait_queue_t		wait;
+	wait_queue_entry_t		wait;
 
 	/* Btree level at which we start taking write locks */
 	short			lock;
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 06f55056aaae..35a5a7210e51 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -110,7 +110,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
 	struct bio_vec bv, cbv;
 	struct bvec_iter iter, citer = { 0 };
 
-	check = bio_clone(bio, GFP_NOIO);
+	check = bio_clone_kmalloc(bio, GFP_NOIO);
 	if (!check)
 		return;
 	check->bi_opf = REQ_OP_READ;
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index db45a88c0ce9..6a9b85095e7b 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -50,7 +50,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
 
 /* IO errors */
 
-void bch_count_io_errors(struct cache *ca, int error, const char *m)
+void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m)
 {
 	/*
 	 * The halflife of an error is:
@@ -103,7 +103,7 @@ void bch_count_io_errors(struct cache *ca, int error, const char *m)
 }
 
 void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
-			      int error, const char *m)
+			      blk_status_t error, const char *m)
 {
 	struct bbio *b = container_of(bio, struct bbio, bio);
 	struct cache *ca = PTR_CACHE(c, &b->key, 0);
@@ -132,7 +132,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
 }
 
 void bch_bbio_endio(struct cache_set *c, struct bio *bio,
-		    int error, const char *m)
+		    blk_status_t error, const char *m)
 {
 	struct closure *cl = bio->bi_private;
 
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 1198e53d5670..0352d05e495c 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -549,7 +549,7 @@ static void journal_write_endio(struct bio *bio)
 {
 	struct journal_write *w = bio->bi_private;
 
-	cache_set_err_on(bio->bi_error, w->c, "journal io error");
+	cache_set_err_on(bio->bi_status, w->c, "journal io error");
 	closure_put(&w->c->journal.io);
 }
 
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 13b8a907006d..f633b30c962e 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -63,14 +63,14 @@ static void read_moving_endio(struct bio *bio)
 	struct moving_io *io = container_of(bio->bi_private,
 					    struct moving_io, cl);
 
-	if (bio->bi_error)
-		io->op.error = bio->bi_error;
+	if (bio->bi_status)
+		io->op.status = bio->bi_status;
 	else if (!KEY_DIRTY(&b->key) &&
 		 ptr_stale(io->op.c, &b->key, 0)) {
-		io->op.error = -EINTR;
+		io->op.status = BLK_STS_IOERR;
 	}
 
-	bch_bbio_endio(io->op.c, bio, bio->bi_error, "reading data to move");
+	bch_bbio_endio(io->op.c, bio, bio->bi_status, "reading data to move");
 }
 
 static void moving_init(struct moving_io *io)
@@ -92,7 +92,7 @@ static void write_moving(struct closure *cl)
 	struct moving_io *io = container_of(cl, struct moving_io, cl);
 	struct data_insert_op *op = &io->op;
 
-	if (!op->error) {
+	if (!op->status) {
 		moving_init(io);
 
 		io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 709c9cc34369..019b3df9f1c6 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -81,7 +81,7 @@ static void bch_data_insert_keys(struct closure *cl)
 	if (ret == -ESRCH) {
 		op->replace_collision = true;
 	} else if (ret) {
-		op->error		= -ENOMEM;
+		op->status		= BLK_STS_RESOURCE;
 		op->insert_data_done	= true;
 	}
 
@@ -178,17 +178,17 @@ static void bch_data_insert_endio(struct bio *bio)
 	struct closure *cl = bio->bi_private;
 	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		/* TODO: We could try to recover from this. */
 		if (op->writeback)
-			op->error = bio->bi_error;
+			op->status = bio->bi_status;
 		else if (!op->replace)
 			set_closure_fn(cl, bch_data_insert_error, op->wq);
 		else
 			set_closure_fn(cl, NULL, NULL);
 	}
 
-	bch_bbio_endio(op->c, bio, bio->bi_error, "writing data to cache");
+	bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
 }
 
 static void bch_data_insert_start(struct closure *cl)
@@ -488,15 +488,15 @@ static void bch_cache_read_endio(struct bio *bio)
 	 * from the backing device.
 	 */
 
-	if (bio->bi_error)
-		s->iop.error = bio->bi_error;
+	if (bio->bi_status)
+		s->iop.status = bio->bi_status;
 	else if (!KEY_DIRTY(&b->key) &&
 		 ptr_stale(s->iop.c, &b->key, 0)) {
 		atomic_long_inc(&s->iop.c->cache_read_races);
-		s->iop.error = -EINTR;
+		s->iop.status = BLK_STS_IOERR;
 	}
 
-	bch_bbio_endio(s->iop.c, bio, bio->bi_error, "reading from cache");
+	bch_bbio_endio(s->iop.c, bio, bio->bi_status, "reading from cache");
 }
 
 /*
@@ -593,9 +593,9 @@ static void request_endio(struct bio *bio)
 {
 	struct closure *cl = bio->bi_private;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		struct search *s = container_of(cl, struct search, cl);
-		s->iop.error = bio->bi_error;
+		s->iop.status = bio->bi_status;
 		/* Only cache read errors are recoverable */
 		s->recoverable = false;
 	}
@@ -611,7 +611,7 @@ static void bio_complete(struct search *s)
 				    &s->d->disk->part0, s->start_time);
 
 		trace_bcache_request_end(s->d, s->orig_bio);
-		s->orig_bio->bi_error = s->iop.error;
+		s->orig_bio->bi_status = s->iop.status;
 		bio_endio(s->orig_bio);
 		s->orig_bio = NULL;
 	}
@@ -664,7 +664,7 @@ static inline struct search *search_alloc(struct bio *bio,
 	s->iop.inode		= d->id;
 	s->iop.write_point	= hash_long((unsigned long) current, 16);
 	s->iop.write_prio	= 0;
-	s->iop.error		= 0;
+	s->iop.status		= 0;
 	s->iop.flags		= 0;
 	s->iop.flush_journal	= op_is_flush(bio->bi_opf);
 	s->iop.wq		= bcache_wq;
@@ -707,7 +707,7 @@ static void cached_dev_read_error(struct closure *cl)
 		/* Retry from the backing device: */
 		trace_bcache_read_retry(s->orig_bio);
 
-		s->iop.error = 0;
+		s->iop.status = 0;
 		do_bio_hook(s, s->orig_bio);
 
 		/* XXX: invalidate cache */
@@ -767,7 +767,7 @@ static void cached_dev_read_done_bh(struct closure *cl)
 				  !s->cache_miss, s->iop.bypass);
 	trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);
 
-	if (s->iop.error)
+	if (s->iop.status)
 		continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
 	else if (s->iop.bio || verify(dc, &s->bio.bio))
 		continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 1ff36875c2b3..7689176951ce 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -10,7 +10,7 @@ struct data_insert_op {
 	unsigned		inode;
 	uint16_t		write_point;
 	uint16_t		write_prio;
-	short			error;
+	blk_status_t		status;
 
 	union {
 		uint16_t	flags;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e57353e39168..8352fad765f6 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -271,7 +271,7 @@ static void write_super_endio(struct bio *bio)
 {
 	struct cache *ca = bio->bi_private;
 
-	bch_count_io_errors(ca, bio->bi_error, "writing superblock");
+	bch_count_io_errors(ca, bio->bi_status, "writing superblock");
 	closure_put(&ca->set->sb_write);
 }
 
@@ -321,7 +321,7 @@ static void uuid_endio(struct bio *bio)
 	struct closure *cl = bio->bi_private;
 	struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
 
-	cache_set_err_on(bio->bi_error, c, "accessing uuids");
+	cache_set_err_on(bio->bi_status, c, "accessing uuids");
 	bch_bbio_free(bio, c);
 	closure_put(cl);
 }
@@ -494,7 +494,7 @@ static void prio_endio(struct bio *bio)
 {
 	struct cache *ca = bio->bi_private;
 
-	cache_set_err_on(bio->bi_error, ca->set, "accessing priorities");
+	cache_set_err_on(bio->bi_status, ca->set, "accessing priorities");
 	bch_bbio_free(bio, ca->set);
 	closure_put(&ca->prio);
 }
@@ -782,7 +782,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
 
 	minor *= BCACHE_MINORS;
 
-	if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+	if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+					   BIOSET_NEED_BVECS |
+					   BIOSET_NEED_RESCUER)) ||
 	    !(d->disk = alloc_disk(BCACHE_MINORS))) {
 		ida_simple_remove(&bcache_minor, minor);
 		return -ENOMEM;
@@ -1516,7 +1518,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 				sizeof(struct bbio) + sizeof(struct bio_vec) *
 				bucket_pages(c))) ||
 	    !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
-	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+					   BIOSET_NEED_BVECS |
+					   BIOSET_NEED_RESCUER)) ||
 	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    !(c->moving_gc_wq = alloc_workqueue("bcache_gc",
 						WQ_MEM_RECLAIM, 0)) ||
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 6ac2e48b9235..42c66e76f05e 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -167,7 +167,7 @@ static void dirty_endio(struct bio *bio)
 	struct keybuf_key *w = bio->bi_private;
 	struct dirty_io *io = w->private;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		SET_KEY_DIRTY(&w->key, false);
 
 	closure_put(&io->cl);
@@ -195,7 +195,7 @@ static void read_dirty_endio(struct bio *bio)
 	struct dirty_io *io = w->private;
 
 	bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
-			    bio->bi_error, "reading dirty data from cache");
+			    bio->bi_status, "reading dirty data from cache");
 
 	dirty_endio(bio);
 }
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index bf7419a56454..f4eace5ea184 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -485,10 +485,10 @@ void bitmap_print_sb(struct bitmap *bitmap)
 	pr_debug("         magic: %08x\n", le32_to_cpu(sb->magic));
 	pr_debug("       version: %d\n", le32_to_cpu(sb->version));
 	pr_debug("          uuid: %08x.%08x.%08x.%08x\n",
-		 *(__u32 *)(sb->uuid+0),
-		 *(__u32 *)(sb->uuid+4),
-		 *(__u32 *)(sb->uuid+8),
-		 *(__u32 *)(sb->uuid+12));
+		 le32_to_cpu(*(__u32 *)(sb->uuid+0)),
+		 le32_to_cpu(*(__u32 *)(sb->uuid+4)),
+		 le32_to_cpu(*(__u32 *)(sb->uuid+8)),
+		 le32_to_cpu(*(__u32 *)(sb->uuid+12)));
 	pr_debug("        events: %llu\n",
 		 (unsigned long long) le64_to_cpu(sb->events));
 	pr_debug("events cleared: %llu\n",
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index ae7da2c30a57..82d27384d31f 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -229,7 +229,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
 EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
 
 void dm_cell_error(struct dm_bio_prison *prison,
-		   struct dm_bio_prison_cell *cell, int error)
+		   struct dm_bio_prison_cell *cell, blk_status_t error)
 {
 	struct bio_list bios;
 	struct bio *bio;
@@ -238,7 +238,7 @@ void dm_cell_error(struct dm_bio_prison *prison,
 	dm_cell_release(prison, cell, &bios);
 
 	while ((bio = bio_list_pop(&bios))) {
-		bio->bi_error = error;
+		bio->bi_status = error;
 		bio_endio(bio);
 	}
 }
diff --git a/drivers/md/dm-bio-prison-v1.h b/drivers/md/dm-bio-prison-v1.h
index cddd4ac07e2c..cec52ac5e1ae 100644
--- a/drivers/md/dm-bio-prison-v1.h
+++ b/drivers/md/dm-bio-prison-v1.h
@@ -91,7 +91,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
 			       struct dm_bio_prison_cell *cell,
 			       struct bio_list *inmates);
 void dm_cell_error(struct dm_bio_prison *prison,
-		   struct dm_bio_prison_cell *cell, int error);
+		   struct dm_bio_prison_cell *cell, blk_status_t error);
 
 /*
  * Visits the cell and then releases.  Guarantees no new inmates are
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 5db11a405129..850ff6c67994 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -145,8 +145,8 @@ struct dm_buffer {
 	enum data_mode data_mode;
 	unsigned char list_mode;		/* LIST_* */
 	unsigned hold_count;
-	int read_error;
-	int write_error;
+	blk_status_t read_error;
+	blk_status_t write_error;
 	unsigned long state;
 	unsigned long last_accessed;
 	struct dm_bufio_client *c;
@@ -218,7 +218,7 @@ static DEFINE_SPINLOCK(param_spinlock);
  * Buffers are freed after this timeout
  */
 static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
-static unsigned dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
+static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
 
 static unsigned long dm_bufio_peak_allocated;
 static unsigned long dm_bufio_allocated_kmem_cache;
@@ -555,7 +555,7 @@ static void dmio_complete(unsigned long error, void *context)
 {
 	struct dm_buffer *b = context;
 
-	b->bio.bi_error = error ? -EIO : 0;
+	b->bio.bi_status = error ? BLK_STS_IOERR : 0;
 	b->bio.bi_end_io(&b->bio);
 }
 
@@ -588,7 +588,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
 
 	r = dm_io(&io_req, 1, &region, NULL);
 	if (r) {
-		b->bio.bi_error = r;
+		b->bio.bi_status = errno_to_blk_status(r);
 		end_io(&b->bio);
 	}
 }
@@ -596,7 +596,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
 static void inline_endio(struct bio *bio)
 {
 	bio_end_io_t *end_fn = bio->bi_private;
-	int error = bio->bi_error;
+	blk_status_t status = bio->bi_status;
 
 	/*
 	 * Reset the bio to free any attached resources
@@ -604,7 +604,7 @@ static void inline_endio(struct bio *bio)
 	 */
 	bio_reset(bio);
 
-	bio->bi_error = error;
+	bio->bi_status = status;
 	end_fn(bio);
 }
 
@@ -685,11 +685,12 @@ static void write_endio(struct bio *bio)
 {
 	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
 
-	b->write_error = bio->bi_error;
-	if (unlikely(bio->bi_error)) {
+	b->write_error = bio->bi_status;
+	if (unlikely(bio->bi_status)) {
 		struct dm_bufio_client *c = b->c;
-		int error = bio->bi_error;
-		(void)cmpxchg(&c->async_write_error, 0, error);
+
+		(void)cmpxchg(&c->async_write_error, 0,
+				blk_status_to_errno(bio->bi_status));
 	}
 
 	BUG_ON(!test_bit(B_WRITING, &b->state));
@@ -1063,7 +1064,7 @@ static void read_endio(struct bio *bio)
 {
 	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
 
-	b->read_error = bio->bi_error;
+	b->read_error = bio->bi_status;
 
 	BUG_ON(!test_bit(B_READING, &b->state));
 
@@ -1107,7 +1108,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
 	wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
 
 	if (b->read_error) {
-		int error = b->read_error;
+		int error = blk_status_to_errno(b->read_error);
 
 		dm_bufio_release(b);
 
@@ -1257,7 +1258,8 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
  */
 int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
 {
-	int a, f;
+	blk_status_t a;
+	int f;
 	unsigned long buffers_processed = 0;
 	struct dm_buffer *b, *tmp;
 
@@ -1334,7 +1336,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c)
 {
 	struct dm_io_request io_req = {
 		.bi_op = REQ_OP_WRITE,
-		.bi_op_flags = REQ_PREFLUSH,
+		.bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
 		.mem.type = DM_IO_KMEM,
 		.mem.ptr.addr = NULL,
 		.client = c->dm_io,
@@ -1558,10 +1560,10 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
 	return true;
 }
 
-static unsigned get_retain_buffers(struct dm_bufio_client *c)
+static unsigned long get_retain_buffers(struct dm_bufio_client *c)
 {
-        unsigned retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes);
-        return retain_bytes / c->block_size;
+        unsigned long retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes);
+        return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT);
 }
 
 static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
@@ -1571,7 +1573,7 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
 	struct dm_buffer *b, *tmp;
 	unsigned long freed = 0;
 	unsigned long count = nr_to_scan;
-	unsigned retain_target = get_retain_buffers(c);
+	unsigned long retain_target = get_retain_buffers(c);
 
 	for (l = 0; l < LIST_SIZE; l++) {
 		list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
@@ -1794,8 +1796,8 @@ static bool older_than(struct dm_buffer *b, unsigned long age_hz)
 static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
 {
 	struct dm_buffer *b, *tmp;
-	unsigned retain_target = get_retain_buffers(c);
-	unsigned count;
+	unsigned long retain_target = get_retain_buffers(c);
+	unsigned long count;
 	LIST_HEAD(write_list);
 
 	dm_bufio_lock(c);
@@ -1955,7 +1957,7 @@ MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
 module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
 
-module_param_named(retain_bytes, dm_bufio_retain_bytes, uint, S_IRUGO | S_IWUSR);
+module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");
 
 module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c
index 9b1afdfb13f0..707233891291 100644
--- a/drivers/md/dm-cache-background-tracker.c
+++ b/drivers/md/dm-cache-background-tracker.c
@@ -33,6 +33,11 @@ struct background_tracker *btracker_create(unsigned max_work)
 {
 	struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL);
 
+	if (!b) {
+		DMERR("couldn't create background_tracker");
+		return NULL;
+	}
+
 	b->max_work = max_work;
 	atomic_set(&b->pending_promotes, 0);
 	atomic_set(&b->pending_writebacks, 0);
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index 72479bd61e11..e5eb9c9b4bc8 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1120,8 +1120,6 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
 	 * Cache entries may not be populated.  So we cannot rely on the
 	 * size of the clean queue.
 	 */
-	unsigned nr_clean;
-
 	if (idle) {
 		/*
 		 * We'd like to clean everything.
@@ -1129,18 +1127,16 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
 		return q_size(&mq->dirty) == 0u;
 	}
 
-	nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
-	return (nr_clean + btracker_nr_writebacks_queued(mq->bg_work)) >=
-		percent_to_target(mq, CLEAN_TARGET);
+	/*
+	 * If we're busy we don't worry about cleaning at all.
+	 */
+	return true;
 }
 
-static bool free_target_met(struct smq_policy *mq, bool idle)
+static bool free_target_met(struct smq_policy *mq)
 {
 	unsigned nr_free;
 
-	if (!idle)
-		return true;
-
 	nr_free = from_cblock(mq->cache_size) - mq->cache_alloc.nr_allocated;
 	return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >=
 		percent_to_target(mq, FREE_TARGET);
@@ -1190,9 +1186,9 @@ static void queue_demotion(struct smq_policy *mq)
 	if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed)))
 		return;
 
-	e = q_peek(&mq->clean, mq->clean.nr_levels, true);
+	e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
 	if (!e) {
-		if (!clean_target_met(mq, false))
+		if (!clean_target_met(mq, true))
 			queue_writeback(mq);
 		return;
 	}
@@ -1220,7 +1216,7 @@ static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
 		 * We always claim to be 'idle' to ensure some demotions happen
 		 * with continuous loads.
 		 */
-		if (!free_target_met(mq, true))
+		if (!free_target_met(mq))
 			queue_demotion(mq);
 		return;
 	}
@@ -1421,14 +1417,10 @@ static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
 	spin_lock_irqsave(&mq->lock, flags);
 	r = btracker_issue(mq->bg_work, result);
 	if (r == -ENODATA) {
-		/* find some writeback work to do */
-		if (mq->migrations_allowed && !free_target_met(mq, idle))
-			queue_demotion(mq);
-
-		else if (!clean_target_met(mq, idle))
+		if (!clean_target_met(mq, idle)) {
 			queue_writeback(mq);
-
-		r = btracker_issue(mq->bg_work, result);
+			r = btracker_issue(mq->bg_work, result);
+		}
 	}
 	spin_unlock_irqrestore(&mq->lock, flags);
 
@@ -1452,6 +1444,7 @@ static void __complete_background_work(struct smq_policy *mq,
 		clear_pending(mq, e);
 		if (success) {
 			e->oblock = work->oblock;
+			e->level = NR_CACHE_LEVELS - 1;
 			push(mq, e);
 			// h, q, a
 		} else {
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 1db375f50a13..c5ea03fc7ee1 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -94,6 +94,9 @@ static void iot_io_begin(struct io_tracker *iot, sector_t len)
 
 static void __iot_io_end(struct io_tracker *iot, sector_t len)
 {
+	if (!len)
+		return;
+
 	iot->in_flight -= len;
 	if (!iot->in_flight)
 		iot->idle_time = jiffies;
@@ -116,7 +119,7 @@ static void iot_io_end(struct io_tracker *iot, sector_t len)
  */
 struct continuation {
 	struct work_struct ws;
-	int input;
+	blk_status_t input;
 };
 
 static inline void init_continuation(struct continuation *k,
@@ -142,7 +145,7 @@ struct batcher {
 	/*
 	 * The operation that everyone is waiting for.
 	 */
-	int (*commit_op)(void *context);
+	blk_status_t (*commit_op)(void *context);
 	void *commit_context;
 
 	/*
@@ -168,8 +171,7 @@ struct batcher {
 static void __commit(struct work_struct *_ws)
 {
 	struct batcher *b = container_of(_ws, struct batcher, commit_work);
-
-	int r;
+	blk_status_t r;
 	unsigned long flags;
 	struct list_head work_items;
 	struct work_struct *ws, *tmp;
@@ -202,7 +204,7 @@ static void __commit(struct work_struct *_ws)
 
 	while ((bio = bio_list_pop(&bios))) {
 		if (r) {
-			bio->bi_error = r;
+			bio->bi_status = r;
 			bio_endio(bio);
 		} else
 			b->issue_op(bio, b->issue_context);
@@ -210,7 +212,7 @@ static void __commit(struct work_struct *_ws)
 }
 
 static void batcher_init(struct batcher *b,
-			 int (*commit_op)(void *),
+			 blk_status_t (*commit_op)(void *),
 			 void *commit_context,
 			 void (*issue_op)(struct bio *bio, void *),
 			 void *issue_context,
@@ -474,7 +476,7 @@ struct cache {
 	spinlock_t invalidation_lock;
 	struct list_head invalidation_requests;
 
-	struct io_tracker origin_tracker;
+	struct io_tracker tracker;
 
 	struct work_struct commit_ws;
 	struct batcher committer;
@@ -901,8 +903,7 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
 
 static bool accountable_bio(struct cache *cache, struct bio *bio)
 {
-	return ((bio->bi_bdev == cache->origin_dev->bdev) &&
-		bio_op(bio) != REQ_OP_DISCARD);
+	return bio_op(bio) != REQ_OP_DISCARD;
 }
 
 static void accounted_begin(struct cache *cache, struct bio *bio)
@@ -912,7 +913,7 @@ static void accounted_begin(struct cache *cache, struct bio *bio)
 
 	if (accountable_bio(cache, bio)) {
 		pb->len = bio_sectors(bio);
-		iot_io_begin(&cache->origin_tracker, pb->len);
+		iot_io_begin(&cache->tracker, pb->len);
 	}
 }
 
@@ -921,7 +922,7 @@ static void accounted_complete(struct cache *cache, struct bio *bio)
 	size_t pb_data_size = get_per_bio_data_size(cache);
 	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 
-	iot_io_end(&cache->origin_tracker, pb->len);
+	iot_io_end(&cache->tracker, pb->len);
 }
 
 static void accounted_request(struct cache *cache, struct bio *bio)
@@ -953,7 +954,7 @@ static void writethrough_endio(struct bio *bio)
 
 	dm_unhook_bio(&pb->hook_info, bio);
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		bio_endio(bio);
 		return;
 	}
@@ -1218,7 +1219,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
 	struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
 
 	if (read_err || write_err)
-		mg->k.input = -EIO;
+		mg->k.input = BLK_STS_IOERR;
 
 	queue_continuation(mg->cache->wq, &mg->k);
 }
@@ -1264,8 +1265,8 @@ static void overwrite_endio(struct bio *bio)
 
 	dm_unhook_bio(&pb->hook_info, bio);
 
-	if (bio->bi_error)
-		mg->k.input = bio->bi_error;
+	if (bio->bi_status)
+		mg->k.input = bio->bi_status;
 
 	queue_continuation(mg->cache->wq, &mg->k);
 }
@@ -1321,8 +1322,10 @@ static void mg_complete(struct dm_cache_migration *mg, bool success)
 		if (mg->overwrite_bio) {
 			if (success)
 				force_set_dirty(cache, cblock);
+			else if (mg->k.input)
+				mg->overwrite_bio->bi_status = mg->k.input;
 			else
-				mg->overwrite_bio->bi_error = (mg->k.input ? : -EIO);
+				mg->overwrite_bio->bi_status = BLK_STS_IOERR;
 			bio_endio(mg->overwrite_bio);
 		} else {
 			if (success)
@@ -1502,7 +1505,7 @@ static void mg_copy(struct work_struct *ws)
 		r = copy(mg, is_policy_promote);
 		if (r) {
 			DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
-			mg->k.input = -EIO;
+			mg->k.input = BLK_STS_IOERR;
 			mg_complete(mg, false);
 		}
 	}
@@ -1716,20 +1719,19 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
 
 enum busy {
 	IDLE,
-	MODERATE,
 	BUSY
 };
 
 static enum busy spare_migration_bandwidth(struct cache *cache)
 {
-	bool idle = iot_idle_for(&cache->origin_tracker, HZ);
+	bool idle = iot_idle_for(&cache->tracker, HZ);
 	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
 		cache->sectors_per_block;
 
-	if (current_volume <= cache->migration_threshold)
-		return idle ? IDLE : MODERATE;
+	if (idle && current_volume <= cache->migration_threshold)
+		return IDLE;
 	else
-		return idle ? MODERATE : BUSY;
+		return BUSY;
 }
 
 static void inc_hit_counter(struct cache *cache, struct bio *bio)
@@ -1906,12 +1908,12 @@ static int commit(struct cache *cache, bool clean_shutdown)
 /*
  * Used by the batcher.
  */
-static int commit_op(void *context)
+static blk_status_t commit_op(void *context)
 {
 	struct cache *cache = context;
 
 	if (dm_cache_changed_this_transaction(cache->cmd))
-		return commit(cache, false);
+		return errno_to_blk_status(commit(cache, false));
 
 	return 0;
 }
@@ -2017,7 +2019,7 @@ static void requeue_deferred_bios(struct cache *cache)
 	bio_list_init(&cache->deferred_bios);
 
 	while ((bio = bio_list_pop(&bios))) {
-		bio->bi_error = DM_ENDIO_REQUEUE;
+		bio->bi_status = BLK_STS_DM_REQUEUE;
 		bio_endio(bio);
 	}
 }
@@ -2045,8 +2047,6 @@ static void check_migrations(struct work_struct *ws)
 
 	for (;;) {
 		b = spare_migration_bandwidth(cache);
-		if (b == BUSY)
-			break;
 
 		r = policy_get_background_work(cache->policy, b == IDLE, &op);
 		if (r == -ENODATA)
@@ -2717,7 +2717,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
 	batcher_init(&cache->committer, commit_op, cache,
 		     issue_op, cache, cache->wq);
-	iot_init(&cache->origin_tracker);
+	iot_init(&cache->tracker);
 
 	init_rwsem(&cache->background_work_lock);
 	prevent_background_work(cache);
@@ -2821,7 +2821,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 	return r;
 }
 
-static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int cache_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	struct cache *cache = ti->private;
 	unsigned long flags;
@@ -2839,7 +2840,7 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
 	bio_drop_shared_lock(cache, bio);
 	accounted_complete(cache, bio);
 
-	return 0;
+	return DM_ENDIO_DONE;
 }
 
 static int write_dirty_bitset(struct cache *cache)
@@ -2941,7 +2942,7 @@ static void cache_postsuspend(struct dm_target *ti)
 
 	cancel_delayed_work(&cache->waker);
 	flush_workqueue(cache->wq);
-	WARN_ON(cache->origin_tracker.in_flight);
+	WARN_ON(cache->tracker.in_flight);
 
 	/*
 	 * If it's a flush suspend there won't be any deferred bios, so this
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ebf9e72d479b..9e1b72e8f7ef 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -71,7 +71,7 @@ struct dm_crypt_io {
 	struct convert_context ctx;
 
 	atomic_t io_pending;
-	int error;
+	blk_status_t error;
 	sector_t sector;
 
 	struct rb_node rb_node;
@@ -1292,7 +1292,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
 /*
  * Encrypt / decrypt data from one bio to another one (can be the same one)
  */
-static int crypt_convert(struct crypt_config *cc,
+static blk_status_t crypt_convert(struct crypt_config *cc,
 			 struct convert_context *ctx)
 {
 	unsigned int tag_offset = 0;
@@ -1343,13 +1343,13 @@ static int crypt_convert(struct crypt_config *cc,
 		 */
 		case -EBADMSG:
 			atomic_dec(&ctx->cc_pending);
-			return -EILSEQ;
+			return BLK_STS_PROTECTION;
 		/*
 		 * There was an error while processing the request.
 		 */
 		default:
 			atomic_dec(&ctx->cc_pending);
-			return -EIO;
+			return BLK_STS_IOERR;
 		}
 	}
 
@@ -1463,7 +1463,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *base_bio = io->base_bio;
-	int error = io->error;
+	blk_status_t error = io->error;
 
 	if (!atomic_dec_and_test(&io->io_pending))
 		return;
@@ -1476,7 +1476,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 	else
 		kfree(io->integrity_metadata);
 
-	base_bio->bi_error = error;
+	base_bio->bi_status = error;
 	bio_endio(base_bio);
 }
 
@@ -1502,7 +1502,7 @@ static void crypt_endio(struct bio *clone)
 	struct dm_crypt_io *io = clone->bi_private;
 	struct crypt_config *cc = io->cc;
 	unsigned rw = bio_data_dir(clone);
-	int error;
+	blk_status_t error;
 
 	/*
 	 * free the processed pages
@@ -1510,7 +1510,7 @@ static void crypt_endio(struct bio *clone)
 	if (rw == WRITE)
 		crypt_free_buffer_pages(cc, clone);
 
-	error = clone->bi_error;
+	error = clone->bi_status;
 	bio_put(clone);
 
 	if (rw == READ && !error) {
@@ -1570,7 +1570,7 @@ static void kcryptd_io_read_work(struct work_struct *work)
 
 	crypt_inc_pending(io);
 	if (kcryptd_io_read(io, GFP_NOIO))
-		io->error = -ENOMEM;
+		io->error = BLK_STS_RESOURCE;
 	crypt_dec_pending(io);
 }
 
@@ -1656,7 +1656,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 	sector_t sector;
 	struct rb_node **rbp, *parent;
 
-	if (unlikely(io->error < 0)) {
+	if (unlikely(io->error)) {
 		crypt_free_buffer_pages(cc, clone);
 		bio_put(clone);
 		crypt_dec_pending(io);
@@ -1697,7 +1697,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 	struct bio *clone;
 	int crypt_finished;
 	sector_t sector = io->sector;
-	int r;
+	blk_status_t r;
 
 	/*
 	 * Prevent io from disappearing until this function completes.
@@ -1707,7 +1707,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 
 	clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
 	if (unlikely(!clone)) {
-		io->error = -EIO;
+		io->error = BLK_STS_IOERR;
 		goto dec;
 	}
 
@@ -1718,7 +1718,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 
 	crypt_inc_pending(io);
 	r = crypt_convert(cc, &io->ctx);
-	if (r < 0)
+	if (r)
 		io->error = r;
 	crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
 
@@ -1740,7 +1740,7 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
 static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
-	int r = 0;
+	blk_status_t r;
 
 	crypt_inc_pending(io);
 
@@ -1748,7 +1748,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 			   io->sector);
 
 	r = crypt_convert(cc, &io->ctx);
-	if (r < 0)
+	if (r)
 		io->error = r;
 
 	if (atomic_dec_and_test(&io->ctx.cc_pending))
@@ -1781,9 +1781,9 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
 	if (error == -EBADMSG) {
 		DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu",
 			    (unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)));
-		io->error = -EILSEQ;
+		io->error = BLK_STS_PROTECTION;
 	} else if (error < 0)
-		io->error = -EIO;
+		io->error = BLK_STS_IOERR;
 
 	crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
 
@@ -2677,7 +2677,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	cc->bs = bioset_create(MIN_IOS, 0);
+	cc->bs = bioset_create(MIN_IOS, 0, (BIOSET_NEED_BVECS |
+					    BIOSET_NEED_RESCUER));
 	if (!cc->bs) {
 		ti->error = "Cannot allocate crypt bioset";
 		goto bad;
@@ -2795,10 +2796,10 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
 	 * and is aligned to this size as defined in IO hints.
 	 */
 	if (unlikely((bio->bi_iter.bi_sector & ((cc->sector_size >> SECTOR_SHIFT) - 1)) != 0))
-		return -EIO;
+		return DM_MAPIO_KILL;
 
 	if (unlikely(bio->bi_iter.bi_size & (cc->sector_size - 1)))
-		return -EIO;
+		return DM_MAPIO_KILL;
 
 	io = dm_per_bio_data(bio, cc->per_bio_data_size);
 	crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 13305a182611..3d04d5ce19d9 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -321,7 +321,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
 		if (bio_data_dir(bio) == READ) {
 			if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) &&
 			    !test_bit(ERROR_WRITES, &fc->flags))
-				return -EIO;
+				return DM_MAPIO_KILL;
 			goto map_bio;
 		}
 
@@ -349,7 +349,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
 		/*
 		 * By default, error all I/O.
 		 */
-		return -EIO;
+		return DM_MAPIO_KILL;
 	}
 
 map_bio:
@@ -358,12 +358,13 @@ map_bio:
 	return DM_MAPIO_REMAPPED;
 }
 
-static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int flakey_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	struct flakey_c *fc = ti->private;
 	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
 
-	if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
+	if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
 		if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
 		    all_corrupt_bio_flags_match(bio, fc)) {
 			/*
@@ -377,11 +378,11 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
 			 * Error read during the down_interval if drop_writes
 			 * and error_writes were not configured.
 			 */
-			return -EIO;
+			*error = BLK_STS_IOERR;
 		}
 	}
 
-	return error;
+	return DM_ENDIO_DONE;
 }
 
 static void flakey_status(struct dm_target *ti, status_type_t type,
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index c7f7c8d76576..1b224aa9cf15 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -246,7 +246,7 @@ struct dm_integrity_io {
 	unsigned metadata_offset;
 
 	atomic_t in_flight;
-	int bi_error;
+	blk_status_t bi_status;
 
 	struct completion *completion;
 
@@ -783,7 +783,8 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi
 			for (i = 0; i < commit_sections; i++)
 				rw_section_mac(ic, commit_start + i, true);
 		}
-		rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, commit_sections, &io_comp);
+		rw_journal(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, commit_start,
+			   commit_sections, &io_comp);
 	} else {
 		unsigned to_end;
 		io_comp.in_flight = (atomic_t)ATOMIC_INIT(2);
@@ -1104,18 +1105,21 @@ static void schedule_autocommit(struct dm_integrity_c *ic)
 static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
 {
 	struct bio *bio;
-	spin_lock_irq(&ic->endio_wait.lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ic->endio_wait.lock, flags);
 	bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
 	bio_list_add(&ic->flush_bio_list, bio);
-	spin_unlock_irq(&ic->endio_wait.lock);
+	spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
+
 	queue_work(ic->commit_wq, &ic->commit_work);
 }
 
 static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
 {
 	int r = dm_integrity_failed(ic);
-	if (unlikely(r) && !bio->bi_error)
-		bio->bi_error = r;
+	if (unlikely(r) && !bio->bi_status)
+		bio->bi_status = errno_to_blk_status(r);
 	bio_endio(bio);
 }
 
@@ -1123,7 +1127,7 @@ static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *di
 {
 	struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
 
-	if (unlikely(dio->fua) && likely(!bio->bi_error) && likely(!dm_integrity_failed(ic)))
+	if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
 		submit_flush_bio(ic, dio);
 	else
 		do_endio(ic, bio);
@@ -1142,9 +1146,9 @@ static void dec_in_flight(struct dm_integrity_io *dio)
 
 		bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
 
-		if (unlikely(dio->bi_error) && !bio->bi_error)
-			bio->bi_error = dio->bi_error;
-		if (likely(!bio->bi_error) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
+		if (unlikely(dio->bi_status) && !bio->bi_status)
+			bio->bi_status = dio->bi_status;
+		if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
 			dio->range.logical_sector += dio->range.n_sectors;
 			bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
 			INIT_WORK(&dio->work, integrity_bio_wait);
@@ -1318,7 +1322,7 @@ skip_io:
 	dec_in_flight(dio);
 	return;
 error:
-	dio->bi_error = r;
+	dio->bi_status = errno_to_blk_status(r);
 	dec_in_flight(dio);
 }
 
@@ -1331,7 +1335,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
 	sector_t area, offset;
 
 	dio->ic = ic;
-	dio->bi_error = 0;
+	dio->bi_status = 0;
 
 	if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
 		submit_flush_bio(ic, dio);
@@ -1352,13 +1356,13 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
 		DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
 		      (unsigned long long)dio->range.logical_sector, bio_sectors(bio),
 		      (unsigned long long)ic->provided_data_sectors);
-		return -EIO;
+		return DM_MAPIO_KILL;
 	}
 	if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
 		DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
 		      ic->sectors_per_block,
 		      (unsigned long long)dio->range.logical_sector, bio_sectors(bio));
-		return -EIO;
+		return DM_MAPIO_KILL;
 	}
 
 	if (ic->sectors_per_block > 1) {
@@ -1368,7 +1372,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
 			if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
 				DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
 					bv.bv_offset, bv.bv_len, ic->sectors_per_block);
-				return -EIO;
+				return DM_MAPIO_KILL;
 			}
 		}
 	}
@@ -1383,18 +1387,18 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
 				wanted_tag_size *= ic->tag_size;
 			if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
 				DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
-				return -EIO;
+				return DM_MAPIO_KILL;
 			}
 		}
 	} else {
 		if (unlikely(bip != NULL)) {
 			DMERR("Unexpected integrity data when using internal hash");
-			return -EIO;
+			return DM_MAPIO_KILL;
 		}
 	}
 
 	if (unlikely(ic->mode == 'R') && unlikely(dio->write))
-		return -EIO;
+		return DM_MAPIO_KILL;
 
 	get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
 	dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
@@ -2374,21 +2378,6 @@ static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic)
 	blk_queue_max_integrity_segments(disk->queue, UINT_MAX);
 }
 
-/* FIXME: use new kvmalloc */
-static void *dm_integrity_kvmalloc(size_t size, gfp_t gfp)
-{
-	void *ptr = NULL;
-
-	if (size <= PAGE_SIZE)
-		ptr = kmalloc(size, GFP_KERNEL | gfp);
-	if (!ptr && size <= KMALLOC_MAX_SIZE)
-		ptr = kmalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | gfp);
-	if (!ptr)
-		ptr = __vmalloc(size, GFP_KERNEL | gfp, PAGE_KERNEL);
-
-	return ptr;
-}
-
 static void dm_integrity_free_page_list(struct dm_integrity_c *ic, struct page_list *pl)
 {
 	unsigned i;
@@ -2407,7 +2396,7 @@ static struct page_list *dm_integrity_alloc_page_list(struct dm_integrity_c *ic)
 	struct page_list *pl;
 	unsigned i;
 
-	pl = dm_integrity_kvmalloc(page_list_desc_size, __GFP_ZERO);
+	pl = kvmalloc(page_list_desc_size, GFP_KERNEL | __GFP_ZERO);
 	if (!pl)
 		return NULL;
 
@@ -2437,7 +2426,7 @@ static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_int
 	struct scatterlist **sl;
 	unsigned i;
 
-	sl = dm_integrity_kvmalloc(ic->journal_sections * sizeof(struct scatterlist *), __GFP_ZERO);
+	sl = kvmalloc(ic->journal_sections * sizeof(struct scatterlist *), GFP_KERNEL | __GFP_ZERO);
 	if (!sl)
 		return NULL;
 
@@ -2453,7 +2442,7 @@ static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_int
 
 		n_pages = (end_index - start_index + 1);
 
-		s = dm_integrity_kvmalloc(n_pages * sizeof(struct scatterlist), 0);
+		s = kvmalloc(n_pages * sizeof(struct scatterlist), GFP_KERNEL);
 		if (!s) {
 			dm_integrity_free_journal_scatterlist(ic, sl);
 			return NULL;
@@ -2617,7 +2606,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
 				goto bad;
 			}
 
-			sg = dm_integrity_kvmalloc((ic->journal_pages + 1) * sizeof(struct scatterlist), 0);
+			sg = kvmalloc((ic->journal_pages + 1) * sizeof(struct scatterlist), GFP_KERNEL);
 			if (!sg) {
 				*error = "Unable to allocate sg list";
 				r = -ENOMEM;
@@ -2673,7 +2662,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
 				r = -ENOMEM;
 				goto bad;
 			}
-			ic->sk_requests = dm_integrity_kvmalloc(ic->journal_sections * sizeof(struct skcipher_request *), __GFP_ZERO);
+			ic->sk_requests = kvmalloc(ic->journal_sections * sizeof(struct skcipher_request *), GFP_KERNEL | __GFP_ZERO);
 			if (!ic->sk_requests) {
 				*error = "Unable to allocate sk requests";
 				r = -ENOMEM;
@@ -2740,7 +2729,7 @@ retest_commit_id:
 		r = -ENOMEM;
 		goto bad;
 	}
-	ic->journal_tree = dm_integrity_kvmalloc(journal_tree_size, 0);
+	ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL);
 	if (!ic->journal_tree) {
 		*error = "Could not allocate memory for journal tree";
 		r = -ENOMEM;
@@ -3054,6 +3043,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		ti->error = "The device is too small";
 		goto bad;
 	}
+	if (ti->len > ic->provided_data_sectors) {
+		r = -EINVAL;
+		ti->error = "Not enough provided sectors for requested mapping size";
+		goto bad;
+	}
 
 	if (!buffer_sectors)
 		buffer_sectors = 1;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 3702e502466d..25039607f3cb 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -58,7 +58,8 @@ struct dm_io_client *dm_io_client_create(void)
 	if (!client->pool)
 		goto bad;
 
-	client->bios = bioset_create(min_ios, 0);
+	client->bios = bioset_create(min_ios, 0, (BIOSET_NEED_BVECS |
+						  BIOSET_NEED_RESCUER));
 	if (!client->bios)
 		goto bad;
 
@@ -124,7 +125,7 @@ static void complete_io(struct io *io)
 	fn(error_bits, context);
 }
 
-static void dec_count(struct io *io, unsigned int region, int error)
+static void dec_count(struct io *io, unsigned int region, blk_status_t error)
 {
 	if (error)
 		set_bit(region, &io->error_bits);
@@ -137,9 +138,9 @@ static void endio(struct bio *bio)
 {
 	struct io *io;
 	unsigned region;
-	int error;
+	blk_status_t error;
 
-	if (bio->bi_error && bio_data_dir(bio) == READ)
+	if (bio->bi_status && bio_data_dir(bio) == READ)
 		zero_fill_bio(bio);
 
 	/*
@@ -147,7 +148,7 @@ static void endio(struct bio *bio)
 	 */
 	retrieve_io_and_region_from_bio(bio, &io, &region);
 
-	error = bio->bi_error;
+	error = bio->bi_status;
 	bio_put(bio);
 
 	dec_count(io, region, error);
@@ -317,9 +318,9 @@ static void do_region(int op, int op_flags, unsigned region,
 	else if (op == REQ_OP_WRITE_SAME)
 		special_cmd_max_sectors = q->limits.max_write_same_sectors;
 	if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
-	     op == REQ_OP_WRITE_SAME)  &&
-	    special_cmd_max_sectors == 0) {
-		dec_count(io, region, -EOPNOTSUPP);
+	     op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
+		atomic_inc(&io->count);
+		dec_count(io, region, BLK_STS_NOTSUPP);
 		return;
 	}
 
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 0555b4410e05..41852ae287a5 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1710,12 +1710,13 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
 	}
 
 	/*
-	 * Try to avoid low memory issues when a device is suspended.
+	 * Use __GFP_HIGH to avoid low memory issues when a device is
+	 * suspended and the ioctl is needed to resume it.
 	 * Use kmalloc() rather than vmalloc() when we can.
 	 */
 	dmi = NULL;
 	noio_flag = memalloc_noio_save();
-	dmi = kvmalloc(param_kernel->data_size, GFP_KERNEL);
+	dmi = kvmalloc(param_kernel->data_size, GFP_KERNEL | __GFP_HIGH);
 	memalloc_noio_restore(noio_flag);
 
 	if (!dmi) {
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 4dfe38655a49..a1da0eb58a93 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -150,10 +150,10 @@ static void log_end_io(struct bio *bio)
 {
 	struct log_writes_c *lc = bio->bi_private;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		unsigned long flags;
 
-		DMERR("Error writing log block, error=%d", bio->bi_error);
+		DMERR("Error writing log block, error=%d", bio->bi_status);
 		spin_lock_irqsave(&lc->blocks_lock, flags);
 		lc->logging_enabled = false;
 		spin_unlock_irqrestore(&lc->blocks_lock, flags);
@@ -586,7 +586,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
 		spin_lock_irq(&lc->blocks_lock);
 		lc->logging_enabled = false;
 		spin_unlock_irq(&lc->blocks_lock);
-		return -ENOMEM;
+		return DM_MAPIO_KILL;
 	}
 	INIT_LIST_HEAD(&block->list);
 	pb->block = block;
@@ -639,7 +639,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
 			spin_lock_irq(&lc->blocks_lock);
 			lc->logging_enabled = false;
 			spin_unlock_irq(&lc->blocks_lock);
-			return -ENOMEM;
+			return DM_MAPIO_KILL;
 		}
 
 		src = kmap_atomic(bv.bv_page);
@@ -664,7 +664,8 @@ map_bio:
 	return DM_MAPIO_REMAPPED;
 }
 
-static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int normal_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	struct log_writes_c *lc = ti->private;
 	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
@@ -686,7 +687,7 @@ static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
 		spin_unlock_irqrestore(&lc->blocks_lock, flags);
 	}
 
-	return error;
+	return DM_ENDIO_DONE;
 }
 
 /*
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 926a6bcb32c8..0e8ab5bb3575 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -447,7 +447,7 @@ failed:
  * it has been invoked.
  */
 #define dm_report_EIO(m)						\
-({									\
+do {									\
 	struct mapped_device *md = dm_table_get_md((m)->ti->table);	\
 									\
 	pr_debug("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d\n", \
@@ -455,8 +455,7 @@ failed:
 		 test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags),	\
 		 test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags),	\
 		 dm_noflush_suspending((m)->ti));			\
-	-EIO;								\
-})
+} while (0)
 
 /*
  * Map cloned requests (request-based multipath)
@@ -481,7 +480,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
 	if (!pgpath) {
 		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
 			return DM_MAPIO_DELAY_REQUEUE;
-		return dm_report_EIO(m);	/* Failed */
+		dm_report_EIO(m);	/* Failed */
+		return DM_MAPIO_KILL;
 	} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
 		   test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
 		if (pg_init_all_paths(m))
@@ -558,13 +558,14 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
 	if (!pgpath) {
 		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
 			return DM_MAPIO_REQUEUE;
-		return dm_report_EIO(m);
+		dm_report_EIO(m);
+		return DM_MAPIO_KILL;
 	}
 
 	mpio->pgpath = pgpath;
 	mpio->nr_bytes = nr_bytes;
 
-	bio->bi_error = 0;
+	bio->bi_status = 0;
 	bio->bi_bdev = pgpath->path.dev->bdev;
 	bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
 
@@ -620,11 +621,19 @@ static void process_queued_bios(struct work_struct *work)
 	blk_start_plug(&plug);
 	while ((bio = bio_list_pop(&bios))) {
 		r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
-		if (r < 0 || r == DM_MAPIO_REQUEUE) {
-			bio->bi_error = r;
+		switch (r) {
+		case DM_MAPIO_KILL:
+			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
-		} else if (r == DM_MAPIO_REMAPPED)
+			break;
+		case DM_MAPIO_REQUEUE:
+			bio->bi_status = BLK_STS_DM_REQUEUE;
+			bio_endio(bio);
+			break;
+		case DM_MAPIO_REMAPPED:
 			generic_make_request(bio);
+			break;
+		}
 	}
 	blk_finish_plug(&plug);
 }
@@ -1441,22 +1450,15 @@ static void activate_path_work(struct work_struct *work)
 	activate_or_offline_path(pgpath);
 }
 
-static int noretry_error(int error)
+static int noretry_error(blk_status_t error)
 {
 	switch (error) {
-	case -EBADE:
-		/*
-		 * EBADE signals an reservation conflict.
-		 * We shouldn't fail the path here as we can communicate with
-		 * the target.  We should failover to the next path, but in
-		 * doing so we might be causing a ping-pong between paths.
-		 * So just return the reservation conflict error.
-		 */
-	case -EOPNOTSUPP:
-	case -EREMOTEIO:
-	case -EILSEQ:
-	case -ENODATA:
-	case -ENOSPC:
+	case BLK_STS_NOTSUPP:
+	case BLK_STS_NOSPC:
+	case BLK_STS_TARGET:
+	case BLK_STS_NEXUS:
+	case BLK_STS_MEDIUM:
+	case BLK_STS_RESOURCE:
 		return 1;
 	}
 
@@ -1465,7 +1467,7 @@ static int noretry_error(int error)
 }
 
 static int multipath_end_io(struct dm_target *ti, struct request *clone,
-			    int error, union map_info *map_context)
+			    blk_status_t error, union map_info *map_context)
 {
 	struct dm_mpath_io *mpio = get_mpio(map_context);
 	struct pgpath *pgpath = mpio->pgpath;
@@ -1492,8 +1494,8 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 
 		if (atomic_read(&m->nr_valid_paths) == 0 &&
 		    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
-			if (error == -EIO)
-				error = dm_report_EIO(m);
+			if (error == BLK_STS_IOERR)
+				dm_report_EIO(m);
 			/* complete with the original error */
 			r = DM_ENDIO_DONE;
 		}
@@ -1509,23 +1511,27 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 	return r;
 }
 
-static int do_end_io_bio(struct multipath *m, struct bio *clone,
-			 int error, struct dm_mpath_io *mpio)
+static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
+		blk_status_t *error)
 {
+	struct multipath *m = ti->private;
+	struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
+	struct pgpath *pgpath = mpio->pgpath;
 	unsigned long flags;
+	int r = DM_ENDIO_DONE;
 
-	if (!error)
-		return 0;	/* I/O complete */
-
-	if (noretry_error(error))
-		return error;
+	if (!*error || noretry_error(*error))
+		goto done;
 
-	if (mpio->pgpath)
-		fail_path(mpio->pgpath);
+	if (pgpath)
+		fail_path(pgpath);
 
 	if (atomic_read(&m->nr_valid_paths) == 0 &&
-	    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
-		return dm_report_EIO(m);
+	    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+		dm_report_EIO(m);
+		*error = BLK_STS_IOERR;
+		goto done;
+	}
 
 	/* Queue for the daemon to resubmit */
 	dm_bio_restore(get_bio_details_from_bio(clone), clone);
@@ -1536,23 +1542,11 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
 	if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
 		queue_work(kmultipathd, &m->process_queued_bios);
 
-	return DM_ENDIO_INCOMPLETE;
-}
-
-static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
-{
-	struct multipath *m = ti->private;
-	struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
-	struct pgpath *pgpath;
-	struct path_selector *ps;
-	int r;
-
-	BUG_ON(!mpio);
-
-	r = do_end_io_bio(m, clone, error, mpio);
-	pgpath = mpio->pgpath;
+	r = DM_ENDIO_INCOMPLETE;
+done:
 	if (pgpath) {
-		ps = &pgpath->pg->ps;
+		struct path_selector *ps = &pgpath->pg->ps;
+
 		if (ps->type->end_io)
 			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
 	}
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 7d893228c40f..b4b75dad816a 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1927,7 +1927,7 @@ struct dm_raid_superblock {
 	/********************************************************************
 	 * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
 	 *
-	 * FEATURE_FLAG_SUPPORTS_V190 in the features member indicates that those exist
+	 * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
 	 */
 
 	__le32 flags; /* Flags defining array states for reshaping */
@@ -2092,6 +2092,11 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
 	sb->layout = cpu_to_le32(mddev->layout);
 	sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);
 
+	/********************************************************************
+	 * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
+	 *
+	 * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
+	 */
 	sb->new_level = cpu_to_le32(mddev->new_level);
 	sb->new_layout = cpu_to_le32(mddev->new_layout);
 	sb->new_stripe_sectors = cpu_to_le32(mddev->new_chunk_sectors);
@@ -2438,8 +2443,14 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
 	mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
 
 	if (!test_and_clear_bit(FirstUse, &rdev->flags)) {
-		/* Retrieve device size stored in superblock to be prepared for shrink */
-		rdev->sectors = le64_to_cpu(sb->sectors);
+		/*
+		 * Retrieve rdev size stored in superblock to be prepared for shrink.
+		 * Check extended superblock members are present otherwise the size
+		 * will not be set!
+		 */
+		if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190)
+			rdev->sectors = le64_to_cpu(sb->sectors);
+
 		rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
 		if (rdev->recovery_offset == MaxSector)
 			set_bit(In_sync, &rdev->flags);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index a95cbb80fb34..a4fbd911d566 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -145,6 +145,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list)
 
 struct dm_raid1_bio_record {
 	struct mirror *m;
+	/* if details->bi_bdev == NULL, details were not saved */
 	struct dm_bio_details details;
 	region_t write_region;
 };
@@ -260,7 +261,7 @@ static int mirror_flush(struct dm_target *ti)
 	struct mirror *m;
 	struct dm_io_request io_req = {
 		.bi_op = REQ_OP_WRITE,
-		.bi_op_flags = REQ_PREFLUSH,
+		.bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
 		.mem.type = DM_IO_KMEM,
 		.mem.ptr.addr = NULL,
 		.client = ms->io_client,
@@ -490,9 +491,9 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio)
 		 * If device is suspended, complete the bio.
 		 */
 		if (dm_noflush_suspending(ms->ti))
-			bio->bi_error = DM_ENDIO_REQUEUE;
+			bio->bi_status = BLK_STS_DM_REQUEUE;
 		else
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 
 		bio_endio(bio);
 		return;
@@ -626,7 +627,7 @@ static void write_callback(unsigned long error, void *context)
 	 * degrade the array.
 	 */
 	if (bio_op(bio) == REQ_OP_DISCARD) {
-		bio->bi_error = -EOPNOTSUPP;
+		bio->bi_status = BLK_STS_NOTSUPP;
 		bio_endio(bio);
 		return;
 	}
@@ -1198,6 +1199,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
 	struct dm_raid1_bio_record *bio_record =
 	  dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
 
+	bio_record->details.bi_bdev = NULL;
+
 	if (rw == WRITE) {
 		/* Save region for mirror_end_io() handler */
 		bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio);
@@ -1207,14 +1210,14 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
 
 	r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0);
 	if (r < 0 && r != -EWOULDBLOCK)
-		return r;
+		return DM_MAPIO_KILL;
 
 	/*
 	 * If region is not in-sync queue the bio.
 	 */
 	if (!r || (r == -EWOULDBLOCK)) {
 		if (bio->bi_opf & REQ_RAHEAD)
-			return -EWOULDBLOCK;
+			return DM_MAPIO_KILL;
 
 		queue_bio(ms, bio, rw);
 		return DM_MAPIO_SUBMITTED;
@@ -1226,7 +1229,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
 	 */
 	m = choose_mirror(ms, bio->bi_iter.bi_sector);
 	if (unlikely(!m))
-		return -EIO;
+		return DM_MAPIO_KILL;
 
 	dm_bio_record(&bio_record->details, bio);
 	bio_record->m = m;
@@ -1236,7 +1239,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }
 
-static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int mirror_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	int rw = bio_data_dir(bio);
 	struct mirror_set *ms = (struct mirror_set *) ti->private;
@@ -1252,16 +1256,26 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 		if (!(bio->bi_opf & REQ_PREFLUSH) &&
 		    bio_op(bio) != REQ_OP_DISCARD)
 			dm_rh_dec(ms->rh, bio_record->write_region);
-		return error;
+		return DM_ENDIO_DONE;
 	}
 
-	if (error == -EOPNOTSUPP)
-		return error;
+	if (*error == BLK_STS_NOTSUPP)
+		goto out;
+
+	if (bio->bi_opf & REQ_RAHEAD)
+		goto out;
 
-	if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
-		return error;
+	if (unlikely(*error)) {
+		if (!bio_record->details.bi_bdev) {
+			/*
+			 * There wasn't enough memory to record necessary
+			 * information for a retry or there was no other
+			 * mirror in-sync.
+			 */
+			DMERR_LIMIT("Mirror read failed.");
+			return DM_ENDIO_DONE;
+		}
 
-	if (unlikely(error)) {
 		m = bio_record->m;
 
 		DMERR("Mirror read failed from %s. Trying alternative device.",
@@ -1277,7 +1291,8 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 			bd = &bio_record->details;
 
 			dm_bio_restore(bd, bio);
-			bio->bi_error = 0;
+			bio_record->details.bi_bdev = NULL;
+			bio->bi_status = 0;
 
 			queue_bio(ms, bio, rw);
 			return DM_ENDIO_INCOMPLETE;
@@ -1285,7 +1300,10 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 		DMERR("All replicated volumes dead, failing I/O");
 	}
 
-	return error;
+out:
+	bio_record->details.bi_bdev = NULL;
+
+	return DM_ENDIO_DONE;
 }
 
 static void mirror_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 2af27026aa2e..c6ebc5b1e00e 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -71,7 +71,7 @@ static void dm_old_start_queue(struct request_queue *q)
 
 static void dm_mq_start_queue(struct request_queue *q)
 {
-	blk_mq_start_stopped_hw_queues(q, true);
+	blk_mq_unquiesce_queue(q);
 	blk_mq_kick_requeue_list(q);
 }
 
@@ -119,7 +119,7 @@ static void end_clone_bio(struct bio *clone)
 	struct dm_rq_target_io *tio = info->tio;
 	struct bio *bio = info->orig;
 	unsigned int nr_bytes = info->orig->bi_iter.bi_size;
-	int error = clone->bi_error;
+	blk_status_t error = clone->bi_status;
 
 	bio_put(clone);
 
@@ -158,7 +158,7 @@ static void end_clone_bio(struct bio *clone)
 	 * Do not use blk_end_request() here, because it may complete
 	 * the original request before the clone, and break the ordering.
 	 */
-	blk_update_request(tio->orig, 0, nr_bytes);
+	blk_update_request(tio->orig, BLK_STS_OK, nr_bytes);
 }
 
 static struct dm_rq_target_io *tio_from_request(struct request *rq)
@@ -216,7 +216,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
  * Must be called without clone's queue lock held,
  * see end_clone_request() for more details.
  */
-static void dm_end_request(struct request *clone, int error)
+static void dm_end_request(struct request *clone, blk_status_t error)
 {
 	int rw = rq_data_dir(clone);
 	struct dm_rq_target_io *tio = clone->end_io_data;
@@ -285,7 +285,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
 	rq_completed(md, rw, false);
 }
 
-static void dm_done(struct request *clone, int error, bool mapped)
+static void dm_done(struct request *clone, blk_status_t error, bool mapped)
 {
 	int r = DM_ENDIO_DONE;
 	struct dm_rq_target_io *tio = clone->end_io_data;
@@ -298,7 +298,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
 			r = rq_end_io(tio->ti, clone, error, &tio->info);
 	}
 
-	if (unlikely(error == -EREMOTEIO)) {
+	if (unlikely(error == BLK_STS_TARGET)) {
 		if (req_op(clone) == REQ_OP_WRITE_SAME &&
 		    !clone->q->limits.max_write_same_sectors)
 			disable_write_same(tio->md);
@@ -358,7 +358,7 @@ static void dm_softirq_done(struct request *rq)
  * Complete the clone and the original request with the error status
  * through softirq context.
  */
-static void dm_complete_request(struct request *rq, int error)
+static void dm_complete_request(struct request *rq, blk_status_t error)
 {
 	struct dm_rq_target_io *tio = tio_from_request(rq);
 
@@ -375,7 +375,7 @@ static void dm_complete_request(struct request *rq, int error)
  * Target's rq_end_io() function isn't called.
  * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
  */
-static void dm_kill_unmapped_request(struct request *rq, int error)
+static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
 {
 	rq->rq_flags |= RQF_FAILED;
 	dm_complete_request(rq, error);
@@ -384,7 +384,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
 /*
  * Called with the clone's queue lock held (in the case of .request_fn)
  */
-static void end_clone_request(struct request *clone, int error)
+static void end_clone_request(struct request *clone, blk_status_t error)
 {
 	struct dm_rq_target_io *tio = clone->end_io_data;
 
@@ -401,7 +401,7 @@ static void end_clone_request(struct request *clone, int error)
 
 static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
 {
-	int r;
+	blk_status_t r;
 
 	if (blk_queue_io_stat(clone->q))
 		clone->rq_flags |= RQF_IO_STAT;
@@ -506,7 +506,8 @@ static int map_request(struct dm_rq_target_io *tio)
 		break;
 	case DM_MAPIO_KILL:
 		/* The target wants to complete the I/O */
-		dm_kill_unmapped_request(rq, -EIO);
+		dm_kill_unmapped_request(rq, BLK_STS_IOERR);
+		break;
 	default:
 		DMWARN("unimplemented target map return value: %d", r);
 		BUG();
@@ -726,7 +727,7 @@ static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 	return __dm_rq_init_rq(set->driver_data, rq);
 }
 
-static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 			  const struct blk_mq_queue_data *bd)
 {
 	struct request *rq = bd->rq;
@@ -743,7 +744,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 	}
 
 	if (ti->type->busy && ti->type->busy(ti))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 
 	dm_start_request(md, rq);
 
@@ -761,10 +762,10 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 		rq_end_stats(md, rq);
 		rq_completed(md, rq_data_dir(rq), false);
 		blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 	}
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static const struct blk_mq_ops dm_mq_ops = {
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index f0020d21b95f..9813922e4fe5 100644
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -24,7 +24,7 @@ struct dm_rq_target_io {
 	struct dm_target *ti;
 	struct request *orig, *clone;
 	struct kthread_work work;
-	int error;
+	blk_status_t error;
 	union map_info info;
 	struct dm_stats_aux stats_aux;
 	unsigned long duration_jiffies;
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index b93476c3ba3f..c5534d294773 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -741,7 +741,8 @@ static void persistent_commit_exception(struct dm_exception_store *store,
 	/*
 	 * Commit exceptions to disk.
 	 */
-	if (ps->valid && area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA))
+	if (ps->valid && area_io(ps, REQ_OP_WRITE,
+				 REQ_PREFLUSH | REQ_FUA | REQ_SYNC))
 		ps->valid = 0;
 
 	/*
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index e152d9817c81..1ba41048b438 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1590,7 +1590,7 @@ static void full_bio_end_io(struct bio *bio)
 {
 	void *callback_data = bio->bi_private;
 
-	dm_kcopyd_do_callback(callback_data, 0, bio->bi_error ? 1 : 0);
+	dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
 }
 
 static void start_full_bio(struct dm_snap_pending_exception *pe,
@@ -1690,7 +1690,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 	/* Full snapshots are not usable */
 	/* To get here the table must be live so s->active is always set. */
 	if (!s->valid)
-		return -EIO;
+		return DM_MAPIO_KILL;
 
 	/* FIXME: should only take write lock if we need
 	 * to copy an exception */
@@ -1698,7 +1698,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 
 	if (!s->valid || (unlikely(s->snapshot_overflowed) &&
 	    bio_data_dir(bio) == WRITE)) {
-		r = -EIO;
+		r = DM_MAPIO_KILL;
 		goto out_unlock;
 	}
 
@@ -1723,7 +1723,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 
 			if (!s->valid || s->snapshot_overflowed) {
 				free_pending_exception(pe);
-				r = -EIO;
+				r = DM_MAPIO_KILL;
 				goto out_unlock;
 			}
 
@@ -1741,7 +1741,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 					DMERR("Snapshot overflowed: Unable to allocate exception.");
 				} else
 					__invalidate_snapshot(s, -ENOMEM);
-				r = -EIO;
+				r = DM_MAPIO_KILL;
 				goto out_unlock;
 			}
 		}
@@ -1851,14 +1851,15 @@ out_unlock:
 	return r;
 }
 
-static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	struct dm_snapshot *s = ti->private;
 
 	if (is_bio_tracked(bio))
 		stop_tracking_chunk(s, bio);
 
-	return 0;
+	return DM_ENDIO_DONE;
 }
 
 static void snapshot_merge_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 75152482f3ad..11621a0af887 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -375,20 +375,21 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
 	}
 }
 
-static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int stripe_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	unsigned i;
 	char major_minor[16];
 	struct stripe_c *sc = ti->private;
 
-	if (!error)
-		return 0; /* I/O complete */
+	if (!*error)
+		return DM_ENDIO_DONE; /* I/O complete */
 
-	if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
-		return error;
+	if (bio->bi_opf & REQ_RAHEAD)
+		return DM_ENDIO_DONE;
 
-	if (error == -EOPNOTSUPP)
-		return error;
+	if (*error == BLK_STS_NOTSUPP)
+		return DM_ENDIO_DONE;
 
 	memset(major_minor, 0, sizeof(major_minor));
 	sprintf(major_minor, "%d:%d",
@@ -409,7 +410,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
 				schedule_work(&sc->trigger_event);
 		}
 
-	return error;
+	return DM_ENDIO_DONE;
 }
 
 static int stripe_iterate_devices(struct dm_target *ti,
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index b242b750542f..c0d7e60820c4 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -128,7 +128,7 @@ static void io_err_dtr(struct dm_target *tt)
 
 static int io_err_map(struct dm_target *tt, struct bio *bio)
 {
-	return -EIO;
+	return DM_MAPIO_KILL;
 }
 
 static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 0f0251d0d337..d31d18d9727c 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -484,11 +484,11 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd)
 	if (r < 0)
 		return r;
 
-	r = save_sm_roots(pmd);
+	r = dm_tm_pre_commit(pmd->tm);
 	if (r < 0)
 		return r;
 
-	r = dm_tm_pre_commit(pmd->tm);
+	r = save_sm_roots(pmd);
 	if (r < 0)
 		return r;
 
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 17ad50daed08..9dec2f8cc739 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -383,8 +383,8 @@ static void end_discard(struct discard_op *op, int r)
 	 * Even if r is set, there could be sub discards in flight that we
 	 * need to wait for.
 	 */
-	if (r && !op->parent_bio->bi_error)
-		op->parent_bio->bi_error = r;
+	if (r && !op->parent_bio->bi_status)
+		op->parent_bio->bi_status = errno_to_blk_status(r);
 	bio_endio(op->parent_bio);
 }
 
@@ -450,22 +450,20 @@ static void cell_release_no_holder(struct pool *pool,
 }
 
 static void cell_error_with_code(struct pool *pool,
-				 struct dm_bio_prison_cell *cell, int error_code)
+		struct dm_bio_prison_cell *cell, blk_status_t error_code)
 {
 	dm_cell_error(pool->prison, cell, error_code);
 	dm_bio_prison_free_cell(pool->prison, cell);
 }
 
-static int get_pool_io_error_code(struct pool *pool)
+static blk_status_t get_pool_io_error_code(struct pool *pool)
 {
-	return pool->out_of_data_space ? -ENOSPC : -EIO;
+	return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR;
 }
 
 static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
 {
-	int error = get_pool_io_error_code(pool);
-
-	cell_error_with_code(pool, cell, error);
+	cell_error_with_code(pool, cell, get_pool_io_error_code(pool));
 }
 
 static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
@@ -475,7 +473,7 @@ static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
 
 static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
 {
-	cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE);
+	cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE);
 }
 
 /*----------------------------------------------------------------*/
@@ -555,17 +553,18 @@ static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
 	bio_list_init(master);
 }
 
-static void error_bio_list(struct bio_list *bios, int error)
+static void error_bio_list(struct bio_list *bios, blk_status_t error)
 {
 	struct bio *bio;
 
 	while ((bio = bio_list_pop(bios))) {
-		bio->bi_error = error;
+		bio->bi_status = error;
 		bio_endio(bio);
 	}
 }
 
-static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error)
+static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
+		blk_status_t error)
 {
 	struct bio_list bios;
 	unsigned long flags;
@@ -608,11 +607,11 @@ static void requeue_io(struct thin_c *tc)
 	__merge_bio_list(&bios, &tc->retry_on_resume_list);
 	spin_unlock_irqrestore(&tc->lock, flags);
 
-	error_bio_list(&bios, DM_ENDIO_REQUEUE);
+	error_bio_list(&bios, BLK_STS_DM_REQUEUE);
 	requeue_deferred_cells(tc);
 }
 
-static void error_retry_list_with_code(struct pool *pool, int error)
+static void error_retry_list_with_code(struct pool *pool, blk_status_t error)
 {
 	struct thin_c *tc;
 
@@ -624,9 +623,7 @@ static void error_retry_list_with_code(struct pool *pool, int error)
 
 static void error_retry_list(struct pool *pool)
 {
-	int error = get_pool_io_error_code(pool);
-
-	error_retry_list_with_code(pool, error);
+	error_retry_list_with_code(pool, get_pool_io_error_code(pool));
 }
 
 /*
@@ -774,7 +771,7 @@ struct dm_thin_new_mapping {
 	 */
 	atomic_t prepare_actions;
 
-	int err;
+	blk_status_t status;
 	struct thin_c *tc;
 	dm_block_t virt_begin, virt_end;
 	dm_block_t data_block;
@@ -814,7 +811,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
 {
 	struct dm_thin_new_mapping *m = context;
 
-	m->err = read_err || write_err ? -EIO : 0;
+	m->status = read_err || write_err ? BLK_STS_IOERR : 0;
 	complete_mapping_preparation(m);
 }
 
@@ -825,7 +822,7 @@ static void overwrite_endio(struct bio *bio)
 
 	bio->bi_end_io = m->saved_bi_end_io;
 
-	m->err = bio->bi_error;
+	m->status = bio->bi_status;
 	complete_mapping_preparation(m);
 }
 
@@ -925,7 +922,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
 	struct bio *bio = m->bio;
 	int r;
 
-	if (m->err) {
+	if (m->status) {
 		cell_error(pool, m->cell);
 		goto out;
 	}
@@ -1094,6 +1091,19 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
 		return;
 	}
 
+	/*
+	 * Increment the unmapped blocks.  This prevents a race between the
+	 * passdown io and reallocation of freed blocks.
+	 */
+	r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
+	if (r) {
+		metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
+		bio_io_error(m->bio);
+		cell_defer_no_holder(tc, m->cell);
+		mempool_free(m, pool->mapping_pool);
+		return;
+	}
+
 	discard_parent = bio_alloc(GFP_NOIO, 1);
 	if (!discard_parent) {
 		DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
@@ -1114,19 +1124,6 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
 			end_discard(&op, r);
 		}
 	}
-
-	/*
-	 * Increment the unmapped blocks.  This prevents a race between the
-	 * passdown io and reallocation of freed blocks.
-	 */
-	r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
-	if (r) {
-		metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
-		bio_io_error(m->bio);
-		cell_defer_no_holder(tc, m->cell);
-		mempool_free(m, pool->mapping_pool);
-		return;
-	}
 }
 
 static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
@@ -1495,7 +1492,7 @@ static void retry_on_resume(struct bio *bio)
 	spin_unlock_irqrestore(&tc->lock, flags);
 }
 
-static int should_error_unserviceable_bio(struct pool *pool)
+static blk_status_t should_error_unserviceable_bio(struct pool *pool)
 {
 	enum pool_mode m = get_pool_mode(pool);
 
@@ -1503,27 +1500,27 @@ static int should_error_unserviceable_bio(struct pool *pool)
 	case PM_WRITE:
 		/* Shouldn't get here */
 		DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	case PM_OUT_OF_DATA_SPACE:
-		return pool->pf.error_if_no_space ? -ENOSPC : 0;
+		return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
 
 	case PM_READ_ONLY:
 	case PM_FAIL:
-		return -EIO;
+		return BLK_STS_IOERR;
 	default:
 		/* Shouldn't get here */
 		DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
-		return -EIO;
+		return BLK_STS_IOERR;
 	}
 }
 
 static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
 {
-	int error = should_error_unserviceable_bio(pool);
+	blk_status_t error = should_error_unserviceable_bio(pool);
 
 	if (error) {
-		bio->bi_error = error;
+		bio->bi_status = error;
 		bio_endio(bio);
 	} else
 		retry_on_resume(bio);
@@ -1533,7 +1530,7 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
 {
 	struct bio *bio;
 	struct bio_list bios;
-	int error;
+	blk_status_t error;
 
 	error = should_error_unserviceable_bio(pool);
 	if (error) {
@@ -2071,7 +2068,8 @@ static void process_thin_deferred_bios(struct thin_c *tc)
 	unsigned count = 0;
 
 	if (tc->requeue_mode) {
-		error_thin_bio_list(tc, &tc->deferred_bio_list, DM_ENDIO_REQUEUE);
+		error_thin_bio_list(tc, &tc->deferred_bio_list,
+				BLK_STS_DM_REQUEUE);
 		return;
 	}
 
@@ -2322,7 +2320,7 @@ static void do_no_space_timeout(struct work_struct *ws)
 	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
 		pool->pf.error_if_no_space = true;
 		notify_of_pool_mode_change_to_oods(pool);
-		error_retry_list_with_code(pool, -ENOSPC);
+		error_retry_list_with_code(pool, BLK_STS_NOSPC);
 	}
 }
 
@@ -2624,7 +2622,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
 	thin_hook_bio(tc, bio);
 
 	if (tc->requeue_mode) {
-		bio->bi_error = DM_ENDIO_REQUEUE;
+		bio->bi_status = BLK_STS_DM_REQUEUE;
 		bio_endio(bio);
 		return DM_MAPIO_SUBMITTED;
 	}
@@ -4177,7 +4175,8 @@ static int thin_map(struct dm_target *ti, struct bio *bio)
 	return thin_bio_map(ti, bio);
 }
 
-static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
+static int thin_endio(struct dm_target *ti, struct bio *bio,
+		blk_status_t *err)
 {
 	unsigned long flags;
 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -4212,7 +4211,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
 	if (h->cell)
 		cell_defer_no_holder(h->tc, h->cell);
 
-	return 0;
+	return DM_ENDIO_DONE;
 }
 
 static void thin_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 97de961a3bfc..b46705ebf01f 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -166,7 +166,7 @@ static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
 		return r;
 	}
 
-	if (likely(v->version >= 1))
+	if (likely(v->salt_size && (v->version >= 1)))
 		r = verity_hash_update(v, req, v->salt, v->salt_size, res);
 
 	return r;
@@ -177,7 +177,7 @@ static int verity_hash_final(struct dm_verity *v, struct ahash_request *req,
 {
 	int r;
 
-	if (unlikely(!v->version)) {
+	if (unlikely(v->salt_size && (!v->version))) {
 		r = verity_hash_update(v, req, v->salt, v->salt_size, res);
 
 		if (r < 0) {
@@ -538,13 +538,13 @@ static int verity_verify_io(struct dm_verity_io *io)
 /*
  * End one "io" structure with a given error.
  */
-static void verity_finish_io(struct dm_verity_io *io, int error)
+static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
 {
 	struct dm_verity *v = io->v;
 	struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
 
 	bio->bi_end_io = io->orig_bi_end_io;
-	bio->bi_error = error;
+	bio->bi_status = status;
 
 	verity_fec_finish_io(io);
 
@@ -555,15 +555,15 @@ static void verity_work(struct work_struct *w)
 {
 	struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
 
-	verity_finish_io(io, verity_verify_io(io));
+	verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
 }
 
 static void verity_end_io(struct bio *bio)
 {
 	struct dm_verity_io *io = bio->bi_private;
 
-	if (bio->bi_error && !verity_fec_is_enabled(io->v)) {
-		verity_finish_io(io, bio->bi_error);
+	if (bio->bi_status && !verity_fec_is_enabled(io->v)) {
+		verity_finish_io(io, bio->bi_status);
 		return;
 	}
 
@@ -643,17 +643,17 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
 	if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
 	    ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
 		DMERR_LIMIT("unaligned io");
-		return -EIO;
+		return DM_MAPIO_KILL;
 	}
 
 	if (bio_end_sector(bio) >>
 	    (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
 		DMERR_LIMIT("io out of range");
-		return -EIO;
+		return DM_MAPIO_KILL;
 	}
 
 	if (bio_data_dir(bio) == WRITE)
-		return -EIO;
+		return DM_MAPIO_KILL;
 
 	io = dm_per_bio_data(bio, ti->per_io_data_size);
 	io->v = v;
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index b616f11d8473..b65ca8dcfbdc 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -39,7 +39,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
 	case REQ_OP_READ:
 		if (bio->bi_opf & REQ_RAHEAD) {
 			/* readahead of null bytes only wastes buffer cache */
-			return -EIO;
+			return DM_MAPIO_KILL;
 		}
 		zero_fill_bio(bio);
 		break;
@@ -47,7 +47,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
 		/* writes get silently dropped */
 		break;
 	default:
-		return -EIO;
+		return DM_MAPIO_KILL;
 	}
 
 	bio_endio(bio);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6ef9500226c0..402946035308 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -63,7 +63,7 @@ static struct workqueue_struct *deferred_remove_workqueue;
  */
 struct dm_io {
 	struct mapped_device *md;
-	int error;
+	blk_status_t status;
 	atomic_t io_count;
 	struct bio *bio;
 	unsigned long start_time;
@@ -768,23 +768,24 @@ static int __noflush_suspending(struct mapped_device *md)
  * Decrements the number of outstanding ios that a bio has been
  * cloned into, completing the original io if necc.
  */
-static void dec_pending(struct dm_io *io, int error)
+static void dec_pending(struct dm_io *io, blk_status_t error)
 {
 	unsigned long flags;
-	int io_error;
+	blk_status_t io_error;
 	struct bio *bio;
 	struct mapped_device *md = io->md;
 
 	/* Push-back supersedes any I/O errors */
 	if (unlikely(error)) {
 		spin_lock_irqsave(&io->endio_lock, flags);
-		if (!(io->error > 0 && __noflush_suspending(md)))
-			io->error = error;
+		if (!(io->status == BLK_STS_DM_REQUEUE &&
+				__noflush_suspending(md)))
+			io->status = error;
 		spin_unlock_irqrestore(&io->endio_lock, flags);
 	}
 
 	if (atomic_dec_and_test(&io->io_count)) {
-		if (io->error == DM_ENDIO_REQUEUE) {
+		if (io->status == BLK_STS_DM_REQUEUE) {
 			/*
 			 * Target requested pushing back the I/O.
 			 */
@@ -793,16 +794,16 @@ static void dec_pending(struct dm_io *io, int error)
 				bio_list_add_head(&md->deferred, io->bio);
 			else
 				/* noflush suspend was interrupted. */
-				io->error = -EIO;
+				io->status = BLK_STS_IOERR;
 			spin_unlock_irqrestore(&md->deferred_lock, flags);
 		}
 
-		io_error = io->error;
+		io_error = io->status;
 		bio = io->bio;
 		end_io_acct(io);
 		free_io(md, io);
 
-		if (io_error == DM_ENDIO_REQUEUE)
+		if (io_error == BLK_STS_DM_REQUEUE)
 			return;
 
 		if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
@@ -814,7 +815,7 @@ static void dec_pending(struct dm_io *io, int error)
 			queue_io(md, bio);
 		} else {
 			/* done with normal IO or empty flush */
-			bio->bi_error = io_error;
+			bio->bi_status = io_error;
 			bio_endio(bio);
 		}
 	}
@@ -838,31 +839,13 @@ void disable_write_zeroes(struct mapped_device *md)
 
 static void clone_endio(struct bio *bio)
 {
-	int error = bio->bi_error;
-	int r = error;
+	blk_status_t error = bio->bi_status;
 	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
 	struct dm_io *io = tio->io;
 	struct mapped_device *md = tio->io->md;
 	dm_endio_fn endio = tio->ti->type->end_io;
 
-	if (endio) {
-		r = endio(tio->ti, bio, error);
-		if (r < 0 || r == DM_ENDIO_REQUEUE)
-			/*
-			 * error and requeue request are handled
-			 * in dec_pending().
-			 */
-			error = r;
-		else if (r == DM_ENDIO_INCOMPLETE)
-			/* The target will handle the io */
-			return;
-		else if (r) {
-			DMWARN("unimplemented target endio return value: %d", r);
-			BUG();
-		}
-	}
-
-	if (unlikely(r == -EREMOTEIO)) {
+	if (unlikely(error == BLK_STS_TARGET)) {
 		if (bio_op(bio) == REQ_OP_WRITE_SAME &&
 		    !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
 			disable_write_same(md);
@@ -871,6 +854,23 @@ static void clone_endio(struct bio *bio)
 			disable_write_zeroes(md);
 	}
 
+	if (endio) {
+		int r = endio(tio->ti, bio, &error);
+		switch (r) {
+		case DM_ENDIO_REQUEUE:
+			error = BLK_STS_DM_REQUEUE;
+			/*FALLTHRU*/
+		case DM_ENDIO_DONE:
+			break;
+		case DM_ENDIO_INCOMPLETE:
+			/* The target will handle the io */
+			return;
+		default:
+			DMWARN("unimplemented target endio return value: %d", r);
+			BUG();
+		}
+	}
+
 	free_tio(tio);
 	dec_pending(io, error);
 }
@@ -1036,7 +1036,8 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
 
 		while ((bio = bio_list_pop(&list))) {
 			struct bio_set *bs = bio->bi_pool;
-			if (unlikely(!bs) || bs == fs_bio_set) {
+			if (unlikely(!bs) || bs == fs_bio_set ||
+			    !bs->rescue_workqueue) {
 				bio_list_add(&current->bio_list[i], bio);
 				continue;
 			}
@@ -1084,18 +1085,24 @@ static void __map_bio(struct dm_target_io *tio)
 	r = ti->type->map(ti, clone);
 	dm_offload_end(&o);
 
-	if (r == DM_MAPIO_REMAPPED) {
+	switch (r) {
+	case DM_MAPIO_SUBMITTED:
+		break;
+	case DM_MAPIO_REMAPPED:
 		/* the bio has been remapped so dispatch it */
-
 		trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
 				      tio->io->bio->bi_bdev->bd_dev, sector);
-
 		generic_make_request(clone);
-	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
-		/* error the io and bail out, or requeue it if needed */
-		dec_pending(tio->io, r);
+		break;
+	case DM_MAPIO_KILL:
+		dec_pending(tio->io, BLK_STS_IOERR);
+		free_tio(tio);
+		break;
+	case DM_MAPIO_REQUEUE:
+		dec_pending(tio->io, BLK_STS_DM_REQUEUE);
 		free_tio(tio);
-	} else if (r != DM_MAPIO_SUBMITTED) {
+		break;
+	default:
 		DMWARN("unimplemented target map return value: %d", r);
 		BUG();
 	}
@@ -1360,7 +1367,7 @@ static void __split_and_process_bio(struct mapped_device *md,
 	ci.map = map;
 	ci.md = md;
 	ci.io = alloc_io(md);
-	ci.io->error = 0;
+	ci.io->status = 0;
 	atomic_set(&ci.io->io_count, 1);
 	ci.io->bio = bio;
 	ci.io->md = md;
@@ -1527,7 +1534,6 @@ void dm_init_normal_md_queue(struct mapped_device *md)
 	 * Initialize aspects of queue that aren't relevant for blk-mq
 	 */
 	md->queue->backing_dev_info->congested_fn = dm_any_congested;
-	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
 }
 
 static void cleanup_mapped_device(struct mapped_device *md)
@@ -1657,7 +1663,7 @@ static struct mapped_device *alloc_dev(int minor)
 
 	bio_init(&md->flush_bio, NULL, 0);
 	md->flush_bio.bi_bdev = md->bdev;
-	md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+	md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
 
 	dm_stats_init(&md->stats);
 
@@ -2654,7 +2660,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
 		BUG();
 	}
 
-	pools->bs = bioset_create_nobvec(pool_size, front_pad);
+	pools->bs = bioset_create(pool_size, front_pad, BIOSET_NEED_RESCUER);
 	if (!pools->bs)
 		goto out;
 
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 7299ce2f08a8..03082e17c65c 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -1311,8 +1311,10 @@ static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev)
 	cmsg.raid_slot = cpu_to_le32(rdev->desc_nr);
 	lock_comm(cinfo, 1);
 	ret = __sendmsg(cinfo, &cmsg);
-	if (ret)
+	if (ret) {
+		unlock_comm(cinfo);
 		return ret;
+	}
 	cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE;
 	ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX);
 	cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 82f798be964f..31bcbfb09fef 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -185,7 +185,7 @@ static int start_readonly;
 static bool create_on_open = true;
 
 /* bio_clone_mddev
- * like bio_clone, but with a local bio set
+ * like bio_clone_bioset, but with a local bio set
  */
 
 struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
@@ -265,7 +265,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 	unsigned int sectors;
 	int cpu;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (mddev == NULL || mddev->pers == NULL) {
 		bio_io_error(bio);
@@ -273,7 +273,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 	}
 	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
 		if (bio_sectors(bio) != 0)
-			bio->bi_error = -EROFS;
+			bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
 		return BLK_QC_T_NONE;
 	}
@@ -719,8 +719,8 @@ static void super_written(struct bio *bio)
 	struct md_rdev *rdev = bio->bi_private;
 	struct mddev *mddev = rdev->mddev;
 
-	if (bio->bi_error) {
-		pr_err("md: super_written gets error=%d\n", bio->bi_error);
+	if (bio->bi_status) {
+		pr_err("md: super_written gets error=%d\n", bio->bi_status);
 		md_error(mddev, rdev);
 		if (!test_bit(Faulty, &rdev->flags)
 		    && (bio->bi_opf & MD_FAILFAST)) {
@@ -765,7 +765,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
 	    test_bit(FailFast, &rdev->flags) &&
 	    !test_bit(LastDev, &rdev->flags))
 		ff = MD_FAILFAST;
-	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA | ff;
+	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;
 
 	atomic_inc(&mddev->pending_writes);
 	submit_bio(bio);
@@ -801,7 +801,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
 
 	submit_bio_wait(bio);
 
-	ret = !bio->bi_error;
+	ret = !bio->bi_status;
 	bio_put(bio);
 	return ret;
 }
@@ -825,7 +825,7 @@ fail:
 	return -EINVAL;
 }
 
-static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
+static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
 {
 	return	sb1->set_uuid0 == sb2->set_uuid0 &&
 		sb1->set_uuid1 == sb2->set_uuid1 &&
@@ -833,7 +833,7 @@ static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
 		sb1->set_uuid3 == sb2->set_uuid3;
 }
 
-static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
+static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
 {
 	int ret;
 	mdp_super_t *tmp1, *tmp2;
@@ -1025,12 +1025,12 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
 	} else {
 		__u64 ev1, ev2;
 		mdp_super_t *refsb = page_address(refdev->sb_page);
-		if (!uuid_equal(refsb, sb)) {
+		if (!md_uuid_equal(refsb, sb)) {
 			pr_warn("md: %s has different UUID to %s\n",
 				b, bdevname(refdev->bdev,b2));
 			goto abort;
 		}
-		if (!sb_equal(refsb, sb)) {
+		if (!md_sb_equal(refsb, sb)) {
 			pr_warn("md: %s has same UUID but different superblock to %s\n",
 				b, bdevname(refdev->bdev, b2));
 			goto abort;
@@ -5174,6 +5174,18 @@ static void mddev_delayed_delete(struct work_struct *ws)
 
 static void no_op(struct percpu_ref *r) {}
 
+int mddev_init_writes_pending(struct mddev *mddev)
+{
+	if (mddev->writes_pending.percpu_count_ptr)
+		return 0;
+	if (percpu_ref_init(&mddev->writes_pending, no_op, 0, GFP_KERNEL) < 0)
+		return -ENOMEM;
+	/* We want to start with the refcount at zero */
+	percpu_ref_put(&mddev->writes_pending);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
+
 static int md_alloc(dev_t dev, char *name)
 {
 	/*
@@ -5239,10 +5251,6 @@ static int md_alloc(dev_t dev, char *name)
 	blk_queue_make_request(mddev->queue, md_make_request);
 	blk_set_stacking_limits(&mddev->queue->limits);
 
-	if (percpu_ref_init(&mddev->writes_pending, no_op, 0, GFP_KERNEL) < 0)
-		goto abort;
-	/* We want to start with the refcount at zero */
-	percpu_ref_put(&mddev->writes_pending);
 	disk = alloc_disk(1 << shift);
 	if (!disk) {
 		blk_cleanup_queue(mddev->queue);
@@ -5420,7 +5428,7 @@ int md_run(struct mddev *mddev)
 	}
 
 	if (mddev->bio_set == NULL) {
-		mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
+		mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 		if (!mddev->bio_set)
 			return -ENOMEM;
 	}
@@ -8022,18 +8030,15 @@ EXPORT_SYMBOL(md_write_end);
  * may proceed without blocking.  It is important to call this before
  * attempting a GFP_KERNEL allocation while holding the mddev lock.
  * Must be called with mddev_lock held.
- *
- * In the ->external case MD_SB_CHANGE_PENDING can not be cleared until mddev->lock
- * is dropped, so return -EAGAIN after notifying userspace.
  */
-int md_allow_write(struct mddev *mddev)
+void md_allow_write(struct mddev *mddev)
 {
 	if (!mddev->pers)
-		return 0;
+		return;
 	if (mddev->ro)
-		return 0;
+		return;
 	if (!mddev->pers->sync_request)
-		return 0;
+		return;
 
 	spin_lock(&mddev->lock);
 	if (mddev->in_sync) {
@@ -8046,13 +8051,12 @@ int md_allow_write(struct mddev *mddev)
 		spin_unlock(&mddev->lock);
 		md_update_sb(mddev, 0);
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
+		/* wait for the dirty state to be recorded in the metadata */
+		wait_event(mddev->sb_wait,
+			   !test_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags) &&
+			   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
 	} else
 		spin_unlock(&mddev->lock);
-
-	if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
-		return -EAGAIN;
-	else
-		return 0;
 }
 EXPORT_SYMBOL_GPL(md_allow_write);
 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 4e75d121bfcc..0fa1de42c42b 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -648,6 +648,7 @@ extern void md_unregister_thread(struct md_thread **threadp);
 extern void md_wakeup_thread(struct md_thread *thread);
 extern void md_check_recovery(struct mddev *mddev);
 extern void md_reap_sync_thread(struct mddev *mddev);
+extern int mddev_init_writes_pending(struct mddev *mddev);
 extern void md_write_start(struct mddev *mddev, struct bio *bi);
 extern void md_write_inc(struct mddev *mddev, struct bio *bi);
 extern void md_write_end(struct mddev *mddev);
@@ -665,7 +666,7 @@ extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
 			bool metadata_op);
 extern void md_do_sync(struct md_thread *thread);
 extern void md_new_event(struct mddev *mddev);
-extern int md_allow_write(struct mddev *mddev);
+extern void md_allow_write(struct mddev *mddev);
 extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
 extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
 extern int md_check_no_bitmap(struct mddev *mddev);
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index e95d521d93e9..68d036e64041 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -73,12 +73,12 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
  * operation and are ready to return a success/failure code to the buffer
  * cache layer.
  */
-static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err)
+static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
 {
 	struct bio *bio = mp_bh->master_bio;
 	struct mpconf *conf = mp_bh->mddev->private;
 
-	bio->bi_error = err;
+	bio->bi_status = status;
 	bio_endio(bio);
 	mempool_free(mp_bh, conf->pool);
 }
@@ -89,7 +89,7 @@ static void multipath_end_request(struct bio *bio)
 	struct mpconf *conf = mp_bh->mddev->private;
 	struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
 
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		multipath_end_bh_io(mp_bh, 0);
 	else if (!(bio->bi_opf & REQ_RAHEAD)) {
 		/*
@@ -102,7 +102,7 @@ static void multipath_end_request(struct bio *bio)
 			(unsigned long long)bio->bi_iter.bi_sector);
 		multipath_reschedule_retry(mp_bh);
 	} else
-		multipath_end_bh_io(mp_bh, bio->bi_error);
+		multipath_end_bh_io(mp_bh, bio->bi_status);
 	rdev_dec_pending(rdev, conf->mddev);
 }
 
@@ -347,7 +347,7 @@ static void multipathd(struct md_thread *thread)
 			pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
 			       bdevname(bio->bi_bdev,b),
 			       (unsigned long long)bio->bi_iter.bi_sector);
-			multipath_end_bh_io(mp_bh, -EIO);
+			multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
 		} else {
 			pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
 			       bdevname(bio->bi_bdev,b),
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index ebb280a14325..32adf6b4a9c7 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -142,10 +142,23 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
 
 static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
 {
+	int r;
+	uint32_t old_count;
 	enum allocation_event ev;
 	struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
-	return sm_ll_dec(&smd->ll, b, &ev);
+	r = sm_ll_dec(&smd->ll, b, &ev);
+	if (!r && (ev == SM_FREE)) {
+		/*
+		 * It's only free if it's also free in the last
+		 * transaction.
+		 */
+		r = sm_ll_lookup(&smd->old_ll, b, &old_count);
+		if (!r && !old_count)
+			smd->nr_allocated_this_transaction--;
+	}
+
+	return r;
 }
 
 static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 84e58596594d..d6c0bc76e837 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -385,7 +385,7 @@ static int raid0_run(struct mddev *mddev)
 		blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
 		blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
 		blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
-		blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
+		blk_queue_max_discard_sectors(mddev->queue, UINT_MAX);
 
 		blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
 		blk_queue_io_opt(mddev->queue,
@@ -459,6 +459,95 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
 	}
 }
 
+static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
+{
+	struct r0conf *conf = mddev->private;
+	struct strip_zone *zone;
+	sector_t start = bio->bi_iter.bi_sector;
+	sector_t end;
+	unsigned int stripe_size;
+	sector_t first_stripe_index, last_stripe_index;
+	sector_t start_disk_offset;
+	unsigned int start_disk_index;
+	sector_t end_disk_offset;
+	unsigned int end_disk_index;
+	unsigned int disk;
+
+	zone = find_zone(conf, &start);
+
+	if (bio_end_sector(bio) > zone->zone_end) {
+		struct bio *split = bio_split(bio,
+			zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
+			mddev->bio_set);
+		bio_chain(split, bio);
+		generic_make_request(bio);
+		bio = split;
+		end = zone->zone_end;
+	} else
+		end = bio_end_sector(bio);
+
+	if (zone != conf->strip_zone)
+		end = end - zone[-1].zone_end;
+
+	/* Now start and end is the offset in zone */
+	stripe_size = zone->nb_dev * mddev->chunk_sectors;
+
+	first_stripe_index = start;
+	sector_div(first_stripe_index, stripe_size);
+	last_stripe_index = end;
+	sector_div(last_stripe_index, stripe_size);
+
+	start_disk_index = (int)(start - first_stripe_index * stripe_size) /
+		mddev->chunk_sectors;
+	start_disk_offset = ((int)(start - first_stripe_index * stripe_size) %
+		mddev->chunk_sectors) +
+		first_stripe_index * mddev->chunk_sectors;
+	end_disk_index = (int)(end - last_stripe_index * stripe_size) /
+		mddev->chunk_sectors;
+	end_disk_offset = ((int)(end - last_stripe_index * stripe_size) %
+		mddev->chunk_sectors) +
+		last_stripe_index * mddev->chunk_sectors;
+
+	for (disk = 0; disk < zone->nb_dev; disk++) {
+		sector_t dev_start, dev_end;
+		struct bio *discard_bio = NULL;
+		struct md_rdev *rdev;
+
+		if (disk < start_disk_index)
+			dev_start = (first_stripe_index + 1) *
+				mddev->chunk_sectors;
+		else if (disk > start_disk_index)
+			dev_start = first_stripe_index * mddev->chunk_sectors;
+		else
+			dev_start = start_disk_offset;
+
+		if (disk < end_disk_index)
+			dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
+		else if (disk > end_disk_index)
+			dev_end = last_stripe_index * mddev->chunk_sectors;
+		else
+			dev_end = end_disk_offset;
+
+		if (dev_end <= dev_start)
+			continue;
+
+		rdev = conf->devlist[(zone - conf->strip_zone) *
+			conf->strip_zone[0].nb_dev + disk];
+		if (__blkdev_issue_discard(rdev->bdev,
+			dev_start + zone->dev_start + rdev->data_offset,
+			dev_end - dev_start, GFP_NOIO, 0, &discard_bio) ||
+		    !discard_bio)
+			continue;
+		bio_chain(discard_bio, bio);
+		if (mddev->gendisk)
+			trace_block_bio_remap(bdev_get_queue(rdev->bdev),
+				discard_bio, disk_devt(mddev->gendisk),
+				bio->bi_iter.bi_sector);
+		generic_make_request(discard_bio);
+	}
+	bio_endio(bio);
+}
+
 static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct strip_zone *zone;
@@ -473,6 +562,11 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 		return;
 	}
 
+	if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) {
+		raid0_handle_discard(mddev, bio);
+		return;
+	}
+
 	bio_sector = bio->bi_iter.bi_sector;
 	sector = bio_sector;
 	chunk_sects = mddev->chunk_sectors;
@@ -498,19 +592,13 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 	bio->bi_iter.bi_sector = sector + zone->dev_start +
 		tmp_dev->data_offset;
 
-	if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-		     !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
-		/* Just ignore it */
-		bio_endio(bio);
-	} else {
-		if (mddev->gendisk)
-			trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
-					      bio, disk_devt(mddev->gendisk),
-					      bio_sector);
-		mddev_check_writesame(mddev, bio);
-		mddev_check_write_zeroes(mddev, bio);
-		generic_make_request(bio);
-	}
+	if (mddev->gendisk)
+		trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
+				      bio, disk_devt(mddev->gendisk),
+				      bio_sector);
+	mddev_check_writesame(mddev, bio);
+	mddev_check_write_zeroes(mddev, bio);
+	generic_make_request(bio);
 }
 
 static void raid0_status(struct seq_file *seq, struct mddev *mddev)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7ed59351fe97..98ca2c1d3226 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -277,7 +277,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
 	struct r1conf *conf = r1_bio->mddev->private;
 
 	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 
 	bio_endio(bio);
 	/*
@@ -335,7 +335,7 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
 
 static void raid1_end_read_request(struct bio *bio)
 {
-	int uptodate = !bio->bi_error;
+	int uptodate = !bio->bi_status;
 	struct r1bio *r1_bio = bio->bi_private;
 	struct r1conf *conf = r1_bio->mddev->private;
 	struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
@@ -426,12 +426,12 @@ static void raid1_end_write_request(struct bio *bio)
 	struct md_rdev *rdev = conf->mirrors[mirror].rdev;
 	bool discard_error;
 
-	discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+	discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
 
 	/*
 	 * 'one mirror IO has finished' event handler:
 	 */
-	if (bio->bi_error && !discard_error) {
+	if (bio->bi_status && !discard_error) {
 		set_bit(WriteErrorSeen,	&rdev->flags);
 		if (!test_and_set_bit(WantReplacement, &rdev->flags))
 			set_bit(MD_RECOVERY_NEEDED, &
@@ -666,8 +666,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 					break;
 			}
 			continue;
-		} else
+		} else {
+			if ((sectors > best_good_sectors) && (best_disk >= 0))
+				best_disk = -1;
 			best_good_sectors = sectors;
+		}
 
 		if (best_disk >= 0)
 			/* At least two disks to choose from so failfast is OK */
@@ -799,7 +802,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
 		bio->bi_next = NULL;
 		bio->bi_bdev = rdev->bdev;
 		if (test_bit(Faulty, &rdev->flags)) {
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
 		} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
 				    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1529,17 +1532,16 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 			plug = container_of(cb, struct raid1_plug_cb, cb);
 		else
 			plug = NULL;
-		spin_lock_irqsave(&conf->device_lock, flags);
 		if (plug) {
 			bio_list_add(&plug->pending, mbio);
 			plug->pending_cnt++;
 		} else {
+			spin_lock_irqsave(&conf->device_lock, flags);
 			bio_list_add(&conf->pending_bio_list, mbio);
 			conf->pending_count++;
-		}
-		spin_unlock_irqrestore(&conf->device_lock, flags);
-		if (!plug)
+			spin_unlock_irqrestore(&conf->device_lock, flags);
 			md_wakeup_thread(mddev->thread);
+		}
 	}
 
 	r1_bio_write_done(r1_bio);
@@ -1854,7 +1856,7 @@ static void end_sync_read(struct bio *bio)
 	 * or re-read if the read failed.
 	 * We don't do much here, just schedule handling by raid1d
 	 */
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
 
 	if (atomic_dec_and_test(&r1_bio->remaining))
@@ -1863,7 +1865,7 @@ static void end_sync_read(struct bio *bio)
 
 static void end_sync_write(struct bio *bio)
 {
-	int uptodate = !bio->bi_error;
+	int uptodate = !bio->bi_status;
 	struct r1bio *r1_bio = get_resync_r1bio(bio);
 	struct mddev *mddev = r1_bio->mddev;
 	struct r1conf *conf = mddev->private;
@@ -2056,7 +2058,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
 		idx ++;
 	}
 	set_bit(R1BIO_Uptodate, &r1_bio->state);
-	bio->bi_error = 0;
+	bio->bi_status = 0;
 	return 1;
 }
 
@@ -2080,16 +2082,16 @@ static void process_checks(struct r1bio *r1_bio)
 	for (i = 0; i < conf->raid_disks * 2; i++) {
 		int j;
 		int size;
-		int error;
+		blk_status_t status;
 		struct bio_vec *bi;
 		struct bio *b = r1_bio->bios[i];
 		struct resync_pages *rp = get_resync_pages(b);
 		if (b->bi_end_io != end_sync_read)
 			continue;
 		/* fixup the bio for reuse, but preserve errno */
-		error = b->bi_error;
+		status = b->bi_status;
 		bio_reset(b);
-		b->bi_error = error;
+		b->bi_status = status;
 		b->bi_vcnt = vcnt;
 		b->bi_iter.bi_size = r1_bio->sectors << 9;
 		b->bi_iter.bi_sector = r1_bio->sector +
@@ -2111,7 +2113,7 @@ static void process_checks(struct r1bio *r1_bio)
 	}
 	for (primary = 0; primary < conf->raid_disks * 2; primary++)
 		if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
-		    !r1_bio->bios[primary]->bi_error) {
+		    !r1_bio->bios[primary]->bi_status) {
 			r1_bio->bios[primary]->bi_end_io = NULL;
 			rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
 			break;
@@ -2121,7 +2123,7 @@ static void process_checks(struct r1bio *r1_bio)
 		int j;
 		struct bio *pbio = r1_bio->bios[primary];
 		struct bio *sbio = r1_bio->bios[i];
-		int error = sbio->bi_error;
+		blk_status_t status = sbio->bi_status;
 		struct page **ppages = get_resync_pages(pbio)->pages;
 		struct page **spages = get_resync_pages(sbio)->pages;
 		struct bio_vec *bi;
@@ -2130,12 +2132,12 @@ static void process_checks(struct r1bio *r1_bio)
 		if (sbio->bi_end_io != end_sync_read)
 			continue;
 		/* Now we can 'fixup' the error value */
-		sbio->bi_error = 0;
+		sbio->bi_status = 0;
 
 		bio_for_each_segment_all(bi, sbio, j)
 			page_len[j] = bi->bv_len;
 
-		if (!error) {
+		if (!status) {
 			for (j = vcnt; j-- ; ) {
 				if (memcmp(page_address(ppages[j]),
 					   page_address(spages[j]),
@@ -2147,7 +2149,7 @@ static void process_checks(struct r1bio *r1_bio)
 		if (j >= 0)
 			atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
 		if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
-			      && !error)) {
+			      && !status)) {
 			/* No need to write to this device. */
 			sbio->bi_end_io = NULL;
 			rdev_dec_pending(conf->mirrors[i].rdev, mddev);
@@ -2398,11 +2400,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
 		struct bio *bio = r1_bio->bios[m];
 		if (bio->bi_end_io == NULL)
 			continue;
-		if (!bio->bi_error &&
+		if (!bio->bi_status &&
 		    test_bit(R1BIO_MadeGood, &r1_bio->state)) {
 			rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
 		}
-		if (bio->bi_error &&
+		if (bio->bi_status &&
 		    test_bit(R1BIO_WriteError, &r1_bio->state)) {
 			if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
 				md_error(conf->mddev, rdev);
@@ -2953,7 +2955,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	if (!conf->r1bio_pool)
 		goto abort;
 
-	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
 	if (!conf->bio_split)
 		goto abort;
 
@@ -3061,6 +3063,8 @@ static int raid1_run(struct mddev *mddev)
 			mdname(mddev));
 		return -EIO;
 	}
+	if (mddev_init_writes_pending(mddev) < 0)
+		return -ENOMEM;
 	/*
 	 * copy the already verified devices into our private RAID1
 	 * bookkeeping area. [whatever we allocate in run(),
@@ -3197,7 +3201,7 @@ static int raid1_reshape(struct mddev *mddev)
 	struct r1conf *conf = mddev->private;
 	int cnt, raid_disks;
 	unsigned long flags;
-	int d, d2, err;
+	int d, d2;
 
 	/* Cannot change chunk_size, layout, or level */
 	if (mddev->chunk_sectors != mddev->new_chunk_sectors ||
@@ -3209,11 +3213,8 @@ static int raid1_reshape(struct mddev *mddev)
 		return -EINVAL;
 	}
 
-	if (!mddev_is_clustered(mddev)) {
-		err = md_allow_write(mddev);
-		if (err)
-			return err;
-	}
+	if (!mddev_is_clustered(mddev))
+		md_allow_write(mddev);
 
 	raid_disks = mddev->raid_disks + mddev->delta_disks;
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6b86a0032cf8..57a250fdbbcc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -336,7 +336,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
 	struct r10conf *conf = r10_bio->mddev->private;
 
 	if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 
 	bio_endio(bio);
 	/*
@@ -389,7 +389,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
 
 static void raid10_end_read_request(struct bio *bio)
 {
-	int uptodate = !bio->bi_error;
+	int uptodate = !bio->bi_status;
 	struct r10bio *r10_bio = bio->bi_private;
 	int slot, dev;
 	struct md_rdev *rdev;
@@ -477,7 +477,7 @@ static void raid10_end_write_request(struct bio *bio)
 	struct bio *to_put = NULL;
 	bool discard_error;
 
-	discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+	discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
 
 	dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
 
@@ -491,7 +491,7 @@ static void raid10_end_write_request(struct bio *bio)
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	if (bio->bi_error && !discard_error) {
+	if (bio->bi_status && !discard_error) {
 		if (repl)
 			/* Never record new bad blocks to replacement,
 			 * just fail it.
@@ -913,7 +913,7 @@ static void flush_pending_writes(struct r10conf *conf)
 			bio->bi_next = NULL;
 			bio->bi_bdev = rdev->bdev;
 			if (test_bit(Faulty, &rdev->flags)) {
-				bio->bi_error = -EIO;
+				bio->bi_status = BLK_STS_IOERR;
 				bio_endio(bio);
 			} else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
 					    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1098,7 +1098,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 		bio->bi_next = NULL;
 		bio->bi_bdev = rdev->bdev;
 		if (test_bit(Faulty, &rdev->flags)) {
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
 		} else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
 				    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1282,17 +1282,16 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 		plug = container_of(cb, struct raid10_plug_cb, cb);
 	else
 		plug = NULL;
-	spin_lock_irqsave(&conf->device_lock, flags);
 	if (plug) {
 		bio_list_add(&plug->pending, mbio);
 		plug->pending_cnt++;
 	} else {
+		spin_lock_irqsave(&conf->device_lock, flags);
 		bio_list_add(&conf->pending_bio_list, mbio);
 		conf->pending_count++;
-	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
-	if (!plug)
+		spin_unlock_irqrestore(&conf->device_lock, flags);
 		md_wakeup_thread(mddev->thread);
+	}
 }
 
 static void raid10_write_request(struct mddev *mddev, struct bio *bio,
@@ -1889,7 +1888,7 @@ static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d)
 {
 	struct r10conf *conf = r10_bio->mddev->private;
 
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
 	else
 		/* The write handler will notice the lack of
@@ -1973,7 +1972,7 @@ static void end_sync_write(struct bio *bio)
 	else
 		rdev = conf->mirrors[d].rdev;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		if (repl)
 			md_error(mddev, rdev);
 		else {
@@ -2022,7 +2021,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 
 	/* find the first device with a block */
 	for (i=0; i<conf->copies; i++)
-		if (!r10_bio->devs[i].bio->bi_error)
+		if (!r10_bio->devs[i].bio->bi_status)
 			break;
 
 	if (i == conf->copies)
@@ -2051,7 +2050,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 		tpages = get_resync_pages(tbio)->pages;
 		d = r10_bio->devs[i].devnum;
 		rdev = conf->mirrors[d].rdev;
-		if (!r10_bio->devs[i].bio->bi_error) {
+		if (!r10_bio->devs[i].bio->bi_status) {
 			/* We know that the bi_io_vec layout is the same for
 			 * both 'first' and 'i', so we just compare them.
 			 * All vec entries are PAGE_SIZE;
@@ -2634,7 +2633,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 			rdev = conf->mirrors[dev].rdev;
 			if (r10_bio->devs[m].bio == NULL)
 				continue;
-			if (!r10_bio->devs[m].bio->bi_error) {
+			if (!r10_bio->devs[m].bio->bi_status) {
 				rdev_clear_badblocks(
 					rdev,
 					r10_bio->devs[m].addr,
@@ -2650,7 +2649,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 			if (r10_bio->devs[m].repl_bio == NULL)
 				continue;
 
-			if (!r10_bio->devs[m].repl_bio->bi_error) {
+			if (!r10_bio->devs[m].repl_bio->bi_status) {
 				rdev_clear_badblocks(
 					rdev,
 					r10_bio->devs[m].addr,
@@ -2676,7 +2675,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 					r10_bio->devs[m].addr,
 					r10_bio->sectors, 0);
 				rdev_dec_pending(rdev, conf->mddev);
-			} else if (bio != NULL && bio->bi_error) {
+			} else if (bio != NULL && bio->bi_status) {
 				fail = true;
 				if (!narrow_write_error(r10_bio, m)) {
 					md_error(conf->mddev, rdev);
@@ -3268,7 +3267,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 				r10_bio->devs[i].repl_bio->bi_end_io = NULL;
 
 			bio = r10_bio->devs[i].bio;
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			rcu_read_lock();
 			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
@@ -3310,7 +3309,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 
 			/* Need to set up for writing to the replacement */
 			bio = r10_bio->devs[i].repl_bio;
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 
 			sector = r10_bio->devs[i].addr;
 			bio->bi_next = biolist;
@@ -3376,7 +3375,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 
 		if (bio->bi_end_io == end_sync_read) {
 			md_sync_acct(bio->bi_bdev, nr_sectors);
-			bio->bi_error = 0;
+			bio->bi_status = 0;
 			generic_make_request(bio);
 		}
 	}
@@ -3553,7 +3552,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 	if (!conf->r10bio_pool)
 		goto out;
 
-	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
 	if (!conf->bio_split)
 		goto out;
 
@@ -3612,6 +3611,9 @@ static int raid10_run(struct mddev *mddev)
 	int first = 1;
 	bool discard_supported = false;
 
+	if (mddev_init_writes_pending(mddev) < 0)
+		return -ENOMEM;
+
 	if (mddev->private == NULL) {
 		conf = setup_conf(mddev);
 		if (IS_ERR(conf))
@@ -4395,7 +4397,7 @@ read_more:
 	read_bio->bi_end_io = end_reshape_read;
 	bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
 	read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
-	read_bio->bi_error = 0;
+	read_bio->bi_status = 0;
 	read_bio->bi_vcnt = 0;
 	read_bio->bi_iter.bi_size = 0;
 	r10_bio->master_bio = read_bio;
@@ -4639,7 +4641,7 @@ static void end_reshape_write(struct bio *bio)
 		rdev = conf->mirrors[d].rdev;
 	}
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		/* FIXME should record badblock */
 		md_error(mddev, rdev);
 	}
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 26ba09282e7c..bfa1e907c472 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -24,6 +24,7 @@
 #include "md.h"
 #include "raid5.h"
 #include "bitmap.h"
+#include "raid5-log.h"
 
 /*
  * metadata/data stored in disk with 4k size unit (a block) regardless
@@ -571,7 +572,7 @@ static void r5l_log_endio(struct bio *bio)
 	struct r5l_log *log = io->log;
 	unsigned long flags;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		md_error(log->rdev->mddev, log->rdev);
 
 	bio_put(bio);
@@ -622,20 +623,30 @@ static void r5l_do_submit_io(struct r5l_log *log, struct r5l_io_unit *io)
 	__r5l_set_io_unit_state(io, IO_UNIT_IO_START);
 	spin_unlock_irqrestore(&log->io_list_lock, flags);
 
+	/*
+	 * In case of journal device failures, submit_bio will get error
+	 * and calls endio, then active stripes will continue write
+	 * process. Therefore, it is not necessary to check Faulty bit
+	 * of journal device here.
+	 *
+	 * We can't check split_bio after current_bio is submitted. If
+	 * io->split_bio is null, after current_bio is submitted, current_bio
+	 * might already be completed and the io_unit is freed. We submit
+	 * split_bio first to avoid the issue.
+	 */
+	if (io->split_bio) {
+		if (io->has_flush)
+			io->split_bio->bi_opf |= REQ_PREFLUSH;
+		if (io->has_fua)
+			io->split_bio->bi_opf |= REQ_FUA;
+		submit_bio(io->split_bio);
+	}
+
 	if (io->has_flush)
 		io->current_bio->bi_opf |= REQ_PREFLUSH;
 	if (io->has_fua)
 		io->current_bio->bi_opf |= REQ_FUA;
 	submit_bio(io->current_bio);
-
-	if (!io->split_bio)
-		return;
-
-	if (io->has_flush)
-		io->split_bio->bi_opf |= REQ_PREFLUSH;
-	if (io->has_fua)
-		io->split_bio->bi_opf |= REQ_FUA;
-	submit_bio(io->split_bio);
 }
 
 /* deferred io_unit will be dispatched here */
@@ -670,6 +681,11 @@ static void r5c_disable_writeback_async(struct work_struct *work)
 		return;
 	pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n",
 		mdname(mddev));
+
+	/* wait superblock change before suspend */
+	wait_event(mddev->sb_wait,
+		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+
 	mddev_suspend(mddev);
 	log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
 	mddev_resume(mddev);
@@ -1231,7 +1247,7 @@ static void r5l_log_flush_endio(struct bio *bio)
 	unsigned long flags;
 	struct r5l_io_unit *io;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		md_error(log->rdev->mddev, log->rdev);
 
 	spin_lock_irqsave(&log->io_list_lock, flags);
@@ -1766,7 +1782,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos,
 	mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
 					     mb, PAGE_SIZE));
 	if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE,
-			  REQ_FUA, false)) {
+			  REQ_SYNC | REQ_FUA, false)) {
 		__free_page(page);
 		return -EIO;
 	}
@@ -2372,7 +2388,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 		mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
 						     mb, PAGE_SIZE));
 		sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
-			     REQ_OP_WRITE, REQ_FUA, false);
+			     REQ_OP_WRITE, REQ_SYNC | REQ_FUA, false);
 		sh->log_start = ctx->pos;
 		list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
 		atomic_inc(&log->stripe_in_journal_count);
@@ -2621,8 +2637,11 @@ int r5c_try_caching_write(struct r5conf *conf,
 	 * When run in degraded mode, array is set to write-through mode.
 	 * This check helps drain pending write safely in the transition to
 	 * write-through mode.
+	 *
+	 * When a stripe is syncing, the write is also handled in write
+	 * through mode.
 	 */
-	if (s->failed) {
+	if (s->failed || test_bit(STRIPE_SYNCING, &sh->state)) {
 		r5c_make_stripe_write_out(sh);
 		return -EAGAIN;
 	}
@@ -2825,6 +2844,9 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
 	}
 
 	r5l_append_flush_payload(log, sh->sector);
+	/* stripe is flused to raid disks, we can do resync now */
+	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
+		set_bit(STRIPE_HANDLE, &sh->state);
 }
 
 int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
@@ -2973,7 +2995,7 @@ ioerr:
 	return ret;
 }
 
-void r5c_update_on_rdev_error(struct mddev *mddev)
+void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	struct r5conf *conf = mddev->private;
 	struct r5l_log *log = conf->log;
@@ -2981,7 +3003,8 @@ void r5c_update_on_rdev_error(struct mddev *mddev)
 	if (!log)
 		return;
 
-	if (raid5_calc_degraded(conf) > 0 &&
+	if ((raid5_calc_degraded(conf) > 0 ||
+	     test_bit(Journal, &rdev->flags)) &&
 	    conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
 		schedule_work(&log->disable_writeback_work);
 }
@@ -3040,7 +3063,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	if (!log->io_pool)
 		goto io_pool;
 
-	log->bs = bioset_create(R5L_POOL_SIZE, 0);
+	log->bs = bioset_create(R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 	if (!log->bs)
 		goto io_bs;
 
diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h
index 27097101ccca..328d67aedda4 100644
--- a/drivers/md/raid5-log.h
+++ b/drivers/md/raid5-log.h
@@ -28,7 +28,8 @@ extern void r5c_flush_cache(struct r5conf *conf, int num);
 extern void r5c_check_stripe_cache_usage(struct r5conf *conf);
 extern void r5c_check_cached_full_stripe(struct r5conf *conf);
 extern struct md_sysfs_entry r5c_journal_mode;
-extern void r5c_update_on_rdev_error(struct mddev *mddev);
+extern void r5c_update_on_rdev_error(struct mddev *mddev,
+				     struct md_rdev *rdev);
 extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
 
 extern struct dma_async_tx_descriptor *
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 5d25bebf3328..77cce3573aa8 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -397,7 +397,7 @@ static void ppl_log_endio(struct bio *bio)
 
 	pr_debug("%s: seq: %llu\n", __func__, io->seq);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		md_error(ppl_conf->mddev, log->rdev);
 
 	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
@@ -907,8 +907,8 @@ static int ppl_write_empty_header(struct ppl_log *log)
 	pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE));
 
 	if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
-			  PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_FUA, 0,
-			  false)) {
+			  PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC |
+			  REQ_FUA, 0, false)) {
 		md_error(rdev->mddev, rdev);
 		ret = -EIO;
 	}
@@ -1150,7 +1150,7 @@ int ppl_init_log(struct r5conf *conf)
 		goto err;
 	}
 
-	ppl_conf->bs = bioset_create(conf->raid_disks, 0);
+	ppl_conf->bs = bioset_create(conf->raid_disks, 0, 0);
 	if (!ppl_conf->bs) {
 		ret = -ENOMEM;
 		goto err;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2e38cfac5b1d..62c965be97e1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -103,8 +103,7 @@ static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
 static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
 {
 	int i;
-	local_irq_disable();
-	spin_lock(conf->hash_locks);
+	spin_lock_irq(conf->hash_locks);
 	for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
 		spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks);
 	spin_lock(&conf->device_lock);
@@ -114,9 +113,9 @@ static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
 {
 	int i;
 	spin_unlock(&conf->device_lock);
-	for (i = NR_STRIPE_HASH_LOCKS; i; i--)
-		spin_unlock(conf->hash_locks + i - 1);
-	local_irq_enable();
+	for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--)
+		spin_unlock(conf->hash_locks + i);
+	spin_unlock_irq(conf->hash_locks);
 }
 
 /* Find first data disk in a raid6 stripe */
@@ -234,11 +233,15 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
 			if (test_bit(R5_InJournal, &sh->dev[i].flags))
 				injournal++;
 	/*
-	 * When quiesce in r5c write back, set STRIPE_HANDLE for stripes with
-	 * data in journal, so they are not released to cached lists
+	 * In the following cases, the stripe cannot be released to cached
+	 * lists. Therefore, we make the stripe write out and set
+	 * STRIPE_HANDLE:
+	 *   1. when quiesce in r5c write back;
+	 *   2. when resync is requested fot the stripe.
 	 */
-	if (conf->quiesce && r5c_is_writeback(conf->log) &&
-	    !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0) {
+	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) ||
+	    (conf->quiesce && r5c_is_writeback(conf->log) &&
+	     !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) {
 		if (test_bit(STRIPE_R5C_CACHING, &sh->state))
 			r5c_make_stripe_write_out(sh);
 		set_bit(STRIPE_HANDLE, &sh->state);
@@ -714,12 +717,11 @@ static bool is_full_stripe_write(struct stripe_head *sh)
 
 static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 {
-	local_irq_disable();
 	if (sh1 > sh2) {
-		spin_lock(&sh2->stripe_lock);
+		spin_lock_irq(&sh2->stripe_lock);
 		spin_lock_nested(&sh1->stripe_lock, 1);
 	} else {
-		spin_lock(&sh1->stripe_lock);
+		spin_lock_irq(&sh1->stripe_lock);
 		spin_lock_nested(&sh2->stripe_lock, 1);
 	}
 }
@@ -727,8 +729,7 @@ static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 {
 	spin_unlock(&sh1->stripe_lock);
-	spin_unlock(&sh2->stripe_lock);
-	local_irq_enable();
+	spin_unlock_irq(&sh2->stripe_lock);
 }
 
 /* Only freshly new full stripe normal write stripe can be added to a batch list */
@@ -2312,14 +2313,12 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	struct stripe_head *osh, *nsh;
 	LIST_HEAD(newstripes);
 	struct disk_info *ndisks;
-	int err;
+	int err = 0;
 	struct kmem_cache *sc;
 	int i;
 	int hash, cnt;
 
-	err = md_allow_write(conf->mddev);
-	if (err)
-		return err;
+	md_allow_write(conf->mddev);
 
 	/* Step 1 */
 	sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
@@ -2477,7 +2476,7 @@ static void raid5_end_read_request(struct bio * bi)
 
 	pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
-		bi->bi_error);
+		bi->bi_status);
 	if (i == disks) {
 		bio_reset(bi);
 		BUG();
@@ -2497,7 +2496,7 @@ static void raid5_end_read_request(struct bio * bi)
 		s = sh->sector + rdev->new_data_offset;
 	else
 		s = sh->sector + rdev->data_offset;
-	if (!bi->bi_error) {
+	if (!bi->bi_status) {
 		set_bit(R5_UPTODATE, &sh->dev[i].flags);
 		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
 			/* Note that this cannot happen on a
@@ -2614,7 +2613,7 @@ static void raid5_end_write_request(struct bio *bi)
 	}
 	pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
-		bi->bi_error);
+		bi->bi_status);
 	if (i == disks) {
 		bio_reset(bi);
 		BUG();
@@ -2622,14 +2621,14 @@ static void raid5_end_write_request(struct bio *bi)
 	}
 
 	if (replacement) {
-		if (bi->bi_error)
+		if (bi->bi_status)
 			md_error(conf->mddev, rdev);
 		else if (is_badblock(rdev, sh->sector,
 				     STRIPE_SECTORS,
 				     &first_bad, &bad_sectors))
 			set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
 	} else {
-		if (bi->bi_error) {
+		if (bi->bi_status) {
 			set_bit(STRIPE_DEGRADED, &sh->state);
 			set_bit(WriteErrorSeen, &rdev->flags);
 			set_bit(R5_WriteError, &sh->dev[i].flags);
@@ -2650,7 +2649,7 @@ static void raid5_end_write_request(struct bio *bi)
 	}
 	rdev_dec_pending(rdev, conf->mddev);
 
-	if (sh->batch_head && bi->bi_error && !replacement)
+	if (sh->batch_head && bi->bi_status && !replacement)
 		set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
 
 	bio_reset(bi);
@@ -2694,7 +2693,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 		bdevname(rdev->bdev, b),
 		mdname(mddev),
 		conf->raid_disks - mddev->degraded);
-	r5c_update_on_rdev_error(mddev);
+	r5c_update_on_rdev_error(mddev, rdev);
 }
 
 /*
@@ -3055,6 +3054,11 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
  *      When LOG_CRITICAL, stripes with injournal == 0 will be sent to
  *      no_space_stripes list.
  *
+ *   3. during journal failure
+ *      In journal failure, we try to flush all cached data to raid disks
+ *      based on data in stripe cache. The array is read-only to upper
+ *      layers, so we would skip all pending writes.
+ *
  */
 static inline bool delay_towrite(struct r5conf *conf,
 				 struct r5dev *dev,
@@ -3068,6 +3072,9 @@ static inline bool delay_towrite(struct r5conf *conf,
 	if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) &&
 	    s->injournal > 0)
 		return true;
+	/* case 3 above */
+	if (s->log_failed && s->injournal)
+		return true;
 	return false;
 }
 
@@ -3374,7 +3381,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 			sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
 
-			bi->bi_error = -EIO;
+			bi->bi_status = BLK_STS_IOERR;
 			md_write_end(conf->mddev);
 			bio_endio(bi);
 			bi = nextbi;
@@ -3396,7 +3403,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		       sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
 
-			bi->bi_error = -EIO;
+			bi->bi_status = BLK_STS_IOERR;
 			md_write_end(conf->mddev);
 			bio_endio(bi);
 			bi = bi2;
@@ -3422,7 +3429,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 				struct bio *nextbi =
 					r5_next_bio(bi, sh->dev[i].sector);
 
-				bi->bi_error = -EIO;
+				bi->bi_status = BLK_STS_IOERR;
 				bio_endio(bi);
 				bi = nextbi;
 			}
@@ -4078,10 +4085,15 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
 			set_bit(STRIPE_INSYNC, &sh->state);
 		else {
 			atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
-			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) {
 				/* don't try to repair!! */
 				set_bit(STRIPE_INSYNC, &sh->state);
-			else {
+				pr_warn_ratelimited("%s: mismatch sector in range "
+						    "%llu-%llu\n", mdname(conf->mddev),
+						    (unsigned long long) sh->sector,
+						    (unsigned long long) sh->sector +
+						    STRIPE_SECTORS);
+			} else {
 				sh->check_state = check_state_compute_run;
 				set_bit(STRIPE_COMPUTE_RUN, &sh->state);
 				set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
@@ -4230,10 +4242,15 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
 			}
 		} else {
 			atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
-			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) {
 				/* don't try to repair!! */
 				set_bit(STRIPE_INSYNC, &sh->state);
-			else {
+				pr_warn_ratelimited("%s: mismatch sector in range "
+						    "%llu-%llu\n", mdname(conf->mddev),
+						    (unsigned long long) sh->sector,
+						    (unsigned long long) sh->sector +
+						    STRIPE_SECTORS);
+			} else {
 				int *target = &sh->ops.target;
 
 				sh->ops.target = -1;
@@ -4653,8 +4670,13 @@ static void handle_stripe(struct stripe_head *sh)
 
 	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
 		spin_lock(&sh->stripe_lock);
-		/* Cannot process 'sync' concurrently with 'discard' */
-		if (!test_bit(STRIPE_DISCARD, &sh->state) &&
+		/*
+		 * Cannot process 'sync' concurrently with 'discard'.
+		 * Flush data in r5cache before 'sync'.
+		 */
+		if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) &&
+		    !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) &&
+		    !test_bit(STRIPE_DISCARD, &sh->state) &&
 		    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
 			set_bit(STRIPE_SYNCING, &sh->state);
 			clear_bit(STRIPE_INSYNC, &sh->state);
@@ -4701,10 +4723,15 @@ static void handle_stripe(struct stripe_head *sh)
 	       " to_write=%d failed=%d failed_num=%d,%d\n",
 	       s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
 	       s.failed_num[0], s.failed_num[1]);
-	/* check if the array has lost more than max_degraded devices and,
+	/*
+	 * check if the array has lost more than max_degraded devices and,
 	 * if so, some requests might need to be failed.
+	 *
+	 * When journal device failed (log_failed), we will only process
+	 * the stripe if there is data need write to raid disks
 	 */
-	if (s.failed > conf->max_degraded || s.log_failed) {
+	if (s.failed > conf->max_degraded ||
+	    (s.log_failed && s.injournal == 0)) {
 		sh->check_state = 0;
 		sh->reconstruct_state = 0;
 		break_stripe_batch_list(sh, 0);
@@ -5127,7 +5154,7 @@ static void raid5_align_endio(struct bio *bi)
 	struct mddev *mddev;
 	struct r5conf *conf;
 	struct md_rdev *rdev;
-	int error = bi->bi_error;
+	blk_status_t error = bi->bi_status;
 
 	bio_put(bi);
 
@@ -5277,8 +5304,10 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
 	struct stripe_head *sh, *tmp;
 	struct list_head *handle_list = NULL;
 	struct r5worker_group *wg;
-	bool second_try = !r5c_is_writeback(conf->log);
-	bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state);
+	bool second_try = !r5c_is_writeback(conf->log) &&
+		!r5l_log_disk_error(conf);
+	bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) ||
+		r5l_log_disk_error(conf);
 
 again:
 	wg = NULL;
@@ -5702,7 +5731,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
 			release_stripe_plug(mddev, sh);
 		} else {
 			/* cannot get stripe for read-ahead, just give-up */
-			bi->bi_error = -EIO;
+			bi->bi_status = BLK_STS_IOERR;
 			break;
 		}
 	}
@@ -6313,7 +6342,6 @@ int
 raid5_set_cache_size(struct mddev *mddev, int size)
 {
 	struct r5conf *conf = mddev->private;
-	int err;
 
 	if (size <= 16 || size > 32768)
 		return -EINVAL;
@@ -6325,10 +6353,7 @@ raid5_set_cache_size(struct mddev *mddev, int size)
 		;
 	mutex_unlock(&conf->cache_size_mutex);
 
-
-	err = md_allow_write(mddev);
-	if (err)
-		return err;
+	md_allow_write(mddev);
 
 	mutex_lock(&conf->cache_size_mutex);
 	while (size > conf->max_nr_stripes)
@@ -6918,7 +6943,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 			goto abort;
 	}
 
-	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
 	if (!conf->bio_split)
 		goto abort;
 	conf->mddev = mddev;
@@ -7093,6 +7118,9 @@ static int raid5_run(struct mddev *mddev)
 	long long min_offset_diff = 0;
 	int first = 1;
 
+	if (mddev_init_writes_pending(mddev) < 0)
+		return -ENOMEM;
+
 	if (mddev->recovery_cp != MaxSector)
 		pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
 			  mdname(mddev));
@@ -7530,7 +7558,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 		 * neilb: there is no locking about new writes here,
 		 * so this cannot be safe.
 		 */
-		if (atomic_read(&conf->active_stripes)) {
+		if (atomic_read(&conf->active_stripes) ||
+		    atomic_read(&conf->r5c_cached_full_stripes) ||
+		    atomic_read(&conf->r5c_cached_partial_stripes)) {
 			return -EBUSY;
 		}
 		log_exit(conf);
diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig
index b72edd27f880..55d9c2b82b7e 100644
--- a/drivers/media/Kconfig
+++ b/drivers/media/Kconfig
@@ -2,6 +2,12 @@
 # Multimedia device configuration
 #
 
+config CEC_CORE
+	tristate
+
+config CEC_NOTIFIER
+	bool
+
 menuconfig MEDIA_SUPPORT
 	tristate "Multimedia support"
 	depends on HAS_IOMEM
diff --git a/drivers/media/Makefile b/drivers/media/Makefile
index 523fea3648ad..044503aa8801 100644
--- a/drivers/media/Makefile
+++ b/drivers/media/Makefile
@@ -4,8 +4,6 @@
 
 media-objs	:= media-device.o media-devnode.o media-entity.o
 
-obj-$(CONFIG_CEC_CORE) += cec/
-
 #
 # I2C drivers should come before other drivers, otherwise they'll fail
 # when compiled as builtin drivers
@@ -26,6 +24,8 @@ obj-$(CONFIG_DVB_CORE)  += dvb-core/
 # There are both core and drivers at RC subtree - merge before drivers
 obj-y += rc/
 
+obj-$(CONFIG_CEC_CORE) += cec/
+
 #
 # Finally, merge the drivers that require the core
 #
diff --git a/drivers/media/cec/Kconfig b/drivers/media/cec/Kconfig
index f944d93e3167..43428cec3a01 100644
--- a/drivers/media/cec/Kconfig
+++ b/drivers/media/cec/Kconfig
@@ -1,19 +1,6 @@
-config CEC_CORE
-	tristate
-	depends on MEDIA_CEC_SUPPORT
-	default y
-
-config MEDIA_CEC_NOTIFIER
-	bool
-
 config MEDIA_CEC_RC
 	bool "HDMI CEC RC integration"
 	depends on CEC_CORE && RC_CORE
+	depends on CEC_CORE=m || RC_CORE=y
 	---help---
 	  Pass on CEC remote control messages to the RC framework.
-
-config MEDIA_CEC_DEBUG
-	bool "HDMI CEC debugfs interface"
-	depends on CEC_CORE && DEBUG_FS
-	---help---
-	  Turns on the DebugFS interface for CEC devices.
diff --git a/drivers/media/cec/Makefile b/drivers/media/cec/Makefile
index 402a6c62a3e8..eaf408e64669 100644
--- a/drivers/media/cec/Makefile
+++ b/drivers/media/cec/Makefile
@@ -1,6 +1,6 @@
 cec-objs := cec-core.o cec-adap.o cec-api.o cec-edid.o
 
-ifeq ($(CONFIG_MEDIA_CEC_NOTIFIER),y)
+ifeq ($(CONFIG_CEC_NOTIFIER),y)
   cec-objs += cec-notifier.o
 endif
 
diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c
index f5fe01c9da8a..9dfc79800c71 100644
--- a/drivers/media/cec/cec-adap.c
+++ b/drivers/media/cec/cec-adap.c
@@ -1864,7 +1864,7 @@ void cec_monitor_all_cnt_dec(struct cec_adapter *adap)
 		WARN_ON(call_op(adap, adap_monitor_all_enable, 0));
 }
 
-#ifdef CONFIG_MEDIA_CEC_DEBUG
+#ifdef CONFIG_DEBUG_FS
 /*
  * Log the current state of the CEC adapter.
  * Very useful for debugging.
diff --git a/drivers/media/cec/cec-api.c b/drivers/media/cec/cec-api.c
index 0860fb458757..999926f731c8 100644
--- a/drivers/media/cec/cec-api.c
+++ b/drivers/media/cec/cec-api.c
@@ -271,16 +271,10 @@ static long cec_receive(struct cec_adapter *adap, struct cec_fh *fh,
 			bool block, struct cec_msg __user *parg)
 {
 	struct cec_msg msg = {};
-	long err = 0;
+	long err;
 
 	if (copy_from_user(&msg, parg, sizeof(msg)))
 		return -EFAULT;
-	mutex_lock(&adap->lock);
-	if (!adap->is_configured && fh->mode_follower < CEC_MODE_MONITOR)
-		err = -ENONET;
-	mutex_unlock(&adap->lock);
-	if (err)
-		return err;
 
 	err = cec_receive_msg(fh, &msg, block);
 	if (err)
diff --git a/drivers/media/cec/cec-core.c b/drivers/media/cec/cec-core.c
index f9ebff90f8eb..2f87748ba4fc 100644
--- a/drivers/media/cec/cec-core.c
+++ b/drivers/media/cec/cec-core.c
@@ -187,7 +187,7 @@ static void cec_devnode_unregister(struct cec_devnode *devnode)
 	put_device(&devnode->dev);
 }
 
-#ifdef CONFIG_MEDIA_CEC_NOTIFIER
+#ifdef CONFIG_CEC_NOTIFIER
 static void cec_cec_notify(struct cec_adapter *adap, u16 pa)
 {
 	cec_s_phys_addr(adap, pa, false);
@@ -323,7 +323,7 @@ int cec_register_adapter(struct cec_adapter *adap,
 	}
 
 	dev_set_drvdata(&adap->devnode.dev, adap);
-#ifdef CONFIG_MEDIA_CEC_DEBUG
+#ifdef CONFIG_DEBUG_FS
 	if (!top_cec_dir)
 		return 0;
 
@@ -355,7 +355,7 @@ void cec_unregister_adapter(struct cec_adapter *adap)
 	adap->rc = NULL;
 #endif
 	debugfs_remove_recursive(adap->cec_dir);
-#ifdef CONFIG_MEDIA_CEC_NOTIFIER
+#ifdef CONFIG_CEC_NOTIFIER
 	if (adap->notifier)
 		cec_notifier_unregister(adap->notifier);
 #endif
@@ -395,7 +395,7 @@ static int __init cec_devnode_init(void)
 		return ret;
 	}
 
-#ifdef CONFIG_MEDIA_CEC_DEBUG
+#ifdef CONFIG_DEBUG_FS
 	top_cec_dir = debugfs_create_dir("cec", NULL);
 	if (IS_ERR_OR_NULL(top_cec_dir)) {
 		pr_warn("cec: Failed to create debugfs cec dir\n");
diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig
index fd181c99ce11..aaa9471c7d11 100644
--- a/drivers/media/i2c/Kconfig
+++ b/drivers/media/i2c/Kconfig
@@ -220,7 +220,8 @@ config VIDEO_ADV7604
 
 config VIDEO_ADV7604_CEC
 	bool "Enable Analog Devices ADV7604 CEC support"
-	depends on VIDEO_ADV7604 && CEC_CORE
+	depends on VIDEO_ADV7604
+	select CEC_CORE
 	---help---
 	  When selected the adv7604 will support the optional
 	  HDMI CEC feature.
@@ -240,7 +241,8 @@ config VIDEO_ADV7842
 
 config VIDEO_ADV7842_CEC
 	bool "Enable Analog Devices ADV7842 CEC support"
-	depends on VIDEO_ADV7842 && CEC_CORE
+	depends on VIDEO_ADV7842
+	select CEC_CORE
 	---help---
 	  When selected the adv7842 will support the optional
 	  HDMI CEC feature.
@@ -478,7 +480,8 @@ config VIDEO_ADV7511
 
 config VIDEO_ADV7511_CEC
 	bool "Enable Analog Devices ADV7511 CEC support"
-	depends on VIDEO_ADV7511 && CEC_CORE
+	depends on VIDEO_ADV7511
+	select CEC_CORE
 	---help---
 	  When selected the adv7511 will support the optional
 	  HDMI CEC feature.
diff --git a/drivers/media/i2c/tc358743.c b/drivers/media/i2c/tc358743.c
index acef4eca269f..3251cba89e8f 100644
--- a/drivers/media/i2c/tc358743.c
+++ b/drivers/media/i2c/tc358743.c
@@ -223,7 +223,7 @@ static void i2c_wr8(struct v4l2_subdev *sd, u16 reg, u8 val)
 static void i2c_wr8_and_or(struct v4l2_subdev *sd, u16 reg,
 		u8 mask, u8 val)
 {
-	i2c_wrreg(sd, reg, (i2c_rdreg(sd, reg, 2) & mask) | val, 2);
+	i2c_wrreg(sd, reg, (i2c_rdreg(sd, reg, 1) & mask) | val, 1);
 }
 
 static u16 i2c_rd16(struct v4l2_subdev *sd, u16 reg)
diff --git a/drivers/media/pci/zoran/zoran_card.c b/drivers/media/pci/zoran/zoran_card.c
index 5266755add63..4680f001653a 100644
--- a/drivers/media/pci/zoran/zoran_card.c
+++ b/drivers/media/pci/zoran/zoran_card.c
@@ -69,7 +69,7 @@ MODULE_PARM_DESC(card, "Card type");
  */
 
 static unsigned long vidmem;	/* default = 0 - Video memory base address */
-module_param(vidmem, ulong, 0444);
+module_param_hw(vidmem, ulong, iomem, 0444);
 MODULE_PARM_DESC(vidmem, "Default video memory base address");
 
 /*
diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index ac026ee1ca07..041cb80a26b1 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -501,8 +501,9 @@ if CEC_PLATFORM_DRIVERS
 
 config VIDEO_SAMSUNG_S5P_CEC
        tristate "Samsung S5P CEC driver"
-       depends on CEC_CORE && (PLAT_S5P || ARCH_EXYNOS || COMPILE_TEST)
-       select MEDIA_CEC_NOTIFIER
+       depends on PLAT_S5P || ARCH_EXYNOS || COMPILE_TEST
+       select CEC_CORE
+       select CEC_NOTIFIER
        ---help---
          This is a driver for Samsung S5P HDMI CEC interface. It uses the
          generic CEC framework interface.
@@ -511,8 +512,9 @@ config VIDEO_SAMSUNG_S5P_CEC
 
 config VIDEO_STI_HDMI_CEC
        tristate "STMicroelectronics STiH4xx HDMI CEC driver"
-       depends on CEC_CORE && (ARCH_STI || COMPILE_TEST)
-       select MEDIA_CEC_NOTIFIER
+       depends on ARCH_STI || COMPILE_TEST
+       select CEC_CORE
+       select CEC_NOTIFIER
        ---help---
          This is a driver for STIH4xx HDMI CEC interface. It uses the
          generic CEC framework interface.
diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c
index 57a842ff3097..b7731b18ecae 100644
--- a/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c
+++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c
@@ -493,10 +493,10 @@ static int vdec_h264_get_param(unsigned long h_vdec,
 }
 
 static struct vdec_common_if vdec_h264_if = {
-	vdec_h264_init,
-	vdec_h264_decode,
-	vdec_h264_get_param,
-	vdec_h264_deinit,
+	.init		= vdec_h264_init,
+	.decode		= vdec_h264_decode,
+	.get_param	= vdec_h264_get_param,
+	.deinit		= vdec_h264_deinit,
 };
 
 struct vdec_common_if *get_h264_dec_comm_if(void);
diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c
index 6e7a62ae0842..b9fad6a48879 100644
--- a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c
+++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c
@@ -620,10 +620,10 @@ static void vdec_vp8_deinit(unsigned long h_vdec)
 }
 
 static struct vdec_common_if vdec_vp8_if = {
-	vdec_vp8_init,
-	vdec_vp8_decode,
-	vdec_vp8_get_param,
-	vdec_vp8_deinit,
+	.init		= vdec_vp8_init,
+	.decode		= vdec_vp8_decode,
+	.get_param	= vdec_vp8_get_param,
+	.deinit		= vdec_vp8_deinit,
 };
 
 struct vdec_common_if *get_vp8_dec_comm_if(void);
diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c
index 5539b1853f16..1daee1207469 100644
--- a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c
+++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c
@@ -979,10 +979,10 @@ static int vdec_vp9_get_param(unsigned long h_vdec,
 }
 
 static struct vdec_common_if vdec_vp9_if = {
-	vdec_vp9_init,
-	vdec_vp9_decode,
-	vdec_vp9_get_param,
-	vdec_vp9_deinit,
+	.init		= vdec_vp9_init,
+	.decode		= vdec_vp9_decode,
+	.get_param	= vdec_vp9_get_param,
+	.deinit		= vdec_vp9_deinit,
 };
 
 struct vdec_common_if *get_vp9_dec_comm_if(void);
diff --git a/drivers/media/platform/vivid/Kconfig b/drivers/media/platform/vivid/Kconfig
index b36ac19dc6e4..154de92dd809 100644
--- a/drivers/media/platform/vivid/Kconfig
+++ b/drivers/media/platform/vivid/Kconfig
@@ -26,7 +26,8 @@ config VIDEO_VIVID
 
 config VIDEO_VIVID_CEC
 	bool "Enable CEC emulation support"
-	depends on VIDEO_VIVID && CEC_CORE
+	depends on VIDEO_VIVID
+	select CEC_CORE
 	---help---
 	  When selected the vivid module will emulate the optional
 	  HDMI CEC feature.
diff --git a/drivers/media/rc/rc-ir-raw.c b/drivers/media/rc/rc-ir-raw.c
index 90f66dc7c0d7..a2fc1a1d58b0 100644
--- a/drivers/media/rc/rc-ir-raw.c
+++ b/drivers/media/rc/rc-ir-raw.c
@@ -211,7 +211,7 @@ EXPORT_SYMBOL_GPL(ir_raw_event_set_idle);
  */
 void ir_raw_event_handle(struct rc_dev *dev)
 {
-	if (!dev->raw)
+	if (!dev->raw || !dev->raw->thread)
 		return;
 
 	wake_up_process(dev->raw->thread);
@@ -490,6 +490,7 @@ int ir_raw_event_register(struct rc_dev *dev)
 {
 	int rc;
 	struct ir_raw_handler *handler;
+	struct task_struct *thread;
 
 	if (!dev)
 		return -EINVAL;
@@ -507,13 +508,15 @@ int ir_raw_event_register(struct rc_dev *dev)
 	 * because the event is coming from userspace
 	 */
 	if (dev->driver_type != RC_DRIVER_IR_RAW_TX) {
-		dev->raw->thread = kthread_run(ir_raw_event_thread, dev->raw,
-					       "rc%u", dev->minor);
+		thread = kthread_run(ir_raw_event_thread, dev->raw, "rc%u",
+				     dev->minor);
 
-		if (IS_ERR(dev->raw->thread)) {
-			rc = PTR_ERR(dev->raw->thread);
+		if (IS_ERR(thread)) {
+			rc = PTR_ERR(thread);
 			goto out;
 		}
+
+		dev->raw->thread = thread;
 	}
 
 	mutex_lock(&ir_raw_handler_lock);
diff --git a/drivers/media/rc/serial_ir.c b/drivers/media/rc/serial_ir.c
index 2f0a0d248936..77d5d4cbed0a 100644
--- a/drivers/media/rc/serial_ir.c
+++ b/drivers/media/rc/serial_ir.c
@@ -833,11 +833,11 @@ MODULE_LICENSE("GPL");
 module_param(type, int, 0444);
 MODULE_PARM_DESC(type, "Hardware type (0 = home-brew, 1 = IRdeo, 2 = IRdeo Remote, 3 = AnimaX, 4 = IgorPlug");
 
-module_param(io, int, 0444);
+module_param_hw(io, int, ioport, 0444);
 MODULE_PARM_DESC(io, "I/O address base (0x3f8 or 0x2f8)");
 
 /* some architectures (e.g. intel xscale) have memory mapped registers */
-module_param(iommap, ulong, 0444);
+module_param_hw(iommap, ulong, other, 0444);
 MODULE_PARM_DESC(iommap, "physical base for memory mapped I/O (0 = no memory mapped io)");
 
 /*
@@ -845,13 +845,13 @@ MODULE_PARM_DESC(iommap, "physical base for memory mapped I/O (0 = no memory map
  * on 32bit word boundaries.
  * See linux-kernel/drivers/tty/serial/8250/8250.c serial_in()/out()
  */
-module_param(ioshift, int, 0444);
+module_param_hw(ioshift, int, other, 0444);
 MODULE_PARM_DESC(ioshift, "shift I/O register offset (0 = no shift)");
 
-module_param(irq, int, 0444);
+module_param_hw(irq, int, irq, 0444);
 MODULE_PARM_DESC(irq, "Interrupt (4 or 3)");
 
-module_param(share_irq, bool, 0444);
+module_param_hw(share_irq, bool, other, 0444);
 MODULE_PARM_DESC(share_irq, "Share interrupts (0 = off, 1 = on)");
 
 module_param(sense, int, 0444);
diff --git a/drivers/media/rc/sir_ir.c b/drivers/media/rc/sir_ir.c
index e12ec50bf0bf..90a5f8fd5eea 100644
--- a/drivers/media/rc/sir_ir.c
+++ b/drivers/media/rc/sir_ir.c
@@ -183,9 +183,15 @@ static irqreturn_t sir_interrupt(int irq, void *dev_id)
 	static unsigned long delt;
 	unsigned long deltintr;
 	unsigned long flags;
+	int counter = 0;
 	int iir, lsr;
 
 	while ((iir = inb(io + UART_IIR) & UART_IIR_ID)) {
+		if (++counter > 256) {
+			dev_err(&sir_ir_dev->dev, "Trapped in interrupt");
+			break;
+		}
+
 		switch (iir & UART_IIR_ID) { /* FIXME toto treba preriedit */
 		case UART_IIR_MSI:
 			(void)inb(io + UART_MSR);
diff --git a/drivers/media/usb/pulse8-cec/Kconfig b/drivers/media/usb/pulse8-cec/Kconfig
index 8937f3986a01..18ead44824ba 100644
--- a/drivers/media/usb/pulse8-cec/Kconfig
+++ b/drivers/media/usb/pulse8-cec/Kconfig
@@ -1,6 +1,7 @@
 config USB_PULSE8_CEC
 	tristate "Pulse Eight HDMI CEC"
-	depends on USB_ACM && CEC_CORE
+	depends on USB_ACM
+	select CEC_CORE
 	select SERIO
 	select SERIO_SERPORT
 	---help---
diff --git a/drivers/media/usb/rainshadow-cec/Kconfig b/drivers/media/usb/rainshadow-cec/Kconfig
index 3eb86607efb8..030ef01b1ff0 100644
--- a/drivers/media/usb/rainshadow-cec/Kconfig
+++ b/drivers/media/usb/rainshadow-cec/Kconfig
@@ -1,6 +1,7 @@
 config USB_RAINSHADOW_CEC
 	tristate "RainShadow Tech HDMI CEC"
-	depends on USB_ACM && CEC_CORE
+	depends on USB_ACM
+	select CEC_CORE
 	select SERIO
 	select SERIO_SERPORT
 	---help---
diff --git a/drivers/media/usb/rainshadow-cec/rainshadow-cec.c b/drivers/media/usb/rainshadow-cec/rainshadow-cec.c
index 541ca543f71f..4126552c9055 100644
--- a/drivers/media/usb/rainshadow-cec/rainshadow-cec.c
+++ b/drivers/media/usb/rainshadow-cec/rainshadow-cec.c
@@ -119,7 +119,7 @@ static void rain_irq_work_handler(struct work_struct *work)
 
 	while (true) {
 		unsigned long flags;
-		bool exit_loop;
+		bool exit_loop = false;
 		char data;
 
 		spin_lock_irqsave(&rain->buf_lock, flags);
@@ -336,6 +336,7 @@ static int rain_connect(struct serio *serio, struct serio_driver *drv)
 	serio_set_drvdata(serio, rain);
 	INIT_WORK(&rain->work, rain_irq_work_handler);
 	mutex_init(&rain->write_lock);
+	spin_lock_init(&rain->buf_lock);
 
 	err = serio_open(serio, drv);
 	if (err)
diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index 94afbbf92807..c0175ea7e7ad 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -868,7 +868,7 @@ EXPORT_SYMBOL_GPL(vb2_core_create_bufs);
 
 void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no)
 {
-	if (plane_no > vb->num_planes || !vb->planes[plane_no].mem_priv)
+	if (plane_no >= vb->num_planes || !vb->planes[plane_no].mem_priv)
 		return NULL;
 
 	return call_ptr_memop(vb, vaddr, vb->planes[plane_no].mem_priv);
diff --git a/drivers/memory/Kconfig b/drivers/memory/Kconfig
index 3ecc429297a0..ffc350258041 100644
--- a/drivers/memory/Kconfig
+++ b/drivers/memory/Kconfig
@@ -116,7 +116,7 @@ config FSL_CORENET_CF
 
 config FSL_IFC
 	bool
-	depends on FSL_SOC || ARCH_LAYERSCAPE
+	depends on FSL_SOC || ARCH_LAYERSCAPE || SOC_LS1021A
 
 config JZ4780_NEMC
 	bool "Ingenic JZ4780 SoC NEMC driver"
diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c
index 35910f945bfa..99e644cda4d1 100644
--- a/drivers/memory/atmel-ebi.c
+++ b/drivers/memory/atmel-ebi.c
@@ -581,7 +581,7 @@ static int atmel_ebi_probe(struct platform_device *pdev)
 	return of_platform_populate(np, NULL, NULL, dev);
 }
 
-static int atmel_ebi_resume(struct device *dev)
+static __maybe_unused int atmel_ebi_resume(struct device *dev)
 {
 	struct atmel_ebi *ebi = dev_get_drvdata(dev);
 	struct atmel_ebi_dev *ebid;
diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
index bf0fe0137dfe..6d1b4b707cc2 100644
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -512,7 +512,7 @@ static void gpmc_cs_show_timings(int cs, const char *desc)
 	pr_info("gpmc cs%i access configuration:\n", cs);
 	GPMC_GET_RAW_BOOL(GPMC_CS_CONFIG1,  4,  4, "time-para-granularity");
 	GPMC_GET_RAW(GPMC_CS_CONFIG1,  8,  9, "mux-add-data");
-	GPMC_GET_RAW_MAX(GPMC_CS_CONFIG1, 12, 13,
+	GPMC_GET_RAW_SHIFT_MAX(GPMC_CS_CONFIG1, 12, 13, 1,
 			 GPMC_CONFIG1_DEVICESIZE_MAX, "device-width");
 	GPMC_GET_RAW(GPMC_CS_CONFIG1, 16, 17, "wait-pin");
 	GPMC_GET_RAW_BOOL(GPMC_CS_CONFIG1, 21, 21, "wait-on-write");
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 99e651c27fb7..22de7f5ed032 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -1921,12 +1921,13 @@ static void msb_io_work(struct work_struct *work)
 		spin_lock_irqsave(&msb->q_lock, flags);
 
 		if (len)
-			if (!__blk_end_request(msb->req, 0, len))
+			if (!__blk_end_request(msb->req, BLK_STS_OK, len))
 				msb->req = NULL;
 
 		if (error && msb->req) {
+			blk_status_t ret = errno_to_blk_status(error);
 			dbg_verbose("IO: ending one sector of the request with error");
-			if (!__blk_end_request(msb->req, error, msb->page_size))
+			if (!__blk_end_request(msb->req, ret, msb->page_size))
 				msb->req = NULL;
 		}
 
@@ -2014,7 +2015,7 @@ static void msb_submit_req(struct request_queue *q)
 		WARN_ON(!msb->io_queue_stopped);
 
 		while ((req = blk_fetch_request(q)) != NULL)
-			__blk_end_request_all(req, -ENODEV);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 		return;
 	}
 
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index c00d8a266878..8897962781bb 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -709,7 +709,8 @@ try_again:
 					       msb->req_sg);
 
 		if (!msb->seg_count) {
-			chunk = __blk_end_request_cur(msb->block_req, -ENOMEM);
+			chunk = __blk_end_request_cur(msb->block_req,
+					BLK_STS_RESOURCE);
 			continue;
 		}
 
@@ -776,7 +777,8 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error)
 		if (error && !t_len)
 			t_len = blk_rq_cur_bytes(msb->block_req);
 
-		chunk = __blk_end_request(msb->block_req, error, t_len);
+		chunk = __blk_end_request(msb->block_req,
+				errno_to_blk_status(error), t_len);
 
 		error = mspro_block_issue_req(card, chunk);
 
@@ -838,7 +840,7 @@ static void mspro_block_submit_req(struct request_queue *q)
 
 	if (msb->eject) {
 		while ((req = blk_fetch_request(q)) != NULL)
-			__blk_end_request_all(req, -ENODEV);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 
 		return;
 	}
diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index 75488e65cd96..8d46e3ad9529 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -245,8 +245,7 @@ static int arizona_poll_reg(struct arizona *arizona,
 	int ret;
 
 	ret = regmap_read_poll_timeout(arizona->regmap,
-				       ARIZONA_INTERRUPT_RAW_STATUS_5, val,
-				       ((val & mask) == target),
+				       reg, val, ((val & mask) == target),
 				       ARIZONA_REG_POLL_DELAY_US,
 				       timeout_ms * 1000);
 	if (ret)
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 2cba76e6fa3c..07bbd4cc1852 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -492,6 +492,7 @@ config ASPEED_LPC_CTRL
 
 config PCI_ENDPOINT_TEST
 	depends on PCI
+	select CRC32
 	tristate "PCI Endpoint Test driver"
 	---help---
            Enable this configuration option to enable the host side test driver
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 4472ce11f98d..8c32040b9c09 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -45,7 +45,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
 	mutex_init(&ctx->mapping_lock);
 	ctx->mapping = NULL;
 
-	if (cxl_is_psl8(afu)) {
+	if (cxl_is_power8()) {
 		spin_lock_init(&ctx->sste_lock);
 
 		/*
@@ -189,7 +189,7 @@ int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma)
 		if (start + len > ctx->afu->adapter->ps_size)
 			return -EINVAL;
 
-		if (cxl_is_psl9(ctx->afu)) {
+		if (cxl_is_power9()) {
 			/*
 			 * Make sure there is a valid problem state
 			 * area space for this AFU.
@@ -324,7 +324,7 @@ static void reclaim_ctx(struct rcu_head *rcu)
 {
 	struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu);
 
-	if (cxl_is_psl8(ctx->afu))
+	if (cxl_is_power8())
 		free_page((u64)ctx->sstp);
 	if (ctx->ff_page)
 		__free_page(ctx->ff_page);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index c8568ea7c518..a03f8e7535e5 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -357,6 +357,7 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An     = {0x0A0};
 #define CXL_PSL9_DSISR_An_PF_RGP  0x0000000000000090ULL  /* PTE not found (Radix Guest (parent)) 0b10010000 */
 #define CXL_PSL9_DSISR_An_PF_HRH  0x0000000000000094ULL  /* PTE not found (HPT/Radix Host)       0b10010100 */
 #define CXL_PSL9_DSISR_An_PF_STEG 0x000000000000009CULL  /* PTE not found (STEG VA)              0b10011100 */
+#define CXL_PSL9_DSISR_An_URTCH   0x00000000000000B4ULL  /* Unsupported Radix Tree Configuration 0b10110100 */
 
 /****** CXL_PSL_TFC_An ******************************************************/
 #define CXL_PSL_TFC_An_A  (1ull << (63-28)) /* Acknowledge non-translation fault */
@@ -844,24 +845,15 @@ static inline bool cxl_is_power8(void)
 
 static inline bool cxl_is_power9(void)
 {
-	/* intermediate solution */
-	if (!cxl_is_power8() &&
-	   (cpu_has_feature(CPU_FTRS_POWER9) ||
-	    cpu_has_feature(CPU_FTR_POWER9_DD1)))
+	if (pvr_version_is(PVR_POWER9))
 		return true;
 	return false;
 }
 
-static inline bool cxl_is_psl8(struct cxl_afu *afu)
+static inline bool cxl_is_power9_dd1(void)
 {
-	if (afu->adapter->caia_major == 1)
-		return true;
-	return false;
-}
-
-static inline bool cxl_is_psl9(struct cxl_afu *afu)
-{
-	if (afu->adapter->caia_major == 2)
+	if ((pvr_version_is(PVR_POWER9)) &&
+	    cpu_has_feature(CPU_FTR_POWER9_DD1))
 		return true;
 	return false;
 }
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index 5344448f514e..c79e39bad7a4 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -187,7 +187,7 @@ static struct mm_struct *get_mem_context(struct cxl_context *ctx)
 
 static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
 {
-	if ((cxl_is_psl8(ctx->afu)) && (dsisr & CXL_PSL_DSISR_An_DS))
+	if ((cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DS)))
 		return true;
 
 	return false;
@@ -195,16 +195,23 @@ static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
 
 static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr)
 {
-	if ((cxl_is_psl8(ctx->afu)) && (dsisr & CXL_PSL_DSISR_An_DM))
-		return true;
+	u64 crs; /* Translation Checkout Response Status */
 
-	if ((cxl_is_psl9(ctx->afu)) &&
-	   ((dsisr & CXL_PSL9_DSISR_An_CO_MASK) &
-		(CXL_PSL9_DSISR_An_PF_SLR | CXL_PSL9_DSISR_An_PF_RGC |
-		 CXL_PSL9_DSISR_An_PF_RGP | CXL_PSL9_DSISR_An_PF_HRH |
-		 CXL_PSL9_DSISR_An_PF_STEG)))
+	if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_An_DM))
 		return true;
 
+	if (cxl_is_power9()) {
+		crs = (dsisr & CXL_PSL9_DSISR_An_CO_MASK);
+		if ((crs == CXL_PSL9_DSISR_An_PF_SLR) ||
+		    (crs == CXL_PSL9_DSISR_An_PF_RGC) ||
+		    (crs == CXL_PSL9_DSISR_An_PF_RGP) ||
+		    (crs == CXL_PSL9_DSISR_An_PF_HRH) ||
+		    (crs == CXL_PSL9_DSISR_An_PF_STEG) ||
+		    (crs == CXL_PSL9_DSISR_An_URTCH)) {
+			return true;
+		}
+	}
+
 	return false;
 }
 
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 17b433f1ce23..0761271d68c5 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -159,11 +159,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 
 	/* Do this outside the status_mutex to avoid a circular dependency with
 	 * the locking in cxl_mmap_fault() */
-	if (copy_from_user(&work, uwork,
-			   sizeof(struct cxl_ioctl_start_work))) {
-		rc = -EFAULT;
-		goto out;
-	}
+	if (copy_from_user(&work, uwork, sizeof(work)))
+		return -EFAULT;
 
 	mutex_lock(&ctx->status_mutex);
 	if (ctx->status != OPENED) {
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index 1703655072b1..c1ba0d42cbc8 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -329,8 +329,15 @@ static int __init init_cxl(void)
 
 	cxl_debugfs_init();
 
-	if ((rc = register_cxl_calls(&cxl_calls)))
-		goto err;
+	/*
+	 * we don't register the callback on P9. slb callack is only
+	 * used for the PSL8 MMU and CX4.
+	 */
+	if (cxl_is_power8()) {
+		rc = register_cxl_calls(&cxl_calls);
+		if (rc)
+			goto err;
+	}
 
 	if (cpu_has_feature(CPU_FTR_HVMODE)) {
 		cxl_ops = &cxl_native_ops;
@@ -347,7 +354,8 @@ static int __init init_cxl(void)
 
 	return 0;
 err1:
-	unregister_cxl_calls(&cxl_calls);
+	if (cxl_is_power8())
+		unregister_cxl_calls(&cxl_calls);
 err:
 	cxl_debugfs_exit();
 	cxl_file_exit();
@@ -366,7 +374,8 @@ static void exit_cxl(void)
 
 	cxl_debugfs_exit();
 	cxl_file_exit();
-	unregister_cxl_calls(&cxl_calls);
+	if (cxl_is_power8())
+		unregister_cxl_calls(&cxl_calls);
 	idr_destroy(&cxl_adapter_idr);
 }
 
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 871a2f09c718..2b2f8894149d 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -105,11 +105,16 @@ static int native_afu_reset(struct cxl_afu *afu)
 			   CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK,
 			   false);
 
-	/* Re-enable any masked interrupts */
-	serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
-	serr &= ~CXL_PSL_SERR_An_IRQ_MASKS;
-	cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
-
+	/*
+	 * Re-enable any masked interrupts when the AFU is not
+	 * activated to avoid side effects after attaching a process
+	 * in dedicated mode.
+	 */
+	if (afu->current_mode == 0) {
+		serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+		serr &= ~CXL_PSL_SERR_An_IRQ_MASKS;
+		cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
+	}
 
 	return rc;
 }
@@ -139,9 +144,9 @@ int cxl_psl_purge(struct cxl_afu *afu)
 
 	pr_devel("PSL purge request\n");
 
-	if (cxl_is_psl8(afu))
+	if (cxl_is_power8())
 		trans_fault = CXL_PSL_DSISR_TRANS;
-	if (cxl_is_psl9(afu))
+	if (cxl_is_power9())
 		trans_fault = CXL_PSL9_DSISR_An_TF;
 
 	if (!cxl_ops->link_ok(afu->adapter, afu)) {
@@ -603,7 +608,7 @@ static u64 calculate_sr(struct cxl_context *ctx)
 		if (!test_tsk_thread_flag(current, TIF_32BIT))
 			sr |= CXL_PSL_SR_An_SF;
 	}
-	if (cxl_is_psl9(ctx->afu)) {
+	if (cxl_is_power9()) {
 		if (radix_enabled())
 			sr |= CXL_PSL_SR_An_XLAT_ror;
 		else
@@ -1117,10 +1122,10 @@ static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx,
 
 static bool cxl_is_translation_fault(struct cxl_afu *afu, u64 dsisr)
 {
-	if ((cxl_is_psl8(afu)) && (dsisr & CXL_PSL_DSISR_TRANS))
+	if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_TRANS))
 		return true;
 
-	if ((cxl_is_psl9(afu)) && (dsisr & CXL_PSL9_DSISR_An_TF))
+	if ((cxl_is_power9()) && (dsisr & CXL_PSL9_DSISR_An_TF))
 		return true;
 
 	return false;
@@ -1194,10 +1199,10 @@ static void native_irq_wait(struct cxl_context *ctx)
 		if (ph != ctx->pe)
 			return;
 		dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An);
-		if (cxl_is_psl8(ctx->afu) &&
+		if (cxl_is_power8() &&
 		   ((dsisr & CXL_PSL_DSISR_PENDING) == 0))
 			return;
-		if (cxl_is_psl9(ctx->afu) &&
+		if (cxl_is_power9() &&
 		   ((dsisr & CXL_PSL9_DSISR_PENDING) == 0))
 			return;
 		/*
@@ -1302,13 +1307,16 @@ int cxl_native_register_psl_err_irq(struct cxl *adapter)
 
 void cxl_native_release_psl_err_irq(struct cxl *adapter)
 {
-	if (adapter->native->err_virq != irq_find_mapping(NULL, adapter->native->err_hwirq))
+	if (adapter->native->err_virq == 0 ||
+	    adapter->native->err_virq !=
+	    irq_find_mapping(NULL, adapter->native->err_hwirq))
 		return;
 
 	cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000);
 	cxl_unmap_irq(adapter->native->err_virq, adapter);
 	cxl_ops->release_one_irq(adapter, adapter->native->err_hwirq);
 	kfree(adapter->irq_name);
+	adapter->native->err_virq = 0;
 }
 
 int cxl_native_register_serr_irq(struct cxl_afu *afu)
@@ -1346,13 +1354,15 @@ int cxl_native_register_serr_irq(struct cxl_afu *afu)
 
 void cxl_native_release_serr_irq(struct cxl_afu *afu)
 {
-	if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
+	if (afu->serr_virq == 0 ||
+	    afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
 		return;
 
 	cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000);
 	cxl_unmap_irq(afu->serr_virq, afu);
 	cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq);
 	kfree(afu->err_irq_name);
+	afu->serr_virq = 0;
 }
 
 int cxl_native_register_psl_irq(struct cxl_afu *afu)
@@ -1375,12 +1385,15 @@ int cxl_native_register_psl_irq(struct cxl_afu *afu)
 
 void cxl_native_release_psl_irq(struct cxl_afu *afu)
 {
-	if (afu->native->psl_virq != irq_find_mapping(NULL, afu->native->psl_hwirq))
+	if (afu->native->psl_virq == 0 ||
+	    afu->native->psl_virq !=
+	    irq_find_mapping(NULL, afu->native->psl_hwirq))
 		return;
 
 	cxl_unmap_irq(afu->native->psl_virq, afu);
 	cxl_ops->release_one_irq(afu->adapter, afu->native->psl_hwirq);
 	kfree(afu->psl_irq_name);
+	afu->native->psl_virq = 0;
 }
 
 static void recover_psl_err(struct cxl_afu *afu, u64 errstat)
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 6dc1ee5b92c9..1eb9859809bf 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -436,7 +436,7 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter, struct pci
 	/* nMMU_ID Defaults to: b’000001001’*/
 	xsl_dsnctl |= ((u64)0x09 << (63-28));
 
-	if (cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+	if (!(cxl_is_power9_dd1())) {
 		/*
 		 * Used to identify CAPI packets which should be sorted into
 		 * the Non-Blocking queues by the PHB. This field should match
@@ -491,7 +491,7 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter, struct pci
 	cxl_p1_write(adapter, CXL_PSL9_APCDEDTYPE, 0x40000003FFFF0000ULL);
 
 	/* Disable vc dd1 fix */
-	if ((cxl_is_power9() && cpu_has_feature(CPU_FTR_POWER9_DD1)))
+	if (cxl_is_power9_dd1())
 		cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0400000000000001ULL);
 
 	return 0;
@@ -1439,8 +1439,7 @@ int cxl_pci_reset(struct cxl *adapter)
 	 * The adapter is about to be reset, so ignore errors.
 	 * Not supported on P9 DD1
 	 */
-	if ((cxl_is_power8()) ||
-	    ((cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1))))
+	if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
 		cxl_data_cache_flush(adapter);
 
 	/* pcie_warm_reset requests a fundamental pci reset which includes a
@@ -1750,7 +1749,6 @@ static const struct cxl_service_layer_ops psl9_ops = {
 	.debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl9,
 	.debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl9,
 	.psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9,
-	.err_irq_dump_registers = cxl_native_err_irq_dump_regs,
 	.debugfs_stop_trace = cxl_stop_trace_psl9,
 	.write_timebase_ctrl = write_timebase_ctrl_psl9,
 	.timebase_read = timebase_read_psl9,
@@ -1889,8 +1887,7 @@ static void cxl_pci_remove_adapter(struct cxl *adapter)
 	 * Flush adapter datacache as its about to be removed.
 	 * Not supported on P9 DD1.
 	 */
-	if ((cxl_is_power8()) ||
-	    ((cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1))))
+	if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
 		cxl_data_cache_flush(adapter);
 
 	cxl_deconfigure_adapter(adapter);
diff --git a/drivers/misc/dummy-irq.c b/drivers/misc/dummy-irq.c
index acbbe0390be4..76a1015d5783 100644
--- a/drivers/misc/dummy-irq.c
+++ b/drivers/misc/dummy-irq.c
@@ -59,6 +59,6 @@ module_exit(dummy_irq_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jiri Kosina");
-module_param(irq, uint, 0444);
+module_param_hw(irq, uint, irq, 0444);
 MODULE_PARM_DESC(irq, "The IRQ to register for");
 MODULE_DESCRIPTION("Dummy IRQ handler driver");
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index d1928fdd0f43..07aad8576334 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -763,8 +763,10 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a,
 {
 	struct mei_cl_device *cldev = to_mei_cl_device(dev);
 	const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+	u8 version = mei_me_cl_ver(cldev->me_cl);
 
-	return scnprintf(buf, PAGE_SIZE, "mei:%s:%pUl:", cldev->name, uuid);
+	return scnprintf(buf, PAGE_SIZE, "mei:%s:%pUl:%02X:",
+			 cldev->name, uuid, version);
 }
 static DEVICE_ATTR_RO(modalias);
 
diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
index c2e29d7f0de8..a341938c7e2c 100644
--- a/drivers/misc/mic/vop/vop_main.c
+++ b/drivers/misc/mic/vop/vop_main.c
@@ -278,7 +278,7 @@ static void vop_del_vqs(struct virtio_device *dev)
 static struct virtqueue *vop_find_vq(struct virtio_device *dev,
 				     unsigned index,
 				     void (*callback)(struct virtqueue *vq),
-				     const char *name)
+				     const char *name, bool ctx)
 {
 	struct _vop_vdev *vdev = to_vopvdev(dev);
 	struct vop_device *vpdev = vdev->vpdev;
@@ -314,6 +314,7 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev,
 				le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN,
 				dev,
 				false,
+				ctx,
 				(void __force *)va, vop_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
@@ -374,7 +375,8 @@ unmap:
 static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs,
 			struct virtqueue *vqs[],
 			vq_callback_t *callbacks[],
-			const char * const names[], struct irq_affinity *desc)
+			const char * const names[], const bool *ctx,
+			struct irq_affinity *desc)
 {
 	struct _vop_vdev *vdev = to_vopvdev(dev);
 	struct vop_device *vpdev = vdev->vpdev;
@@ -388,7 +390,8 @@ static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs,
 	for (i = 0; i < nvqs; ++i) {
 		dev_dbg(_vop_dev(vdev), "%s: %d: %s\n",
 			__func__, i, names[i]);
-		vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i]);
+		vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i],
+				     ctx ? ctx[i] : false);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
 			goto error;
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index c862cd4583cc..b8069eec18cb 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -309,6 +309,9 @@ static inline enum xp_retval
 xpc_send(short partid, int ch_number, u32 flags, void *payload,
 	 u16 payload_size)
 {
+	if (!xpc_interface.send)
+		return xpNotLoaded;
+
 	return xpc_interface.send(partid, ch_number, flags, payload,
 				  payload_size);
 }
@@ -317,6 +320,9 @@ static inline enum xp_retval
 xpc_send_notify(short partid, int ch_number, u32 flags, void *payload,
 		u16 payload_size, xpc_notify_func func, void *key)
 {
+	if (!xpc_interface.send_notify)
+		return xpNotLoaded;
+
 	return xpc_interface.send_notify(partid, ch_number, flags, payload,
 					 payload_size, func, key);
 }
@@ -324,12 +330,16 @@ xpc_send_notify(short partid, int ch_number, u32 flags, void *payload,
 static inline void
 xpc_received(short partid, int ch_number, void *payload)
 {
-	return xpc_interface.received(partid, ch_number, payload);
+	if (xpc_interface.received)
+		xpc_interface.received(partid, ch_number, payload);
 }
 
 static inline enum xp_retval
 xpc_partid_to_nasids(short partid, void *nasids)
 {
+	if (!xpc_interface.partid_to_nasids)
+		return xpNotLoaded;
+
 	return xpc_interface.partid_to_nasids(partid, nasids);
 }
 
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index 01be66d02ca8..6d7f557fd1c1 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -69,23 +69,9 @@ struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS];
 EXPORT_SYMBOL_GPL(xpc_registrations);
 
 /*
- * Initialize the XPC interface to indicate that XPC isn't loaded.
+ * Initialize the XPC interface to NULL to indicate that XPC isn't loaded.
  */
-static enum xp_retval
-xpc_notloaded(void)
-{
-	return xpNotLoaded;
-}
-
-struct xpc_interface xpc_interface = {
-	(void (*)(int))xpc_notloaded,
-	(void (*)(int))xpc_notloaded,
-	(enum xp_retval(*)(short, int, u32, void *, u16))xpc_notloaded,
-	(enum xp_retval(*)(short, int, u32, void *, u16, xpc_notify_func,
-			   void *))xpc_notloaded,
-	(void (*)(short, int, void *))xpc_notloaded,
-	(enum xp_retval(*)(short, void *))xpc_notloaded
-};
+struct xpc_interface xpc_interface = { };
 EXPORT_SYMBOL_GPL(xpc_interface);
 
 /*
@@ -115,17 +101,7 @@ EXPORT_SYMBOL_GPL(xpc_set_interface);
 void
 xpc_clear_interface(void)
 {
-	xpc_interface.connect = (void (*)(int))xpc_notloaded;
-	xpc_interface.disconnect = (void (*)(int))xpc_notloaded;
-	xpc_interface.send = (enum xp_retval(*)(short, int, u32, void *, u16))
-	    xpc_notloaded;
-	xpc_interface.send_notify = (enum xp_retval(*)(short, int, u32, void *,
-						       u16, xpc_notify_func,
-						       void *))xpc_notloaded;
-	xpc_interface.received = (void (*)(short, int, void *))
-	    xpc_notloaded;
-	xpc_interface.partid_to_nasids = (enum xp_retval(*)(short, void *))
-	    xpc_notloaded;
+	memset(&xpc_interface, 0, sizeof(xpc_interface));
 }
 EXPORT_SYMBOL_GPL(xpc_clear_interface);
 
@@ -188,7 +164,8 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
 
 	mutex_unlock(&registration->mutex);
 
-	xpc_interface.connect(ch_number);
+	if (xpc_interface.connect)
+		xpc_interface.connect(ch_number);
 
 	return xpSuccess;
 }
@@ -237,7 +214,8 @@ xpc_disconnect(int ch_number)
 	registration->assigned_limit = 0;
 	registration->idle_limit = 0;
 
-	xpc_interface.disconnect(ch_number);
+	if (xpc_interface.disconnect)
+		xpc_interface.disconnect(ch_number);
 
 	mutex_unlock(&registration->mutex);
 
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 8273b078686d..6ff94a948a4b 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1184,9 +1184,10 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
 	struct mmc_card *card = md->queue.card;
 	unsigned int from, nr, arg;
 	int err = 0, type = MMC_BLK_DISCARD;
+	blk_status_t status = BLK_STS_OK;
 
 	if (!mmc_can_erase(card)) {
-		err = -EOPNOTSUPP;
+		status = BLK_STS_NOTSUPP;
 		goto fail;
 	}
 
@@ -1212,10 +1213,12 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
 		if (!err)
 			err = mmc_erase(card, from, nr, arg);
 	} while (err == -EIO && !mmc_blk_reset(md, card->host, type));
-	if (!err)
+	if (err)
+		status = BLK_STS_IOERR;
+	else
 		mmc_blk_reset_success(md, type);
 fail:
-	blk_end_request(req, err, blk_rq_bytes(req));
+	blk_end_request(req, status, blk_rq_bytes(req));
 }
 
 static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
@@ -1225,9 +1228,10 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
 	struct mmc_card *card = md->queue.card;
 	unsigned int from, nr, arg;
 	int err = 0, type = MMC_BLK_SECDISCARD;
+	blk_status_t status = BLK_STS_OK;
 
 	if (!(mmc_can_secure_erase_trim(card))) {
-		err = -EOPNOTSUPP;
+		status = BLK_STS_NOTSUPP;
 		goto out;
 	}
 
@@ -1254,8 +1258,10 @@ retry:
 	err = mmc_erase(card, from, nr, arg);
 	if (err == -EIO)
 		goto out_retry;
-	if (err)
+	if (err) {
+		status = BLK_STS_IOERR;
 		goto out;
+	}
 
 	if (arg == MMC_SECURE_TRIM1_ARG) {
 		if (card->quirks & MMC_QUIRK_INAND_CMD38) {
@@ -1270,8 +1276,10 @@ retry:
 		err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG);
 		if (err == -EIO)
 			goto out_retry;
-		if (err)
+		if (err) {
+			status = BLK_STS_IOERR;
 			goto out;
+		}
 	}
 
 out_retry:
@@ -1280,7 +1288,7 @@ out_retry:
 	if (!err)
 		mmc_blk_reset_success(md, type);
 out:
-	blk_end_request(req, err, blk_rq_bytes(req));
+	blk_end_request(req, status, blk_rq_bytes(req));
 }
 
 static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
@@ -1290,10 +1298,7 @@ static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
 	int ret = 0;
 
 	ret = mmc_flush_cache(card);
-	if (ret)
-		ret = -EIO;
-
-	blk_end_request_all(req, ret);
+	blk_end_request_all(req, ret ? BLK_STS_IOERR : BLK_STS_OK);
 }
 
 /*
@@ -1641,7 +1646,7 @@ static void mmc_blk_rw_cmd_abort(struct mmc_queue *mq, struct mmc_card *card,
 {
 	if (mmc_card_removed(card))
 		req->rq_flags |= RQF_QUIET;
-	while (blk_end_request(req, -EIO, blk_rq_cur_bytes(req)));
+	while (blk_end_request(req, BLK_STS_IOERR, blk_rq_cur_bytes(req)));
 	mmc_queue_req_free(mq, mqrq);
 }
 
@@ -1661,7 +1666,7 @@ static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req,
 	 */
 	if (mmc_card_removed(mq->card)) {
 		req->rq_flags |= RQF_QUIET;
-		blk_end_request_all(req, -EIO);
+		blk_end_request_all(req, BLK_STS_IOERR);
 		mmc_queue_req_free(mq, mqrq);
 		return;
 	}
@@ -1743,7 +1748,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 			 */
 			mmc_blk_reset_success(md, type);
 
-			req_pending = blk_end_request(old_req, 0,
+			req_pending = blk_end_request(old_req, BLK_STS_OK,
 						      brq->data.bytes_xfered);
 			/*
 			 * If the blk_end_request function returns non-zero even
@@ -1811,7 +1816,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 			 * time, so we only reach here after trying to
 			 * read a single sector.
 			 */
-			req_pending = blk_end_request(old_req, -EIO,
+			req_pending = blk_end_request(old_req, BLK_STS_IOERR,
 						      brq->data.blksz);
 			if (!req_pending) {
 				mmc_queue_req_free(mq, mq_rq);
@@ -1860,7 +1865,7 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 	ret = mmc_blk_part_switch(card, md);
 	if (ret) {
 		if (req) {
-			blk_end_request_all(req, -EIO);
+			blk_end_request_all(req, BLK_STS_IOERR);
 		}
 		goto out;
 	}
diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c
index 1304160de168..13ef162cf066 100644
--- a/drivers/mmc/core/pwrseq_simple.c
+++ b/drivers/mmc/core/pwrseq_simple.c
@@ -27,6 +27,7 @@ struct mmc_pwrseq_simple {
 	struct mmc_pwrseq pwrseq;
 	bool clk_enabled;
 	u32 post_power_on_delay_ms;
+	u32 power_off_delay_us;
 	struct clk *ext_clk;
 	struct gpio_descs *reset_gpios;
 };
@@ -78,6 +79,10 @@ static void mmc_pwrseq_simple_power_off(struct mmc_host *host)
 
 	mmc_pwrseq_simple_set_gpios_value(pwrseq, 1);
 
+	if (pwrseq->power_off_delay_us)
+		usleep_range(pwrseq->power_off_delay_us,
+			2 * pwrseq->power_off_delay_us);
+
 	if (!IS_ERR(pwrseq->ext_clk) && pwrseq->clk_enabled) {
 		clk_disable_unprepare(pwrseq->ext_clk);
 		pwrseq->clk_enabled = false;
@@ -119,6 +124,8 @@ static int mmc_pwrseq_simple_probe(struct platform_device *pdev)
 
 	device_property_read_u32(dev, "post-power-on-delay-ms",
 				 &pwrseq->post_power_on_delay_ms);
+	device_property_read_u32(dev, "power-off-delay-us",
+				 &pwrseq->power_off_delay_us);
 
 	pwrseq->pwrseq.dev = dev;
 	pwrseq->pwrseq.ops = &mmc_pwrseq_simple_ops;
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 5c37b6be3e7b..b659a28c8018 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -133,7 +133,7 @@ static void mmc_request_fn(struct request_queue *q)
 	if (!mq) {
 		while ((req = blk_fetch_request(q)) != NULL) {
 			req->rq_flags |= RQF_QUIET;
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 		}
 		return;
 	}
@@ -388,7 +388,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 		mmc_queue_setup_discard(mq->queue, card);
 
 	if (card->bouncesz) {
-		blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
 		blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512);
 		blk_queue_max_segments(mq->queue, card->bouncesz / 512);
 		blk_queue_max_segment_size(mq->queue, card->bouncesz);
diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c
index 772d0900026d..951d2cdd7888 100644
--- a/drivers/mmc/host/cavium-octeon.c
+++ b/drivers/mmc/host/cavium-octeon.c
@@ -108,7 +108,7 @@ static void octeon_mmc_release_bus(struct cvm_mmc_host *host)
 static void octeon_mmc_int_enable(struct cvm_mmc_host *host, u64 val)
 {
 	writeq(val, host->base + MIO_EMM_INT(host));
-	if (!host->dma_active || (host->dma_active && !host->has_ciu3))
+	if (!host->has_ciu3)
 		writeq(val, host->base + MIO_EMM_INT_EN(host));
 }
 
@@ -267,7 +267,7 @@ static int octeon_mmc_probe(struct platform_device *pdev)
 	}
 
 	host->global_pwr_gpiod = devm_gpiod_get_optional(&pdev->dev,
-							 "power-gpios",
+							 "power",
 							 GPIOD_OUT_HIGH);
 	if (IS_ERR(host->global_pwr_gpiod)) {
 		dev_err(&pdev->dev, "Invalid power GPIO\n");
@@ -288,11 +288,20 @@ static int octeon_mmc_probe(struct platform_device *pdev)
 		if (ret) {
 			dev_err(&pdev->dev, "Error populating slots\n");
 			octeon_mmc_set_shared_power(host, 0);
-			return ret;
+			goto error;
 		}
 		i++;
 	}
 	return 0;
+
+error:
+	for (i = 0; i < CAVIUM_MAX_MMC; i++) {
+		if (host->slot[i])
+			cvm_mmc_of_slot_remove(host->slot[i]);
+		if (host->slot_pdev[i])
+			of_platform_device_destroy(&host->slot_pdev[i]->dev, NULL);
+	}
+	return ret;
 }
 
 static int octeon_mmc_remove(struct platform_device *pdev)
diff --git a/drivers/mmc/host/cavium-thunderx.c b/drivers/mmc/host/cavium-thunderx.c
index fe3d77267cd6..b9cc95998799 100644
--- a/drivers/mmc/host/cavium-thunderx.c
+++ b/drivers/mmc/host/cavium-thunderx.c
@@ -146,6 +146,12 @@ static int thunder_mmc_probe(struct pci_dev *pdev,
 	return 0;
 
 error:
+	for (i = 0; i < CAVIUM_MAX_MMC; i++) {
+		if (host->slot[i])
+			cvm_mmc_of_slot_remove(host->slot[i]);
+		if (host->slot_pdev[i])
+			of_platform_device_destroy(&host->slot_pdev[i]->dev, NULL);
+	}
 	clk_disable_unprepare(host->clk);
 	return ret;
 }
diff --git a/drivers/mmc/host/cavium.c b/drivers/mmc/host/cavium.c
index 58b51ba6aabd..b8aaf0fdb77c 100644
--- a/drivers/mmc/host/cavium.c
+++ b/drivers/mmc/host/cavium.c
@@ -839,14 +839,14 @@ static void cvm_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		cvm_mmc_reset_bus(slot);
 		if (host->global_pwr_gpiod)
 			host->set_shared_power(host, 0);
-		else
+		else if (!IS_ERR(mmc->supply.vmmc))
 			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
 		break;
 
 	case MMC_POWER_UP:
 		if (host->global_pwr_gpiod)
 			host->set_shared_power(host, 1);
-		else
+		else if (!IS_ERR(mmc->supply.vmmc))
 			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd);
 		break;
 	}
@@ -968,20 +968,15 @@ static int cvm_mmc_of_parse(struct device *dev, struct cvm_mmc_slot *slot)
 		return -EINVAL;
 	}
 
-	mmc->supply.vmmc = devm_regulator_get_optional(dev, "vmmc");
-	if (IS_ERR(mmc->supply.vmmc)) {
-		if (PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER)
-			return -EPROBE_DEFER;
-		/*
-		 * Legacy Octeon firmware has no regulator entry, fall-back to
-		 * a hard-coded voltage to get a sane OCR.
-		 */
+	ret = mmc_regulator_get_supply(mmc);
+	if (ret == -EPROBE_DEFER)
+		return ret;
+	/*
+	 * Legacy Octeon firmware has no regulator entry, fall-back to
+	 * a hard-coded voltage to get a sane OCR.
+	 */
+	if (IS_ERR(mmc->supply.vmmc))
 		mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
-	} else {
-		ret = mmc_regulator_get_ocrmask(mmc->supply.vmmc);
-		if (ret > 0)
-			mmc->ocr_avail = ret;
-	}
 
 	/* Common MMC bindings */
 	ret = mmc_of_parse(mmc);
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 1842ed341af1..de962c2d5e00 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -210,6 +210,15 @@ static void meson_mmc_get_transfer_mode(struct mmc_host *mmc,
 	int i;
 	bool use_desc_chain_mode = true;
 
+	/*
+	 * Broken SDIO with AP6255-based WiFi on Khadas VIM Pro has been
+	 * reported. For some strange reason this occurs in descriptor
+	 * chain mode only. So let's fall back to bounce buffer mode
+	 * for command SD_IO_RW_EXTENDED.
+	 */
+	if (mrq->cmd->opcode == SD_IO_RW_EXTENDED)
+		return;
+
 	for_each_sg(data->sg, sg, data->sg_len, i)
 		/* check for 8 byte alignment */
 		if (sg->offset & 7) {
diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index 3275d4995812..61666d269771 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -187,7 +187,8 @@ static const struct sdhci_iproc_data iproc_cygnus_data = {
 };
 
 static const struct sdhci_pltfm_data sdhci_iproc_pltfm_data = {
-	.quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK,
+	.quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
+		  SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12,
 	.quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN,
 	.ops = &sdhci_iproc_ops,
 };
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 92fc3f7c538d..18957fea82ff 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -404,10 +404,9 @@ struct intel_host {
 	bool	d3_retune;
 };
 
-const u8 intel_dsm_uuid[] = {
-	0xA5, 0x3E, 0xC1, 0xF6, 0xCD, 0x65, 0x1F, 0x46,
-	0xAB, 0x7A, 0x29, 0xF7, 0xE8, 0xD5, 0xBD, 0x61,
-};
+static const guid_t intel_dsm_guid =
+	GUID_INIT(0xF6C13EA5, 0x65CD, 0x461F,
+		  0xAB, 0x7A, 0x29, 0xF7, 0xE8, 0xD5, 0xBD, 0x61);
 
 static int __intel_dsm(struct intel_host *intel_host, struct device *dev,
 		       unsigned int fn, u32 *result)
@@ -416,7 +415,7 @@ static int __intel_dsm(struct intel_host *intel_host, struct device *dev,
 	int err = 0;
 	size_t len;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), intel_dsm_uuid, 0, fn, NULL);
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), &intel_dsm_guid, 0, fn, NULL);
 	if (!obj)
 		return -EOPNOTSUPP;
 
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index 6356781f1cca..f7e26b031e76 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -787,14 +787,6 @@ int xenon_phy_adj(struct sdhci_host *host, struct mmc_ios *ios)
 	return ret;
 }
 
-void xenon_clean_phy(struct sdhci_host *host)
-{
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
-
-	kfree(priv->phy_params);
-}
-
 static int xenon_add_phy(struct device_node *np, struct sdhci_host *host,
 			 const char *phy_name)
 {
@@ -819,11 +811,7 @@ static int xenon_add_phy(struct device_node *np, struct sdhci_host *host,
 	if (ret)
 		return ret;
 
-	ret = xenon_emmc_phy_parse_param_dt(host, np, priv->phy_params);
-	if (ret)
-		xenon_clean_phy(host);
-
-	return ret;
+	return xenon_emmc_phy_parse_param_dt(host, np, priv->phy_params);
 }
 
 int xenon_phy_parse_dt(struct device_node *np, struct sdhci_host *host)
diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c
index 67246655315b..bc1781bb070b 100644
--- a/drivers/mmc/host/sdhci-xenon.c
+++ b/drivers/mmc/host/sdhci-xenon.c
@@ -486,7 +486,7 @@ static int xenon_probe(struct platform_device *pdev)
 
 	err = xenon_sdhc_prepare(host);
 	if (err)
-		goto clean_phy_param;
+		goto err_clk;
 
 	err = sdhci_add_host(host);
 	if (err)
@@ -496,8 +496,6 @@ static int xenon_probe(struct platform_device *pdev)
 
 remove_sdhc:
 	xenon_sdhc_unprepare(host);
-clean_phy_param:
-	xenon_clean_phy(host);
 err_clk:
 	clk_disable_unprepare(pltfm_host->clk);
 free_pltfm:
@@ -510,8 +508,6 @@ static int xenon_remove(struct platform_device *pdev)
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 
-	xenon_clean_phy(host);
-
 	sdhci_remove_host(host, 0);
 
 	xenon_sdhc_unprepare(host);
diff --git a/drivers/mmc/host/sdhci-xenon.h b/drivers/mmc/host/sdhci-xenon.h
index 6e6523ea01ce..73debb42dc2f 100644
--- a/drivers/mmc/host/sdhci-xenon.h
+++ b/drivers/mmc/host/sdhci-xenon.h
@@ -93,7 +93,6 @@ struct xenon_priv {
 };
 
 int xenon_phy_adj(struct sdhci_host *host, struct mmc_ios *ios);
-void xenon_clean_phy(struct sdhci_host *host);
 int xenon_phy_parse_dt(struct device_node *np,
 		       struct sdhci_host *host);
 void xenon_soc_pad_ctrl(struct sdhci_host *host,
diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index bd04e8bae010..e15a9733fcfd 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -2001,11 +2001,11 @@ static void __exit wbsd_drv_exit(void)
 module_init(wbsd_drv_init);
 module_exit(wbsd_drv_exit);
 #ifdef CONFIG_PNP
-module_param_named(nopnp, param_nopnp, uint, 0444);
+module_param_hw_named(nopnp, param_nopnp, uint, other, 0444);
 #endif
-module_param_named(io, param_io, uint, 0444);
-module_param_named(irq, param_irq, uint, 0444);
-module_param_named(dma, param_dma, int, 0444);
+module_param_hw_named(io, param_io, uint, ioport, 0444);
+module_param_hw_named(irq, param_irq, uint, irq, 0444);
+module_param_hw_named(dma, param_dma, int, dma, 0444);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Pierre Ossman <pierre@ossman.eu>");
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 9dca881bb378..56aa6b75213d 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -323,7 +323,8 @@ static void fixup_sst38vf640x_sectorsize(struct mtd_info *mtd)
 	 * it should report a size of 8KBytes (0x0020*256).
 	 */
 	cfi->cfiq->EraseRegionInfo[0] = 0x002003ff;
-	pr_warning("%s: Bad 38VF640x CFI data; adjusting sector size from 64 to 8KiB\n", mtd->name);
+	pr_warn("%s: Bad 38VF640x CFI data; adjusting sector size from 64 to 8KiB\n",
+		mtd->name);
 }
 
 static void fixup_s29gl064n_sectors(struct mtd_info *mtd)
@@ -333,7 +334,8 @@ static void fixup_s29gl064n_sectors(struct mtd_info *mtd)
 
 	if ((cfi->cfiq->EraseRegionInfo[0] & 0xffff) == 0x003f) {
 		cfi->cfiq->EraseRegionInfo[0] |= 0x0040;
-		pr_warning("%s: Bad S29GL064N CFI data; adjust from 64 to 128 sectors\n", mtd->name);
+		pr_warn("%s: Bad S29GL064N CFI data; adjust from 64 to 128 sectors\n",
+			mtd->name);
 	}
 }
 
@@ -344,7 +346,8 @@ static void fixup_s29gl032n_sectors(struct mtd_info *mtd)
 
 	if ((cfi->cfiq->EraseRegionInfo[1] & 0xffff) == 0x007e) {
 		cfi->cfiq->EraseRegionInfo[1] &= ~0x0040;
-		pr_warning("%s: Bad S29GL032N CFI data; adjust from 127 to 63 sectors\n", mtd->name);
+		pr_warn("%s: Bad S29GL032N CFI data; adjust from 127 to 63 sectors\n",
+			mtd->name);
 	}
 }
 
@@ -358,7 +361,8 @@ static void fixup_s29ns512p_sectors(struct mtd_info *mtd)
 	 * which is not permitted by CFI.
 	 */
 	cfi->cfiq->EraseRegionInfo[0] = 0x020001ff;
-	pr_warning("%s: Bad S29NS512P CFI data; adjust to 512 sectors\n", mtd->name);
+	pr_warn("%s: Bad S29NS512P CFI data; adjust to 512 sectors\n",
+		mtd->name);
 }
 
 /* Used to fix CFI-Tables of chips without Extended Query Tables */
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index aef1846b4de2..5a09a72ab112 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -17,12 +17,10 @@ obj-$(CONFIG_MTD_CK804XROM)	+= ck804xrom.o
 obj-$(CONFIG_MTD_TSUNAMI)	+= tsunami_flash.o
 obj-$(CONFIG_MTD_PXA2XX)	+= pxa2xx-flash.o
 obj-$(CONFIG_MTD_PHYSMAP)	+= physmap.o
-ifdef CONFIG_MTD_PHYSMAP_OF_VERSATILE
-physmap_of-objs += physmap_of_versatile.o
-endif
-ifdef CONFIG_MTD_PHYSMAP_OF_GEMINI
-physmap_of-objs += physmap_of_gemini.o
-endif
+physmap_of-objs-y		+= physmap_of_core.o
+physmap_of-objs-$(CONFIG_MTD_PHYSMAP_OF_VERSATILE) += physmap_of_versatile.o
+physmap_of-objs-$(CONFIG_MTD_PHYSMAP_OF_GEMINI) += physmap_of_gemini.o
+physmap_of-objs			:= $(physmap_of-objs-y)
 obj-$(CONFIG_MTD_PHYSMAP_OF)	+= physmap_of.o
 obj-$(CONFIG_MTD_PISMO)		+= pismo.o
 obj-$(CONFIG_MTD_PMC_MSP_EVM)   += pmcmsp-flash.o
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of_core.c
index 14e8909c9955..62fa6836f218 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of_core.c
@@ -116,32 +116,22 @@ static const char * const part_probe_types_def[] = {
 
 static const char * const *of_get_probes(struct device_node *dp)
 {
-	const char *cp;
-	int cplen;
-	unsigned int l;
-	unsigned int count;
 	const char **res;
+	int count;
 
-	cp = of_get_property(dp, "linux,part-probe", &cplen);
-	if (cp == NULL)
+	count = of_property_count_strings(dp, "linux,part-probe");
+	if (count < 0)
 		return part_probe_types_def;
 
-	count = 0;
-	for (l = 0; l != cplen; l++)
-		if (cp[l] == 0)
-			count++;
-
-	res = kzalloc((count + 1)*sizeof(*res), GFP_KERNEL);
+	res = kzalloc((count + 1) * sizeof(*res), GFP_KERNEL);
 	if (!res)
 		return NULL;
-	count = 0;
-	while (cplen > 0) {
-		res[count] = cp;
-		l = strlen(cp) + 1;
-		cp += l;
-		cplen -= l;
-		count++;
-	}
+
+	count = of_property_read_string_array(dp, "linux,part-probe", res,
+					      count);
+	if (count < 0)
+		return NULL;
+
 	return res;
 }
 
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 6b8d5cd7dbf6..f336a9b85576 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -73,7 +73,7 @@ static void blktrans_dev_put(struct mtd_blktrans_dev *dev)
 }
 
 
-static int do_blktrans_request(struct mtd_blktrans_ops *tr,
+static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
 			       struct mtd_blktrans_dev *dev,
 			       struct request *req)
 {
@@ -84,33 +84,37 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
 	buf = bio_data(req->bio);
 
-	if (req_op(req) == REQ_OP_FLUSH)
-		return tr->flush(dev);
+	if (req_op(req) == REQ_OP_FLUSH) {
+		if (tr->flush(dev))
+			return BLK_STS_IOERR;
+		return BLK_STS_OK;
+	}
 
 	if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
 	    get_capacity(req->rq_disk))
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	switch (req_op(req)) {
 	case REQ_OP_DISCARD:
-		return tr->discard(dev, block, nsect);
+		if (tr->discard(dev, block, nsect))
+			return BLK_STS_IOERR;
+		return BLK_STS_OK;
 	case REQ_OP_READ:
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->readsect(dev, block, buf))
-				return -EIO;
+				return BLK_STS_IOERR;
 		rq_flush_dcache_pages(req);
-		return 0;
+		return BLK_STS_OK;
 	case REQ_OP_WRITE:
 		if (!tr->writesect)
-			return -EIO;
+			return BLK_STS_IOERR;
 
 		rq_flush_dcache_pages(req);
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->writesect(dev, block, buf))
-				return -EIO;
-		return 0;
+				return BLK_STS_IOERR;
 	default:
-		return -EIO;
+		return BLK_STS_IOERR;
 	}
 }
 
@@ -132,7 +136,7 @@ static void mtd_blktrans_work(struct work_struct *work)
 	spin_lock_irq(rq->queue_lock);
 
 	while (1) {
-		int res;
+		blk_status_t res;
 
 		dev->bg_stop = false;
 		if (!req && !(req = blk_fetch_request(rq))) {
@@ -178,7 +182,7 @@ static void mtd_blktrans_request(struct request_queue *rq)
 
 	if (!dev)
 		while ((req = blk_fetch_request(rq)) != NULL)
-			__blk_end_request_all(req, -ENODEV);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 	else
 		queue_work(dev->wq, &dev->work);
 }
@@ -413,6 +417,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	new->rq->queuedata = new;
 	blk_queue_logical_block_size(new->rq, tr->blksize);
 
+	blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq);
 	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq);
 
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index c40e2c951758..f12879a3d4ff 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -1235,10 +1235,8 @@ static int mtdswap_show(struct seq_file *s, void *data)
 
 		if (root->rb_node) {
 			count[i] = d->trees[i].count;
-			min[i] = rb_entry(rb_first(root), struct swap_eb,
-					rb)->erase_count;
-			max[i] = rb_entry(rb_last(root), struct swap_eb,
-					rb)->erase_count;
+			min[i] = MTDSWAP_ECNT_MIN(root);
+			max[i] = MTDSWAP_ECNT_MAX(root);
 		} else
 			count[i] = 0;
 	}
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 6d4d5672d1d8..c3029528063b 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -13,7 +13,6 @@ config MTD_NAND_ECC_SMC
 menuconfig MTD_NAND
 	tristate "NAND Device Support"
 	depends on MTD
-	select MTD_NAND_IDS
 	select MTD_NAND_ECC
 	help
 	  This enables support for accessing all type of NAND flash
@@ -60,17 +59,6 @@ config MTD_NAND_DENALI_DT
 	  Enable the driver for NAND flash on platforms using a Denali NAND
 	  controller as a DT device.
 
-config MTD_NAND_DENALI_SCRATCH_REG_ADDR
-        hex "Denali NAND size scratch register address"
-        default "0xFF108018"
-        depends on MTD_NAND_DENALI_PCI
-        help
-          Some platforms place the NAND chip size in a scratch register
-          because (some versions of) the driver aren't able to automatically
-          determine the size of certain chips. Set the address of the
-          scratch register here to enable this feature. On Intel Moorestown
-          boards, the scratch register is at 0xFF108018.
-
 config MTD_NAND_GPIO
 	tristate "GPIO assisted NAND Flash driver"
 	depends on GPIOLIB || COMPILE_TEST
@@ -109,9 +97,6 @@ config MTD_NAND_OMAP_BCH
 config MTD_NAND_OMAP_BCH_BUILD
 	def_tristate MTD_NAND_OMAP2 && MTD_NAND_OMAP_BCH
 
-config MTD_NAND_IDS
-	tristate
-
 config MTD_NAND_RICOH
 	tristate "Ricoh xD card reader"
 	default n
@@ -321,11 +306,11 @@ config MTD_NAND_CS553X
 	  If you say "m", the module will be called cs553x_nand.
 
 config MTD_NAND_ATMEL
-	tristate "Support for NAND Flash / SmartMedia on AT91 and AVR32"
-	depends on ARCH_AT91 || AVR32
+	tristate "Support for NAND Flash / SmartMedia on AT91"
+	depends on ARCH_AT91
 	help
 	  Enables support for NAND Flash / Smart Media Card interface
-	  on Atmel AT91 and AVR32 processors.
+	  on Atmel AT91 processors.
 
 config MTD_NAND_PXA3xx
 	tristate "NAND support on PXA3xx and Armada 370/XP"
@@ -443,7 +428,7 @@ config MTD_NAND_FSL_ELBC
 
 config MTD_NAND_FSL_IFC
 	tristate "NAND support for Freescale IFC controller"
-	depends on FSL_SOC || ARCH_LAYERSCAPE
+	depends on FSL_SOC || ARCH_LAYERSCAPE || SOC_LS1021A
 	select FSL_IFC
 	select MEMORY
 	help
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 19a66e404d5b..ade5fc4c3819 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -5,7 +5,6 @@
 obj-$(CONFIG_MTD_NAND)			+= nand.o
 obj-$(CONFIG_MTD_NAND_ECC)		+= nand_ecc.o
 obj-$(CONFIG_MTD_NAND_BCH)		+= nand_bch.o
-obj-$(CONFIG_MTD_NAND_IDS)		+= nand_ids.o
 obj-$(CONFIG_MTD_SM_COMMON) 		+= sm_common.o
 
 obj-$(CONFIG_MTD_NAND_CAFE)		+= cafe_nand.o
@@ -25,7 +24,7 @@ obj-$(CONFIG_MTD_NAND_SHARPSL)		+= sharpsl.o
 obj-$(CONFIG_MTD_NAND_NANDSIM)		+= nandsim.o
 obj-$(CONFIG_MTD_NAND_CS553X)		+= cs553x_nand.o
 obj-$(CONFIG_MTD_NAND_NDFC)		+= ndfc.o
-obj-$(CONFIG_MTD_NAND_ATMEL)		+= atmel_nand.o
+obj-$(CONFIG_MTD_NAND_ATMEL)		+= atmel/
 obj-$(CONFIG_MTD_NAND_GPIO)		+= gpio.o
 omap2_nand-objs := omap2.o
 obj-$(CONFIG_MTD_NAND_OMAP2) 		+= omap2_nand.o
@@ -61,4 +60,10 @@ obj-$(CONFIG_MTD_NAND_BRCMNAND)		+= brcmnand/
 obj-$(CONFIG_MTD_NAND_QCOM)		+= qcom_nandc.o
 obj-$(CONFIG_MTD_NAND_MTK)		+= mtk_nand.o mtk_ecc.o
 
-nand-objs := nand_base.o nand_bbt.o nand_timings.o
+nand-objs := nand_base.o nand_bbt.o nand_timings.o nand_ids.o
+nand-objs += nand_amd.o
+nand-objs += nand_hynix.o
+nand-objs += nand_macronix.o
+nand-objs += nand_micron.o
+nand-objs += nand_samsung.o
+nand-objs += nand_toshiba.o
diff --git a/drivers/mtd/nand/atmel/Makefile b/drivers/mtd/nand/atmel/Makefile
new file mode 100644
index 000000000000..288db4f38a8f
--- /dev/null
+++ b/drivers/mtd/nand/atmel/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_MTD_NAND_ATMEL)	+= atmel-nand-controller.o atmel-pmecc.o
+
+atmel-nand-controller-objs	:= nand-controller.o
+atmel-pmecc-objs		:= pmecc.o
diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c
new file mode 100644
index 000000000000..3b2446896147
--- /dev/null
+++ b/drivers/mtd/nand/atmel/nand-controller.c
@@ -0,0 +1,2197 @@
+/*
+ * Copyright 2017 ATMEL
+ * Copyright 2017 Free Electrons
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * Derived from the atmel_nand.c driver which contained the following
+ * copyrights:
+ *
+ *   Copyright 2003 Rick Bronson
+ *
+ *   Derived from drivers/mtd/nand/autcpu12.c
+ *	Copyright 2001 Thomas Gleixner (gleixner@autronix.de)
+ *
+ *   Derived from drivers/mtd/spia.c
+ *	Copyright 2000 Steven J. Hill (sjhill@cotw.com)
+ *
+ *
+ *   Add Hardware ECC support for AT91SAM9260 / AT91SAM9263
+ *	Richard Genoud (richard.genoud@gmail.com), Adeneo Copyright 2007
+ *
+ *   Derived from Das U-Boot source code
+ *	(u-boot-1.1.5/board/atmel/at91sam9263ek/nand.c)
+ *	Copyright 2006 ATMEL Rousset, Lacressonniere Nicolas
+ *
+ *   Add Programmable Multibit ECC support for various AT91 SoC
+ *	Copyright 2012 ATMEL, Hong Xu
+ *
+ *   Add Nand Flash Controller support for SAMA5 SoC
+ *	Copyright 2013 ATMEL, Josh Wu (josh.wu@atmel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * A few words about the naming convention in this file. This convention
+ * applies to structure and function names.
+ *
+ * Prefixes:
+ *
+ * - atmel_nand_: all generic structures/functions
+ * - atmel_smc_nand_: all structures/functions specific to the SMC interface
+ *		      (at91sam9 and avr32 SoCs)
+ * - atmel_hsmc_nand_: all structures/functions specific to the HSMC interface
+ *		       (sama5 SoCs and later)
+ * - atmel_nfc_: all structures/functions used to manipulate the NFC sub-block
+ *		 that is available in the HSMC block
+ * - <soc>_nand_: all SoC specific structures/functions
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/genalloc.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/syscon.h>
+#include <linux/mfd/syscon/atmel-matrix.h>
+#include <linux/module.h>
+#include <linux/mtd/nand.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/iopoll.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/atmel.h>
+#include <linux/regmap.h>
+
+#include "pmecc.h"
+
+#define ATMEL_HSMC_NFC_CFG			0x0
+#define ATMEL_HSMC_NFC_CFG_SPARESIZE(x)		(((x) / 4) << 24)
+#define ATMEL_HSMC_NFC_CFG_SPARESIZE_MASK	GENMASK(30, 24)
+#define ATMEL_HSMC_NFC_CFG_DTO(cyc, mul)	(((cyc) << 16) | ((mul) << 20))
+#define ATMEL_HSMC_NFC_CFG_DTO_MAX		GENMASK(22, 16)
+#define ATMEL_HSMC_NFC_CFG_RBEDGE		BIT(13)
+#define ATMEL_HSMC_NFC_CFG_FALLING_EDGE		BIT(12)
+#define ATMEL_HSMC_NFC_CFG_RSPARE		BIT(9)
+#define ATMEL_HSMC_NFC_CFG_WSPARE		BIT(8)
+#define ATMEL_HSMC_NFC_CFG_PAGESIZE_MASK	GENMASK(2, 0)
+#define ATMEL_HSMC_NFC_CFG_PAGESIZE(x)		(fls((x) / 512) - 1)
+
+#define ATMEL_HSMC_NFC_CTRL			0x4
+#define ATMEL_HSMC_NFC_CTRL_EN			BIT(0)
+#define ATMEL_HSMC_NFC_CTRL_DIS			BIT(1)
+
+#define ATMEL_HSMC_NFC_SR			0x8
+#define ATMEL_HSMC_NFC_IER			0xc
+#define ATMEL_HSMC_NFC_IDR			0x10
+#define ATMEL_HSMC_NFC_IMR			0x14
+#define ATMEL_HSMC_NFC_SR_ENABLED		BIT(1)
+#define ATMEL_HSMC_NFC_SR_RB_RISE		BIT(4)
+#define ATMEL_HSMC_NFC_SR_RB_FALL		BIT(5)
+#define ATMEL_HSMC_NFC_SR_BUSY			BIT(8)
+#define ATMEL_HSMC_NFC_SR_WR			BIT(11)
+#define ATMEL_HSMC_NFC_SR_CSID			GENMASK(14, 12)
+#define ATMEL_HSMC_NFC_SR_XFRDONE		BIT(16)
+#define ATMEL_HSMC_NFC_SR_CMDDONE		BIT(17)
+#define ATMEL_HSMC_NFC_SR_DTOE			BIT(20)
+#define ATMEL_HSMC_NFC_SR_UNDEF			BIT(21)
+#define ATMEL_HSMC_NFC_SR_AWB			BIT(22)
+#define ATMEL_HSMC_NFC_SR_NFCASE		BIT(23)
+#define ATMEL_HSMC_NFC_SR_ERRORS		(ATMEL_HSMC_NFC_SR_DTOE | \
+						 ATMEL_HSMC_NFC_SR_UNDEF | \
+						 ATMEL_HSMC_NFC_SR_AWB | \
+						 ATMEL_HSMC_NFC_SR_NFCASE)
+#define ATMEL_HSMC_NFC_SR_RBEDGE(x)		BIT((x) + 24)
+
+#define ATMEL_HSMC_NFC_ADDR			0x18
+#define ATMEL_HSMC_NFC_BANK			0x1c
+
+#define ATMEL_NFC_MAX_RB_ID			7
+
+#define ATMEL_NFC_SRAM_SIZE			0x2400
+
+#define ATMEL_NFC_CMD(pos, cmd)			((cmd) << (((pos) * 8) + 2))
+#define ATMEL_NFC_VCMD2				BIT(18)
+#define ATMEL_NFC_ACYCLE(naddrs)		((naddrs) << 19)
+#define ATMEL_NFC_CSID(cs)			((cs) << 22)
+#define ATMEL_NFC_DATAEN			BIT(25)
+#define ATMEL_NFC_NFCWR				BIT(26)
+
+#define ATMEL_NFC_MAX_ADDR_CYCLES		5
+
+#define ATMEL_NAND_ALE_OFFSET			BIT(21)
+#define ATMEL_NAND_CLE_OFFSET			BIT(22)
+
+#define DEFAULT_TIMEOUT_MS			1000
+#define MIN_DMA_LEN				128
+
+enum atmel_nand_rb_type {
+	ATMEL_NAND_NO_RB,
+	ATMEL_NAND_NATIVE_RB,
+	ATMEL_NAND_GPIO_RB,
+};
+
+struct atmel_nand_rb {
+	enum atmel_nand_rb_type type;
+	union {
+		struct gpio_desc *gpio;
+		int id;
+	};
+};
+
+struct atmel_nand_cs {
+	int id;
+	struct atmel_nand_rb rb;
+	struct gpio_desc *csgpio;
+	struct {
+		void __iomem *virt;
+		dma_addr_t dma;
+	} io;
+};
+
+struct atmel_nand {
+	struct list_head node;
+	struct device *dev;
+	struct nand_chip base;
+	struct atmel_nand_cs *activecs;
+	struct atmel_pmecc_user *pmecc;
+	struct gpio_desc *cdgpio;
+	int numcs;
+	struct atmel_nand_cs cs[];
+};
+
+static inline struct atmel_nand *to_atmel_nand(struct nand_chip *chip)
+{
+	return container_of(chip, struct atmel_nand, base);
+}
+
+enum atmel_nfc_data_xfer {
+	ATMEL_NFC_NO_DATA,
+	ATMEL_NFC_READ_DATA,
+	ATMEL_NFC_WRITE_DATA,
+};
+
+struct atmel_nfc_op {
+	u8 cs;
+	u8 ncmds;
+	u8 cmds[2];
+	u8 naddrs;
+	u8 addrs[5];
+	enum atmel_nfc_data_xfer data;
+	u32 wait;
+	u32 errors;
+};
+
+struct atmel_nand_controller;
+struct atmel_nand_controller_caps;
+
+struct atmel_nand_controller_ops {
+	int (*probe)(struct platform_device *pdev,
+		     const struct atmel_nand_controller_caps *caps);
+	int (*remove)(struct atmel_nand_controller *nc);
+	void (*nand_init)(struct atmel_nand_controller *nc,
+			  struct atmel_nand *nand);
+	int (*ecc_init)(struct atmel_nand *nand);
+};
+
+struct atmel_nand_controller_caps {
+	bool has_dma;
+	bool legacy_of_bindings;
+	u32 ale_offs;
+	u32 cle_offs;
+	const struct atmel_nand_controller_ops *ops;
+};
+
+struct atmel_nand_controller {
+	struct nand_hw_control base;
+	const struct atmel_nand_controller_caps *caps;
+	struct device *dev;
+	struct regmap *smc;
+	struct dma_chan *dmac;
+	struct atmel_pmecc *pmecc;
+	struct list_head chips;
+	struct clk *mck;
+};
+
+static inline struct atmel_nand_controller *
+to_nand_controller(struct nand_hw_control *ctl)
+{
+	return container_of(ctl, struct atmel_nand_controller, base);
+}
+
+struct atmel_smc_nand_controller {
+	struct atmel_nand_controller base;
+	struct regmap *matrix;
+	unsigned int ebi_csa_offs;
+};
+
+static inline struct atmel_smc_nand_controller *
+to_smc_nand_controller(struct nand_hw_control *ctl)
+{
+	return container_of(to_nand_controller(ctl),
+			    struct atmel_smc_nand_controller, base);
+}
+
+struct atmel_hsmc_nand_controller {
+	struct atmel_nand_controller base;
+	struct {
+		struct gen_pool *pool;
+		void __iomem *virt;
+		dma_addr_t dma;
+	} sram;
+	struct regmap *io;
+	struct atmel_nfc_op op;
+	struct completion complete;
+	int irq;
+
+	/* Only used when instantiating from legacy DT bindings. */
+	struct clk *clk;
+};
+
+static inline struct atmel_hsmc_nand_controller *
+to_hsmc_nand_controller(struct nand_hw_control *ctl)
+{
+	return container_of(to_nand_controller(ctl),
+			    struct atmel_hsmc_nand_controller, base);
+}
+
+static bool atmel_nfc_op_done(struct atmel_nfc_op *op, u32 status)
+{
+	op->errors |= status & ATMEL_HSMC_NFC_SR_ERRORS;
+	op->wait ^= status & op->wait;
+
+	return !op->wait || op->errors;
+}
+
+static irqreturn_t atmel_nfc_interrupt(int irq, void *data)
+{
+	struct atmel_hsmc_nand_controller *nc = data;
+	u32 sr, rcvd;
+	bool done;
+
+	regmap_read(nc->base.smc, ATMEL_HSMC_NFC_SR, &sr);
+
+	rcvd = sr & (nc->op.wait | ATMEL_HSMC_NFC_SR_ERRORS);
+	done = atmel_nfc_op_done(&nc->op, sr);
+
+	if (rcvd)
+		regmap_write(nc->base.smc, ATMEL_HSMC_NFC_IDR, rcvd);
+
+	if (done)
+		complete(&nc->complete);
+
+	return rcvd ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static int atmel_nfc_wait(struct atmel_hsmc_nand_controller *nc, bool poll,
+			  unsigned int timeout_ms)
+{
+	int ret;
+
+	if (!timeout_ms)
+		timeout_ms = DEFAULT_TIMEOUT_MS;
+
+	if (poll) {
+		u32 status;
+
+		ret = regmap_read_poll_timeout(nc->base.smc,
+					       ATMEL_HSMC_NFC_SR, status,
+					       atmel_nfc_op_done(&nc->op,
+								 status),
+					       0, timeout_ms * 1000);
+	} else {
+		init_completion(&nc->complete);
+		regmap_write(nc->base.smc, ATMEL_HSMC_NFC_IER,
+			     nc->op.wait | ATMEL_HSMC_NFC_SR_ERRORS);
+		ret = wait_for_completion_timeout(&nc->complete,
+						msecs_to_jiffies(timeout_ms));
+		if (!ret)
+			ret = -ETIMEDOUT;
+		else
+			ret = 0;
+
+		regmap_write(nc->base.smc, ATMEL_HSMC_NFC_IDR, 0xffffffff);
+	}
+
+	if (nc->op.errors & ATMEL_HSMC_NFC_SR_DTOE) {
+		dev_err(nc->base.dev, "Waiting NAND R/B Timeout\n");
+		ret = -ETIMEDOUT;
+	}
+
+	if (nc->op.errors & ATMEL_HSMC_NFC_SR_UNDEF) {
+		dev_err(nc->base.dev, "Access to an undefined area\n");
+		ret = -EIO;
+	}
+
+	if (nc->op.errors & ATMEL_HSMC_NFC_SR_AWB) {
+		dev_err(nc->base.dev, "Access while busy\n");
+		ret = -EIO;
+	}
+
+	if (nc->op.errors & ATMEL_HSMC_NFC_SR_NFCASE) {
+		dev_err(nc->base.dev, "Wrong access size\n");
+		ret = -EIO;
+	}
+
+	return ret;
+}
+
+static void atmel_nand_dma_transfer_finished(void *data)
+{
+	struct completion *finished = data;
+
+	complete(finished);
+}
+
+static int atmel_nand_dma_transfer(struct atmel_nand_controller *nc,
+				   void *buf, dma_addr_t dev_dma, size_t len,
+				   enum dma_data_direction dir)
+{
+	DECLARE_COMPLETION_ONSTACK(finished);
+	dma_addr_t src_dma, dst_dma, buf_dma;
+	struct dma_async_tx_descriptor *tx;
+	dma_cookie_t cookie;
+
+	buf_dma = dma_map_single(nc->dev, buf, len, dir);
+	if (dma_mapping_error(nc->dev, dev_dma)) {
+		dev_err(nc->dev,
+			"Failed to prepare a buffer for DMA access\n");
+		goto err;
+	}
+
+	if (dir == DMA_FROM_DEVICE) {
+		src_dma = dev_dma;
+		dst_dma = buf_dma;
+	} else {
+		src_dma = buf_dma;
+		dst_dma = dev_dma;
+	}
+
+	tx = dmaengine_prep_dma_memcpy(nc->dmac, dst_dma, src_dma, len,
+				       DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+	if (!tx) {
+		dev_err(nc->dev, "Failed to prepare DMA memcpy\n");
+		goto err_unmap;
+	}
+
+	tx->callback = atmel_nand_dma_transfer_finished;
+	tx->callback_param = &finished;
+
+	cookie = dmaengine_submit(tx);
+	if (dma_submit_error(cookie)) {
+		dev_err(nc->dev, "Failed to do DMA tx_submit\n");
+		goto err_unmap;
+	}
+
+	dma_async_issue_pending(nc->dmac);
+	wait_for_completion(&finished);
+
+	return 0;
+
+err_unmap:
+	dma_unmap_single(nc->dev, buf_dma, len, dir);
+
+err:
+	dev_dbg(nc->dev, "Fall back to CPU I/O\n");
+
+	return -EIO;
+}
+
+static u8 atmel_nand_read_byte(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+
+	return ioread8(nand->activecs->io.virt);
+}
+
+static u16 atmel_nand_read_word(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+
+	return ioread16(nand->activecs->io.virt);
+}
+
+static void atmel_nand_write_byte(struct mtd_info *mtd, u8 byte)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+
+	if (chip->options & NAND_BUSWIDTH_16)
+		iowrite16(byte | (byte << 8), nand->activecs->io.virt);
+	else
+		iowrite8(byte, nand->activecs->io.virt);
+}
+
+static void atmel_nand_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_nand_controller *nc;
+
+	nc = to_nand_controller(chip->controller);
+
+	/*
+	 * If the controller supports DMA, the buffer address is DMA-able and
+	 * len is long enough to make DMA transfers profitable, let's trigger
+	 * a DMA transfer. If it fails, fallback to PIO mode.
+	 */
+	if (nc->dmac && virt_addr_valid(buf) &&
+	    len >= MIN_DMA_LEN &&
+	    !atmel_nand_dma_transfer(nc, buf, nand->activecs->io.dma, len,
+				     DMA_FROM_DEVICE))
+		return;
+
+	if (chip->options & NAND_BUSWIDTH_16)
+		ioread16_rep(nand->activecs->io.virt, buf, len / 2);
+	else
+		ioread8_rep(nand->activecs->io.virt, buf, len);
+}
+
+static void atmel_nand_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_nand_controller *nc;
+
+	nc = to_nand_controller(chip->controller);
+
+	/*
+	 * If the controller supports DMA, the buffer address is DMA-able and
+	 * len is long enough to make DMA transfers profitable, let's trigger
+	 * a DMA transfer. If it fails, fallback to PIO mode.
+	 */
+	if (nc->dmac && virt_addr_valid(buf) &&
+	    len >= MIN_DMA_LEN &&
+	    !atmel_nand_dma_transfer(nc, (void *)buf, nand->activecs->io.dma,
+				     len, DMA_TO_DEVICE))
+		return;
+
+	if (chip->options & NAND_BUSWIDTH_16)
+		iowrite16_rep(nand->activecs->io.virt, buf, len / 2);
+	else
+		iowrite8_rep(nand->activecs->io.virt, buf, len);
+}
+
+static int atmel_nand_dev_ready(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+
+	return gpiod_get_value(nand->activecs->rb.gpio);
+}
+
+static void atmel_nand_select_chip(struct mtd_info *mtd, int cs)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+
+	if (cs < 0 || cs >= nand->numcs) {
+		nand->activecs = NULL;
+		chip->dev_ready = NULL;
+		return;
+	}
+
+	nand->activecs = &nand->cs[cs];
+
+	if (nand->activecs->rb.type == ATMEL_NAND_GPIO_RB)
+		chip->dev_ready = atmel_nand_dev_ready;
+}
+
+static int atmel_hsmc_nand_dev_ready(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_hsmc_nand_controller *nc;
+	u32 status;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	regmap_read(nc->base.smc, ATMEL_HSMC_NFC_SR, &status);
+
+	return status & ATMEL_HSMC_NFC_SR_RBEDGE(nand->activecs->rb.id);
+}
+
+static void atmel_hsmc_nand_select_chip(struct mtd_info *mtd, int cs)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_hsmc_nand_controller *nc;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	atmel_nand_select_chip(mtd, cs);
+
+	if (!nand->activecs) {
+		regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CTRL,
+			     ATMEL_HSMC_NFC_CTRL_DIS);
+		return;
+	}
+
+	if (nand->activecs->rb.type == ATMEL_NAND_NATIVE_RB)
+		chip->dev_ready = atmel_hsmc_nand_dev_ready;
+
+	regmap_update_bits(nc->base.smc, ATMEL_HSMC_NFC_CFG,
+			   ATMEL_HSMC_NFC_CFG_PAGESIZE_MASK |
+			   ATMEL_HSMC_NFC_CFG_SPARESIZE_MASK |
+			   ATMEL_HSMC_NFC_CFG_RSPARE |
+			   ATMEL_HSMC_NFC_CFG_WSPARE,
+			   ATMEL_HSMC_NFC_CFG_PAGESIZE(mtd->writesize) |
+			   ATMEL_HSMC_NFC_CFG_SPARESIZE(mtd->oobsize) |
+			   ATMEL_HSMC_NFC_CFG_RSPARE);
+	regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CTRL,
+		     ATMEL_HSMC_NFC_CTRL_EN);
+}
+
+static int atmel_nfc_exec_op(struct atmel_hsmc_nand_controller *nc, bool poll)
+{
+	u8 *addrs = nc->op.addrs;
+	unsigned int op = 0;
+	u32 addr, val;
+	int i, ret;
+
+	nc->op.wait = ATMEL_HSMC_NFC_SR_CMDDONE;
+
+	for (i = 0; i < nc->op.ncmds; i++)
+		op |= ATMEL_NFC_CMD(i, nc->op.cmds[i]);
+
+	if (nc->op.naddrs == ATMEL_NFC_MAX_ADDR_CYCLES)
+		regmap_write(nc->base.smc, ATMEL_HSMC_NFC_ADDR, *addrs++);
+
+	op |= ATMEL_NFC_CSID(nc->op.cs) |
+	      ATMEL_NFC_ACYCLE(nc->op.naddrs);
+
+	if (nc->op.ncmds > 1)
+		op |= ATMEL_NFC_VCMD2;
+
+	addr = addrs[0] | (addrs[1] << 8) | (addrs[2] << 16) |
+	       (addrs[3] << 24);
+
+	if (nc->op.data != ATMEL_NFC_NO_DATA) {
+		op |= ATMEL_NFC_DATAEN;
+		nc->op.wait |= ATMEL_HSMC_NFC_SR_XFRDONE;
+
+		if (nc->op.data == ATMEL_NFC_WRITE_DATA)
+			op |= ATMEL_NFC_NFCWR;
+	}
+
+	/* Clear all flags. */
+	regmap_read(nc->base.smc, ATMEL_HSMC_NFC_SR, &val);
+
+	/* Send the command. */
+	regmap_write(nc->io, op, addr);
+
+	ret = atmel_nfc_wait(nc, poll, 0);
+	if (ret)
+		dev_err(nc->base.dev,
+			"Failed to send NAND command (err = %d)!",
+			ret);
+
+	/* Reset the op state. */
+	memset(&nc->op, 0, sizeof(nc->op));
+
+	return ret;
+}
+
+static void atmel_hsmc_nand_cmd_ctrl(struct mtd_info *mtd, int dat,
+				     unsigned int ctrl)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_hsmc_nand_controller *nc;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	if (ctrl & NAND_ALE) {
+		if (nc->op.naddrs == ATMEL_NFC_MAX_ADDR_CYCLES)
+			return;
+
+		nc->op.addrs[nc->op.naddrs++] = dat;
+	} else if (ctrl & NAND_CLE) {
+		if (nc->op.ncmds > 1)
+			return;
+
+		nc->op.cmds[nc->op.ncmds++] = dat;
+	}
+
+	if (dat == NAND_CMD_NONE) {
+		nc->op.cs = nand->activecs->id;
+		atmel_nfc_exec_op(nc, true);
+	}
+}
+
+static void atmel_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
+				unsigned int ctrl)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_nand_controller *nc;
+
+	nc = to_nand_controller(chip->controller);
+
+	if ((ctrl & NAND_CTRL_CHANGE) && nand->activecs->csgpio) {
+		if (ctrl & NAND_NCE)
+			gpiod_set_value(nand->activecs->csgpio, 0);
+		else
+			gpiod_set_value(nand->activecs->csgpio, 1);
+	}
+
+	if (ctrl & NAND_ALE)
+		writeb(cmd, nand->activecs->io.virt + nc->caps->ale_offs);
+	else if (ctrl & NAND_CLE)
+		writeb(cmd, nand->activecs->io.virt + nc->caps->cle_offs);
+}
+
+static void atmel_nfc_copy_to_sram(struct nand_chip *chip, const u8 *buf,
+				   bool oob_required)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_hsmc_nand_controller *nc;
+	int ret = -EIO;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	if (nc->base.dmac)
+		ret = atmel_nand_dma_transfer(&nc->base, (void *)buf,
+					      nc->sram.dma, mtd->writesize,
+					      DMA_TO_DEVICE);
+
+	/* Falling back to CPU copy. */
+	if (ret)
+		memcpy_toio(nc->sram.virt, buf, mtd->writesize);
+
+	if (oob_required)
+		memcpy_toio(nc->sram.virt + mtd->writesize, chip->oob_poi,
+			    mtd->oobsize);
+}
+
+static void atmel_nfc_copy_from_sram(struct nand_chip *chip, u8 *buf,
+				     bool oob_required)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_hsmc_nand_controller *nc;
+	int ret = -EIO;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	if (nc->base.dmac)
+		ret = atmel_nand_dma_transfer(&nc->base, buf, nc->sram.dma,
+					      mtd->writesize, DMA_FROM_DEVICE);
+
+	/* Falling back to CPU copy. */
+	if (ret)
+		memcpy_fromio(buf, nc->sram.virt, mtd->writesize);
+
+	if (oob_required)
+		memcpy_fromio(chip->oob_poi, nc->sram.virt + mtd->writesize,
+			      mtd->oobsize);
+}
+
+static void atmel_nfc_set_op_addr(struct nand_chip *chip, int page, int column)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_hsmc_nand_controller *nc;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	if (column >= 0) {
+		nc->op.addrs[nc->op.naddrs++] = column;
+
+		/*
+		 * 2 address cycles for the column offset on large page NANDs.
+		 */
+		if (mtd->writesize > 512)
+			nc->op.addrs[nc->op.naddrs++] = column >> 8;
+	}
+
+	if (page >= 0) {
+		nc->op.addrs[nc->op.naddrs++] = page;
+		nc->op.addrs[nc->op.naddrs++] = page >> 8;
+
+		if ((mtd->writesize > 512 && chip->chipsize > SZ_128M) ||
+		    (mtd->writesize <= 512 && chip->chipsize > SZ_32M))
+			nc->op.addrs[nc->op.naddrs++] = page >> 16;
+	}
+}
+
+static int atmel_nand_pmecc_enable(struct nand_chip *chip, int op, bool raw)
+{
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_nand_controller *nc;
+	int ret;
+
+	nc = to_nand_controller(chip->controller);
+
+	if (raw)
+		return 0;
+
+	ret = atmel_pmecc_enable(nand->pmecc, op);
+	if (ret)
+		dev_err(nc->dev,
+			"Failed to enable ECC engine (err = %d)\n", ret);
+
+	return ret;
+}
+
+static void atmel_nand_pmecc_disable(struct nand_chip *chip, bool raw)
+{
+	struct atmel_nand *nand = to_atmel_nand(chip);
+
+	if (!raw)
+		atmel_pmecc_disable(nand->pmecc);
+}
+
+static int atmel_nand_pmecc_generate_eccbytes(struct nand_chip *chip, bool raw)
+{
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand_controller *nc;
+	struct mtd_oob_region oobregion;
+	void *eccbuf;
+	int ret, i;
+
+	nc = to_nand_controller(chip->controller);
+
+	if (raw)
+		return 0;
+
+	ret = atmel_pmecc_wait_rdy(nand->pmecc);
+	if (ret) {
+		dev_err(nc->dev,
+			"Failed to transfer NAND page data (err = %d)\n",
+			ret);
+		return ret;
+	}
+
+	mtd_ooblayout_ecc(mtd, 0, &oobregion);
+	eccbuf = chip->oob_poi + oobregion.offset;
+
+	for (i = 0; i < chip->ecc.steps; i++) {
+		atmel_pmecc_get_generated_eccbytes(nand->pmecc, i,
+						   eccbuf);
+		eccbuf += chip->ecc.bytes;
+	}
+
+	return 0;
+}
+
+static int atmel_nand_pmecc_correct_data(struct nand_chip *chip, void *buf,
+					 bool raw)
+{
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand_controller *nc;
+	struct mtd_oob_region oobregion;
+	int ret, i, max_bitflips = 0;
+	void *databuf, *eccbuf;
+
+	nc = to_nand_controller(chip->controller);
+
+	if (raw)
+		return 0;
+
+	ret = atmel_pmecc_wait_rdy(nand->pmecc);
+	if (ret) {
+		dev_err(nc->dev,
+			"Failed to read NAND page data (err = %d)\n",
+			ret);
+		return ret;
+	}
+
+	mtd_ooblayout_ecc(mtd, 0, &oobregion);
+	eccbuf = chip->oob_poi + oobregion.offset;
+	databuf = buf;
+
+	for (i = 0; i < chip->ecc.steps; i++) {
+		ret = atmel_pmecc_correct_sector(nand->pmecc, i, databuf,
+						 eccbuf);
+		if (ret < 0 && !atmel_pmecc_correct_erased_chunks(nand->pmecc))
+			ret = nand_check_erased_ecc_chunk(databuf,
+							  chip->ecc.size,
+							  eccbuf,
+							  chip->ecc.bytes,
+							  NULL, 0,
+							  chip->ecc.strength);
+
+		if (ret >= 0)
+			max_bitflips = max(ret, max_bitflips);
+		else
+			mtd->ecc_stats.failed++;
+
+		databuf += chip->ecc.size;
+		eccbuf += chip->ecc.bytes;
+	}
+
+	return max_bitflips;
+}
+
+static int atmel_nand_pmecc_write_pg(struct nand_chip *chip, const u8 *buf,
+				     bool oob_required, int page, bool raw)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	int ret;
+
+	ret = atmel_nand_pmecc_enable(chip, NAND_ECC_WRITE, raw);
+	if (ret)
+		return ret;
+
+	atmel_nand_write_buf(mtd, buf, mtd->writesize);
+
+	ret = atmel_nand_pmecc_generate_eccbytes(chip, raw);
+	if (ret) {
+		atmel_pmecc_disable(nand->pmecc);
+		return ret;
+	}
+
+	atmel_nand_pmecc_disable(chip, raw);
+
+	atmel_nand_write_buf(mtd, chip->oob_poi, mtd->oobsize);
+
+	return 0;
+}
+
+static int atmel_nand_pmecc_write_page(struct mtd_info *mtd,
+				       struct nand_chip *chip, const u8 *buf,
+				       int oob_required, int page)
+{
+	return atmel_nand_pmecc_write_pg(chip, buf, oob_required, page, false);
+}
+
+static int atmel_nand_pmecc_write_page_raw(struct mtd_info *mtd,
+					   struct nand_chip *chip,
+					   const u8 *buf, int oob_required,
+					   int page)
+{
+	return atmel_nand_pmecc_write_pg(chip, buf, oob_required, page, true);
+}
+
+static int atmel_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf,
+				    bool oob_required, int page, bool raw)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	int ret;
+
+	ret = atmel_nand_pmecc_enable(chip, NAND_ECC_READ, raw);
+	if (ret)
+		return ret;
+
+	atmel_nand_read_buf(mtd, buf, mtd->writesize);
+	atmel_nand_read_buf(mtd, chip->oob_poi, mtd->oobsize);
+
+	ret = atmel_nand_pmecc_correct_data(chip, buf, raw);
+
+	atmel_nand_pmecc_disable(chip, raw);
+
+	return ret;
+}
+
+static int atmel_nand_pmecc_read_page(struct mtd_info *mtd,
+				      struct nand_chip *chip, u8 *buf,
+				      int oob_required, int page)
+{
+	return atmel_nand_pmecc_read_pg(chip, buf, oob_required, page, false);
+}
+
+static int atmel_nand_pmecc_read_page_raw(struct mtd_info *mtd,
+					  struct nand_chip *chip, u8 *buf,
+					  int oob_required, int page)
+{
+	return atmel_nand_pmecc_read_pg(chip, buf, oob_required, page, true);
+}
+
+static int atmel_hsmc_nand_pmecc_write_pg(struct nand_chip *chip,
+					  const u8 *buf, bool oob_required,
+					  int page, bool raw)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_hsmc_nand_controller *nc;
+	int ret;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	atmel_nfc_copy_to_sram(chip, buf, false);
+
+	nc->op.cmds[0] = NAND_CMD_SEQIN;
+	nc->op.ncmds = 1;
+	atmel_nfc_set_op_addr(chip, page, 0x0);
+	nc->op.cs = nand->activecs->id;
+	nc->op.data = ATMEL_NFC_WRITE_DATA;
+
+	ret = atmel_nand_pmecc_enable(chip, NAND_ECC_WRITE, raw);
+	if (ret)
+		return ret;
+
+	ret = atmel_nfc_exec_op(nc, false);
+	if (ret) {
+		atmel_nand_pmecc_disable(chip, raw);
+		dev_err(nc->base.dev,
+			"Failed to transfer NAND page data (err = %d)\n",
+			ret);
+		return ret;
+	}
+
+	ret = atmel_nand_pmecc_generate_eccbytes(chip, raw);
+
+	atmel_nand_pmecc_disable(chip, raw);
+
+	if (ret)
+		return ret;
+
+	atmel_nand_write_buf(mtd, chip->oob_poi, mtd->oobsize);
+
+	nc->op.cmds[0] = NAND_CMD_PAGEPROG;
+	nc->op.ncmds = 1;
+	nc->op.cs = nand->activecs->id;
+	ret = atmel_nfc_exec_op(nc, false);
+	if (ret)
+		dev_err(nc->base.dev, "Failed to program NAND page (err = %d)\n",
+			ret);
+
+	return ret;
+}
+
+static int atmel_hsmc_nand_pmecc_write_page(struct mtd_info *mtd,
+					    struct nand_chip *chip,
+					    const u8 *buf, int oob_required,
+					    int page)
+{
+	return atmel_hsmc_nand_pmecc_write_pg(chip, buf, oob_required, page,
+					      false);
+}
+
+static int atmel_hsmc_nand_pmecc_write_page_raw(struct mtd_info *mtd,
+						struct nand_chip *chip,
+						const u8 *buf,
+						int oob_required, int page)
+{
+	return atmel_hsmc_nand_pmecc_write_pg(chip, buf, oob_required, page,
+					      true);
+}
+
+static int atmel_hsmc_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf,
+					 bool oob_required, int page,
+					 bool raw)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_hsmc_nand_controller *nc;
+	int ret;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	/*
+	 * Optimized read page accessors only work when the NAND R/B pin is
+	 * connected to a native SoC R/B pin. If that's not the case, fallback
+	 * to the non-optimized one.
+	 */
+	if (nand->activecs->rb.type != ATMEL_NAND_NATIVE_RB) {
+		chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
+
+		return atmel_nand_pmecc_read_pg(chip, buf, oob_required, page,
+						raw);
+	}
+
+	nc->op.cmds[nc->op.ncmds++] = NAND_CMD_READ0;
+
+	if (mtd->writesize > 512)
+		nc->op.cmds[nc->op.ncmds++] = NAND_CMD_READSTART;
+
+	atmel_nfc_set_op_addr(chip, page, 0x0);
+	nc->op.cs = nand->activecs->id;
+	nc->op.data = ATMEL_NFC_READ_DATA;
+
+	ret = atmel_nand_pmecc_enable(chip, NAND_ECC_READ, raw);
+	if (ret)
+		return ret;
+
+	ret = atmel_nfc_exec_op(nc, false);
+	if (ret) {
+		atmel_nand_pmecc_disable(chip, raw);
+		dev_err(nc->base.dev,
+			"Failed to load NAND page data (err = %d)\n",
+			ret);
+		return ret;
+	}
+
+	atmel_nfc_copy_from_sram(chip, buf, true);
+
+	ret = atmel_nand_pmecc_correct_data(chip, buf, raw);
+
+	atmel_nand_pmecc_disable(chip, raw);
+
+	return ret;
+}
+
+static int atmel_hsmc_nand_pmecc_read_page(struct mtd_info *mtd,
+					   struct nand_chip *chip, u8 *buf,
+					   int oob_required, int page)
+{
+	return atmel_hsmc_nand_pmecc_read_pg(chip, buf, oob_required, page,
+					     false);
+}
+
+static int atmel_hsmc_nand_pmecc_read_page_raw(struct mtd_info *mtd,
+					       struct nand_chip *chip,
+					       u8 *buf, int oob_required,
+					       int page)
+{
+	return atmel_hsmc_nand_pmecc_read_pg(chip, buf, oob_required, page,
+					     true);
+}
+
+static int atmel_nand_pmecc_init(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_nand_controller *nc;
+	struct atmel_pmecc_user_req req;
+
+	nc = to_nand_controller(chip->controller);
+
+	if (!nc->pmecc) {
+		dev_err(nc->dev, "HW ECC not supported\n");
+		return -ENOTSUPP;
+	}
+
+	if (nc->caps->legacy_of_bindings) {
+		u32 val;
+
+		if (!of_property_read_u32(nc->dev->of_node, "atmel,pmecc-cap",
+					  &val))
+			chip->ecc.strength = val;
+
+		if (!of_property_read_u32(nc->dev->of_node,
+					  "atmel,pmecc-sector-size",
+					  &val))
+			chip->ecc.size = val;
+	}
+
+	if (chip->ecc.options & NAND_ECC_MAXIMIZE)
+		req.ecc.strength = ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH;
+	else if (chip->ecc.strength)
+		req.ecc.strength = chip->ecc.strength;
+	else if (chip->ecc_strength_ds)
+		req.ecc.strength = chip->ecc_strength_ds;
+	else
+		req.ecc.strength = ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH;
+
+	if (chip->ecc.size)
+		req.ecc.sectorsize = chip->ecc.size;
+	else if (chip->ecc_step_ds)
+		req.ecc.sectorsize = chip->ecc_step_ds;
+	else
+		req.ecc.sectorsize = ATMEL_PMECC_SECTOR_SIZE_AUTO;
+
+	req.pagesize = mtd->writesize;
+	req.oobsize = mtd->oobsize;
+
+	if (mtd->writesize <= 512) {
+		req.ecc.bytes = 4;
+		req.ecc.ooboffset = 0;
+	} else {
+		req.ecc.bytes = mtd->oobsize - 2;
+		req.ecc.ooboffset = ATMEL_PMECC_OOBOFFSET_AUTO;
+	}
+
+	nand->pmecc = atmel_pmecc_create_user(nc->pmecc, &req);
+	if (IS_ERR(nand->pmecc))
+		return PTR_ERR(nand->pmecc);
+
+	chip->ecc.algo = NAND_ECC_BCH;
+	chip->ecc.size = req.ecc.sectorsize;
+	chip->ecc.bytes = req.ecc.bytes / req.ecc.nsectors;
+	chip->ecc.strength = req.ecc.strength;
+
+	chip->options |= NAND_NO_SUBPAGE_WRITE;
+
+	mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
+
+	return 0;
+}
+
+static int atmel_nand_ecc_init(struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+	struct atmel_nand_controller *nc;
+	int ret;
+
+	nc = to_nand_controller(chip->controller);
+
+	switch (chip->ecc.mode) {
+	case NAND_ECC_NONE:
+	case NAND_ECC_SOFT:
+		/*
+		 * Nothing to do, the core will initialize everything for us.
+		 */
+		break;
+
+	case NAND_ECC_HW:
+		ret = atmel_nand_pmecc_init(chip);
+		if (ret)
+			return ret;
+
+		chip->ecc.read_page = atmel_nand_pmecc_read_page;
+		chip->ecc.write_page = atmel_nand_pmecc_write_page;
+		chip->ecc.read_page_raw = atmel_nand_pmecc_read_page_raw;
+		chip->ecc.write_page_raw = atmel_nand_pmecc_write_page_raw;
+		break;
+
+	default:
+		/* Other modes are not supported. */
+		dev_err(nc->dev, "Unsupported ECC mode: %d\n",
+			chip->ecc.mode);
+		return -ENOTSUPP;
+	}
+
+	return 0;
+}
+
+static int atmel_hsmc_nand_ecc_init(struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+	int ret;
+
+	ret = atmel_nand_ecc_init(nand);
+	if (ret)
+		return ret;
+
+	if (chip->ecc.mode != NAND_ECC_HW)
+		return 0;
+
+	/* Adjust the ECC operations for the HSMC IP. */
+	chip->ecc.read_page = atmel_hsmc_nand_pmecc_read_page;
+	chip->ecc.write_page = atmel_hsmc_nand_pmecc_write_page;
+	chip->ecc.read_page_raw = atmel_hsmc_nand_pmecc_read_page_raw;
+	chip->ecc.write_page_raw = atmel_hsmc_nand_pmecc_write_page_raw;
+	chip->ecc.options |= NAND_ECC_CUSTOM_PAGE_ACCESS;
+
+	return 0;
+}
+
+static void atmel_nand_init(struct atmel_nand_controller *nc,
+			    struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	mtd->dev.parent = nc->dev;
+	nand->base.controller = &nc->base;
+
+	chip->cmd_ctrl = atmel_nand_cmd_ctrl;
+	chip->read_byte = atmel_nand_read_byte;
+	chip->read_word = atmel_nand_read_word;
+	chip->write_byte = atmel_nand_write_byte;
+	chip->read_buf = atmel_nand_read_buf;
+	chip->write_buf = atmel_nand_write_buf;
+	chip->select_chip = atmel_nand_select_chip;
+
+	/* Some NANDs require a longer delay than the default one (20us). */
+	chip->chip_delay = 40;
+
+	/*
+	 * Use a bounce buffer when the buffer passed by the MTD user is not
+	 * suitable for DMA.
+	 */
+	if (nc->dmac)
+		chip->options |= NAND_USE_BOUNCE_BUFFER;
+
+	/* Default to HW ECC if pmecc is available. */
+	if (nc->pmecc)
+		chip->ecc.mode = NAND_ECC_HW;
+}
+
+static void atmel_smc_nand_init(struct atmel_nand_controller *nc,
+				struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+	struct atmel_smc_nand_controller *smc_nc;
+	int i;
+
+	atmel_nand_init(nc, nand);
+
+	smc_nc = to_smc_nand_controller(chip->controller);
+	if (!smc_nc->matrix)
+		return;
+
+	/* Attach the CS to the NAND Flash logic. */
+	for (i = 0; i < nand->numcs; i++)
+		regmap_update_bits(smc_nc->matrix, smc_nc->ebi_csa_offs,
+				   BIT(nand->cs[i].id), BIT(nand->cs[i].id));
+}
+
+static void atmel_hsmc_nand_init(struct atmel_nand_controller *nc,
+				 struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+
+	atmel_nand_init(nc, nand);
+
+	/* Overload some methods for the HSMC controller. */
+	chip->cmd_ctrl = atmel_hsmc_nand_cmd_ctrl;
+	chip->select_chip = atmel_hsmc_nand_select_chip;
+}
+
+static int atmel_nand_detect(struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand_controller *nc;
+	int ret;
+
+	nc = to_nand_controller(chip->controller);
+
+	ret = nand_scan_ident(mtd, nand->numcs, NULL);
+	if (ret)
+		dev_err(nc->dev, "nand_scan_ident() failed: %d\n", ret);
+
+	return ret;
+}
+
+static int atmel_nand_unregister(struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	int ret;
+
+	ret = mtd_device_unregister(mtd);
+	if (ret)
+		return ret;
+
+	nand_cleanup(chip);
+	list_del(&nand->node);
+
+	return 0;
+}
+
+static int atmel_nand_register(struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand_controller *nc;
+	int ret;
+
+	nc = to_nand_controller(chip->controller);
+
+	if (nc->caps->legacy_of_bindings || !nc->dev->of_node) {
+		/*
+		 * We keep the MTD name unchanged to avoid breaking platforms
+		 * where the MTD cmdline parser is used and the bootloader
+		 * has not been updated to use the new naming scheme.
+		 */
+		mtd->name = "atmel_nand";
+	} else if (!mtd->name) {
+		/*
+		 * If the new bindings are used and the bootloader has not been
+		 * updated to pass a new mtdparts parameter on the cmdline, you
+		 * should define the following property in your nand node:
+		 *
+		 *	label = "atmel_nand";
+		 *
+		 * This way, mtd->name will be set by the core when
+		 * nand_set_flash_node() is called.
+		 */
+		mtd->name = devm_kasprintf(nc->dev, GFP_KERNEL,
+					   "%s:nand.%d", dev_name(nc->dev),
+					   nand->cs[0].id);
+		if (!mtd->name) {
+			dev_err(nc->dev, "Failed to allocate mtd->name\n");
+			return -ENOMEM;
+		}
+	}
+
+	ret = nand_scan_tail(mtd);
+	if (ret) {
+		dev_err(nc->dev, "nand_scan_tail() failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = mtd_device_register(mtd, NULL, 0);
+	if (ret) {
+		dev_err(nc->dev, "Failed to register mtd device: %d\n", ret);
+		nand_cleanup(chip);
+		return ret;
+	}
+
+	list_add_tail(&nand->node, &nc->chips);
+
+	return 0;
+}
+
+static struct atmel_nand *atmel_nand_create(struct atmel_nand_controller *nc,
+					    struct device_node *np,
+					    int reg_cells)
+{
+	struct atmel_nand *nand;
+	struct gpio_desc *gpio;
+	int numcs, ret, i;
+
+	numcs = of_property_count_elems_of_size(np, "reg",
+						reg_cells * sizeof(u32));
+	if (numcs < 1) {
+		dev_err(nc->dev, "Missing or invalid reg property\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	nand = devm_kzalloc(nc->dev,
+			    sizeof(*nand) + (numcs * sizeof(*nand->cs)),
+			    GFP_KERNEL);
+	if (!nand) {
+		dev_err(nc->dev, "Failed to allocate NAND object\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	nand->numcs = numcs;
+
+	gpio = devm_fwnode_get_index_gpiod_from_child(nc->dev, "det", 0,
+						      &np->fwnode, GPIOD_IN,
+						      "nand-det");
+	if (IS_ERR(gpio) && PTR_ERR(gpio) != -ENOENT) {
+		dev_err(nc->dev,
+			"Failed to get detect gpio (err = %ld)\n",
+			PTR_ERR(gpio));
+		return ERR_CAST(gpio);
+	}
+
+	if (!IS_ERR(gpio))
+		nand->cdgpio = gpio;
+
+	for (i = 0; i < numcs; i++) {
+		struct resource res;
+		u32 val;
+
+		ret = of_address_to_resource(np, 0, &res);
+		if (ret) {
+			dev_err(nc->dev, "Invalid reg property (err = %d)\n",
+				ret);
+			return ERR_PTR(ret);
+		}
+
+		ret = of_property_read_u32_index(np, "reg", i * reg_cells,
+						 &val);
+		if (ret) {
+			dev_err(nc->dev, "Invalid reg property (err = %d)\n",
+				ret);
+			return ERR_PTR(ret);
+		}
+
+		nand->cs[i].id = val;
+
+		nand->cs[i].io.dma = res.start;
+		nand->cs[i].io.virt = devm_ioremap_resource(nc->dev, &res);
+		if (IS_ERR(nand->cs[i].io.virt))
+			return ERR_CAST(nand->cs[i].io.virt);
+
+		if (!of_property_read_u32(np, "atmel,rb", &val)) {
+			if (val > ATMEL_NFC_MAX_RB_ID)
+				return ERR_PTR(-EINVAL);
+
+			nand->cs[i].rb.type = ATMEL_NAND_NATIVE_RB;
+			nand->cs[i].rb.id = val;
+		} else {
+			gpio = devm_fwnode_get_index_gpiod_from_child(nc->dev,
+							"rb", i, &np->fwnode,
+							GPIOD_IN, "nand-rb");
+			if (IS_ERR(gpio) && PTR_ERR(gpio) != -ENOENT) {
+				dev_err(nc->dev,
+					"Failed to get R/B gpio (err = %ld)\n",
+					PTR_ERR(gpio));
+				return ERR_CAST(gpio);
+			}
+
+			if (!IS_ERR(gpio)) {
+				nand->cs[i].rb.type = ATMEL_NAND_GPIO_RB;
+				nand->cs[i].rb.gpio = gpio;
+			}
+		}
+
+		gpio = devm_fwnode_get_index_gpiod_from_child(nc->dev, "cs",
+							      i, &np->fwnode,
+							      GPIOD_OUT_HIGH,
+							      "nand-cs");
+		if (IS_ERR(gpio) && PTR_ERR(gpio) != -ENOENT) {
+			dev_err(nc->dev,
+				"Failed to get CS gpio (err = %ld)\n",
+				PTR_ERR(gpio));
+			return ERR_CAST(gpio);
+		}
+
+		if (!IS_ERR(gpio))
+			nand->cs[i].csgpio = gpio;
+	}
+
+	nand_set_flash_node(&nand->base, np);
+
+	return nand;
+}
+
+static int
+atmel_nand_controller_add_nand(struct atmel_nand_controller *nc,
+			       struct atmel_nand *nand)
+{
+	int ret;
+
+	/* No card inserted, skip this NAND. */
+	if (nand->cdgpio && gpiod_get_value(nand->cdgpio)) {
+		dev_info(nc->dev, "No SmartMedia card inserted.\n");
+		return 0;
+	}
+
+	nc->caps->ops->nand_init(nc, nand);
+
+	ret = atmel_nand_detect(nand);
+	if (ret)
+		return ret;
+
+	ret = nc->caps->ops->ecc_init(nand);
+	if (ret)
+		return ret;
+
+	return atmel_nand_register(nand);
+}
+
+static int
+atmel_nand_controller_remove_nands(struct atmel_nand_controller *nc)
+{
+	struct atmel_nand *nand, *tmp;
+	int ret;
+
+	list_for_each_entry_safe(nand, tmp, &nc->chips, node) {
+		ret = atmel_nand_unregister(nand);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int
+atmel_nand_controller_legacy_add_nands(struct atmel_nand_controller *nc)
+{
+	struct device *dev = nc->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct atmel_nand *nand;
+	struct gpio_desc *gpio;
+	struct resource *res;
+
+	/*
+	 * Legacy bindings only allow connecting a single NAND with a unique CS
+	 * line to the controller.
+	 */
+	nand = devm_kzalloc(nc->dev, sizeof(*nand) + sizeof(*nand->cs),
+			    GFP_KERNEL);
+	if (!nand)
+		return -ENOMEM;
+
+	nand->numcs = 1;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	nand->cs[0].io.virt = devm_ioremap_resource(dev, res);
+	if (IS_ERR(nand->cs[0].io.virt))
+		return PTR_ERR(nand->cs[0].io.virt);
+
+	nand->cs[0].io.dma = res->start;
+
+	/*
+	 * The old driver was hardcoding the CS id to 3 for all sama5
+	 * controllers. Since this id is only meaningful for the sama5
+	 * controller we can safely assign this id to 3 no matter the
+	 * controller.
+	 * If one wants to connect a NAND to a different CS line, he will
+	 * have to use the new bindings.
+	 */
+	nand->cs[0].id = 3;
+
+	/* R/B GPIO. */
+	gpio = devm_gpiod_get_index_optional(dev, NULL, 0,  GPIOD_IN);
+	if (IS_ERR(gpio)) {
+		dev_err(dev, "Failed to get R/B gpio (err = %ld)\n",
+			PTR_ERR(gpio));
+		return PTR_ERR(gpio);
+	}
+
+	if (gpio) {
+		nand->cs[0].rb.type = ATMEL_NAND_GPIO_RB;
+		nand->cs[0].rb.gpio = gpio;
+	}
+
+	/* CS GPIO. */
+	gpio = devm_gpiod_get_index_optional(dev, NULL, 1, GPIOD_OUT_HIGH);
+	if (IS_ERR(gpio)) {
+		dev_err(dev, "Failed to get CS gpio (err = %ld)\n",
+			PTR_ERR(gpio));
+		return PTR_ERR(gpio);
+	}
+
+	nand->cs[0].csgpio = gpio;
+
+	/* Card detect GPIO. */
+	gpio = devm_gpiod_get_index_optional(nc->dev, NULL, 2, GPIOD_IN);
+	if (IS_ERR(gpio)) {
+		dev_err(dev,
+			"Failed to get detect gpio (err = %ld)\n",
+			PTR_ERR(gpio));
+		return PTR_ERR(gpio);
+	}
+
+	nand->cdgpio = gpio;
+
+	nand_set_flash_node(&nand->base, nc->dev->of_node);
+
+	return atmel_nand_controller_add_nand(nc, nand);
+}
+
+static int atmel_nand_controller_add_nands(struct atmel_nand_controller *nc)
+{
+	struct device_node *np, *nand_np;
+	struct device *dev = nc->dev;
+	int ret, reg_cells;
+	u32 val;
+
+	/* We do not retrieve the SMC syscon when parsing old DTs. */
+	if (nc->caps->legacy_of_bindings)
+		return atmel_nand_controller_legacy_add_nands(nc);
+
+	np = dev->of_node;
+
+	ret = of_property_read_u32(np, "#address-cells", &val);
+	if (ret) {
+		dev_err(dev, "missing #address-cells property\n");
+		return ret;
+	}
+
+	reg_cells = val;
+
+	ret = of_property_read_u32(np, "#size-cells", &val);
+	if (ret) {
+		dev_err(dev, "missing #address-cells property\n");
+		return ret;
+	}
+
+	reg_cells += val;
+
+	for_each_child_of_node(np, nand_np) {
+		struct atmel_nand *nand;
+
+		nand = atmel_nand_create(nc, nand_np, reg_cells);
+		if (IS_ERR(nand)) {
+			ret = PTR_ERR(nand);
+			goto err;
+		}
+
+		ret = atmel_nand_controller_add_nand(nc, nand);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	atmel_nand_controller_remove_nands(nc);
+
+	return ret;
+}
+
+static void atmel_nand_controller_cleanup(struct atmel_nand_controller *nc)
+{
+	if (nc->dmac)
+		dma_release_channel(nc->dmac);
+
+	clk_put(nc->mck);
+}
+
+static const struct of_device_id atmel_matrix_of_ids[] = {
+	{
+		.compatible = "atmel,at91sam9260-matrix",
+		.data = (void *)AT91SAM9260_MATRIX_EBICSA,
+	},
+	{
+		.compatible = "atmel,at91sam9261-matrix",
+		.data = (void *)AT91SAM9261_MATRIX_EBICSA,
+	},
+	{
+		.compatible = "atmel,at91sam9263-matrix",
+		.data = (void *)AT91SAM9263_MATRIX_EBI0CSA,
+	},
+	{
+		.compatible = "atmel,at91sam9rl-matrix",
+		.data = (void *)AT91SAM9RL_MATRIX_EBICSA,
+	},
+	{
+		.compatible = "atmel,at91sam9g45-matrix",
+		.data = (void *)AT91SAM9G45_MATRIX_EBICSA,
+	},
+	{
+		.compatible = "atmel,at91sam9n12-matrix",
+		.data = (void *)AT91SAM9N12_MATRIX_EBICSA,
+	},
+	{
+		.compatible = "atmel,at91sam9x5-matrix",
+		.data = (void *)AT91SAM9X5_MATRIX_EBICSA,
+	},
+	{ /* sentinel */ },
+};
+
+static int atmel_nand_controller_init(struct atmel_nand_controller *nc,
+				struct platform_device *pdev,
+				const struct atmel_nand_controller_caps *caps)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	int ret;
+
+	nand_hw_control_init(&nc->base);
+	INIT_LIST_HEAD(&nc->chips);
+	nc->dev = dev;
+	nc->caps = caps;
+
+	platform_set_drvdata(pdev, nc);
+
+	nc->pmecc = devm_atmel_pmecc_get(dev);
+	if (IS_ERR(nc->pmecc)) {
+		ret = PTR_ERR(nc->pmecc);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Could not get PMECC object (err = %d)\n",
+				ret);
+		return ret;
+	}
+
+	if (nc->caps->has_dma) {
+		dma_cap_mask_t mask;
+
+		dma_cap_zero(mask);
+		dma_cap_set(DMA_MEMCPY, mask);
+
+		nc->dmac = dma_request_channel(mask, NULL, NULL);
+		if (!nc->dmac)
+			dev_err(nc->dev, "Failed to request DMA channel\n");
+	}
+
+	/* We do not retrieve the SMC syscon when parsing old DTs. */
+	if (nc->caps->legacy_of_bindings)
+		return 0;
+
+	np = of_parse_phandle(dev->parent->of_node, "atmel,smc", 0);
+	if (!np) {
+		dev_err(dev, "Missing or invalid atmel,smc property\n");
+		return -EINVAL;
+	}
+
+	nc->smc = syscon_node_to_regmap(np);
+	of_node_put(np);
+	if (IS_ERR(nc->smc)) {
+		ret = PTR_ERR(nc->smc);
+		dev_err(dev, "Could not get SMC regmap (err = %d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int
+atmel_smc_nand_controller_init(struct atmel_smc_nand_controller *nc)
+{
+	struct device *dev = nc->base.dev;
+	const struct of_device_id *match;
+	struct device_node *np;
+	int ret;
+
+	/* We do not retrieve the matrix syscon when parsing old DTs. */
+	if (nc->base.caps->legacy_of_bindings)
+		return 0;
+
+	np = of_parse_phandle(dev->parent->of_node, "atmel,matrix", 0);
+	if (!np)
+		return 0;
+
+	match = of_match_node(atmel_matrix_of_ids, np);
+	if (!match) {
+		of_node_put(np);
+		return 0;
+	}
+
+	nc->matrix = syscon_node_to_regmap(np);
+	of_node_put(np);
+	if (IS_ERR(nc->matrix)) {
+		ret = PTR_ERR(nc->matrix);
+		dev_err(dev, "Could not get Matrix regmap (err = %d)\n", ret);
+		return ret;
+	}
+
+	nc->ebi_csa_offs = (unsigned int)match->data;
+
+	/*
+	 * The at91sam9263 has 2 EBIs, if the NAND controller is under EBI1
+	 * add 4 to ->ebi_csa_offs.
+	 */
+	if (of_device_is_compatible(dev->parent->of_node,
+				    "atmel,at91sam9263-ebi1"))
+		nc->ebi_csa_offs += 4;
+
+	return 0;
+}
+
+static int
+atmel_hsmc_nand_controller_legacy_init(struct atmel_hsmc_nand_controller *nc)
+{
+	struct regmap_config regmap_conf = {
+		.reg_bits = 32,
+		.val_bits = 32,
+		.reg_stride = 4,
+	};
+
+	struct device *dev = nc->base.dev;
+	struct device_node *nand_np, *nfc_np;
+	void __iomem *iomem;
+	struct resource res;
+	int ret;
+
+	nand_np = dev->of_node;
+	nfc_np = of_find_compatible_node(dev->of_node, NULL,
+					 "atmel,sama5d3-nfc");
+
+	nc->clk = of_clk_get(nfc_np, 0);
+	if (IS_ERR(nc->clk)) {
+		ret = PTR_ERR(nc->clk);
+		dev_err(dev, "Failed to retrieve HSMC clock (err = %d)\n",
+			ret);
+		goto out;
+	}
+
+	ret = clk_prepare_enable(nc->clk);
+	if (ret) {
+		dev_err(dev, "Failed to enable the HSMC clock (err = %d)\n",
+			ret);
+		goto out;
+	}
+
+	nc->irq = of_irq_get(nand_np, 0);
+	if (nc->irq < 0) {
+		ret = nc->irq;
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get IRQ number (err = %d)\n",
+				ret);
+		goto out;
+	}
+
+	ret = of_address_to_resource(nfc_np, 0, &res);
+	if (ret) {
+		dev_err(dev, "Invalid or missing NFC IO resource (err = %d)\n",
+			ret);
+		goto out;
+	}
+
+	iomem = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(iomem)) {
+		ret = PTR_ERR(iomem);
+		goto out;
+	}
+
+	regmap_conf.name = "nfc-io";
+	regmap_conf.max_register = resource_size(&res) - 4;
+	nc->io = devm_regmap_init_mmio(dev, iomem, &regmap_conf);
+	if (IS_ERR(nc->io)) {
+		ret = PTR_ERR(nc->io);
+		dev_err(dev, "Could not create NFC IO regmap (err = %d)\n",
+			ret);
+		goto out;
+	}
+
+	ret = of_address_to_resource(nfc_np, 1, &res);
+	if (ret) {
+		dev_err(dev, "Invalid or missing HSMC resource (err = %d)\n",
+			ret);
+		goto out;
+	}
+
+	iomem = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(iomem)) {
+		ret = PTR_ERR(iomem);
+		goto out;
+	}
+
+	regmap_conf.name = "smc";
+	regmap_conf.max_register = resource_size(&res) - 4;
+	nc->base.smc = devm_regmap_init_mmio(dev, iomem, &regmap_conf);
+	if (IS_ERR(nc->base.smc)) {
+		ret = PTR_ERR(nc->base.smc);
+		dev_err(dev, "Could not create NFC IO regmap (err = %d)\n",
+			ret);
+		goto out;
+	}
+
+	ret = of_address_to_resource(nfc_np, 2, &res);
+	if (ret) {
+		dev_err(dev, "Invalid or missing SRAM resource (err = %d)\n",
+			ret);
+		goto out;
+	}
+
+	nc->sram.virt = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(nc->sram.virt)) {
+		ret = PTR_ERR(nc->sram.virt);
+		goto out;
+	}
+
+	nc->sram.dma = res.start;
+
+out:
+	of_node_put(nfc_np);
+
+	return ret;
+}
+
+static int
+atmel_hsmc_nand_controller_init(struct atmel_hsmc_nand_controller *nc)
+{
+	struct device *dev = nc->base.dev;
+	struct device_node *np;
+	int ret;
+
+	np = of_parse_phandle(dev->parent->of_node, "atmel,smc", 0);
+	if (!np) {
+		dev_err(dev, "Missing or invalid atmel,smc property\n");
+		return -EINVAL;
+	}
+
+	nc->irq = of_irq_get(np, 0);
+	of_node_put(np);
+	if (nc->irq < 0) {
+		if (nc->irq != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get IRQ number (err = %d)\n",
+				nc->irq);
+		return nc->irq;
+	}
+
+	np = of_parse_phandle(dev->of_node, "atmel,nfc-io", 0);
+	if (!np) {
+		dev_err(dev, "Missing or invalid atmel,nfc-io property\n");
+		return -EINVAL;
+	}
+
+	nc->io = syscon_node_to_regmap(np);
+	of_node_put(np);
+	if (IS_ERR(nc->io)) {
+		ret = PTR_ERR(nc->io);
+		dev_err(dev, "Could not get NFC IO regmap (err = %d)\n", ret);
+		return ret;
+	}
+
+	nc->sram.pool = of_gen_pool_get(nc->base.dev->of_node,
+					 "atmel,nfc-sram", 0);
+	if (!nc->sram.pool) {
+		dev_err(nc->base.dev, "Missing SRAM\n");
+		return -ENOMEM;
+	}
+
+	nc->sram.virt = gen_pool_dma_alloc(nc->sram.pool,
+					    ATMEL_NFC_SRAM_SIZE,
+					    &nc->sram.dma);
+	if (!nc->sram.virt) {
+		dev_err(nc->base.dev,
+			"Could not allocate memory from the NFC SRAM pool\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int
+atmel_hsmc_nand_controller_remove(struct atmel_nand_controller *nc)
+{
+	struct atmel_hsmc_nand_controller *hsmc_nc;
+	int ret;
+
+	ret = atmel_nand_controller_remove_nands(nc);
+	if (ret)
+		return ret;
+
+	hsmc_nc = container_of(nc, struct atmel_hsmc_nand_controller, base);
+	if (hsmc_nc->sram.pool)
+		gen_pool_free(hsmc_nc->sram.pool,
+			      (unsigned long)hsmc_nc->sram.virt,
+			      ATMEL_NFC_SRAM_SIZE);
+
+	if (hsmc_nc->clk) {
+		clk_disable_unprepare(hsmc_nc->clk);
+		clk_put(hsmc_nc->clk);
+	}
+
+	atmel_nand_controller_cleanup(nc);
+
+	return 0;
+}
+
+static int atmel_hsmc_nand_controller_probe(struct platform_device *pdev,
+				const struct atmel_nand_controller_caps *caps)
+{
+	struct device *dev = &pdev->dev;
+	struct atmel_hsmc_nand_controller *nc;
+	int ret;
+
+	nc = devm_kzalloc(dev, sizeof(*nc), GFP_KERNEL);
+	if (!nc)
+		return -ENOMEM;
+
+	ret = atmel_nand_controller_init(&nc->base, pdev, caps);
+	if (ret)
+		return ret;
+
+	if (caps->legacy_of_bindings)
+		ret = atmel_hsmc_nand_controller_legacy_init(nc);
+	else
+		ret = atmel_hsmc_nand_controller_init(nc);
+
+	if (ret)
+		return ret;
+
+	/* Make sure all irqs are masked before registering our IRQ handler. */
+	regmap_write(nc->base.smc, ATMEL_HSMC_NFC_IDR, 0xffffffff);
+	ret = devm_request_irq(dev, nc->irq, atmel_nfc_interrupt,
+			       IRQF_SHARED, "nfc", nc);
+	if (ret) {
+		dev_err(dev,
+			"Could not get register NFC interrupt handler (err = %d)\n",
+			ret);
+		goto err;
+	}
+
+	/* Initial NFC configuration. */
+	regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CFG,
+		     ATMEL_HSMC_NFC_CFG_DTO_MAX);
+
+	ret = atmel_nand_controller_add_nands(&nc->base);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err:
+	atmel_hsmc_nand_controller_remove(&nc->base);
+
+	return ret;
+}
+
+static const struct atmel_nand_controller_ops atmel_hsmc_nc_ops = {
+	.probe = atmel_hsmc_nand_controller_probe,
+	.remove = atmel_hsmc_nand_controller_remove,
+	.ecc_init = atmel_hsmc_nand_ecc_init,
+	.nand_init = atmel_hsmc_nand_init,
+};
+
+static const struct atmel_nand_controller_caps atmel_sama5_nc_caps = {
+	.has_dma = true,
+	.ale_offs = BIT(21),
+	.cle_offs = BIT(22),
+	.ops = &atmel_hsmc_nc_ops,
+};
+
+/* Only used to parse old bindings. */
+static const struct atmel_nand_controller_caps atmel_sama5_nand_caps = {
+	.has_dma = true,
+	.ale_offs = BIT(21),
+	.cle_offs = BIT(22),
+	.ops = &atmel_hsmc_nc_ops,
+	.legacy_of_bindings = true,
+};
+
+static int atmel_smc_nand_controller_probe(struct platform_device *pdev,
+				const struct atmel_nand_controller_caps *caps)
+{
+	struct device *dev = &pdev->dev;
+	struct atmel_smc_nand_controller *nc;
+	int ret;
+
+	nc = devm_kzalloc(dev, sizeof(*nc), GFP_KERNEL);
+	if (!nc)
+		return -ENOMEM;
+
+	ret = atmel_nand_controller_init(&nc->base, pdev, caps);
+	if (ret)
+		return ret;
+
+	ret = atmel_smc_nand_controller_init(nc);
+	if (ret)
+		return ret;
+
+	return atmel_nand_controller_add_nands(&nc->base);
+}
+
+static int
+atmel_smc_nand_controller_remove(struct atmel_nand_controller *nc)
+{
+	int ret;
+
+	ret = atmel_nand_controller_remove_nands(nc);
+	if (ret)
+		return ret;
+
+	atmel_nand_controller_cleanup(nc);
+
+	return 0;
+}
+
+static const struct atmel_nand_controller_ops atmel_smc_nc_ops = {
+	.probe = atmel_smc_nand_controller_probe,
+	.remove = atmel_smc_nand_controller_remove,
+	.ecc_init = atmel_nand_ecc_init,
+	.nand_init = atmel_smc_nand_init,
+};
+
+static const struct atmel_nand_controller_caps atmel_rm9200_nc_caps = {
+	.ale_offs = BIT(21),
+	.cle_offs = BIT(22),
+	.ops = &atmel_smc_nc_ops,
+};
+
+static const struct atmel_nand_controller_caps atmel_sam9261_nc_caps = {
+	.ale_offs = BIT(22),
+	.cle_offs = BIT(21),
+	.ops = &atmel_smc_nc_ops,
+};
+
+static const struct atmel_nand_controller_caps atmel_sam9g45_nc_caps = {
+	.has_dma = true,
+	.ale_offs = BIT(21),
+	.cle_offs = BIT(22),
+	.ops = &atmel_smc_nc_ops,
+};
+
+/* Only used to parse old bindings. */
+static const struct atmel_nand_controller_caps atmel_rm9200_nand_caps = {
+	.ale_offs = BIT(21),
+	.cle_offs = BIT(22),
+	.ops = &atmel_smc_nc_ops,
+	.legacy_of_bindings = true,
+};
+
+static const struct atmel_nand_controller_caps atmel_sam9261_nand_caps = {
+	.ale_offs = BIT(22),
+	.cle_offs = BIT(21),
+	.ops = &atmel_smc_nc_ops,
+	.legacy_of_bindings = true,
+};
+
+static const struct atmel_nand_controller_caps atmel_sam9g45_nand_caps = {
+	.has_dma = true,
+	.ale_offs = BIT(21),
+	.cle_offs = BIT(22),
+	.ops = &atmel_smc_nc_ops,
+	.legacy_of_bindings = true,
+};
+
+static const struct of_device_id atmel_nand_controller_of_ids[] = {
+	{
+		.compatible = "atmel,at91rm9200-nand-controller",
+		.data = &atmel_rm9200_nc_caps,
+	},
+	{
+		.compatible = "atmel,at91sam9260-nand-controller",
+		.data = &atmel_rm9200_nc_caps,
+	},
+	{
+		.compatible = "atmel,at91sam9261-nand-controller",
+		.data = &atmel_sam9261_nc_caps,
+	},
+	{
+		.compatible = "atmel,at91sam9g45-nand-controller",
+		.data = &atmel_sam9g45_nc_caps,
+	},
+	{
+		.compatible = "atmel,sama5d3-nand-controller",
+		.data = &atmel_sama5_nc_caps,
+	},
+	/* Support for old/deprecated bindings: */
+	{
+		.compatible = "atmel,at91rm9200-nand",
+		.data = &atmel_rm9200_nand_caps,
+	},
+	{
+		.compatible = "atmel,sama5d4-nand",
+		.data = &atmel_rm9200_nand_caps,
+	},
+	{
+		.compatible = "atmel,sama5d2-nand",
+		.data = &atmel_rm9200_nand_caps,
+	},
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, atmel_nand_controller_of_ids);
+
+static int atmel_nand_controller_probe(struct platform_device *pdev)
+{
+	const struct atmel_nand_controller_caps *caps;
+
+	if (pdev->id_entry)
+		caps = (void *)pdev->id_entry->driver_data;
+	else
+		caps = of_device_get_match_data(&pdev->dev);
+
+	if (!caps) {
+		dev_err(&pdev->dev, "Could not retrieve NFC caps\n");
+		return -EINVAL;
+	}
+
+	if (caps->legacy_of_bindings) {
+		u32 ale_offs = 21;
+
+		/*
+		 * If we are parsing legacy DT props and the DT contains a
+		 * valid NFC node, forward the request to the sama5 logic.
+		 */
+		if (of_find_compatible_node(pdev->dev.of_node, NULL,
+					    "atmel,sama5d3-nfc"))
+			caps = &atmel_sama5_nand_caps;
+
+		/*
+		 * Even if the compatible says we are dealing with an
+		 * at91rm9200 controller, the atmel,nand-has-dma specify that
+		 * this controller supports DMA, which means we are in fact
+		 * dealing with an at91sam9g45+ controller.
+		 */
+		if (!caps->has_dma &&
+		    of_property_read_bool(pdev->dev.of_node,
+					  "atmel,nand-has-dma"))
+			caps = &atmel_sam9g45_nand_caps;
+
+		/*
+		 * All SoCs except the at91sam9261 are assigning ALE to A21 and
+		 * CLE to A22. If atmel,nand-addr-offset != 21 this means we're
+		 * actually dealing with an at91sam9261 controller.
+		 */
+		of_property_read_u32(pdev->dev.of_node,
+				     "atmel,nand-addr-offset", &ale_offs);
+		if (ale_offs != 21)
+			caps = &atmel_sam9261_nand_caps;
+	}
+
+	return caps->ops->probe(pdev, caps);
+}
+
+static int atmel_nand_controller_remove(struct platform_device *pdev)
+{
+	struct atmel_nand_controller *nc = platform_get_drvdata(pdev);
+
+	return nc->caps->ops->remove(nc);
+}
+
+static struct platform_driver atmel_nand_controller_driver = {
+	.driver = {
+		.name = "atmel-nand-controller",
+		.of_match_table = of_match_ptr(atmel_nand_controller_of_ids),
+	},
+	.probe = atmel_nand_controller_probe,
+	.remove = atmel_nand_controller_remove,
+};
+module_platform_driver(atmel_nand_controller_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Boris Brezillon <boris.brezillon@free-electrons.com>");
+MODULE_DESCRIPTION("NAND Flash Controller driver for Atmel SoCs");
+MODULE_ALIAS("platform:atmel-nand-controller");
diff --git a/drivers/mtd/nand/atmel/pmecc.c b/drivers/mtd/nand/atmel/pmecc.c
new file mode 100644
index 000000000000..55a8ee5306ea
--- /dev/null
+++ b/drivers/mtd/nand/atmel/pmecc.c
@@ -0,0 +1,1020 @@
+/*
+ * Copyright 2017 ATMEL
+ * Copyright 2017 Free Electrons
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * Derived from the atmel_nand.c driver which contained the following
+ * copyrights:
+ *
+ *   Copyright 2003 Rick Bronson
+ *
+ *   Derived from drivers/mtd/nand/autcpu12.c
+ *	Copyright 2001 Thomas Gleixner (gleixner@autronix.de)
+ *
+ *   Derived from drivers/mtd/spia.c
+ *	Copyright 2000 Steven J. Hill (sjhill@cotw.com)
+ *
+ *   Add Hardware ECC support for AT91SAM9260 / AT91SAM9263
+ *	Richard Genoud (richard.genoud@gmail.com), Adeneo Copyright 2007
+ *
+ *   Derived from Das U-Boot source code
+ *	(u-boot-1.1.5/board/atmel/at91sam9263ek/nand.c)
+ *      Copyright 2006 ATMEL Rousset, Lacressonniere Nicolas
+ *
+ *   Add Programmable Multibit ECC support for various AT91 SoC
+ *	Copyright 2012 ATMEL, Hong Xu
+ *
+ *   Add Nand Flash Controller support for SAMA5 SoC
+ *	Copyright 2013 ATMEL, Josh Wu (josh.wu@atmel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * The PMECC is an hardware assisted BCH engine, which means part of the
+ * ECC algorithm is left to the software. The hardware/software repartition
+ * is explained in the "PMECC Controller Functional Description" chapter in
+ * Atmel datasheets, and some of the functions in this file are directly
+ * implementing the algorithms described in the "Software Implementation"
+ * sub-section.
+ *
+ * TODO: it seems that the software BCH implementation in lib/bch.c is already
+ * providing some of the logic we are implementing here. It would be smart
+ * to expose the needed lib/bch.c helpers/functions and re-use them here.
+ */
+
+#include <linux/genalloc.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/mtd/nand.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "pmecc.h"
+
+/* Galois field dimension */
+#define PMECC_GF_DIMENSION_13			13
+#define PMECC_GF_DIMENSION_14			14
+
+/* Primitive Polynomial used by PMECC */
+#define PMECC_GF_13_PRIMITIVE_POLY		0x201b
+#define PMECC_GF_14_PRIMITIVE_POLY		0x4443
+
+#define PMECC_LOOKUP_TABLE_SIZE_512		0x2000
+#define PMECC_LOOKUP_TABLE_SIZE_1024		0x4000
+
+/* Time out value for reading PMECC status register */
+#define PMECC_MAX_TIMEOUT_MS			100
+
+/* PMECC Register Definitions */
+#define ATMEL_PMECC_CFG				0x0
+#define PMECC_CFG_BCH_STRENGTH(x)		(x)
+#define PMECC_CFG_BCH_STRENGTH_MASK		GENMASK(2, 0)
+#define PMECC_CFG_SECTOR512			(0 << 4)
+#define PMECC_CFG_SECTOR1024			(1 << 4)
+#define PMECC_CFG_NSECTORS(x)			((fls(x) - 1) << 8)
+#define PMECC_CFG_READ_OP			(0 << 12)
+#define PMECC_CFG_WRITE_OP			(1 << 12)
+#define PMECC_CFG_SPARE_ENABLE			BIT(16)
+#define PMECC_CFG_AUTO_ENABLE			BIT(20)
+
+#define ATMEL_PMECC_SAREA			0x4
+#define ATMEL_PMECC_SADDR			0x8
+#define ATMEL_PMECC_EADDR			0xc
+
+#define ATMEL_PMECC_CLK				0x10
+#define PMECC_CLK_133MHZ			(2 << 0)
+
+#define ATMEL_PMECC_CTRL			0x14
+#define PMECC_CTRL_RST				BIT(0)
+#define PMECC_CTRL_DATA				BIT(1)
+#define PMECC_CTRL_USER				BIT(2)
+#define PMECC_CTRL_ENABLE			BIT(4)
+#define PMECC_CTRL_DISABLE			BIT(5)
+
+#define ATMEL_PMECC_SR				0x18
+#define PMECC_SR_BUSY				BIT(0)
+#define PMECC_SR_ENABLE				BIT(4)
+
+#define ATMEL_PMECC_IER				0x1c
+#define ATMEL_PMECC_IDR				0x20
+#define ATMEL_PMECC_IMR				0x24
+#define ATMEL_PMECC_ISR				0x28
+#define PMECC_ERROR_INT				BIT(0)
+
+#define ATMEL_PMECC_ECC(sector, n)		\
+	((((sector) + 1) * 0x40) + (n))
+
+#define ATMEL_PMECC_REM(sector, n)		\
+	((((sector) + 1) * 0x40) + ((n) * 4) + 0x200)
+
+/* PMERRLOC Register Definitions */
+#define ATMEL_PMERRLOC_ELCFG			0x0
+#define PMERRLOC_ELCFG_SECTOR_512		(0 << 0)
+#define PMERRLOC_ELCFG_SECTOR_1024		(1 << 0)
+#define PMERRLOC_ELCFG_NUM_ERRORS(n)		((n) << 16)
+
+#define ATMEL_PMERRLOC_ELPRIM			0x4
+#define ATMEL_PMERRLOC_ELEN			0x8
+#define ATMEL_PMERRLOC_ELDIS			0xc
+#define PMERRLOC_DISABLE			BIT(0)
+
+#define ATMEL_PMERRLOC_ELSR			0x10
+#define PMERRLOC_ELSR_BUSY			BIT(0)
+
+#define ATMEL_PMERRLOC_ELIER			0x14
+#define ATMEL_PMERRLOC_ELIDR			0x18
+#define ATMEL_PMERRLOC_ELIMR			0x1c
+#define ATMEL_PMERRLOC_ELISR			0x20
+#define PMERRLOC_ERR_NUM_MASK			GENMASK(12, 8)
+#define PMERRLOC_CALC_DONE			BIT(0)
+
+#define ATMEL_PMERRLOC_SIGMA(x)			(((x) * 0x4) + 0x28)
+
+#define ATMEL_PMERRLOC_EL(offs, x)		(((x) * 0x4) + (offs))
+
+struct atmel_pmecc_gf_tables {
+	u16 *alpha_to;
+	u16 *index_of;
+};
+
+struct atmel_pmecc_caps {
+	const int *strengths;
+	int nstrengths;
+	int el_offset;
+	bool correct_erased_chunks;
+};
+
+struct atmel_pmecc {
+	struct device *dev;
+	const struct atmel_pmecc_caps *caps;
+
+	struct {
+		void __iomem *base;
+		void __iomem *errloc;
+	} regs;
+
+	struct mutex lock;
+};
+
+struct atmel_pmecc_user_conf_cache {
+	u32 cfg;
+	u32 sarea;
+	u32 saddr;
+	u32 eaddr;
+};
+
+struct atmel_pmecc_user {
+	struct atmel_pmecc_user_conf_cache cache;
+	struct atmel_pmecc *pmecc;
+	const struct atmel_pmecc_gf_tables *gf_tables;
+	int eccbytes;
+	s16 *partial_syn;
+	s16 *si;
+	s16 *lmu;
+	s16 *smu;
+	s32 *mu;
+	s32 *dmu;
+	s32 *delta;
+	u32 isr;
+};
+
+static DEFINE_MUTEX(pmecc_gf_tables_lock);
+static const struct atmel_pmecc_gf_tables *pmecc_gf_tables_512;
+static const struct atmel_pmecc_gf_tables *pmecc_gf_tables_1024;
+
+static inline int deg(unsigned int poly)
+{
+	/* polynomial degree is the most-significant bit index */
+	return fls(poly) - 1;
+}
+
+static int atmel_pmecc_build_gf_tables(int mm, unsigned int poly,
+				       struct atmel_pmecc_gf_tables *gf_tables)
+{
+	unsigned int i, x = 1;
+	const unsigned int k = BIT(deg(poly));
+	unsigned int nn = BIT(mm) - 1;
+
+	/* primitive polynomial must be of degree m */
+	if (k != (1u << mm))
+		return -EINVAL;
+
+	for (i = 0; i < nn; i++) {
+		gf_tables->alpha_to[i] = x;
+		gf_tables->index_of[x] = i;
+		if (i && (x == 1))
+			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
+			return -EINVAL;
+		x <<= 1;
+		if (x & k)
+			x ^= poly;
+	}
+	gf_tables->alpha_to[nn] = 1;
+	gf_tables->index_of[0] = 0;
+
+	return 0;
+}
+
+static const struct atmel_pmecc_gf_tables *
+atmel_pmecc_create_gf_tables(const struct atmel_pmecc_user_req *req)
+{
+	struct atmel_pmecc_gf_tables *gf_tables;
+	unsigned int poly, degree, table_size;
+	int ret;
+
+	if (req->ecc.sectorsize == 512) {
+		degree = PMECC_GF_DIMENSION_13;
+		poly = PMECC_GF_13_PRIMITIVE_POLY;
+		table_size = PMECC_LOOKUP_TABLE_SIZE_512;
+	} else {
+		degree = PMECC_GF_DIMENSION_14;
+		poly = PMECC_GF_14_PRIMITIVE_POLY;
+		table_size = PMECC_LOOKUP_TABLE_SIZE_1024;
+	}
+
+	gf_tables = kzalloc(sizeof(*gf_tables) +
+			    (2 * table_size * sizeof(u16)),
+			    GFP_KERNEL);
+	if (!gf_tables)
+		return ERR_PTR(-ENOMEM);
+
+	gf_tables->alpha_to = (void *)(gf_tables + 1);
+	gf_tables->index_of = gf_tables->alpha_to + table_size;
+
+	ret = atmel_pmecc_build_gf_tables(degree, poly, gf_tables);
+	if (ret) {
+		kfree(gf_tables);
+		return ERR_PTR(ret);
+	}
+
+	return gf_tables;
+}
+
+static const struct atmel_pmecc_gf_tables *
+atmel_pmecc_get_gf_tables(const struct atmel_pmecc_user_req *req)
+{
+	const struct atmel_pmecc_gf_tables **gf_tables, *ret;
+
+	mutex_lock(&pmecc_gf_tables_lock);
+	if (req->ecc.sectorsize == 512)
+		gf_tables = &pmecc_gf_tables_512;
+	else
+		gf_tables = &pmecc_gf_tables_1024;
+
+	ret = *gf_tables;
+
+	if (!ret) {
+		ret = atmel_pmecc_create_gf_tables(req);
+		if (!IS_ERR(ret))
+			*gf_tables = ret;
+	}
+	mutex_unlock(&pmecc_gf_tables_lock);
+
+	return ret;
+}
+
+static int atmel_pmecc_prepare_user_req(struct atmel_pmecc *pmecc,
+					struct atmel_pmecc_user_req *req)
+{
+	int i, max_eccbytes, eccbytes = 0, eccstrength = 0;
+
+	if (req->pagesize <= 0 || req->oobsize <= 0 || req->ecc.bytes <= 0)
+		return -EINVAL;
+
+	if (req->ecc.ooboffset >= 0 &&
+	    req->ecc.ooboffset + req->ecc.bytes > req->oobsize)
+		return -EINVAL;
+
+	if (req->ecc.sectorsize == ATMEL_PMECC_SECTOR_SIZE_AUTO) {
+		if (req->ecc.strength != ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH)
+			return -EINVAL;
+
+		if (req->pagesize > 512)
+			req->ecc.sectorsize = 1024;
+		else
+			req->ecc.sectorsize = 512;
+	}
+
+	if (req->ecc.sectorsize != 512 && req->ecc.sectorsize != 1024)
+		return -EINVAL;
+
+	if (req->pagesize % req->ecc.sectorsize)
+		return -EINVAL;
+
+	req->ecc.nsectors = req->pagesize / req->ecc.sectorsize;
+
+	max_eccbytes = req->ecc.bytes;
+
+	for (i = 0; i < pmecc->caps->nstrengths; i++) {
+		int nbytes, strength = pmecc->caps->strengths[i];
+
+		if (req->ecc.strength != ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH &&
+		    strength < req->ecc.strength)
+			continue;
+
+		nbytes = DIV_ROUND_UP(strength * fls(8 * req->ecc.sectorsize),
+				      8);
+		nbytes *= req->ecc.nsectors;
+
+		if (nbytes > max_eccbytes)
+			break;
+
+		eccstrength = strength;
+		eccbytes = nbytes;
+
+		if (req->ecc.strength != ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH)
+			break;
+	}
+
+	if (!eccstrength)
+		return -EINVAL;
+
+	req->ecc.bytes = eccbytes;
+	req->ecc.strength = eccstrength;
+
+	if (req->ecc.ooboffset < 0)
+		req->ecc.ooboffset = req->oobsize - eccbytes;
+
+	return 0;
+}
+
+struct atmel_pmecc_user *
+atmel_pmecc_create_user(struct atmel_pmecc *pmecc,
+			struct atmel_pmecc_user_req *req)
+{
+	struct atmel_pmecc_user *user;
+	const struct atmel_pmecc_gf_tables *gf_tables;
+	int strength, size, ret;
+
+	ret = atmel_pmecc_prepare_user_req(pmecc, req);
+	if (ret)
+		return ERR_PTR(ret);
+
+	size = sizeof(*user);
+	size = ALIGN(size, sizeof(u16));
+	/* Reserve space for partial_syn, si and smu */
+	size += ((2 * req->ecc.strength) + 1) * sizeof(u16) *
+		(2 + req->ecc.strength + 2);
+	/* Reserve space for lmu. */
+	size += (req->ecc.strength + 1) * sizeof(u16);
+	/* Reserve space for mu, dmu and delta. */
+	size = ALIGN(size, sizeof(s32));
+	size += (req->ecc.strength + 1) * sizeof(s32);
+
+	user = kzalloc(size, GFP_KERNEL);
+	if (!user)
+		return ERR_PTR(-ENOMEM);
+
+	user->pmecc = pmecc;
+
+	user->partial_syn = (s16 *)PTR_ALIGN(user + 1, sizeof(u16));
+	user->si = user->partial_syn + ((2 * req->ecc.strength) + 1);
+	user->lmu = user->si + ((2 * req->ecc.strength) + 1);
+	user->smu = user->lmu + (req->ecc.strength + 1);
+	user->mu = (s32 *)PTR_ALIGN(user->smu +
+				    (((2 * req->ecc.strength) + 1) *
+				     (req->ecc.strength + 2)),
+				    sizeof(s32));
+	user->dmu = user->mu + req->ecc.strength + 1;
+	user->delta = user->dmu + req->ecc.strength + 1;
+
+	gf_tables = atmel_pmecc_get_gf_tables(req);
+	if (IS_ERR(gf_tables)) {
+		kfree(user);
+		return ERR_CAST(gf_tables);
+	}
+
+	user->gf_tables = gf_tables;
+
+	user->eccbytes = req->ecc.bytes / req->ecc.nsectors;
+
+	for (strength = 0; strength < pmecc->caps->nstrengths; strength++) {
+		if (pmecc->caps->strengths[strength] == req->ecc.strength)
+			break;
+	}
+
+	user->cache.cfg = PMECC_CFG_BCH_STRENGTH(strength) |
+			  PMECC_CFG_NSECTORS(req->ecc.nsectors);
+
+	if (req->ecc.sectorsize == 1024)
+		user->cache.cfg |= PMECC_CFG_SECTOR1024;
+
+	user->cache.sarea = req->oobsize - 1;
+	user->cache.saddr = req->ecc.ooboffset;
+	user->cache.eaddr = req->ecc.ooboffset + req->ecc.bytes - 1;
+
+	return user;
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_create_user);
+
+void atmel_pmecc_destroy_user(struct atmel_pmecc_user *user)
+{
+	kfree(user);
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_destroy_user);
+
+static int get_strength(struct atmel_pmecc_user *user)
+{
+	const int *strengths = user->pmecc->caps->strengths;
+
+	return strengths[user->cache.cfg & PMECC_CFG_BCH_STRENGTH_MASK];
+}
+
+static int get_sectorsize(struct atmel_pmecc_user *user)
+{
+	return user->cache.cfg & PMECC_LOOKUP_TABLE_SIZE_1024 ? 1024 : 512;
+}
+
+static void atmel_pmecc_gen_syndrome(struct atmel_pmecc_user *user, int sector)
+{
+	int strength = get_strength(user);
+	u32 value;
+	int i;
+
+	/* Fill odd syndromes */
+	for (i = 0; i < strength; i++) {
+		value = readl_relaxed(user->pmecc->regs.base +
+				      ATMEL_PMECC_REM(sector, i / 2));
+		if (i & 1)
+			value >>= 16;
+
+		user->partial_syn[(2 * i) + 1] = value;
+	}
+}
+
+static void atmel_pmecc_substitute(struct atmel_pmecc_user *user)
+{
+	int degree = get_sectorsize(user) == 512 ? 13 : 14;
+	int cw_len = BIT(degree) - 1;
+	int strength = get_strength(user);
+	s16 *alpha_to = user->gf_tables->alpha_to;
+	s16 *index_of = user->gf_tables->index_of;
+	s16 *partial_syn = user->partial_syn;
+	s16 *si;
+	int i, j;
+
+	/*
+	 * si[] is a table that holds the current syndrome value,
+	 * an element of that table belongs to the field
+	 */
+	si = user->si;
+
+	memset(&si[1], 0, sizeof(s16) * ((2 * strength) - 1));
+
+	/* Computation 2t syndromes based on S(x) */
+	/* Odd syndromes */
+	for (i = 1; i < 2 * strength; i += 2) {
+		for (j = 0; j < degree; j++) {
+			if (partial_syn[i] & BIT(j))
+				si[i] = alpha_to[i * j] ^ si[i];
+		}
+	}
+	/* Even syndrome = (Odd syndrome) ** 2 */
+	for (i = 2, j = 1; j <= strength; i = ++j << 1) {
+		if (si[j] == 0) {
+			si[i] = 0;
+		} else {
+			s16 tmp;
+
+			tmp = index_of[si[j]];
+			tmp = (tmp * 2) % cw_len;
+			si[i] = alpha_to[tmp];
+		}
+	}
+}
+
+static void atmel_pmecc_get_sigma(struct atmel_pmecc_user *user)
+{
+	s16 *lmu = user->lmu;
+	s16 *si = user->si;
+	s32 *mu = user->mu;
+	s32 *dmu = user->dmu;
+	s32 *delta = user->delta;
+	int degree = get_sectorsize(user) == 512 ? 13 : 14;
+	int cw_len = BIT(degree) - 1;
+	int strength = get_strength(user);
+	int num = 2 * strength + 1;
+	s16 *index_of = user->gf_tables->index_of;
+	s16 *alpha_to = user->gf_tables->alpha_to;
+	int i, j, k;
+	u32 dmu_0_count, tmp;
+	s16 *smu = user->smu;
+
+	/* index of largest delta */
+	int ro;
+	int largest;
+	int diff;
+
+	dmu_0_count = 0;
+
+	/* First Row */
+
+	/* Mu */
+	mu[0] = -1;
+
+	memset(smu, 0, sizeof(s16) * num);
+	smu[0] = 1;
+
+	/* discrepancy set to 1 */
+	dmu[0] = 1;
+	/* polynom order set to 0 */
+	lmu[0] = 0;
+	delta[0] = (mu[0] * 2 - lmu[0]) >> 1;
+
+	/* Second Row */
+
+	/* Mu */
+	mu[1] = 0;
+	/* Sigma(x) set to 1 */
+	memset(&smu[num], 0, sizeof(s16) * num);
+	smu[num] = 1;
+
+	/* discrepancy set to S1 */
+	dmu[1] = si[1];
+
+	/* polynom order set to 0 */
+	lmu[1] = 0;
+
+	delta[1] = (mu[1] * 2 - lmu[1]) >> 1;
+
+	/* Init the Sigma(x) last row */
+	memset(&smu[(strength + 1) * num], 0, sizeof(s16) * num);
+
+	for (i = 1; i <= strength; i++) {
+		mu[i + 1] = i << 1;
+		/* Begin Computing Sigma (Mu+1) and L(mu) */
+		/* check if discrepancy is set to 0 */
+		if (dmu[i] == 0) {
+			dmu_0_count++;
+
+			tmp = ((strength - (lmu[i] >> 1) - 1) / 2);
+			if ((strength - (lmu[i] >> 1) - 1) & 0x1)
+				tmp += 2;
+			else
+				tmp += 1;
+
+			if (dmu_0_count == tmp) {
+				for (j = 0; j <= (lmu[i] >> 1) + 1; j++)
+					smu[(strength + 1) * num + j] =
+							smu[i * num + j];
+
+				lmu[strength + 1] = lmu[i];
+				return;
+			}
+
+			/* copy polynom */
+			for (j = 0; j <= lmu[i] >> 1; j++)
+				smu[(i + 1) * num + j] = smu[i * num + j];
+
+			/* copy previous polynom order to the next */
+			lmu[i + 1] = lmu[i];
+		} else {
+			ro = 0;
+			largest = -1;
+			/* find largest delta with dmu != 0 */
+			for (j = 0; j < i; j++) {
+				if ((dmu[j]) && (delta[j] > largest)) {
+					largest = delta[j];
+					ro = j;
+				}
+			}
+
+			/* compute difference */
+			diff = (mu[i] - mu[ro]);
+
+			/* Compute degree of the new smu polynomial */
+			if ((lmu[i] >> 1) > ((lmu[ro] >> 1) + diff))
+				lmu[i + 1] = lmu[i];
+			else
+				lmu[i + 1] = ((lmu[ro] >> 1) + diff) * 2;
+
+			/* Init smu[i+1] with 0 */
+			for (k = 0; k < num; k++)
+				smu[(i + 1) * num + k] = 0;
+
+			/* Compute smu[i+1] */
+			for (k = 0; k <= lmu[ro] >> 1; k++) {
+				s16 a, b, c;
+
+				if (!(smu[ro * num + k] && dmu[i]))
+					continue;
+
+				a = index_of[dmu[i]];
+				b = index_of[dmu[ro]];
+				c = index_of[smu[ro * num + k]];
+				tmp = a + (cw_len - b) + c;
+				a = alpha_to[tmp % cw_len];
+				smu[(i + 1) * num + (k + diff)] = a;
+			}
+
+			for (k = 0; k <= lmu[i] >> 1; k++)
+				smu[(i + 1) * num + k] ^= smu[i * num + k];
+		}
+
+		/* End Computing Sigma (Mu+1) and L(mu) */
+		/* In either case compute delta */
+		delta[i + 1] = (mu[i + 1] * 2 - lmu[i + 1]) >> 1;
+
+		/* Do not compute discrepancy for the last iteration */
+		if (i >= strength)
+			continue;
+
+		for (k = 0; k <= (lmu[i + 1] >> 1); k++) {
+			tmp = 2 * (i - 1);
+			if (k == 0) {
+				dmu[i + 1] = si[tmp + 3];
+			} else if (smu[(i + 1) * num + k] && si[tmp + 3 - k]) {
+				s16 a, b, c;
+
+				a = index_of[smu[(i + 1) * num + k]];
+				b = si[2 * (i - 1) + 3 - k];
+				c = index_of[b];
+				tmp = a + c;
+				tmp %= cw_len;
+				dmu[i + 1] = alpha_to[tmp] ^ dmu[i + 1];
+			}
+		}
+	}
+}
+
+static int atmel_pmecc_err_location(struct atmel_pmecc_user *user)
+{
+	int sector_size = get_sectorsize(user);
+	int degree = sector_size == 512 ? 13 : 14;
+	struct atmel_pmecc *pmecc = user->pmecc;
+	int strength = get_strength(user);
+	int ret, roots_nbr, i, err_nbr = 0;
+	int num = (2 * strength) + 1;
+	s16 *smu = user->smu;
+	u32 val;
+
+	writel(PMERRLOC_DISABLE, pmecc->regs.errloc + ATMEL_PMERRLOC_ELDIS);
+
+	for (i = 0; i <= user->lmu[strength + 1] >> 1; i++) {
+		writel_relaxed(smu[(strength + 1) * num + i],
+			       pmecc->regs.errloc + ATMEL_PMERRLOC_SIGMA(i));
+		err_nbr++;
+	}
+
+	val = (err_nbr - 1) << 16;
+	if (sector_size == 1024)
+		val |= 1;
+
+	writel(val, pmecc->regs.errloc + ATMEL_PMERRLOC_ELCFG);
+	writel((sector_size * 8) + (degree * strength),
+	       pmecc->regs.errloc + ATMEL_PMERRLOC_ELEN);
+
+	ret = readl_relaxed_poll_timeout(pmecc->regs.errloc +
+					 ATMEL_PMERRLOC_ELISR,
+					 val, val & PMERRLOC_CALC_DONE, 0,
+					 PMECC_MAX_TIMEOUT_MS * 1000);
+	if (ret) {
+		dev_err(pmecc->dev,
+			"PMECC: Timeout to calculate error location.\n");
+		return ret;
+	}
+
+	roots_nbr = (val & PMERRLOC_ERR_NUM_MASK) >> 8;
+	/* Number of roots == degree of smu hence <= cap */
+	if (roots_nbr == user->lmu[strength + 1] >> 1)
+		return err_nbr - 1;
+
+	/*
+	 * Number of roots does not match the degree of smu
+	 * unable to correct error.
+	 */
+	return -EBADMSG;
+}
+
+int atmel_pmecc_correct_sector(struct atmel_pmecc_user *user, int sector,
+			       void *data, void *ecc)
+{
+	struct atmel_pmecc *pmecc = user->pmecc;
+	int sectorsize = get_sectorsize(user);
+	int eccbytes = user->eccbytes;
+	int i, nerrors;
+
+	if (!(user->isr & BIT(sector)))
+		return 0;
+
+	atmel_pmecc_gen_syndrome(user, sector);
+	atmel_pmecc_substitute(user);
+	atmel_pmecc_get_sigma(user);
+
+	nerrors = atmel_pmecc_err_location(user);
+	if (nerrors < 0)
+		return nerrors;
+
+	for (i = 0; i < nerrors; i++) {
+		const char *area;
+		int byte, bit;
+		u32 errpos;
+		u8 *ptr;
+
+		errpos = readl_relaxed(pmecc->regs.errloc +
+				ATMEL_PMERRLOC_EL(pmecc->caps->el_offset, i));
+		errpos--;
+
+		byte = errpos / 8;
+		bit = errpos % 8;
+
+		if (byte < sectorsize) {
+			ptr = data + byte;
+			area = "data";
+		} else if (byte < sectorsize + eccbytes) {
+			ptr = ecc + byte - sectorsize;
+			area = "ECC";
+		} else {
+			dev_dbg(pmecc->dev,
+				"Invalid errpos value (%d, max is %d)\n",
+				errpos, (sectorsize + eccbytes) * 8);
+			return -EINVAL;
+		}
+
+		dev_dbg(pmecc->dev,
+			"Bit flip in %s area, byte %d: 0x%02x -> 0x%02x\n",
+			area, byte, *ptr, (unsigned int)(*ptr ^ BIT(bit)));
+
+		*ptr ^= BIT(bit);
+	}
+
+	return nerrors;
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_correct_sector);
+
+bool atmel_pmecc_correct_erased_chunks(struct atmel_pmecc_user *user)
+{
+	return user->pmecc->caps->correct_erased_chunks;
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_correct_erased_chunks);
+
+void atmel_pmecc_get_generated_eccbytes(struct atmel_pmecc_user *user,
+					int sector, void *ecc)
+{
+	struct atmel_pmecc *pmecc = user->pmecc;
+	u8 *ptr = ecc;
+	int i;
+
+	for (i = 0; i < user->eccbytes; i++)
+		ptr[i] = readb_relaxed(pmecc->regs.base +
+				       ATMEL_PMECC_ECC(sector, i));
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_get_generated_eccbytes);
+
+int atmel_pmecc_enable(struct atmel_pmecc_user *user, int op)
+{
+	struct atmel_pmecc *pmecc = user->pmecc;
+	u32 cfg;
+
+	if (op != NAND_ECC_READ && op != NAND_ECC_WRITE) {
+		dev_err(pmecc->dev, "Bad ECC operation!");
+		return -EINVAL;
+	}
+
+	mutex_lock(&user->pmecc->lock);
+
+	cfg = user->cache.cfg;
+	if (op == NAND_ECC_WRITE)
+		cfg |= PMECC_CFG_WRITE_OP;
+	else
+		cfg |= PMECC_CFG_AUTO_ENABLE;
+
+	writel(cfg, pmecc->regs.base + ATMEL_PMECC_CFG);
+	writel(user->cache.sarea, pmecc->regs.base + ATMEL_PMECC_SAREA);
+	writel(user->cache.saddr, pmecc->regs.base + ATMEL_PMECC_SADDR);
+	writel(user->cache.eaddr, pmecc->regs.base + ATMEL_PMECC_EADDR);
+
+	writel(PMECC_CTRL_ENABLE, pmecc->regs.base + ATMEL_PMECC_CTRL);
+	writel(PMECC_CTRL_DATA, pmecc->regs.base + ATMEL_PMECC_CTRL);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_enable);
+
+void atmel_pmecc_disable(struct atmel_pmecc_user *user)
+{
+	struct atmel_pmecc *pmecc = user->pmecc;
+
+	writel(PMECC_CTRL_RST, pmecc->regs.base + ATMEL_PMECC_CTRL);
+	writel(PMECC_CTRL_DISABLE, pmecc->regs.base + ATMEL_PMECC_CTRL);
+	mutex_unlock(&user->pmecc->lock);
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_disable);
+
+int atmel_pmecc_wait_rdy(struct atmel_pmecc_user *user)
+{
+	struct atmel_pmecc *pmecc = user->pmecc;
+	u32 status;
+	int ret;
+
+	ret = readl_relaxed_poll_timeout(pmecc->regs.base +
+					 ATMEL_PMECC_SR,
+					 status, !(status & PMECC_SR_BUSY), 0,
+					 PMECC_MAX_TIMEOUT_MS * 1000);
+	if (ret) {
+		dev_err(pmecc->dev,
+			"Timeout while waiting for PMECC ready.\n");
+		return ret;
+	}
+
+	user->isr = readl_relaxed(pmecc->regs.base + ATMEL_PMECC_ISR);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_wait_rdy);
+
+static struct atmel_pmecc *atmel_pmecc_create(struct platform_device *pdev,
+					const struct atmel_pmecc_caps *caps,
+					int pmecc_res_idx, int errloc_res_idx)
+{
+	struct device *dev = &pdev->dev;
+	struct atmel_pmecc *pmecc;
+	struct resource *res;
+
+	pmecc = devm_kzalloc(dev, sizeof(*pmecc), GFP_KERNEL);
+	if (!pmecc)
+		return ERR_PTR(-ENOMEM);
+
+	pmecc->caps = caps;
+	pmecc->dev = dev;
+	mutex_init(&pmecc->lock);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, pmecc_res_idx);
+	pmecc->regs.base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pmecc->regs.base))
+		return ERR_CAST(pmecc->regs.base);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, errloc_res_idx);
+	pmecc->regs.errloc = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pmecc->regs.errloc))
+		return ERR_CAST(pmecc->regs.errloc);
+
+	/* Disable all interrupts before registering the PMECC handler. */
+	writel(0xffffffff, pmecc->regs.base + ATMEL_PMECC_IDR);
+
+	/* Reset the ECC engine */
+	writel(PMECC_CTRL_RST, pmecc->regs.base + ATMEL_PMECC_CTRL);
+	writel(PMECC_CTRL_DISABLE, pmecc->regs.base + ATMEL_PMECC_CTRL);
+
+	return pmecc;
+}
+
+static void devm_atmel_pmecc_put(struct device *dev, void *res)
+{
+	struct atmel_pmecc **pmecc = res;
+
+	put_device((*pmecc)->dev);
+}
+
+static struct atmel_pmecc *atmel_pmecc_get_by_node(struct device *userdev,
+						   struct device_node *np)
+{
+	struct platform_device *pdev;
+	struct atmel_pmecc *pmecc, **ptr;
+
+	pdev = of_find_device_by_node(np);
+	if (!pdev || !platform_get_drvdata(pdev))
+		return ERR_PTR(-EPROBE_DEFER);
+
+	ptr = devres_alloc(devm_atmel_pmecc_put, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	get_device(&pdev->dev);
+	pmecc = platform_get_drvdata(pdev);
+
+	*ptr = pmecc;
+
+	devres_add(userdev, ptr);
+
+	return pmecc;
+}
+
+static const int atmel_pmecc_strengths[] = { 2, 4, 8, 12, 24, 32 };
+
+static struct atmel_pmecc_caps at91sam9g45_caps = {
+	.strengths = atmel_pmecc_strengths,
+	.nstrengths = 5,
+	.el_offset = 0x8c,
+};
+
+static struct atmel_pmecc_caps sama5d4_caps = {
+	.strengths = atmel_pmecc_strengths,
+	.nstrengths = 5,
+	.el_offset = 0x8c,
+	.correct_erased_chunks = true,
+};
+
+static struct atmel_pmecc_caps sama5d2_caps = {
+	.strengths = atmel_pmecc_strengths,
+	.nstrengths = 6,
+	.el_offset = 0xac,
+	.correct_erased_chunks = true,
+};
+
+static const struct of_device_id atmel_pmecc_legacy_match[] = {
+	{ .compatible = "atmel,sama5d4-nand", &sama5d4_caps },
+	{ .compatible = "atmel,sama5d2-nand", &sama5d2_caps },
+	{ /* sentinel */ }
+};
+
+struct atmel_pmecc *devm_atmel_pmecc_get(struct device *userdev)
+{
+	struct atmel_pmecc *pmecc;
+	struct device_node *np;
+
+	if (!userdev)
+		return ERR_PTR(-EINVAL);
+
+	if (!userdev->of_node)
+		return NULL;
+
+	np = of_parse_phandle(userdev->of_node, "ecc-engine", 0);
+	if (np) {
+		pmecc = atmel_pmecc_get_by_node(userdev, np);
+		of_node_put(np);
+	} else {
+		/*
+		 * Support old DT bindings: in this case the PMECC iomem
+		 * resources are directly defined in the user pdev at position
+		 * 1 and 2. Extract all relevant information from there.
+		 */
+		struct platform_device *pdev = to_platform_device(userdev);
+		const struct atmel_pmecc_caps *caps;
+
+		/* No PMECC engine available. */
+		if (!of_property_read_bool(userdev->of_node,
+					   "atmel,has-pmecc"))
+			return NULL;
+
+		caps = &at91sam9g45_caps;
+
+		/*
+		 * Try to find the NFC subnode and extract the associated caps
+		 * from there.
+		 */
+		np = of_find_compatible_node(userdev->of_node, NULL,
+					     "atmel,sama5d3-nfc");
+		if (np) {
+			const struct of_device_id *match;
+
+			match = of_match_node(atmel_pmecc_legacy_match, np);
+			if (match && match->data)
+				caps = match->data;
+
+			of_node_put(np);
+		}
+
+		pmecc = atmel_pmecc_create(pdev, caps, 1, 2);
+	}
+
+	return pmecc;
+}
+EXPORT_SYMBOL(devm_atmel_pmecc_get);
+
+static const struct of_device_id atmel_pmecc_match[] = {
+	{ .compatible = "atmel,at91sam9g45-pmecc", &at91sam9g45_caps },
+	{ .compatible = "atmel,sama5d4-pmecc", &sama5d4_caps },
+	{ .compatible = "atmel,sama5d2-pmecc", &sama5d2_caps },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, atmel_pmecc_match);
+
+static int atmel_pmecc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	const struct atmel_pmecc_caps *caps;
+	struct atmel_pmecc *pmecc;
+
+	caps = of_device_get_match_data(&pdev->dev);
+	if (!caps) {
+		dev_err(dev, "Invalid caps\n");
+		return -EINVAL;
+	}
+
+	pmecc = atmel_pmecc_create(pdev, caps, 0, 1);
+	if (IS_ERR(pmecc))
+		return PTR_ERR(pmecc);
+
+	platform_set_drvdata(pdev, pmecc);
+
+	return 0;
+}
+
+static struct platform_driver atmel_pmecc_driver = {
+	.driver = {
+		.name = "atmel-pmecc",
+		.of_match_table = of_match_ptr(atmel_pmecc_match),
+	},
+	.probe = atmel_pmecc_probe,
+};
+module_platform_driver(atmel_pmecc_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Boris Brezillon <boris.brezillon@free-electrons.com>");
+MODULE_DESCRIPTION("PMECC engine driver");
+MODULE_ALIAS("platform:atmel_pmecc");
diff --git a/drivers/mtd/nand/atmel/pmecc.h b/drivers/mtd/nand/atmel/pmecc.h
new file mode 100644
index 000000000000..a8ddbfca2ea5
--- /dev/null
+++ b/drivers/mtd/nand/atmel/pmecc.h
@@ -0,0 +1,73 @@
+/*
+ * © Copyright 2016 ATMEL
+ * © Copyright 2016 Free Electrons
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * Derived from the atmel_nand.c driver which contained the following
+ * copyrights:
+ *
+ *    Copyright © 2003 Rick Bronson
+ *
+ *    Derived from drivers/mtd/nand/autcpu12.c
+ *        Copyright © 2001 Thomas Gleixner (gleixner@autronix.de)
+ *
+ *    Derived from drivers/mtd/spia.c
+ *        Copyright © 2000 Steven J. Hill (sjhill@cotw.com)
+ *
+ *
+ *    Add Hardware ECC support for AT91SAM9260 / AT91SAM9263
+ *        Richard Genoud (richard.genoud@gmail.com), Adeneo Copyright © 2007
+ *
+ *        Derived from Das U-Boot source code
+ *              (u-boot-1.1.5/board/atmel/at91sam9263ek/nand.c)
+ *        © Copyright 2006 ATMEL Rousset, Lacressonniere Nicolas
+ *
+ *    Add Programmable Multibit ECC support for various AT91 SoC
+ *        © Copyright 2012 ATMEL, Hong Xu
+ *
+ *    Add Nand Flash Controller support for SAMA5 SoC
+ *        © Copyright 2013 ATMEL, Josh Wu (josh.wu@atmel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef ATMEL_PMECC_H
+#define ATMEL_PMECC_H
+
+#define ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH	0
+#define ATMEL_PMECC_SECTOR_SIZE_AUTO		0
+#define ATMEL_PMECC_OOBOFFSET_AUTO		-1
+
+struct atmel_pmecc_user_req {
+	int pagesize;
+	int oobsize;
+	struct {
+		int strength;
+		int bytes;
+		int sectorsize;
+		int nsectors;
+		int ooboffset;
+	} ecc;
+};
+
+struct atmel_pmecc *devm_atmel_pmecc_get(struct device *dev);
+
+struct atmel_pmecc_user *
+atmel_pmecc_create_user(struct atmel_pmecc *pmecc,
+			struct atmel_pmecc_user_req *req);
+void atmel_pmecc_destroy_user(struct atmel_pmecc_user *user);
+
+int atmel_pmecc_enable(struct atmel_pmecc_user *user, int op);
+void atmel_pmecc_disable(struct atmel_pmecc_user *user);
+int atmel_pmecc_wait_rdy(struct atmel_pmecc_user *user);
+int atmel_pmecc_correct_sector(struct atmel_pmecc_user *user, int sector,
+			       void *data, void *ecc);
+bool atmel_pmecc_correct_erased_chunks(struct atmel_pmecc_user *user);
+void atmel_pmecc_get_generated_eccbytes(struct atmel_pmecc_user *user,
+					int sector, void *ecc);
+
+#endif /* ATMEL_PMECC_H */
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
deleted file mode 100644
index 9ebd5ecefea6..000000000000
--- a/drivers/mtd/nand/atmel_nand.c
+++ /dev/null
@@ -1,2479 +0,0 @@
-/*
- *  Copyright © 2003 Rick Bronson
- *
- *  Derived from drivers/mtd/nand/autcpu12.c
- *	 Copyright © 2001 Thomas Gleixner (gleixner@autronix.de)
- *
- *  Derived from drivers/mtd/spia.c
- *	 Copyright © 2000 Steven J. Hill (sjhill@cotw.com)
- *
- *
- *  Add Hardware ECC support for AT91SAM9260 / AT91SAM9263
- *     Richard Genoud (richard.genoud@gmail.com), Adeneo Copyright © 2007
- *
- *     Derived from Das U-Boot source code
- *     		(u-boot-1.1.5/board/atmel/at91sam9263ek/nand.c)
- *     © Copyright 2006 ATMEL Rousset, Lacressonniere Nicolas
- *
- *  Add Programmable Multibit ECC support for various AT91 SoC
- *     © Copyright 2012 ATMEL, Hong Xu
- *
- *  Add Nand Flash Controller support for SAMA5 SoC
- *     © Copyright 2013 ATMEL, Josh Wu (josh.wu@atmel.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/clk.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/platform_device.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_gpio.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/nand.h>
-#include <linux/mtd/partitions.h>
-
-#include <linux/delay.h>
-#include <linux/dmaengine.h>
-#include <linux/gpio.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/platform_data/atmel.h>
-
-static int use_dma = 1;
-module_param(use_dma, int, 0);
-
-static int on_flash_bbt = 0;
-module_param(on_flash_bbt, int, 0);
-
-/* Register access macros */
-#define ecc_readl(add, reg)				\
-	__raw_readl(add + ATMEL_ECC_##reg)
-#define ecc_writel(add, reg, value)			\
-	__raw_writel((value), add + ATMEL_ECC_##reg)
-
-#include "atmel_nand_ecc.h"	/* Hardware ECC registers */
-#include "atmel_nand_nfc.h"	/* Nand Flash Controller definition */
-
-struct atmel_nand_caps {
-	bool pmecc_correct_erase_page;
-	uint8_t pmecc_max_correction;
-};
-
-/*
- * oob layout for large page size
- * bad block info is on bytes 0 and 1
- * the bytes have to be consecutives to avoid
- * several NAND_CMD_RNDOUT during read
- *
- * oob layout for small page size
- * bad block info is on bytes 4 and 5
- * the bytes have to be consecutives to avoid
- * several NAND_CMD_RNDOUT during read
- */
-static int atmel_ooblayout_ecc_sp(struct mtd_info *mtd, int section,
-				  struct mtd_oob_region *oobregion)
-{
-	if (section)
-		return -ERANGE;
-
-	oobregion->length = 4;
-	oobregion->offset = 0;
-
-	return 0;
-}
-
-static int atmel_ooblayout_free_sp(struct mtd_info *mtd, int section,
-				   struct mtd_oob_region *oobregion)
-{
-	if (section)
-		return -ERANGE;
-
-	oobregion->offset = 6;
-	oobregion->length = mtd->oobsize - oobregion->offset;
-
-	return 0;
-}
-
-static const struct mtd_ooblayout_ops atmel_ooblayout_sp_ops = {
-	.ecc = atmel_ooblayout_ecc_sp,
-	.free = atmel_ooblayout_free_sp,
-};
-
-struct atmel_nfc {
-	void __iomem		*base_cmd_regs;
-	void __iomem		*hsmc_regs;
-	void			*sram_bank0;
-	dma_addr_t		sram_bank0_phys;
-	bool			use_nfc_sram;
-	bool			write_by_sram;
-
-	struct clk		*clk;
-
-	bool			is_initialized;
-	struct completion	comp_ready;
-	struct completion	comp_cmd_done;
-	struct completion	comp_xfer_done;
-
-	/* Point to the sram bank which include readed data via NFC */
-	void			*data_in_sram;
-	bool			will_write_sram;
-};
-static struct atmel_nfc	nand_nfc;
-
-struct atmel_nand_host {
-	struct nand_chip	nand_chip;
-	void __iomem		*io_base;
-	dma_addr_t		io_phys;
-	struct atmel_nand_data	board;
-	struct device		*dev;
-	void __iomem		*ecc;
-
-	struct completion	comp;
-	struct dma_chan		*dma_chan;
-
-	struct atmel_nfc	*nfc;
-
-	const struct atmel_nand_caps	*caps;
-	bool			has_pmecc;
-	u8			pmecc_corr_cap;
-	u16			pmecc_sector_size;
-	bool			has_no_lookup_table;
-	u32			pmecc_lookup_table_offset;
-	u32			pmecc_lookup_table_offset_512;
-	u32			pmecc_lookup_table_offset_1024;
-
-	int			pmecc_degree;	/* Degree of remainders */
-	int			pmecc_cw_len;	/* Length of codeword */
-
-	void __iomem		*pmerrloc_base;
-	void __iomem		*pmerrloc_el_base;
-	void __iomem		*pmecc_rom_base;
-
-	/* lookup table for alpha_to and index_of */
-	void __iomem		*pmecc_alpha_to;
-	void __iomem		*pmecc_index_of;
-
-	/* data for pmecc computation */
-	int16_t			*pmecc_partial_syn;
-	int16_t			*pmecc_si;
-	int16_t			*pmecc_smu;	/* Sigma table */
-	int16_t			*pmecc_lmu;	/* polynomal order */
-	int			*pmecc_mu;
-	int			*pmecc_dmu;
-	int			*pmecc_delta;
-};
-
-/*
- * Enable NAND.
- */
-static void atmel_nand_enable(struct atmel_nand_host *host)
-{
-	if (gpio_is_valid(host->board.enable_pin))
-		gpio_set_value(host->board.enable_pin, 0);
-}
-
-/*
- * Disable NAND.
- */
-static void atmel_nand_disable(struct atmel_nand_host *host)
-{
-	if (gpio_is_valid(host->board.enable_pin))
-		gpio_set_value(host->board.enable_pin, 1);
-}
-
-/*
- * Hardware specific access to control-lines
- */
-static void atmel_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	if (ctrl & NAND_CTRL_CHANGE) {
-		if (ctrl & NAND_NCE)
-			atmel_nand_enable(host);
-		else
-			atmel_nand_disable(host);
-	}
-	if (cmd == NAND_CMD_NONE)
-		return;
-
-	if (ctrl & NAND_CLE)
-		writeb(cmd, host->io_base + (1 << host->board.cle));
-	else
-		writeb(cmd, host->io_base + (1 << host->board.ale));
-}
-
-/*
- * Read the Device Ready pin.
- */
-static int atmel_nand_device_ready(struct mtd_info *mtd)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	return gpio_get_value(host->board.rdy_pin) ^
-                !!host->board.rdy_pin_active_low;
-}
-
-/* Set up for hardware ready pin and enable pin. */
-static int atmel_nand_set_enable_ready_pins(struct mtd_info *mtd)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	int res = 0;
-
-	if (gpio_is_valid(host->board.rdy_pin)) {
-		res = devm_gpio_request(host->dev,
-				host->board.rdy_pin, "nand_rdy");
-		if (res < 0) {
-			dev_err(host->dev,
-				"can't request rdy gpio %d\n",
-				host->board.rdy_pin);
-			return res;
-		}
-
-		res = gpio_direction_input(host->board.rdy_pin);
-		if (res < 0) {
-			dev_err(host->dev,
-				"can't request input direction rdy gpio %d\n",
-				host->board.rdy_pin);
-			return res;
-		}
-
-		chip->dev_ready = atmel_nand_device_ready;
-	}
-
-	if (gpio_is_valid(host->board.enable_pin)) {
-		res = devm_gpio_request(host->dev,
-				host->board.enable_pin, "nand_enable");
-		if (res < 0) {
-			dev_err(host->dev,
-				"can't request enable gpio %d\n",
-				host->board.enable_pin);
-			return res;
-		}
-
-		res = gpio_direction_output(host->board.enable_pin, 1);
-		if (res < 0) {
-			dev_err(host->dev,
-				"can't request output direction enable gpio %d\n",
-				host->board.enable_pin);
-			return res;
-		}
-	}
-
-	return res;
-}
-
-/*
- * Minimal-overhead PIO for data access.
- */
-static void atmel_read_buf8(struct mtd_info *mtd, u8 *buf, int len)
-{
-	struct nand_chip	*nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	if (host->nfc && host->nfc->use_nfc_sram && host->nfc->data_in_sram) {
-		memcpy(buf, host->nfc->data_in_sram, len);
-		host->nfc->data_in_sram += len;
-	} else {
-		__raw_readsb(nand_chip->IO_ADDR_R, buf, len);
-	}
-}
-
-static void atmel_read_buf16(struct mtd_info *mtd, u8 *buf, int len)
-{
-	struct nand_chip	*nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	if (host->nfc && host->nfc->use_nfc_sram && host->nfc->data_in_sram) {
-		memcpy(buf, host->nfc->data_in_sram, len);
-		host->nfc->data_in_sram += len;
-	} else {
-		__raw_readsw(nand_chip->IO_ADDR_R, buf, len / 2);
-	}
-}
-
-static void atmel_write_buf8(struct mtd_info *mtd, const u8 *buf, int len)
-{
-	struct nand_chip	*nand_chip = mtd_to_nand(mtd);
-
-	__raw_writesb(nand_chip->IO_ADDR_W, buf, len);
-}
-
-static void atmel_write_buf16(struct mtd_info *mtd, const u8 *buf, int len)
-{
-	struct nand_chip	*nand_chip = mtd_to_nand(mtd);
-
-	__raw_writesw(nand_chip->IO_ADDR_W, buf, len / 2);
-}
-
-static void dma_complete_func(void *completion)
-{
-	complete(completion);
-}
-
-static int nfc_set_sram_bank(struct atmel_nand_host *host, unsigned int bank)
-{
-	/* NFC only has two banks. Must be 0 or 1 */
-	if (bank > 1)
-		return -EINVAL;
-
-	if (bank) {
-		struct mtd_info *mtd = nand_to_mtd(&host->nand_chip);
-
-		/* Only for a 2k-page or lower flash, NFC can handle 2 banks */
-		if (mtd->writesize > 2048)
-			return -EINVAL;
-		nfc_writel(host->nfc->hsmc_regs, BANK, ATMEL_HSMC_NFC_BANK1);
-	} else {
-		nfc_writel(host->nfc->hsmc_regs, BANK, ATMEL_HSMC_NFC_BANK0);
-	}
-
-	return 0;
-}
-
-static uint nfc_get_sram_off(struct atmel_nand_host *host)
-{
-	if (nfc_readl(host->nfc->hsmc_regs, BANK) & ATMEL_HSMC_NFC_BANK1)
-		return NFC_SRAM_BANK1_OFFSET;
-	else
-		return 0;
-}
-
-static dma_addr_t nfc_sram_phys(struct atmel_nand_host *host)
-{
-	if (nfc_readl(host->nfc->hsmc_regs, BANK) & ATMEL_HSMC_NFC_BANK1)
-		return host->nfc->sram_bank0_phys + NFC_SRAM_BANK1_OFFSET;
-	else
-		return host->nfc->sram_bank0_phys;
-}
-
-static int atmel_nand_dma_op(struct mtd_info *mtd, void *buf, int len,
-			       int is_read)
-{
-	struct dma_device *dma_dev;
-	enum dma_ctrl_flags flags;
-	dma_addr_t dma_src_addr, dma_dst_addr, phys_addr;
-	struct dma_async_tx_descriptor *tx = NULL;
-	dma_cookie_t cookie;
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	void *p = buf;
-	int err = -EIO;
-	enum dma_data_direction dir = is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-	struct atmel_nfc *nfc = host->nfc;
-
-	if (buf >= high_memory)
-		goto err_buf;
-
-	dma_dev = host->dma_chan->device;
-
-	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
-
-	phys_addr = dma_map_single(dma_dev->dev, p, len, dir);
-	if (dma_mapping_error(dma_dev->dev, phys_addr)) {
-		dev_err(host->dev, "Failed to dma_map_single\n");
-		goto err_buf;
-	}
-
-	if (is_read) {
-		if (nfc && nfc->data_in_sram)
-			dma_src_addr = nfc_sram_phys(host) + (nfc->data_in_sram
-				- (nfc->sram_bank0 + nfc_get_sram_off(host)));
-		else
-			dma_src_addr = host->io_phys;
-
-		dma_dst_addr = phys_addr;
-	} else {
-		dma_src_addr = phys_addr;
-
-		if (nfc && nfc->write_by_sram)
-			dma_dst_addr = nfc_sram_phys(host);
-		else
-			dma_dst_addr = host->io_phys;
-	}
-
-	tx = dma_dev->device_prep_dma_memcpy(host->dma_chan, dma_dst_addr,
-					     dma_src_addr, len, flags);
-	if (!tx) {
-		dev_err(host->dev, "Failed to prepare DMA memcpy\n");
-		goto err_dma;
-	}
-
-	init_completion(&host->comp);
-	tx->callback = dma_complete_func;
-	tx->callback_param = &host->comp;
-
-	cookie = tx->tx_submit(tx);
-	if (dma_submit_error(cookie)) {
-		dev_err(host->dev, "Failed to do DMA tx_submit\n");
-		goto err_dma;
-	}
-
-	dma_async_issue_pending(host->dma_chan);
-	wait_for_completion(&host->comp);
-
-	if (is_read && nfc && nfc->data_in_sram)
-		/* After read data from SRAM, need to increase the position */
-		nfc->data_in_sram += len;
-
-	err = 0;
-
-err_dma:
-	dma_unmap_single(dma_dev->dev, phys_addr, len, dir);
-err_buf:
-	if (err != 0)
-		dev_dbg(host->dev, "Fall back to CPU I/O\n");
-	return err;
-}
-
-static void atmel_read_buf(struct mtd_info *mtd, u8 *buf, int len)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
-	if (use_dma && len > mtd->oobsize)
-		/* only use DMA for bigger than oob size: better performances */
-		if (atmel_nand_dma_op(mtd, buf, len, 1) == 0)
-			return;
-
-	if (chip->options & NAND_BUSWIDTH_16)
-		atmel_read_buf16(mtd, buf, len);
-	else
-		atmel_read_buf8(mtd, buf, len);
-}
-
-static void atmel_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
-	if (use_dma && len > mtd->oobsize)
-		/* only use DMA for bigger than oob size: better performances */
-		if (atmel_nand_dma_op(mtd, (void *)buf, len, 0) == 0)
-			return;
-
-	if (chip->options & NAND_BUSWIDTH_16)
-		atmel_write_buf16(mtd, buf, len);
-	else
-		atmel_write_buf8(mtd, buf, len);
-}
-
-/*
- * Return number of ecc bytes per sector according to sector size and
- * correction capability
- *
- * Following table shows what at91 PMECC supported:
- * Correction Capability	Sector_512_bytes	Sector_1024_bytes
- * =====================	================	=================
- *                2-bits                 4-bytes                  4-bytes
- *                4-bits                 7-bytes                  7-bytes
- *                8-bits                13-bytes                 14-bytes
- *               12-bits                20-bytes                 21-bytes
- *               24-bits                39-bytes                 42-bytes
- *               32-bits                52-bytes                 56-bytes
- */
-static int pmecc_get_ecc_bytes(int cap, int sector_size)
-{
-	int m = 12 + sector_size / 512;
-	return (m * cap + 7) / 8;
-}
-
-static void __iomem *pmecc_get_alpha_to(struct atmel_nand_host *host)
-{
-	int table_size;
-
-	table_size = host->pmecc_sector_size == 512 ?
-		PMECC_LOOKUP_TABLE_SIZE_512 : PMECC_LOOKUP_TABLE_SIZE_1024;
-
-	return host->pmecc_rom_base + host->pmecc_lookup_table_offset +
-			table_size * sizeof(int16_t);
-}
-
-static int pmecc_data_alloc(struct atmel_nand_host *host)
-{
-	const int cap = host->pmecc_corr_cap;
-	int size;
-
-	size = (2 * cap + 1) * sizeof(int16_t);
-	host->pmecc_partial_syn = devm_kzalloc(host->dev, size, GFP_KERNEL);
-	host->pmecc_si = devm_kzalloc(host->dev, size, GFP_KERNEL);
-	host->pmecc_lmu = devm_kzalloc(host->dev,
-			(cap + 1) * sizeof(int16_t), GFP_KERNEL);
-	host->pmecc_smu = devm_kzalloc(host->dev,
-			(cap + 2) * size, GFP_KERNEL);
-
-	size = (cap + 1) * sizeof(int);
-	host->pmecc_mu = devm_kzalloc(host->dev, size, GFP_KERNEL);
-	host->pmecc_dmu = devm_kzalloc(host->dev, size, GFP_KERNEL);
-	host->pmecc_delta = devm_kzalloc(host->dev, size, GFP_KERNEL);
-
-	if (!host->pmecc_partial_syn ||
-		!host->pmecc_si ||
-		!host->pmecc_lmu ||
-		!host->pmecc_smu ||
-		!host->pmecc_mu ||
-		!host->pmecc_dmu ||
-		!host->pmecc_delta)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static void pmecc_gen_syndrome(struct mtd_info *mtd, int sector)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	int i;
-	uint32_t value;
-
-	/* Fill odd syndromes */
-	for (i = 0; i < host->pmecc_corr_cap; i++) {
-		value = pmecc_readl_rem_relaxed(host->ecc, sector, i / 2);
-		if (i & 1)
-			value >>= 16;
-		value &= 0xffff;
-		host->pmecc_partial_syn[(2 * i) + 1] = (int16_t)value;
-	}
-}
-
-static void pmecc_substitute(struct mtd_info *mtd)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	int16_t __iomem *alpha_to = host->pmecc_alpha_to;
-	int16_t __iomem *index_of = host->pmecc_index_of;
-	int16_t *partial_syn = host->pmecc_partial_syn;
-	const int cap = host->pmecc_corr_cap;
-	int16_t *si;
-	int i, j;
-
-	/* si[] is a table that holds the current syndrome value,
-	 * an element of that table belongs to the field
-	 */
-	si = host->pmecc_si;
-
-	memset(&si[1], 0, sizeof(int16_t) * (2 * cap - 1));
-
-	/* Computation 2t syndromes based on S(x) */
-	/* Odd syndromes */
-	for (i = 1; i < 2 * cap; i += 2) {
-		for (j = 0; j < host->pmecc_degree; j++) {
-			if (partial_syn[i] & ((unsigned short)0x1 << j))
-				si[i] = readw_relaxed(alpha_to + i * j) ^ si[i];
-		}
-	}
-	/* Even syndrome = (Odd syndrome) ** 2 */
-	for (i = 2, j = 1; j <= cap; i = ++j << 1) {
-		if (si[j] == 0) {
-			si[i] = 0;
-		} else {
-			int16_t tmp;
-
-			tmp = readw_relaxed(index_of + si[j]);
-			tmp = (tmp * 2) % host->pmecc_cw_len;
-			si[i] = readw_relaxed(alpha_to + tmp);
-		}
-	}
-
-	return;
-}
-
-static void pmecc_get_sigma(struct mtd_info *mtd)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	int16_t *lmu = host->pmecc_lmu;
-	int16_t *si = host->pmecc_si;
-	int *mu = host->pmecc_mu;
-	int *dmu = host->pmecc_dmu;	/* Discrepancy */
-	int *delta = host->pmecc_delta; /* Delta order */
-	int cw_len = host->pmecc_cw_len;
-	const int16_t cap = host->pmecc_corr_cap;
-	const int num = 2 * cap + 1;
-	int16_t __iomem	*index_of = host->pmecc_index_of;
-	int16_t __iomem	*alpha_to = host->pmecc_alpha_to;
-	int i, j, k;
-	uint32_t dmu_0_count, tmp;
-	int16_t *smu = host->pmecc_smu;
-
-	/* index of largest delta */
-	int ro;
-	int largest;
-	int diff;
-
-	dmu_0_count = 0;
-
-	/* First Row */
-
-	/* Mu */
-	mu[0] = -1;
-
-	memset(smu, 0, sizeof(int16_t) * num);
-	smu[0] = 1;
-
-	/* discrepancy set to 1 */
-	dmu[0] = 1;
-	/* polynom order set to 0 */
-	lmu[0] = 0;
-	delta[0] = (mu[0] * 2 - lmu[0]) >> 1;
-
-	/* Second Row */
-
-	/* Mu */
-	mu[1] = 0;
-	/* Sigma(x) set to 1 */
-	memset(&smu[num], 0, sizeof(int16_t) * num);
-	smu[num] = 1;
-
-	/* discrepancy set to S1 */
-	dmu[1] = si[1];
-
-	/* polynom order set to 0 */
-	lmu[1] = 0;
-
-	delta[1] = (mu[1] * 2 - lmu[1]) >> 1;
-
-	/* Init the Sigma(x) last row */
-	memset(&smu[(cap + 1) * num], 0, sizeof(int16_t) * num);
-
-	for (i = 1; i <= cap; i++) {
-		mu[i + 1] = i << 1;
-		/* Begin Computing Sigma (Mu+1) and L(mu) */
-		/* check if discrepancy is set to 0 */
-		if (dmu[i] == 0) {
-			dmu_0_count++;
-
-			tmp = ((cap - (lmu[i] >> 1) - 1) / 2);
-			if ((cap - (lmu[i] >> 1) - 1) & 0x1)
-				tmp += 2;
-			else
-				tmp += 1;
-
-			if (dmu_0_count == tmp) {
-				for (j = 0; j <= (lmu[i] >> 1) + 1; j++)
-					smu[(cap + 1) * num + j] =
-							smu[i * num + j];
-
-				lmu[cap + 1] = lmu[i];
-				return;
-			}
-
-			/* copy polynom */
-			for (j = 0; j <= lmu[i] >> 1; j++)
-				smu[(i + 1) * num + j] = smu[i * num + j];
-
-			/* copy previous polynom order to the next */
-			lmu[i + 1] = lmu[i];
-		} else {
-			ro = 0;
-			largest = -1;
-			/* find largest delta with dmu != 0 */
-			for (j = 0; j < i; j++) {
-				if ((dmu[j]) && (delta[j] > largest)) {
-					largest = delta[j];
-					ro = j;
-				}
-			}
-
-			/* compute difference */
-			diff = (mu[i] - mu[ro]);
-
-			/* Compute degree of the new smu polynomial */
-			if ((lmu[i] >> 1) > ((lmu[ro] >> 1) + diff))
-				lmu[i + 1] = lmu[i];
-			else
-				lmu[i + 1] = ((lmu[ro] >> 1) + diff) * 2;
-
-			/* Init smu[i+1] with 0 */
-			for (k = 0; k < num; k++)
-				smu[(i + 1) * num + k] = 0;
-
-			/* Compute smu[i+1] */
-			for (k = 0; k <= lmu[ro] >> 1; k++) {
-				int16_t a, b, c;
-
-				if (!(smu[ro * num + k] && dmu[i]))
-					continue;
-				a = readw_relaxed(index_of + dmu[i]);
-				b = readw_relaxed(index_of + dmu[ro]);
-				c = readw_relaxed(index_of + smu[ro * num + k]);
-				tmp = a + (cw_len - b) + c;
-				a = readw_relaxed(alpha_to + tmp % cw_len);
-				smu[(i + 1) * num + (k + diff)] = a;
-			}
-
-			for (k = 0; k <= lmu[i] >> 1; k++)
-				smu[(i + 1) * num + k] ^= smu[i * num + k];
-		}
-
-		/* End Computing Sigma (Mu+1) and L(mu) */
-		/* In either case compute delta */
-		delta[i + 1] = (mu[i + 1] * 2 - lmu[i + 1]) >> 1;
-
-		/* Do not compute discrepancy for the last iteration */
-		if (i >= cap)
-			continue;
-
-		for (k = 0; k <= (lmu[i + 1] >> 1); k++) {
-			tmp = 2 * (i - 1);
-			if (k == 0) {
-				dmu[i + 1] = si[tmp + 3];
-			} else if (smu[(i + 1) * num + k] && si[tmp + 3 - k]) {
-				int16_t a, b, c;
-				a = readw_relaxed(index_of +
-						smu[(i + 1) * num + k]);
-				b = si[2 * (i - 1) + 3 - k];
-				c = readw_relaxed(index_of + b);
-				tmp = a + c;
-				tmp %= cw_len;
-				dmu[i + 1] = readw_relaxed(alpha_to + tmp) ^
-					dmu[i + 1];
-			}
-		}
-	}
-
-	return;
-}
-
-static int pmecc_err_location(struct mtd_info *mtd)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	unsigned long end_time;
-	const int cap = host->pmecc_corr_cap;
-	const int num = 2 * cap + 1;
-	int sector_size = host->pmecc_sector_size;
-	int err_nbr = 0;	/* number of error */
-	int roots_nbr;		/* number of roots */
-	int i;
-	uint32_t val;
-	int16_t *smu = host->pmecc_smu;
-
-	pmerrloc_writel(host->pmerrloc_base, ELDIS, PMERRLOC_DISABLE);
-
-	for (i = 0; i <= host->pmecc_lmu[cap + 1] >> 1; i++) {
-		pmerrloc_writel_sigma_relaxed(host->pmerrloc_base, i,
-				      smu[(cap + 1) * num + i]);
-		err_nbr++;
-	}
-
-	val = (err_nbr - 1) << 16;
-	if (sector_size == 1024)
-		val |= 1;
-
-	pmerrloc_writel(host->pmerrloc_base, ELCFG, val);
-	pmerrloc_writel(host->pmerrloc_base, ELEN,
-			sector_size * 8 + host->pmecc_degree * cap);
-
-	end_time = jiffies + msecs_to_jiffies(PMECC_MAX_TIMEOUT_MS);
-	while (!(pmerrloc_readl_relaxed(host->pmerrloc_base, ELISR)
-		 & PMERRLOC_CALC_DONE)) {
-		if (unlikely(time_after(jiffies, end_time))) {
-			dev_err(host->dev, "PMECC: Timeout to calculate error location.\n");
-			return -1;
-		}
-		cpu_relax();
-	}
-
-	roots_nbr = (pmerrloc_readl_relaxed(host->pmerrloc_base, ELISR)
-		& PMERRLOC_ERR_NUM_MASK) >> 8;
-	/* Number of roots == degree of smu hence <= cap */
-	if (roots_nbr == host->pmecc_lmu[cap + 1] >> 1)
-		return err_nbr - 1;
-
-	/* Number of roots does not match the degree of smu
-	 * unable to correct error */
-	return -1;
-}
-
-static void pmecc_correct_data(struct mtd_info *mtd, uint8_t *buf, uint8_t *ecc,
-		int sector_num, int extra_bytes, int err_nbr)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	int i = 0;
-	int byte_pos, bit_pos, sector_size, pos;
-	uint32_t tmp;
-	uint8_t err_byte;
-
-	sector_size = host->pmecc_sector_size;
-
-	while (err_nbr) {
-		tmp = pmerrloc_readl_el_relaxed(host->pmerrloc_el_base, i) - 1;
-		byte_pos = tmp / 8;
-		bit_pos  = tmp % 8;
-
-		if (byte_pos >= (sector_size + extra_bytes))
-			BUG();	/* should never happen */
-
-		if (byte_pos < sector_size) {
-			err_byte = *(buf + byte_pos);
-			*(buf + byte_pos) ^= (1 << bit_pos);
-
-			pos = sector_num * host->pmecc_sector_size + byte_pos;
-			dev_dbg(host->dev, "Bit flip in data area, byte_pos: %d, bit_pos: %d, 0x%02x -> 0x%02x\n",
-				pos, bit_pos, err_byte, *(buf + byte_pos));
-		} else {
-			struct mtd_oob_region oobregion;
-
-			/* Bit flip in OOB area */
-			tmp = sector_num * nand_chip->ecc.bytes
-					+ (byte_pos - sector_size);
-			err_byte = ecc[tmp];
-			ecc[tmp] ^= (1 << bit_pos);
-
-			mtd_ooblayout_ecc(mtd, 0, &oobregion);
-			pos = tmp + oobregion.offset;
-			dev_dbg(host->dev, "Bit flip in OOB, oob_byte_pos: %d, bit_pos: %d, 0x%02x -> 0x%02x\n",
-				pos, bit_pos, err_byte, ecc[tmp]);
-		}
-
-		i++;
-		err_nbr--;
-	}
-
-	return;
-}
-
-static int pmecc_correction(struct mtd_info *mtd, u32 pmecc_stat, uint8_t *buf,
-	u8 *ecc)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	int i, err_nbr;
-	uint8_t *buf_pos;
-	int max_bitflips = 0;
-
-	for (i = 0; i < nand_chip->ecc.steps; i++) {
-		err_nbr = 0;
-		if (pmecc_stat & 0x1) {
-			buf_pos = buf + i * host->pmecc_sector_size;
-
-			pmecc_gen_syndrome(mtd, i);
-			pmecc_substitute(mtd);
-			pmecc_get_sigma(mtd);
-
-			err_nbr = pmecc_err_location(mtd);
-			if (err_nbr >= 0) {
-				pmecc_correct_data(mtd, buf_pos, ecc, i,
-						   nand_chip->ecc.bytes,
-						   err_nbr);
-			} else if (!host->caps->pmecc_correct_erase_page) {
-				u8 *ecc_pos = ecc + (i * nand_chip->ecc.bytes);
-
-				/* Try to detect erased pages */
-				err_nbr = nand_check_erased_ecc_chunk(buf_pos,
-							host->pmecc_sector_size,
-							ecc_pos,
-							nand_chip->ecc.bytes,
-							NULL, 0,
-							nand_chip->ecc.strength);
-			}
-
-			if (err_nbr < 0) {
-				dev_err(host->dev, "PMECC: Too many errors\n");
-				mtd->ecc_stats.failed++;
-				return -EIO;
-			}
-
-			mtd->ecc_stats.corrected += err_nbr;
-			max_bitflips = max_t(int, max_bitflips, err_nbr);
-		}
-		pmecc_stat >>= 1;
-	}
-
-	return max_bitflips;
-}
-
-static void pmecc_enable(struct atmel_nand_host *host, int ecc_op)
-{
-	u32 val;
-
-	if (ecc_op != NAND_ECC_READ && ecc_op != NAND_ECC_WRITE) {
-		dev_err(host->dev, "atmel_nand: wrong pmecc operation type!");
-		return;
-	}
-
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_RST);
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DISABLE);
-	val = pmecc_readl_relaxed(host->ecc, CFG);
-
-	if (ecc_op == NAND_ECC_READ)
-		pmecc_writel(host->ecc, CFG, (val & ~PMECC_CFG_WRITE_OP)
-			| PMECC_CFG_AUTO_ENABLE);
-	else
-		pmecc_writel(host->ecc, CFG, (val | PMECC_CFG_WRITE_OP)
-			& ~PMECC_CFG_AUTO_ENABLE);
-
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_ENABLE);
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DATA);
-}
-
-static int atmel_nand_pmecc_read_page(struct mtd_info *mtd,
-	struct nand_chip *chip, uint8_t *buf, int oob_required, int page)
-{
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	int eccsize = chip->ecc.size * chip->ecc.steps;
-	uint8_t *oob = chip->oob_poi;
-	uint32_t stat;
-	unsigned long end_time;
-	int bitflips = 0;
-
-	if (!host->nfc || !host->nfc->use_nfc_sram)
-		pmecc_enable(host, NAND_ECC_READ);
-
-	chip->read_buf(mtd, buf, eccsize);
-	chip->read_buf(mtd, oob, mtd->oobsize);
-
-	end_time = jiffies + msecs_to_jiffies(PMECC_MAX_TIMEOUT_MS);
-	while ((pmecc_readl_relaxed(host->ecc, SR) & PMECC_SR_BUSY)) {
-		if (unlikely(time_after(jiffies, end_time))) {
-			dev_err(host->dev, "PMECC: Timeout to get error status.\n");
-			return -EIO;
-		}
-		cpu_relax();
-	}
-
-	stat = pmecc_readl_relaxed(host->ecc, ISR);
-	if (stat != 0) {
-		struct mtd_oob_region oobregion;
-
-		mtd_ooblayout_ecc(mtd, 0, &oobregion);
-		bitflips = pmecc_correction(mtd, stat, buf,
-					    &oob[oobregion.offset]);
-		if (bitflips < 0)
-			/* uncorrectable errors */
-			return 0;
-	}
-
-	return bitflips;
-}
-
-static int atmel_nand_pmecc_write_page(struct mtd_info *mtd,
-		struct nand_chip *chip, const uint8_t *buf, int oob_required,
-		int page)
-{
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	struct mtd_oob_region oobregion = { };
-	int i, j, section = 0;
-	unsigned long end_time;
-
-	if (!host->nfc || !host->nfc->write_by_sram) {
-		pmecc_enable(host, NAND_ECC_WRITE);
-		chip->write_buf(mtd, (u8 *)buf, mtd->writesize);
-	}
-
-	end_time = jiffies + msecs_to_jiffies(PMECC_MAX_TIMEOUT_MS);
-	while ((pmecc_readl_relaxed(host->ecc, SR) & PMECC_SR_BUSY)) {
-		if (unlikely(time_after(jiffies, end_time))) {
-			dev_err(host->dev, "PMECC: Timeout to get ECC value.\n");
-			return -EIO;
-		}
-		cpu_relax();
-	}
-
-	for (i = 0; i < chip->ecc.steps; i++) {
-		for (j = 0; j < chip->ecc.bytes; j++) {
-			if (!oobregion.length)
-				mtd_ooblayout_ecc(mtd, section, &oobregion);
-
-			chip->oob_poi[oobregion.offset] =
-				pmecc_readb_ecc_relaxed(host->ecc, i, j);
-			oobregion.length--;
-			oobregion.offset++;
-			section++;
-		}
-	}
-	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
-
-	return 0;
-}
-
-static void atmel_pmecc_core_init(struct mtd_info *mtd)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	int eccbytes = mtd_ooblayout_count_eccbytes(mtd);
-	uint32_t val = 0;
-	struct mtd_oob_region oobregion;
-
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_RST);
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DISABLE);
-
-	switch (host->pmecc_corr_cap) {
-	case 2:
-		val = PMECC_CFG_BCH_ERR2;
-		break;
-	case 4:
-		val = PMECC_CFG_BCH_ERR4;
-		break;
-	case 8:
-		val = PMECC_CFG_BCH_ERR8;
-		break;
-	case 12:
-		val = PMECC_CFG_BCH_ERR12;
-		break;
-	case 24:
-		val = PMECC_CFG_BCH_ERR24;
-		break;
-	case 32:
-		val = PMECC_CFG_BCH_ERR32;
-		break;
-	}
-
-	if (host->pmecc_sector_size == 512)
-		val |= PMECC_CFG_SECTOR512;
-	else if (host->pmecc_sector_size == 1024)
-		val |= PMECC_CFG_SECTOR1024;
-
-	switch (nand_chip->ecc.steps) {
-	case 1:
-		val |= PMECC_CFG_PAGE_1SECTOR;
-		break;
-	case 2:
-		val |= PMECC_CFG_PAGE_2SECTORS;
-		break;
-	case 4:
-		val |= PMECC_CFG_PAGE_4SECTORS;
-		break;
-	case 8:
-		val |= PMECC_CFG_PAGE_8SECTORS;
-		break;
-	}
-
-	val |= (PMECC_CFG_READ_OP | PMECC_CFG_SPARE_DISABLE
-		| PMECC_CFG_AUTO_DISABLE);
-	pmecc_writel(host->ecc, CFG, val);
-
-	pmecc_writel(host->ecc, SAREA, mtd->oobsize - 1);
-	mtd_ooblayout_ecc(mtd, 0, &oobregion);
-	pmecc_writel(host->ecc, SADDR, oobregion.offset);
-	pmecc_writel(host->ecc, EADDR,
-		     oobregion.offset + eccbytes - 1);
-	/* See datasheet about PMECC Clock Control Register */
-	pmecc_writel(host->ecc, CLK, 2);
-	pmecc_writel(host->ecc, IDR, 0xff);
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_ENABLE);
-}
-
-/*
- * Get minimum ecc requirements from NAND.
- * If pmecc-cap, pmecc-sector-size in DTS are not specified, this function
- * will set them according to minimum ecc requirement. Otherwise, use the
- * value in DTS file.
- * return 0 if success. otherwise return error code.
- */
-static int pmecc_choose_ecc(struct atmel_nand_host *host,
-		int *cap, int *sector_size)
-{
-	/* Get minimum ECC requirements */
-	if (host->nand_chip.ecc_strength_ds) {
-		*cap = host->nand_chip.ecc_strength_ds;
-		*sector_size = host->nand_chip.ecc_step_ds;
-		dev_info(host->dev, "minimum ECC: %d bits in %d bytes\n",
-				*cap, *sector_size);
-	} else {
-		*cap = 2;
-		*sector_size = 512;
-		dev_info(host->dev, "can't detect min. ECC, assume 2 bits in 512 bytes\n");
-	}
-
-	/* If device tree doesn't specify, use NAND's minimum ECC parameters */
-	if (host->pmecc_corr_cap == 0) {
-		if (*cap > host->caps->pmecc_max_correction)
-			return -EINVAL;
-
-		/* use the most fitable ecc bits (the near bigger one ) */
-		if (*cap <= 2)
-			host->pmecc_corr_cap = 2;
-		else if (*cap <= 4)
-			host->pmecc_corr_cap = 4;
-		else if (*cap <= 8)
-			host->pmecc_corr_cap = 8;
-		else if (*cap <= 12)
-			host->pmecc_corr_cap = 12;
-		else if (*cap <= 24)
-			host->pmecc_corr_cap = 24;
-		else if (*cap <= 32)
-			host->pmecc_corr_cap = 32;
-		else
-			return -EINVAL;
-	}
-	if (host->pmecc_sector_size == 0) {
-		/* use the most fitable sector size (the near smaller one ) */
-		if (*sector_size >= 1024)
-			host->pmecc_sector_size = 1024;
-		else if (*sector_size >= 512)
-			host->pmecc_sector_size = 512;
-		else
-			return -EINVAL;
-	}
-	return 0;
-}
-
-static inline int deg(unsigned int poly)
-{
-	/* polynomial degree is the most-significant bit index */
-	return fls(poly) - 1;
-}
-
-static int build_gf_tables(int mm, unsigned int poly,
-		int16_t *index_of, int16_t *alpha_to)
-{
-	unsigned int i, x = 1;
-	const unsigned int k = 1 << deg(poly);
-	unsigned int nn = (1 << mm) - 1;
-
-	/* primitive polynomial must be of degree m */
-	if (k != (1u << mm))
-		return -EINVAL;
-
-	for (i = 0; i < nn; i++) {
-		alpha_to[i] = x;
-		index_of[x] = i;
-		if (i && (x == 1))
-			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
-			return -EINVAL;
-		x <<= 1;
-		if (x & k)
-			x ^= poly;
-	}
-	alpha_to[nn] = 1;
-	index_of[0] = 0;
-
-	return 0;
-}
-
-static uint16_t *create_lookup_table(struct device *dev, int sector_size)
-{
-	int degree = (sector_size == 512) ?
-			PMECC_GF_DIMENSION_13 :
-			PMECC_GF_DIMENSION_14;
-	unsigned int poly = (sector_size == 512) ?
-			PMECC_GF_13_PRIMITIVE_POLY :
-			PMECC_GF_14_PRIMITIVE_POLY;
-	int table_size = (sector_size == 512) ?
-			PMECC_LOOKUP_TABLE_SIZE_512 :
-			PMECC_LOOKUP_TABLE_SIZE_1024;
-
-	int16_t *addr = devm_kzalloc(dev, 2 * table_size * sizeof(uint16_t),
-			GFP_KERNEL);
-	if (addr && build_gf_tables(degree, poly, addr, addr + table_size))
-		return NULL;
-
-	return addr;
-}
-
-static int atmel_pmecc_nand_init_params(struct platform_device *pdev,
-					 struct atmel_nand_host *host)
-{
-	struct nand_chip *nand_chip = &host->nand_chip;
-	struct mtd_info *mtd = nand_to_mtd(nand_chip);
-	struct resource *regs, *regs_pmerr, *regs_rom;
-	uint16_t *galois_table;
-	int cap, sector_size, err_no;
-
-	err_no = pmecc_choose_ecc(host, &cap, &sector_size);
-	if (err_no) {
-		dev_err(host->dev, "The NAND flash's ECC requirement are not support!");
-		return err_no;
-	}
-
-	if (cap > host->pmecc_corr_cap ||
-			sector_size != host->pmecc_sector_size)
-		dev_info(host->dev, "WARNING: Be Caution! Using different PMECC parameters from Nand ONFI ECC reqirement.\n");
-
-	cap = host->pmecc_corr_cap;
-	sector_size = host->pmecc_sector_size;
-	host->pmecc_lookup_table_offset = (sector_size == 512) ?
-			host->pmecc_lookup_table_offset_512 :
-			host->pmecc_lookup_table_offset_1024;
-
-	dev_info(host->dev, "Initialize PMECC params, cap: %d, sector: %d\n",
-		 cap, sector_size);
-
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	if (!regs) {
-		dev_warn(host->dev,
-			"Can't get I/O resource regs for PMECC controller, rolling back on software ECC\n");
-		nand_chip->ecc.mode = NAND_ECC_SOFT;
-		nand_chip->ecc.algo = NAND_ECC_HAMMING;
-		return 0;
-	}
-
-	host->ecc = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(host->ecc)) {
-		err_no = PTR_ERR(host->ecc);
-		goto err;
-	}
-
-	regs_pmerr = platform_get_resource(pdev, IORESOURCE_MEM, 2);
-	host->pmerrloc_base = devm_ioremap_resource(&pdev->dev, regs_pmerr);
-	if (IS_ERR(host->pmerrloc_base)) {
-		err_no = PTR_ERR(host->pmerrloc_base);
-		goto err;
-	}
-	host->pmerrloc_el_base = host->pmerrloc_base + ATMEL_PMERRLOC_SIGMAx +
-		(host->caps->pmecc_max_correction + 1) * 4;
-
-	if (!host->has_no_lookup_table) {
-		regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3);
-		host->pmecc_rom_base = devm_ioremap_resource(&pdev->dev,
-								regs_rom);
-		if (IS_ERR(host->pmecc_rom_base)) {
-			dev_err(host->dev, "Can not get I/O resource for ROM, will build a lookup table in runtime!\n");
-			host->has_no_lookup_table = true;
-		}
-	}
-
-	if (host->has_no_lookup_table) {
-		/* Build the look-up table in runtime */
-		galois_table = create_lookup_table(host->dev, sector_size);
-		if (!galois_table) {
-			dev_err(host->dev, "Failed to build a lookup table in runtime!\n");
-			err_no = -EINVAL;
-			goto err;
-		}
-
-		host->pmecc_rom_base = (void __iomem *)galois_table;
-		host->pmecc_lookup_table_offset = 0;
-	}
-
-	nand_chip->ecc.size = sector_size;
-
-	/* set ECC page size and oob layout */
-	switch (mtd->writesize) {
-	case 512:
-	case 1024:
-	case 2048:
-	case 4096:
-	case 8192:
-		if (sector_size > mtd->writesize) {
-			dev_err(host->dev, "pmecc sector size is bigger than the page size!\n");
-			err_no = -EINVAL;
-			goto err;
-		}
-
-		host->pmecc_degree = (sector_size == 512) ?
-			PMECC_GF_DIMENSION_13 : PMECC_GF_DIMENSION_14;
-		host->pmecc_cw_len = (1 << host->pmecc_degree) - 1;
-		host->pmecc_alpha_to = pmecc_get_alpha_to(host);
-		host->pmecc_index_of = host->pmecc_rom_base +
-			host->pmecc_lookup_table_offset;
-
-		nand_chip->ecc.strength = cap;
-		nand_chip->ecc.bytes = pmecc_get_ecc_bytes(cap, sector_size);
-		nand_chip->ecc.steps = mtd->writesize / sector_size;
-		nand_chip->ecc.total = nand_chip->ecc.bytes *
-			nand_chip->ecc.steps;
-		if (nand_chip->ecc.total >
-				mtd->oobsize - PMECC_OOB_RESERVED_BYTES) {
-			dev_err(host->dev, "No room for ECC bytes\n");
-			err_no = -EINVAL;
-			goto err;
-		}
-
-		mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
-		break;
-	default:
-		dev_warn(host->dev,
-			"Unsupported page size for PMECC, use Software ECC\n");
-		/* page size not handled by HW ECC */
-		/* switching back to soft ECC */
-		nand_chip->ecc.mode = NAND_ECC_SOFT;
-		nand_chip->ecc.algo = NAND_ECC_HAMMING;
-		return 0;
-	}
-
-	/* Allocate data for PMECC computation */
-	err_no = pmecc_data_alloc(host);
-	if (err_no) {
-		dev_err(host->dev,
-				"Cannot allocate memory for PMECC computation!\n");
-		goto err;
-	}
-
-	nand_chip->options |= NAND_NO_SUBPAGE_WRITE;
-	nand_chip->ecc.read_page = atmel_nand_pmecc_read_page;
-	nand_chip->ecc.write_page = atmel_nand_pmecc_write_page;
-
-	atmel_pmecc_core_init(mtd);
-
-	return 0;
-
-err:
-	return err_no;
-}
-
-/*
- * Calculate HW ECC
- *
- * function called after a write
- *
- * mtd:        MTD block structure
- * dat:        raw data (unused)
- * ecc_code:   buffer for ECC
- */
-static int atmel_nand_calculate(struct mtd_info *mtd,
-		const u_char *dat, unsigned char *ecc_code)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	unsigned int ecc_value;
-
-	/* get the first 2 ECC bytes */
-	ecc_value = ecc_readl(host->ecc, PR);
-
-	ecc_code[0] = ecc_value & 0xFF;
-	ecc_code[1] = (ecc_value >> 8) & 0xFF;
-
-	/* get the last 2 ECC bytes */
-	ecc_value = ecc_readl(host->ecc, NPR) & ATMEL_ECC_NPARITY;
-
-	ecc_code[2] = ecc_value & 0xFF;
-	ecc_code[3] = (ecc_value >> 8) & 0xFF;
-
-	return 0;
-}
-
-/*
- * HW ECC read page function
- *
- * mtd:        mtd info structure
- * chip:       nand chip info structure
- * buf:        buffer to store read data
- * oob_required:    caller expects OOB data read to chip->oob_poi
- */
-static int atmel_nand_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int oob_required, int page)
-{
-	int eccsize = chip->ecc.size;
-	int eccbytes = chip->ecc.bytes;
-	uint8_t *p = buf;
-	uint8_t *oob = chip->oob_poi;
-	uint8_t *ecc_pos;
-	int stat;
-	unsigned int max_bitflips = 0;
-	struct mtd_oob_region oobregion = {};
-
-	/*
-	 * Errata: ALE is incorrectly wired up to the ECC controller
-	 * on the AP7000, so it will include the address cycles in the
-	 * ECC calculation.
-	 *
-	 * Workaround: Reset the parity registers before reading the
-	 * actual data.
-	 */
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	if (host->board.need_reset_workaround)
-		ecc_writel(host->ecc, CR, ATMEL_ECC_RST);
-
-	/* read the page */
-	chip->read_buf(mtd, p, eccsize);
-
-	/* move to ECC position if needed */
-	mtd_ooblayout_ecc(mtd, 0, &oobregion);
-	if (oobregion.offset != 0) {
-		/*
-		 * This only works on large pages because the ECC controller
-		 * waits for NAND_CMD_RNDOUTSTART after the NAND_CMD_RNDOUT.
-		 * Anyway, for small pages, the first ECC byte is at offset
-		 * 0 in the OOB area.
-		 */
-		chip->cmdfunc(mtd, NAND_CMD_RNDOUT,
-			      mtd->writesize + oobregion.offset, -1);
-	}
-
-	/* the ECC controller needs to read the ECC just after the data */
-	ecc_pos = oob + oobregion.offset;
-	chip->read_buf(mtd, ecc_pos, eccbytes);
-
-	/* check if there's an error */
-	stat = chip->ecc.correct(mtd, p, oob, NULL);
-
-	if (stat < 0) {
-		mtd->ecc_stats.failed++;
-	} else {
-		mtd->ecc_stats.corrected += stat;
-		max_bitflips = max_t(unsigned int, max_bitflips, stat);
-	}
-
-	/* get back to oob start (end of page) */
-	chip->cmdfunc(mtd, NAND_CMD_RNDOUT, mtd->writesize, -1);
-
-	/* read the oob */
-	chip->read_buf(mtd, oob, mtd->oobsize);
-
-	return max_bitflips;
-}
-
-/*
- * HW ECC Correction
- *
- * function called after a read
- *
- * mtd:        MTD block structure
- * dat:        raw data read from the chip
- * read_ecc:   ECC from the chip (unused)
- * isnull:     unused
- *
- * Detect and correct a 1 bit error for a page
- */
-static int atmel_nand_correct(struct mtd_info *mtd, u_char *dat,
-		u_char *read_ecc, u_char *isnull)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	unsigned int ecc_status;
-	unsigned int ecc_word, ecc_bit;
-
-	/* get the status from the Status Register */
-	ecc_status = ecc_readl(host->ecc, SR);
-
-	/* if there's no error */
-	if (likely(!(ecc_status & ATMEL_ECC_RECERR)))
-		return 0;
-
-	/* get error bit offset (4 bits) */
-	ecc_bit = ecc_readl(host->ecc, PR) & ATMEL_ECC_BITADDR;
-	/* get word address (12 bits) */
-	ecc_word = ecc_readl(host->ecc, PR) & ATMEL_ECC_WORDADDR;
-	ecc_word >>= 4;
-
-	/* if there are multiple errors */
-	if (ecc_status & ATMEL_ECC_MULERR) {
-		/* check if it is a freshly erased block
-		 * (filled with 0xff) */
-		if ((ecc_bit == ATMEL_ECC_BITADDR)
-				&& (ecc_word == (ATMEL_ECC_WORDADDR >> 4))) {
-			/* the block has just been erased, return OK */
-			return 0;
-		}
-		/* it doesn't seems to be a freshly
-		 * erased block.
-		 * We can't correct so many errors */
-		dev_dbg(host->dev, "atmel_nand : multiple errors detected."
-				" Unable to correct.\n");
-		return -EBADMSG;
-	}
-
-	/* if there's a single bit error : we can correct it */
-	if (ecc_status & ATMEL_ECC_ECCERR) {
-		/* there's nothing much to do here.
-		 * the bit error is on the ECC itself.
-		 */
-		dev_dbg(host->dev, "atmel_nand : one bit error on ECC code."
-				" Nothing to correct\n");
-		return 0;
-	}
-
-	dev_dbg(host->dev, "atmel_nand : one bit error on data."
-			" (word offset in the page :"
-			" 0x%x bit offset : 0x%x)\n",
-			ecc_word, ecc_bit);
-	/* correct the error */
-	if (nand_chip->options & NAND_BUSWIDTH_16) {
-		/* 16 bits words */
-		((unsigned short *) dat)[ecc_word] ^= (1 << ecc_bit);
-	} else {
-		/* 8 bits words */
-		dat[ecc_word] ^= (1 << ecc_bit);
-	}
-	dev_dbg(host->dev, "atmel_nand : error corrected\n");
-	return 1;
-}
-
-/*
- * Enable HW ECC : unused on most chips
- */
-static void atmel_nand_hwctl(struct mtd_info *mtd, int mode)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	if (host->board.need_reset_workaround)
-		ecc_writel(host->ecc, CR, ATMEL_ECC_RST);
-}
-
-static int atmel_of_init_ecc(struct atmel_nand_host *host,
-			     struct device_node *np)
-{
-	u32 offset[2];
-	u32 val;
-
-	host->has_pmecc = of_property_read_bool(np, "atmel,has-pmecc");
-
-	/* Not using PMECC */
-	if (!(host->nand_chip.ecc.mode == NAND_ECC_HW) || !host->has_pmecc)
-		return 0;
-
-	/* use PMECC, get correction capability, sector size and lookup
-	 * table offset.
-	 * If correction bits and sector size are not specified, then find
-	 * them from NAND ONFI parameters.
-	 */
-	if (of_property_read_u32(np, "atmel,pmecc-cap", &val) == 0) {
-		if (val > host->caps->pmecc_max_correction) {
-			dev_err(host->dev,
-				"Required ECC strength too high: %u max %u\n",
-				val, host->caps->pmecc_max_correction);
-			return -EINVAL;
-		}
-		if ((val != 2)  && (val != 4)  && (val != 8) &&
-		    (val != 12) && (val != 24) && (val != 32)) {
-			dev_err(host->dev,
-				"Required ECC strength not supported: %u\n",
-				val);
-			return -EINVAL;
-		}
-		host->pmecc_corr_cap = (u8)val;
-	}
-
-	if (of_property_read_u32(np, "atmel,pmecc-sector-size", &val) == 0) {
-		if ((val != 512) && (val != 1024)) {
-			dev_err(host->dev,
-				"Required ECC sector size not supported: %u\n",
-				val);
-			return -EINVAL;
-		}
-		host->pmecc_sector_size = (u16)val;
-	}
-
-	if (of_property_read_u32_array(np, "atmel,pmecc-lookup-table-offset",
-			offset, 2) != 0) {
-		dev_err(host->dev, "Cannot get PMECC lookup table offset, will build a lookup table in runtime.\n");
-		host->has_no_lookup_table = true;
-		/* Will build a lookup table and initialize the offset later */
-		return 0;
-	}
-
-	if (!offset[0] && !offset[1]) {
-		dev_err(host->dev, "Invalid PMECC lookup table offset\n");
-		return -EINVAL;
-	}
-
-	host->pmecc_lookup_table_offset_512 = offset[0];
-	host->pmecc_lookup_table_offset_1024 = offset[1];
-
-	return 0;
-}
-
-static int atmel_of_init_port(struct atmel_nand_host *host,
-			      struct device_node *np)
-{
-	u32 val;
-	struct atmel_nand_data *board = &host->board;
-	enum of_gpio_flags flags = 0;
-
-	host->caps = (struct atmel_nand_caps *)
-		of_device_get_match_data(host->dev);
-
-	if (of_property_read_u32(np, "atmel,nand-addr-offset", &val) == 0) {
-		if (val >= 32) {
-			dev_err(host->dev, "invalid addr-offset %u\n", val);
-			return -EINVAL;
-		}
-		board->ale = val;
-	}
-
-	if (of_property_read_u32(np, "atmel,nand-cmd-offset", &val) == 0) {
-		if (val >= 32) {
-			dev_err(host->dev, "invalid cmd-offset %u\n", val);
-			return -EINVAL;
-		}
-		board->cle = val;
-	}
-
-	board->has_dma = of_property_read_bool(np, "atmel,nand-has-dma");
-
-	board->rdy_pin = of_get_gpio_flags(np, 0, &flags);
-	board->rdy_pin_active_low = (flags == OF_GPIO_ACTIVE_LOW);
-
-	board->enable_pin = of_get_gpio(np, 1);
-	board->det_pin = of_get_gpio(np, 2);
-
-	/* load the nfc driver if there is */
-	of_platform_populate(np, NULL, NULL, host->dev);
-
-	/*
-	 * Initialize ECC mode to NAND_ECC_SOFT so that we have a correct value
-	 * even if the nand-ecc-mode property is not defined.
-	 */
-	host->nand_chip.ecc.mode = NAND_ECC_SOFT;
-	host->nand_chip.ecc.algo = NAND_ECC_HAMMING;
-
-	return 0;
-}
-
-static int atmel_hw_nand_init_params(struct platform_device *pdev,
-					 struct atmel_nand_host *host)
-{
-	struct nand_chip *nand_chip = &host->nand_chip;
-	struct mtd_info *mtd = nand_to_mtd(nand_chip);
-	struct resource		*regs;
-
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	if (!regs) {
-		dev_err(host->dev,
-			"Can't get I/O resource regs, use software ECC\n");
-		nand_chip->ecc.mode = NAND_ECC_SOFT;
-		nand_chip->ecc.algo = NAND_ECC_HAMMING;
-		return 0;
-	}
-
-	host->ecc = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(host->ecc))
-		return PTR_ERR(host->ecc);
-
-	/* ECC is calculated for the whole page (1 step) */
-	nand_chip->ecc.size = mtd->writesize;
-
-	/* set ECC page size and oob layout */
-	switch (mtd->writesize) {
-	case 512:
-		mtd_set_ooblayout(mtd, &atmel_ooblayout_sp_ops);
-		ecc_writel(host->ecc, MR, ATMEL_ECC_PAGESIZE_528);
-		break;
-	case 1024:
-		mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
-		ecc_writel(host->ecc, MR, ATMEL_ECC_PAGESIZE_1056);
-		break;
-	case 2048:
-		mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
-		ecc_writel(host->ecc, MR, ATMEL_ECC_PAGESIZE_2112);
-		break;
-	case 4096:
-		mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
-		ecc_writel(host->ecc, MR, ATMEL_ECC_PAGESIZE_4224);
-		break;
-	default:
-		/* page size not handled by HW ECC */
-		/* switching back to soft ECC */
-		nand_chip->ecc.mode = NAND_ECC_SOFT;
-		nand_chip->ecc.algo = NAND_ECC_HAMMING;
-		return 0;
-	}
-
-	/* set up for HW ECC */
-	nand_chip->ecc.calculate = atmel_nand_calculate;
-	nand_chip->ecc.correct = atmel_nand_correct;
-	nand_chip->ecc.hwctl = atmel_nand_hwctl;
-	nand_chip->ecc.read_page = atmel_nand_read_page;
-	nand_chip->ecc.bytes = 4;
-	nand_chip->ecc.strength = 1;
-
-	return 0;
-}
-
-static inline u32 nfc_read_status(struct atmel_nand_host *host)
-{
-	u32 err_flags = NFC_SR_DTOE | NFC_SR_UNDEF | NFC_SR_AWB | NFC_SR_ASE;
-	u32 nfc_status = nfc_readl(host->nfc->hsmc_regs, SR);
-
-	if (unlikely(nfc_status & err_flags)) {
-		if (nfc_status & NFC_SR_DTOE)
-			dev_err(host->dev, "NFC: Waiting Nand R/B Timeout Error\n");
-		else if (nfc_status & NFC_SR_UNDEF)
-			dev_err(host->dev, "NFC: Access Undefined Area Error\n");
-		else if (nfc_status & NFC_SR_AWB)
-			dev_err(host->dev, "NFC: Access memory While NFC is busy\n");
-		else if (nfc_status & NFC_SR_ASE)
-			dev_err(host->dev, "NFC: Access memory Size Error\n");
-	}
-
-	return nfc_status;
-}
-
-/* SMC interrupt service routine */
-static irqreturn_t hsmc_interrupt(int irq, void *dev_id)
-{
-	struct atmel_nand_host *host = dev_id;
-	u32 status, mask, pending;
-	irqreturn_t ret = IRQ_NONE;
-
-	status = nfc_read_status(host);
-	mask = nfc_readl(host->nfc->hsmc_regs, IMR);
-	pending = status & mask;
-
-	if (pending & NFC_SR_XFR_DONE) {
-		complete(&host->nfc->comp_xfer_done);
-		nfc_writel(host->nfc->hsmc_regs, IDR, NFC_SR_XFR_DONE);
-		ret = IRQ_HANDLED;
-	}
-	if (pending & NFC_SR_RB_EDGE) {
-		complete(&host->nfc->comp_ready);
-		nfc_writel(host->nfc->hsmc_regs, IDR, NFC_SR_RB_EDGE);
-		ret = IRQ_HANDLED;
-	}
-	if (pending & NFC_SR_CMD_DONE) {
-		complete(&host->nfc->comp_cmd_done);
-		nfc_writel(host->nfc->hsmc_regs, IDR, NFC_SR_CMD_DONE);
-		ret = IRQ_HANDLED;
-	}
-
-	return ret;
-}
-
-/* NFC(Nand Flash Controller) related functions */
-static void nfc_prepare_interrupt(struct atmel_nand_host *host, u32 flag)
-{
-	if (flag & NFC_SR_XFR_DONE)
-		init_completion(&host->nfc->comp_xfer_done);
-
-	if (flag & NFC_SR_RB_EDGE)
-		init_completion(&host->nfc->comp_ready);
-
-	if (flag & NFC_SR_CMD_DONE)
-		init_completion(&host->nfc->comp_cmd_done);
-
-	/* Enable interrupt that need to wait for */
-	nfc_writel(host->nfc->hsmc_regs, IER, flag);
-}
-
-static int nfc_wait_interrupt(struct atmel_nand_host *host, u32 flag)
-{
-	int i, index = 0;
-	struct completion *comp[3];	/* Support 3 interrupt completion */
-
-	if (flag & NFC_SR_XFR_DONE)
-		comp[index++] = &host->nfc->comp_xfer_done;
-
-	if (flag & NFC_SR_RB_EDGE)
-		comp[index++] = &host->nfc->comp_ready;
-
-	if (flag & NFC_SR_CMD_DONE)
-		comp[index++] = &host->nfc->comp_cmd_done;
-
-	if (index == 0) {
-		dev_err(host->dev, "Unknown interrupt flag: 0x%08x\n", flag);
-		return -EINVAL;
-	}
-
-	for (i = 0; i < index; i++) {
-		if (wait_for_completion_timeout(comp[i],
-				msecs_to_jiffies(NFC_TIME_OUT_MS)))
-			continue;	/* wait for next completion */
-		else
-			goto err_timeout;
-	}
-
-	return 0;
-
-err_timeout:
-	dev_err(host->dev, "Time out to wait for interrupt: 0x%08x\n", flag);
-	/* Disable the interrupt as it is not handled by interrupt handler */
-	nfc_writel(host->nfc->hsmc_regs, IDR, flag);
-	return -ETIMEDOUT;
-}
-
-static int nfc_send_command(struct atmel_nand_host *host,
-	unsigned int cmd, unsigned int addr, unsigned char cycle0)
-{
-	unsigned long timeout;
-	u32 flag = NFC_SR_CMD_DONE;
-	flag |= cmd & NFCADDR_CMD_DATAEN ? NFC_SR_XFR_DONE : 0;
-
-	dev_dbg(host->dev,
-		"nfc_cmd: 0x%08x, addr1234: 0x%08x, cycle0: 0x%02x\n",
-		cmd, addr, cycle0);
-
-	timeout = jiffies + msecs_to_jiffies(NFC_TIME_OUT_MS);
-	while (nfc_readl(host->nfc->hsmc_regs, SR) & NFC_SR_BUSY) {
-		if (time_after(jiffies, timeout)) {
-			dev_err(host->dev,
-				"Time out to wait for NFC ready!\n");
-			return -ETIMEDOUT;
-		}
-	}
-
-	nfc_prepare_interrupt(host, flag);
-	nfc_writel(host->nfc->hsmc_regs, CYCLE0, cycle0);
-	nfc_cmd_addr1234_writel(cmd, addr, host->nfc->base_cmd_regs);
-	return nfc_wait_interrupt(host, flag);
-}
-
-static int nfc_device_ready(struct mtd_info *mtd)
-{
-	u32 status, mask;
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	status = nfc_read_status(host);
-	mask = nfc_readl(host->nfc->hsmc_regs, IMR);
-
-	/* The mask should be 0. If not we may lost interrupts */
-	if (unlikely(mask & status))
-		dev_err(host->dev, "Lost the interrupt flags: 0x%08x\n",
-				mask & status);
-
-	return status & NFC_SR_RB_EDGE;
-}
-
-static void nfc_select_chip(struct mtd_info *mtd, int chip)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	if (chip == -1)
-		nfc_writel(host->nfc->hsmc_regs, CTRL, NFC_CTRL_DISABLE);
-	else
-		nfc_writel(host->nfc->hsmc_regs, CTRL, NFC_CTRL_ENABLE);
-}
-
-static int nfc_make_addr(struct mtd_info *mtd, int command, int column,
-		int page_addr, unsigned int *addr1234, unsigned int *cycle0)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
-	int acycle = 0;
-	unsigned char addr_bytes[8];
-	int index = 0, bit_shift;
-
-	BUG_ON(addr1234 == NULL || cycle0 == NULL);
-
-	*cycle0 = 0;
-	*addr1234 = 0;
-
-	if (column != -1) {
-		if (chip->options & NAND_BUSWIDTH_16 &&
-				!nand_opcode_8bits(command))
-			column >>= 1;
-		addr_bytes[acycle++] = column & 0xff;
-		if (mtd->writesize > 512)
-			addr_bytes[acycle++] = (column >> 8) & 0xff;
-	}
-
-	if (page_addr != -1) {
-		addr_bytes[acycle++] = page_addr & 0xff;
-		addr_bytes[acycle++] = (page_addr >> 8) & 0xff;
-		if (chip->chipsize > (128 << 20))
-			addr_bytes[acycle++] = (page_addr >> 16) & 0xff;
-	}
-
-	if (acycle > 4)
-		*cycle0 = addr_bytes[index++];
-
-	for (bit_shift = 0; index < acycle; bit_shift += 8)
-		*addr1234 += addr_bytes[index++] << bit_shift;
-
-	/* return acycle in cmd register */
-	return acycle << NFCADDR_CMD_ACYCLE_BIT_POS;
-}
-
-static void nfc_nand_command(struct mtd_info *mtd, unsigned int command,
-				int column, int page_addr)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	unsigned long timeout;
-	unsigned int nfc_addr_cmd = 0;
-
-	unsigned int cmd1 = command << NFCADDR_CMD_CMD1_BIT_POS;
-
-	/* Set default settings: no cmd2, no addr cycle. read from nand */
-	unsigned int cmd2 = 0;
-	unsigned int vcmd2 = 0;
-	int acycle = NFCADDR_CMD_ACYCLE_NONE;
-	int csid = NFCADDR_CMD_CSID_3;
-	int dataen = NFCADDR_CMD_DATADIS;
-	int nfcwr = NFCADDR_CMD_NFCRD;
-	unsigned int addr1234 = 0;
-	unsigned int cycle0 = 0;
-	bool do_addr = true;
-	host->nfc->data_in_sram = NULL;
-
-	dev_dbg(host->dev, "%s: cmd = 0x%02x, col = 0x%08x, page = 0x%08x\n",
-	     __func__, command, column, page_addr);
-
-	switch (command) {
-	case NAND_CMD_RESET:
-		nfc_addr_cmd = cmd1 | acycle | csid | dataen | nfcwr;
-		nfc_send_command(host, nfc_addr_cmd, addr1234, cycle0);
-		udelay(chip->chip_delay);
-
-		nfc_nand_command(mtd, NAND_CMD_STATUS, -1, -1);
-		timeout = jiffies + msecs_to_jiffies(NFC_TIME_OUT_MS);
-		while (!(chip->read_byte(mtd) & NAND_STATUS_READY)) {
-			if (time_after(jiffies, timeout)) {
-				dev_err(host->dev,
-					"Time out to wait status ready!\n");
-				break;
-			}
-		}
-		return;
-	case NAND_CMD_STATUS:
-		do_addr = false;
-		break;
-	case NAND_CMD_PARAM:
-	case NAND_CMD_READID:
-		do_addr = false;
-		acycle = NFCADDR_CMD_ACYCLE_1;
-		if (column != -1)
-			addr1234 = column;
-		break;
-	case NAND_CMD_RNDOUT:
-		cmd2 = NAND_CMD_RNDOUTSTART << NFCADDR_CMD_CMD2_BIT_POS;
-		vcmd2 = NFCADDR_CMD_VCMD2;
-		break;
-	case NAND_CMD_READ0:
-	case NAND_CMD_READOOB:
-		if (command == NAND_CMD_READOOB) {
-			column += mtd->writesize;
-			command = NAND_CMD_READ0; /* only READ0 is valid */
-			cmd1 = command << NFCADDR_CMD_CMD1_BIT_POS;
-		}
-		if (host->nfc->use_nfc_sram) {
-			/* Enable Data transfer to sram */
-			dataen = NFCADDR_CMD_DATAEN;
-
-			/* Need enable PMECC now, since NFC will transfer
-			 * data in bus after sending nfc read command.
-			 */
-			if (chip->ecc.mode == NAND_ECC_HW && host->has_pmecc)
-				pmecc_enable(host, NAND_ECC_READ);
-		}
-
-		cmd2 = NAND_CMD_READSTART << NFCADDR_CMD_CMD2_BIT_POS;
-		vcmd2 = NFCADDR_CMD_VCMD2;
-		break;
-	/* For prgramming command, the cmd need set to write enable */
-	case NAND_CMD_PAGEPROG:
-	case NAND_CMD_SEQIN:
-	case NAND_CMD_RNDIN:
-		nfcwr = NFCADDR_CMD_NFCWR;
-		if (host->nfc->will_write_sram && command == NAND_CMD_SEQIN)
-			dataen = NFCADDR_CMD_DATAEN;
-		break;
-	default:
-		break;
-	}
-
-	if (do_addr)
-		acycle = nfc_make_addr(mtd, command, column, page_addr,
-				&addr1234, &cycle0);
-
-	nfc_addr_cmd = cmd1 | cmd2 | vcmd2 | acycle | csid | dataen | nfcwr;
-	nfc_send_command(host, nfc_addr_cmd, addr1234, cycle0);
-
-	/*
-	 * Program and erase have their own busy handlers status, sequential
-	 * in, and deplete1 need no delay.
-	 */
-	switch (command) {
-	case NAND_CMD_CACHEDPROG:
-	case NAND_CMD_PAGEPROG:
-	case NAND_CMD_ERASE1:
-	case NAND_CMD_ERASE2:
-	case NAND_CMD_RNDIN:
-	case NAND_CMD_STATUS:
-	case NAND_CMD_RNDOUT:
-	case NAND_CMD_SEQIN:
-	case NAND_CMD_READID:
-		return;
-
-	case NAND_CMD_READ0:
-		if (dataen == NFCADDR_CMD_DATAEN) {
-			host->nfc->data_in_sram = host->nfc->sram_bank0 +
-				nfc_get_sram_off(host);
-			return;
-		}
-		/* fall through */
-	default:
-		nfc_prepare_interrupt(host, NFC_SR_RB_EDGE);
-		nfc_wait_interrupt(host, NFC_SR_RB_EDGE);
-	}
-}
-
-static int nfc_sram_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-			uint32_t offset, int data_len, const uint8_t *buf,
-			int oob_required, int page, int cached, int raw)
-{
-	int cfg, len;
-	int status = 0;
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	void *sram = host->nfc->sram_bank0 + nfc_get_sram_off(host);
-
-	/* Subpage write is not supported */
-	if (offset || (data_len < mtd->writesize))
-		return -EINVAL;
-
-	len = mtd->writesize;
-	/* Copy page data to sram that will write to nand via NFC */
-	if (use_dma) {
-		if (atmel_nand_dma_op(mtd, (void *)buf, len, 0) != 0)
-			/* Fall back to use cpu copy */
-			memcpy(sram, buf, len);
-	} else {
-		memcpy(sram, buf, len);
-	}
-
-	cfg = nfc_readl(host->nfc->hsmc_regs, CFG);
-	if (unlikely(raw) && oob_required) {
-		memcpy(sram + len, chip->oob_poi, mtd->oobsize);
-		len += mtd->oobsize;
-		nfc_writel(host->nfc->hsmc_regs, CFG, cfg | NFC_CFG_WSPARE);
-	} else {
-		nfc_writel(host->nfc->hsmc_regs, CFG, cfg & ~NFC_CFG_WSPARE);
-	}
-
-	if (chip->ecc.mode == NAND_ECC_HW && host->has_pmecc)
-		/*
-		 * When use NFC sram, need set up PMECC before send
-		 * NAND_CMD_SEQIN command. Since when the nand command
-		 * is sent, nfc will do transfer from sram and nand.
-		 */
-		pmecc_enable(host, NAND_ECC_WRITE);
-
-	host->nfc->will_write_sram = true;
-	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
-	host->nfc->will_write_sram = false;
-
-	if (likely(!raw))
-		/* Need to write ecc into oob */
-		status = chip->ecc.write_page(mtd, chip, buf, oob_required,
-					      page);
-
-	if (status < 0)
-		return status;
-
-	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
-	status = chip->waitfunc(mtd, chip);
-
-	if ((status & NAND_STATUS_FAIL) && (chip->errstat))
-		status = chip->errstat(mtd, chip, FL_WRITING, status, page);
-
-	if (status & NAND_STATUS_FAIL)
-		return -EIO;
-
-	return 0;
-}
-
-static int nfc_sram_init(struct mtd_info *mtd)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	int res = 0;
-
-	/* Initialize the NFC CFG register */
-	unsigned int cfg_nfc = 0;
-
-	/* set page size and oob layout */
-	switch (mtd->writesize) {
-	case 512:
-		cfg_nfc = NFC_CFG_PAGESIZE_512;
-		break;
-	case 1024:
-		cfg_nfc = NFC_CFG_PAGESIZE_1024;
-		break;
-	case 2048:
-		cfg_nfc = NFC_CFG_PAGESIZE_2048;
-		break;
-	case 4096:
-		cfg_nfc = NFC_CFG_PAGESIZE_4096;
-		break;
-	case 8192:
-		cfg_nfc = NFC_CFG_PAGESIZE_8192;
-		break;
-	default:
-		dev_err(host->dev, "Unsupported page size for NFC.\n");
-		res = -ENXIO;
-		return res;
-	}
-
-	/* oob bytes size = (NFCSPARESIZE + 1) * 4
-	 * Max support spare size is 512 bytes. */
-	cfg_nfc |= (((mtd->oobsize / 4) - 1) << NFC_CFG_NFC_SPARESIZE_BIT_POS
-		& NFC_CFG_NFC_SPARESIZE);
-	/* default set a max timeout */
-	cfg_nfc |= NFC_CFG_RSPARE |
-			NFC_CFG_NFC_DTOCYC | NFC_CFG_NFC_DTOMUL;
-
-	nfc_writel(host->nfc->hsmc_regs, CFG, cfg_nfc);
-
-	host->nfc->will_write_sram = false;
-	nfc_set_sram_bank(host, 0);
-
-	/* Use Write page with NFC SRAM only for PMECC or ECC NONE. */
-	if (host->nfc->write_by_sram) {
-		if ((chip->ecc.mode == NAND_ECC_HW && host->has_pmecc) ||
-				chip->ecc.mode == NAND_ECC_NONE)
-			chip->write_page = nfc_sram_write_page;
-		else
-			host->nfc->write_by_sram = false;
-	}
-
-	dev_info(host->dev, "Using NFC Sram read %s\n",
-			host->nfc->write_by_sram ? "and write" : "");
-	return 0;
-}
-
-static struct platform_driver atmel_nand_nfc_driver;
-/*
- * Probe for the NAND device.
- */
-static int atmel_nand_probe(struct platform_device *pdev)
-{
-	struct atmel_nand_host *host;
-	struct mtd_info *mtd;
-	struct nand_chip *nand_chip;
-	struct resource *mem;
-	int res, irq;
-
-	/* Allocate memory for the device structure (and zero it) */
-	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
-	if (!host)
-		return -ENOMEM;
-
-	res = platform_driver_register(&atmel_nand_nfc_driver);
-	if (res)
-		dev_err(&pdev->dev, "atmel_nand: can't register NFC driver\n");
-
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	host->io_base = devm_ioremap_resource(&pdev->dev, mem);
-	if (IS_ERR(host->io_base)) {
-		res = PTR_ERR(host->io_base);
-		goto err_nand_ioremap;
-	}
-	host->io_phys = (dma_addr_t)mem->start;
-
-	nand_chip = &host->nand_chip;
-	mtd = nand_to_mtd(nand_chip);
-	host->dev = &pdev->dev;
-	if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node) {
-		nand_set_flash_node(nand_chip, pdev->dev.of_node);
-		/* Only when CONFIG_OF is enabled of_node can be parsed */
-		res = atmel_of_init_port(host, pdev->dev.of_node);
-		if (res)
-			goto err_nand_ioremap;
-	} else {
-		memcpy(&host->board, dev_get_platdata(&pdev->dev),
-		       sizeof(struct atmel_nand_data));
-		nand_chip->ecc.mode = host->board.ecc_mode;
-
-		/*
-		 * When using software ECC every supported avr32 board means
-		 * Hamming algorithm. If that ever changes we'll need to add
-		 * ecc_algo field to the struct atmel_nand_data.
-		 */
-		if (nand_chip->ecc.mode == NAND_ECC_SOFT)
-			nand_chip->ecc.algo = NAND_ECC_HAMMING;
-
-		/* 16-bit bus width */
-		if (host->board.bus_width_16)
-			nand_chip->options |= NAND_BUSWIDTH_16;
-	}
-
-	 /* link the private data structures */
-	nand_set_controller_data(nand_chip, host);
-	mtd->dev.parent = &pdev->dev;
-
-	/* Set address of NAND IO lines */
-	nand_chip->IO_ADDR_R = host->io_base;
-	nand_chip->IO_ADDR_W = host->io_base;
-
-	if (nand_nfc.is_initialized) {
-		/* NFC driver is probed and initialized */
-		host->nfc = &nand_nfc;
-
-		nand_chip->select_chip = nfc_select_chip;
-		nand_chip->dev_ready = nfc_device_ready;
-		nand_chip->cmdfunc = nfc_nand_command;
-
-		/* Initialize the interrupt for NFC */
-		irq = platform_get_irq(pdev, 0);
-		if (irq < 0) {
-			dev_err(host->dev, "Cannot get HSMC irq!\n");
-			res = irq;
-			goto err_nand_ioremap;
-		}
-
-		res = devm_request_irq(&pdev->dev, irq, hsmc_interrupt,
-				0, "hsmc", host);
-		if (res) {
-			dev_err(&pdev->dev, "Unable to request HSMC irq %d\n",
-				irq);
-			goto err_nand_ioremap;
-		}
-	} else {
-		res = atmel_nand_set_enable_ready_pins(mtd);
-		if (res)
-			goto err_nand_ioremap;
-
-		nand_chip->cmd_ctrl = atmel_nand_cmd_ctrl;
-	}
-
-	nand_chip->chip_delay = 40;		/* 40us command delay time */
-
-
-	nand_chip->read_buf = atmel_read_buf;
-	nand_chip->write_buf = atmel_write_buf;
-
-	platform_set_drvdata(pdev, host);
-	atmel_nand_enable(host);
-
-	if (gpio_is_valid(host->board.det_pin)) {
-		res = devm_gpio_request(&pdev->dev,
-				host->board.det_pin, "nand_det");
-		if (res < 0) {
-			dev_err(&pdev->dev,
-				"can't request det gpio %d\n",
-				host->board.det_pin);
-			goto err_no_card;
-		}
-
-		res = gpio_direction_input(host->board.det_pin);
-		if (res < 0) {
-			dev_err(&pdev->dev,
-				"can't request input direction det gpio %d\n",
-				host->board.det_pin);
-			goto err_no_card;
-		}
-
-		if (gpio_get_value(host->board.det_pin)) {
-			dev_info(&pdev->dev, "No SmartMedia card inserted.\n");
-			res = -ENXIO;
-			goto err_no_card;
-		}
-	}
-
-	if (!host->board.has_dma)
-		use_dma = 0;
-
-	if (use_dma) {
-		dma_cap_mask_t mask;
-
-		dma_cap_zero(mask);
-		dma_cap_set(DMA_MEMCPY, mask);
-		host->dma_chan = dma_request_channel(mask, NULL, NULL);
-		if (!host->dma_chan) {
-			dev_err(host->dev, "Failed to request DMA channel\n");
-			use_dma = 0;
-		}
-	}
-	if (use_dma)
-		dev_info(host->dev, "Using %s for DMA transfers.\n",
-					dma_chan_name(host->dma_chan));
-	else
-		dev_info(host->dev, "No DMA support for NAND access.\n");
-
-	/* first scan to find the device and get the page size */
-	res = nand_scan_ident(mtd, 1, NULL);
-	if (res)
-		goto err_scan_ident;
-
-	if (host->board.on_flash_bbt || on_flash_bbt)
-		nand_chip->bbt_options |= NAND_BBT_USE_FLASH;
-
-	if (nand_chip->bbt_options & NAND_BBT_USE_FLASH)
-		dev_info(&pdev->dev, "Use On Flash BBT\n");
-
-	if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node) {
-		res = atmel_of_init_ecc(host, pdev->dev.of_node);
-		if (res)
-			goto err_hw_ecc;
-	}
-
-	if (nand_chip->ecc.mode == NAND_ECC_HW) {
-		if (host->has_pmecc)
-			res = atmel_pmecc_nand_init_params(pdev, host);
-		else
-			res = atmel_hw_nand_init_params(pdev, host);
-
-		if (res != 0)
-			goto err_hw_ecc;
-	}
-
-	/* initialize the nfc configuration register */
-	if (host->nfc && host->nfc->use_nfc_sram) {
-		res = nfc_sram_init(mtd);
-		if (res) {
-			host->nfc->use_nfc_sram = false;
-			dev_err(host->dev, "Disable use nfc sram for data transfer.\n");
-		}
-	}
-
-	/* second phase scan */
-	res = nand_scan_tail(mtd);
-	if (res)
-		goto err_scan_tail;
-
-	mtd->name = "atmel_nand";
-	res = mtd_device_register(mtd, host->board.parts,
-				  host->board.num_parts);
-	if (!res)
-		return res;
-
-err_scan_tail:
-	if (host->has_pmecc && host->nand_chip.ecc.mode == NAND_ECC_HW)
-		pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DISABLE);
-err_hw_ecc:
-err_scan_ident:
-err_no_card:
-	atmel_nand_disable(host);
-	if (host->dma_chan)
-		dma_release_channel(host->dma_chan);
-err_nand_ioremap:
-	return res;
-}
-
-/*
- * Remove a NAND device.
- */
-static int atmel_nand_remove(struct platform_device *pdev)
-{
-	struct atmel_nand_host *host = platform_get_drvdata(pdev);
-	struct mtd_info *mtd = nand_to_mtd(&host->nand_chip);
-
-	nand_release(mtd);
-
-	atmel_nand_disable(host);
-
-	if (host->has_pmecc && host->nand_chip.ecc.mode == NAND_ECC_HW) {
-		pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DISABLE);
-		pmerrloc_writel(host->pmerrloc_base, ELDIS,
-				PMERRLOC_DISABLE);
-	}
-
-	if (host->dma_chan)
-		dma_release_channel(host->dma_chan);
-
-	platform_driver_unregister(&atmel_nand_nfc_driver);
-
-	return 0;
-}
-
-/*
- * AT91RM9200 does not have PMECC or PMECC Errloc peripherals for
- * BCH ECC. Combined with the "atmel,has-pmecc", it is used to describe
- * devices from the SAM9 family that have those.
- */
-static const struct atmel_nand_caps at91rm9200_caps = {
-	.pmecc_correct_erase_page = false,
-	.pmecc_max_correction = 24,
-};
-
-static const struct atmel_nand_caps sama5d4_caps = {
-	.pmecc_correct_erase_page = true,
-	.pmecc_max_correction = 24,
-};
-
-/*
- * The PMECC Errloc controller starting in SAMA5D2 is not compatible,
- * as the increased correction strength requires more registers.
- */
-static const struct atmel_nand_caps sama5d2_caps = {
-	.pmecc_correct_erase_page = true,
-	.pmecc_max_correction = 32,
-};
-
-static const struct of_device_id atmel_nand_dt_ids[] = {
-	{ .compatible = "atmel,at91rm9200-nand", .data = &at91rm9200_caps },
-	{ .compatible = "atmel,sama5d4-nand", .data = &sama5d4_caps },
-	{ .compatible = "atmel,sama5d2-nand", .data = &sama5d2_caps },
-	{ /* sentinel */ }
-};
-
-MODULE_DEVICE_TABLE(of, atmel_nand_dt_ids);
-
-static int atmel_nand_nfc_probe(struct platform_device *pdev)
-{
-	struct atmel_nfc *nfc = &nand_nfc;
-	struct resource *nfc_cmd_regs, *nfc_hsmc_regs, *nfc_sram;
-	int ret;
-
-	nfc_cmd_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	nfc->base_cmd_regs = devm_ioremap_resource(&pdev->dev, nfc_cmd_regs);
-	if (IS_ERR(nfc->base_cmd_regs))
-		return PTR_ERR(nfc->base_cmd_regs);
-
-	nfc_hsmc_regs = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	nfc->hsmc_regs = devm_ioremap_resource(&pdev->dev, nfc_hsmc_regs);
-	if (IS_ERR(nfc->hsmc_regs))
-		return PTR_ERR(nfc->hsmc_regs);
-
-	nfc_sram = platform_get_resource(pdev, IORESOURCE_MEM, 2);
-	if (nfc_sram) {
-		nfc->sram_bank0 = (void * __force)
-				devm_ioremap_resource(&pdev->dev, nfc_sram);
-		if (IS_ERR(nfc->sram_bank0)) {
-			dev_warn(&pdev->dev, "Fail to ioremap the NFC sram with error: %ld. So disable NFC sram.\n",
-					PTR_ERR(nfc->sram_bank0));
-		} else {
-			nfc->use_nfc_sram = true;
-			nfc->sram_bank0_phys = (dma_addr_t)nfc_sram->start;
-
-			if (pdev->dev.of_node)
-				nfc->write_by_sram = of_property_read_bool(
-						pdev->dev.of_node,
-						"atmel,write-by-sram");
-		}
-	}
-
-	nfc_writel(nfc->hsmc_regs, IDR, 0xffffffff);
-	nfc_readl(nfc->hsmc_regs, SR);	/* clear the NFC_SR */
-
-	nfc->clk = devm_clk_get(&pdev->dev, NULL);
-	if (!IS_ERR(nfc->clk)) {
-		ret = clk_prepare_enable(nfc->clk);
-		if (ret)
-			return ret;
-	} else {
-		dev_warn(&pdev->dev, "NFC clock missing, update your Device Tree");
-	}
-
-	nfc->is_initialized = true;
-	dev_info(&pdev->dev, "NFC is probed.\n");
-
-	return 0;
-}
-
-static int atmel_nand_nfc_remove(struct platform_device *pdev)
-{
-	struct atmel_nfc *nfc = &nand_nfc;
-
-	if (!IS_ERR(nfc->clk))
-		clk_disable_unprepare(nfc->clk);
-
-	return 0;
-}
-
-static const struct of_device_id atmel_nand_nfc_match[] = {
-	{ .compatible = "atmel,sama5d3-nfc" },
-	{ /* sentinel */ }
-};
-MODULE_DEVICE_TABLE(of, atmel_nand_nfc_match);
-
-static struct platform_driver atmel_nand_nfc_driver = {
-	.driver = {
-		.name = "atmel_nand_nfc",
-		.of_match_table = of_match_ptr(atmel_nand_nfc_match),
-	},
-	.probe = atmel_nand_nfc_probe,
-	.remove = atmel_nand_nfc_remove,
-};
-
-static struct platform_driver atmel_nand_driver = {
-	.probe		= atmel_nand_probe,
-	.remove		= atmel_nand_remove,
-	.driver		= {
-		.name	= "atmel_nand",
-		.of_match_table	= of_match_ptr(atmel_nand_dt_ids),
-	},
-};
-
-module_platform_driver(atmel_nand_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rick Bronson");
-MODULE_DESCRIPTION("NAND/SmartMedia driver for AT91 / AVR32");
-MODULE_ALIAS("platform:atmel_nand");
diff --git a/drivers/mtd/nand/atmel_nand_ecc.h b/drivers/mtd/nand/atmel_nand_ecc.h
deleted file mode 100644
index 834d694487bd..000000000000
--- a/drivers/mtd/nand/atmel_nand_ecc.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Error Corrected Code Controller (ECC) - System peripherals regsters.
- * Based on AT91SAM9260 datasheet revision B.
- *
- * Copyright (C) 2007 Andrew Victor
- * Copyright (C) 2007 - 2012 Atmel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef ATMEL_NAND_ECC_H
-#define ATMEL_NAND_ECC_H
-
-#define ATMEL_ECC_CR		0x00			/* Control register */
-#define		ATMEL_ECC_RST		(1 << 0)		/* Reset parity */
-
-#define ATMEL_ECC_MR		0x04			/* Mode register */
-#define		ATMEL_ECC_PAGESIZE	(3 << 0)		/* Page Size */
-#define			ATMEL_ECC_PAGESIZE_528		(0)
-#define			ATMEL_ECC_PAGESIZE_1056		(1)
-#define			ATMEL_ECC_PAGESIZE_2112		(2)
-#define			ATMEL_ECC_PAGESIZE_4224		(3)
-
-#define ATMEL_ECC_SR		0x08			/* Status register */
-#define		ATMEL_ECC_RECERR		(1 << 0)		/* Recoverable Error */
-#define		ATMEL_ECC_ECCERR		(1 << 1)		/* ECC Single Bit Error */
-#define		ATMEL_ECC_MULERR		(1 << 2)		/* Multiple Errors */
-
-#define ATMEL_ECC_PR		0x0c			/* Parity register */
-#define		ATMEL_ECC_BITADDR	(0xf << 0)		/* Bit Error Address */
-#define		ATMEL_ECC_WORDADDR	(0xfff << 4)		/* Word Error Address */
-
-#define ATMEL_ECC_NPR		0x10			/* NParity register */
-#define		ATMEL_ECC_NPARITY	(0xffff << 0)		/* NParity */
-
-/* PMECC Register Definitions */
-#define ATMEL_PMECC_CFG			0x000	/* Configuration Register */
-#define		PMECC_CFG_BCH_ERR2		(0 << 0)
-#define		PMECC_CFG_BCH_ERR4		(1 << 0)
-#define		PMECC_CFG_BCH_ERR8		(2 << 0)
-#define		PMECC_CFG_BCH_ERR12		(3 << 0)
-#define		PMECC_CFG_BCH_ERR24		(4 << 0)
-#define		PMECC_CFG_BCH_ERR32		(5 << 0)
-
-#define		PMECC_CFG_SECTOR512		(0 << 4)
-#define		PMECC_CFG_SECTOR1024		(1 << 4)
-
-#define		PMECC_CFG_PAGE_1SECTOR		(0 << 8)
-#define		PMECC_CFG_PAGE_2SECTORS		(1 << 8)
-#define		PMECC_CFG_PAGE_4SECTORS		(2 << 8)
-#define		PMECC_CFG_PAGE_8SECTORS		(3 << 8)
-
-#define		PMECC_CFG_READ_OP		(0 << 12)
-#define		PMECC_CFG_WRITE_OP		(1 << 12)
-
-#define		PMECC_CFG_SPARE_ENABLE		(1 << 16)
-#define		PMECC_CFG_SPARE_DISABLE		(0 << 16)
-
-#define		PMECC_CFG_AUTO_ENABLE		(1 << 20)
-#define		PMECC_CFG_AUTO_DISABLE		(0 << 20)
-
-#define ATMEL_PMECC_SAREA		0x004	/* Spare area size */
-#define ATMEL_PMECC_SADDR		0x008	/* PMECC starting address */
-#define ATMEL_PMECC_EADDR		0x00c	/* PMECC ending address */
-#define ATMEL_PMECC_CLK			0x010	/* PMECC clock control */
-#define		PMECC_CLK_133MHZ		(2 << 0)
-
-#define ATMEL_PMECC_CTRL		0x014	/* PMECC control register */
-#define		PMECC_CTRL_RST			(1 << 0)
-#define		PMECC_CTRL_DATA			(1 << 1)
-#define		PMECC_CTRL_USER			(1 << 2)
-#define		PMECC_CTRL_ENABLE		(1 << 4)
-#define		PMECC_CTRL_DISABLE		(1 << 5)
-
-#define ATMEL_PMECC_SR			0x018	/* PMECC status register */
-#define		PMECC_SR_BUSY			(1 << 0)
-#define		PMECC_SR_ENABLE			(1 << 4)
-
-#define ATMEL_PMECC_IER			0x01c	/* PMECC interrupt enable */
-#define		PMECC_IER_ENABLE		(1 << 0)
-#define ATMEL_PMECC_IDR			0x020	/* PMECC interrupt disable */
-#define		PMECC_IER_DISABLE		(1 << 0)
-#define ATMEL_PMECC_IMR			0x024	/* PMECC interrupt mask */
-#define		PMECC_IER_MASK			(1 << 0)
-#define ATMEL_PMECC_ISR			0x028	/* PMECC interrupt status */
-#define ATMEL_PMECC_ECCx		0x040	/* PMECC ECC x */
-#define ATMEL_PMECC_REMx		0x240	/* PMECC REM x */
-
-/* PMERRLOC Register Definitions */
-#define ATMEL_PMERRLOC_ELCFG		0x000	/* Error location config */
-#define		PMERRLOC_ELCFG_SECTOR_512	(0 << 0)
-#define		PMERRLOC_ELCFG_SECTOR_1024	(1 << 0)
-#define		PMERRLOC_ELCFG_NUM_ERRORS(n)	((n) << 16)
-
-#define ATMEL_PMERRLOC_ELPRIM		0x004	/* Error location primitive */
-#define ATMEL_PMERRLOC_ELEN		0x008	/* Error location enable */
-#define ATMEL_PMERRLOC_ELDIS		0x00c	/* Error location disable */
-#define		PMERRLOC_DISABLE		(1 << 0)
-
-#define ATMEL_PMERRLOC_ELSR		0x010	/* Error location status */
-#define		PMERRLOC_ELSR_BUSY		(1 << 0)
-#define ATMEL_PMERRLOC_ELIER		0x014	/* Error location int enable */
-#define ATMEL_PMERRLOC_ELIDR		0x018	/* Error location int disable */
-#define ATMEL_PMERRLOC_ELIMR		0x01c	/* Error location int mask */
-#define ATMEL_PMERRLOC_ELISR		0x020	/* Error location int status */
-#define		PMERRLOC_ERR_NUM_MASK		(0x1f << 8)
-#define		PMERRLOC_CALC_DONE		(1 << 0)
-#define ATMEL_PMERRLOC_SIGMAx		0x028	/* Error location SIGMA x */
-
-/*
- * The ATMEL_PMERRLOC_ELx register location depends from the number of
- * bits corrected by the PMECC controller. Do not use it.
- */
-
-/* Register access macros for PMECC */
-#define pmecc_readl_relaxed(addr, reg) \
-	readl_relaxed((addr) + ATMEL_PMECC_##reg)
-
-#define pmecc_writel(addr, reg, value) \
-	writel((value), (addr) + ATMEL_PMECC_##reg)
-
-#define pmecc_readb_ecc_relaxed(addr, sector, n) \
-	readb_relaxed((addr) + ATMEL_PMECC_ECCx + ((sector) * 0x40) + (n))
-
-#define pmecc_readl_rem_relaxed(addr, sector, n) \
-	readl_relaxed((addr) + ATMEL_PMECC_REMx + ((sector) * 0x40) + ((n) * 4))
-
-#define pmerrloc_readl_relaxed(addr, reg) \
-	readl_relaxed((addr) + ATMEL_PMERRLOC_##reg)
-
-#define pmerrloc_writel(addr, reg, value) \
-	writel((value), (addr) + ATMEL_PMERRLOC_##reg)
-
-#define pmerrloc_writel_sigma_relaxed(addr, n, value) \
-	writel_relaxed((value), (addr) + ATMEL_PMERRLOC_SIGMAx + ((n) * 4))
-
-#define pmerrloc_readl_sigma_relaxed(addr, n) \
-	readl_relaxed((addr) + ATMEL_PMERRLOC_SIGMAx + ((n) * 4))
-
-#define pmerrloc_readl_el_relaxed(addr, n) \
-	readl_relaxed((addr) + ((n) * 4))
-
-/* Galois field dimension */
-#define PMECC_GF_DIMENSION_13			13
-#define PMECC_GF_DIMENSION_14			14
-
-/* Primitive Polynomial used by PMECC */
-#define PMECC_GF_13_PRIMITIVE_POLY		0x201b
-#define PMECC_GF_14_PRIMITIVE_POLY		0x4443
-
-#define PMECC_LOOKUP_TABLE_SIZE_512		0x2000
-#define PMECC_LOOKUP_TABLE_SIZE_1024		0x4000
-
-/* Time out value for reading PMECC status register */
-#define PMECC_MAX_TIMEOUT_MS			100
-
-/* Reserved bytes in oob area */
-#define PMECC_OOB_RESERVED_BYTES		2
-
-#endif
diff --git a/drivers/mtd/nand/atmel_nand_nfc.h b/drivers/mtd/nand/atmel_nand_nfc.h
deleted file mode 100644
index 4d5d26221a7e..000000000000
--- a/drivers/mtd/nand/atmel_nand_nfc.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Atmel Nand Flash Controller (NFC) - System peripherals regsters.
- * Based on SAMA5D3 datasheet.
- *
- * © Copyright 2013 Atmel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef ATMEL_NAND_NFC_H
-#define ATMEL_NAND_NFC_H
-
-/*
- * HSMC NFC registers
- */
-#define ATMEL_HSMC_NFC_CFG	0x00		/* NFC Configuration Register */
-#define		NFC_CFG_PAGESIZE	(7 << 0)
-#define			NFC_CFG_PAGESIZE_512	(0 << 0)
-#define			NFC_CFG_PAGESIZE_1024	(1 << 0)
-#define			NFC_CFG_PAGESIZE_2048	(2 << 0)
-#define			NFC_CFG_PAGESIZE_4096	(3 << 0)
-#define			NFC_CFG_PAGESIZE_8192	(4 << 0)
-#define		NFC_CFG_WSPARE		(1 << 8)
-#define		NFC_CFG_RSPARE		(1 << 9)
-#define		NFC_CFG_NFC_DTOCYC	(0xf << 16)
-#define		NFC_CFG_NFC_DTOMUL	(0x7 << 20)
-#define		NFC_CFG_NFC_SPARESIZE	(0x7f << 24)
-#define		NFC_CFG_NFC_SPARESIZE_BIT_POS	24
-
-#define ATMEL_HSMC_NFC_CTRL	0x04		/* NFC Control Register */
-#define		NFC_CTRL_ENABLE		(1 << 0)
-#define		NFC_CTRL_DISABLE	(1 << 1)
-
-#define ATMEL_HSMC_NFC_SR	0x08		/* NFC Status Register */
-#define		NFC_SR_BUSY		(1 << 8)
-#define		NFC_SR_XFR_DONE		(1 << 16)
-#define		NFC_SR_CMD_DONE		(1 << 17)
-#define		NFC_SR_DTOE		(1 << 20)
-#define		NFC_SR_UNDEF		(1 << 21)
-#define		NFC_SR_AWB		(1 << 22)
-#define		NFC_SR_ASE		(1 << 23)
-#define		NFC_SR_RB_EDGE		(1 << 24)
-
-#define ATMEL_HSMC_NFC_IER	0x0c
-#define ATMEL_HSMC_NFC_IDR	0x10
-#define ATMEL_HSMC_NFC_IMR	0x14
-#define ATMEL_HSMC_NFC_CYCLE0	0x18		/* NFC Address Cycle Zero */
-#define		ATMEL_HSMC_NFC_ADDR_CYCLE0	(0xff)
-
-#define ATMEL_HSMC_NFC_BANK	0x1c		/* NFC Bank Register */
-#define		ATMEL_HSMC_NFC_BANK0		(0 << 0)
-#define		ATMEL_HSMC_NFC_BANK1		(1 << 0)
-
-#define nfc_writel(addr, reg, value) \
-	writel((value), (addr) + ATMEL_HSMC_NFC_##reg)
-
-#define nfc_readl(addr, reg) \
-	readl_relaxed((addr) + ATMEL_HSMC_NFC_##reg)
-
-/*
- * NFC Address Command definitions
- */
-#define NFCADDR_CMD_CMD1	(0xff << 2)	/* Command for Cycle 1 */
-#define NFCADDR_CMD_CMD1_BIT_POS	2
-#define NFCADDR_CMD_CMD2	(0xff << 10)	/* Command for Cycle 2 */
-#define NFCADDR_CMD_CMD2_BIT_POS	10
-#define NFCADDR_CMD_VCMD2	(0x1 << 18)	/* Valid Cycle 2 Command */
-#define NFCADDR_CMD_ACYCLE	(0x7 << 19)	/* Number of Address required */
-#define		NFCADDR_CMD_ACYCLE_NONE		(0x0 << 19)
-#define		NFCADDR_CMD_ACYCLE_1		(0x1 << 19)
-#define		NFCADDR_CMD_ACYCLE_2		(0x2 << 19)
-#define		NFCADDR_CMD_ACYCLE_3		(0x3 << 19)
-#define		NFCADDR_CMD_ACYCLE_4		(0x4 << 19)
-#define		NFCADDR_CMD_ACYCLE_5		(0x5 << 19)
-#define NFCADDR_CMD_ACYCLE_BIT_POS	19
-#define NFCADDR_CMD_CSID	(0x7 << 22)	/* Chip Select Identifier */
-#define		NFCADDR_CMD_CSID_0		(0x0 << 22)
-#define		NFCADDR_CMD_CSID_1		(0x1 << 22)
-#define		NFCADDR_CMD_CSID_2		(0x2 << 22)
-#define		NFCADDR_CMD_CSID_3		(0x3 << 22)
-#define		NFCADDR_CMD_CSID_4		(0x4 << 22)
-#define		NFCADDR_CMD_CSID_5		(0x5 << 22)
-#define		NFCADDR_CMD_CSID_6		(0x6 << 22)
-#define		NFCADDR_CMD_CSID_7		(0x7 << 22)
-#define NFCADDR_CMD_DATAEN	(0x1 << 25)	/* Data Transfer Enable */
-#define NFCADDR_CMD_DATADIS	(0x0 << 25)	/* Data Transfer Disable */
-#define NFCADDR_CMD_NFCRD	(0x0 << 26)	/* NFC Read Enable */
-#define NFCADDR_CMD_NFCWR	(0x1 << 26)	/* NFC Write Enable */
-#define NFCADDR_CMD_NFCBUSY	(0x1 << 27)	/* NFC Busy */
-
-#define nfc_cmd_addr1234_writel(cmd, addr1234, nfc_base) \
-	writel((addr1234), (cmd) + nfc_base)
-
-#define nfc_cmd_readl(bitstatus, nfc_base) \
-	readl_relaxed((bitstatus) + nfc_base)
-
-#define NFC_TIME_OUT_MS		100
-#define	NFC_SRAM_BANK1_OFFSET	0x1200
-
-#endif
diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c
index 42ebd73f821d..7419c5ce63f8 100644
--- a/drivers/mtd/nand/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/brcmnand/brcmnand.c
@@ -101,6 +101,9 @@ struct brcm_nand_dma_desc {
 #define BRCMNAND_MIN_BLOCKSIZE	(8 * 1024)
 #define BRCMNAND_MIN_DEVSIZE	(4ULL * 1024 * 1024)
 
+#define NAND_CTRL_RDY			(INTFC_CTLR_READY | INTFC_FLASH_READY)
+#define NAND_POLL_STATUS_TIMEOUT_MS	100
+
 /* Controller feature flags */
 enum {
 	BRCMNAND_HAS_1K_SECTORS			= BIT(0),
@@ -765,6 +768,31 @@ enum {
 	CS_SELECT_AUTO_DEVICE_ID_CFG		= BIT(30),
 };
 
+static int bcmnand_ctrl_poll_status(struct brcmnand_controller *ctrl,
+				    u32 mask, u32 expected_val,
+				    unsigned long timeout_ms)
+{
+	unsigned long limit;
+	u32 val;
+
+	if (!timeout_ms)
+		timeout_ms = NAND_POLL_STATUS_TIMEOUT_MS;
+
+	limit = jiffies + msecs_to_jiffies(timeout_ms);
+	do {
+		val = brcmnand_read_reg(ctrl, BRCMNAND_INTFC_STATUS);
+		if ((val & mask) == expected_val)
+			return 0;
+
+		cpu_relax();
+	} while (time_after(limit, jiffies));
+
+	dev_warn(ctrl->dev, "timeout on status poll (expected %x got %x)\n",
+		 expected_val, val & mask);
+
+	return -ETIMEDOUT;
+}
+
 static inline void brcmnand_set_wp(struct brcmnand_controller *ctrl, bool en)
 {
 	u32 val = en ? CS_SELECT_NAND_WP : 0;
@@ -1024,12 +1052,39 @@ static void brcmnand_wp(struct mtd_info *mtd, int wp)
 
 	if ((ctrl->features & BRCMNAND_HAS_WP) && wp_on == 1) {
 		static int old_wp = -1;
+		int ret;
 
 		if (old_wp != wp) {
 			dev_dbg(ctrl->dev, "WP %s\n", wp ? "on" : "off");
 			old_wp = wp;
 		}
+
+		/*
+		 * make sure ctrl/flash ready before and after
+		 * changing state of #WP pin
+		 */
+		ret = bcmnand_ctrl_poll_status(ctrl, NAND_CTRL_RDY |
+					       NAND_STATUS_READY,
+					       NAND_CTRL_RDY |
+					       NAND_STATUS_READY, 0);
+		if (ret)
+			return;
+
 		brcmnand_set_wp(ctrl, wp);
+		chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1);
+		/* NAND_STATUS_WP 0x00 = protected, 0x80 = not protected */
+		ret = bcmnand_ctrl_poll_status(ctrl,
+					       NAND_CTRL_RDY |
+					       NAND_STATUS_READY |
+					       NAND_STATUS_WP,
+					       NAND_CTRL_RDY |
+					       NAND_STATUS_READY |
+					       (wp ? 0 : NAND_STATUS_WP), 0);
+
+		if (ret)
+			dev_err_ratelimited(&host->pdev->dev,
+					    "nand #WP expected %s\n",
+					    wp ? "on" : "off");
 	}
 }
 
@@ -1157,15 +1212,15 @@ static irqreturn_t brcmnand_dma_irq(int irq, void *data)
 static void brcmnand_send_cmd(struct brcmnand_host *host, int cmd)
 {
 	struct brcmnand_controller *ctrl = host->ctrl;
-	u32 intfc;
+	int ret;
 
 	dev_dbg(ctrl->dev, "send native cmd %d addr_lo 0x%x\n", cmd,
 		brcmnand_read_reg(ctrl, BRCMNAND_CMD_ADDRESS));
 	BUG_ON(ctrl->cmd_pending != 0);
 	ctrl->cmd_pending = cmd;
 
-	intfc = brcmnand_read_reg(ctrl, BRCMNAND_INTFC_STATUS);
-	WARN_ON(!(intfc & INTFC_CTLR_READY));
+	ret = bcmnand_ctrl_poll_status(ctrl, NAND_CTRL_RDY, NAND_CTRL_RDY, 0);
+	WARN_ON(ret);
 
 	mb(); /* flush previous writes */
 	brcmnand_write_reg(ctrl, BRCMNAND_CMD_START,
diff --git a/drivers/mtd/nand/cmx270_nand.c b/drivers/mtd/nand/cmx270_nand.c
index 226ac0bcafc6..949b9400dcb7 100644
--- a/drivers/mtd/nand/cmx270_nand.c
+++ b/drivers/mtd/nand/cmx270_nand.c
@@ -145,7 +145,7 @@ static int __init cmx270_init(void)
 
 	ret = gpio_request(GPIO_NAND_CS, "NAND CS");
 	if (ret) {
-		pr_warning("CM-X270: failed to request NAND CS gpio\n");
+		pr_warn("CM-X270: failed to request NAND CS gpio\n");
 		return ret;
 	}
 
@@ -153,7 +153,7 @@ static int __init cmx270_init(void)
 
 	ret = gpio_request(GPIO_NAND_RB, "NAND R/B");
 	if (ret) {
-		pr_warning("CM-X270: failed to request NAND R/B gpio\n");
+		pr_warn("CM-X270: failed to request NAND R/B gpio\n");
 		goto err_gpio_request;
 	}
 
diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index 27fa8b87cd5f..531c51991e57 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -581,6 +581,17 @@ static struct davinci_nand_pdata
 			"ti,davinci-nand-use-bbt"))
 			pdata->bbt_options = NAND_BBT_USE_FLASH;
 
+		/*
+		 * Since kernel v4.8, this driver has been fixed to enable
+		 * use of 4-bit hardware ECC with subpages and verified on
+		 * TI's keystone EVMs (K2L, K2HK and K2E).
+		 * However, in the interest of not breaking systems using
+		 * existing UBI partitions, sub-page writes are not being
+		 * (re)enabled. If you want to use subpage writes on Keystone
+		 * platforms (i.e. do not have any existing UBI partitions),
+		 * then use "ti,davinci-nand" as the compatible in your
+		 * device-tree file.
+		 */
 		if (of_device_is_compatible(pdev->dev.of_node,
 					    "ti,keystone-nand")) {
 			pdata->options |= NAND_NO_SUBPAGE_WRITE;
diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 73b9d4e2dca0..16634df2e39a 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -45,16 +45,16 @@ MODULE_PARM_DESC(onfi_timing_mode,
  * We define a macro here that combines all interrupts this driver uses into
  * a single constant value, for convenience.
  */
-#define DENALI_IRQ_ALL	(INTR_STATUS__DMA_CMD_COMP | \
-			INTR_STATUS__ECC_TRANSACTION_DONE | \
-			INTR_STATUS__ECC_ERR | \
-			INTR_STATUS__PROGRAM_FAIL | \
-			INTR_STATUS__LOAD_COMP | \
-			INTR_STATUS__PROGRAM_COMP | \
-			INTR_STATUS__TIME_OUT | \
-			INTR_STATUS__ERASE_FAIL | \
-			INTR_STATUS__RST_COMP | \
-			INTR_STATUS__ERASE_COMP)
+#define DENALI_IRQ_ALL	(INTR__DMA_CMD_COMP | \
+			INTR__ECC_TRANSACTION_DONE | \
+			INTR__ECC_ERR | \
+			INTR__PROGRAM_FAIL | \
+			INTR__LOAD_COMP | \
+			INTR__PROGRAM_COMP | \
+			INTR__TIME_OUT | \
+			INTR__ERASE_FAIL | \
+			INTR__RST_COMP | \
+			INTR__ERASE_COMP)
 
 /*
  * indicates whether or not the internal value for the flash bank is
@@ -62,8 +62,6 @@ MODULE_PARM_DESC(onfi_timing_mode,
  */
 #define CHIP_SELECT_INVALID	-1
 
-#define SUPPORT_8BITECC		1
-
 /*
  * This macro divides two integers and rounds fractional values up
  * to the nearest integer value.
@@ -86,16 +84,10 @@ static inline struct denali_nand_info *mtd_to_denali(struct mtd_info *mtd)
 #define SPARE_ACCESS		0x41
 #define MAIN_ACCESS		0x42
 #define MAIN_SPARE_ACCESS	0x43
-#define PIPELINE_ACCESS		0x2000
 
 #define DENALI_READ	0
 #define DENALI_WRITE	0x100
 
-/* types of device accesses. We can issue commands and get status */
-#define COMMAND_CYCLE	0
-#define ADDR_CYCLE	1
-#define STATUS_CYCLE	2
-
 /*
  * this is a helper macro that allows us to
  * format the bank into the proper bits for the controller
@@ -164,7 +156,7 @@ static void read_status(struct denali_nand_info *denali)
 static void reset_bank(struct denali_nand_info *denali)
 {
 	uint32_t irq_status;
-	uint32_t irq_mask = INTR_STATUS__RST_COMP | INTR_STATUS__TIME_OUT;
+	uint32_t irq_mask = INTR__RST_COMP | INTR__TIME_OUT;
 
 	clear_interrupts(denali);
 
@@ -172,7 +164,7 @@ static void reset_bank(struct denali_nand_info *denali)
 
 	irq_status = wait_for_irq(denali, irq_mask);
 
-	if (irq_status & INTR_STATUS__TIME_OUT)
+	if (irq_status & INTR__TIME_OUT)
 		dev_err(denali->dev, "reset bank failed.\n");
 }
 
@@ -182,22 +174,22 @@ static uint16_t denali_nand_reset(struct denali_nand_info *denali)
 	int i;
 
 	for (i = 0; i < denali->max_banks; i++)
-		iowrite32(INTR_STATUS__RST_COMP | INTR_STATUS__TIME_OUT,
+		iowrite32(INTR__RST_COMP | INTR__TIME_OUT,
 		denali->flash_reg + INTR_STATUS(i));
 
 	for (i = 0; i < denali->max_banks; i++) {
 		iowrite32(1 << i, denali->flash_reg + DEVICE_RESET);
 		while (!(ioread32(denali->flash_reg + INTR_STATUS(i)) &
-			(INTR_STATUS__RST_COMP | INTR_STATUS__TIME_OUT)))
+			(INTR__RST_COMP | INTR__TIME_OUT)))
 			cpu_relax();
 		if (ioread32(denali->flash_reg + INTR_STATUS(i)) &
-			INTR_STATUS__TIME_OUT)
+			INTR__TIME_OUT)
 			dev_dbg(denali->dev,
 			"NAND Reset operation timed out on bank %d\n", i);
 	}
 
 	for (i = 0; i < denali->max_banks; i++)
-		iowrite32(INTR_STATUS__RST_COMP | INTR_STATUS__TIME_OUT,
+		iowrite32(INTR__RST_COMP | INTR__TIME_OUT,
 			  denali->flash_reg + INTR_STATUS(i));
 
 	return PASS;
@@ -347,52 +339,25 @@ static void get_samsung_nand_para(struct denali_nand_info *denali,
 
 static void get_toshiba_nand_para(struct denali_nand_info *denali)
 {
-	uint32_t tmp;
-
 	/*
 	 * Workaround to fix a controller bug which reports a wrong
 	 * spare area size for some kind of Toshiba NAND device
 	 */
 	if ((ioread32(denali->flash_reg + DEVICE_MAIN_AREA_SIZE) == 4096) &&
-		(ioread32(denali->flash_reg + DEVICE_SPARE_AREA_SIZE) == 64)) {
+		(ioread32(denali->flash_reg + DEVICE_SPARE_AREA_SIZE) == 64))
 		iowrite32(216, denali->flash_reg + DEVICE_SPARE_AREA_SIZE);
-		tmp = ioread32(denali->flash_reg + DEVICES_CONNECTED) *
-			ioread32(denali->flash_reg + DEVICE_SPARE_AREA_SIZE);
-		iowrite32(tmp,
-				denali->flash_reg + LOGICAL_PAGE_SPARE_SIZE);
-#if SUPPORT_15BITECC
-		iowrite32(15, denali->flash_reg + ECC_CORRECTION);
-#elif SUPPORT_8BITECC
-		iowrite32(8, denali->flash_reg + ECC_CORRECTION);
-#endif
-	}
 }
 
 static void get_hynix_nand_para(struct denali_nand_info *denali,
 							uint8_t device_id)
 {
-	uint32_t main_size, spare_size;
-
 	switch (device_id) {
 	case 0xD5: /* Hynix H27UAG8T2A, H27UBG8U5A or H27UCG8VFA */
 	case 0xD7: /* Hynix H27UDG8VEM, H27UCG8UDM or H27UCG8V5A */
 		iowrite32(128, denali->flash_reg + PAGES_PER_BLOCK);
 		iowrite32(4096, denali->flash_reg + DEVICE_MAIN_AREA_SIZE);
 		iowrite32(224, denali->flash_reg + DEVICE_SPARE_AREA_SIZE);
-		main_size = 4096 *
-			ioread32(denali->flash_reg + DEVICES_CONNECTED);
-		spare_size = 224 *
-			ioread32(denali->flash_reg + DEVICES_CONNECTED);
-		iowrite32(main_size,
-				denali->flash_reg + LOGICAL_PAGE_DATA_SIZE);
-		iowrite32(spare_size,
-				denali->flash_reg + LOGICAL_PAGE_SPARE_SIZE);
 		iowrite32(0, denali->flash_reg + DEVICE_WIDTH);
-#if SUPPORT_15BITECC
-		iowrite32(15, denali->flash_reg + ECC_CORRECTION);
-#elif SUPPORT_8BITECC
-		iowrite32(8, denali->flash_reg + ECC_CORRECTION);
-#endif
 		break;
 	default:
 		dev_warn(denali->dev,
@@ -454,17 +419,12 @@ static void find_valid_banks(struct denali_nand_info *denali)
 static void detect_max_banks(struct denali_nand_info *denali)
 {
 	uint32_t features = ioread32(denali->flash_reg + FEATURES);
-	/*
-	 * Read the revision register, so we can calculate the max_banks
-	 * properly: the encoding changed from rev 5.0 to 5.1
-	 */
-	u32 revision = MAKE_COMPARABLE_REVISION(
-				ioread32(denali->flash_reg + REVISION));
 
-	if (revision < REVISION_5_1)
-		denali->max_banks = 2 << (features & FEATURES__N_BANKS);
-	else
-		denali->max_banks = 1 << (features & FEATURES__N_BANKS);
+	denali->max_banks = 1 << (features & FEATURES__N_BANKS);
+
+	/* the encoding changed from rev 5.0 to 5.1 */
+	if (denali->revision < 0x0501)
+		denali->max_banks <<= 1;
 }
 
 static uint16_t denali_nand_timing_set(struct denali_nand_info *denali)
@@ -653,7 +613,6 @@ static irqreturn_t denali_isr(int irq, void *dev_id)
 	spin_unlock(&denali->irq_lock);
 	return result;
 }
-#define BANK(x) ((x) << 24)
 
 static uint32_t wait_for_irq(struct denali_nand_info *denali, uint32_t irq_mask)
 {
@@ -718,15 +677,7 @@ static int denali_send_pipeline_cmd(struct denali_nand_info *denali,
 				    int access_type, int op)
 {
 	int status = PASS;
-	uint32_t page_count = 1;
-	uint32_t addr, cmd, irq_status, irq_mask;
-
-	if (op == DENALI_READ)
-		irq_mask = INTR_STATUS__LOAD_COMP;
-	else if (op == DENALI_WRITE)
-		irq_mask = 0;
-	else
-		BUG();
+	uint32_t addr, cmd;
 
 	setup_ecc_for_xfer(denali, ecc_en, transfer_spare);
 
@@ -749,35 +700,8 @@ static int denali_send_pipeline_cmd(struct denali_nand_info *denali,
 		cmd = MODE_10 | addr;
 		index_addr(denali, cmd, access_type);
 
-		/*
-		 * page 33 of the NAND controller spec indicates we should not
-		 * use the pipeline commands in Spare area only mode.
-		 * So we don't.
-		 */
-		if (access_type == SPARE_ACCESS) {
-			cmd = MODE_01 | addr;
-			iowrite32(cmd, denali->flash_mem);
-		} else {
-			index_addr(denali, cmd,
-					PIPELINE_ACCESS | op | page_count);
-
-			/*
-			 * wait for command to be accepted
-			 * can always use status0 bit as the
-			 * mask is identical for each bank.
-			 */
-			irq_status = wait_for_irq(denali, irq_mask);
-
-			if (irq_status == 0) {
-				dev_err(denali->dev,
-					"cmd, page, addr on timeout (0x%x, 0x%x, 0x%x)\n",
-					cmd, denali->page, addr);
-				status = FAIL;
-			} else {
-				cmd = MODE_01 | addr;
-				iowrite32(cmd, denali->flash_mem);
-			}
-		}
+		cmd = MODE_01 | addr;
+		iowrite32(cmd, denali->flash_mem);
 	}
 	return status;
 }
@@ -829,8 +753,7 @@ static int write_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 	uint32_t irq_status;
-	uint32_t irq_mask = INTR_STATUS__PROGRAM_COMP |
-						INTR_STATUS__PROGRAM_FAIL;
+	uint32_t irq_mask = INTR__PROGRAM_COMP | INTR__PROGRAM_FAIL;
 	int status = 0;
 
 	denali->page = page;
@@ -857,7 +780,7 @@ static int write_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	uint32_t irq_mask = INTR_STATUS__LOAD_COMP;
+	uint32_t irq_mask = INTR__LOAD_COMP;
 	uint32_t irq_status, addr, cmd;
 
 	denali->page = page;
@@ -890,98 +813,158 @@ static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 	}
 }
 
-/*
- * this function examines buffers to see if they contain data that
- * indicate that the buffer is part of an erased region of flash.
- */
-static bool is_erased(uint8_t *buf, int len)
+static int denali_check_erased_page(struct mtd_info *mtd,
+				    struct nand_chip *chip, uint8_t *buf,
+				    unsigned long uncor_ecc_flags,
+				    unsigned int max_bitflips)
 {
-	int i;
+	uint8_t *ecc_code = chip->buffers->ecccode;
+	int ecc_steps = chip->ecc.steps;
+	int ecc_size = chip->ecc.size;
+	int ecc_bytes = chip->ecc.bytes;
+	int i, ret, stat;
+
+	ret = mtd_ooblayout_get_eccbytes(mtd, ecc_code, chip->oob_poi, 0,
+					 chip->ecc.total);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ecc_steps; i++) {
+		if (!(uncor_ecc_flags & BIT(i)))
+			continue;
 
-	for (i = 0; i < len; i++)
-		if (buf[i] != 0xFF)
-			return false;
-	return true;
+		stat = nand_check_erased_ecc_chunk(buf, ecc_size,
+						  ecc_code, ecc_bytes,
+						  NULL, 0,
+						  chip->ecc.strength);
+		if (stat < 0) {
+			mtd->ecc_stats.failed++;
+		} else {
+			mtd->ecc_stats.corrected += stat;
+			max_bitflips = max_t(unsigned int, max_bitflips, stat);
+		}
+
+		buf += ecc_size;
+		ecc_code += ecc_bytes;
+	}
+
+	return max_bitflips;
+}
+
+static int denali_hw_ecc_fixup(struct mtd_info *mtd,
+			       struct denali_nand_info *denali,
+			       unsigned long *uncor_ecc_flags)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	int bank = denali->flash_bank;
+	uint32_t ecc_cor;
+	unsigned int max_bitflips;
+
+	ecc_cor = ioread32(denali->flash_reg + ECC_COR_INFO(bank));
+	ecc_cor >>= ECC_COR_INFO__SHIFT(bank);
+
+	if (ecc_cor & ECC_COR_INFO__UNCOR_ERR) {
+		/*
+		 * This flag is set when uncorrectable error occurs at least in
+		 * one ECC sector.  We can not know "how many sectors", or
+		 * "which sector(s)".  We need erase-page check for all sectors.
+		 */
+		*uncor_ecc_flags = GENMASK(chip->ecc.steps - 1, 0);
+		return 0;
+	}
+
+	max_bitflips = ecc_cor & ECC_COR_INFO__MAX_ERRORS;
+
+	/*
+	 * The register holds the maximum of per-sector corrected bitflips.
+	 * This is suitable for the return value of the ->read_page() callback.
+	 * Unfortunately, we can not know the total number of corrected bits in
+	 * the page.  Increase the stats by max_bitflips. (compromised solution)
+	 */
+	mtd->ecc_stats.corrected += max_bitflips;
+
+	return max_bitflips;
 }
+
 #define ECC_SECTOR_SIZE 512
 
 #define ECC_SECTOR(x)	(((x) & ECC_ERROR_ADDRESS__SECTOR_NR) >> 12)
 #define ECC_BYTE(x)	(((x) & ECC_ERROR_ADDRESS__OFFSET))
 #define ECC_CORRECTION_VALUE(x) ((x) & ERR_CORRECTION_INFO__BYTEMASK)
-#define ECC_ERROR_CORRECTABLE(x) (!((x) & ERR_CORRECTION_INFO__ERROR_TYPE))
+#define ECC_ERROR_UNCORRECTABLE(x) ((x) & ERR_CORRECTION_INFO__ERROR_TYPE)
 #define ECC_ERR_DEVICE(x)	(((x) & ERR_CORRECTION_INFO__DEVICE_NR) >> 8)
 #define ECC_LAST_ERR(x)		((x) & ERR_CORRECTION_INFO__LAST_ERR_INFO)
 
-static bool handle_ecc(struct denali_nand_info *denali, uint8_t *buf,
-		       uint32_t irq_status, unsigned int *max_bitflips)
+static int denali_sw_ecc_fixup(struct mtd_info *mtd,
+			       struct denali_nand_info *denali,
+			       unsigned long *uncor_ecc_flags, uint8_t *buf)
 {
-	bool check_erased_page = false;
 	unsigned int bitflips = 0;
+	unsigned int max_bitflips = 0;
+	uint32_t err_addr, err_cor_info;
+	unsigned int err_byte, err_sector, err_device;
+	uint8_t err_cor_value;
+	unsigned int prev_sector = 0;
 
-	if (irq_status & INTR_STATUS__ECC_ERR) {
-		/* read the ECC errors. we'll ignore them for now */
-		uint32_t err_address, err_correction_info, err_byte,
-			 err_sector, err_device, err_correction_value;
-		denali_set_intr_modes(denali, false);
-
-		do {
-			err_address = ioread32(denali->flash_reg +
-						ECC_ERROR_ADDRESS);
-			err_sector = ECC_SECTOR(err_address);
-			err_byte = ECC_BYTE(err_address);
-
-			err_correction_info = ioread32(denali->flash_reg +
-						ERR_CORRECTION_INFO);
-			err_correction_value =
-				ECC_CORRECTION_VALUE(err_correction_info);
-			err_device = ECC_ERR_DEVICE(err_correction_info);
-
-			if (ECC_ERROR_CORRECTABLE(err_correction_info)) {
-				/*
-				 * If err_byte is larger than ECC_SECTOR_SIZE,
-				 * means error happened in OOB, so we ignore
-				 * it. It's no need for us to correct it
-				 * err_device is represented the NAND error
-				 * bits are happened in if there are more
-				 * than one NAND connected.
-				 */
-				if (err_byte < ECC_SECTOR_SIZE) {
-					struct mtd_info *mtd =
-						nand_to_mtd(&denali->nand);
-					int offset;
-
-					offset = (err_sector *
-							ECC_SECTOR_SIZE +
-							err_byte) *
-							denali->devnum +
-							err_device;
-					/* correct the ECC error */
-					buf[offset] ^= err_correction_value;
-					mtd->ecc_stats.corrected++;
-					bitflips++;
-				}
-			} else {
-				/*
-				 * if the error is not correctable, need to
-				 * look at the page to see if it is an erased
-				 * page. if so, then it's not a real ECC error
-				 */
-				check_erased_page = true;
-			}
-		} while (!ECC_LAST_ERR(err_correction_info));
-		/*
-		 * Once handle all ecc errors, controller will triger
-		 * a ECC_TRANSACTION_DONE interrupt, so here just wait
-		 * for a while for this interrupt
-		 */
-		while (!(read_interrupt_status(denali) &
-				INTR_STATUS__ECC_TRANSACTION_DONE))
-			cpu_relax();
-		clear_interrupts(denali);
-		denali_set_intr_modes(denali, true);
-	}
-	*max_bitflips = bitflips;
-	return check_erased_page;
+	/* read the ECC errors. we'll ignore them for now */
+	denali_set_intr_modes(denali, false);
+
+	do {
+		err_addr = ioread32(denali->flash_reg + ECC_ERROR_ADDRESS);
+		err_sector = ECC_SECTOR(err_addr);
+		err_byte = ECC_BYTE(err_addr);
+
+		err_cor_info = ioread32(denali->flash_reg + ERR_CORRECTION_INFO);
+		err_cor_value = ECC_CORRECTION_VALUE(err_cor_info);
+		err_device = ECC_ERR_DEVICE(err_cor_info);
+
+		/* reset the bitflip counter when crossing ECC sector */
+		if (err_sector != prev_sector)
+			bitflips = 0;
+
+		if (ECC_ERROR_UNCORRECTABLE(err_cor_info)) {
+			/*
+			 * Check later if this is a real ECC error, or
+			 * an erased sector.
+			 */
+			*uncor_ecc_flags |= BIT(err_sector);
+		} else if (err_byte < ECC_SECTOR_SIZE) {
+			/*
+			 * If err_byte is larger than ECC_SECTOR_SIZE, means error
+			 * happened in OOB, so we ignore it. It's no need for
+			 * us to correct it err_device is represented the NAND
+			 * error bits are happened in if there are more than
+			 * one NAND connected.
+			 */
+			int offset;
+			unsigned int flips_in_byte;
+
+			offset = (err_sector * ECC_SECTOR_SIZE + err_byte) *
+						denali->devnum + err_device;
+
+			/* correct the ECC error */
+			flips_in_byte = hweight8(buf[offset] ^ err_cor_value);
+			buf[offset] ^= err_cor_value;
+			mtd->ecc_stats.corrected += flips_in_byte;
+			bitflips += flips_in_byte;
+
+			max_bitflips = max(max_bitflips, bitflips);
+		}
+
+		prev_sector = err_sector;
+	} while (!ECC_LAST_ERR(err_cor_info));
+
+	/*
+	 * Once handle all ecc errors, controller will trigger a
+	 * ECC_TRANSACTION_DONE interrupt, so here just wait for
+	 * a while for this interrupt
+	 */
+	while (!(read_interrupt_status(denali) & INTR__ECC_TRANSACTION_DONE))
+		cpu_relax();
+	clear_interrupts(denali);
+	denali_set_intr_modes(denali, true);
+
+	return max_bitflips;
 }
 
 /* programs the controller to either enable/disable DMA transfers */
@@ -991,8 +974,30 @@ static void denali_enable_dma(struct denali_nand_info *denali, bool en)
 	ioread32(denali->flash_reg + DMA_ENABLE);
 }
 
-/* setups the HW to perform the data DMA */
-static void denali_setup_dma(struct denali_nand_info *denali, int op)
+static void denali_setup_dma64(struct denali_nand_info *denali, int op)
+{
+	uint32_t mode;
+	const int page_count = 1;
+	uint64_t addr = denali->buf.dma_buf;
+
+	mode = MODE_10 | BANK(denali->flash_bank) | denali->page;
+
+	/* DMA is a three step process */
+
+	/*
+	 * 1. setup transfer type, interrupt when complete,
+	 *    burst len = 64 bytes, the number of pages
+	 */
+	index_addr(denali, mode, 0x01002000 | (64 << 16) | op | page_count);
+
+	/* 2. set memory low address */
+	index_addr(denali, mode, addr);
+
+	/* 3. set memory high address */
+	index_addr(denali, mode, addr >> 32);
+}
+
+static void denali_setup_dma32(struct denali_nand_info *denali, int op)
 {
 	uint32_t mode;
 	const int page_count = 1;
@@ -1015,6 +1020,14 @@ static void denali_setup_dma(struct denali_nand_info *denali, int op)
 	index_addr(denali, mode | 0x14000, 0x2400);
 }
 
+static void denali_setup_dma(struct denali_nand_info *denali, int op)
+{
+	if (denali->caps & DENALI_CAP_DMA_64BIT)
+		denali_setup_dma64(denali, op);
+	else
+		denali_setup_dma32(denali, op);
+}
+
 /*
  * writes a page. user specifies type, and this function handles the
  * configuration details.
@@ -1026,8 +1039,7 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	dma_addr_t addr = denali->buf.dma_buf;
 	size_t size = mtd->writesize + mtd->oobsize;
 	uint32_t irq_status;
-	uint32_t irq_mask = INTR_STATUS__DMA_CMD_COMP |
-						INTR_STATUS__PROGRAM_FAIL;
+	uint32_t irq_mask = INTR__DMA_CMD_COMP | INTR__PROGRAM_FAIL;
 
 	/*
 	 * if it is a raw xfer, we want to disable ecc and send the spare area.
@@ -1118,16 +1130,15 @@ static int denali_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
 static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 			    uint8_t *buf, int oob_required, int page)
 {
-	unsigned int max_bitflips;
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-
 	dma_addr_t addr = denali->buf.dma_buf;
 	size_t size = mtd->writesize + mtd->oobsize;
-
 	uint32_t irq_status;
-	uint32_t irq_mask = INTR_STATUS__ECC_TRANSACTION_DONE |
-			    INTR_STATUS__ECC_ERR;
-	bool check_erased_page = false;
+	uint32_t irq_mask = denali->caps & DENALI_CAP_HW_ECC_FIXUP ?
+				INTR__DMA_CMD_COMP | INTR__ECC_UNCOR_ERR :
+				INTR__ECC_TRANSACTION_DONE | INTR__ECC_ERR;
+	unsigned long uncor_ecc_flags = 0;
+	int stat = 0;
 
 	if (page != denali->page) {
 		dev_err(denali->dev,
@@ -1151,21 +1162,23 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 
 	memcpy(buf, denali->buf.buf, mtd->writesize);
 
-	check_erased_page = handle_ecc(denali, buf, irq_status, &max_bitflips);
+	if (denali->caps & DENALI_CAP_HW_ECC_FIXUP)
+		stat = denali_hw_ecc_fixup(mtd, denali, &uncor_ecc_flags);
+	else if (irq_status & INTR__ECC_ERR)
+		stat = denali_sw_ecc_fixup(mtd, denali, &uncor_ecc_flags, buf);
 	denali_enable_dma(denali, false);
 
-	if (check_erased_page) {
+	if (stat < 0)
+		return stat;
+
+	if (uncor_ecc_flags) {
 		read_oob_data(mtd, chip->oob_poi, denali->page);
 
-		/* check ECC failures that may have occurred on erased pages */
-		if (check_erased_page) {
-			if (!is_erased(buf, mtd->writesize))
-				mtd->ecc_stats.failed++;
-			if (!is_erased(buf, mtd->oobsize))
-				mtd->ecc_stats.failed++;
-		}
+		stat = denali_check_erased_page(mtd, chip, buf,
+						uncor_ecc_flags, stat);
 	}
-	return max_bitflips;
+
+	return stat;
 }
 
 static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
@@ -1174,7 +1187,7 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 	dma_addr_t addr = denali->buf.dma_buf;
 	size_t size = mtd->writesize + mtd->oobsize;
-	uint32_t irq_mask = INTR_STATUS__DMA_CMD_COMP;
+	uint32_t irq_mask = INTR__DMA_CMD_COMP;
 
 	if (page != denali->page) {
 		dev_err(denali->dev,
@@ -1247,10 +1260,9 @@ static int denali_erase(struct mtd_info *mtd, int page)
 	index_addr(denali, cmd, 0x1);
 
 	/* wait for erase to complete or failure to occur */
-	irq_status = wait_for_irq(denali, INTR_STATUS__ERASE_COMP |
-					INTR_STATUS__ERASE_FAIL);
+	irq_status = wait_for_irq(denali, INTR__ERASE_COMP | INTR__ERASE_FAIL);
 
-	return irq_status & INTR_STATUS__ERASE_FAIL ? NAND_STATUS_FAIL : PASS;
+	return irq_status & INTR__ERASE_FAIL ? NAND_STATUS_FAIL : PASS;
 }
 
 static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col,
@@ -1303,6 +1315,14 @@ static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col,
 static void denali_hw_init(struct denali_nand_info *denali)
 {
 	/*
+	 * The REVISION register may not be reliable.  Platforms are allowed to
+	 * override it.
+	 */
+	if (!denali->revision)
+		denali->revision =
+				swab16(ioread32(denali->flash_reg + REVISION));
+
+	/*
 	 * tell driver how many bit controller will skip before
 	 * writing ECC code in OOB, this register may be already
 	 * set by firmware. So we read this value out.
@@ -1413,9 +1433,61 @@ static void denali_drv_init(struct denali_nand_info *denali)
 	denali->irq_status = 0;
 }
 
+static int denali_multidev_fixup(struct denali_nand_info *denali)
+{
+	struct nand_chip *chip = &denali->nand;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	/*
+	 * Support for multi device:
+	 * When the IP configuration is x16 capable and two x8 chips are
+	 * connected in parallel, DEVICES_CONNECTED should be set to 2.
+	 * In this case, the core framework knows nothing about this fact,
+	 * so we should tell it the _logical_ pagesize and anything necessary.
+	 */
+	denali->devnum = ioread32(denali->flash_reg + DEVICES_CONNECTED);
+
+	/*
+	 * On some SoCs, DEVICES_CONNECTED is not auto-detected.
+	 * For those, DEVICES_CONNECTED is left to 0.  Set 1 if it is the case.
+	 */
+	if (denali->devnum == 0) {
+		denali->devnum = 1;
+		iowrite32(1, denali->flash_reg + DEVICES_CONNECTED);
+	}
+
+	if (denali->devnum == 1)
+		return 0;
+
+	if (denali->devnum != 2) {
+		dev_err(denali->dev, "unsupported number of devices %d\n",
+			denali->devnum);
+		return -EINVAL;
+	}
+
+	/* 2 chips in parallel */
+	mtd->size <<= 1;
+	mtd->erasesize <<= 1;
+	mtd->writesize <<= 1;
+	mtd->oobsize <<= 1;
+	chip->chipsize <<= 1;
+	chip->page_shift += 1;
+	chip->phys_erase_shift += 1;
+	chip->bbt_erase_shift += 1;
+	chip->chip_shift += 1;
+	chip->pagemask <<= 1;
+	chip->ecc.size <<= 1;
+	chip->ecc.bytes <<= 1;
+	chip->ecc.strength <<= 1;
+	denali->bbtskipbytes <<= 1;
+
+	return 0;
+}
+
 int denali_init(struct denali_nand_info *denali)
 {
-	struct mtd_info *mtd = nand_to_mtd(&denali->nand);
+	struct nand_chip *chip = &denali->nand;
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
 	if (denali->platform == INTEL_CE4100) {
@@ -1449,13 +1521,16 @@ int denali_init(struct denali_nand_info *denali)
 
 	/* now that our ISR is registered, we can enable interrupts */
 	denali_set_intr_modes(denali, true);
-	mtd->name = "denali-nand";
+	nand_set_flash_node(chip, denali->dev->of_node);
+	/* Fallback to the default name if DT did not give "label" property */
+	if (!mtd->name)
+		mtd->name = "denali-nand";
 
 	/* register the driver with the NAND core subsystem */
-	denali->nand.select_chip = denali_select_chip;
-	denali->nand.cmdfunc = denali_cmdfunc;
-	denali->nand.read_byte = denali_read_byte;
-	denali->nand.waitfunc = denali_waitfunc;
+	chip->select_chip = denali_select_chip;
+	chip->cmdfunc = denali_cmdfunc;
+	chip->read_byte = denali_read_byte;
+	chip->waitfunc = denali_waitfunc;
 
 	/*
 	 * scan for NAND devices attached to the controller
@@ -1476,8 +1551,9 @@ int denali_init(struct denali_nand_info *denali)
 		goto failed_req_irq;
 	}
 
-	/* Is 32-bit DMA supported? */
-	ret = dma_set_mask(denali->dev, DMA_BIT_MASK(32));
+	ret = dma_set_mask(denali->dev,
+			   DMA_BIT_MASK(denali->caps & DENALI_CAP_DMA_64BIT ?
+					64 : 32));
 	if (ret) {
 		dev_err(denali->dev, "No usable DMA configuration\n");
 		goto failed_req_irq;
@@ -1493,54 +1569,35 @@ int denali_init(struct denali_nand_info *denali)
 	}
 
 	/*
-	 * support for multi nand
-	 * MTD known nothing about multi nand, so we should tell it
-	 * the real pagesize and anything necessery
-	 */
-	denali->devnum = ioread32(denali->flash_reg + DEVICES_CONNECTED);
-	denali->nand.chipsize <<= denali->devnum - 1;
-	denali->nand.page_shift += denali->devnum - 1;
-	denali->nand.pagemask = (denali->nand.chipsize >>
-						denali->nand.page_shift) - 1;
-	denali->nand.bbt_erase_shift += denali->devnum - 1;
-	denali->nand.phys_erase_shift = denali->nand.bbt_erase_shift;
-	denali->nand.chip_shift += denali->devnum - 1;
-	mtd->writesize <<= denali->devnum - 1;
-	mtd->oobsize <<= denali->devnum - 1;
-	mtd->erasesize <<= denali->devnum - 1;
-	mtd->size = denali->nand.numchips * denali->nand.chipsize;
-	denali->bbtskipbytes *= denali->devnum;
-
-	/*
 	 * second stage of the NAND scan
 	 * this stage requires information regarding ECC and
 	 * bad block management.
 	 */
 
 	/* Bad block management */
-	denali->nand.bbt_td = &bbt_main_descr;
-	denali->nand.bbt_md = &bbt_mirror_descr;
+	chip->bbt_td = &bbt_main_descr;
+	chip->bbt_md = &bbt_mirror_descr;
 
 	/* skip the scan for now until we have OOB read and write support */
-	denali->nand.bbt_options |= NAND_BBT_USE_FLASH;
-	denali->nand.options |= NAND_SKIP_BBTSCAN;
-	denali->nand.ecc.mode = NAND_ECC_HW_SYNDROME;
+	chip->bbt_options |= NAND_BBT_USE_FLASH;
+	chip->options |= NAND_SKIP_BBTSCAN;
+	chip->ecc.mode = NAND_ECC_HW_SYNDROME;
 
 	/* no subpage writes on denali */
-	denali->nand.options |= NAND_NO_SUBPAGE_WRITE;
+	chip->options |= NAND_NO_SUBPAGE_WRITE;
 
 	/*
 	 * Denali Controller only support 15bit and 8bit ECC in MRST,
 	 * so just let controller do 15bit ECC for MLC and 8bit ECC for
 	 * SLC if possible.
 	 * */
-	if (!nand_is_slc(&denali->nand) &&
+	if (!nand_is_slc(chip) &&
 			(mtd->oobsize > (denali->bbtskipbytes +
 			ECC_15BITS * (mtd->writesize /
 			ECC_SECTOR_SIZE)))) {
 		/* if MLC OOB size is large enough, use 15bit ECC*/
-		denali->nand.ecc.strength = 15;
-		denali->nand.ecc.bytes = ECC_15BITS;
+		chip->ecc.strength = 15;
+		chip->ecc.bytes = ECC_15BITS;
 		iowrite32(15, denali->flash_reg + ECC_CORRECTION);
 	} else if (mtd->oobsize < (denali->bbtskipbytes +
 			ECC_8BITS * (mtd->writesize /
@@ -1548,24 +1605,26 @@ int denali_init(struct denali_nand_info *denali)
 		pr_err("Your NAND chip OOB is not large enough to contain 8bit ECC correction codes");
 		goto failed_req_irq;
 	} else {
-		denali->nand.ecc.strength = 8;
-		denali->nand.ecc.bytes = ECC_8BITS;
+		chip->ecc.strength = 8;
+		chip->ecc.bytes = ECC_8BITS;
 		iowrite32(8, denali->flash_reg + ECC_CORRECTION);
 	}
 
 	mtd_set_ooblayout(mtd, &denali_ooblayout_ops);
-	denali->nand.ecc.bytes *= denali->devnum;
-	denali->nand.ecc.strength *= denali->devnum;
 
 	/* override the default read operations */
-	denali->nand.ecc.size = ECC_SECTOR_SIZE * denali->devnum;
-	denali->nand.ecc.read_page = denali_read_page;
-	denali->nand.ecc.read_page_raw = denali_read_page_raw;
-	denali->nand.ecc.write_page = denali_write_page;
-	denali->nand.ecc.write_page_raw = denali_write_page_raw;
-	denali->nand.ecc.read_oob = denali_read_oob;
-	denali->nand.ecc.write_oob = denali_write_oob;
-	denali->nand.erase = denali_erase;
+	chip->ecc.size = ECC_SECTOR_SIZE;
+	chip->ecc.read_page = denali_read_page;
+	chip->ecc.read_page_raw = denali_read_page_raw;
+	chip->ecc.write_page = denali_write_page;
+	chip->ecc.write_page_raw = denali_write_page_raw;
+	chip->ecc.read_oob = denali_read_oob;
+	chip->ecc.write_oob = denali_write_oob;
+	chip->erase = denali_erase;
+
+	ret = denali_multidev_fixup(denali);
+	if (ret)
+		goto failed_req_irq;
 
 	ret = nand_scan_tail(mtd);
 	if (ret)
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index ea22191e8515..ec004850652a 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -20,6 +20,7 @@
 #ifndef __DENALI_H__
 #define __DENALI_H__
 
+#include <linux/bitops.h>
 #include <linux/mtd/nand.h>
 
 #define DEVICE_RESET				0x0
@@ -178,8 +179,6 @@
 
 #define REVISION				0x370
 #define     REVISION__VALUE				0xffff
-#define MAKE_COMPARABLE_REVISION(x)		swab16((x) & REVISION__VALUE)
-#define REVISION_5_1				0x00000501
 
 #define ONFI_DEVICE_FEATURES			0x380
 #define     ONFI_DEVICE_FEATURES__VALUE			0x003f
@@ -218,65 +217,29 @@
 
 #define INTR_STATUS(__bank)	(0x410 + ((__bank) * 0x50))
 #define INTR_EN(__bank)		(0x420 + ((__bank) * 0x50))
-
-#define     INTR_STATUS__ECC_TRANSACTION_DONE		0x0001
-#define     INTR_STATUS__ECC_ERR			0x0002
-#define     INTR_STATUS__DMA_CMD_COMP			0x0004
-#define     INTR_STATUS__TIME_OUT			0x0008
-#define     INTR_STATUS__PROGRAM_FAIL			0x0010
-#define     INTR_STATUS__ERASE_FAIL			0x0020
-#define     INTR_STATUS__LOAD_COMP			0x0040
-#define     INTR_STATUS__PROGRAM_COMP			0x0080
-#define     INTR_STATUS__ERASE_COMP			0x0100
-#define     INTR_STATUS__PIPE_CPYBCK_CMD_COMP		0x0200
-#define     INTR_STATUS__LOCKED_BLK			0x0400
-#define     INTR_STATUS__UNSUP_CMD			0x0800
-#define     INTR_STATUS__INT_ACT			0x1000
-#define     INTR_STATUS__RST_COMP			0x2000
-#define     INTR_STATUS__PIPE_CMD_ERR			0x4000
-#define     INTR_STATUS__PAGE_XFER_INC			0x8000
-
-#define     INTR_EN__ECC_TRANSACTION_DONE		0x0001
-#define     INTR_EN__ECC_ERR				0x0002
-#define     INTR_EN__DMA_CMD_COMP			0x0004
-#define     INTR_EN__TIME_OUT				0x0008
-#define     INTR_EN__PROGRAM_FAIL			0x0010
-#define     INTR_EN__ERASE_FAIL				0x0020
-#define     INTR_EN__LOAD_COMP				0x0040
-#define     INTR_EN__PROGRAM_COMP			0x0080
-#define     INTR_EN__ERASE_COMP				0x0100
-#define     INTR_EN__PIPE_CPYBCK_CMD_COMP		0x0200
-#define     INTR_EN__LOCKED_BLK				0x0400
-#define     INTR_EN__UNSUP_CMD				0x0800
-#define     INTR_EN__INT_ACT				0x1000
-#define     INTR_EN__RST_COMP				0x2000
-#define     INTR_EN__PIPE_CMD_ERR			0x4000
-#define     INTR_EN__PAGE_XFER_INC			0x8000
+/* bit[1:0] is used differently depending on IP version */
+#define     INTR__ECC_UNCOR_ERR				0x0001	/* new IP */
+#define     INTR__ECC_TRANSACTION_DONE			0x0001	/* old IP */
+#define     INTR__ECC_ERR				0x0002	/* old IP */
+#define     INTR__DMA_CMD_COMP				0x0004
+#define     INTR__TIME_OUT				0x0008
+#define     INTR__PROGRAM_FAIL				0x0010
+#define     INTR__ERASE_FAIL				0x0020
+#define     INTR__LOAD_COMP				0x0040
+#define     INTR__PROGRAM_COMP				0x0080
+#define     INTR__ERASE_COMP				0x0100
+#define     INTR__PIPE_CPYBCK_CMD_COMP			0x0200
+#define     INTR__LOCKED_BLK				0x0400
+#define     INTR__UNSUP_CMD				0x0800
+#define     INTR__INT_ACT				0x1000
+#define     INTR__RST_COMP				0x2000
+#define     INTR__PIPE_CMD_ERR				0x4000
+#define     INTR__PAGE_XFER_INC				0x8000
 
 #define PAGE_CNT(__bank)	(0x430 + ((__bank) * 0x50))
 #define ERR_PAGE_ADDR(__bank)	(0x440 + ((__bank) * 0x50))
 #define ERR_BLOCK_ADDR(__bank)	(0x450 + ((__bank) * 0x50))
 
-#define DATA_INTR				0x550
-#define     DATA_INTR__WRITE_SPACE_AV			0x0001
-#define     DATA_INTR__READ_DATA_AV			0x0002
-
-#define DATA_INTR_EN				0x560
-#define     DATA_INTR_EN__WRITE_SPACE_AV		0x0001
-#define     DATA_INTR_EN__READ_DATA_AV			0x0002
-
-#define GPREG_0					0x570
-#define     GPREG_0__VALUE				0xffff
-
-#define GPREG_1					0x580
-#define     GPREG_1__VALUE				0xffff
-
-#define GPREG_2					0x590
-#define     GPREG_2__VALUE				0xffff
-
-#define GPREG_3					0x5a0
-#define     GPREG_3__VALUE				0xffff
-
 #define ECC_THRESHOLD				0x600
 #define     ECC_THRESHOLD__VALUE			0x03ff
 
@@ -297,6 +260,11 @@
 #define     ERR_CORRECTION_INFO__ERROR_TYPE		0x4000
 #define     ERR_CORRECTION_INFO__LAST_ERR_INFO		0x8000
 
+#define ECC_COR_INFO(bank)			(0x650 + (bank) / 2 * 0x10)
+#define     ECC_COR_INFO__SHIFT(bank)			((bank) % 2 * 8)
+#define     ECC_COR_INFO__MAX_ERRORS			0x007f
+#define     ECC_COR_INFO__UNCOR_ERR			0x0080
+
 #define DMA_ENABLE				0x700
 #define     DMA_ENABLE__FLAG				0x0001
 
@@ -304,20 +272,13 @@
 #define     IGNORE_ECC_DONE__FLAG			0x0001
 
 #define DMA_INTR				0x720
+#define DMA_INTR_EN				0x730
 #define     DMA_INTR__TARGET_ERROR			0x0001
 #define     DMA_INTR__DESC_COMP_CHANNEL0		0x0002
 #define     DMA_INTR__DESC_COMP_CHANNEL1		0x0004
 #define     DMA_INTR__DESC_COMP_CHANNEL2		0x0008
 #define     DMA_INTR__DESC_COMP_CHANNEL3		0x0010
-#define     DMA_INTR__MEMCOPY_DESC_COMP		0x0020
-
-#define DMA_INTR_EN				0x730
-#define     DMA_INTR_EN__TARGET_ERROR			0x0001
-#define     DMA_INTR_EN__DESC_COMP_CHANNEL0		0x0002
-#define     DMA_INTR_EN__DESC_COMP_CHANNEL1		0x0004
-#define     DMA_INTR_EN__DESC_COMP_CHANNEL2		0x0008
-#define     DMA_INTR_EN__DESC_COMP_CHANNEL3		0x0010
-#define     DMA_INTR_EN__MEMCOPY_DESC_COMP		0x0020
+#define     DMA_INTR__MEMCOPY_DESC_COMP			0x0020
 
 #define TARGET_ERR_ADDR_LO			0x740
 #define     TARGET_ERR_ADDR_LO__VALUE			0xffff
@@ -331,69 +292,12 @@
 #define     CHNL_ACTIVE__CHANNEL2			0x0004
 #define     CHNL_ACTIVE__CHANNEL3			0x0008
 
-#define ACTIVE_SRC_ID				0x800
-#define     ACTIVE_SRC_ID__VALUE			0x00ff
-
-#define PTN_INTR					0x810
-#define     PTN_INTR__CONFIG_ERROR			0x0001
-#define     PTN_INTR__ACCESS_ERROR_BANK0		0x0002
-#define     PTN_INTR__ACCESS_ERROR_BANK1		0x0004
-#define     PTN_INTR__ACCESS_ERROR_BANK2		0x0008
-#define     PTN_INTR__ACCESS_ERROR_BANK3		0x0010
-#define     PTN_INTR__REG_ACCESS_ERROR			0x0020
-
-#define PTN_INTR_EN				0x820
-#define     PTN_INTR_EN__CONFIG_ERROR			0x0001
-#define     PTN_INTR_EN__ACCESS_ERROR_BANK0		0x0002
-#define     PTN_INTR_EN__ACCESS_ERROR_BANK1		0x0004
-#define     PTN_INTR_EN__ACCESS_ERROR_BANK2		0x0008
-#define     PTN_INTR_EN__ACCESS_ERROR_BANK3		0x0010
-#define     PTN_INTR_EN__REG_ACCESS_ERROR		0x0020
-
-#define PERM_SRC_ID(__bank)	(0x830 + ((__bank) * 0x40))
-#define     PERM_SRC_ID__SRCID				0x00ff
-#define     PERM_SRC_ID__DIRECT_ACCESS_ACTIVE		0x0800
-#define     PERM_SRC_ID__WRITE_ACTIVE			0x2000
-#define     PERM_SRC_ID__READ_ACTIVE			0x4000
-#define     PERM_SRC_ID__PARTITION_VALID		0x8000
-
-#define MIN_BLK_ADDR(__bank)	(0x840 + ((__bank) * 0x40))
-#define     MIN_BLK_ADDR__VALUE				0xffff
-
-#define MAX_BLK_ADDR(__bank)	(0x850 + ((__bank) * 0x40))
-#define     MAX_BLK_ADDR__VALUE				0xffff
-
-#define MIN_MAX_BANK(__bank)	(0x860 + ((__bank) * 0x40))
-#define     MIN_MAX_BANK__MIN_VALUE			0x0003
-#define     MIN_MAX_BANK__MAX_VALUE			0x000c
-
-
-/* ffsdefs.h */
-#define CLEAR 0                 /*use this to clear a field instead of "fail"*/
-#define SET   1                 /*use this to set a field instead of "pass"*/
 #define FAIL 1                  /*failed flag*/
 #define PASS 0                  /*success flag*/
-#define ERR -1                  /*error flag*/
-
-/* lld.h */
-#define GOOD_BLOCK 0
-#define DEFECTIVE_BLOCK 1
-#define READ_ERROR 2
 
 #define CLK_X  5
 #define CLK_MULTI 4
 
-/* KBV - Updated to LNW scratch register address */
-#define SCRATCH_REG_ADDR    CONFIG_MTD_NAND_DENALI_SCRATCH_REG_ADDR
-#define SCRATCH_REG_SIZE    64
-
-#define GLOB_HWCTL_DEFAULT_BLKS    2048
-
-#define SUPPORT_15BITECC        1
-#define SUPPORT_8BITECC         1
-
-#define CUSTOM_CONF_PARAMS      0
-
 #define ONFI_BLOOM_TIME         1
 #define MODE5_WORKAROUND        0
 
@@ -403,31 +307,6 @@
 #define MODE_10    0x08000000
 #define MODE_11    0x0C000000
 
-
-#define DATA_TRANSFER_MODE              0
-#define PROTECTION_PER_BLOCK            1
-#define LOAD_WAIT_COUNT                 2
-#define PROGRAM_WAIT_COUNT              3
-#define ERASE_WAIT_COUNT                4
-#define INT_MONITOR_CYCLE_COUNT         5
-#define READ_BUSY_PIN_ENABLED           6
-#define MULTIPLANE_OPERATION_SUPPORT    7
-#define PRE_FETCH_MODE                  8
-#define CE_DONT_CARE_SUPPORT            9
-#define COPYBACK_SUPPORT                10
-#define CACHE_WRITE_SUPPORT             11
-#define CACHE_READ_SUPPORT              12
-#define NUM_PAGES_IN_BLOCK              13
-#define ECC_ENABLE_SELECT               14
-#define WRITE_ENABLE_2_READ_ENABLE      15
-#define ADDRESS_2_DATA                  16
-#define READ_ENABLE_2_WRITE_ENABLE      17
-#define TWO_ROW_ADDRESS_CYCLES          18
-#define MULTIPLANE_ADDRESS_RESTRICT     19
-#define ACC_CLOCKS                      20
-#define READ_WRITE_ENABLE_LOW_COUNT     21
-#define READ_WRITE_ENABLE_HIGH_COUNT    22
-
 #define ECC_SECTOR_SIZE     512
 
 struct nand_buf {
@@ -449,23 +328,26 @@ struct denali_nand_info {
 	struct nand_buf buf;
 	struct device *dev;
 	int total_used_banks;
-	uint32_t block;  /* stored for future use */
-	uint16_t page;
-	void __iomem *flash_reg;  /* Mapped io reg base address */
-	void __iomem *flash_mem;  /* Mapped io reg base address */
+	int page;
+	void __iomem *flash_reg;	/* Register Interface */
+	void __iomem *flash_mem;	/* Host Data/Command Interface */
 
 	/* elements used by ISR */
 	struct completion complete;
 	spinlock_t irq_lock;
 	uint32_t irq_status;
-	int irq_debug_array[32];
 	int irq;
 
-	uint32_t devnum;	/* represent how many nands connected */
-	uint32_t bbtskipbytes;
-	uint32_t max_banks;
+	int devnum;	/* represent how many nands connected */
+	int bbtskipbytes;
+	int max_banks;
+	unsigned int revision;
+	unsigned int caps;
 };
 
+#define DENALI_CAP_HW_ECC_FIXUP			BIT(0)
+#define DENALI_CAP_DMA_64BIT			BIT(1)
+
 extern int denali_init(struct denali_nand_info *denali);
 extern void denali_remove(struct denali_nand_info *denali);
 
diff --git a/drivers/mtd/nand/denali_dt.c b/drivers/mtd/nand/denali_dt.c
index 5607fcd3b8ed..df9ef36cc2ce 100644
--- a/drivers/mtd/nand/denali_dt.c
+++ b/drivers/mtd/nand/denali_dt.c
@@ -29,64 +29,66 @@ struct denali_dt {
 	struct clk		*clk;
 };
 
-static const struct of_device_id denali_nand_dt_ids[] = {
-		{ .compatible = "denali,denali-nand-dt" },
-		{ /* sentinel */ }
-	};
+struct denali_dt_data {
+	unsigned int revision;
+	unsigned int caps;
+};
 
-MODULE_DEVICE_TABLE(of, denali_nand_dt_ids);
+static const struct denali_dt_data denali_socfpga_data = {
+	.caps = DENALI_CAP_HW_ECC_FIXUP,
+};
 
-static u64 denali_dma_mask;
+static const struct of_device_id denali_nand_dt_ids[] = {
+	{
+		.compatible = "altr,socfpga-denali-nand",
+		.data = &denali_socfpga_data,
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, denali_nand_dt_ids);
 
-static int denali_dt_probe(struct platform_device *ofdev)
+static int denali_dt_probe(struct platform_device *pdev)
 {
 	struct resource *denali_reg, *nand_data;
 	struct denali_dt *dt;
+	const struct denali_dt_data *data;
 	struct denali_nand_info *denali;
 	int ret;
-	const struct of_device_id *of_id;
 
-	of_id = of_match_device(denali_nand_dt_ids, &ofdev->dev);
-	if (of_id) {
-		ofdev->id_entry = of_id->data;
-	} else {
-		pr_err("Failed to find the right device id.\n");
-		return -ENOMEM;
-	}
-
-	dt = devm_kzalloc(&ofdev->dev, sizeof(*dt), GFP_KERNEL);
+	dt = devm_kzalloc(&pdev->dev, sizeof(*dt), GFP_KERNEL);
 	if (!dt)
 		return -ENOMEM;
 	denali = &dt->denali;
 
+	data = of_device_get_match_data(&pdev->dev);
+	if (data) {
+		denali->revision = data->revision;
+		denali->caps = data->caps;
+	}
+
 	denali->platform = DT;
-	denali->dev = &ofdev->dev;
-	denali->irq = platform_get_irq(ofdev, 0);
+	denali->dev = &pdev->dev;
+	denali->irq = platform_get_irq(pdev, 0);
 	if (denali->irq < 0) {
-		dev_err(&ofdev->dev, "no irq defined\n");
+		dev_err(&pdev->dev, "no irq defined\n");
 		return denali->irq;
 	}
 
-	denali_reg = platform_get_resource_byname(ofdev, IORESOURCE_MEM, "denali_reg");
-	denali->flash_reg = devm_ioremap_resource(&ofdev->dev, denali_reg);
+	denali_reg = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						  "denali_reg");
+	denali->flash_reg = devm_ioremap_resource(&pdev->dev, denali_reg);
 	if (IS_ERR(denali->flash_reg))
 		return PTR_ERR(denali->flash_reg);
 
-	nand_data = platform_get_resource_byname(ofdev, IORESOURCE_MEM, "nand_data");
-	denali->flash_mem = devm_ioremap_resource(&ofdev->dev, nand_data);
+	nand_data = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						 "nand_data");
+	denali->flash_mem = devm_ioremap_resource(&pdev->dev, nand_data);
 	if (IS_ERR(denali->flash_mem))
 		return PTR_ERR(denali->flash_mem);
 
-	if (!of_property_read_u32(ofdev->dev.of_node,
-		"dma-mask", (u32 *)&denali_dma_mask)) {
-		denali->dev->dma_mask = &denali_dma_mask;
-	} else {
-		denali->dev->dma_mask = NULL;
-	}
-
-	dt->clk = devm_clk_get(&ofdev->dev, NULL);
+	dt->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(dt->clk)) {
-		dev_err(&ofdev->dev, "no clk available\n");
+		dev_err(&pdev->dev, "no clk available\n");
 		return PTR_ERR(dt->clk);
 	}
 	clk_prepare_enable(dt->clk);
@@ -95,7 +97,7 @@ static int denali_dt_probe(struct platform_device *ofdev)
 	if (ret)
 		goto out_disable_clk;
 
-	platform_set_drvdata(ofdev, dt);
+	platform_set_drvdata(pdev, dt);
 	return 0;
 
 out_disable_clk:
@@ -104,9 +106,9 @@ out_disable_clk:
 	return ret;
 }
 
-static int denali_dt_remove(struct platform_device *ofdev)
+static int denali_dt_remove(struct platform_device *pdev)
 {
-	struct denali_dt *dt = platform_get_drvdata(ofdev);
+	struct denali_dt *dt = platform_get_drvdata(pdev);
 
 	denali_remove(&dt->denali);
 	clk_disable_unprepare(dt->clk);
diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index bda1e4667138..cea50d2f218c 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -38,15 +38,6 @@
 #include <linux/amba/bus.h>
 #include <mtd/mtd-abi.h>
 
-#define FSMC_NAND_BW8		1
-#define FSMC_NAND_BW16		2
-
-#define FSMC_MAX_NOR_BANKS	4
-#define FSMC_MAX_NAND_BANKS	4
-
-#define FSMC_FLASH_WIDTH8	1
-#define FSMC_FLASH_WIDTH16	2
-
 /* fsmc controller registers for NOR flash */
 #define CTRL			0x0
 	/* ctrl register definitions */
@@ -133,33 +124,48 @@ enum access_mode {
 };
 
 /**
- * fsmc_nand_platform_data - platform specific NAND controller config
- * @nand_timings: timing setup for the physical NAND interface
- * @partitions: partition table for the platform, use a default fallback
- * if this is NULL
- * @nr_partitions: the number of partitions in the previous entry
- * @options: different options for the driver
- * @width: bus width
- * @bank: default bank
- * @select_bank: callback to select a certain bank, this is
- * platform-specific. If the controller only supports one bank
- * this may be set to NULL
+ * struct fsmc_nand_data - structure for FSMC NAND device state
+ *
+ * @pid:		Part ID on the AMBA PrimeCell format
+ * @mtd:		MTD info for a NAND flash.
+ * @nand:		Chip related info for a NAND flash.
+ * @partitions:		Partition info for a NAND Flash.
+ * @nr_partitions:	Total number of partition of a NAND flash.
+ *
+ * @bank:		Bank number for probed device.
+ * @clk:		Clock structure for FSMC.
+ *
+ * @read_dma_chan:	DMA channel for read access
+ * @write_dma_chan:	DMA channel for write access to NAND
+ * @dma_access_complete: Completion structure
+ *
+ * @data_pa:		NAND Physical port for Data.
+ * @data_va:		NAND port for Data.
+ * @cmd_va:		NAND port for Command.
+ * @addr_va:		NAND port for Address.
+ * @regs_va:		FSMC regs base address.
  */
-struct fsmc_nand_platform_data {
-	struct fsmc_nand_timings *nand_timings;
-	struct mtd_partition	*partitions;
-	unsigned int		nr_partitions;
-	unsigned int		options;
-	unsigned int		width;
-	unsigned int		bank;
+struct fsmc_nand_data {
+	u32			pid;
+	struct nand_chip	nand;
 
+	unsigned int		bank;
+	struct device		*dev;
 	enum access_mode	mode;
+	struct clk		*clk;
 
-	void			(*select_bank)(uint32_t bank, uint32_t busw);
+	/* DMA related objects */
+	struct dma_chan		*read_dma_chan;
+	struct dma_chan		*write_dma_chan;
+	struct completion	dma_access_complete;
 
-	/* priv structures for dma accesses */
-	void			*read_dma_priv;
-	void			*write_dma_priv;
+	struct fsmc_nand_timings *dev_timings;
+
+	dma_addr_t		data_pa;
+	void __iomem		*data_va;
+	void __iomem		*cmd_va;
+	void __iomem		*addr_va;
+	void __iomem		*regs_va;
 };
 
 static int fsmc_ecc1_ooblayout_ecc(struct mtd_info *mtd, int section,
@@ -246,86 +252,11 @@ static const struct mtd_ooblayout_ops fsmc_ecc4_ooblayout_ops = {
 	.free = fsmc_ecc4_ooblayout_free,
 };
 
-/**
- * struct fsmc_nand_data - structure for FSMC NAND device state
- *
- * @pid:		Part ID on the AMBA PrimeCell format
- * @mtd:		MTD info for a NAND flash.
- * @nand:		Chip related info for a NAND flash.
- * @partitions:		Partition info for a NAND Flash.
- * @nr_partitions:	Total number of partition of a NAND flash.
- *
- * @bank:		Bank number for probed device.
- * @clk:		Clock structure for FSMC.
- *
- * @read_dma_chan:	DMA channel for read access
- * @write_dma_chan:	DMA channel for write access to NAND
- * @dma_access_complete: Completion structure
- *
- * @data_pa:		NAND Physical port for Data.
- * @data_va:		NAND port for Data.
- * @cmd_va:		NAND port for Command.
- * @addr_va:		NAND port for Address.
- * @regs_va:		FSMC regs base address.
- */
-struct fsmc_nand_data {
-	u32			pid;
-	struct nand_chip	nand;
-	struct mtd_partition	*partitions;
-	unsigned int		nr_partitions;
-
-	unsigned int		bank;
-	struct device		*dev;
-	enum access_mode	mode;
-	struct clk		*clk;
-
-	/* DMA related objects */
-	struct dma_chan		*read_dma_chan;
-	struct dma_chan		*write_dma_chan;
-	struct completion	dma_access_complete;
-
-	struct fsmc_nand_timings *dev_timings;
-
-	dma_addr_t		data_pa;
-	void __iomem		*data_va;
-	void __iomem		*cmd_va;
-	void __iomem		*addr_va;
-	void __iomem		*regs_va;
-
-	void			(*select_chip)(uint32_t bank, uint32_t busw);
-};
-
 static inline struct fsmc_nand_data *mtd_to_fsmc(struct mtd_info *mtd)
 {
 	return container_of(mtd_to_nand(mtd), struct fsmc_nand_data, nand);
 }
 
-/* Assert CS signal based on chipnr */
-static void fsmc_select_chip(struct mtd_info *mtd, int chipnr)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct fsmc_nand_data *host;
-
-	host = mtd_to_fsmc(mtd);
-
-	switch (chipnr) {
-	case -1:
-		chip->cmd_ctrl(mtd, NAND_CMD_NONE, 0 | NAND_CTRL_CHANGE);
-		break;
-	case 0:
-	case 1:
-	case 2:
-	case 3:
-		if (host->select_chip)
-			host->select_chip(chipnr,
-					chip->options & NAND_BUSWIDTH_16);
-		break;
-
-	default:
-		dev_err(host->dev, "unsupported chip-select %d\n", chipnr);
-	}
-}
-
 /*
  * fsmc_cmd_ctrl - For facilitaing Hardware access
  * This routine allows hardware specific access to control-lines(ALE,CLE)
@@ -838,44 +769,46 @@ static bool filter(struct dma_chan *chan, void *slave)
 }
 
 static int fsmc_nand_probe_config_dt(struct platform_device *pdev,
-				     struct device_node *np)
+				     struct fsmc_nand_data *host,
+				     struct nand_chip *nand)
 {
-	struct fsmc_nand_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct device_node *np = pdev->dev.of_node;
 	u32 val;
 	int ret;
 
-	/* Set default NAND width to 8 bits */
-	pdata->width = 8;
+	nand->options = 0;
+
 	if (!of_property_read_u32(np, "bank-width", &val)) {
 		if (val == 2) {
-			pdata->width = 16;
+			nand->options |= NAND_BUSWIDTH_16;
 		} else if (val != 1) {
 			dev_err(&pdev->dev, "invalid bank-width %u\n", val);
 			return -EINVAL;
 		}
 	}
+
 	if (of_get_property(np, "nand-skip-bbtscan", NULL))
-		pdata->options = NAND_SKIP_BBTSCAN;
+		nand->options |= NAND_SKIP_BBTSCAN;
 
-	pdata->nand_timings = devm_kzalloc(&pdev->dev,
-				sizeof(*pdata->nand_timings), GFP_KERNEL);
-	if (!pdata->nand_timings)
+	host->dev_timings = devm_kzalloc(&pdev->dev,
+				sizeof(*host->dev_timings), GFP_KERNEL);
+	if (!host->dev_timings)
 		return -ENOMEM;
-	ret = of_property_read_u8_array(np, "timings", (u8 *)pdata->nand_timings,
-						sizeof(*pdata->nand_timings));
+	ret = of_property_read_u8_array(np, "timings", (u8 *)host->dev_timings,
+						sizeof(*host->dev_timings));
 	if (ret) {
 		dev_info(&pdev->dev, "No timings in dts specified, using default timings!\n");
-		pdata->nand_timings = NULL;
+		host->dev_timings = NULL;
 	}
 
 	/* Set default NAND bank to 0 */
-	pdata->bank = 0;
+	host->bank = 0;
 	if (!of_property_read_u32(np, "bank", &val)) {
 		if (val > 3) {
 			dev_err(&pdev->dev, "invalid bank %u\n", val);
 			return -EINVAL;
 		}
-		pdata->bank = val;
+		host->bank = val;
 	}
 	return 0;
 }
@@ -886,8 +819,6 @@ static int fsmc_nand_probe_config_dt(struct platform_device *pdev,
  */
 static int __init fsmc_nand_probe(struct platform_device *pdev)
 {
-	struct fsmc_nand_platform_data *pdata = dev_get_platdata(&pdev->dev);
-	struct device_node __maybe_unused *np = pdev->dev.of_node;
 	struct fsmc_nand_data *host;
 	struct mtd_info *mtd;
 	struct nand_chip *nand;
@@ -897,22 +828,17 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	u32 pid;
 	int i;
 
-	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return -ENOMEM;
-
-	pdev->dev.platform_data = pdata;
-	ret = fsmc_nand_probe_config_dt(pdev, np);
-	if (ret) {
-		dev_err(&pdev->dev, "no platform data\n");
-		return -ENODEV;
-	}
-
 	/* Allocate memory for the device structure (and zero it) */
 	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
 	if (!host)
 		return -ENOMEM;
 
+	nand = &host->nand;
+
+	ret = fsmc_nand_probe_config_dt(pdev, host, nand);
+	if (ret)
+		return ret;
+
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_data");
 	host->data_va = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(host->data_va))
@@ -935,7 +861,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	if (IS_ERR(host->regs_va))
 		return PTR_ERR(host->regs_va);
 
-	host->clk = clk_get(&pdev->dev, NULL);
+	host->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(host->clk)) {
 		dev_err(&pdev->dev, "failed to fetch block clock\n");
 		return PTR_ERR(host->clk);
@@ -943,7 +869,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 
 	ret = clk_prepare_enable(host->clk);
 	if (ret)
-		goto err_clk_prepare_enable;
+		return ret;
 
 	/*
 	 * This device ID is actually a common AMBA ID as used on the
@@ -957,22 +883,15 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 		 AMBA_PART_BITS(pid), AMBA_MANF_BITS(pid),
 		 AMBA_REV_BITS(pid), AMBA_CONFIG_BITS(pid));
 
-	host->bank = pdata->bank;
-	host->select_chip = pdata->select_bank;
-	host->partitions = pdata->partitions;
-	host->nr_partitions = pdata->nr_partitions;
 	host->dev = &pdev->dev;
-	host->dev_timings = pdata->nand_timings;
-	host->mode = pdata->mode;
 
 	if (host->mode == USE_DMA_ACCESS)
 		init_completion(&host->dma_access_complete);
 
 	/* Link all private pointers */
 	mtd = nand_to_mtd(&host->nand);
-	nand = &host->nand;
 	nand_set_controller_data(nand, host);
-	nand_set_flash_node(nand, np);
+	nand_set_flash_node(nand, pdev->dev.of_node);
 
 	mtd->dev.parent = &pdev->dev;
 	nand->IO_ADDR_R = host->data_va;
@@ -987,26 +906,18 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	nand->ecc.mode = NAND_ECC_HW;
 	nand->ecc.hwctl = fsmc_enable_hwecc;
 	nand->ecc.size = 512;
-	nand->options = pdata->options;
-	nand->select_chip = fsmc_select_chip;
 	nand->badblockbits = 7;
-	nand_set_flash_node(nand, np);
-
-	if (pdata->width == FSMC_NAND_BW16)
-		nand->options |= NAND_BUSWIDTH_16;
 
 	switch (host->mode) {
 	case USE_DMA_ACCESS:
 		dma_cap_zero(mask);
 		dma_cap_set(DMA_MEMCPY, mask);
-		host->read_dma_chan = dma_request_channel(mask, filter,
-				pdata->read_dma_priv);
+		host->read_dma_chan = dma_request_channel(mask, filter, NULL);
 		if (!host->read_dma_chan) {
 			dev_err(&pdev->dev, "Unable to get read dma channel\n");
 			goto err_req_read_chnl;
 		}
-		host->write_dma_chan = dma_request_channel(mask, filter,
-				pdata->write_dma_priv);
+		host->write_dma_chan = dma_request_channel(mask, filter, NULL);
 		if (!host->write_dma_chan) {
 			dev_err(&pdev->dev, "Unable to get write dma channel\n");
 			goto err_req_write_chnl;
@@ -1107,18 +1018,8 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_probe;
 
-	/*
-	 * The partition information can is accessed by (in the same precedence)
-	 *
-	 * command line through Bootloader,
-	 * platform data,
-	 * default partition information present in driver.
-	 */
-	/*
-	 * Check for partition info passed
-	 */
 	mtd->name = "nand";
-	ret = mtd_device_register(mtd, host->partitions, host->nr_partitions);
+	ret = mtd_device_register(mtd, NULL, 0);
 	if (ret)
 		goto err_probe;
 
@@ -1135,8 +1036,6 @@ err_req_write_chnl:
 		dma_release_channel(host->read_dma_chan);
 err_req_read_chnl:
 	clk_disable_unprepare(host->clk);
-err_clk_prepare_enable:
-	clk_put(host->clk);
 	return ret;
 }
 
@@ -1155,7 +1054,6 @@ static int fsmc_nand_remove(struct platform_device *pdev)
 			dma_release_channel(host->read_dma_chan);
 		}
 		clk_disable_unprepare(host->clk);
-		clk_put(host->clk);
 	}
 
 	return 0;
@@ -1185,20 +1083,18 @@ static int fsmc_nand_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(fsmc_nand_pm_ops, fsmc_nand_suspend, fsmc_nand_resume);
 
-#ifdef CONFIG_OF
 static const struct of_device_id fsmc_nand_id_table[] = {
 	{ .compatible = "st,spear600-fsmc-nand" },
 	{ .compatible = "stericsson,fsmc-nand" },
 	{}
 };
 MODULE_DEVICE_TABLE(of, fsmc_nand_id_table);
-#endif
 
 static struct platform_driver fsmc_nand_driver = {
 	.remove = fsmc_nand_remove,
 	.driver = {
 		.name = "fsmc-nand",
-		.of_match_table = of_match_ptr(fsmc_nand_id_table),
+		.of_match_table = fsmc_nand_id_table,
 		.pm = &fsmc_nand_pm_ops,
 	},
 };
diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
index 0d24857469ab..85294f150f4f 100644
--- a/drivers/mtd/nand/gpio.c
+++ b/drivers/mtd/nand/gpio.c
@@ -78,7 +78,9 @@ static void gpio_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
 	gpio_nand_dosync(gpiomtd);
 
 	if (ctrl & NAND_CTRL_CHANGE) {
-		gpio_set_value(gpiomtd->plat.gpio_nce, !(ctrl & NAND_NCE));
+		if (gpio_is_valid(gpiomtd->plat.gpio_nce))
+			gpio_set_value(gpiomtd->plat.gpio_nce,
+				       !(ctrl & NAND_NCE));
 		gpio_set_value(gpiomtd->plat.gpio_cle, !!(ctrl & NAND_CLE));
 		gpio_set_value(gpiomtd->plat.gpio_ale, !!(ctrl & NAND_ALE));
 		gpio_nand_dosync(gpiomtd);
@@ -201,7 +203,8 @@ static int gpio_nand_remove(struct platform_device *pdev)
 
 	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
 		gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
-	gpio_set_value(gpiomtd->plat.gpio_nce, 1);
+	if (gpio_is_valid(gpiomtd->plat.gpio_nce))
+		gpio_set_value(gpiomtd->plat.gpio_nce, 1);
 
 	return 0;
 }
@@ -239,10 +242,13 @@ static int gpio_nand_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	ret = devm_gpio_request(&pdev->dev, gpiomtd->plat.gpio_nce, "NAND NCE");
-	if (ret)
-		return ret;
-	gpio_direction_output(gpiomtd->plat.gpio_nce, 1);
+	if (gpio_is_valid(gpiomtd->plat.gpio_nce)) {
+		ret = devm_gpio_request(&pdev->dev, gpiomtd->plat.gpio_nce,
+					"NAND NCE");
+		if (ret)
+			return ret;
+		gpio_direction_output(gpiomtd->plat.gpio_nce, 1);
+	}
 
 	if (gpio_is_valid(gpiomtd->plat.gpio_nwp)) {
 		ret = devm_gpio_request(&pdev->dev, gpiomtd->plat.gpio_nwp,
diff --git a/drivers/mtd/nand/nand_amd.c b/drivers/mtd/nand/nand_amd.c
new file mode 100644
index 000000000000..170403a3bfa8
--- /dev/null
+++ b/drivers/mtd/nand/nand_amd.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 Free Electrons
+ * Copyright (C) 2017 NextThing Co
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mtd/nand.h>
+
+static void amd_nand_decode_id(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	nand_decode_ext_id(chip);
+
+	/*
+	 * Check for Spansion/AMD ID + repeating 5th, 6th byte since
+	 * some Spansion chips have erasesize that conflicts with size
+	 * listed in nand_ids table.
+	 * Data sheet (5 byte ID): Spansion S30ML-P ORNAND (p.39)
+	 */
+	if (chip->id.data[4] != 0x00 && chip->id.data[5] == 0x00 &&
+	    chip->id.data[6] == 0x00 && chip->id.data[7] == 0x00 &&
+	    mtd->writesize == 512) {
+		mtd->erasesize = 128 * 1024;
+		mtd->erasesize <<= ((chip->id.data[3] & 0x03) << 1);
+	}
+}
+
+static int amd_nand_init(struct nand_chip *chip)
+{
+	if (nand_is_slc(chip))
+		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
+
+	return 0;
+}
+
+const struct nand_manufacturer_ops amd_nand_manuf_ops = {
+	.detect = amd_nand_decode_id,
+	.init = amd_nand_init,
+};
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index b0524f8accb6..b1dd12729f19 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -139,6 +139,74 @@ const struct mtd_ooblayout_ops nand_ooblayout_lp_ops = {
 };
 EXPORT_SYMBOL_GPL(nand_ooblayout_lp_ops);
 
+/*
+ * Support the old "large page" layout used for 1-bit Hamming ECC where ECC
+ * are placed at a fixed offset.
+ */
+static int nand_ooblayout_ecc_lp_hamming(struct mtd_info *mtd, int section,
+					 struct mtd_oob_region *oobregion)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+
+	if (section)
+		return -ERANGE;
+
+	switch (mtd->oobsize) {
+	case 64:
+		oobregion->offset = 40;
+		break;
+	case 128:
+		oobregion->offset = 80;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	oobregion->length = ecc->total;
+	if (oobregion->offset + oobregion->length > mtd->oobsize)
+		return -ERANGE;
+
+	return 0;
+}
+
+static int nand_ooblayout_free_lp_hamming(struct mtd_info *mtd, int section,
+					  struct mtd_oob_region *oobregion)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int ecc_offset = 0;
+
+	if (section < 0 || section > 1)
+		return -ERANGE;
+
+	switch (mtd->oobsize) {
+	case 64:
+		ecc_offset = 40;
+		break;
+	case 128:
+		ecc_offset = 80;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (section == 0) {
+		oobregion->offset = 2;
+		oobregion->length = ecc_offset - 2;
+	} else {
+		oobregion->offset = ecc_offset + ecc->total;
+		oobregion->length = mtd->oobsize - oobregion->offset;
+	}
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops nand_ooblayout_lp_hamming_ops = {
+	.ecc = nand_ooblayout_ecc_lp_hamming,
+	.free = nand_ooblayout_free_lp_hamming,
+};
+
 static int check_offs_len(struct mtd_info *mtd,
 					loff_t ofs, uint64_t len)
 {
@@ -354,40 +422,32 @@ static void nand_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len)
  */
 static int nand_block_bad(struct mtd_info *mtd, loff_t ofs)
 {
-	int page, res = 0, i = 0;
+	int page, page_end, res;
 	struct nand_chip *chip = mtd_to_nand(mtd);
-	u16 bad;
+	u8 bad;
 
 	if (chip->bbt_options & NAND_BBT_SCANLASTPAGE)
 		ofs += mtd->erasesize - mtd->writesize;
 
 	page = (int)(ofs >> chip->page_shift) & chip->pagemask;
+	page_end = page + (chip->bbt_options & NAND_BBT_SCAN2NDPAGE ? 2 : 1);
 
-	do {
-		if (chip->options & NAND_BUSWIDTH_16) {
-			chip->cmdfunc(mtd, NAND_CMD_READOOB,
-					chip->badblockpos & 0xFE, page);
-			bad = cpu_to_le16(chip->read_word(mtd));
-			if (chip->badblockpos & 0x1)
-				bad >>= 8;
-			else
-				bad &= 0xFF;
-		} else {
-			chip->cmdfunc(mtd, NAND_CMD_READOOB, chip->badblockpos,
-					page);
-			bad = chip->read_byte(mtd);
-		}
+	for (; page < page_end; page++) {
+		res = chip->ecc.read_oob(mtd, chip, page);
+		if (res)
+			return res;
+
+		bad = chip->oob_poi[chip->badblockpos];
 
 		if (likely(chip->badblockbits == 8))
 			res = bad != 0xFF;
 		else
 			res = hweight8(bad) < chip->badblockbits;
-		ofs += mtd->writesize;
-		page = (int)(ofs >> chip->page_shift) & chip->pagemask;
-		i++;
-	} while (!res && i < 2 && (chip->bbt_options & NAND_BBT_SCAN2NDPAGE));
+		if (res)
+			return res;
+	}
 
-	return res;
+	return 0;
 }
 
 /**
@@ -676,6 +736,8 @@ static void nand_command(struct mtd_info *mtd, unsigned int command,
 	case NAND_CMD_ERASE2:
 	case NAND_CMD_SEQIN:
 	case NAND_CMD_STATUS:
+	case NAND_CMD_READID:
+	case NAND_CMD_SET_FEATURES:
 		return;
 
 	case NAND_CMD_RESET:
@@ -794,6 +856,8 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 	case NAND_CMD_ERASE2:
 	case NAND_CMD_SEQIN:
 	case NAND_CMD_STATUS:
+	case NAND_CMD_READID:
+	case NAND_CMD_SET_FEATURES:
 		return;
 
 	case NAND_CMD_RNDIN:
@@ -1958,7 +2022,9 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 		if (!aligned)
 			use_bufpoi = 1;
 		else if (chip->options & NAND_USE_BOUNCE_BUFFER)
-			use_bufpoi = !virt_addr_valid(buf);
+			use_bufpoi = !virt_addr_valid(buf) ||
+				     !IS_ALIGNED((unsigned long)buf,
+						 chip->buf_align);
 		else
 			use_bufpoi = 0;
 
@@ -1997,8 +2063,6 @@ read_retry:
 				break;
 			}
 
-			max_bitflips = max_t(unsigned int, max_bitflips, ret);
-
 			/* Transfer not aligned data */
 			if (use_bufpoi) {
 				if (!NAND_HAS_SUBPAGE_READ(chip) && !oob &&
@@ -2049,6 +2113,7 @@ read_retry:
 			}
 
 			buf += bytes;
+			max_bitflips = max_t(unsigned int, max_bitflips, ret);
 		} else {
 			memcpy(buf, chip->buffers->databuf + col, bytes);
 			buf += bytes;
@@ -2637,7 +2702,7 @@ static int nand_write_page_syndrome(struct mtd_info *mtd,
 }
 
 /**
- * nand_write_page - [REPLACEABLE] write one page
+ * nand_write_page - write one page
  * @mtd: MTD device structure
  * @chip: NAND chip descriptor
  * @offset: address offset within the page
@@ -2815,7 +2880,9 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 		if (part_pagewr)
 			use_bufpoi = 1;
 		else if (chip->options & NAND_USE_BOUNCE_BUFFER)
-			use_bufpoi = !virt_addr_valid(buf);
+			use_bufpoi = !virt_addr_valid(buf) ||
+				     !IS_ALIGNED((unsigned long)buf,
+						 chip->buf_align);
 		else
 			use_bufpoi = 0;
 
@@ -2840,9 +2907,10 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 			/* We still need to erase leftover OOB data */
 			memset(chip->oob_poi, 0xff, mtd->oobsize);
 		}
-		ret = chip->write_page(mtd, chip, column, bytes, wbuf,
-					oob_required, page, cached,
-					(ops->mode == MTD_OPS_RAW));
+
+		ret = nand_write_page(mtd, chip, column, bytes, wbuf,
+				      oob_required, page, cached,
+				      (ops->mode == MTD_OPS_RAW));
 		if (ret)
 			break;
 
@@ -3385,8 +3453,10 @@ static void nand_shutdown(struct mtd_info *mtd)
 }
 
 /* Set default functions */
-static void nand_set_defaults(struct nand_chip *chip, int busw)
+static void nand_set_defaults(struct nand_chip *chip)
 {
+	unsigned int busw = chip->options & NAND_BUSWIDTH_16;
+
 	/* check for proper chip_delay setup, set 20us if not */
 	if (!chip->chip_delay)
 		chip->chip_delay = 20;
@@ -3431,6 +3501,8 @@ static void nand_set_defaults(struct nand_chip *chip, int busw)
 		nand_hw_control_init(chip->controller);
 	}
 
+	if (!chip->buf_align)
+		chip->buf_align = 1;
 }
 
 /* Sanitize ONFI strings so we can safely print them */
@@ -3464,9 +3536,10 @@ static u16 onfi_crc16(u16 crc, u8 const *p, size_t len)
 }
 
 /* Parse the Extended Parameter Page. */
-static int nand_flash_detect_ext_param_page(struct mtd_info *mtd,
-		struct nand_chip *chip, struct nand_onfi_params *p)
+static int nand_flash_detect_ext_param_page(struct nand_chip *chip,
+					    struct nand_onfi_params *p)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct onfi_ext_param_page *ep;
 	struct onfi_ext_section *s;
 	struct onfi_ext_ecc_info *ecc;
@@ -3534,36 +3607,12 @@ ext_out:
 	return ret;
 }
 
-static int nand_setup_read_retry_micron(struct mtd_info *mtd, int retry_mode)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	uint8_t feature[ONFI_SUBFEATURE_PARAM_LEN] = {retry_mode};
-
-	return chip->onfi_set_features(mtd, chip, ONFI_FEATURE_ADDR_READ_RETRY,
-			feature);
-}
-
-/*
- * Configure chip properties from Micron vendor-specific ONFI table
- */
-static void nand_onfi_detect_micron(struct nand_chip *chip,
-		struct nand_onfi_params *p)
-{
-	struct nand_onfi_vendor_micron *micron = (void *)p->vendor;
-
-	if (le16_to_cpu(p->vendor_revision) < 1)
-		return;
-
-	chip->read_retries = micron->read_retry_options;
-	chip->setup_read_retry = nand_setup_read_retry_micron;
-}
-
 /*
  * Check if the NAND chip is ONFI compliant, returns 1 if it is, 0 otherwise.
  */
-static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip,
-					int *busw)
+static int nand_flash_detect_onfi(struct nand_chip *chip)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_onfi_params *p = &chip->onfi_params;
 	int i, j;
 	int val;
@@ -3633,9 +3682,7 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip,
 	chip->blocks_per_die = le32_to_cpu(p->blocks_per_lun);
 
 	if (onfi_feature(chip) & ONFI_FEATURE_16_BIT_BUS)
-		*busw = NAND_BUSWIDTH_16;
-	else
-		*busw = 0;
+		chip->options |= NAND_BUSWIDTH_16;
 
 	if (p->ecc_bits != 0xff) {
 		chip->ecc_strength_ds = p->ecc_bits;
@@ -3653,24 +3700,21 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip,
 			chip->cmdfunc = nand_command_lp;
 
 		/* The Extended Parameter Page is supported since ONFI 2.1. */
-		if (nand_flash_detect_ext_param_page(mtd, chip, p))
+		if (nand_flash_detect_ext_param_page(chip, p))
 			pr_warn("Failed to detect ONFI extended param page\n");
 	} else {
 		pr_warn("Could not retrieve ONFI ECC requirements\n");
 	}
 
-	if (p->jedec_id == NAND_MFR_MICRON)
-		nand_onfi_detect_micron(chip, p);
-
 	return 1;
 }
 
 /*
  * Check if the NAND chip is JEDEC compliant, returns 1 if it is, 0 otherwise.
  */
-static int nand_flash_detect_jedec(struct mtd_info *mtd, struct nand_chip *chip,
-					int *busw)
+static int nand_flash_detect_jedec(struct nand_chip *chip)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_jedec_params *p = &chip->jedec_params;
 	struct jedec_ecc_info *ecc;
 	int val;
@@ -3729,9 +3773,7 @@ static int nand_flash_detect_jedec(struct mtd_info *mtd, struct nand_chip *chip,
 	chip->bits_per_cell = p->bits_per_cell;
 
 	if (jedec_feature(chip) & JEDEC_FEATURE_16_BIT_BUS)
-		*busw = NAND_BUSWIDTH_16;
-	else
-		*busw = 0;
+		chip->options |= NAND_BUSWIDTH_16;
 
 	/* ECC info */
 	ecc = &p->ecc_info[0];
@@ -3820,165 +3862,46 @@ static int nand_get_bits_per_cell(u8 cellinfo)
  * chip. The rest of the parameters must be decoded according to generic or
  * manufacturer-specific "extended ID" decoding patterns.
  */
-static void nand_decode_ext_id(struct mtd_info *mtd, struct nand_chip *chip,
-				u8 id_data[8], int *busw)
+void nand_decode_ext_id(struct nand_chip *chip)
 {
-	int extid, id_len;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	int extid;
+	u8 *id_data = chip->id.data;
 	/* The 3rd id byte holds MLC / multichip data */
 	chip->bits_per_cell = nand_get_bits_per_cell(id_data[2]);
 	/* The 4th id byte is the important one */
 	extid = id_data[3];
 
-	id_len = nand_id_len(id_data, 8);
-
-	/*
-	 * Field definitions are in the following datasheets:
-	 * Old style (4,5 byte ID): Samsung K9GAG08U0M (p.32)
-	 * New Samsung (6 byte ID): Samsung K9GAG08U0F (p.44)
-	 * Hynix MLC   (6 byte ID): Hynix H27UBG8T2B (p.22)
-	 *
-	 * Check for ID length, non-zero 6th byte, cell type, and Hynix/Samsung
-	 * ID to decide what to do.
-	 */
-	if (id_len == 6 && id_data[0] == NAND_MFR_SAMSUNG &&
-			!nand_is_slc(chip) && id_data[5] != 0x00) {
-		/* Calc pagesize */
-		mtd->writesize = 2048 << (extid & 0x03);
-		extid >>= 2;
-		/* Calc oobsize */
-		switch (((extid >> 2) & 0x04) | (extid & 0x03)) {
-		case 1:
-			mtd->oobsize = 128;
-			break;
-		case 2:
-			mtd->oobsize = 218;
-			break;
-		case 3:
-			mtd->oobsize = 400;
-			break;
-		case 4:
-			mtd->oobsize = 436;
-			break;
-		case 5:
-			mtd->oobsize = 512;
-			break;
-		case 6:
-			mtd->oobsize = 640;
-			break;
-		case 7:
-		default: /* Other cases are "reserved" (unknown) */
-			mtd->oobsize = 1024;
-			break;
-		}
-		extid >>= 2;
-		/* Calc blocksize */
-		mtd->erasesize = (128 * 1024) <<
-			(((extid >> 1) & 0x04) | (extid & 0x03));
-		*busw = 0;
-	} else if (id_len == 6 && id_data[0] == NAND_MFR_HYNIX &&
-			!nand_is_slc(chip)) {
-		unsigned int tmp;
-
-		/* Calc pagesize */
-		mtd->writesize = 2048 << (extid & 0x03);
-		extid >>= 2;
-		/* Calc oobsize */
-		switch (((extid >> 2) & 0x04) | (extid & 0x03)) {
-		case 0:
-			mtd->oobsize = 128;
-			break;
-		case 1:
-			mtd->oobsize = 224;
-			break;
-		case 2:
-			mtd->oobsize = 448;
-			break;
-		case 3:
-			mtd->oobsize = 64;
-			break;
-		case 4:
-			mtd->oobsize = 32;
-			break;
-		case 5:
-			mtd->oobsize = 16;
-			break;
-		default:
-			mtd->oobsize = 640;
-			break;
-		}
-		extid >>= 2;
-		/* Calc blocksize */
-		tmp = ((extid >> 1) & 0x04) | (extid & 0x03);
-		if (tmp < 0x03)
-			mtd->erasesize = (128 * 1024) << tmp;
-		else if (tmp == 0x03)
-			mtd->erasesize = 768 * 1024;
-		else
-			mtd->erasesize = (64 * 1024) << tmp;
-		*busw = 0;
-	} else {
-		/* Calc pagesize */
-		mtd->writesize = 1024 << (extid & 0x03);
-		extid >>= 2;
-		/* Calc oobsize */
-		mtd->oobsize = (8 << (extid & 0x01)) *
-			(mtd->writesize >> 9);
-		extid >>= 2;
-		/* Calc blocksize. Blocksize is multiples of 64KiB */
-		mtd->erasesize = (64 * 1024) << (extid & 0x03);
-		extid >>= 2;
-		/* Get buswidth information */
-		*busw = (extid & 0x01) ? NAND_BUSWIDTH_16 : 0;
-
-		/*
-		 * Toshiba 24nm raw SLC (i.e., not BENAND) have 32B OOB per
-		 * 512B page. For Toshiba SLC, we decode the 5th/6th byte as
-		 * follows:
-		 * - ID byte 6, bits[2:0]: 100b -> 43nm, 101b -> 32nm,
-		 *                         110b -> 24nm
-		 * - ID byte 5, bit[7]:    1 -> BENAND, 0 -> raw SLC
-		 */
-		if (id_len >= 6 && id_data[0] == NAND_MFR_TOSHIBA &&
-				nand_is_slc(chip) &&
-				(id_data[5] & 0x7) == 0x6 /* 24nm */ &&
-				!(id_data[4] & 0x80) /* !BENAND */) {
-			mtd->oobsize = 32 * mtd->writesize >> 9;
-		}
-
-	}
+	/* Calc pagesize */
+	mtd->writesize = 1024 << (extid & 0x03);
+	extid >>= 2;
+	/* Calc oobsize */
+	mtd->oobsize = (8 << (extid & 0x01)) * (mtd->writesize >> 9);
+	extid >>= 2;
+	/* Calc blocksize. Blocksize is multiples of 64KiB */
+	mtd->erasesize = (64 * 1024) << (extid & 0x03);
+	extid >>= 2;
+	/* Get buswidth information */
+	if (extid & 0x1)
+		chip->options |= NAND_BUSWIDTH_16;
 }
+EXPORT_SYMBOL_GPL(nand_decode_ext_id);
 
 /*
  * Old devices have chip data hardcoded in the device ID table. nand_decode_id
  * decodes a matching ID table entry and assigns the MTD size parameters for
  * the chip.
  */
-static void nand_decode_id(struct mtd_info *mtd, struct nand_chip *chip,
-				struct nand_flash_dev *type, u8 id_data[8],
-				int *busw)
+static void nand_decode_id(struct nand_chip *chip, struct nand_flash_dev *type)
 {
-	int maf_id = id_data[0];
+	struct mtd_info *mtd = nand_to_mtd(chip);
 
 	mtd->erasesize = type->erasesize;
 	mtd->writesize = type->pagesize;
 	mtd->oobsize = mtd->writesize / 32;
-	*busw = type->options & NAND_BUSWIDTH_16;
 
 	/* All legacy ID NAND are small-page, SLC */
 	chip->bits_per_cell = 1;
-
-	/*
-	 * Check for Spansion/AMD ID + repeating 5th, 6th byte since
-	 * some Spansion chips have erasesize that conflicts with size
-	 * listed in nand_ids table.
-	 * Data sheet (5 byte ID): Spansion S30ML-P ORNAND (p.39)
-	 */
-	if (maf_id == NAND_MFR_AMD && id_data[4] != 0x00 && id_data[5] == 0x00
-			&& id_data[6] == 0x00 && id_data[7] == 0x00
-			&& mtd->writesize == 512) {
-		mtd->erasesize = 128 * 1024;
-		mtd->erasesize <<= ((id_data[3] & 0x03) << 1);
-	}
 }
 
 /*
@@ -3986,36 +3909,15 @@ static void nand_decode_id(struct mtd_info *mtd, struct nand_chip *chip,
  * heuristic patterns using various detected parameters (e.g., manufacturer,
  * page size, cell-type information).
  */
-static void nand_decode_bbm_options(struct mtd_info *mtd,
-				    struct nand_chip *chip, u8 id_data[8])
+static void nand_decode_bbm_options(struct nand_chip *chip)
 {
-	int maf_id = id_data[0];
+	struct mtd_info *mtd = nand_to_mtd(chip);
 
 	/* Set the bad block position */
 	if (mtd->writesize > 512 || (chip->options & NAND_BUSWIDTH_16))
 		chip->badblockpos = NAND_LARGE_BADBLOCK_POS;
 	else
 		chip->badblockpos = NAND_SMALL_BADBLOCK_POS;
-
-	/*
-	 * Bad block marker is stored in the last page of each block on Samsung
-	 * and Hynix MLC devices; stored in first two pages of each block on
-	 * Micron devices with 2KiB pages and on SLC Samsung, Hynix, Toshiba,
-	 * AMD/Spansion, and Macronix.  All others scan only the first page.
-	 */
-	if (!nand_is_slc(chip) &&
-			(maf_id == NAND_MFR_SAMSUNG ||
-			 maf_id == NAND_MFR_HYNIX))
-		chip->bbt_options |= NAND_BBT_SCANLASTPAGE;
-	else if ((nand_is_slc(chip) &&
-				(maf_id == NAND_MFR_SAMSUNG ||
-				 maf_id == NAND_MFR_HYNIX ||
-				 maf_id == NAND_MFR_TOSHIBA ||
-				 maf_id == NAND_MFR_AMD ||
-				 maf_id == NAND_MFR_MACRONIX)) ||
-			(mtd->writesize == 2048 &&
-			 maf_id == NAND_MFR_MICRON))
-		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
 }
 
 static inline bool is_full_id_nand(struct nand_flash_dev *type)
@@ -4023,9 +3925,12 @@ static inline bool is_full_id_nand(struct nand_flash_dev *type)
 	return type->id_len;
 }
 
-static bool find_full_id_nand(struct mtd_info *mtd, struct nand_chip *chip,
-		   struct nand_flash_dev *type, u8 *id_data, int *busw)
+static bool find_full_id_nand(struct nand_chip *chip,
+			      struct nand_flash_dev *type)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	u8 *id_data = chip->id.data;
+
 	if (!strncmp(type->id, id_data, type->id_len)) {
 		mtd->writesize = type->pagesize;
 		mtd->erasesize = type->erasesize;
@@ -4039,8 +3944,6 @@ static bool find_full_id_nand(struct mtd_info *mtd, struct nand_chip *chip,
 		chip->onfi_timing_mode_default =
 					type->onfi_timing_mode_default;
 
-		*busw = type->options & NAND_BUSWIDTH_16;
-
 		if (!mtd->name)
 			mtd->name = type->name;
 
@@ -4050,15 +3953,63 @@ static bool find_full_id_nand(struct mtd_info *mtd, struct nand_chip *chip,
 }
 
 /*
+ * Manufacturer detection. Only used when the NAND is not ONFI or JEDEC
+ * compliant and does not have a full-id or legacy-id entry in the nand_ids
+ * table.
+ */
+static void nand_manufacturer_detect(struct nand_chip *chip)
+{
+	/*
+	 * Try manufacturer detection if available and use
+	 * nand_decode_ext_id() otherwise.
+	 */
+	if (chip->manufacturer.desc && chip->manufacturer.desc->ops &&
+	    chip->manufacturer.desc->ops->detect)
+		chip->manufacturer.desc->ops->detect(chip);
+	else
+		nand_decode_ext_id(chip);
+}
+
+/*
+ * Manufacturer initialization. This function is called for all NANDs including
+ * ONFI and JEDEC compliant ones.
+ * Manufacturer drivers should put all their specific initialization code in
+ * their ->init() hook.
+ */
+static int nand_manufacturer_init(struct nand_chip *chip)
+{
+	if (!chip->manufacturer.desc || !chip->manufacturer.desc->ops ||
+	    !chip->manufacturer.desc->ops->init)
+		return 0;
+
+	return chip->manufacturer.desc->ops->init(chip);
+}
+
+/*
+ * Manufacturer cleanup. This function is called for all NANDs including
+ * ONFI and JEDEC compliant ones.
+ * Manufacturer drivers should put all their specific cleanup code in their
+ * ->cleanup() hook.
+ */
+static void nand_manufacturer_cleanup(struct nand_chip *chip)
+{
+	/* Release manufacturer private data */
+	if (chip->manufacturer.desc && chip->manufacturer.desc->ops &&
+	    chip->manufacturer.desc->ops->cleanup)
+		chip->manufacturer.desc->ops->cleanup(chip);
+}
+
+/*
  * Get the flash and manufacturer id and lookup if the type is supported.
  */
-static int nand_get_flash_type(struct mtd_info *mtd, struct nand_chip *chip,
-			       int *maf_id, int *dev_id,
-			       struct nand_flash_dev *type)
+static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 {
+	const struct nand_manufacturer *manufacturer;
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int busw;
-	int i, maf_idx;
-	u8 id_data[8];
+	int i, ret;
+	u8 *id_data = chip->id.data;
+	u8 maf_id, dev_id;
 
 	/*
 	 * Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx)
@@ -4073,8 +4024,8 @@ static int nand_get_flash_type(struct mtd_info *mtd, struct nand_chip *chip,
 	chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
 
 	/* Read manufacturer and device IDs */
-	*maf_id = chip->read_byte(mtd);
-	*dev_id = chip->read_byte(mtd);
+	maf_id = chip->read_byte(mtd);
+	dev_id = chip->read_byte(mtd);
 
 	/*
 	 * Try again to make sure, as some systems the bus-hold or other
@@ -4089,20 +4040,41 @@ static int nand_get_flash_type(struct mtd_info *mtd, struct nand_chip *chip,
 	for (i = 0; i < 8; i++)
 		id_data[i] = chip->read_byte(mtd);
 
-	if (id_data[0] != *maf_id || id_data[1] != *dev_id) {
+	if (id_data[0] != maf_id || id_data[1] != dev_id) {
 		pr_info("second ID read did not match %02x,%02x against %02x,%02x\n",
-			*maf_id, *dev_id, id_data[0], id_data[1]);
+			maf_id, dev_id, id_data[0], id_data[1]);
 		return -ENODEV;
 	}
 
+	chip->id.len = nand_id_len(id_data, 8);
+
+	/* Try to identify manufacturer */
+	manufacturer = nand_get_manufacturer(maf_id);
+	chip->manufacturer.desc = manufacturer;
+
 	if (!type)
 		type = nand_flash_ids;
 
+	/*
+	 * Save the NAND_BUSWIDTH_16 flag before letting auto-detection logic
+	 * override it.
+	 * This is required to make sure initial NAND bus width set by the
+	 * NAND controller driver is coherent with the real NAND bus width
+	 * (extracted by auto-detection code).
+	 */
+	busw = chip->options & NAND_BUSWIDTH_16;
+
+	/*
+	 * The flag is only set (never cleared), reset it to its default value
+	 * before starting auto-detection.
+	 */
+	chip->options &= ~NAND_BUSWIDTH_16;
+
 	for (; type->name != NULL; type++) {
 		if (is_full_id_nand(type)) {
-			if (find_full_id_nand(mtd, chip, type, id_data, &busw))
+			if (find_full_id_nand(chip, type))
 				goto ident_done;
-		} else if (*dev_id == type->dev_id) {
+		} else if (dev_id == type->dev_id) {
 			break;
 		}
 	}
@@ -4110,11 +4082,11 @@ static int nand_get_flash_type(struct mtd_info *mtd, struct nand_chip *chip,
 	chip->onfi_version = 0;
 	if (!type->name || !type->pagesize) {
 		/* Check if the chip is ONFI compliant */
-		if (nand_flash_detect_onfi(mtd, chip, &busw))
+		if (nand_flash_detect_onfi(chip))
 			goto ident_done;
 
 		/* Check if the chip is JEDEC compliant */
-		if (nand_flash_detect_jedec(mtd, chip, &busw))
+		if (nand_flash_detect_jedec(chip))
 			goto ident_done;
 	}
 
@@ -4126,48 +4098,34 @@ static int nand_get_flash_type(struct mtd_info *mtd, struct nand_chip *chip,
 
 	chip->chipsize = (uint64_t)type->chipsize << 20;
 
-	if (!type->pagesize) {
-		/* Decode parameters from extended ID */
-		nand_decode_ext_id(mtd, chip, id_data, &busw);
-	} else {
-		nand_decode_id(mtd, chip, type, id_data, &busw);
-	}
+	if (!type->pagesize)
+		nand_manufacturer_detect(chip);
+	else
+		nand_decode_id(chip, type);
+
 	/* Get chip options */
 	chip->options |= type->options;
 
-	/*
-	 * Check if chip is not a Samsung device. Do not clear the
-	 * options for chips which do not have an extended id.
-	 */
-	if (*maf_id != NAND_MFR_SAMSUNG && !type->pagesize)
-		chip->options &= ~NAND_SAMSUNG_LP_OPTIONS;
 ident_done:
 
-	/* Try to identify manufacturer */
-	for (maf_idx = 0; nand_manuf_ids[maf_idx].id != 0x0; maf_idx++) {
-		if (nand_manuf_ids[maf_idx].id == *maf_id)
-			break;
-	}
-
 	if (chip->options & NAND_BUSWIDTH_AUTO) {
-		WARN_ON(chip->options & NAND_BUSWIDTH_16);
-		chip->options |= busw;
-		nand_set_defaults(chip, busw);
+		WARN_ON(busw & NAND_BUSWIDTH_16);
+		nand_set_defaults(chip);
 	} else if (busw != (chip->options & NAND_BUSWIDTH_16)) {
 		/*
 		 * Check, if buswidth is correct. Hardware drivers should set
 		 * chip correct!
 		 */
 		pr_info("device found, Manufacturer ID: 0x%02x, Chip ID: 0x%02x\n",
-			*maf_id, *dev_id);
-		pr_info("%s %s\n", nand_manuf_ids[maf_idx].name, mtd->name);
-		pr_warn("bus width %d instead %d bit\n",
-			   (chip->options & NAND_BUSWIDTH_16) ? 16 : 8,
-			   busw ? 16 : 8);
+			maf_id, dev_id);
+		pr_info("%s %s\n", nand_manufacturer_name(manufacturer),
+			mtd->name);
+		pr_warn("bus width %d instead of %d bits\n", busw ? 16 : 8,
+			(chip->options & NAND_BUSWIDTH_16) ? 16 : 8);
 		return -EINVAL;
 	}
 
-	nand_decode_bbm_options(mtd, chip, id_data);
+	nand_decode_bbm_options(chip);
 
 	/* Calculate the address shift from the page size */
 	chip->page_shift = ffs(mtd->writesize) - 1;
@@ -4190,18 +4148,22 @@ ident_done:
 	if (mtd->writesize > 512 && chip->cmdfunc == nand_command)
 		chip->cmdfunc = nand_command_lp;
 
+	ret = nand_manufacturer_init(chip);
+	if (ret)
+		return ret;
+
 	pr_info("device found, Manufacturer ID: 0x%02x, Chip ID: 0x%02x\n",
-		*maf_id, *dev_id);
+		maf_id, dev_id);
 
 	if (chip->onfi_version)
-		pr_info("%s %s\n", nand_manuf_ids[maf_idx].name,
-				chip->onfi_params.model);
+		pr_info("%s %s\n", nand_manufacturer_name(manufacturer),
+			chip->onfi_params.model);
 	else if (chip->jedec_version)
-		pr_info("%s %s\n", nand_manuf_ids[maf_idx].name,
-				chip->jedec_params.model);
+		pr_info("%s %s\n", nand_manufacturer_name(manufacturer),
+			chip->jedec_params.model);
 	else
-		pr_info("%s %s\n", nand_manuf_ids[maf_idx].name,
-				type->name);
+		pr_info("%s %s\n", nand_manufacturer_name(manufacturer),
+			type->name);
 
 	pr_info("%d MiB, %s, erase size: %d KiB, page size: %d, OOB size: %d\n",
 		(int)(chip->chipsize >> 20), nand_is_slc(chip) ? "SLC" : "MLC",
@@ -4333,12 +4295,6 @@ static int nand_dt_init(struct nand_chip *chip)
 	ecc_strength = of_get_nand_ecc_strength(dn);
 	ecc_step = of_get_nand_ecc_step_size(dn);
 
-	if ((ecc_step >= 0 && !(ecc_strength >= 0)) ||
-	    (!(ecc_step >= 0) && ecc_strength >= 0)) {
-		pr_err("must set both strength and step size in DT\n");
-		return -EINVAL;
-	}
-
 	if (ecc_mode >= 0)
 		chip->ecc.mode = ecc_mode;
 
@@ -4391,10 +4347,10 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 		return -EINVAL;
 	}
 	/* Set the default functions */
-	nand_set_defaults(chip, chip->options & NAND_BUSWIDTH_16);
+	nand_set_defaults(chip);
 
 	/* Read the flash type */
-	ret = nand_get_flash_type(mtd, chip, &nand_maf_id, &nand_dev_id, table);
+	ret = nand_detect(chip, table);
 	if (ret) {
 		if (!(chip->options & NAND_SCAN_SILENT_NODEV))
 			pr_warn("No NAND device found\n");
@@ -4405,7 +4361,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	/* Initialize the ->data_interface field. */
 	ret = nand_init_data_interface(chip);
 	if (ret)
-		return ret;
+		goto err_nand_init;
 
 	/*
 	 * Setup the data interface correctly on the chip and controller side.
@@ -4417,7 +4373,10 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	 */
 	ret = nand_setup_data_interface(chip);
 	if (ret)
-		return ret;
+		goto err_nand_init;
+
+	nand_maf_id = chip->id.data[0];
+	nand_dev_id = chip->id.data[1];
 
 	chip->select_chip(mtd, -1);
 
@@ -4445,6 +4404,12 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	mtd->size = i * chip->chipsize;
 
 	return 0;
+
+err_nand_init:
+	/* Free manufacturer priv data. */
+	nand_manufacturer_cleanup(chip);
+
+	return ret;
 }
 EXPORT_SYMBOL(nand_scan_ident);
 
@@ -4610,32 +4575,54 @@ int nand_scan_tail(struct mtd_info *mtd)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
-	struct nand_buffers *nbuf;
+	struct nand_buffers *nbuf = NULL;
 	int ret;
 
 	/* New bad blocks should be marked in OOB, flash-based BBT, or both */
 	if (WARN_ON((chip->bbt_options & NAND_BBT_NO_OOB_BBM) &&
-		   !(chip->bbt_options & NAND_BBT_USE_FLASH)))
-		return -EINVAL;
+		   !(chip->bbt_options & NAND_BBT_USE_FLASH))) {
+		ret = -EINVAL;
+		goto err_ident;
+	}
 
 	if (invalid_ecc_page_accessors(chip)) {
 		pr_err("Invalid ECC page accessors setup\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_ident;
 	}
 
 	if (!(chip->options & NAND_OWN_BUFFERS)) {
-		nbuf = kzalloc(sizeof(*nbuf) + mtd->writesize
-				+ mtd->oobsize * 3, GFP_KERNEL);
-		if (!nbuf)
-			return -ENOMEM;
-		nbuf->ecccalc = (uint8_t *)(nbuf + 1);
-		nbuf->ecccode = nbuf->ecccalc + mtd->oobsize;
-		nbuf->databuf = nbuf->ecccode + mtd->oobsize;
+		nbuf = kzalloc(sizeof(*nbuf), GFP_KERNEL);
+		if (!nbuf) {
+			ret = -ENOMEM;
+			goto err_ident;
+		}
+
+		nbuf->ecccalc = kmalloc(mtd->oobsize, GFP_KERNEL);
+		if (!nbuf->ecccalc) {
+			ret = -ENOMEM;
+			goto err_free;
+		}
+
+		nbuf->ecccode = kmalloc(mtd->oobsize, GFP_KERNEL);
+		if (!nbuf->ecccode) {
+			ret = -ENOMEM;
+			goto err_free;
+		}
+
+		nbuf->databuf = kmalloc(mtd->writesize + mtd->oobsize,
+					GFP_KERNEL);
+		if (!nbuf->databuf) {
+			ret = -ENOMEM;
+			goto err_free;
+		}
 
 		chip->buffers = nbuf;
 	} else {
-		if (!chip->buffers)
-			return -ENOMEM;
+		if (!chip->buffers) {
+			ret = -ENOMEM;
+			goto err_ident;
+		}
 	}
 
 	/* Set the internal oob buffer location, just after the page data */
@@ -4653,7 +4640,7 @@ int nand_scan_tail(struct mtd_info *mtd)
 			break;
 		case 64:
 		case 128:
-			mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
+			mtd_set_ooblayout(mtd, &nand_ooblayout_lp_hamming_ops);
 			break;
 		default:
 			WARN(1, "No oob scheme defined for oobsize %d\n",
@@ -4663,9 +4650,6 @@ int nand_scan_tail(struct mtd_info *mtd)
 		}
 	}
 
-	if (!chip->write_page)
-		chip->write_page = nand_write_page;
-
 	/*
 	 * Check ECC mode, default to software if 3byte/512byte hardware ECC is
 	 * selected and we have 256 byte pagesize fallback to software ECC
@@ -4871,10 +4855,25 @@ int nand_scan_tail(struct mtd_info *mtd)
 		return 0;
 
 	/* Build bad block table */
-	return chip->scan_bbt(mtd);
+	ret = chip->scan_bbt(mtd);
+	if (ret)
+		goto err_free;
+	return 0;
+
 err_free:
-	if (!(chip->options & NAND_OWN_BUFFERS))
-		kfree(chip->buffers);
+	if (nbuf) {
+		kfree(nbuf->databuf);
+		kfree(nbuf->ecccode);
+		kfree(nbuf->ecccalc);
+		kfree(nbuf);
+	}
+
+err_ident:
+	/* Clean up nand_scan_ident(). */
+
+	/* Free manufacturer priv data. */
+	nand_manufacturer_cleanup(chip);
+
 	return ret;
 }
 EXPORT_SYMBOL(nand_scan_tail);
@@ -4925,13 +4924,20 @@ void nand_cleanup(struct nand_chip *chip)
 
 	/* Free bad block table memory */
 	kfree(chip->bbt);
-	if (!(chip->options & NAND_OWN_BUFFERS))
+	if (!(chip->options & NAND_OWN_BUFFERS) && chip->buffers) {
+		kfree(chip->buffers->databuf);
+		kfree(chip->buffers->ecccode);
+		kfree(chip->buffers->ecccalc);
 		kfree(chip->buffers);
+	}
 
 	/* Free bad block descriptor memory */
 	if (chip->badblock_pattern && chip->badblock_pattern->options
 			& NAND_BBT_DYNAMICSTRUCT)
 		kfree(chip->badblock_pattern);
+
+	/* Free manufacturer priv data. */
+	nand_manufacturer_cleanup(chip);
 }
 EXPORT_SYMBOL_GPL(nand_cleanup);
 
diff --git a/drivers/mtd/nand/nand_hynix.c b/drivers/mtd/nand/nand_hynix.c
new file mode 100644
index 000000000000..b12dc7325378
--- /dev/null
+++ b/drivers/mtd/nand/nand_hynix.c
@@ -0,0 +1,631 @@
+/*
+ * Copyright (C) 2017 Free Electrons
+ * Copyright (C) 2017 NextThing Co
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mtd/nand.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+#define NAND_HYNIX_CMD_SET_PARAMS	0x36
+#define NAND_HYNIX_CMD_APPLY_PARAMS	0x16
+
+#define NAND_HYNIX_1XNM_RR_REPEAT	8
+
+/**
+ * struct hynix_read_retry - read-retry data
+ * @nregs: number of register to set when applying a new read-retry mode
+ * @regs: register offsets (NAND chip dependent)
+ * @values: array of values to set in registers. The array size is equal to
+ *	    (nregs * nmodes)
+ */
+struct hynix_read_retry {
+	int nregs;
+	const u8 *regs;
+	u8 values[0];
+};
+
+/**
+ * struct hynix_nand - private Hynix NAND struct
+ * @nand_technology: manufacturing process expressed in picometer
+ * @read_retry: read-retry information
+ */
+struct hynix_nand {
+	const struct hynix_read_retry *read_retry;
+};
+
+/**
+ * struct hynix_read_retry_otp - structure describing how the read-retry OTP
+ *				 area
+ * @nregs: number of hynix private registers to set before reading the reading
+ *	   the OTP area
+ * @regs: registers that should be configured
+ * @values: values that should be set in regs
+ * @page: the address to pass to the READ_PAGE command. Depends on the NAND
+ *	  chip
+ * @size: size of the read-retry OTP section
+ */
+struct hynix_read_retry_otp {
+	int nregs;
+	const u8 *regs;
+	const u8 *values;
+	int page;
+	int size;
+};
+
+static bool hynix_nand_has_valid_jedecid(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	u8 jedecid[6] = { };
+	int i = 0;
+
+	chip->cmdfunc(mtd, NAND_CMD_READID, 0x40, -1);
+	for (i = 0; i < 5; i++)
+		jedecid[i] = chip->read_byte(mtd);
+
+	return !strcmp("JEDEC", jedecid);
+}
+
+static int hynix_nand_setup_read_retry(struct mtd_info *mtd, int retry_mode)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct hynix_nand *hynix = nand_get_manufacturer_data(chip);
+	const u8 *values;
+	int status;
+	int i;
+
+	values = hynix->read_retry->values +
+		 (retry_mode * hynix->read_retry->nregs);
+
+	/* Enter 'Set Hynix Parameters' mode */
+	chip->cmdfunc(mtd, NAND_HYNIX_CMD_SET_PARAMS, -1, -1);
+
+	/*
+	 * Configure the NAND in the requested read-retry mode.
+	 * This is done by setting pre-defined values in internal NAND
+	 * registers.
+	 *
+	 * The set of registers is NAND specific, and the values are either
+	 * predefined or extracted from an OTP area on the NAND (values are
+	 * probably tweaked at production in this case).
+	 */
+	for (i = 0; i < hynix->read_retry->nregs; i++) {
+		int column = hynix->read_retry->regs[i];
+
+		column |= column << 8;
+		chip->cmdfunc(mtd, NAND_CMD_NONE, column, -1);
+		chip->write_byte(mtd, values[i]);
+	}
+
+	/* Apply the new settings. */
+	chip->cmdfunc(mtd, NAND_HYNIX_CMD_APPLY_PARAMS, -1, -1);
+
+	status = chip->waitfunc(mtd, chip);
+	if (status & NAND_STATUS_FAIL)
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * hynix_get_majority - get the value that is occurring the most in a given
+ *			set of values
+ * @in: the array of values to test
+ * @repeat: the size of the in array
+ * @out: pointer used to store the output value
+ *
+ * This function implements the 'majority check' logic that is supposed to
+ * overcome the unreliability of MLC NANDs when reading the OTP area storing
+ * the read-retry parameters.
+ *
+ * It's based on a pretty simple assumption: if we repeat the same value
+ * several times and then take the one that is occurring the most, we should
+ * find the correct value.
+ * Let's hope this dummy algorithm prevents us from losing the read-retry
+ * parameters.
+ */
+static int hynix_get_majority(const u8 *in, int repeat, u8 *out)
+{
+	int i, j, half = repeat / 2;
+
+	/*
+	 * We only test the first half of the in array because we must ensure
+	 * that the value is at least occurring repeat / 2 times.
+	 *
+	 * This loop is suboptimal since we may count the occurrences of the
+	 * same value several time, but we are doing that on small sets, which
+	 * makes it acceptable.
+	 */
+	for (i = 0; i < half; i++) {
+		int cnt = 0;
+		u8 val = in[i];
+
+		/* Count all values that are matching the one at index i. */
+		for (j = i + 1; j < repeat; j++) {
+			if (in[j] == val)
+				cnt++;
+		}
+
+		/* We found a value occurring more than repeat / 2. */
+		if (cnt > half) {
+			*out = val;
+			return 0;
+		}
+	}
+
+	return -EIO;
+}
+
+static int hynix_read_rr_otp(struct nand_chip *chip,
+			     const struct hynix_read_retry_otp *info,
+			     void *buf)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	int i;
+
+	chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+
+	chip->cmdfunc(mtd, NAND_HYNIX_CMD_SET_PARAMS, -1, -1);
+
+	for (i = 0; i < info->nregs; i++) {
+		int column = info->regs[i];
+
+		column |= column << 8;
+		chip->cmdfunc(mtd, NAND_CMD_NONE, column, -1);
+		chip->write_byte(mtd, info->values[i]);
+	}
+
+	chip->cmdfunc(mtd, NAND_HYNIX_CMD_APPLY_PARAMS, -1, -1);
+
+	/* Sequence to enter OTP mode? */
+	chip->cmdfunc(mtd, 0x17, -1, -1);
+	chip->cmdfunc(mtd, 0x04, -1, -1);
+	chip->cmdfunc(mtd, 0x19, -1, -1);
+
+	/* Now read the page */
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0x0, info->page);
+	chip->read_buf(mtd, buf, info->size);
+
+	/* Put everything back to normal */
+	chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+	chip->cmdfunc(mtd, NAND_HYNIX_CMD_SET_PARAMS, 0x38, -1);
+	chip->write_byte(mtd, 0x0);
+	chip->cmdfunc(mtd, NAND_HYNIX_CMD_APPLY_PARAMS, -1, -1);
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0x0, -1);
+
+	return 0;
+}
+
+#define NAND_HYNIX_1XNM_RR_COUNT_OFFS				0
+#define NAND_HYNIX_1XNM_RR_REG_COUNT_OFFS			8
+#define NAND_HYNIX_1XNM_RR_SET_OFFS(x, setsize, inv)		\
+	(16 + ((((x) * 2) + ((inv) ? 1 : 0)) * (setsize)))
+
+static int hynix_mlc_1xnm_rr_value(const u8 *buf, int nmodes, int nregs,
+				   int mode, int reg, bool inv, u8 *val)
+{
+	u8 tmp[NAND_HYNIX_1XNM_RR_REPEAT];
+	int val_offs = (mode * nregs) + reg;
+	int set_size = nmodes * nregs;
+	int i, ret;
+
+	for (i = 0; i < NAND_HYNIX_1XNM_RR_REPEAT; i++) {
+		int set_offs = NAND_HYNIX_1XNM_RR_SET_OFFS(i, set_size, inv);
+
+		tmp[i] = buf[val_offs + set_offs];
+	}
+
+	ret = hynix_get_majority(tmp, NAND_HYNIX_1XNM_RR_REPEAT, val);
+	if (ret)
+		return ret;
+
+	if (inv)
+		*val = ~*val;
+
+	return 0;
+}
+
+static u8 hynix_1xnm_mlc_read_retry_regs[] = {
+	0xcc, 0xbf, 0xaa, 0xab, 0xcd, 0xad, 0xae, 0xaf
+};
+
+static int hynix_mlc_1xnm_rr_init(struct nand_chip *chip,
+				  const struct hynix_read_retry_otp *info)
+{
+	struct hynix_nand *hynix = nand_get_manufacturer_data(chip);
+	struct hynix_read_retry *rr = NULL;
+	int ret, i, j;
+	u8 nregs, nmodes;
+	u8 *buf;
+
+	buf = kmalloc(info->size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = hynix_read_rr_otp(chip, info, buf);
+	if (ret)
+		goto out;
+
+	ret = hynix_get_majority(buf, NAND_HYNIX_1XNM_RR_REPEAT,
+				 &nmodes);
+	if (ret)
+		goto out;
+
+	ret = hynix_get_majority(buf + NAND_HYNIX_1XNM_RR_REPEAT,
+				 NAND_HYNIX_1XNM_RR_REPEAT,
+				 &nregs);
+	if (ret)
+		goto out;
+
+	rr = kzalloc(sizeof(*rr) + (nregs * nmodes), GFP_KERNEL);
+	if (!rr) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < nmodes; i++) {
+		for (j = 0; j < nregs; j++) {
+			u8 *val = rr->values + (i * nregs);
+
+			ret = hynix_mlc_1xnm_rr_value(buf, nmodes, nregs, i, j,
+						      false, val);
+			if (!ret)
+				continue;
+
+			ret = hynix_mlc_1xnm_rr_value(buf, nmodes, nregs, i, j,
+						      true, val);
+			if (ret)
+				goto out;
+		}
+	}
+
+	rr->nregs = nregs;
+	rr->regs = hynix_1xnm_mlc_read_retry_regs;
+	hynix->read_retry = rr;
+	chip->setup_read_retry = hynix_nand_setup_read_retry;
+	chip->read_retries = nmodes;
+
+out:
+	kfree(buf);
+
+	if (ret)
+		kfree(rr);
+
+	return ret;
+}
+
+static const u8 hynix_mlc_1xnm_rr_otp_regs[] = { 0x38 };
+static const u8 hynix_mlc_1xnm_rr_otp_values[] = { 0x52 };
+
+static const struct hynix_read_retry_otp hynix_mlc_1xnm_rr_otps[] = {
+	{
+		.nregs = ARRAY_SIZE(hynix_mlc_1xnm_rr_otp_regs),
+		.regs = hynix_mlc_1xnm_rr_otp_regs,
+		.values = hynix_mlc_1xnm_rr_otp_values,
+		.page = 0x21f,
+		.size = 784
+	},
+	{
+		.nregs = ARRAY_SIZE(hynix_mlc_1xnm_rr_otp_regs),
+		.regs = hynix_mlc_1xnm_rr_otp_regs,
+		.values = hynix_mlc_1xnm_rr_otp_values,
+		.page = 0x200,
+		.size = 528,
+	},
+};
+
+static int hynix_nand_rr_init(struct nand_chip *chip)
+{
+	int i, ret = 0;
+	bool valid_jedecid;
+
+	valid_jedecid = hynix_nand_has_valid_jedecid(chip);
+
+	/*
+	 * We only support read-retry for 1xnm NANDs, and those NANDs all
+	 * expose a valid JEDEC ID.
+	 */
+	if (valid_jedecid) {
+		u8 nand_tech = chip->id.data[5] >> 4;
+
+		/* 1xnm technology */
+		if (nand_tech == 4) {
+			for (i = 0; i < ARRAY_SIZE(hynix_mlc_1xnm_rr_otps);
+			     i++) {
+				/*
+				 * FIXME: Hynix recommend to copy the
+				 * read-retry OTP area into a normal page.
+				 */
+				ret = hynix_mlc_1xnm_rr_init(chip,
+						hynix_mlc_1xnm_rr_otps);
+				if (!ret)
+					break;
+			}
+		}
+	}
+
+	if (ret)
+		pr_warn("failed to initialize read-retry infrastructure");
+
+	return 0;
+}
+
+static void hynix_nand_extract_oobsize(struct nand_chip *chip,
+				       bool valid_jedecid)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	u8 oobsize;
+
+	oobsize = ((chip->id.data[3] >> 2) & 0x3) |
+		  ((chip->id.data[3] >> 4) & 0x4);
+
+	if (valid_jedecid) {
+		switch (oobsize) {
+		case 0:
+			mtd->oobsize = 2048;
+			break;
+		case 1:
+			mtd->oobsize = 1664;
+			break;
+		case 2:
+			mtd->oobsize = 1024;
+			break;
+		case 3:
+			mtd->oobsize = 640;
+			break;
+		default:
+			/*
+			 * We should never reach this case, but if that
+			 * happens, this probably means Hynix decided to use
+			 * a different extended ID format, and we should find
+			 * a way to support it.
+			 */
+			WARN(1, "Invalid OOB size");
+			break;
+		}
+	} else {
+		switch (oobsize) {
+		case 0:
+			mtd->oobsize = 128;
+			break;
+		case 1:
+			mtd->oobsize = 224;
+			break;
+		case 2:
+			mtd->oobsize = 448;
+			break;
+		case 3:
+			mtd->oobsize = 64;
+			break;
+		case 4:
+			mtd->oobsize = 32;
+			break;
+		case 5:
+			mtd->oobsize = 16;
+			break;
+		case 6:
+			mtd->oobsize = 640;
+			break;
+		default:
+			/*
+			 * We should never reach this case, but if that
+			 * happens, this probably means Hynix decided to use
+			 * a different extended ID format, and we should find
+			 * a way to support it.
+			 */
+			WARN(1, "Invalid OOB size");
+			break;
+		}
+	}
+}
+
+static void hynix_nand_extract_ecc_requirements(struct nand_chip *chip,
+						bool valid_jedecid)
+{
+	u8 ecc_level = (chip->id.data[4] >> 4) & 0x7;
+
+	if (valid_jedecid) {
+		/* Reference: H27UCG8T2E datasheet */
+		chip->ecc_step_ds = 1024;
+
+		switch (ecc_level) {
+		case 0:
+			chip->ecc_step_ds = 0;
+			chip->ecc_strength_ds = 0;
+			break;
+		case 1:
+			chip->ecc_strength_ds = 4;
+			break;
+		case 2:
+			chip->ecc_strength_ds = 24;
+			break;
+		case 3:
+			chip->ecc_strength_ds = 32;
+			break;
+		case 4:
+			chip->ecc_strength_ds = 40;
+			break;
+		case 5:
+			chip->ecc_strength_ds = 50;
+			break;
+		case 6:
+			chip->ecc_strength_ds = 60;
+			break;
+		default:
+			/*
+			 * We should never reach this case, but if that
+			 * happens, this probably means Hynix decided to use
+			 * a different extended ID format, and we should find
+			 * a way to support it.
+			 */
+			WARN(1, "Invalid ECC requirements");
+		}
+	} else {
+		/*
+		 * The ECC requirements field meaning depends on the
+		 * NAND technology.
+		 */
+		u8 nand_tech = chip->id.data[5] & 0x3;
+
+		if (nand_tech < 3) {
+			/* > 26nm, reference: H27UBG8T2A datasheet */
+			if (ecc_level < 5) {
+				chip->ecc_step_ds = 512;
+				chip->ecc_strength_ds = 1 << ecc_level;
+			} else if (ecc_level < 7) {
+				if (ecc_level == 5)
+					chip->ecc_step_ds = 2048;
+				else
+					chip->ecc_step_ds = 1024;
+				chip->ecc_strength_ds = 24;
+			} else {
+				/*
+				 * We should never reach this case, but if that
+				 * happens, this probably means Hynix decided
+				 * to use a different extended ID format, and
+				 * we should find a way to support it.
+				 */
+				WARN(1, "Invalid ECC requirements");
+			}
+		} else {
+			/* <= 26nm, reference: H27UBG8T2B datasheet */
+			if (!ecc_level) {
+				chip->ecc_step_ds = 0;
+				chip->ecc_strength_ds = 0;
+			} else if (ecc_level < 5) {
+				chip->ecc_step_ds = 512;
+				chip->ecc_strength_ds = 1 << (ecc_level - 1);
+			} else {
+				chip->ecc_step_ds = 1024;
+				chip->ecc_strength_ds = 24 +
+							(8 * (ecc_level - 5));
+			}
+		}
+	}
+}
+
+static void hynix_nand_extract_scrambling_requirements(struct nand_chip *chip,
+						       bool valid_jedecid)
+{
+	u8 nand_tech;
+
+	/* We need scrambling on all TLC NANDs*/
+	if (chip->bits_per_cell > 2)
+		chip->options |= NAND_NEED_SCRAMBLING;
+
+	/* And on MLC NANDs with sub-3xnm process */
+	if (valid_jedecid) {
+		nand_tech = chip->id.data[5] >> 4;
+
+		/* < 3xnm */
+		if (nand_tech > 0)
+			chip->options |= NAND_NEED_SCRAMBLING;
+	} else {
+		nand_tech = chip->id.data[5] & 0x3;
+
+		/* < 32nm */
+		if (nand_tech > 2)
+			chip->options |= NAND_NEED_SCRAMBLING;
+	}
+}
+
+static void hynix_nand_decode_id(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	bool valid_jedecid;
+	u8 tmp;
+
+	/*
+	 * Exclude all SLC NANDs from this advanced detection scheme.
+	 * According to the ranges defined in several datasheets, it might
+	 * appear that even SLC NANDs could fall in this extended ID scheme.
+	 * If that the case rework the test to let SLC NANDs go through the
+	 * detection process.
+	 */
+	if (chip->id.len < 6 || nand_is_slc(chip)) {
+		nand_decode_ext_id(chip);
+		return;
+	}
+
+	/* Extract pagesize */
+	mtd->writesize = 2048 << (chip->id.data[3] & 0x03);
+
+	tmp = (chip->id.data[3] >> 4) & 0x3;
+	/*
+	 * When bit7 is set that means we start counting at 1MiB, otherwise
+	 * we start counting at 128KiB and shift this value the content of
+	 * ID[3][4:5].
+	 * The only exception is when ID[3][4:5] == 3 and ID[3][7] == 0, in
+	 * this case the erasesize is set to 768KiB.
+	 */
+	if (chip->id.data[3] & 0x80)
+		mtd->erasesize = SZ_1M << tmp;
+	else if (tmp == 3)
+		mtd->erasesize = SZ_512K + SZ_256K;
+	else
+		mtd->erasesize = SZ_128K << tmp;
+
+	/*
+	 * Modern Toggle DDR NANDs have a valid JEDECID even though they are
+	 * not exposing a valid JEDEC parameter table.
+	 * These NANDs use a different NAND ID scheme.
+	 */
+	valid_jedecid = hynix_nand_has_valid_jedecid(chip);
+
+	hynix_nand_extract_oobsize(chip, valid_jedecid);
+	hynix_nand_extract_ecc_requirements(chip, valid_jedecid);
+	hynix_nand_extract_scrambling_requirements(chip, valid_jedecid);
+}
+
+static void hynix_nand_cleanup(struct nand_chip *chip)
+{
+	struct hynix_nand *hynix = nand_get_manufacturer_data(chip);
+
+	if (!hynix)
+		return;
+
+	kfree(hynix->read_retry);
+	kfree(hynix);
+	nand_set_manufacturer_data(chip, NULL);
+}
+
+static int hynix_nand_init(struct nand_chip *chip)
+{
+	struct hynix_nand *hynix;
+	int ret;
+
+	if (!nand_is_slc(chip))
+		chip->bbt_options |= NAND_BBT_SCANLASTPAGE;
+	else
+		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
+
+	hynix = kzalloc(sizeof(*hynix), GFP_KERNEL);
+	if (!hynix)
+		return -ENOMEM;
+
+	nand_set_manufacturer_data(chip, hynix);
+
+	ret = hynix_nand_rr_init(chip);
+	if (ret)
+		hynix_nand_cleanup(chip);
+
+	return ret;
+}
+
+const struct nand_manufacturer_ops hynix_nand_manuf_ops = {
+	.detect = hynix_nand_decode_id,
+	.init = hynix_nand_init,
+	.cleanup = hynix_nand_cleanup,
+};
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index 4a2f75b0c200..92e2cf8e9ff9 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -6,11 +6,10 @@
  * published by the Free Software Foundation.
  *
  */
-#include <linux/module.h>
 #include <linux/mtd/nand.h>
 #include <linux/sizes.h>
 
-#define LP_OPTIONS NAND_SAMSUNG_LP_OPTIONS
+#define LP_OPTIONS 0
 #define LP_OPTIONS16 (LP_OPTIONS | NAND_BUSWIDTH_16)
 
 #define SP_OPTIONS NAND_NEED_READRDY
@@ -169,29 +168,40 @@ struct nand_flash_dev nand_flash_ids[] = {
 };
 
 /* Manufacturer IDs */
-struct nand_manufacturers nand_manuf_ids[] = {
-	{NAND_MFR_TOSHIBA, "Toshiba"},
+static const struct nand_manufacturer nand_manufacturers[] = {
+	{NAND_MFR_TOSHIBA, "Toshiba", &toshiba_nand_manuf_ops},
 	{NAND_MFR_ESMT, "ESMT"},
-	{NAND_MFR_SAMSUNG, "Samsung"},
+	{NAND_MFR_SAMSUNG, "Samsung", &samsung_nand_manuf_ops},
 	{NAND_MFR_FUJITSU, "Fujitsu"},
 	{NAND_MFR_NATIONAL, "National"},
 	{NAND_MFR_RENESAS, "Renesas"},
 	{NAND_MFR_STMICRO, "ST Micro"},
-	{NAND_MFR_HYNIX, "Hynix"},
-	{NAND_MFR_MICRON, "Micron"},
-	{NAND_MFR_AMD, "AMD/Spansion"},
-	{NAND_MFR_MACRONIX, "Macronix"},
+	{NAND_MFR_HYNIX, "Hynix", &hynix_nand_manuf_ops},
+	{NAND_MFR_MICRON, "Micron", &micron_nand_manuf_ops},
+	{NAND_MFR_AMD, "AMD/Spansion", &amd_nand_manuf_ops},
+	{NAND_MFR_MACRONIX, "Macronix", &macronix_nand_manuf_ops},
 	{NAND_MFR_EON, "Eon"},
 	{NAND_MFR_SANDISK, "SanDisk"},
 	{NAND_MFR_INTEL, "Intel"},
 	{NAND_MFR_ATO, "ATO"},
 	{NAND_MFR_WINBOND, "Winbond"},
-	{0x0, "Unknown"}
 };
 
-EXPORT_SYMBOL(nand_manuf_ids);
-EXPORT_SYMBOL(nand_flash_ids);
+/**
+ * nand_get_manufacturer - Get manufacturer information from the manufacturer
+ *			   ID
+ * @id: manufacturer ID
+ *
+ * Returns a pointer a nand_manufacturer object if the manufacturer is defined
+ * in the NAND manufacturers database, NULL otherwise.
+ */
+const struct nand_manufacturer *nand_get_manufacturer(u8 id)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(nand_manufacturers); i++)
+		if (nand_manufacturers[i].id == id)
+			return &nand_manufacturers[i];
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Thomas Gleixner <tglx@linutronix.de>");
-MODULE_DESCRIPTION("Nand device & manufacturer IDs");
+	return NULL;
+}
diff --git a/drivers/mtd/nand/nand_macronix.c b/drivers/mtd/nand/nand_macronix.c
new file mode 100644
index 000000000000..84855c3e1a02
--- /dev/null
+++ b/drivers/mtd/nand/nand_macronix.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2017 Free Electrons
+ * Copyright (C) 2017 NextThing Co
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mtd/nand.h>
+
+static int macronix_nand_init(struct nand_chip *chip)
+{
+	if (nand_is_slc(chip))
+		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
+
+	return 0;
+}
+
+const struct nand_manufacturer_ops macronix_nand_manuf_ops = {
+	.init = macronix_nand_init,
+};
diff --git a/drivers/mtd/nand/nand_micron.c b/drivers/mtd/nand/nand_micron.c
new file mode 100644
index 000000000000..877011069251
--- /dev/null
+++ b/drivers/mtd/nand/nand_micron.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2017 Free Electrons
+ * Copyright (C) 2017 NextThing Co
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mtd/nand.h>
+
+struct nand_onfi_vendor_micron {
+	u8 two_plane_read;
+	u8 read_cache;
+	u8 read_unique_id;
+	u8 dq_imped;
+	u8 dq_imped_num_settings;
+	u8 dq_imped_feat_addr;
+	u8 rb_pulldown_strength;
+	u8 rb_pulldown_strength_feat_addr;
+	u8 rb_pulldown_strength_num_settings;
+	u8 otp_mode;
+	u8 otp_page_start;
+	u8 otp_data_prot_addr;
+	u8 otp_num_pages;
+	u8 otp_feat_addr;
+	u8 read_retry_options;
+	u8 reserved[72];
+	u8 param_revision;
+} __packed;
+
+static int micron_nand_setup_read_retry(struct mtd_info *mtd, int retry_mode)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	u8 feature[ONFI_SUBFEATURE_PARAM_LEN] = {retry_mode};
+
+	return chip->onfi_set_features(mtd, chip, ONFI_FEATURE_ADDR_READ_RETRY,
+				       feature);
+}
+
+/*
+ * Configure chip properties from Micron vendor-specific ONFI table
+ */
+static int micron_nand_onfi_init(struct nand_chip *chip)
+{
+	struct nand_onfi_params *p = &chip->onfi_params;
+	struct nand_onfi_vendor_micron *micron = (void *)p->vendor;
+
+	if (!chip->onfi_version)
+		return 0;
+
+	if (le16_to_cpu(p->vendor_revision) < 1)
+		return 0;
+
+	chip->read_retries = micron->read_retry_options;
+	chip->setup_read_retry = micron_nand_setup_read_retry;
+
+	return 0;
+}
+
+static int micron_nand_init(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	int ret;
+
+	ret = micron_nand_onfi_init(chip);
+	if (ret)
+		return ret;
+
+	if (mtd->writesize == 2048)
+		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
+
+	return 0;
+}
+
+const struct nand_manufacturer_ops micron_nand_manuf_ops = {
+	.init = micron_nand_init,
+};
diff --git a/drivers/mtd/nand/nand_samsung.c b/drivers/mtd/nand/nand_samsung.c
new file mode 100644
index 000000000000..1e0755997762
--- /dev/null
+++ b/drivers/mtd/nand/nand_samsung.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2017 Free Electrons
+ * Copyright (C) 2017 NextThing Co
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mtd/nand.h>
+
+static void samsung_nand_decode_id(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	/* New Samsung (6 byte ID): Samsung K9GAG08U0F (p.44) */
+	if (chip->id.len == 6 && !nand_is_slc(chip) &&
+	    chip->id.data[5] != 0x00) {
+		u8 extid = chip->id.data[3];
+
+		/* Get pagesize */
+		mtd->writesize = 2048 << (extid & 0x03);
+
+		extid >>= 2;
+
+		/* Get oobsize */
+		switch (((extid >> 2) & 0x4) | (extid & 0x3)) {
+		case 1:
+			mtd->oobsize = 128;
+			break;
+		case 2:
+			mtd->oobsize = 218;
+			break;
+		case 3:
+			mtd->oobsize = 400;
+			break;
+		case 4:
+			mtd->oobsize = 436;
+			break;
+		case 5:
+			mtd->oobsize = 512;
+			break;
+		case 6:
+			mtd->oobsize = 640;
+			break;
+		default:
+			/*
+			 * We should never reach this case, but if that
+			 * happens, this probably means Samsung decided to use
+			 * a different extended ID format, and we should find
+			 * a way to support it.
+			 */
+			WARN(1, "Invalid OOB size value");
+			break;
+		}
+
+		/* Get blocksize */
+		extid >>= 2;
+		mtd->erasesize = (128 * 1024) <<
+				 (((extid >> 1) & 0x04) | (extid & 0x03));
+
+		/* Extract ECC requirements from 5th id byte*/
+		extid = (chip->id.data[4] >> 4) & 0x07;
+		if (extid < 5) {
+			chip->ecc_step_ds = 512;
+			chip->ecc_strength_ds = 1 << extid;
+		} else {
+			chip->ecc_step_ds = 1024;
+			switch (extid) {
+			case 5:
+				chip->ecc_strength_ds = 24;
+				break;
+			case 6:
+				chip->ecc_strength_ds = 40;
+				break;
+			case 7:
+				chip->ecc_strength_ds = 60;
+				break;
+			default:
+				WARN(1, "Could not decode ECC info");
+				chip->ecc_step_ds = 0;
+			}
+		}
+	} else {
+		nand_decode_ext_id(chip);
+	}
+}
+
+static int samsung_nand_init(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	if (mtd->writesize > 512)
+		chip->options |= NAND_SAMSUNG_LP_OPTIONS;
+
+	if (!nand_is_slc(chip))
+		chip->bbt_options |= NAND_BBT_SCANLASTPAGE;
+	else
+		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
+
+	return 0;
+}
+
+const struct nand_manufacturer_ops samsung_nand_manuf_ops = {
+	.detect = samsung_nand_decode_id,
+	.init = samsung_nand_init,
+};
diff --git a/drivers/mtd/nand/nand_toshiba.c b/drivers/mtd/nand/nand_toshiba.c
new file mode 100644
index 000000000000..fa787ba38dcd
--- /dev/null
+++ b/drivers/mtd/nand/nand_toshiba.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 Free Electrons
+ * Copyright (C) 2017 NextThing Co
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mtd/nand.h>
+
+static void toshiba_nand_decode_id(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	nand_decode_ext_id(chip);
+
+	/*
+	 * Toshiba 24nm raw SLC (i.e., not BENAND) have 32B OOB per
+	 * 512B page. For Toshiba SLC, we decode the 5th/6th byte as
+	 * follows:
+	 * - ID byte 6, bits[2:0]: 100b -> 43nm, 101b -> 32nm,
+	 *                         110b -> 24nm
+	 * - ID byte 5, bit[7]:    1 -> BENAND, 0 -> raw SLC
+	 */
+	if (chip->id.len >= 6 && nand_is_slc(chip) &&
+	    (chip->id.data[5] & 0x7) == 0x6 /* 24nm */ &&
+	    !(chip->id.data[4] & 0x80) /* !BENAND */)
+		mtd->oobsize = 32 * mtd->writesize >> 9;
+}
+
+static int toshiba_nand_init(struct nand_chip *chip)
+{
+	if (nand_is_slc(chip))
+		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
+
+	return 0;
+}
+
+const struct nand_manufacturer_ops toshiba_nand_manuf_ops = {
+	.detect = toshiba_nand_decode_id,
+	.init = toshiba_nand_init,
+};
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 092c9bd225be..03a0d057bf2f 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -902,7 +902,7 @@ static int parse_weakpages(void)
 		zero_ok = (*w == '0' ? 1 : 0);
 		page_no = simple_strtoul(w, &w, 0);
 		if (!zero_ok && !page_no) {
-			NS_ERR("invalid weakpagess.\n");
+			NS_ERR("invalid weakpages.\n");
 			return -EINVAL;
 		}
 		max_writes = 3;
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 2a52101120d4..084934a9f19c 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -1856,6 +1856,15 @@ static int omap_nand_probe(struct platform_device *pdev)
 	nand_chip->ecc.priv	= NULL;
 	nand_set_flash_node(nand_chip, dev->of_node);
 
+	if (!mtd->name) {
+		mtd->name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+					   "omap2-nand.%d", info->gpmc_cs);
+		if (!mtd->name) {
+			dev_err(&pdev->dev, "Failed to set MTD name\n");
+			return -ENOMEM;
+		}
+	}
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	nand_chip->IO_ADDR_R = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(nand_chip->IO_ADDR_R))
diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c
index 4a91c5d000be..f8e463a97b9e 100644
--- a/drivers/mtd/nand/orion_nand.c
+++ b/drivers/mtd/nand/orion_nand.c
@@ -23,6 +23,11 @@
 #include <asm/sizes.h>
 #include <linux/platform_data/mtd-orion_nand.h>
 
+struct orion_nand_info {
+	struct nand_chip chip;
+	struct clk *clk;
+};
+
 static void orion_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
 {
 	struct nand_chip *nc = mtd_to_nand(mtd);
@@ -75,20 +80,21 @@ static void orion_nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
 
 static int __init orion_nand_probe(struct platform_device *pdev)
 {
+	struct orion_nand_info *info;
 	struct mtd_info *mtd;
 	struct nand_chip *nc;
 	struct orion_nand_data *board;
 	struct resource *res;
-	struct clk *clk;
 	void __iomem *io_base;
 	int ret = 0;
 	u32 val = 0;
 
-	nc = devm_kzalloc(&pdev->dev,
-			sizeof(struct nand_chip),
+	info = devm_kzalloc(&pdev->dev,
+			sizeof(struct orion_nand_info),
 			GFP_KERNEL);
-	if (!nc)
+	if (!info)
 		return -ENOMEM;
+	nc = &info->chip;
 	mtd = nand_to_mtd(nc);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -145,16 +151,23 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 	if (board->dev_ready)
 		nc->dev_ready = board->dev_ready;
 
-	platform_set_drvdata(pdev, mtd);
+	platform_set_drvdata(pdev, info);
 
 	/* Not all platforms can gate the clock, so it is not
 	   an error if the clock does not exists. */
-	clk = clk_get(&pdev->dev, NULL);
-	if (!IS_ERR(clk)) {
-		clk_prepare_enable(clk);
-		clk_put(clk);
+	info->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(info->clk)) {
+		ret = PTR_ERR(info->clk);
+		if (ret == -ENOENT) {
+			info->clk = NULL;
+		} else {
+			dev_err(&pdev->dev, "failed to get clock!\n");
+			return ret;
+		}
 	}
 
+	clk_prepare_enable(info->clk);
+
 	ret = nand_scan(mtd, 1);
 	if (ret)
 		goto no_dev;
@@ -169,26 +182,19 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 	return 0;
 
 no_dev:
-	if (!IS_ERR(clk)) {
-		clk_disable_unprepare(clk);
-		clk_put(clk);
-	}
-
+	clk_disable_unprepare(info->clk);
 	return ret;
 }
 
 static int orion_nand_remove(struct platform_device *pdev)
 {
-	struct mtd_info *mtd = platform_get_drvdata(pdev);
-	struct clk *clk;
+	struct orion_nand_info *info = platform_get_drvdata(pdev);
+	struct nand_chip *chip = &info->chip;
+	struct mtd_info *mtd = nand_to_mtd(chip);
 
 	nand_release(mtd);
 
-	clk = clk_get(&pdev->dev, NULL);
-	if (!IS_ERR(clk)) {
-		clk_disable_unprepare(clk);
-		clk_put(clk);
-	}
+	clk_disable_unprepare(info->clk);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/oxnas_nand.c b/drivers/mtd/nand/oxnas_nand.c
index 3e3bf3b364d2..1b207aac840c 100644
--- a/drivers/mtd/nand/oxnas_nand.c
+++ b/drivers/mtd/nand/oxnas_nand.c
@@ -91,7 +91,7 @@ static int oxnas_nand_probe(struct platform_device *pdev)
 	int err = 0;
 
 	/* Allocate memory for the device structure (and zero it) */
-	oxnas = devm_kzalloc(&pdev->dev, sizeof(struct nand_chip),
+	oxnas = devm_kzalloc(&pdev->dev, sizeof(*oxnas),
 			     GFP_KERNEL);
 	if (!oxnas)
 		return -ENOMEM;
diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c
index 0eeeb8b889ea..118a26fff368 100644
--- a/drivers/mtd/nand/sunxi_nand.c
+++ b/drivers/mtd/nand/sunxi_nand.c
@@ -2212,17 +2212,17 @@ static int sunxi_nfc_probe(struct platform_device *pdev)
 		goto out_ahb_clk_unprepare;
 
 	nfc->reset = devm_reset_control_get_optional(dev, "ahb");
-	if (!IS_ERR(nfc->reset)) {
-		ret = reset_control_deassert(nfc->reset);
-		if (ret) {
-			dev_err(dev, "reset err %d\n", ret);
-			goto out_mod_clk_unprepare;
-		}
-	} else if (PTR_ERR(nfc->reset) != -ENOENT) {
+	if (IS_ERR(nfc->reset)) {
 		ret = PTR_ERR(nfc->reset);
 		goto out_mod_clk_unprepare;
 	}
 
+	ret = reset_control_deassert(nfc->reset);
+	if (ret) {
+		dev_err(dev, "reset err %d\n", ret);
+		goto out_mod_clk_unprepare;
+	}
+
 	ret = sunxi_nfc_rst(nfc);
 	if (ret)
 		goto out_ahb_reset_reassert;
@@ -2262,8 +2262,7 @@ out_release_dmac:
 	if (nfc->dmac)
 		dma_release_channel(nfc->dmac);
 out_ahb_reset_reassert:
-	if (!IS_ERR(nfc->reset))
-		reset_control_assert(nfc->reset);
+	reset_control_assert(nfc->reset);
 out_mod_clk_unprepare:
 	clk_disable_unprepare(nfc->mod_clk);
 out_ahb_clk_unprepare:
@@ -2278,8 +2277,7 @@ static int sunxi_nfc_remove(struct platform_device *pdev)
 
 	sunxi_nand_chips_cleanup(nfc);
 
-	if (!IS_ERR(nfc->reset))
-		reset_control_assert(nfc->reset);
+	reset_control_assert(nfc->reset);
 
 	if (nfc->dmac)
 		dma_release_channel(nfc->dmac);
diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c
index 4a5e948c62df..49b286c6c10f 100644
--- a/drivers/mtd/nand/tango_nand.c
+++ b/drivers/mtd/nand/tango_nand.c
@@ -55,10 +55,10 @@
  * byte 1 for other packets in the page (PKT_N, for N > 0)
  * ERR_COUNT_PKT_N is the max error count over all but the first packet.
  */
-#define DECODE_OK_PKT_0(v)	((v) & BIT(7))
-#define DECODE_OK_PKT_N(v)	((v) & BIT(15))
 #define ERR_COUNT_PKT_0(v)	(((v) >> 0) & 0x3f)
 #define ERR_COUNT_PKT_N(v)	(((v) >> 8) & 0x3f)
+#define DECODE_FAIL_PKT_0(v)	(((v) & BIT(7)) == 0)
+#define DECODE_FAIL_PKT_N(v)	(((v) & BIT(15)) == 0)
 
 /* Offsets relative to pbus_base */
 #define PBUS_CS_CTRL	0x83c
@@ -193,6 +193,8 @@ static int check_erased_page(struct nand_chip *chip, u8 *buf)
 						  chip->ecc.strength);
 		if (res < 0)
 			mtd->ecc_stats.failed++;
+		else
+			mtd->ecc_stats.corrected += res;
 
 		bitflips = max(res, bitflips);
 		buf += pkt_size;
@@ -202,9 +204,11 @@ static int check_erased_page(struct nand_chip *chip, u8 *buf)
 	return bitflips;
 }
 
-static int decode_error_report(struct tango_nfc *nfc)
+static int decode_error_report(struct nand_chip *chip)
 {
 	u32 status, res;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct tango_nfc *nfc = to_tango_nfc(chip->controller);
 
 	status = readl_relaxed(nfc->reg_base + NFC_XFER_STATUS);
 	if (status & PAGE_IS_EMPTY)
@@ -212,10 +216,14 @@ static int decode_error_report(struct tango_nfc *nfc)
 
 	res = readl_relaxed(nfc->mem_base + ERROR_REPORT);
 
-	if (DECODE_OK_PKT_0(res) && DECODE_OK_PKT_N(res))
-		return max(ERR_COUNT_PKT_0(res), ERR_COUNT_PKT_N(res));
+	if (DECODE_FAIL_PKT_0(res) || DECODE_FAIL_PKT_N(res))
+		return -EBADMSG;
+
+	/* ERR_COUNT_PKT_N is max, not sum, but that's all we have */
+	mtd->ecc_stats.corrected +=
+		ERR_COUNT_PKT_0(res) + ERR_COUNT_PKT_N(res);
 
-	return -EBADMSG;
+	return max(ERR_COUNT_PKT_0(res), ERR_COUNT_PKT_N(res));
 }
 
 static void tango_dma_callback(void *arg)
@@ -223,12 +231,13 @@ static void tango_dma_callback(void *arg)
 	complete(arg);
 }
 
-static int do_dma(struct tango_nfc *nfc, int dir, int cmd, const void *buf,
-		  int len, int page)
+static int do_dma(struct tango_nfc *nfc, enum dma_data_direction dir, int cmd,
+		  const void *buf, int len, int page)
 {
 	void __iomem *addr = nfc->reg_base + NFC_STATUS;
 	struct dma_chan *chan = nfc->chan;
 	struct dma_async_tx_descriptor *desc;
+	enum dma_transfer_direction tdir;
 	struct scatterlist sg;
 	struct completion tx_done;
 	int err = -EIO;
@@ -238,7 +247,8 @@ static int do_dma(struct tango_nfc *nfc, int dir, int cmd, const void *buf,
 	if (dma_map_sg(chan->device->dev, &sg, 1, dir) != 1)
 		return -EIO;
 
-	desc = dmaengine_prep_slave_sg(chan, &sg, 1, dir, DMA_PREP_INTERRUPT);
+	tdir = dir == DMA_TO_DEVICE ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
+	desc = dmaengine_prep_slave_sg(chan, &sg, 1, tdir, DMA_PREP_INTERRUPT);
 	if (!desc)
 		goto dma_unmap;
 
@@ -280,7 +290,7 @@ static int tango_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	if (err)
 		return err;
 
-	res = decode_error_report(nfc);
+	res = decode_error_report(chip);
 	if (res < 0) {
 		chip->ecc.read_oob_raw(mtd, chip, page);
 		res = check_erased_page(chip, buf);
@@ -661,6 +671,7 @@ static const struct of_device_id tango_nand_ids[] = {
 	{ .compatible = "sigma,smp8758-nand" },
 	{ /* sentinel */ }
 };
+MODULE_DEVICE_TABLE(of, tango_nand_ids);
 
 static struct platform_driver tango_nand_driver = {
 	.probe	= tango_nand_probe,
diff --git a/drivers/mtd/ofpart.c b/drivers/mtd/ofpart.c
index 464470122493..2861c7079d7b 100644
--- a/drivers/mtd/ofpart.c
+++ b/drivers/mtd/ofpart.c
@@ -166,8 +166,8 @@ static int parse_ofoldpart_partitions(struct mtd_info *master,
 	if (!part)
 		return 0; /* No partitions found */
 
-	pr_warning("Device tree uses obsolete partition map binding: %s\n",
-			dp->full_name);
+	pr_warn("Device tree uses obsolete partition map binding: %s\n",
+		dp->full_name);
 
 	nr_parts = plen / sizeof(part[0]);
 
diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig
index 7252087ef407..bfdfb1e72b38 100644
--- a/drivers/mtd/spi-nor/Kconfig
+++ b/drivers/mtd/spi-nor/Kconfig
@@ -106,4 +106,11 @@ config SPI_INTEL_SPI_PLATFORM
 	  To compile this driver as a module, choose M here: the module
 	  will be called intel-spi-platform.
 
+config SPI_STM32_QUADSPI
+	tristate "STM32 Quad SPI controller"
+	depends on ARCH_STM32
+	help
+	  This enables support for the STM32 Quad SPI controller.
+	  We only connect the NOR to this controller.
+
 endif # MTD_SPI_NOR
diff --git a/drivers/mtd/spi-nor/Makefile b/drivers/mtd/spi-nor/Makefile
index 72238a793198..285aab86c7ca 100644
--- a/drivers/mtd/spi-nor/Makefile
+++ b/drivers/mtd/spi-nor/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_MTD_MT81xx_NOR)    += mtk-quadspi.o
 obj-$(CONFIG_SPI_NXP_SPIFI)	+= nxp-spifi.o
 obj-$(CONFIG_SPI_INTEL_SPI)	+= intel-spi.o
 obj-$(CONFIG_SPI_INTEL_SPI_PLATFORM)	+= intel-spi-platform.o
+obj-$(CONFIG_SPI_STM32_QUADSPI)	+= stm32-quadspi.o
+\ No newline at end of file
diff --git a/drivers/mtd/spi-nor/hisi-sfc.c b/drivers/mtd/spi-nor/hisi-sfc.c
index 20378b0d55e9..a286350627a6 100644
--- a/drivers/mtd/spi-nor/hisi-sfc.c
+++ b/drivers/mtd/spi-nor/hisi-sfc.c
@@ -448,8 +448,11 @@ static int hisi_spi_nor_probe(struct platform_device *pdev)
 	if (!host->buffer)
 		return -ENOMEM;
 
+	ret = clk_prepare_enable(host->clk);
+	if (ret)
+		return ret;
+
 	mutex_init(&host->lock);
-	clk_prepare_enable(host->clk);
 	hisi_spi_nor_init(host);
 	ret = hisi_spi_nor_register_all(host);
 	if (ret)
diff --git a/drivers/mtd/spi-nor/intel-spi.c b/drivers/mtd/spi-nor/intel-spi.c
index a10f6027b386..986a3d020a3a 100644
--- a/drivers/mtd/spi-nor/intel-spi.c
+++ b/drivers/mtd/spi-nor/intel-spi.c
@@ -704,7 +704,7 @@ static void intel_spi_fill_partition(struct intel_spi *ispi,
 		 * whole partition read-only to be on the safe side.
 		 */
 		if (intel_spi_is_protected(ispi, base, limit))
-			ispi->writeable = 0;
+			ispi->writeable = false;
 
 		end = (limit << 12) + 4096;
 		if (end > part->size)
@@ -728,7 +728,7 @@ struct intel_spi *intel_spi_probe(struct device *dev,
 
 	ispi->base = devm_ioremap_resource(dev, mem);
 	if (IS_ERR(ispi->base))
-		return ispi->base;
+		return ERR_CAST(ispi->base);
 
 	ispi->dev = dev;
 	ispi->info = info;
diff --git a/drivers/mtd/spi-nor/mtk-quadspi.c b/drivers/mtd/spi-nor/mtk-quadspi.c
index e661877c23de..b6377707ce32 100644
--- a/drivers/mtd/spi-nor/mtk-quadspi.c
+++ b/drivers/mtd/spi-nor/mtk-quadspi.c
@@ -104,6 +104,8 @@
 #define MTK_NOR_MAX_RX_TX_SHIFT		6
 /* can shift up to 56 bits (7 bytes) transfer by MTK_NOR_PRG_CMD */
 #define MTK_NOR_MAX_SHIFT		7
+/* nor controller 4-byte address mode enable bit */
+#define MTK_NOR_4B_ADDR_EN		BIT(4)
 
 /* Helpers for accessing the program data / shift data registers */
 #define MTK_NOR_PRG_REG(n)		(MTK_NOR_PRGDATA0_REG + 4 * (n))
@@ -230,10 +232,35 @@ static int mt8173_nor_write_buffer_disable(struct mt8173_nor *mt8173_nor)
 				  10000);
 }
 
+static void mt8173_nor_set_addr_width(struct mt8173_nor *mt8173_nor)
+{
+	u8 val;
+	struct spi_nor *nor = &mt8173_nor->nor;
+
+	val = readb(mt8173_nor->base + MTK_NOR_DUAL_REG);
+
+	switch (nor->addr_width) {
+	case 3:
+		val &= ~MTK_NOR_4B_ADDR_EN;
+		break;
+	case 4:
+		val |= MTK_NOR_4B_ADDR_EN;
+		break;
+	default:
+		dev_warn(mt8173_nor->dev, "Unexpected address width %u.\n",
+			 nor->addr_width);
+		break;
+	}
+
+	writeb(val, mt8173_nor->base + MTK_NOR_DUAL_REG);
+}
+
 static void mt8173_nor_set_addr(struct mt8173_nor *mt8173_nor, u32 addr)
 {
 	int i;
 
+	mt8173_nor_set_addr_width(mt8173_nor);
+
 	for (i = 0; i < 3; i++) {
 		writeb(addr & 0xff, mt8173_nor->base + MTK_NOR_RADR0_REG + i * 4);
 		addr >>= 8;
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 747645c74134..dea8c9cbadf0 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -85,6 +85,7 @@ struct flash_info {
 					 * Use dedicated 4byte address op codes
 					 * to support memory size above 128Mib.
 					 */
+#define NO_CHIP_ERASE		BIT(12) /* Chip does not support chip erase */
 };
 
 #define JEDEC_MFR(info)	((info)->id[0])
@@ -960,6 +961,8 @@ static const struct flash_info spi_nor_ids[] = {
 
 	/* ESMT */
 	{ "f25l32pa", INFO(0x8c2016, 0, 64 * 1024, 64, SECT_4K | SPI_NOR_HAS_LOCK) },
+	{ "f25l32qa", INFO(0x8c4116, 0, 64 * 1024, 64, SECT_4K | SPI_NOR_HAS_LOCK) },
+	{ "f25l64qa", INFO(0x8c4117, 0, 64 * 1024, 128, SECT_4K | SPI_NOR_HAS_LOCK) },
 
 	/* Everspin */
 	{ "mr25h256", CAT25_INFO( 32 * 1024, 1, 256, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
@@ -1013,11 +1016,14 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "mx25l3205d",  INFO(0xc22016, 0, 64 * 1024,  64, SECT_4K) },
 	{ "mx25l3255e",  INFO(0xc29e16, 0, 64 * 1024,  64, SECT_4K) },
 	{ "mx25l6405d",  INFO(0xc22017, 0, 64 * 1024, 128, SECT_4K) },
+	{ "mx25u2033e",  INFO(0xc22532, 0, 64 * 1024,   4, SECT_4K) },
+	{ "mx25u4035",   INFO(0xc22533, 0, 64 * 1024,   8, SECT_4K) },
+	{ "mx25u8035",   INFO(0xc22534, 0, 64 * 1024,  16, SECT_4K) },
 	{ "mx25u6435f",  INFO(0xc22537, 0, 64 * 1024, 128, SECT_4K) },
 	{ "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) },
 	{ "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) },
 	{ "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, 0) },
-	{ "mx25u25635f", INFO(0xc22539, 0, 64 * 1024, 512, SECT_4K) },
+	{ "mx25u25635f", INFO(0xc22539, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_4B_OPCODES) },
 	{ "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) },
 	{ "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_QUAD_READ) },
 	{ "mx66l1g55g",  INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) },
@@ -1031,10 +1037,11 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "n25q128a11",  INFO(0x20bb18, 0, 64 * 1024,  256, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_QUAD_READ) },
+	{ "n25q256ax1",  INFO(0x20bb19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q512a",    INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
 	{ "n25q512ax3",  INFO(0x20ba20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
-	{ "n25q00",      INFO(0x20ba21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
-	{ "n25q00a",     INFO(0x20bb21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
+	{ "n25q00",      INFO(0x20ba21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) },
+	{ "n25q00a",     INFO(0x20bb21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) },
 
 	/* PMC */
 	{ "pm25lv512",   INFO(0,        0, 32 * 1024,    2, SECT_4K_PMC) },
@@ -1128,6 +1135,9 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "w25x80", INFO(0xef3014, 0, 64 * 1024,  16, SECT_4K) },
 	{ "w25x16", INFO(0xef3015, 0, 64 * 1024,  32, SECT_4K) },
 	{ "w25x32", INFO(0xef3016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "w25q20cl", INFO(0xef4012, 0, 64 * 1024,  4, SECT_4K) },
+	{ "w25q20bw", INFO(0xef5012, 0, 64 * 1024,  4, SECT_4K) },
+	{ "w25q20ew", INFO(0xef6012, 0, 64 * 1024,  4, SECT_4K) },
 	{ "w25q32", INFO(0xef4016, 0, 64 * 1024,  64, SECT_4K) },
 	{
 		"w25q32dw", INFO(0xef6016, 0, 64 * 1024,  64,
@@ -1629,6 +1639,8 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 		nor->flags |= SNOR_F_USE_FSR;
 	if (info->flags & SPI_NOR_HAS_TB)
 		nor->flags |= SNOR_F_HAS_SR_TB;
+	if (info->flags & NO_CHIP_ERASE)
+		nor->flags |= SNOR_F_NO_OP_CHIP_ERASE;
 
 #ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
 	/* prefer "small sector" erase if possible */
diff --git a/drivers/mtd/spi-nor/stm32-quadspi.c b/drivers/mtd/spi-nor/stm32-quadspi.c
new file mode 100644
index 000000000000..ae45f81b8cd3
--- /dev/null
+++ b/drivers/mtd/spi-nor/stm32-quadspi.c
@@ -0,0 +1,693 @@
+/*
+ * stm32_quadspi.c
+ *
+ * Copyright (C) 2017, Ludovic Barre
+ *
+ * License terms: GNU General Public License (GPL), version 2
+ */
+#include <linux/clk.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/spi-nor.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+#define QUADSPI_CR		0x00
+#define CR_EN			BIT(0)
+#define CR_ABORT		BIT(1)
+#define CR_DMAEN		BIT(2)
+#define CR_TCEN			BIT(3)
+#define CR_SSHIFT		BIT(4)
+#define CR_DFM			BIT(6)
+#define CR_FSEL			BIT(7)
+#define CR_FTHRES_SHIFT		8
+#define CR_FTHRES_MASK		GENMASK(12, 8)
+#define CR_FTHRES(n)		(((n) << CR_FTHRES_SHIFT) & CR_FTHRES_MASK)
+#define CR_TEIE			BIT(16)
+#define CR_TCIE			BIT(17)
+#define CR_FTIE			BIT(18)
+#define CR_SMIE			BIT(19)
+#define CR_TOIE			BIT(20)
+#define CR_PRESC_SHIFT		24
+#define CR_PRESC_MASK		GENMASK(31, 24)
+#define CR_PRESC(n)		(((n) << CR_PRESC_SHIFT) & CR_PRESC_MASK)
+
+#define QUADSPI_DCR		0x04
+#define DCR_CSHT_SHIFT		8
+#define DCR_CSHT_MASK		GENMASK(10, 8)
+#define DCR_CSHT(n)		(((n) << DCR_CSHT_SHIFT) & DCR_CSHT_MASK)
+#define DCR_FSIZE_SHIFT		16
+#define DCR_FSIZE_MASK		GENMASK(20, 16)
+#define DCR_FSIZE(n)		(((n) << DCR_FSIZE_SHIFT) & DCR_FSIZE_MASK)
+
+#define QUADSPI_SR		0x08
+#define SR_TEF			BIT(0)
+#define SR_TCF			BIT(1)
+#define SR_FTF			BIT(2)
+#define SR_SMF			BIT(3)
+#define SR_TOF			BIT(4)
+#define SR_BUSY			BIT(5)
+#define SR_FLEVEL_SHIFT		8
+#define SR_FLEVEL_MASK		GENMASK(13, 8)
+
+#define QUADSPI_FCR		0x0c
+#define FCR_CTCF		BIT(1)
+
+#define QUADSPI_DLR		0x10
+
+#define QUADSPI_CCR		0x14
+#define CCR_INST_SHIFT		0
+#define CCR_INST_MASK		GENMASK(7, 0)
+#define CCR_INST(n)		(((n) << CCR_INST_SHIFT) & CCR_INST_MASK)
+#define CCR_IMODE_NONE		(0U << 8)
+#define CCR_IMODE_1		(1U << 8)
+#define CCR_IMODE_2		(2U << 8)
+#define CCR_IMODE_4		(3U << 8)
+#define CCR_ADMODE_NONE		(0U << 10)
+#define CCR_ADMODE_1		(1U << 10)
+#define CCR_ADMODE_2		(2U << 10)
+#define CCR_ADMODE_4		(3U << 10)
+#define CCR_ADSIZE_SHIFT	12
+#define CCR_ADSIZE_MASK		GENMASK(13, 12)
+#define CCR_ADSIZE(n)		(((n) << CCR_ADSIZE_SHIFT) & CCR_ADSIZE_MASK)
+#define CCR_ABMODE_NONE		(0U << 14)
+#define CCR_ABMODE_1		(1U << 14)
+#define CCR_ABMODE_2		(2U << 14)
+#define CCR_ABMODE_4		(3U << 14)
+#define CCR_ABSIZE_8		(0U << 16)
+#define CCR_ABSIZE_16		(1U << 16)
+#define CCR_ABSIZE_24		(2U << 16)
+#define CCR_ABSIZE_32		(3U << 16)
+#define CCR_DCYC_SHIFT		18
+#define CCR_DCYC_MASK		GENMASK(22, 18)
+#define CCR_DCYC(n)		(((n) << CCR_DCYC_SHIFT) & CCR_DCYC_MASK)
+#define CCR_DMODE_NONE		(0U << 24)
+#define CCR_DMODE_1		(1U << 24)
+#define CCR_DMODE_2		(2U << 24)
+#define CCR_DMODE_4		(3U << 24)
+#define CCR_FMODE_INDW		(0U << 26)
+#define CCR_FMODE_INDR		(1U << 26)
+#define CCR_FMODE_APM		(2U << 26)
+#define CCR_FMODE_MM		(3U << 26)
+
+#define QUADSPI_AR		0x18
+#define QUADSPI_ABR		0x1c
+#define QUADSPI_DR		0x20
+#define QUADSPI_PSMKR		0x24
+#define QUADSPI_PSMAR		0x28
+#define QUADSPI_PIR		0x2c
+#define QUADSPI_LPTR		0x30
+#define LPTR_DFT_TIMEOUT	0x10
+
+#define FSIZE_VAL(size)		(__fls(size) - 1)
+
+#define STM32_MAX_MMAP_SZ	SZ_256M
+#define STM32_MAX_NORCHIP	2
+
+#define STM32_QSPI_FIFO_TIMEOUT_US 30000
+#define STM32_QSPI_BUSY_TIMEOUT_US 100000
+
+struct stm32_qspi_flash {
+	struct spi_nor nor;
+	struct stm32_qspi *qspi;
+	u32 cs;
+	u32 fsize;
+	u32 presc;
+	u32 read_mode;
+	bool registered;
+};
+
+struct stm32_qspi {
+	struct device *dev;
+	void __iomem *io_base;
+	void __iomem *mm_base;
+	resource_size_t mm_size;
+	u32 nor_num;
+	struct clk *clk;
+	u32 clk_rate;
+	struct stm32_qspi_flash flash[STM32_MAX_NORCHIP];
+	struct completion cmd_completion;
+
+	/*
+	 * to protect device configuration, could be different between
+	 * 2 flash access (bk1, bk2)
+	 */
+	struct mutex lock;
+};
+
+struct stm32_qspi_cmd {
+	u8 addr_width;
+	u8 dummy;
+	bool tx_data;
+	u8 opcode;
+	u32 framemode;
+	u32 qspimode;
+	u32 addr;
+	size_t len;
+	void *buf;
+};
+
+static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi)
+{
+	u32 cr;
+	int err = 0;
+
+	if (readl_relaxed(qspi->io_base + QUADSPI_SR) & SR_TCF)
+		return 0;
+
+	reinit_completion(&qspi->cmd_completion);
+	cr = readl_relaxed(qspi->io_base + QUADSPI_CR);
+	writel_relaxed(cr | CR_TCIE, qspi->io_base + QUADSPI_CR);
+
+	if (!wait_for_completion_interruptible_timeout(&qspi->cmd_completion,
+						       msecs_to_jiffies(1000)))
+		err = -ETIMEDOUT;
+
+	writel_relaxed(cr, qspi->io_base + QUADSPI_CR);
+	return err;
+}
+
+static int stm32_qspi_wait_nobusy(struct stm32_qspi *qspi)
+{
+	u32 sr;
+
+	return readl_relaxed_poll_timeout(qspi->io_base + QUADSPI_SR, sr,
+					  !(sr & SR_BUSY), 10,
+					  STM32_QSPI_BUSY_TIMEOUT_US);
+}
+
+static void stm32_qspi_set_framemode(struct spi_nor *nor,
+				     struct stm32_qspi_cmd *cmd, bool read)
+{
+	u32 dmode = CCR_DMODE_1;
+
+	cmd->framemode = CCR_IMODE_1;
+
+	if (read) {
+		switch (nor->flash_read) {
+		case SPI_NOR_NORMAL:
+		case SPI_NOR_FAST:
+			dmode = CCR_DMODE_1;
+			break;
+		case SPI_NOR_DUAL:
+			dmode = CCR_DMODE_2;
+			break;
+		case SPI_NOR_QUAD:
+			dmode = CCR_DMODE_4;
+			break;
+		}
+	}
+
+	cmd->framemode |= cmd->tx_data ? dmode : 0;
+	cmd->framemode |= cmd->addr_width ? CCR_ADMODE_1 : 0;
+}
+
+static void stm32_qspi_read_fifo(u8 *val, void __iomem *addr)
+{
+	*val = readb_relaxed(addr);
+}
+
+static void stm32_qspi_write_fifo(u8 *val, void __iomem *addr)
+{
+	writeb_relaxed(*val, addr);
+}
+
+static int stm32_qspi_tx_poll(struct stm32_qspi *qspi,
+			      const struct stm32_qspi_cmd *cmd)
+{
+	void (*tx_fifo)(u8 *, void __iomem *);
+	u32 len = cmd->len, sr;
+	u8 *buf = cmd->buf;
+	int ret;
+
+	if (cmd->qspimode == CCR_FMODE_INDW)
+		tx_fifo = stm32_qspi_write_fifo;
+	else
+		tx_fifo = stm32_qspi_read_fifo;
+
+	while (len--) {
+		ret = readl_relaxed_poll_timeout(qspi->io_base + QUADSPI_SR,
+						 sr, (sr & SR_FTF), 10,
+						 STM32_QSPI_FIFO_TIMEOUT_US);
+		if (ret) {
+			dev_err(qspi->dev, "fifo timeout (stat:%#x)\n", sr);
+			break;
+		}
+		tx_fifo(buf++, qspi->io_base + QUADSPI_DR);
+	}
+
+	return ret;
+}
+
+static int stm32_qspi_tx_mm(struct stm32_qspi *qspi,
+			    const struct stm32_qspi_cmd *cmd)
+{
+	memcpy_fromio(cmd->buf, qspi->mm_base + cmd->addr, cmd->len);
+	return 0;
+}
+
+static int stm32_qspi_tx(struct stm32_qspi *qspi,
+			 const struct stm32_qspi_cmd *cmd)
+{
+	if (!cmd->tx_data)
+		return 0;
+
+	if (cmd->qspimode == CCR_FMODE_MM)
+		return stm32_qspi_tx_mm(qspi, cmd);
+
+	return stm32_qspi_tx_poll(qspi, cmd);
+}
+
+static int stm32_qspi_send(struct stm32_qspi_flash *flash,
+			   const struct stm32_qspi_cmd *cmd)
+{
+	struct stm32_qspi *qspi = flash->qspi;
+	u32 ccr, dcr, cr;
+	int err;
+
+	err = stm32_qspi_wait_nobusy(qspi);
+	if (err)
+		goto abort;
+
+	dcr = readl_relaxed(qspi->io_base + QUADSPI_DCR) & ~DCR_FSIZE_MASK;
+	dcr |= DCR_FSIZE(flash->fsize);
+	writel_relaxed(dcr, qspi->io_base + QUADSPI_DCR);
+
+	cr = readl_relaxed(qspi->io_base + QUADSPI_CR);
+	cr &= ~CR_PRESC_MASK & ~CR_FSEL;
+	cr |= CR_PRESC(flash->presc);
+	cr |= flash->cs ? CR_FSEL : 0;
+	writel_relaxed(cr, qspi->io_base + QUADSPI_CR);
+
+	if (cmd->tx_data)
+		writel_relaxed(cmd->len - 1, qspi->io_base + QUADSPI_DLR);
+
+	ccr = cmd->framemode | cmd->qspimode;
+
+	if (cmd->dummy)
+		ccr |= CCR_DCYC(cmd->dummy);
+
+	if (cmd->addr_width)
+		ccr |= CCR_ADSIZE(cmd->addr_width - 1);
+
+	ccr |= CCR_INST(cmd->opcode);
+	writel_relaxed(ccr, qspi->io_base + QUADSPI_CCR);
+
+	if (cmd->addr_width && cmd->qspimode != CCR_FMODE_MM)
+		writel_relaxed(cmd->addr, qspi->io_base + QUADSPI_AR);
+
+	err = stm32_qspi_tx(qspi, cmd);
+	if (err)
+		goto abort;
+
+	if (cmd->qspimode != CCR_FMODE_MM) {
+		err = stm32_qspi_wait_cmd(qspi);
+		if (err)
+			goto abort;
+		writel_relaxed(FCR_CTCF, qspi->io_base + QUADSPI_FCR);
+	}
+
+	return err;
+
+abort:
+	cr = readl_relaxed(qspi->io_base + QUADSPI_CR) | CR_ABORT;
+	writel_relaxed(cr, qspi->io_base + QUADSPI_CR);
+
+	dev_err(qspi->dev, "%s abort err:%d\n", __func__, err);
+	return err;
+}
+
+static int stm32_qspi_read_reg(struct spi_nor *nor,
+			       u8 opcode, u8 *buf, int len)
+{
+	struct stm32_qspi_flash *flash = nor->priv;
+	struct device *dev = flash->qspi->dev;
+	struct stm32_qspi_cmd cmd;
+
+	dev_dbg(dev, "read_reg: cmd:%#.2x buf:%p len:%#x\n", opcode, buf, len);
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = opcode;
+	cmd.tx_data = true;
+	cmd.len = len;
+	cmd.buf = buf;
+	cmd.qspimode = CCR_FMODE_INDR;
+
+	stm32_qspi_set_framemode(nor, &cmd, false);
+
+	return stm32_qspi_send(flash, &cmd);
+}
+
+static int stm32_qspi_write_reg(struct spi_nor *nor, u8 opcode,
+				u8 *buf, int len)
+{
+	struct stm32_qspi_flash *flash = nor->priv;
+	struct device *dev = flash->qspi->dev;
+	struct stm32_qspi_cmd cmd;
+
+	dev_dbg(dev, "write_reg: cmd:%#.2x buf:%p len:%#x\n", opcode, buf, len);
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = opcode;
+	cmd.tx_data = !!(buf && len > 0);
+	cmd.len = len;
+	cmd.buf = buf;
+	cmd.qspimode = CCR_FMODE_INDW;
+
+	stm32_qspi_set_framemode(nor, &cmd, false);
+
+	return stm32_qspi_send(flash, &cmd);
+}
+
+static ssize_t stm32_qspi_read(struct spi_nor *nor, loff_t from, size_t len,
+			       u_char *buf)
+{
+	struct stm32_qspi_flash *flash = nor->priv;
+	struct stm32_qspi *qspi = flash->qspi;
+	struct stm32_qspi_cmd cmd;
+	int err;
+
+	dev_dbg(qspi->dev, "read(%#.2x): buf:%p from:%#.8x len:%#x\n",
+		nor->read_opcode, buf, (u32)from, len);
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = nor->read_opcode;
+	cmd.addr_width = nor->addr_width;
+	cmd.addr = (u32)from;
+	cmd.tx_data = true;
+	cmd.dummy = nor->read_dummy;
+	cmd.len = len;
+	cmd.buf = buf;
+	cmd.qspimode = flash->read_mode;
+
+	stm32_qspi_set_framemode(nor, &cmd, true);
+	err = stm32_qspi_send(flash, &cmd);
+
+	return err ? err : len;
+}
+
+static ssize_t stm32_qspi_write(struct spi_nor *nor, loff_t to, size_t len,
+				const u_char *buf)
+{
+	struct stm32_qspi_flash *flash = nor->priv;
+	struct device *dev = flash->qspi->dev;
+	struct stm32_qspi_cmd cmd;
+	int err;
+
+	dev_dbg(dev, "write(%#.2x): buf:%p to:%#.8x len:%#x\n",
+		nor->program_opcode, buf, (u32)to, len);
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = nor->program_opcode;
+	cmd.addr_width = nor->addr_width;
+	cmd.addr = (u32)to;
+	cmd.tx_data = true;
+	cmd.len = len;
+	cmd.buf = (void *)buf;
+	cmd.qspimode = CCR_FMODE_INDW;
+
+	stm32_qspi_set_framemode(nor, &cmd, false);
+	err = stm32_qspi_send(flash, &cmd);
+
+	return err ? err : len;
+}
+
+static int stm32_qspi_erase(struct spi_nor *nor, loff_t offs)
+{
+	struct stm32_qspi_flash *flash = nor->priv;
+	struct device *dev = flash->qspi->dev;
+	struct stm32_qspi_cmd cmd;
+
+	dev_dbg(dev, "erase(%#.2x):offs:%#x\n", nor->erase_opcode, (u32)offs);
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = nor->erase_opcode;
+	cmd.addr_width = nor->addr_width;
+	cmd.addr = (u32)offs;
+	cmd.qspimode = CCR_FMODE_INDW;
+
+	stm32_qspi_set_framemode(nor, &cmd, false);
+
+	return stm32_qspi_send(flash, &cmd);
+}
+
+static irqreturn_t stm32_qspi_irq(int irq, void *dev_id)
+{
+	struct stm32_qspi *qspi = (struct stm32_qspi *)dev_id;
+	u32 cr, sr, fcr = 0;
+
+	cr = readl_relaxed(qspi->io_base + QUADSPI_CR);
+	sr = readl_relaxed(qspi->io_base + QUADSPI_SR);
+
+	if ((cr & CR_TCIE) && (sr & SR_TCF)) {
+		/* tx complete */
+		fcr |= FCR_CTCF;
+		complete(&qspi->cmd_completion);
+	} else {
+		dev_info_ratelimited(qspi->dev, "spurious interrupt\n");
+	}
+
+	writel_relaxed(fcr, qspi->io_base + QUADSPI_FCR);
+
+	return IRQ_HANDLED;
+}
+
+static int stm32_qspi_prep(struct spi_nor *nor, enum spi_nor_ops ops)
+{
+	struct stm32_qspi_flash *flash = nor->priv;
+	struct stm32_qspi *qspi = flash->qspi;
+
+	mutex_lock(&qspi->lock);
+	return 0;
+}
+
+static void stm32_qspi_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
+{
+	struct stm32_qspi_flash *flash = nor->priv;
+	struct stm32_qspi *qspi = flash->qspi;
+
+	mutex_unlock(&qspi->lock);
+}
+
+static int stm32_qspi_flash_setup(struct stm32_qspi *qspi,
+				  struct device_node *np)
+{
+	u32 width, flash_read, presc, cs_num, max_rate = 0;
+	struct stm32_qspi_flash *flash;
+	struct mtd_info *mtd;
+	int ret;
+
+	of_property_read_u32(np, "reg", &cs_num);
+	if (cs_num >= STM32_MAX_NORCHIP)
+		return -EINVAL;
+
+	of_property_read_u32(np, "spi-max-frequency", &max_rate);
+	if (!max_rate)
+		return -EINVAL;
+
+	presc = DIV_ROUND_UP(qspi->clk_rate, max_rate) - 1;
+
+	if (of_property_read_u32(np, "spi-rx-bus-width", &width))
+		width = 1;
+
+	if (width == 4)
+		flash_read = SPI_NOR_QUAD;
+	else if (width == 2)
+		flash_read = SPI_NOR_DUAL;
+	else if (width == 1)
+		flash_read = SPI_NOR_NORMAL;
+	else
+		return -EINVAL;
+
+	flash = &qspi->flash[cs_num];
+	flash->qspi = qspi;
+	flash->cs = cs_num;
+	flash->presc = presc;
+
+	flash->nor.dev = qspi->dev;
+	spi_nor_set_flash_node(&flash->nor, np);
+	flash->nor.priv = flash;
+	mtd = &flash->nor.mtd;
+
+	flash->nor.read = stm32_qspi_read;
+	flash->nor.write = stm32_qspi_write;
+	flash->nor.erase = stm32_qspi_erase;
+	flash->nor.read_reg = stm32_qspi_read_reg;
+	flash->nor.write_reg = stm32_qspi_write_reg;
+	flash->nor.prepare = stm32_qspi_prep;
+	flash->nor.unprepare = stm32_qspi_unprep;
+
+	writel_relaxed(LPTR_DFT_TIMEOUT, qspi->io_base + QUADSPI_LPTR);
+
+	writel_relaxed(CR_PRESC(presc) | CR_FTHRES(3) | CR_TCEN | CR_SSHIFT
+		       | CR_EN, qspi->io_base + QUADSPI_CR);
+
+	/*
+	 * in stm32 qspi controller, QUADSPI_DCR register has a fsize field
+	 * which define the size of nor flash.
+	 * if fsize is NULL, the controller can't sent spi-nor command.
+	 * set a temporary value just to discover the nor flash with
+	 * "spi_nor_scan". After, the right value (mtd->size) can be set.
+	 */
+	flash->fsize = FSIZE_VAL(SZ_1K);
+
+	ret = spi_nor_scan(&flash->nor, NULL, flash_read);
+	if (ret) {
+		dev_err(qspi->dev, "device scan failed\n");
+		return ret;
+	}
+
+	flash->fsize = FSIZE_VAL(mtd->size);
+
+	flash->read_mode = CCR_FMODE_MM;
+	if (mtd->size > qspi->mm_size)
+		flash->read_mode = CCR_FMODE_INDR;
+
+	writel_relaxed(DCR_CSHT(1), qspi->io_base + QUADSPI_DCR);
+
+	ret = mtd_device_register(mtd, NULL, 0);
+	if (ret) {
+		dev_err(qspi->dev, "mtd device parse failed\n");
+		return ret;
+	}
+
+	flash->registered = true;
+
+	dev_dbg(qspi->dev, "read mm:%s cs:%d bus:%d\n",
+		flash->read_mode == CCR_FMODE_MM ? "yes" : "no", cs_num, width);
+
+	return 0;
+}
+
+static void stm32_qspi_mtd_free(struct stm32_qspi *qspi)
+{
+	int i;
+
+	for (i = 0; i < STM32_MAX_NORCHIP; i++)
+		if (qspi->flash[i].registered)
+			mtd_device_unregister(&qspi->flash[i].nor.mtd);
+}
+
+static int stm32_qspi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *flash_np;
+	struct reset_control *rstc;
+	struct stm32_qspi *qspi;
+	struct resource *res;
+	int ret, irq;
+
+	qspi = devm_kzalloc(dev, sizeof(*qspi), GFP_KERNEL);
+	if (!qspi)
+		return -ENOMEM;
+
+	qspi->nor_num = of_get_child_count(dev->of_node);
+	if (!qspi->nor_num || qspi->nor_num > STM32_MAX_NORCHIP)
+		return -ENODEV;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi");
+	qspi->io_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(qspi->io_base))
+		return PTR_ERR(qspi->io_base);
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi_mm");
+	qspi->mm_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(qspi->mm_base))
+		return PTR_ERR(qspi->mm_base);
+
+	qspi->mm_size = resource_size(res);
+
+	irq = platform_get_irq(pdev, 0);
+	ret = devm_request_irq(dev, irq, stm32_qspi_irq, 0,
+			       dev_name(dev), qspi);
+	if (ret) {
+		dev_err(dev, "failed to request irq\n");
+		return ret;
+	}
+
+	init_completion(&qspi->cmd_completion);
+
+	qspi->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(qspi->clk))
+		return PTR_ERR(qspi->clk);
+
+	qspi->clk_rate = clk_get_rate(qspi->clk);
+	if (!qspi->clk_rate)
+		return -EINVAL;
+
+	ret = clk_prepare_enable(qspi->clk);
+	if (ret) {
+		dev_err(dev, "can not enable the clock\n");
+		return ret;
+	}
+
+	rstc = devm_reset_control_get(dev, NULL);
+	if (!IS_ERR(rstc)) {
+		reset_control_assert(rstc);
+		udelay(2);
+		reset_control_deassert(rstc);
+	}
+
+	qspi->dev = dev;
+	platform_set_drvdata(pdev, qspi);
+	mutex_init(&qspi->lock);
+
+	for_each_available_child_of_node(dev->of_node, flash_np) {
+		ret = stm32_qspi_flash_setup(qspi, flash_np);
+		if (ret) {
+			dev_err(dev, "unable to setup flash chip\n");
+			goto err_flash;
+		}
+	}
+
+	return 0;
+
+err_flash:
+	mutex_destroy(&qspi->lock);
+	stm32_qspi_mtd_free(qspi);
+
+	clk_disable_unprepare(qspi->clk);
+	return ret;
+}
+
+static int stm32_qspi_remove(struct platform_device *pdev)
+{
+	struct stm32_qspi *qspi = platform_get_drvdata(pdev);
+
+	/* disable qspi */
+	writel_relaxed(0, qspi->io_base + QUADSPI_CR);
+
+	stm32_qspi_mtd_free(qspi);
+	mutex_destroy(&qspi->lock);
+
+	clk_disable_unprepare(qspi->clk);
+	return 0;
+}
+
+static const struct of_device_id stm32_qspi_match[] = {
+	{.compatible = "st,stm32f469-qspi"},
+	{}
+};
+MODULE_DEVICE_TABLE(of, stm32_qspi_match);
+
+static struct platform_driver stm32_qspi_driver = {
+	.probe	= stm32_qspi_probe,
+	.remove	= stm32_qspi_remove,
+	.driver	= {
+		.name = "stm32-quadspi",
+		.of_match_table = stm32_qspi_match,
+	},
+};
+module_platform_driver(stm32_qspi_driver);
+
+MODULE_AUTHOR("Ludovic Barre <ludovic.barre@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics STM32 quad spi driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index 5497e65439df..c3963f880448 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -313,10 +313,10 @@ static void ubiblock_do_work(struct work_struct *work)
 	ret = ubiblock_read(pdu);
 	rq_flush_dcache_pages(req);
 
-	blk_mq_end_request(req, ret);
+	blk_mq_end_request(req, errno_to_blk_status(ret));
 }
 
-static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
 			     const struct blk_mq_queue_data *bd)
 {
 	struct request *req = bd->rq;
@@ -327,9 +327,9 @@ static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
 	case REQ_OP_READ:
 		ubi_sgl_init(&pdu->usgl);
 		queue_work(dev->wq, &pdu->work);
-		return BLK_MQ_RQ_QUEUE_OK;
+		return BLK_STS_OK;
 	default:
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 
 }
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 8bae3731d039..93e5d251a9e4 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -74,10 +74,10 @@ struct mtd_dev_param {
 };
 
 /* Numbers of elements set in the @mtd_dev_param array */
-static int __initdata mtd_devs;
+static int mtd_devs;
 
 /* MTD devices specification parameters */
-static struct mtd_dev_param __initdata mtd_dev_param[UBI_MAX_DEVICES];
+static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES];
 #ifdef CONFIG_MTD_UBI_FASTMAP
 /* UBI module parameter to enable fastmap automatically on non-fastmap images */
 static bool fm_autoconvert;
@@ -1294,7 +1294,7 @@ module_exit(ubi_exit);
  * This function returns positive resulting integer in case of success and a
  * negative error code in case of failure.
  */
-static int __init bytes_str_to_int(const char *str)
+static int bytes_str_to_int(const char *str)
 {
 	char *endp;
 	unsigned long result;
@@ -1332,7 +1332,7 @@ static int __init bytes_str_to_int(const char *str)
  * This function returns zero in case of success and a negative error code in
  * case of error.
  */
-static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp)
+static int ubi_mtd_param_parse(const char *val, struct kernel_param *kp)
 {
 	int i, len;
 	struct mtd_dev_param *p;
@@ -1413,7 +1413,7 @@ static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp)
 	return 0;
 }
 
-module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 000);
+module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 0400);
 MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: mtd=<name|num|path>[,<vid_hdr_offs>[,max_beb_per1024[,ubi_num]]].\n"
 		      "Multiple \"mtd\" parameters may be specified.\n"
 		      "MTD devices may be specified by their number, name, or path to the MTD character device node.\n"
diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
index f101a4985a7c..7bc96294ae4d 100644
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -22,6 +22,7 @@
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/module.h>
+#include <linux/seq_file.h>
 
 
 /**
@@ -386,7 +387,9 @@ out:
 	return count;
 }
 
-/* File operations for all UBI debugfs files */
+/* File operations for all UBI debugfs files except
+ * detailed_erase_block_info
+ */
 static const struct file_operations dfs_fops = {
 	.read   = dfs_file_read,
 	.write  = dfs_file_write,
@@ -395,6 +398,121 @@ static const struct file_operations dfs_fops = {
 	.owner  = THIS_MODULE,
 };
 
+/* As long as the position is less then that total number of erase blocks,
+ * we still have more to print.
+ */
+static void *eraseblk_count_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct ubi_device *ubi = s->private;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	if (*pos < ubi->peb_count)
+		return pos;
+
+	return NULL;
+}
+
+/* Since we are using the position as the iterator, we just need to check if we
+ * are done and increment the position.
+ */
+static void *eraseblk_count_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct ubi_device *ubi = s->private;
+
+	if (v == SEQ_START_TOKEN)
+		return pos;
+	(*pos)++;
+
+	if (*pos < ubi->peb_count)
+		return pos;
+
+	return NULL;
+}
+
+static void eraseblk_count_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int eraseblk_count_seq_show(struct seq_file *s, void *iter)
+{
+	struct ubi_device *ubi = s->private;
+	struct ubi_wl_entry *wl;
+	int *block_number = iter;
+	int erase_count = -1;
+	int err;
+
+	/* If this is the start, print a header */
+	if (iter == SEQ_START_TOKEN) {
+		seq_puts(s,
+			 "physical_block_number\terase_count\tblock_status\tread_status\n");
+		return 0;
+	}
+
+	err = ubi_io_is_bad(ubi, *block_number);
+	if (err)
+		return err;
+
+	spin_lock(&ubi->wl_lock);
+
+	wl = ubi->lookuptbl[*block_number];
+	if (wl)
+		erase_count = wl->ec;
+
+	spin_unlock(&ubi->wl_lock);
+
+	if (erase_count < 0)
+		return 0;
+
+	seq_printf(s, "%-22d\t%-11d\n", *block_number, erase_count);
+
+	return 0;
+}
+
+static const struct seq_operations eraseblk_count_seq_ops = {
+	.start = eraseblk_count_seq_start,
+	.next = eraseblk_count_seq_next,
+	.stop = eraseblk_count_seq_stop,
+	.show = eraseblk_count_seq_show
+};
+
+static int eraseblk_count_open(struct inode *inode, struct file *f)
+{
+	struct seq_file *s;
+	int err;
+
+	err = seq_open(f, &eraseblk_count_seq_ops);
+	if (err)
+		return err;
+
+	s = f->private_data;
+	s->private = ubi_get_device((unsigned long)inode->i_private);
+
+	if (!s->private)
+		return -ENODEV;
+	else
+		return 0;
+}
+
+static int eraseblk_count_release(struct inode *inode, struct file *f)
+{
+	struct seq_file *s = f->private_data;
+	struct ubi_device *ubi = s->private;
+
+	ubi_put_device(ubi);
+
+	return seq_release(inode, f);
+}
+
+static const struct file_operations eraseblk_count_fops = {
+	.owner = THIS_MODULE,
+	.open = eraseblk_count_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = eraseblk_count_release,
+};
+
 /**
  * ubi_debugfs_init_dev - initialize debugfs for an UBI device.
  * @ubi: UBI device description object
@@ -491,6 +609,12 @@ int ubi_debugfs_init_dev(struct ubi_device *ubi)
 		goto out_remove;
 	d->dfs_power_cut_max = dent;
 
+	fname = "detailed_erase_block_info";
+	dent = debugfs_create_file(fname, S_IRUSR, d->dfs_dir, (void *)ubi_num,
+				   &eraseblk_count_fops);
+	if (IS_ERR_OR_NULL(dent))
+		goto out_remove;
+
 	return 0;
 
 out_remove:
diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c
index c1f5c29e458e..b44c8d348e78 100644
--- a/drivers/mtd/ubi/fastmap.c
+++ b/drivers/mtd/ubi/fastmap.c
@@ -828,6 +828,24 @@ static int find_fm_anchor(struct ubi_attach_info *ai)
 	return ret;
 }
 
+static struct ubi_ainf_peb *clone_aeb(struct ubi_attach_info *ai,
+				      struct ubi_ainf_peb *old)
+{
+	struct ubi_ainf_peb *new;
+
+	new = ubi_alloc_aeb(ai, old->pnum, old->ec);
+	if (!new)
+		return NULL;
+
+	new->vol_id = old->vol_id;
+	new->sqnum = old->sqnum;
+	new->lnum = old->lnum;
+	new->scrub = old->scrub;
+	new->copy_flag = old->copy_flag;
+
+	return new;
+}
+
 /**
  * ubi_scan_fastmap - scan the fastmap.
  * @ubi: UBI device object
@@ -847,7 +865,7 @@ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
 	struct ubi_vid_hdr *vh;
 	struct ubi_ec_hdr *ech;
 	struct ubi_fastmap_layout *fm;
-	struct ubi_ainf_peb *tmp_aeb, *aeb;
+	struct ubi_ainf_peb *aeb;
 	int i, used_blocks, pnum, fm_anchor, ret = 0;
 	size_t fm_size;
 	__be32 crc, tmp_crc;
@@ -857,9 +875,16 @@ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
 	if (fm_anchor < 0)
 		return UBI_NO_FASTMAP;
 
-	/* Move all (possible) fastmap blocks into our new attach structure. */
-	list_for_each_entry_safe(aeb, tmp_aeb, &scan_ai->fastmap, u.list)
-		list_move_tail(&aeb->u.list, &ai->fastmap);
+	/* Copy all (possible) fastmap blocks into our new attach structure. */
+	list_for_each_entry(aeb, &scan_ai->fastmap, u.list) {
+		struct ubi_ainf_peb *new;
+
+		new = clone_aeb(ai, aeb);
+		if (!new)
+			return -ENOMEM;
+
+		list_add(&new->u.list, &ai->fastmap);
+	}
 
 	down_write(&ubi->fm_protect);
 	memset(ubi->fm_buf, 0, ubi->fm_size);
diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index 1b2e9217ec78..486e1e6997fc 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -986,9 +986,9 @@ static int cops_close(struct net_device *dev)
 static struct net_device *cops_dev;
 
 MODULE_LICENSE("GPL");
-module_param(io, int, 0);
-module_param(irq, int, 0);
-module_param(board_type, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
+module_param_hw(board_type, int, other, 0);
 
 static int __init cops_module_init(void)
 {
diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index 01e2ac55c137..ac755d2950a6 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -1231,9 +1231,9 @@ static struct net_device *dev_ltpc;
 
 MODULE_LICENSE("GPL");
 module_param(debug, int, 0);
-module_param(io, int, 0);
-module_param(irq, int, 0);
-module_param(dma, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
+module_param_hw(dma, int, dma, 0);
 
 
 static int __init ltpc_module_init(void)
diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index 62ee439d5882..53a1cb551def 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -756,6 +756,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 	struct net_device *dev = dev_id;
 	struct arcnet_local *lp;
 	int recbuf, status, diagstatus, didsomething, boguscount;
+	unsigned long flags;
 	int retval = IRQ_NONE;
 
 	arc_printk(D_DURING, dev, "\n");
@@ -765,7 +766,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 	lp = netdev_priv(dev);
 	BUG_ON(!lp);
 
-	spin_lock(&lp->lock);
+	spin_lock_irqsave(&lp->lock, flags);
 
 	/* RESET flag was enabled - if device is not running, we must
 	 * clear it right away (but nothing else).
@@ -774,7 +775,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 		if (lp->hw.status(dev) & RESETflag)
 			lp->hw.command(dev, CFLAGScmd | RESETclear);
 		lp->hw.intmask(dev, 0);
-		spin_unlock(&lp->lock);
+		spin_unlock_irqrestore(&lp->lock, flags);
 		return retval;
 	}
 
@@ -998,7 +999,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 	udelay(1);
 	lp->hw.intmask(dev, lp->intmask);
 
-	spin_unlock(&lp->lock);
+	spin_unlock_irqrestore(&lp->lock, flags);
 	return retval;
 }
 EXPORT_SYMBOL(arcnet_interrupt);
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index 2056878fb087..4fa2e46b48d3 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -212,7 +212,7 @@ static int ack_tx(struct net_device *dev, int acked)
 	ackpkt->soft.cap.proto = 0; /* using protocol 0 for acknowledge */
 	ackpkt->soft.cap.mes.ack = acked;
 
-	arc_printk(D_PROTO, dev, "Ackknowledge for cap packet %x.\n",
+	arc_printk(D_PROTO, dev, "Acknowledge for cap packet %x.\n",
 		   *((int *)&ackpkt->soft.cap.cookie[0]));
 
 	ackskb->protocol = cpu_to_be16(ETH_P_ARCNET);
diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c
index b9e9931353b2..38fa60ddaf2e 100644
--- a/drivers/net/arcnet/com20020-isa.c
+++ b/drivers/net/arcnet/com20020-isa.c
@@ -129,8 +129,8 @@ static int clockp = 0;
 static int clockm = 0;
 
 module_param(node, int, 0);
-module_param(io, int, 0);
-module_param(irq, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
 module_param_string(device, device, sizeof(device), 0);
 module_param(timeout, int, 0);
 module_param(backplane, int, 0);
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index 239de38fbd6a..47f80b83dcf4 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -135,6 +135,7 @@ static int com20020pci_probe(struct pci_dev *pdev,
 	for (i = 0; i < ci->devcount; i++) {
 		struct com20020_pci_channel_map *cm = &ci->chan_map_tbl[i];
 		struct com20020_dev *card;
+		int dev_id_mask = 0xf;
 
 		dev = alloc_arcdev(device);
 		if (!dev) {
@@ -166,6 +167,7 @@ static int com20020pci_probe(struct pci_dev *pdev,
 		arcnet_outb(0x00, ioaddr, COM20020_REG_W_COMMAND);
 		arcnet_inb(ioaddr, COM20020_REG_R_DIAGSTAT);
 
+		SET_NETDEV_DEV(dev, &pdev->dev);
 		dev->base_addr = ioaddr;
 		dev->dev_addr[0] = node;
 		dev->irq = pdev->irq;
@@ -179,8 +181,8 @@ static int com20020pci_probe(struct pci_dev *pdev,
 
 		/* Get the dev_id from the PLX rotary coder */
 		if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15))
-			dev->dev_id = 0xc;
-		dev->dev_id ^= inb(priv->misc + ci->rotary) >> 4;
+			dev_id_mask = 0x3;
+		dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask;
 
 		snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i);
 
diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c
index 13d9ad4b3f5c..78043a9c5981 100644
--- a/drivers/net/arcnet/com20020.c
+++ b/drivers/net/arcnet/com20020.c
@@ -246,8 +246,6 @@ int com20020_found(struct net_device *dev, int shared)
 		return -ENODEV;
 	}
 
-	dev->base_addr = ioaddr;
-
 	arc_printk(D_NORMAL, dev, "%s: station %02Xh found at %03lXh, IRQ %d.\n",
 		   lp->card_name, dev->dev_addr[0], dev->base_addr, dev->irq);
 
diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c
index b57863df5bf5..4e56aaf2b984 100644
--- a/drivers/net/arcnet/com90io.c
+++ b/drivers/net/arcnet/com90io.c
@@ -347,8 +347,8 @@ static int io;			/* use the insmod io= irq= shmem= options */
 static int irq;
 static char device[9];		/* use eg. device=arc1 to change name */
 
-module_param(io, int, 0);
-module_param(irq, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
 module_param_string(device, device, sizeof(device), 0);
 MODULE_LICENSE("GPL");
 
diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c
index 81f90c4703ae..ca4a57c30bf8 100644
--- a/drivers/net/arcnet/com90xx.c
+++ b/drivers/net/arcnet/com90xx.c
@@ -88,8 +88,8 @@ static int irq;
 static int shmem;
 static char device[9];		/* use eg. device=arc1 to change name */
 
-module_param(io, int, 0);
-module_param(irq, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
 module_param(shmem, int, 0);
 module_param_string(device, device, sizeof(device), 0);
 
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index c5fd4259da33..e5386ab706ec 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -90,10 +90,13 @@ enum ad_link_speed_type {
 	AD_LINK_SPEED_100MBPS,
 	AD_LINK_SPEED_1000MBPS,
 	AD_LINK_SPEED_2500MBPS,
+	AD_LINK_SPEED_5000MBPS,
 	AD_LINK_SPEED_10000MBPS,
+	AD_LINK_SPEED_14000MBPS,
 	AD_LINK_SPEED_20000MBPS,
 	AD_LINK_SPEED_25000MBPS,
 	AD_LINK_SPEED_40000MBPS,
+	AD_LINK_SPEED_50000MBPS,
 	AD_LINK_SPEED_56000MBPS,
 	AD_LINK_SPEED_100000MBPS,
 };
@@ -259,10 +262,13 @@ static inline int __check_agg_selection_timer(struct port *port)
  *     %AD_LINK_SPEED_100MBPS,
  *     %AD_LINK_SPEED_1000MBPS,
  *     %AD_LINK_SPEED_2500MBPS,
+ *     %AD_LINK_SPEED_5000MBPS,
  *     %AD_LINK_SPEED_10000MBPS
+ *     %AD_LINK_SPEED_14000MBPS,
  *     %AD_LINK_SPEED_20000MBPS
  *     %AD_LINK_SPEED_25000MBPS
  *     %AD_LINK_SPEED_40000MBPS
+ *     %AD_LINK_SPEED_50000MBPS
  *     %AD_LINK_SPEED_56000MBPS
  *     %AD_LINK_SPEED_100000MBPS
  */
@@ -296,10 +302,18 @@ static u16 __get_link_speed(struct port *port)
 			speed = AD_LINK_SPEED_2500MBPS;
 			break;
 
+		case SPEED_5000:
+			speed = AD_LINK_SPEED_5000MBPS;
+			break;
+
 		case SPEED_10000:
 			speed = AD_LINK_SPEED_10000MBPS;
 			break;
 
+		case SPEED_14000:
+			speed = AD_LINK_SPEED_14000MBPS;
+			break;
+
 		case SPEED_20000:
 			speed = AD_LINK_SPEED_20000MBPS;
 			break;
@@ -312,6 +326,10 @@ static u16 __get_link_speed(struct port *port)
 			speed = AD_LINK_SPEED_40000MBPS;
 			break;
 
+		case SPEED_50000:
+			speed = AD_LINK_SPEED_50000MBPS;
+			break;
+
 		case SPEED_56000:
 			speed = AD_LINK_SPEED_56000MBPS;
 			break;
@@ -707,9 +725,15 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator)
 		case AD_LINK_SPEED_2500MBPS:
 			bandwidth = nports * 2500;
 			break;
+		case AD_LINK_SPEED_5000MBPS:
+			bandwidth = nports * 5000;
+			break;
 		case AD_LINK_SPEED_10000MBPS:
 			bandwidth = nports * 10000;
 			break;
+		case AD_LINK_SPEED_14000MBPS:
+			bandwidth = nports * 14000;
+			break;
 		case AD_LINK_SPEED_20000MBPS:
 			bandwidth = nports * 20000;
 			break;
@@ -719,6 +743,9 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator)
 		case AD_LINK_SPEED_40000MBPS:
 			bandwidth = nports * 40000;
 			break;
+		case AD_LINK_SPEED_50000MBPS:
+			bandwidth = nports * 50000;
+			break;
 		case AD_LINK_SPEED_56000MBPS:
 			bandwidth = nports * 56000;
 			break;
@@ -2577,7 +2604,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
 		return -1;
 
 	ad_info->aggregator_id = aggregator->aggregator_identifier;
-	ad_info->ports = aggregator->num_of_ports;
+	ad_info->ports = __agg_active_ports(aggregator);
 	ad_info->actor_key = aggregator->actor_oper_aggregator_key;
 	ad_info->partner_key = aggregator->partner_oper_aggregator_key;
 	ether_addr_copy(ad_info->partner_system,
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2be78807fd6e..8ab6bdbe1682 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2612,11 +2612,13 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
 	bond_for_each_slave_rcu(bond, slave, iter) {
 		unsigned long trans_start = dev_trans_start(slave->dev);
 
+		slave->new_link = BOND_LINK_NOCHANGE;
+
 		if (slave->link != BOND_LINK_UP) {
 			if (bond_time_in_interval(bond, trans_start, 1) &&
 			    bond_time_in_interval(bond, slave->last_rx, 1)) {
 
-				slave->link  = BOND_LINK_UP;
+				slave->new_link = BOND_LINK_UP;
 				slave_state_changed = 1;
 
 				/* primary_slave has no meaning in round-robin
@@ -2643,7 +2645,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
 			if (!bond_time_in_interval(bond, trans_start, 2) ||
 			    !bond_time_in_interval(bond, slave->last_rx, 2)) {
 
-				slave->link  = BOND_LINK_DOWN;
+				slave->new_link = BOND_LINK_DOWN;
 				slave_state_changed = 1;
 
 				if (slave->link_failure_count < UINT_MAX)
@@ -2674,6 +2676,11 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
 		if (!rtnl_trylock())
 			goto re_arm;
 
+		bond_for_each_slave(bond, slave, iter) {
+			if (slave->new_link != BOND_LINK_NOCHANGE)
+				slave->link = slave->new_link;
+		}
+
 		if (slave_state_changed) {
 			bond_slave_state_change(bond);
 			if (BOND_MODE(bond) == BOND_MODE_XOR)
@@ -4185,7 +4192,6 @@ static void bond_destructor(struct net_device *bond_dev)
 	struct bonding *bond = netdev_priv(bond_dev);
 	if (bond->wq)
 		destroy_workqueue(bond->wq);
-	free_netdev(bond_dev);
 }
 
 void bond_setup(struct net_device *bond_dev)
@@ -4205,7 +4211,8 @@ void bond_setup(struct net_device *bond_dev)
 	bond_dev->netdev_ops = &bond_netdev_ops;
 	bond_dev->ethtool_ops = &bond_ethtool_ops;
 
-	bond_dev->destructor = bond_destructor;
+	bond_dev->needs_free_netdev = true;
+	bond_dev->priv_destructor = bond_destructor;
 
 	SET_NETDEV_DEVTYPE(bond_dev, &bond_type);
 
@@ -4271,10 +4278,10 @@ static int bond_check_params(struct bond_params *params)
 	int arp_validate_value, fail_over_mac_value, primary_reselect_value, i;
 	struct bond_opt_value newval;
 	const struct bond_opt_value *valptr;
-	int arp_all_targets_value;
+	int arp_all_targets_value = 0;
 	u16 ad_actor_sys_prio = 0;
 	u16 ad_user_port_key = 0;
-	__be32 arp_target[BOND_MAX_ARP_TARGETS];
+	__be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0 };
 	int arp_ip_count;
 	int bond_mode	= BOND_MODE_ROUNDROBIN;
 	int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
@@ -4501,7 +4508,6 @@ static int bond_check_params(struct bond_params *params)
 		arp_validate_value = 0;
 	}
 
-	arp_all_targets_value = 0;
 	if (arp_all_targets) {
 		bond_opt_initstr(&newval, arp_all_targets);
 		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS),
@@ -4730,7 +4736,7 @@ int bond_create(struct net *net, const char *name)
 
 	rtnl_unlock();
 	if (res < 0)
-		bond_destructor(bond_dev);
+		free_netdev(bond_dev);
 	return res;
 }
 
diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c
index ddabce759456..71a7c3b44fdd 100644
--- a/drivers/net/caif/caif_hsi.c
+++ b/drivers/net/caif/caif_hsi.c
@@ -1121,7 +1121,7 @@ static void cfhsi_setup(struct net_device *dev)
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
 	dev->mtu = CFHSI_MAX_CAIF_FRAME_SZ;
 	dev->priv_flags |= IFF_NO_QUEUE;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->netdev_ops = &cfhsi_netdevops;
 	for (i = 0; i < CFHSI_PRIO_LAST; ++i)
 		skb_queue_head_init(&cfhsi->qhead[i]);
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index c2dea4916e5d..76e1d3545105 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -428,7 +428,7 @@ static void caifdev_setup(struct net_device *dev)
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
 	dev->mtu = CAIF_MAX_MTU;
 	dev->priv_flags |= IFF_NO_QUEUE;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	skb_queue_head_init(&serdev->head);
 	serdev->common.link_select = CAIF_LINK_LOW_LATENCY;
 	serdev->common.use_frag = true;
diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c
index 3a529fbe539f..fc21afe852b9 100644
--- a/drivers/net/caif/caif_spi.c
+++ b/drivers/net/caif/caif_spi.c
@@ -712,7 +712,7 @@ static void cfspi_setup(struct net_device *dev)
 	dev->flags = IFF_NOARP | IFF_POINTOPOINT;
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->mtu = SPI_MAX_PAYLOAD_SIZE;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	skb_queue_head_init(&cfspi->qhead);
 	skb_queue_head_init(&cfspi->chead);
 	cfspi->cfdev.link_select = CAIF_LINK_HIGH_BANDW;
diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c
index bc0eb47eccee..1794ea0420b7 100644
--- a/drivers/net/caif/caif_virtio.c
+++ b/drivers/net/caif/caif_virtio.c
@@ -617,7 +617,7 @@ static void cfv_netdev_setup(struct net_device *netdev)
 	netdev->tx_queue_len = 100;
 	netdev->flags = IFF_POINTOPOINT | IFF_NOARP;
 	netdev->mtu = CFV_DEF_MTU_SIZE;
-	netdev->destructor = free_netdev;
+	netdev->needs_free_netdev = true;
 }
 
 /* Create debugfs counters for the device */
@@ -679,8 +679,7 @@ static int cfv_probe(struct virtio_device *vdev)
 		goto err;
 
 	/* Get the TX virtio ring. This is a "guest side vring". */
-	err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names,
-			NULL);
+	err = virtio_find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names, NULL);
 	if (err)
 		goto err;
 
diff --git a/drivers/net/can/cc770/cc770_isa.c b/drivers/net/can/cc770/cc770_isa.c
index e0d15711e9ac..3a30fd3b4498 100644
--- a/drivers/net/can/cc770/cc770_isa.c
+++ b/drivers/net/can/cc770/cc770_isa.c
@@ -82,16 +82,16 @@ static u8 cor[MAXDEV] = {[0 ... (MAXDEV - 1)] = 0xff};
 static u8 bcr[MAXDEV] = {[0 ... (MAXDEV - 1)] = 0xff};
 static int indirect[MAXDEV] = {[0 ... (MAXDEV - 1)] = -1};
 
-module_param_array(port, ulong, NULL, S_IRUGO);
+module_param_hw_array(port, ulong, ioport, NULL, S_IRUGO);
 MODULE_PARM_DESC(port, "I/O port number");
 
-module_param_array(mem, ulong, NULL, S_IRUGO);
+module_param_hw_array(mem, ulong, iomem, NULL, S_IRUGO);
 MODULE_PARM_DESC(mem, "I/O memory address");
 
-module_param_array(indirect, int, NULL, S_IRUGO);
+module_param_hw_array(indirect, int, ioport, NULL, S_IRUGO);
 MODULE_PARM_DESC(indirect, "Indirect access via address and data port");
 
-module_param_array(irq, int, NULL, S_IRUGO);
+module_param_hw_array(irq, int, irq, NULL, S_IRUGO);
 MODULE_PARM_DESC(irq, "IRQ number");
 
 module_param_array(clk, int, NULL, S_IRUGO);
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 611d16a7061d..ae4ed03dc642 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -391,6 +391,9 @@ void can_change_state(struct net_device *dev, struct can_frame *cf,
 	can_update_state_error_stats(dev, new_state);
 	priv->state = new_state;
 
+	if (!cf)
+		return;
+
 	if (unlikely(new_state == CAN_STATE_BUS_OFF)) {
 		cf->can_id |= CAN_ERR_BUSOFF;
 		return;
diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c
index 0d57be5ea97b..85268be0c913 100644
--- a/drivers/net/can/peak_canfd/peak_canfd.c
+++ b/drivers/net/can/peak_canfd/peak_canfd.c
@@ -489,7 +489,7 @@ int peak_canfd_handle_msgs_list(struct peak_canfd_priv *priv,
 				struct pucan_rx_msg *msg_list, int msg_count)
 {
 	void *msg_ptr = msg_list;
-	int i, msg_size;
+	int i, msg_size = 0;
 
 	for (i = 0; i < msg_count; i++) {
 		msg_size = peak_canfd_handle_msg(priv, msg_ptr);
diff --git a/drivers/net/can/sja1000/sja1000_isa.c b/drivers/net/can/sja1000/sja1000_isa.c
index e97e6d35b300..a89c1e92554d 100644
--- a/drivers/net/can/sja1000/sja1000_isa.c
+++ b/drivers/net/can/sja1000/sja1000_isa.c
@@ -48,16 +48,16 @@ static unsigned char ocr[MAXDEV] = {[0 ... (MAXDEV - 1)] = 0xff};
 static int indirect[MAXDEV] = {[0 ... (MAXDEV - 1)] = -1};
 static spinlock_t indirect_lock[MAXDEV];  /* lock for indirect access mode */
 
-module_param_array(port, ulong, NULL, S_IRUGO);
+module_param_hw_array(port, ulong, ioport, NULL, S_IRUGO);
 MODULE_PARM_DESC(port, "I/O port number");
 
-module_param_array(mem, ulong, NULL, S_IRUGO);
+module_param_hw_array(mem, ulong, iomem, NULL, S_IRUGO);
 MODULE_PARM_DESC(mem, "I/O memory address");
 
-module_param_array(indirect, int, NULL, S_IRUGO);
+module_param_hw_array(indirect, int, ioport, NULL, S_IRUGO);
 MODULE_PARM_DESC(indirect, "Indirect access via address and data port");
 
-module_param_array(irq, int, NULL, S_IRUGO);
+module_param_hw_array(irq, int, irq, NULL, S_IRUGO);
 MODULE_PARM_DESC(irq, "IRQ number");
 
 module_param_array(clk, int, NULL, S_IRUGO);
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index eb7173713bbc..6a6e896e52fa 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -417,7 +417,7 @@ static int slc_open(struct net_device *dev)
 static void slc_free_netdev(struct net_device *dev)
 {
 	int i = dev->base_addr;
-	free_netdev(dev);
+
 	slcan_devs[i] = NULL;
 }
 
@@ -436,7 +436,8 @@ static const struct net_device_ops slc_netdev_ops = {
 static void slc_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &slc_netdev_ops;
-	dev->destructor		= slc_free_netdev;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= slc_free_netdev;
 
 	dev->hard_header_len	= 0;
 	dev->addr_len		= 0;
@@ -761,8 +762,6 @@ static void __exit slcan_exit(void)
 		if (sl->tty) {
 			printk(KERN_ERR "%s: tty discipline still running\n",
 			       dev->name);
-			/* Intentionally leak the control block. */
-			dev->destructor = NULL;
 		}
 
 		unregister_netdev(dev);
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index eecee7f8dfb7..afcc1312dbaf 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -265,6 +265,8 @@ static int gs_cmd_reset(struct gs_usb *gsusb, struct gs_can *gsdev)
 			     sizeof(*dm),
 			     1000);
 
+	kfree(dm);
+
 	return rc;
 }
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index 57913dbbae0a..1ca76e03e965 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -908,8 +908,6 @@ static int peak_usb_probe(struct usb_interface *intf,
 	const struct peak_usb_adapter *peak_usb_adapter = NULL;
 	int i, err = -ENOMEM;
 
-	usb_dev = interface_to_usbdev(intf);
-
 	/* get corresponding PCAN-USB adapter */
 	for (i = 0; i < ARRAY_SIZE(peak_usb_adapters_list); i++)
 		if (peak_usb_adapters_list[i]->device_id == usb_id_product) {
@@ -920,7 +918,7 @@ static int peak_usb_probe(struct usb_interface *intf,
 	if (!peak_usb_adapter) {
 		/* should never come except device_id bad usage in this file */
 		pr_err("%s: didn't find device id. 0x%x in devices list\n",
-			PCAN_USB_DRIVER_NAME, usb_dev->descriptor.idProduct);
+			PCAN_USB_DRIVER_NAME, usb_id_product);
 		return -ENODEV;
 	}
 
diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c
index facca33d53e9..a8cb33264ff1 100644
--- a/drivers/net/can/vcan.c
+++ b/drivers/net/can/vcan.c
@@ -152,7 +152,7 @@ static const struct net_device_ops vcan_netdev_ops = {
 static void vcan_setup(struct net_device *dev)
 {
 	dev->type		= ARPHRD_CAN;
-	dev->mtu		= CAN_MTU;
+	dev->mtu		= CANFD_MTU;
 	dev->hard_header_len	= 0;
 	dev->addr_len		= 0;
 	dev->tx_queue_len	= 0;
@@ -163,7 +163,7 @@ static void vcan_setup(struct net_device *dev)
 		dev->flags |= IFF_ECHO;
 
 	dev->netdev_ops		= &vcan_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 static struct rtnl_link_ops vcan_link_ops __read_mostly = {
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
index 7fbb24795681..cfe889e8f172 100644
--- a/drivers/net/can/vxcan.c
+++ b/drivers/net/can/vxcan.c
@@ -150,13 +150,13 @@ static const struct net_device_ops vxcan_netdev_ops = {
 static void vxcan_setup(struct net_device *dev)
 {
 	dev->type		= ARPHRD_CAN;
-	dev->mtu		= CAN_MTU;
+	dev->mtu		= CANFD_MTU;
 	dev->hard_header_len	= 0;
 	dev->addr_len		= 0;
 	dev->tx_queue_len	= 0;
 	dev->flags		= (IFF_NOARP|IFF_ECHO);
 	dev->netdev_ops		= &vxcan_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 /* forward declaration for rtnl_create_link() */
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 19581d783d8e..d034d8cd7d22 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -849,6 +849,9 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
 		mv88e6xxx_g1_stats_read(chip, reg, &low);
 		if (s->sizeof_stat == 8)
 			mv88e6xxx_g1_stats_read(chip, reg + 1, &high);
+		break;
+	default:
+		return UINT64_MAX;
 	}
 	value = (((u64)high) << 16) | low;
 	return value;
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index 96046bb12ca1..14c0be98e0a4 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -114,13 +114,13 @@ static inline int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
 	return -EOPNOTSUPP;
 }
 
-int mv88e6xxx_g2_pvt_write(struct mv88e6xxx_chip *chip, int src_dev,
-			   int src_port, u16 data)
+static inline int mv88e6xxx_g2_pvt_write(struct mv88e6xxx_chip *chip,
+					 int src_dev, int src_port, u16 data)
 {
 	return -EOPNOTSUPP;
 }
 
-int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip)
+static inline int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 149244aac20a..9905b52fe293 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -328,7 +328,6 @@ static void dummy_free_netdev(struct net_device *dev)
 	struct dummy_priv *priv = netdev_priv(dev);
 
 	kfree(priv->vfinfo);
-	free_netdev(dev);
 }
 
 static void dummy_setup(struct net_device *dev)
@@ -338,7 +337,8 @@ static void dummy_setup(struct net_device *dev)
 	/* Initialize the device structure. */
 	dev->netdev_ops = &dummy_netdev_ops;
 	dev->ethtool_ops = &dummy_ethtool_ops;
-	dev->destructor = dummy_free_netdev;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = dummy_free_netdev;
 
 	/* Fill in device structure with ethernet-generic values. */
 	dev->flags |= IFF_NOARP;
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index c7f9f2c77da7..db8592d412ab 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -1371,7 +1371,7 @@ el3_resume(struct device *pdev)
 #endif /* CONFIG_PM */
 
 module_param(debug,int, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param(max_interrupt_work, int, 0);
 MODULE_PARM_DESC(debug, "debug level (0-6)");
 MODULE_PARM_DESC(irq, "IRQ number(s) (assigned)");
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 40196f41768a..e41245a54f8b 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -813,8 +813,8 @@ module_param(global_enable_wol, int, 0);
 module_param_array(enable_wol, int, NULL, 0);
 module_param(rx_copybreak, int, 0);
 module_param(max_interrupt_work, int, 0);
-module_param(compaq_ioaddr, int, 0);
-module_param(compaq_irq, int, 0);
+module_param_hw(compaq_ioaddr, int, ioport, 0);
+module_param_hw(compaq_irq, int, irq, 0);
 module_param(compaq_device_id, int, 0);
 module_param(watchdog, int, 0);
 module_param(global_use_mmio, int, 0);
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index b0a3b85fc6f8..db02bc2fb4b2 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -748,13 +748,13 @@ static int ax_init_dev(struct net_device *dev)
 
 	ret = ax_mii_init(dev);
 	if (ret)
-		goto out_irq;
+		goto err_out;
 
 	ax_NS8390_init(dev, 0);
 
 	ret = register_netdev(dev);
 	if (ret)
-		goto out_irq;
+		goto err_out;
 
 	netdev_info(dev, "%dbit, irq %d, %lx, MAC: %pM\n",
 		    ei_local->word16 ? 16 : 8, dev->irq, dev->base_addr,
@@ -762,9 +762,6 @@ static int ax_init_dev(struct net_device *dev)
 
 	return 0;
 
- out_irq:
-	/* cleanup irq */
-	free_irq(dev->irq, dev);
  err_out:
 	return ret;
 }
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c
index c063b410a163..66f47987e2a2 100644
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -74,8 +74,8 @@ static int bad[MAX_NE_CARDS];
 static u32 ne_msg_enable;
 
 #ifdef MODULE
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_array(bad, int, NULL, 0);
 module_param_named(msg_enable, ne_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
 MODULE_PARM_DESC(io, "I/O base address(es),required");
diff --git a/drivers/net/ethernet/8390/smc-ultra.c b/drivers/net/ethernet/8390/smc-ultra.c
index 364b6514f65f..4e02f6a23575 100644
--- a/drivers/net/ethernet/8390/smc-ultra.c
+++ b/drivers/net/ethernet/8390/smc-ultra.c
@@ -561,8 +561,8 @@ static struct net_device *dev_ultra[MAX_ULTRA_CARDS];
 static int io[MAX_ULTRA_CARDS];
 static int irq[MAX_ULTRA_CARDS];
 
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_named(msg_enable, ultra_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
 MODULE_PARM_DESC(io, "I/O base address(es)");
 MODULE_PARM_DESC(irq, "IRQ number(s) (assigned)");
diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c
index ad019cbc698f..6efa2722f850 100644
--- a/drivers/net/ethernet/8390/wd.c
+++ b/drivers/net/ethernet/8390/wd.c
@@ -503,10 +503,10 @@ static int irq[MAX_WD_CARDS];
 static int mem[MAX_WD_CARDS];
 static int mem_end[MAX_WD_CARDS];	/* for non std. mem size */
 
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
-module_param_array(mem, int, NULL, 0);
-module_param_array(mem_end, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
+module_param_hw_array(mem, int, iomem, NULL, 0);
+module_param_hw_array(mem_end, int, iomem, NULL, 0);
 module_param_named(msg_enable, wd_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
 MODULE_PARM_DESC(io, "I/O base address(es)");
 MODULE_PARM_DESC(irq, "IRQ number(s) (ignored for PureData boards)");
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 08d11cede9c9..f5b237e0bd60 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -61,6 +61,8 @@
 
 #define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF
 
+#define ENA_REGS_ADMIN_INTR_MASK 1
+
 /*****************************************************************************/
 /*****************************************************************************/
 /*****************************************************************************/
@@ -232,11 +234,9 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu
 	tail_masked = admin_queue->sq.tail & queue_size_mask;
 
 	/* In case of queue FULL */
-	cnt = admin_queue->sq.tail - admin_queue->sq.head;
+	cnt = atomic_read(&admin_queue->outstanding_cmds);
 	if (cnt >= admin_queue->q_depth) {
-		pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n",
-			 admin_queue->sq.tail, admin_queue->sq.head,
-			 admin_queue->q_depth);
+		pr_debug("admin queue is full.\n");
 		admin_queue->stats.out_of_space++;
 		return ERR_PTR(-ENOSPC);
 	}
@@ -508,15 +508,20 @@ static int ena_com_comp_status_to_errno(u8 comp_status)
 static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx,
 						     struct ena_com_admin_queue *admin_queue)
 {
-	unsigned long flags;
-	u32 start_time;
+	unsigned long flags, timeout;
 	int ret;
 
-	start_time = ((u32)jiffies_to_usecs(jiffies));
+	timeout = jiffies + ADMIN_CMD_TIMEOUT_US;
+
+	while (1) {
+		spin_lock_irqsave(&admin_queue->q_lock, flags);
+		ena_com_handle_admin_completion(admin_queue);
+		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+		if (comp_ctx->status != ENA_CMD_SUBMITTED)
+			break;
 
-	while (comp_ctx->status == ENA_CMD_SUBMITTED) {
-		if ((((u32)jiffies_to_usecs(jiffies)) - start_time) >
-		    ADMIN_CMD_TIMEOUT_US) {
+		if (time_is_before_jiffies(timeout)) {
 			pr_err("Wait for completion (polling) timeout\n");
 			/* ENA didn't have any completion */
 			spin_lock_irqsave(&admin_queue->q_lock, flags);
@@ -528,10 +533,6 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c
 			goto err;
 		}
 
-		spin_lock_irqsave(&admin_queue->q_lock, flags);
-		ena_com_handle_admin_completion(admin_queue);
-		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
-
 		msleep(100);
 	}
 
@@ -1455,6 +1456,12 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev)
 
 void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling)
 {
+	u32 mask_value = 0;
+
+	if (polling)
+		mask_value = ENA_REGS_ADMIN_INTR_MASK;
+
+	writel(mask_value, ena_dev->reg_bar + ENA_REGS_INTR_MASK_OFF);
 	ena_dev->admin_queue.polling = polling;
 }
 
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 67b2338f8fb3..3ee55e2fd694 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -80,7 +80,6 @@ static const struct ena_stats ena_stats_tx_strings[] = {
 	ENA_STAT_TX_ENTRY(tx_poll),
 	ENA_STAT_TX_ENTRY(doorbells),
 	ENA_STAT_TX_ENTRY(prepare_ctx_err),
-	ENA_STAT_TX_ENTRY(missing_tx_comp),
 	ENA_STAT_TX_ENTRY(bad_req_id),
 };
 
@@ -94,6 +93,7 @@ static const struct ena_stats ena_stats_rx_strings[] = {
 	ENA_STAT_RX_ENTRY(dma_mapping_err),
 	ENA_STAT_RX_ENTRY(bad_desc_num),
 	ENA_STAT_RX_ENTRY(rx_copybreak_pkt),
+	ENA_STAT_RX_ENTRY(empty_rx_ring),
 };
 
 static const struct ena_stats ena_stats_ena_com_strings[] = {
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 7c1214d78855..4f16ed38bcf3 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -190,6 +190,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter)
 		rxr->sgl_size = adapter->max_rx_sgl_size;
 		rxr->smoothed_interval =
 			ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
+		rxr->empty_rx_queue = 0;
 	}
 }
 
@@ -1078,6 +1079,26 @@ inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring,
 	rx_ring->per_napi_bytes = 0;
 }
 
+static inline void ena_unmask_interrupt(struct ena_ring *tx_ring,
+					struct ena_ring *rx_ring)
+{
+	struct ena_eth_io_intr_reg intr_reg;
+
+	/* Update intr register: rx intr delay,
+	 * tx intr delay and interrupt unmask
+	 */
+	ena_com_update_intr_reg(&intr_reg,
+				rx_ring->smoothed_interval,
+				tx_ring->smoothed_interval,
+				true);
+
+	/* It is a shared MSI-X.
+	 * Tx and Rx CQ have pointer to it.
+	 * So we use one of them to reach the intr reg
+	 */
+	ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+}
+
 static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring,
 					     struct ena_ring *rx_ring)
 {
@@ -1108,7 +1129,6 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
 {
 	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
 	struct ena_ring *tx_ring, *rx_ring;
-	struct ena_eth_io_intr_reg intr_reg;
 
 	u32 tx_work_done;
 	u32 rx_work_done;
@@ -1149,22 +1169,9 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
 			if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
 				ena_adjust_intr_moderation(rx_ring, tx_ring);
 
-			/* Update intr register: rx intr delay,
-			 * tx intr delay and interrupt unmask
-			 */
-			ena_com_update_intr_reg(&intr_reg,
-						rx_ring->smoothed_interval,
-						tx_ring->smoothed_interval,
-						true);
-
-			/* It is a shared MSI-X.
-			 * Tx and Rx CQ have pointer to it.
-			 * So we use one of them to reach the intr reg
-			 */
-			ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+			ena_unmask_interrupt(tx_ring, rx_ring);
 		}
 
-
 		ena_update_ring_numa_node(tx_ring, rx_ring);
 
 		ret = rx_work_done;
@@ -1485,6 +1492,11 @@ static int ena_up_complete(struct ena_adapter *adapter)
 
 	ena_napi_enable_all(adapter);
 
+	/* Enable completion queues interrupt */
+	for (i = 0; i < adapter->num_queues; i++)
+		ena_unmask_interrupt(&adapter->tx_ring[i],
+				     &adapter->rx_ring[i]);
+
 	/* schedule napi in case we had pending packets
 	 * from the last time we disable napi
 	 */
@@ -1532,6 +1544,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
 			  "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
 			  qid, rc);
 		ena_com_destroy_io_queue(ena_dev, ena_qid);
+		return rc;
 	}
 
 	ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
@@ -1596,6 +1609,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
 			  "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
 			  qid, rc);
 		ena_com_destroy_io_queue(ena_dev, ena_qid);
+		return rc;
 	}
 
 	ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
@@ -1981,6 +1995,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tx_info->tx_descs = nb_hw_desc;
 	tx_info->last_jiffies = jiffies;
+	tx_info->print_once = 0;
 
 	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
 		tx_ring->ring_size);
@@ -2550,13 +2565,44 @@ err:
 		"Reset attempt failed. Can not reset the device\n");
 }
 
-static void check_for_missing_tx_completions(struct ena_adapter *adapter)
+static int check_missing_comp_in_queue(struct ena_adapter *adapter,
+				       struct ena_ring *tx_ring)
 {
 	struct ena_tx_buffer *tx_buf;
 	unsigned long last_jiffies;
+	u32 missed_tx = 0;
+	int i;
+
+	for (i = 0; i < tx_ring->ring_size; i++) {
+		tx_buf = &tx_ring->tx_buffer_info[i];
+		last_jiffies = tx_buf->last_jiffies;
+		if (unlikely(last_jiffies &&
+			     time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
+			if (!tx_buf->print_once)
+				netif_notice(adapter, tx_err, adapter->netdev,
+					     "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
+					     tx_ring->qid, i);
+
+			tx_buf->print_once = 1;
+			missed_tx++;
+
+			if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
+				netif_err(adapter, tx_err, adapter->netdev,
+					  "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
+					  missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
+				set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+				return -EIO;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void check_for_missing_tx_completions(struct ena_adapter *adapter)
+{
 	struct ena_ring *tx_ring;
-	int i, j, budget;
-	u32 missed_tx;
+	int i, budget, rc;
 
 	/* Make sure the driver doesn't turn the device in other process */
 	smp_rmb();
@@ -2572,31 +2618,9 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
 	for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
 		tx_ring = &adapter->tx_ring[i];
 
-		for (j = 0; j < tx_ring->ring_size; j++) {
-			tx_buf = &tx_ring->tx_buffer_info[j];
-			last_jiffies = tx_buf->last_jiffies;
-			if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
-				netif_notice(adapter, tx_err, adapter->netdev,
-					     "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
-					     tx_ring->qid, j);
-
-				u64_stats_update_begin(&tx_ring->syncp);
-				missed_tx = tx_ring->tx_stats.missing_tx_comp++;
-				u64_stats_update_end(&tx_ring->syncp);
-
-				/* Clear last jiffies so the lost buffer won't
-				 * be counted twice.
-				 */
-				tx_buf->last_jiffies = 0;
-
-				if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
-					netif_err(adapter, tx_err, adapter->netdev,
-						  "The number of lost tx completion is above the threshold (%d > %d). Reset the device\n",
-						  missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
-					set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
-				}
-			}
-		}
+		rc = check_missing_comp_in_queue(adapter, tx_ring);
+		if (unlikely(rc))
+			return;
 
 		budget--;
 		if (!budget)
@@ -2606,6 +2630,58 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
 	adapter->last_monitored_tx_qid = i % adapter->num_queues;
 }
 
+/* trigger napi schedule after 2 consecutive detections */
+#define EMPTY_RX_REFILL 2
+/* For the rare case where the device runs out of Rx descriptors and the
+ * napi handler failed to refill new Rx descriptors (due to a lack of memory
+ * for example).
+ * This case will lead to a deadlock:
+ * The device won't send interrupts since all the new Rx packets will be dropped
+ * The napi handler won't allocate new Rx descriptors so the device will be
+ * able to send new packets.
+ *
+ * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
+ * It is recommended to have at least 512MB, with a minimum of 128MB for
+ * constrained environment).
+ *
+ * When such a situation is detected - Reschedule napi
+ */
+static void check_for_empty_rx_ring(struct ena_adapter *adapter)
+{
+	struct ena_ring *rx_ring;
+	int i, refill_required;
+
+	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+		return;
+
+	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+		return;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		rx_ring = &adapter->rx_ring[i];
+
+		refill_required =
+			ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
+		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
+			rx_ring->empty_rx_queue++;
+
+			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
+				u64_stats_update_begin(&rx_ring->syncp);
+				rx_ring->rx_stats.empty_rx_ring++;
+				u64_stats_update_end(&rx_ring->syncp);
+
+				netif_err(adapter, drv, adapter->netdev,
+					  "trigger refill for ring %d\n", i);
+
+				napi_schedule(rx_ring->napi);
+				rx_ring->empty_rx_queue = 0;
+			}
+		} else {
+			rx_ring->empty_rx_queue = 0;
+		}
+	}
+}
+
 /* Check for keep alive expiration */
 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
 {
@@ -2660,6 +2736,8 @@ static void ena_timer_service(unsigned long data)
 
 	check_for_missing_tx_completions(adapter);
 
+	check_for_empty_rx_ring(adapter);
+
 	if (debug_area)
 		ena_dump_stats_to_buf(adapter, debug_area);
 
@@ -2840,6 +2918,11 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
 {
 	int release_bars;
 
+	if (ena_dev->mem_bar)
+		devm_iounmap(&pdev->dev, ena_dev->mem_bar);
+
+	devm_iounmap(&pdev->dev, ena_dev->reg_bar);
+
 	release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
 	pci_release_selected_regions(pdev, release_bars);
 }
@@ -2927,8 +3010,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_free_ena_dev;
 	}
 
-	ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR),
-				   pci_resource_len(pdev, ENA_REG_BAR));
+	ena_dev->reg_bar = devm_ioremap(&pdev->dev,
+					pci_resource_start(pdev, ENA_REG_BAR),
+					pci_resource_len(pdev, ENA_REG_BAR));
 	if (!ena_dev->reg_bar) {
 		dev_err(&pdev->dev, "failed to remap regs bar\n");
 		rc = -EFAULT;
@@ -2948,8 +3032,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
 
 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
-		ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR),
-					      pci_resource_len(pdev, ENA_MEM_BAR));
+		ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
+						   pci_resource_start(pdev, ENA_MEM_BAR),
+						   pci_resource_len(pdev, ENA_MEM_BAR));
 		if (!ena_dev->mem_bar) {
 			rc = -EFAULT;
 			goto err_device_destroy;
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 0e22bce6239d..a4d3d5e21068 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -45,7 +45,7 @@
 
 #define DRV_MODULE_VER_MAJOR	1
 #define DRV_MODULE_VER_MINOR	1
-#define DRV_MODULE_VER_SUBMINOR 2
+#define DRV_MODULE_VER_SUBMINOR 7
 
 #define DRV_MODULE_NAME		"ena"
 #ifndef DRV_MODULE_VERSION
@@ -146,7 +146,18 @@ struct ena_tx_buffer {
 	u32 tx_descs;
 	/* num of buffers used by this skb */
 	u32 num_of_bufs;
-	/* Save the last jiffies to detect missing tx packets */
+
+	/* Used for detect missing tx packets to limit the number of prints */
+	u32 print_once;
+	/* Save the last jiffies to detect missing tx packets
+	 *
+	 * sets to non zero value on ena_start_xmit and set to zero on
+	 * napi and timer_Service_routine.
+	 *
+	 * while this value is not protected by lock,
+	 * a given packet is not expected to be handled by ena_start_xmit
+	 * and by napi/timer_service at the same time.
+	 */
 	unsigned long last_jiffies;
 	struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
 } ____cacheline_aligned;
@@ -170,7 +181,6 @@ struct ena_stats_tx {
 	u64 napi_comp;
 	u64 tx_poll;
 	u64 doorbells;
-	u64 missing_tx_comp;
 	u64 bad_req_id;
 };
 
@@ -184,6 +194,7 @@ struct ena_stats_rx {
 	u64 dma_mapping_err;
 	u64 bad_desc_num;
 	u64 rx_copybreak_pkt;
+	u64 empty_rx_ring;
 };
 
 struct ena_ring {
@@ -231,6 +242,7 @@ struct ena_ring {
 		struct ena_stats_tx tx_stats;
 		struct ena_stats_rx rx_stats;
 	};
+	int empty_rx_queue;
 } ____cacheline_aligned;
 
 struct ena_stats_dev {
diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c
index 61a641f23149..12a6a93d221b 100644
--- a/drivers/net/ethernet/amd/lance.c
+++ b/drivers/net/ethernet/amd/lance.c
@@ -318,9 +318,9 @@ static int io[MAX_CARDS];
 static int dma[MAX_CARDS];
 static int irq[MAX_CARDS];
 
-module_param_array(io, int, NULL, 0);
-module_param_array(dma, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(dma, int, dma, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param(lance_debug, int, 0);
 MODULE_PARM_DESC(io, "LANCE/PCnet I/O base address(es),required");
 MODULE_PARM_DESC(dma, "LANCE/PCnet ISA DMA channel (ignored for some devices)");
diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c
index 5985bf220a8d..e248d1ab3e47 100644
--- a/drivers/net/ethernet/amd/ni65.c
+++ b/drivers/net/ethernet/amd/ni65.c
@@ -1227,9 +1227,9 @@ static void set_multicast_list(struct net_device *dev)
 #ifdef MODULE
 static struct net_device *dev_ni65;
 
-module_param(irq, int, 0);
-module_param(io, int, 0);
-module_param(dma, int, 0);
+module_param_hw(irq, int, irq, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(dma, int, dma, 0);
 MODULE_PARM_DESC(irq, "ni6510 IRQ number (ignored for some cards)");
 MODULE_PARM_DESC(io, "ni6510 I/O base address");
 MODULE_PARM_DESC(dma, "ni6510 ISA DMA channel (ignored for some cards)");
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
index b3bc87fe3764..0a98c369df20 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
@@ -324,7 +324,7 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata,
 			      struct xgbe_ring *ring,
 			      struct xgbe_ring_data *rdata)
 {
-	int order, ret;
+	int ret;
 
 	if (!ring->rx_hdr_pa.pages) {
 		ret = xgbe_alloc_pages(pdata, &ring->rx_hdr_pa, GFP_ATOMIC, 0);
@@ -333,9 +333,8 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata,
 	}
 
 	if (!ring->rx_buf_pa.pages) {
-		order = max_t(int, PAGE_ALLOC_COSTLY_ORDER - 1, 0);
 		ret = xgbe_alloc_pages(pdata, &ring->rx_buf_pa, GFP_ATOMIC,
-				       order);
+				       PAGE_ALLOC_COSTLY_ORDER);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 4ee15ff06a44..faeb4935ef3e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -200,29 +200,18 @@ err_exit:
 static int hw_atl_a0_hw_offload_set(struct aq_hw_s *self,
 				    struct aq_nic_cfg_s *aq_nic_cfg)
 {
-	int err = 0;
-
 	/* TX checksums offloads*/
 	tpo_ipv4header_crc_offload_en_set(self, 1);
 	tpo_tcp_udp_crc_offload_en_set(self, 1);
-	if (err < 0)
-		goto err_exit;
 
 	/* RX checksums offloads*/
 	rpo_ipv4header_crc_offload_en_set(self, 1);
 	rpo_tcp_udp_crc_offload_en_set(self, 1);
-	if (err < 0)
-		goto err_exit;
 
 	/* LSO offloads*/
 	tdm_large_send_offload_en_set(self, 0xFFFFFFFFU);
-	if (err < 0)
-		goto err_exit;
-
-	err = aq_hw_err_from_flags(self);
 
-err_exit:
-	return err;
+	return aq_hw_err_from_flags(self);
 }
 
 static int hw_atl_a0_hw_init_tx_path(struct aq_hw_s *self)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 42150708191d..1bceb7358e5c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -200,25 +200,18 @@ err_exit:
 static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
 				    struct aq_nic_cfg_s *aq_nic_cfg)
 {
-	int err = 0;
 	unsigned int i;
 
 	/* TX checksums offloads*/
 	tpo_ipv4header_crc_offload_en_set(self, 1);
 	tpo_tcp_udp_crc_offload_en_set(self, 1);
-	if (err < 0)
-		goto err_exit;
 
 	/* RX checksums offloads*/
 	rpo_ipv4header_crc_offload_en_set(self, 1);
 	rpo_tcp_udp_crc_offload_en_set(self, 1);
-	if (err < 0)
-		goto err_exit;
 
 	/* LSO offloads*/
 	tdm_large_send_offload_en_set(self, 0xFFFFFFFFU);
-	if (err < 0)
-		goto err_exit;
 
 /* LRO offloads */
 	{
@@ -245,10 +238,7 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
 
 		rpo_lro_en_set(self, aq_nic_cfg->is_lro ? 0xFFFFFFFFU : 0U);
 	}
-	err = aq_hw_err_from_flags(self);
-
-err_exit:
-	return err;
+	return aq_hw_err_from_flags(self);
 }
 
 static int hw_atl_b0_hw_init_tx_path(struct aq_hw_s *self)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index b8e3d88f0879..a66aee51ab5b 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -193,9 +193,6 @@ int hw_atl_utils_hw_get_regs(struct aq_hw_s *self,
 			     struct aq_hw_caps_s *aq_hw_caps,
 			     u32 *regs_buff);
 
-int hw_atl_utils_hw_get_settings(struct aq_hw_s *self,
-				 struct ethtool_cmd *cmd);
-
 int hw_atl_utils_hw_set_power(struct aq_hw_s *self,
 			      unsigned int power_state);
 
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index 63f2deec2a52..77a1c03255de 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -1353,6 +1353,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) &&
 		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) {
 		printk(KERN_ERR "atl2: No usable DMA configuration, aborting\n");
+		err = -EIO;
 		goto err_dma;
 	}
 
@@ -1366,10 +1367,11 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 * pcibios_set_master to do the needed arch specific settings */
 	pci_set_master(pdev);
 
-	err = -ENOMEM;
 	netdev = alloc_etherdev(sizeof(struct atl2_adapter));
-	if (!netdev)
+	if (!netdev) {
+		err = -ENOMEM;
 		goto err_alloc_etherdev;
+	}
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 
@@ -1408,8 +1410,6 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto err_sw_init;
 
-	err = -EIO;
-
 	netdev->hw_features = NETIF_F_HW_VLAN_CTAG_RX;
 	netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
 
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 099b374c1b17..5274501428e4 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2026,9 +2026,12 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 	priv->num_rx_desc_words = params->num_rx_desc_words;
 
 	priv->irq0 = platform_get_irq(pdev, 0);
-	if (!priv->is_lite)
+	if (!priv->is_lite) {
 		priv->irq1 = platform_get_irq(pdev, 1);
-	priv->wol_irq = platform_get_irq(pdev, 2);
+		priv->wol_irq = platform_get_irq(pdev, 2);
+	} else {
+		priv->wol_irq = platform_get_irq(pdev, 1);
+	}
 	if (priv->irq0 <= 0 || (priv->irq1 <= 0 && !priv->is_lite)) {
 		dev_err(&pdev->dev, "invalid interrupts\n");
 		ret = -EINVAL;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index eccb3d1b6abb..f619c4cac51f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1926,7 +1926,7 @@ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
 	}
 
 	/* select a non-FCoE queue */
-	return fallback(dev, skb) % BNX2X_NUM_ETH_QUEUES(bp);
+	return fallback(dev, skb) % (BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos);
 }
 
 void bnx2x_set_num_queues(struct bnx2x *bp)
@@ -3883,15 +3883,26 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		/* when transmitting in a vf, start bd must hold the ethertype
 		 * for fw to enforce it
 		 */
+		u16 vlan_tci = 0;
 #ifndef BNX2X_STOP_ON_ERROR
-		if (IS_VF(bp))
+		if (IS_VF(bp)) {
 #endif
-			tx_start_bd->vlan_or_ethertype =
-				cpu_to_le16(ntohs(eth->h_proto));
+			/* Still need to consider inband vlan for enforced */
+			if (__vlan_get_tag(skb, &vlan_tci)) {
+				tx_start_bd->vlan_or_ethertype =
+					cpu_to_le16(ntohs(eth->h_proto));
+			} else {
+				tx_start_bd->bd_flags.as_bitfield |=
+					(X_ETH_INBAND_VLAN <<
+					 ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
+				tx_start_bd->vlan_or_ethertype =
+					cpu_to_le16(vlan_tci);
+			}
 #ifndef BNX2X_STOP_ON_ERROR
-		else
+		} else {
 			/* used by FW for packet accounting */
 			tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
+		}
 #endif
 	}
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index a851f95c307a..349a46593abf 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12729,7 +12729,7 @@ static int bnx2x_set_mc_list(struct bnx2x *bp)
 	} else {
 		/* If no mc addresses are required, flush the configuration */
 		rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
-		if (rc)
+		if (rc < 0)
 			BNX2X_ERR("Failed to clear multicast configuration %d\n",
 				  rc);
 	}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index bdfd53b46bc5..9ca994d0bab6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -901,6 +901,8 @@ static void bnx2x_vf_flr(struct bnx2x *bp, struct bnx2x_virtf *vf)
 	/* release VF resources */
 	bnx2x_vf_free_resc(bp, vf);
 
+	vf->malicious = false;
+
 	/* re-open the mailbox */
 	bnx2x_vf_enable_mbx(bp, vf->abs_vfid);
 	return;
@@ -1822,9 +1824,11 @@ get_vf:
 		   vf->abs_vfid, qidx);
 		bnx2x_vf_handle_rss_update_eqe(bp, vf);
 	case EVENT_RING_OPCODE_VF_FLR:
-	case EVENT_RING_OPCODE_MALICIOUS_VF:
 		/* Do nothing for now */
 		return 0;
+	case EVENT_RING_OPCODE_MALICIOUS_VF:
+		vf->malicious = true;
+		return 0;
 	}
 
 	return 0;
@@ -1905,6 +1909,13 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
 			continue;
 		}
 
+		if (vf->malicious) {
+			DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
+			       "vf %d malicious so no stats for it\n",
+			       vf->abs_vfid);
+			continue;
+		}
+
 		DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
 		       "add addresses for vf %d\n", vf->abs_vfid);
 		for_each_vfq(vf, j) {
@@ -3042,7 +3053,7 @@ void bnx2x_vf_pci_dealloc(struct bnx2x *bp)
 {
 	BNX2X_PCI_FREE(bp->vf2pf_mbox, bp->vf2pf_mbox_mapping,
 		       sizeof(struct bnx2x_vf_mbx_msg));
-	BNX2X_PCI_FREE(bp->vf2pf_mbox, bp->pf2vf_bulletin_mapping,
+	BNX2X_PCI_FREE(bp->pf2vf_bulletin, bp->pf2vf_bulletin_mapping,
 		       sizeof(union pf_vf_bulletin));
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 888d0b6632e8..53466f6cebab 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -141,6 +141,7 @@ struct bnx2x_virtf {
 #define VF_RESET	3	/* VF FLR'd, pending cleanup */
 
 	bool flr_clnup_stage;	/* true during flr cleanup */
+	bool malicious;		/* true if FW indicated so, until FLR */
 
 	/* dma */
 	dma_addr_t fw_stat_map;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b56c54d68d5e..74e8e215524d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1301,10 +1301,11 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
 		cp_cons = NEXT_CMP(cp_cons);
 	}
 
-	if (unlikely(agg_bufs > MAX_SKB_FRAGS)) {
+	if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) {
 		bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs);
-		netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
-			    agg_bufs, (int)MAX_SKB_FRAGS);
+		if (agg_bufs > MAX_SKB_FRAGS)
+			netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
+				    agg_bufs, (int)MAX_SKB_FRAGS);
 		return NULL;
 	}
 
@@ -1562,6 +1563,45 @@ next_rx_no_prod:
 	return rc;
 }
 
+/* In netpoll mode, if we are using a combined completion ring, we need to
+ * discard the rx packets and recycle the buffers.
+ */
+static int bnxt_force_rx_discard(struct bnxt *bp, struct bnxt_napi *bnapi,
+				 u32 *raw_cons, u8 *event)
+{
+	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+	u32 tmp_raw_cons = *raw_cons;
+	struct rx_cmp_ext *rxcmp1;
+	struct rx_cmp *rxcmp;
+	u16 cp_cons;
+	u8 cmp_type;
+
+	cp_cons = RING_CMP(tmp_raw_cons);
+	rxcmp = (struct rx_cmp *)
+			&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+
+	tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
+	cp_cons = RING_CMP(tmp_raw_cons);
+	rxcmp1 = (struct rx_cmp_ext *)
+			&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+
+	if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
+		return -EBUSY;
+
+	cmp_type = RX_CMP_TYPE(rxcmp);
+	if (cmp_type == CMP_TYPE_RX_L2_CMP) {
+		rxcmp1->rx_cmp_cfa_code_errors_v2 |=
+			cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR);
+	} else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
+		struct rx_tpa_end_cmp_ext *tpa_end1;
+
+		tpa_end1 = (struct rx_tpa_end_cmp_ext *)rxcmp1;
+		tpa_end1->rx_tpa_end_cmp_errors_v2 |=
+			cpu_to_le32(RX_TPA_END_CMP_ERRORS);
+	}
+	return bnxt_rx_pkt(bp, bnapi, raw_cons, event);
+}
+
 #define BNXT_GET_EVENT_PORT(data)	\
 	((data) &			\
 	 ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK)
@@ -1744,7 +1784,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
 			if (unlikely(tx_pkts > bp->tx_wake_thresh))
 				rx_pkts = budget;
 		} else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) {
-			rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+			if (likely(budget))
+				rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+			else
+				rc = bnxt_force_rx_discard(bp, bnapi, &raw_cons,
+							   &event);
 			if (likely(rc >= 0))
 				rx_pkts += rc;
 			else if (rc == -EBUSY)	/* partial completion */
@@ -6663,12 +6707,11 @@ static void bnxt_poll_controller(struct net_device *dev)
 	struct bnxt *bp = netdev_priv(dev);
 	int i;
 
-	for (i = 0; i < bp->cp_nr_rings; i++) {
-		struct bnxt_irq *irq = &bp->irq_tbl[i];
+	/* Only process tx rings/combined rings in netpoll mode. */
+	for (i = 0; i < bp->tx_nr_rings; i++) {
+		struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
 
-		disable_irq(irq->vector);
-		irq->handler(irq->vector, bp->bnapi[i]);
-		enable_irq(irq->vector);
+		napi_schedule(&txr->bnapi->napi);
 	}
 }
 #endif
@@ -7630,8 +7673,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->min_mtu = ETH_ZLEN;
 	dev->max_mtu = BNXT_MAX_MTU;
 
-	bnxt_dcb_init(bp);
-
 #ifdef CONFIG_BNXT_SRIOV
 	init_waitqueue_head(&bp->sriov_cfg_wait);
 #endif
@@ -7669,6 +7710,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	bnxt_hwrm_func_qcfg(bp);
 	bnxt_hwrm_port_led_qcaps(bp);
 	bnxt_ethtool_init(bp);
+	bnxt_dcb_init(bp);
 
 	bnxt_set_rx_skb_mode(bp, false);
 	bnxt_set_tpa_flags(bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 3ef42dbc6327..d46a85041083 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -374,12 +374,16 @@ struct rx_tpa_end_cmp_ext {
 
 	__le32 rx_tpa_end_cmp_errors_v2;
 	#define RX_TPA_END_CMP_V2				(0x1 << 0)
-	#define RX_TPA_END_CMP_ERRORS				(0x7fff << 1)
+	#define RX_TPA_END_CMP_ERRORS				(0x3 << 1)
 	#define RX_TPA_END_CMPL_ERRORS_SHIFT			 1
 
 	u32 rx_tpa_end_cmp_start_opaque;
 };
 
+#define TPA_END_ERRORS(rx_tpa_end_ext)					\
+	((rx_tpa_end_ext)->rx_tpa_end_cmp_errors_v2 &			\
+	 cpu_to_le32(RX_TPA_END_CMP_ERRORS))
+
 #define DB_IDX_MASK						0xffffff
 #define DB_IDX_VALID						(0x1 << 26)
 #define DB_IRQ_DIS						(0x1 << 27)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index 46de2f8ff024..5c6dd0ce209f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -553,8 +553,10 @@ static u8 bnxt_dcbnl_setdcbx(struct net_device *dev, u8 mode)
 	if ((mode & DCB_CAP_DCBX_VER_CEE) || !(mode & DCB_CAP_DCBX_VER_IEEE))
 		return 1;
 
-	if ((mode & DCB_CAP_DCBX_HOST) && BNXT_VF(bp))
-		return 1;
+	if (mode & DCB_CAP_DCBX_HOST) {
+		if (BNXT_VF(bp) || (bp->flags & BNXT_FLAG_FW_LLDP_AGENT))
+			return 1;
+	}
 
 	if (mode == bp->dcbx_cap)
 		return 0;
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index 55c8e25b43d9..16a0f192daec 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -2641,3 +2641,4 @@ static struct platform_driver sbmac_driver = {
 };
 
 module_platform_driver(sbmac_driver);
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index bed9ef17bc26..7ccffbb0019e 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -144,7 +144,7 @@ static inline int
 sleep_cond(wait_queue_head_t *wait_queue, int *condition)
 {
 	int errno = 0;
-	wait_queue_t we;
+	wait_queue_entry_t we;
 
 	init_waitqueue_entry(&we, current);
 	add_wait_queue(wait_queue, &we);
@@ -171,7 +171,7 @@ sleep_timeout_cond(wait_queue_head_t *wait_queue,
 		   int *condition,
 		   int timeout)
 {
-	wait_queue_t we;
+	wait_queue_entry_t we;
 
 	init_waitqueue_entry(&we, current);
 	add_wait_queue(wait_queue, &we);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 38a5c6764bb5..53309f659951 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2171,9 +2171,10 @@ static int cxgb_up(struct adapter *adap)
 {
 	int err;
 
+	mutex_lock(&uld_mutex);
 	err = setup_sge_queues(adap);
 	if (err)
-		goto out;
+		goto rel_lock;
 	err = setup_rss(adap);
 	if (err)
 		goto freeq;
@@ -2196,23 +2197,28 @@ static int cxgb_up(struct adapter *adap)
 		if (err)
 			goto irq_err;
 	}
+
 	enable_rx(adap);
 	t4_sge_start(adap);
 	t4_intr_enable(adap);
 	adap->flags |= FULL_INIT_DONE;
+	mutex_unlock(&uld_mutex);
+
 	notify_ulds(adap, CXGB4_STATE_UP);
 #if IS_ENABLED(CONFIG_IPV6)
 	update_clip(adap);
 #endif
 	/* Initialize hash mac addr list*/
 	INIT_LIST_HEAD(&adap->mac_hlist);
- out:
 	return err;
+
  irq_err:
 	dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err);
  freeq:
 	t4_free_sge_resources(adap);
-	goto out;
+ rel_lock:
+	mutex_unlock(&uld_mutex);
+	return err;
 }
 
 static void cxgb_down(struct adapter *adapter)
@@ -2771,6 +2777,9 @@ void t4_fatal_err(struct adapter *adap)
 {
 	int port;
 
+	if (pci_channel_offline(adap->pdev))
+		return;
+
 	/* Disable the SGE since ULDs are going to free resources that
 	 * could be exposed to the adapter.  RDMA MWs for example...
 	 */
@@ -3882,9 +3891,10 @@ static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev,
 	spin_lock(&adap->stats_lock);
 	for_each_port(adap, i) {
 		struct net_device *dev = adap->port[i];
-
-		netif_device_detach(dev);
-		netif_carrier_off(dev);
+		if (dev) {
+			netif_device_detach(dev);
+			netif_carrier_off(dev);
+		}
 	}
 	spin_unlock(&adap->stats_lock);
 	disable_interrupts(adap);
@@ -3963,12 +3973,13 @@ static void eeh_resume(struct pci_dev *pdev)
 	rtnl_lock();
 	for_each_port(adap, i) {
 		struct net_device *dev = adap->port[i];
-
-		if (netif_running(dev)) {
-			link_start(dev);
-			cxgb_set_rxmode(dev);
+		if (dev) {
+			if (netif_running(dev)) {
+				link_start(dev);
+				cxgb_set_rxmode(dev);
+			}
+			netif_device_attach(dev);
 		}
-		netif_device_attach(dev);
 	}
 	rtnl_unlock();
 }
@@ -4516,7 +4527,7 @@ static void dummy_setup(struct net_device *dev)
 	/* Initialize the device structure. */
 	dev->netdev_ops = &cxgb4_mgmt_netdev_ops;
 	dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 }
 
 static int config_mgmt_dev(struct pci_dev *pdev)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index aded42b96f6d..3a34aa629f7d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -4557,8 +4557,13 @@ void t4_intr_enable(struct adapter *adapter)
  */
 void t4_intr_disable(struct adapter *adapter)
 {
-	u32 whoami = t4_read_reg(adapter, PL_WHOAMI_A);
-	u32 pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
+	u32 whoami, pf;
+
+	if (pci_channel_offline(adapter->pdev))
+		return;
+
+	whoami = t4_read_reg(adapter, PL_WHOAMI_A);
+	pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
 			SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
 
 	t4_write_reg(adapter, MYPF_REG(PL_PF_INT_ENABLE_A), 0);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
index fa376444e57c..f2d623a7aee0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
@@ -37,7 +37,7 @@
 
 #define T4FW_VERSION_MAJOR 0x01
 #define T4FW_VERSION_MINOR 0x10
-#define T4FW_VERSION_MICRO 0x21
+#define T4FW_VERSION_MICRO 0x2D
 #define T4FW_VERSION_BUILD 0x00
 
 #define T4FW_MIN_VERSION_MAJOR 0x01
@@ -46,7 +46,7 @@
 
 #define T5FW_VERSION_MAJOR 0x01
 #define T5FW_VERSION_MINOR 0x10
-#define T5FW_VERSION_MICRO 0x21
+#define T5FW_VERSION_MICRO 0x2D
 #define T5FW_VERSION_BUILD 0x00
 
 #define T5FW_MIN_VERSION_MAJOR 0x00
@@ -55,7 +55,7 @@
 
 #define T6FW_VERSION_MAJOR 0x01
 #define T6FW_VERSION_MINOR 0x10
-#define T6FW_VERSION_MICRO 0x21
+#define T6FW_VERSION_MICRO 0x2D
 #define T6FW_VERSION_BUILD 0x00
 
 #define T6FW_MIN_VERSION_MAJOR 0x00
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index 47384f7323ac..da5b58b853e2 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -1704,12 +1704,12 @@ static int use_dma;			/* These generate unused var warnings if ALLOW_DMA = 0 */
 static int dma;
 static int dmasize = 16;		/* or 64 */
 
-module_param(io, int, 0);
-module_param(irq, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
 module_param(debug, int, 0);
 module_param_string(media, media, sizeof(media), 0);
 module_param(duplex, int, 0);
-module_param(dma , int, 0);
+module_param_hw(dma , int, dma, 0);
 module_param(dmasize , int, 0);
 module_param(use_dma , int, 0);
 MODULE_PARM_DESC(io, "cs89x0 I/O base address");
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index df4a871df633..fd6bcf024729 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -1015,7 +1015,7 @@ static int     compact_infoblock(struct net_device *dev, u_char count, u_char *p
 
 static int io=0x0;/* EDIT THIS LINE FOR YOUR CONFIGURATION IF NEEDED        */
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 module_param(de4x5_debug, int, 0);
 module_param(dec_only, int, 0);
 module_param(args, charp, 0);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index f3a09ab55900..4eee18ce9be4 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -5078,9 +5078,11 @@ static netdev_features_t be_features_check(struct sk_buff *skb,
 	struct be_adapter *adapter = netdev_priv(dev);
 	u8 l4_hdr = 0;
 
-	/* The code below restricts offload features for some tunneled packets.
+	/* The code below restricts offload features for some tunneled and
+	 * Q-in-Q packets.
 	 * Offload features for normal (non tunnel) packets are unchanged.
 	 */
+	features = vlan_features_check(skb, features);
 	if (!skb->encapsulation ||
 	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
 		return features;
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index e863ba74d005..8bb0db990c8f 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -739,6 +739,8 @@ static int ethoc_open(struct net_device *dev)
 	if (ret)
 		return ret;
 
+	napi_enable(&priv->napi);
+
 	ethoc_init_ring(priv, dev->mem_start);
 	ethoc_reset(priv);
 
@@ -754,7 +756,6 @@ static int ethoc_open(struct net_device *dev)
 	priv->old_duplex = -1;
 
 	phy_start(dev->phydev);
-	napi_enable(&priv->napi);
 
 	if (netif_msg_ifup(priv)) {
 		dev_info(&dev->dev, "I/O: %08lx Memory: %08lx-%08lx\n",
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 6ac336b546e6..1536356e2ea8 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1174,11 +1174,17 @@ static int ftmac100_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct of_device_id ftmac100_of_ids[] = {
+	{ .compatible = "andestech,atmac100" },
+	{ }
+};
+
 static struct platform_driver ftmac100_driver = {
 	.probe		= ftmac100_probe,
 	.remove		= ftmac100_remove,
 	.driver		= {
 		.name	= DRV_NAME,
+		.of_match_table = ftmac100_of_ids
 	},
 };
 
@@ -1202,3 +1208,4 @@ module_exit(ftmac100_exit);
 MODULE_AUTHOR("Po-Yu Chuang <ratbert@faraday-tech.com>");
 MODULE_DESCRIPTION("FTMAC100 driver");
 MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(of, ftmac100_of_ids);
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 9a520e4f0df9..290ad0563320 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2647,7 +2647,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */
 
 	/* device used for DMA mapping */
-	arch_setup_dma_ops(dev, 0, 0, NULL, false);
+	set_dma_ops(dev, get_dma_ops(&pdev->dev));
 	err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40));
 	if (err) {
 		dev_err(dev, "dma_coerce_mask_and_coherent() failed\n");
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 56a563f90b0b..f7c8649fd28f 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3192,7 +3192,7 @@ static int fec_reset_phy(struct platform_device *pdev)
 {
 	int err, phy_reset;
 	bool active_high = false;
-	int msec = 1;
+	int msec = 1, phy_post_delay = 0;
 	struct device_node *np = pdev->dev.of_node;
 
 	if (!np)
@@ -3209,6 +3209,11 @@ static int fec_reset_phy(struct platform_device *pdev)
 	else if (!gpio_is_valid(phy_reset))
 		return 0;
 
+	err = of_property_read_u32(np, "phy-reset-post-delay", &phy_post_delay);
+	/* valid reset duration should be less than 1s */
+	if (!err && phy_post_delay > 1000)
+		return -EINVAL;
+
 	active_high = of_property_read_bool(np, "phy-reset-active-high");
 
 	err = devm_gpio_request_one(&pdev->dev, phy_reset,
@@ -3226,6 +3231,15 @@ static int fec_reset_phy(struct platform_device *pdev)
 
 	gpio_set_value_cansleep(phy_reset, !active_high);
 
+	if (!phy_post_delay)
+		return 0;
+
+	if (phy_post_delay > 20)
+		msleep(phy_post_delay);
+	else
+		usleep_range(phy_post_delay * 1000,
+			     phy_post_delay * 1000 + 1000);
+
 	return 0;
 }
 #else /* CONFIG_OF */
diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig
index dc0850b3b517..8870a9a798ca 100644
--- a/drivers/net/ethernet/freescale/fman/Kconfig
+++ b/drivers/net/ethernet/freescale/fman/Kconfig
@@ -2,6 +2,7 @@ config FSL_FMAN
 	tristate "FMan support"
 	depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST
 	select GENERIC_ALLOCATOR
+	depends on HAS_DMA
 	select PHYLIB
 	default n
 	help
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 0b31f8502ada..6e67d22fd0d5 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -623,6 +623,8 @@ static struct platform_device *dpaa_eth_add_device(int fman_id,
 		goto no_mem;
 	}
 
+	set_dma_ops(&pdev->dev, get_dma_ops(priv->dev));
+
 	ret = platform_device_add_data(pdev, &data, sizeof(data));
 	if (ret)
 		goto err;
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index 446c7b374ff5..a10de1e9c157 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -381,7 +381,7 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *id =
 		of_match_device(fsl_pq_mdio_match, &pdev->dev);
-	const struct fsl_pq_mdio_data *data = id->data;
+	const struct fsl_pq_mdio_data *data;
 	struct device_node *np = pdev->dev.of_node;
 	struct resource res;
 	struct device_node *tbi;
@@ -389,6 +389,13 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev)
 	struct mii_bus *new_bus;
 	int err;
 
+	if (!id) {
+		dev_err(&pdev->dev, "Failed to match device\n");
+		return -ENODEV;
+	}
+
+	data = id->data;
+
 	dev_dbg(&pdev->dev, "found %s compatible node\n", id->compatible);
 
 	new_bus = mdiobus_alloc_size(sizeof(*priv));
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 3f7ae9f64cd8..f77ba9fa257b 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -2594,11 +2594,10 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 		} else if (ugeth->ug_info->uf_info.bd_mem_part ==
 			   MEM_PART_MURAM) {
 			out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].bd_ring_base,
-				 (u32) immrbar_virt_to_phys(ugeth->
-							    p_tx_bd_ring[i]));
+				 (u32)qe_muram_dma(ugeth->p_tx_bd_ring[i]));
 			out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].
 				 last_bd_completed_address,
-				 (u32) immrbar_virt_to_phys(endOfRing));
+				 (u32)qe_muram_dma(endOfRing));
 		}
 	}
 
@@ -2844,8 +2843,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 		} else if (ugeth->ug_info->uf_info.bd_mem_part ==
 			   MEM_PART_MURAM) {
 			out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr,
-				 (u32) immrbar_virt_to_phys(ugeth->
-							    p_rx_bd_ring[i]));
+				 (u32)qe_muram_dma(ugeth->p_rx_bd_ring[i]));
 		}
 		/* rest of fields handled by QE */
 	}
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index e13aa064a8e9..7a8addda726e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -29,10 +29,9 @@ enum _dsm_rst_type {
 	HNS_ROCE_RESET_FUNC     = 0x7,
 };
 
-const u8 hns_dsaf_acpi_dsm_uuid[] = {
-	0x1A, 0xAA, 0x85, 0x1A, 0x93, 0xE2, 0x5E, 0x41,
-	0x8E, 0x28, 0x8D, 0x69, 0x0A, 0x0F, 0x82, 0x0A
-};
+static const guid_t hns_dsaf_acpi_dsm_guid =
+	GUID_INIT(0x1A85AA1A, 0xE293, 0x415E,
+		  0x8E, 0x28, 0x8D, 0x69, 0x0A, 0x0F, 0x82, 0x0A);
 
 static void dsaf_write_sub(struct dsaf_device *dsaf_dev, u32 reg, u32 val)
 {
@@ -151,7 +150,7 @@ static void hns_dsaf_acpi_srst_by_port(struct dsaf_device *dsaf_dev, u8 op_type,
 	argv4.package.elements = obj_args;
 
 	obj = acpi_evaluate_dsm(ACPI_HANDLE(dsaf_dev->dev),
-				hns_dsaf_acpi_dsm_uuid, 0, op_type, &argv4);
+				&hns_dsaf_acpi_dsm_guid, 0, op_type, &argv4);
 	if (!obj) {
 		dev_warn(dsaf_dev->dev, "reset port_type%d port%d fail!",
 			 port_type, port);
@@ -434,7 +433,7 @@ static phy_interface_t hns_mac_get_phy_if_acpi(struct hns_mac_cb *mac_cb)
 	argv4.package.elements = &obj_args,
 
 	obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
-				hns_dsaf_acpi_dsm_uuid, 0,
+				&hns_dsaf_acpi_dsm_guid, 0,
 				HNS_OP_GET_PORT_TYPE_FUNC, &argv4);
 
 	if (!obj || obj->type != ACPI_TYPE_INTEGER)
@@ -474,7 +473,7 @@ int hns_mac_get_sfp_prsnt_acpi(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
 	argv4.package.elements = &obj_args,
 
 	obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
-				hns_dsaf_acpi_dsm_uuid, 0,
+				&hns_dsaf_acpi_dsm_guid, 0,
 				HNS_OP_GET_SFP_STAT_FUNC, &argv4);
 
 	if (!obj || obj->type != ACPI_TYPE_INTEGER)
@@ -565,7 +564,7 @@ hns_mac_config_sds_loopback_acpi(struct hns_mac_cb *mac_cb, bool en)
 	argv4.package.elements = obj_args;
 
 	obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dsaf_dev->dev),
-				hns_dsaf_acpi_dsm_uuid, 0,
+				&hns_dsaf_acpi_dsm_guid, 0,
 				HNS_OP_SERDES_LP_FUNC, &argv4);
 	if (!obj) {
 		dev_warn(mac_cb->dsaf_dev->dev, "set port%d serdes lp fail!",
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index b8fab149690f..e95795b3c841 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -288,9 +288,15 @@ static int hns_nic_config_phy_loopback(struct phy_device *phy_dev, u8 en)
 
 		/* Force 1000M Link, Default is 0x0200 */
 		phy_write(phy_dev, 7, 0x20C);
-		phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
 
-		/* Enable PHY loop-back */
+		/* Powerup Fiber */
+		phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
+		val = phy_read(phy_dev, COPPER_CONTROL_REG);
+		val &= ~PHY_POWER_DOWN;
+		phy_write(phy_dev, COPPER_CONTROL_REG, val);
+
+		/* Enable Phy Loopback */
+		phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
 		val = phy_read(phy_dev, COPPER_CONTROL_REG);
 		val |= PHY_LOOP_BACK;
 		val &= ~PHY_POWER_DOWN;
@@ -299,6 +305,12 @@ static int hns_nic_config_phy_loopback(struct phy_device *phy_dev, u8 en)
 		phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA);
 		phy_write(phy_dev, 1, 0x400);
 		phy_write(phy_dev, 7, 0x200);
+
+		phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
+		val = phy_read(phy_dev, COPPER_CONTROL_REG);
+		val |= PHY_POWER_DOWN;
+		phy_write(phy_dev, COPPER_CONTROL_REG, val);
+
 		phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
 		phy_write(phy_dev, 9, 0xF00);
 
diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c
index 1a31bee6e728..5673b071e39d 100644
--- a/drivers/net/ethernet/hp/hp100.c
+++ b/drivers/net/ethernet/hp/hp100.c
@@ -2966,7 +2966,7 @@ MODULE_DESCRIPTION("HP CASCADE Architecture Driver for 100VG-AnyLan Network Adap
 #define HP100_DEVICES 5
 /* Parameters set by insmod */
 static int hp100_port[HP100_DEVICES] = { 0, [1 ... (HP100_DEVICES-1)] = -1 };
-module_param_array(hp100_port, int, NULL, 0);
+module_param_hw_array(hp100_port, int, ioport, NULL, 0);
 
 /* List of devices */
 static struct net_device *hp100_devlist[HP100_DEVICES];
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 508923f39ccf..259e69a52ec5 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -343,6 +343,7 @@ static int emac_reset(struct emac_instance *dev)
 {
 	struct emac_regs __iomem *p = dev->emacp;
 	int n = 20;
+	bool __maybe_unused try_internal_clock = false;
 
 	DBG(dev, "reset" NL);
 
@@ -355,6 +356,7 @@ static int emac_reset(struct emac_instance *dev)
 	}
 
 #ifdef CONFIG_PPC_DCR_NATIVE
+do_retry:
 	/*
 	 * PPC460EX/GT Embedded Processor Advanced User's Manual
 	 * section 28.10.1 Mode Register 0 (EMACx_MR0) states:
@@ -362,10 +364,19 @@ static int emac_reset(struct emac_instance *dev)
 	 * of the EMAC. If none is present, select the internal clock
 	 * (SDR0_ETH_CFG[EMACx_PHY_CLK] = 1).
 	 * After a soft reset, select the external clock.
+	 *
+	 * The AR8035-A PHY Meraki MR24 does not provide a TX Clk if the
+	 * ethernet cable is not attached. This causes the reset to timeout
+	 * and the PHY detection code in emac_init_phy() is unable to
+	 * communicate and detect the AR8035-A PHY. As a result, the emac
+	 * driver bails out early and the user has no ethernet.
+	 * In order to stay compatible with existing configurations, the
+	 * driver will temporarily switch to the internal clock, after
+	 * the first reset fails.
 	 */
 	if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) {
-		if (dev->phy_address == 0xffffffff &&
-		    dev->phy_map == 0xffffffff) {
+		if (try_internal_clock || (dev->phy_address == 0xffffffff &&
+					   dev->phy_map == 0xffffffff)) {
 			/* No PHY: select internal loop clock before reset */
 			dcri_clrset(SDR0, SDR0_ETH_CFG,
 				    0, SDR0_ETH_CFG_ECS << dev->cell_index);
@@ -383,8 +394,15 @@ static int emac_reset(struct emac_instance *dev)
 
 #ifdef CONFIG_PPC_DCR_NATIVE
 	if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) {
-		if (dev->phy_address == 0xffffffff &&
-		    dev->phy_map == 0xffffffff) {
+		if (!n && !try_internal_clock) {
+			/* first attempt has timed out. */
+			n = 20;
+			try_internal_clock = true;
+			goto do_retry;
+		}
+
+		if (try_internal_clock || (dev->phy_address == 0xffffffff &&
+					   dev->phy_map == 0xffffffff)) {
 			/* No PHY: restore external clock source after reset */
 			dcri_clrset(SDR0, SDR0_ETH_CFG,
 				    SDR0_ETH_CFG_ECS << dev->cell_index, 0);
@@ -2460,20 +2478,24 @@ static int emac_mii_bus_reset(struct mii_bus *bus)
 	return emac_reset(dev);
 }
 
+static int emac_mdio_phy_start_aneg(struct mii_phy *phy,
+				    struct phy_device *phy_dev)
+{
+	phy_dev->autoneg = phy->autoneg;
+	phy_dev->speed = phy->speed;
+	phy_dev->duplex = phy->duplex;
+	phy_dev->advertising = phy->advertising;
+	return phy_start_aneg(phy_dev);
+}
+
 static int emac_mdio_setup_aneg(struct mii_phy *phy, u32 advertise)
 {
 	struct net_device *ndev = phy->dev;
 	struct emac_instance *dev = netdev_priv(ndev);
 
-	dev->phy.autoneg = AUTONEG_ENABLE;
-	dev->phy.speed = SPEED_1000;
-	dev->phy.duplex = DUPLEX_FULL;
-	dev->phy.advertising = advertise;
 	phy->autoneg = AUTONEG_ENABLE;
-	phy->speed = dev->phy.speed;
-	phy->duplex = dev->phy.duplex;
 	phy->advertising = advertise;
-	return phy_start_aneg(dev->phy_dev);
+	return emac_mdio_phy_start_aneg(phy, dev->phy_dev);
 }
 
 static int emac_mdio_setup_forced(struct mii_phy *phy, int speed, int fd)
@@ -2481,13 +2503,10 @@ static int emac_mdio_setup_forced(struct mii_phy *phy, int speed, int fd)
 	struct net_device *ndev = phy->dev;
 	struct emac_instance *dev = netdev_priv(ndev);
 
-	dev->phy.autoneg =  AUTONEG_DISABLE;
-	dev->phy.speed = speed;
-	dev->phy.duplex = fd;
 	phy->autoneg = AUTONEG_DISABLE;
 	phy->speed = speed;
 	phy->duplex = fd;
-	return phy_start_aneg(dev->phy_dev);
+	return emac_mdio_phy_start_aneg(phy, dev->phy_dev);
 }
 
 static int emac_mdio_poll_link(struct mii_phy *phy)
@@ -2509,16 +2528,17 @@ static int emac_mdio_read_link(struct mii_phy *phy)
 {
 	struct net_device *ndev = phy->dev;
 	struct emac_instance *dev = netdev_priv(ndev);
+	struct phy_device *phy_dev = dev->phy_dev;
 	int res;
 
-	res = phy_read_status(dev->phy_dev);
+	res = phy_read_status(phy_dev);
 	if (res)
 		return res;
 
-	dev->phy.speed = phy->speed;
-	dev->phy.duplex = phy->duplex;
-	dev->phy.pause = phy->pause;
-	dev->phy.asym_pause = phy->asym_pause;
+	phy->speed = phy_dev->speed;
+	phy->duplex = phy_dev->duplex;
+	phy->pause = phy_dev->pause;
+	phy->asym_pause = phy_dev->asym_pause;
 	return 0;
 }
 
@@ -2528,13 +2548,6 @@ static int emac_mdio_init_phy(struct mii_phy *phy)
 	struct emac_instance *dev = netdev_priv(ndev);
 
 	phy_start(dev->phy_dev);
-	dev->phy.autoneg = phy->autoneg;
-	dev->phy.speed = phy->speed;
-	dev->phy.duplex = phy->duplex;
-	dev->phy.advertising = phy->advertising;
-	dev->phy.pause = phy->pause;
-	dev->phy.asym_pause = phy->asym_pause;
-
 	return phy_init_hw(dev->phy_dev);
 }
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4f2d329dba99..c0fbeb387db4 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -81,7 +81,7 @@
 static const char ibmvnic_driver_name[] = "ibmvnic";
 static const char ibmvnic_driver_string[] = "IBM System i/p Virtual NIC Driver";
 
-MODULE_AUTHOR("Santiago Leon <santi_leon@yahoo.com>");
+MODULE_AUTHOR("Santiago Leon");
 MODULE_DESCRIPTION("IBM System i/p Virtual NIC Driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
@@ -1468,6 +1468,11 @@ static void ibmvnic_netpoll_controller(struct net_device *dev)
 }
 #endif
 
+static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	return -EOPNOTSUPP;
+}
+
 static const struct net_device_ops ibmvnic_netdev_ops = {
 	.ndo_open		= ibmvnic_open,
 	.ndo_stop		= ibmvnic_close,
@@ -1479,6 +1484,7 @@ static const struct net_device_ops ibmvnic_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= ibmvnic_netpoll_controller,
 #endif
+	.ndo_change_mtu		= ibmvnic_change_mtu,
 };
 
 /* ethtool functions */
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index cdde3cc28fb5..44d9610f7a15 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -399,6 +399,7 @@ struct i40e_pf {
 #define I40E_FLAG_RX_CSUM_ENABLED		BIT_ULL(1)
 #define I40E_FLAG_MSI_ENABLED			BIT_ULL(2)
 #define I40E_FLAG_MSIX_ENABLED			BIT_ULL(3)
+#define I40E_FLAG_HW_ATR_EVICT_ENABLED		BIT_ULL(4)
 #define I40E_FLAG_RSS_ENABLED			BIT_ULL(6)
 #define I40E_FLAG_VMDQ_ENABLED			BIT_ULL(7)
 #define I40E_FLAG_IWARP_ENABLED			BIT_ULL(10)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 7a8eb486b9ea..894c8e57ba00 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -224,7 +224,7 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
 	I40E_PRIV_FLAG("LinkPolling", I40E_FLAG_LINK_POLLING_ENABLED, 0),
 	I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
 	I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
-	I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_CAPABLE, 0),
+	I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
 	I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
 };
 
@@ -4092,7 +4092,7 @@ flags_complete:
 
 	/* Only allow ATR evict on hardware that is capable of handling it */
 	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
-		pf->flags &= ~I40E_FLAG_HW_ATR_EVICT_CAPABLE;
+		pf->flags &= ~I40E_FLAG_HW_ATR_EVICT_ENABLED;
 
 	if (changed_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT) {
 		u16 sw_flags = 0, valid_flags = 0;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index d5c9c9e06ff5..a7a4b28b4144 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -295,7 +295,7 @@ struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id)
  **/
 void i40e_service_event_schedule(struct i40e_pf *pf)
 {
-	if (!test_bit(__I40E_VSI_DOWN, pf->state) &&
+	if (!test_bit(__I40E_DOWN, pf->state) &&
 	    !test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
 		queue_work(i40e_wq, &pf->service_task);
 }
@@ -3611,7 +3611,7 @@ static irqreturn_t i40e_intr(int irq, void *data)
 		 * this is not a performance path and napi_schedule()
 		 * can deal with rescheduling.
 		 */
-		if (!test_bit(__I40E_VSI_DOWN, pf->state))
+		if (!test_bit(__I40E_DOWN, pf->state))
 			napi_schedule_irqoff(&q_vector->napi);
 	}
 
@@ -3687,7 +3687,7 @@ static irqreturn_t i40e_intr(int irq, void *data)
 enable_intr:
 	/* re-enable interrupt causes */
 	wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask);
-	if (!test_bit(__I40E_VSI_DOWN, pf->state)) {
+	if (!test_bit(__I40E_DOWN, pf->state)) {
 		i40e_service_event_schedule(pf);
 		i40e_irq_dynamic_enable_icr0(pf, false);
 	}
@@ -6203,7 +6203,7 @@ static void i40e_fdir_reinit_subtask(struct i40e_pf *pf)
 {
 
 	/* if interface is down do nothing */
-	if (test_bit(__I40E_VSI_DOWN, pf->state))
+	if (test_bit(__I40E_DOWN, pf->state))
 		return;
 
 	if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
@@ -6344,7 +6344,7 @@ static void i40e_watchdog_subtask(struct i40e_pf *pf)
 	int i;
 
 	/* if interface is down do nothing */
-	if (test_bit(__I40E_VSI_DOWN, pf->state) ||
+	if (test_bit(__I40E_DOWN, pf->state) ||
 	    test_bit(__I40E_CONFIG_BUSY, pf->state))
 		return;
 
@@ -6399,9 +6399,9 @@ static void i40e_reset_subtask(struct i40e_pf *pf)
 		reset_flags |= BIT(__I40E_GLOBAL_RESET_REQUESTED);
 		clear_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state);
 	}
-	if (test_bit(__I40E_VSI_DOWN_REQUESTED, pf->state)) {
-		reset_flags |= BIT(__I40E_VSI_DOWN_REQUESTED);
-		clear_bit(__I40E_VSI_DOWN_REQUESTED, pf->state);
+	if (test_bit(__I40E_DOWN_REQUESTED, pf->state)) {
+		reset_flags |= BIT(__I40E_DOWN_REQUESTED);
+		clear_bit(__I40E_DOWN_REQUESTED, pf->state);
 	}
 
 	/* If there's a recovery already waiting, it takes
@@ -6415,7 +6415,7 @@ static void i40e_reset_subtask(struct i40e_pf *pf)
 
 	/* If we're already down or resetting, just bail */
 	if (reset_flags &&
-	    !test_bit(__I40E_VSI_DOWN, pf->state) &&
+	    !test_bit(__I40E_DOWN, pf->state) &&
 	    !test_bit(__I40E_CONFIG_BUSY, pf->state)) {
 		rtnl_lock();
 		i40e_do_reset(pf, reset_flags, true);
@@ -7002,7 +7002,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 	u32 val;
 	int v;
 
-	if (test_bit(__I40E_VSI_DOWN, pf->state))
+	if (test_bit(__I40E_DOWN, pf->state))
 		goto clear_recovery;
 	dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n");
 
@@ -8821,11 +8821,12 @@ static int i40e_sw_init(struct i40e_pf *pf)
 		    (pf->hw.aq.api_min_ver > 4))) {
 		/* Supported in FW API version higher than 1.4 */
 		pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
-		pf->flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
-	} else {
-		pf->flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
 	}
 
+	/* Enable HW ATR eviction if possible */
+	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
+		pf->flags |= I40E_FLAG_HW_ATR_EVICT_ENABLED;
+
 	pf->eeprom_version = 0xDEAD;
 	pf->lan_veb = I40E_NO_VEB;
 	pf->lan_vsi = I40E_NO_VSI;
@@ -9767,7 +9768,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi)
 		return -ENODEV;
 	}
 	if (vsi == pf->vsi[pf->lan_vsi] &&
-	    !test_bit(__I40E_VSI_DOWN, pf->state)) {
+	    !test_bit(__I40E_DOWN, pf->state)) {
 		dev_info(&pf->pdev->dev, "Can't remove PF VSI\n");
 		return -ENODEV;
 	}
@@ -11003,7 +11004,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 	pf->next_vsi = 0;
 	pf->pdev = pdev;
-	set_bit(__I40E_VSI_DOWN, pf->state);
+	set_bit(__I40E_DOWN, pf->state);
 
 	hw = &pf->hw;
 	hw->back = pf;
@@ -11293,7 +11294,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 * before setting up the misc vector or we get a race and the vector
 	 * ends up disabled forever.
 	 */
-	clear_bit(__I40E_VSI_DOWN, pf->state);
+	clear_bit(__I40E_DOWN, pf->state);
 
 	/* In case of MSIX we are going to setup the misc vector right here
 	 * to handle admin queue events etc. In case of legacy and MSI
@@ -11448,7 +11449,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	/* Unwind what we've done if something failed in the setup */
 err_vsis:
-	set_bit(__I40E_VSI_DOWN, pf->state);
+	set_bit(__I40E_DOWN, pf->state);
 	i40e_clear_interrupt_scheme(pf);
 	kfree(pf->vsi);
 err_switch_setup:
@@ -11500,7 +11501,7 @@ static void i40e_remove(struct pci_dev *pdev)
 
 	/* no more scheduling of any task */
 	set_bit(__I40E_SUSPENDED, pf->state);
-	set_bit(__I40E_VSI_DOWN, pf->state);
+	set_bit(__I40E_DOWN, pf->state);
 	if (pf->service_timer.data)
 		del_timer_sync(&pf->service_timer);
 	if (pf->service_task.func)
@@ -11740,7 +11741,7 @@ static void i40e_shutdown(struct pci_dev *pdev)
 	struct i40e_hw *hw = &pf->hw;
 
 	set_bit(__I40E_SUSPENDED, pf->state);
-	set_bit(__I40E_VSI_DOWN, pf->state);
+	set_bit(__I40E_DOWN, pf->state);
 	rtnl_lock();
 	i40e_prep_for_reset(pf, true);
 	rtnl_unlock();
@@ -11789,7 +11790,7 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
 	int retval = 0;
 
 	set_bit(__I40E_SUSPENDED, pf->state);
-	set_bit(__I40E_VSI_DOWN, pf->state);
+	set_bit(__I40E_DOWN, pf->state);
 
 	if (pf->wol_en && (pf->flags & I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE))
 		i40e_enable_mc_magic_wake(pf);
@@ -11841,7 +11842,7 @@ static int i40e_resume(struct pci_dev *pdev)
 
 	/* handling the reset will rebuild the device state */
 	if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) {
-		clear_bit(__I40E_VSI_DOWN, pf->state);
+		clear_bit(__I40E_DOWN, pf->state);
 		rtnl_lock();
 		i40e_reset_and_rebuild(pf, false, true);
 		rtnl_unlock();
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 29321a6167a6..77115c25d96f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1854,7 +1854,8 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
 #else
-	unsigned int truesize = SKB_DATA_ALIGN(size);
+	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+				SKB_DATA_ALIGN(I40E_SKB_PAD + size);
 #endif
 	struct sk_buff *skb;
 
@@ -2340,7 +2341,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	/* Due to lack of space, no more new filters can be programmed */
 	if (th->syn && (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED))
 		return;
-	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
+	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) {
 		/* HW ATR eviction will take care of removing filters on FIN
 		 * and RST packets.
 		 */
@@ -2402,7 +2403,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
 
-	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
+	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED)
 		dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
 
 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 95c23fbaa211..0fb38ca78900 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -3017,10 +3017,12 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
 					   VLAN_VID_MASK));
 	}
 
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 	if (vlan_id || qos)
 		ret = i40e_vsi_add_pvid(vsi, vlanprio);
 	else
 		i40e_vsi_remove_pvid(vsi);
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
 
 	if (vlan_id) {
 		dev_info(&pf->pdev->dev, "Setting VLAN %d, QOS 0x%x on VF %d\n",
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index dfe241a12ad0..12b02e530503 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -1190,7 +1190,8 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
 #else
-	unsigned int truesize = SKB_DATA_ALIGN(size);
+	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+				SKB_DATA_ALIGN(I40E_SKB_PAD + size);
 #endif
 	struct sk_buff *skb;
 
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 9b875d776b29..33c901622ed5 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -3719,7 +3719,7 @@ static void mvpp2_bm_bufs_get_addrs(struct device *dev, struct mvpp2 *priv,
 				    dma_addr_t *dma_addr,
 				    phys_addr_t *phys_addr)
 {
-	int cpu = smp_processor_id();
+	int cpu = get_cpu();
 
 	*dma_addr = mvpp2_percpu_read(priv, cpu,
 				      MVPP2_BM_PHY_ALLOC_REG(bm_pool->id));
@@ -3740,6 +3740,8 @@ static void mvpp2_bm_bufs_get_addrs(struct device *dev, struct mvpp2 *priv,
 		if (sizeof(phys_addr_t) == 8)
 			*phys_addr |= (u64)phys_addr_highbits << 32;
 	}
+
+	put_cpu();
 }
 
 /* Free all buffers from the pool */
@@ -3920,18 +3922,12 @@ static inline u32 mvpp2_bm_cookie_pool_set(u32 cookie, int pool)
 	return bm;
 }
 
-/* Get pool number from a BM cookie */
-static inline int mvpp2_bm_cookie_pool_get(unsigned long cookie)
-{
-	return (cookie >> MVPP2_BM_COOKIE_POOL_OFFS) & 0xFF;
-}
-
 /* Release buffer to BM */
 static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
 				     dma_addr_t buf_dma_addr,
 				     phys_addr_t buf_phys_addr)
 {
-	int cpu = smp_processor_id();
+	int cpu = get_cpu();
 
 	if (port->priv->hw_version == MVPP22) {
 		u32 val = 0;
@@ -3958,15 +3954,15 @@ static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
 			   MVPP2_BM_VIRT_RLS_REG, buf_phys_addr);
 	mvpp2_percpu_write(port->priv, cpu,
 			   MVPP2_BM_PHY_RLS_REG(pool), buf_dma_addr);
+
+	put_cpu();
 }
 
 /* Refill BM pool */
-static void mvpp2_pool_refill(struct mvpp2_port *port, u32 bm,
+static void mvpp2_pool_refill(struct mvpp2_port *port, int pool,
 			      dma_addr_t dma_addr,
 			      phys_addr_t phys_addr)
 {
-	int pool = mvpp2_bm_cookie_pool_get(bm);
-
 	mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
 }
 
@@ -4186,8 +4182,6 @@ static void mvpp22_port_mii_set(struct mvpp2_port *port)
 {
 	u32 val;
 
-	return;
-
 	/* Only GOP port 0 has an XLG MAC */
 	if (port->gop_id == 0) {
 		val = readl(port->base + MVPP22_XLG_CTRL3_REG);
@@ -4515,21 +4509,6 @@ static void mvpp2_rxq_offset_set(struct mvpp2_port *port,
 	mvpp2_write(port->priv, MVPP2_RXQ_CONFIG_REG(prxq), val);
 }
 
-/* Obtain BM cookie information from descriptor */
-static u32 mvpp2_bm_cookie_build(struct mvpp2_port *port,
-				 struct mvpp2_rx_desc *rx_desc)
-{
-	int cpu = smp_processor_id();
-	int pool;
-
-	pool = (mvpp2_rxdesc_status_get(port, rx_desc) &
-		MVPP2_RXD_BM_POOL_ID_MASK) >>
-		MVPP2_RXD_BM_POOL_ID_OFFS;
-
-	return ((pool & 0xFF) << MVPP2_BM_COOKIE_POOL_OFFS) |
-	       ((cpu & 0xFF) << MVPP2_BM_COOKIE_CPU_OFFS);
-}
-
 /* Tx descriptors helper methods */
 
 /* Get pointer to next Tx descriptor to be processed (send) by HW */
@@ -4757,7 +4736,7 @@ static void mvpp2_txp_max_tx_size_set(struct mvpp2_port *port)
 static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port,
 				   struct mvpp2_rx_queue *rxq)
 {
-	int cpu = smp_processor_id();
+	int cpu = get_cpu();
 
 	if (rxq->pkts_coal > MVPP2_OCCUPIED_THRESH_MASK)
 		rxq->pkts_coal = MVPP2_OCCUPIED_THRESH_MASK;
@@ -4765,6 +4744,8 @@ static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port,
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_THRESH_REG,
 			   rxq->pkts_coal);
+
+	put_cpu();
 }
 
 static u32 mvpp2_usec_to_cycles(u32 usec, unsigned long clk_hz)
@@ -4945,7 +4926,7 @@ static int mvpp2_rxq_init(struct mvpp2_port *port,
 	mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0);
 
 	/* Set Rx descriptors queue starting address - indirect access */
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
 	if (port->priv->hw_version == MVPP21)
 		rxq_dma = rxq->descs_dma;
@@ -4954,6 +4935,7 @@ static int mvpp2_rxq_init(struct mvpp2_port *port,
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_ADDR_REG, rxq_dma);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_SIZE_REG, rxq->size);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_INDEX_REG, 0);
+	put_cpu();
 
 	/* Set Offset */
 	mvpp2_rxq_offset_set(port, rxq->id, NET_SKB_PAD);
@@ -4980,9 +4962,13 @@ static void mvpp2_rxq_drop_pkts(struct mvpp2_port *port,
 
 	for (i = 0; i < rx_received; i++) {
 		struct mvpp2_rx_desc *rx_desc = mvpp2_rxq_next_desc_get(rxq);
-		u32 bm = mvpp2_bm_cookie_build(port, rx_desc);
+		u32 status = mvpp2_rxdesc_status_get(port, rx_desc);
+		int pool;
+
+		pool = (status & MVPP2_RXD_BM_POOL_ID_MASK) >>
+			MVPP2_RXD_BM_POOL_ID_OFFS;
 
-		mvpp2_pool_refill(port, bm,
+		mvpp2_pool_refill(port, pool,
 				  mvpp2_rxdesc_dma_addr_get(port, rx_desc),
 				  mvpp2_rxdesc_cookie_get(port, rx_desc));
 	}
@@ -5012,10 +4998,11 @@ static void mvpp2_rxq_deinit(struct mvpp2_port *port,
 	 * free descriptor number
 	 */
 	mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0);
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_ADDR_REG, 0);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_SIZE_REG, 0);
+	put_cpu();
 }
 
 /* Create and initialize a Tx queue */
@@ -5038,7 +5025,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
 	txq->last_desc = txq->size - 1;
 
 	/* Set Tx descriptors queue starting address - indirect access */
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_ADDR_REG,
 			   txq->descs_dma);
@@ -5063,6 +5050,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG,
 			   MVPP2_PREF_BUF_PTR(desc) | MVPP2_PREF_BUF_SIZE_16 |
 			   MVPP2_PREF_BUF_THRESH(desc_per_txq / 2));
+	put_cpu();
 
 	/* WRR / EJP configuration - indirect access */
 	tx_port_num = mvpp2_egress_port(port);
@@ -5133,10 +5121,11 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
 	mvpp2_write(port->priv, MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(txq->id), 0);
 
 	/* Set Tx descriptors queue starting address and size */
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_ADDR_REG, 0);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_SIZE_REG, 0);
+	put_cpu();
 }
 
 /* Cleanup Tx ports */
@@ -5146,7 +5135,7 @@ static void mvpp2_txq_clean(struct mvpp2_port *port, struct mvpp2_tx_queue *txq)
 	int delay, pending, cpu;
 	u32 val;
 
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
 	val = mvpp2_percpu_read(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG);
 	val |= MVPP2_TXQ_DRAIN_EN_MASK;
@@ -5173,6 +5162,7 @@ static void mvpp2_txq_clean(struct mvpp2_port *port, struct mvpp2_tx_queue *txq)
 
 	val &= ~MVPP2_TXQ_DRAIN_EN_MASK;
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG, val);
+	put_cpu();
 
 	for_each_present_cpu(cpu) {
 		txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
@@ -5420,7 +5410,7 @@ static void mvpp2_rx_csum(struct mvpp2_port *port, u32 status,
 
 /* Reuse skb if possible, or allocate a new skb and add it to BM pool */
 static int mvpp2_rx_refill(struct mvpp2_port *port,
-			   struct mvpp2_bm_pool *bm_pool, u32 bm)
+			   struct mvpp2_bm_pool *bm_pool, int pool)
 {
 	dma_addr_t dma_addr;
 	phys_addr_t phys_addr;
@@ -5432,7 +5422,7 @@ static int mvpp2_rx_refill(struct mvpp2_port *port,
 	if (!buf)
 		return -ENOMEM;
 
-	mvpp2_pool_refill(port, bm, dma_addr, phys_addr);
+	mvpp2_pool_refill(port, pool, dma_addr, phys_addr);
 
 	return 0;
 }
@@ -5490,7 +5480,7 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
 		unsigned int frag_size;
 		dma_addr_t dma_addr;
 		phys_addr_t phys_addr;
-		u32 bm, rx_status;
+		u32 rx_status;
 		int pool, rx_bytes, err;
 		void *data;
 
@@ -5502,8 +5492,8 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
 		phys_addr = mvpp2_rxdesc_cookie_get(port, rx_desc);
 		data = (void *)phys_to_virt(phys_addr);
 
-		bm = mvpp2_bm_cookie_build(port, rx_desc);
-		pool = mvpp2_bm_cookie_pool_get(bm);
+		pool = (rx_status & MVPP2_RXD_BM_POOL_ID_MASK) >>
+			MVPP2_RXD_BM_POOL_ID_OFFS;
 		bm_pool = &port->priv->bm_pools[pool];
 
 		/* In case of an error, release the requested buffer pointer
@@ -5516,7 +5506,7 @@ err_drop_frame:
 			dev->stats.rx_errors++;
 			mvpp2_rx_error(port, rx_desc);
 			/* Return the buffer to the pool */
-			mvpp2_pool_refill(port, bm, dma_addr, phys_addr);
+			mvpp2_pool_refill(port, pool, dma_addr, phys_addr);
 			continue;
 		}
 
@@ -5531,7 +5521,7 @@ err_drop_frame:
 			goto err_drop_frame;
 		}
 
-		err = mvpp2_rx_refill(port, bm_pool, bm);
+		err = mvpp2_rx_refill(port, bm_pool, pool);
 		if (err) {
 			netdev_err(port->dev, "failed to refill BM pools\n");
 			goto err_drop_frame;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index ae5fdc2df654..ffbcb27c05e5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1562,11 +1562,6 @@ static int mlx4_en_flow_replace(struct net_device *dev,
 		qpn = priv->drop_qp.qpn;
 	else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) {
 		qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1);
-		if (qpn < priv->rss_map.base_qpn ||
-		    qpn >= priv->rss_map.base_qpn + priv->rx_ring_num) {
-			en_warn(priv, "rxnfc: QP (0x%x) doesn't exist\n", qpn);
-			return -EINVAL;
-		}
 	} else {
 		if (cmd->fs.ring_cookie >= priv->rx_ring_num) {
 			en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist\n",
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 703205475524..83aab1e4c8c8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2862,12 +2862,10 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
 	int port = 0;
 
 	if (msi_x) {
-		int nreq = dev->caps.num_ports * num_online_cpus() + 1;
-
-		nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
-			     nreq);
-		if (nreq > MAX_MSIX)
-			nreq = MAX_MSIX;
+		int nreq = min3(dev->caps.num_ports *
+				(int)num_online_cpus() + 1,
+				dev->caps.num_eqs - dev->caps.reserved_eqs,
+				MAX_MSIX);
 
 		entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
 		if (!entries)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 1a670b681555..0710b3677464 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -35,6 +35,7 @@
 #include <linux/etherdevice.h>
 
 #include <linux/mlx4/cmd.h>
+#include <linux/mlx4/qp.h>
 #include <linux/export.h>
 
 #include "mlx4.h"
@@ -985,16 +986,21 @@ int mlx4_flow_attach(struct mlx4_dev *dev,
 	if (IS_ERR(mailbox))
 		return PTR_ERR(mailbox);
 
+	if (!mlx4_qp_lookup(dev, rule->qpn)) {
+		mlx4_err_rule(dev, "QP doesn't exist\n", rule);
+		ret = -EINVAL;
+		goto out;
+	}
+
 	trans_rule_ctrl_to_hw(rule, mailbox->buf);
 
 	size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
 
 	list_for_each_entry(cur, &rule->list, list) {
 		ret = parse_trans_rule(dev, cur, mailbox->buf + size);
-		if (ret < 0) {
-			mlx4_free_cmd_mailbox(dev, mailbox);
-			return ret;
-		}
+		if (ret < 0)
+			goto out;
+
 		size += ret;
 	}
 
@@ -1021,6 +1027,7 @@ int mlx4_flow_attach(struct mlx4_dev *dev,
 		}
 	}
 
+out:
 	mlx4_free_cmd_mailbox(dev, mailbox);
 
 	return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 2d6abd4662b1..5a310d313e94 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -384,6 +384,19 @@ static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
 		__mlx4_qp_free_icm(dev, qpn);
 }
 
+struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
+{
+	struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
+	struct mlx4_qp *qp;
+
+	spin_lock(&qp_table->lock);
+
+	qp = __mlx4_qp_lookup(dev, qpn);
+
+	spin_unlock(&qp_table->lock);
+	return qp;
+}
+
 int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -471,6 +484,12 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
 	}
 
 	if (attr & MLX4_UPDATE_QP_QOS_VPORT) {
+		if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP)) {
+			mlx4_warn(dev, "Granular QoS per VF is not enabled\n");
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+
 		qp_mask |= 1ULL << MLX4_UPD_QP_MASK_QOS_VPP;
 		cmd->qp_context.qos_vport = params->qos_vport;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 07516545474f..812783865205 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -5255,6 +5255,13 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave)
 	mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex);
 }
 
+static void update_qos_vpp(struct mlx4_update_qp_context *ctx,
+			   struct mlx4_vf_immed_vlan_work *work)
+{
+	ctx->qp_mask |= cpu_to_be64(1ULL << MLX4_UPD_QP_MASK_QOS_VPP);
+	ctx->qp_context.qos_vport = work->qos_vport;
+}
+
 void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work)
 {
 	struct mlx4_vf_immed_vlan_work *work =
@@ -5369,11 +5376,10 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work)
 					qp->sched_queue & 0xC7;
 				upd_context->qp_context.pri_path.sched_queue |=
 					((work->qos & 0x7) << 3);
-				upd_context->qp_mask |=
-					cpu_to_be64(1ULL <<
-						    MLX4_UPD_QP_MASK_QOS_VPP);
-				upd_context->qp_context.qos_vport =
-					work->qos_vport;
+
+				if (dev->caps.flags2 &
+				    MLX4_DEV_CAP_FLAG2_QOS_VPP)
+					update_qos_vpp(upd_context, work);
 			}
 
 			err = mlx4_cmd(dev, mailbox->dma,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index fc52d742b7f7..27251a78075c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -13,7 +13,7 @@ config MLX5_CORE
 
 config MLX5_CORE_EN
 	bool "Mellanox Technologies ConnectX-4 Ethernet support"
-	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+	depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
 	depends on IPV6=y || IPV6=n || MLX5_CORE=m
 	imply PTP_1588_CLOCK
 	default n
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 5bdaf3d545b2..10d282841f5b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -774,7 +774,7 @@ static void cb_timeout_handler(struct work_struct *work)
 	mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
 		       mlx5_command_str(msg_to_opcode(ent->in)),
 		       msg_to_opcode(ent->in));
-	mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+	mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
 }
 
 static void cmd_work_handler(struct work_struct *work)
@@ -804,6 +804,7 @@ static void cmd_work_handler(struct work_struct *work)
 	}
 
 	cmd->ent_arr[ent->idx] = ent;
+	set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
 	lay = get_inst(cmd, ent->idx);
 	ent->lay = lay;
 	memset(lay, 0, sizeof(*lay));
@@ -825,6 +826,20 @@ static void cmd_work_handler(struct work_struct *work)
 	if (ent->callback)
 		schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
 
+	/* Skip sending command to fw if internal error */
+	if (pci_channel_offline(dev->pdev) ||
+	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		u8 status = 0;
+		u32 drv_synd;
+
+		ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status);
+		MLX5_SET(mbox_out, ent->out, status, status);
+		MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
+
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+		return;
+	}
+
 	/* ring doorbell after the descriptor is valid */
 	mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
 	wmb();
@@ -835,7 +850,7 @@ static void cmd_work_handler(struct work_struct *work)
 		poll_timeout(ent);
 		/* make sure we read the descriptor after ownership is SW */
 		rmb();
-		mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT));
 	}
 }
 
@@ -879,7 +894,7 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 		wait_for_completion(&ent->done);
 	} else if (!wait_for_completion_timeout(&ent->done, timeout)) {
 		ent->ret = -ETIMEDOUT;
-		mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
 	}
 
 	err = ent->ret;
@@ -1375,7 +1390,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
 	}
 }
 
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
 	struct mlx5_cmd_work_ent *ent;
@@ -1395,6 +1410,19 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
 			struct semaphore *sem;
 
 			ent = cmd->ent_arr[i];
+
+			/* if we already completed the command, ignore it */
+			if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP,
+						&ent->state)) {
+				/* only real completion can free the cmd slot */
+				if (!forced) {
+					mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
+						      ent->idx);
+					free_ent(cmd, ent->idx);
+				}
+				continue;
+			}
+
 			if (ent->callback)
 				cancel_delayed_work(&ent->cb_timeout_work);
 			if (ent->page_queue)
@@ -1417,7 +1445,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
 				mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
 					      ent->ret, deliv_status_to_str(ent->status), ent->status);
 			}
-			free_ent(cmd, ent->idx);
+
+			/* only real completion will free the entry slot */
+			if (!forced)
+				free_ent(cmd, ent->idx);
 
 			if (ent->callback) {
 				ds = ent->ts2 - ent->ts1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 0099a3e397bc..944fc1742464 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -458,13 +458,15 @@ struct mlx5e_mpw_info {
 
 struct mlx5e_rx_am_stats {
 	int ppms; /* packets per msec */
+	int bpms; /* bytes per msec */
 	int epms; /* events per msec */
 };
 
 struct mlx5e_rx_am_sample {
-	ktime_t		time;
-	unsigned int	pkt_ctr;
-	u16		event_ctr;
+	ktime_t	time;
+	u32	pkt_ctr;
+	u32	byte_ctr;
+	u16	event_ctr;
 };
 
 struct mlx5e_rx_am { /* Adaptive Moderation */
@@ -1003,7 +1005,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv);
 void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv);
 void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
 
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn);
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv);
 void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv);
 
 int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index ce7b09d72ff6..16486dff1493 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -794,7 +794,6 @@ static void get_supported(u32 eth_proto_cap,
 	ptys2ethtool_supported_port(link_ksettings, eth_proto_cap);
 	ptys2ethtool_supported_link(supported, eth_proto_cap);
 	ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause);
-	ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Asym_Pause);
 }
 
 static void get_advertising(u32 eth_proto_cap, u8 tx_pause,
@@ -804,7 +803,7 @@ static void get_advertising(u32 eth_proto_cap, u8 tx_pause,
 	unsigned long *advertising = link_ksettings->link_modes.advertising;
 
 	ptys2ethtool_adver_link(advertising, eth_proto_cap);
-	if (tx_pause)
+	if (rx_pause)
 		ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause);
 	if (tx_pause ^ rx_pause)
 		ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause);
@@ -849,6 +848,8 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
 	struct mlx5e_priv *priv    = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 	u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
+	u32 rx_pause = 0;
+	u32 tx_pause = 0;
 	u32 eth_proto_cap;
 	u32 eth_proto_admin;
 	u32 eth_proto_lp;
@@ -871,11 +872,13 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
 	an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
 	an_status        = MLX5_GET(ptys_reg, out, an_status);
 
+	mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
+
 	ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
 	ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
 
 	get_supported(eth_proto_cap, link_ksettings);
-	get_advertising(eth_proto_admin, 0, 0, link_ksettings);
+	get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings);
 	get_speed_duplex(netdev, eth_proto_oper, link_ksettings);
 
 	eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
@@ -1239,11 +1242,11 @@ static int mlx5e_get_ts_info(struct net_device *dev,
 				 SOF_TIMESTAMPING_RX_HARDWARE |
 				 SOF_TIMESTAMPING_RAW_HARDWARE;
 
-	info->tx_types = (BIT(1) << HWTSTAMP_TX_OFF) |
-			 (BIT(1) << HWTSTAMP_TX_ON);
+	info->tx_types = BIT(HWTSTAMP_TX_OFF) |
+			 BIT(HWTSTAMP_TX_ON);
 
-	info->rx_filters = (BIT(1) << HWTSTAMP_FILTER_NONE) |
-			   (BIT(1) << HWTSTAMP_FILTER_ALL);
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+			   BIT(HWTSTAMP_FILTER_ALL);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 576d6787b484..53ed58320a24 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -800,7 +800,7 @@ void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv)
 	mlx5e_destroy_flow_table(&ttc->ft);
 }
 
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn)
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv)
 {
 	struct mlx5e_ttc_table *ttc = &priv->fs.ttc;
 	struct mlx5_flow_table_attr ft_attr = {};
@@ -810,7 +810,6 @@ int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn)
 	ft_attr.max_fte = MLX5E_TTC_TABLE_SIZE;
 	ft_attr.level = MLX5E_TTC_FT_LEVEL;
 	ft_attr.prio = MLX5E_NIC_PRIO;
-	ft_attr.underlay_qpn = underlay_qpn;
 
 	ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
 	if (IS_ERR(ft->t)) {
@@ -1147,7 +1146,7 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
 		priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
 	}
 
-	err = mlx5e_create_ttc_table(priv, 0);
+	err = mlx5e_create_ttc_table(priv);
 	if (err) {
 		netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
 			   err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a61b71b6fff3..277f4de30375 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2976,7 +2976,7 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 tc)
 	new_channels.params = priv->channels.params;
 	new_channels.params.num_tc = tc ? tc : 1;
 
-	if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
 		priv->channels.params = new_channels.params;
 		goto out;
 	}
@@ -4241,7 +4241,8 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 	return netdev;
 
 err_cleanup_nic:
-	profile->cleanup(priv);
+	if (profile->cleanup)
+		profile->cleanup(priv);
 	free_netdev(netdev);
 
 	return NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 79462c0368a0..46984a52a94b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -791,6 +791,8 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
 	params->tx_max_inline         = mlx5e_get_max_inline_cap(mdev);
 	params->num_tc                = 1;
 	params->lro_wqe_sz            = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+
+	mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
 }
 
 static void mlx5e_build_rep_netdev(struct net_device *netdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 7b1566f0ae58..66b5fec15313 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1041,6 +1041,8 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
 #define MLX5_IB_GRH_BYTES       40
 #define MLX5_IPOIB_ENCAP_LEN    4
 #define MLX5_GID_SIZE           16
+#define MLX5_IPOIB_PSEUDO_LEN   20
+#define MLX5_IPOIB_HARD_LEN     (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN)
 
 static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 					 struct mlx5_cqe64 *cqe,
@@ -1048,6 +1050,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 					 struct sk_buff *skb)
 {
 	struct net_device *netdev = rq->netdev;
+	char *pseudo_header;
 	u8 *dgid;
 	u8 g;
 
@@ -1076,8 +1079,11 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 	if (likely(netdev->features & NETIF_F_RXHASH))
 		mlx5e_skb_set_hash(cqe, skb);
 
+	/* 20 bytes of ipoib header and 4 for encap existing */
+	pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN);
+	memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN);
 	skb_reset_mac_header(skb);
-	skb_pull(skb, MLX5_IPOIB_ENCAP_LEN);
+	skb_pull(skb, MLX5_IPOIB_HARD_LEN);
 
 	skb->dev = netdev;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
index 02dd3a95ed8f..acf32fe952cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
@@ -183,28 +183,27 @@ static void mlx5e_am_exit_parking(struct mlx5e_rx_am *am)
 	mlx5e_am_step(am);
 }
 
+#define IS_SIGNIFICANT_DIFF(val, ref) \
+	(((100 * abs((val) - (ref))) / (ref)) > 10) /* more than 10% difference */
+
 static int mlx5e_am_stats_compare(struct mlx5e_rx_am_stats *curr,
 				  struct mlx5e_rx_am_stats *prev)
 {
-	int diff;
-
-	if (!prev->ppms)
-		return curr->ppms ? MLX5E_AM_STATS_BETTER :
+	if (!prev->bpms)
+		return curr->bpms ? MLX5E_AM_STATS_BETTER :
 				    MLX5E_AM_STATS_SAME;
 
-	diff = curr->ppms - prev->ppms;
-	if (((100 * abs(diff)) / prev->ppms) > 10) /* more than 10% diff */
-		return (diff > 0) ? MLX5E_AM_STATS_BETTER :
-				    MLX5E_AM_STATS_WORSE;
+	if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
+		return (curr->bpms > prev->bpms) ? MLX5E_AM_STATS_BETTER :
+						   MLX5E_AM_STATS_WORSE;
 
-	if (!prev->epms)
-		return curr->epms ? MLX5E_AM_STATS_WORSE :
-				    MLX5E_AM_STATS_SAME;
+	if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
+		return (curr->ppms > prev->ppms) ? MLX5E_AM_STATS_BETTER :
+						   MLX5E_AM_STATS_WORSE;
 
-	diff = curr->epms - prev->epms;
-	if (((100 * abs(diff)) / prev->epms) > 10) /* more than 10% diff */
-		return (diff < 0) ? MLX5E_AM_STATS_BETTER :
-				    MLX5E_AM_STATS_WORSE;
+	if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
+		return (curr->epms < prev->epms) ? MLX5E_AM_STATS_BETTER :
+						   MLX5E_AM_STATS_WORSE;
 
 	return MLX5E_AM_STATS_SAME;
 }
@@ -266,10 +265,13 @@ static void mlx5e_am_sample(struct mlx5e_rq *rq,
 {
 	s->time	     = ktime_get();
 	s->pkt_ctr   = rq->stats.packets;
+	s->byte_ctr  = rq->stats.bytes;
 	s->event_ctr = rq->cq.event_ctr;
 }
 
 #define MLX5E_AM_NEVENTS 64
+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
+#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1))
 
 static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start,
 				struct mlx5e_rx_am_sample *end,
@@ -277,13 +279,17 @@ static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start,
 {
 	/* u32 holds up to 71 minutes, should be enough */
 	u32 delta_us = ktime_us_delta(end->time, start->time);
-	unsigned int npkts = end->pkt_ctr - start->pkt_ctr;
+	u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr);
+	u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr,
+			     start->byte_ctr);
 
 	if (!delta_us)
 		return;
 
-	curr_stats->ppms =            (npkts * USEC_PER_MSEC) / delta_us;
-	curr_stats->epms = (MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us;
+	curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us);
+	curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us);
+	curr_stats->epms = DIV_ROUND_UP(MLX5E_AM_NEVENTS * USEC_PER_MSEC,
+					delta_us);
 }
 
 void mlx5e_rx_am_work(struct work_struct *work)
@@ -308,7 +314,8 @@ void mlx5e_rx_am(struct mlx5e_rq *rq)
 
 	switch (am->state) {
 	case MLX5E_AM_MEASURE_IN_PROGRESS:
-		nevents = rq->cq.event_ctr - am->start_sample.event_ctr;
+		nevents = BIT_GAP(BITS_PER_TYPE(u16), rq->cq.event_ctr,
+				  am->start_sample.event_ctr);
 		if (nevents < MLX5E_AM_NEVENTS)
 			break;
 		mlx5e_am_sample(rq, &end_sample);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 53e4992d6511..f81c3aa60b46 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -417,20 +417,13 @@ struct mlx5e_stats {
 };
 
 static const struct counter_desc mlx5e_pme_status_desc[] = {
-	{ "module_plug", 0 },
 	{ "module_unplug", 8 },
 };
 
 static const struct counter_desc mlx5e_pme_error_desc[] = {
-	{ "module_pwr_budget_exd", 0 },  /* power budget exceed */
-	{ "module_long_range", 8 },      /* long range for non MLNX cable */
-	{ "module_bus_stuck", 16 },      /* bus stuck (I2C or data shorted) */
-	{ "module_no_eeprom", 24 },      /* no eeprom/retry time out */
-	{ "module_enforce_part", 32 },   /* enforce part number list */
-	{ "module_unknown_id", 40 },     /* unknown identifier */
-	{ "module_high_temp", 48 },      /* high temperature */
+	{ "module_bus_stuck", 16 },       /* bus stuck (I2C or data shorted) */
+	{ "module_high_temp", 48 },       /* high temperature */
 	{ "module_bad_shorted", 56 },    /* bad or shorted cable/module */
-	{ "module_unknown_status", 64 },
 };
 
 #endif /* __MLX5_EN_STATS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 11c27e4fadf6..9df9fc0d26f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -43,6 +43,7 @@
 #include <net/tc_act/tc_vlan.h>
 #include <net/tc_act/tc_tunnel_key.h>
 #include <net/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_csum.h>
 #include <net/vxlan.h>
 #include <net/arp.h>
 #include "en.h"
@@ -384,7 +385,7 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
 			mlx5_encap_dealloc(priv->mdev, e->encap_id);
 
-		hlist_del_rcu(&e->encap_hlist);
+		hash_del_rcu(&e->encap_hlist);
 		kfree(e->encap_header);
 		kfree(e);
 	}
@@ -894,7 +895,6 @@ static struct mlx5_fields fields[] = {
 	{MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0,  2, offsetof(struct pedit_headers, eth.h_source[4])},
 	{MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE,  2, offsetof(struct pedit_headers, eth.h_proto)},
 
-	{MLX5_ACTION_IN_FIELD_OUT_IP_DSCP, 1, offsetof(struct pedit_headers, ip4.tos)},
 	{MLX5_ACTION_IN_FIELD_OUT_IP_TTL,  1, offsetof(struct pedit_headers, ip4.ttl)},
 	{MLX5_ACTION_IN_FIELD_OUT_SIPV4,   4, offsetof(struct pedit_headers, ip4.saddr)},
 	{MLX5_ACTION_IN_FIELD_OUT_DIPV4,   4, offsetof(struct pedit_headers, ip4.daddr)},
@@ -925,11 +925,11 @@ static int offload_pedit_fields(struct pedit_headers *masks,
 				struct mlx5e_tc_flow_parse_attr *parse_attr)
 {
 	struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
-	int i, action_size, nactions, max_actions, first, last;
+	int i, action_size, nactions, max_actions, first, last, first_z;
 	void *s_masks_p, *a_masks_p, *vals_p;
-	u32 s_mask, a_mask, val;
 	struct mlx5_fields *f;
 	u8 cmd, field_bsize;
+	u32 s_mask, a_mask;
 	unsigned long mask;
 	void *action;
 
@@ -946,7 +946,8 @@ static int offload_pedit_fields(struct pedit_headers *masks,
 	for (i = 0; i < ARRAY_SIZE(fields); i++) {
 		f = &fields[i];
 		/* avoid seeing bits set from previous iterations */
-		s_mask = a_mask = mask = val = 0;
+		s_mask = 0;
+		a_mask = 0;
 
 		s_masks_p = (void *)set_masks + f->offset;
 		a_masks_p = (void *)add_masks + f->offset;
@@ -981,12 +982,12 @@ static int offload_pedit_fields(struct pedit_headers *masks,
 			memset(a_masks_p, 0, f->size);
 		}
 
-		memcpy(&val, vals_p, f->size);
-
 		field_bsize = f->size * BITS_PER_BYTE;
+
+		first_z = find_first_zero_bit(&mask, field_bsize);
 		first = find_first_bit(&mask, field_bsize);
 		last  = find_last_bit(&mask, field_bsize);
-		if (first > 0 || last != (field_bsize - 1)) {
+		if (first > 0 || last != (field_bsize - 1) || first_z < last) {
 			printk(KERN_WARNING "mlx5: partial rewrite (mask %lx) is currently not offloaded\n",
 			       mask);
 			return -EOPNOTSUPP;
@@ -1002,11 +1003,11 @@ static int offload_pedit_fields(struct pedit_headers *masks,
 		}
 
 		if (field_bsize == 32)
-			MLX5_SET(set_action_in, action, data, ntohl(val));
+			MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p));
 		else if (field_bsize == 16)
-			MLX5_SET(set_action_in, action, data, ntohs(val));
+			MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p));
 		else if (field_bsize == 8)
-			MLX5_SET(set_action_in, action, data, val);
+			MLX5_SET(set_action_in, action, data, *(u8 *)vals_p);
 
 		action += action_size;
 		nactions++;
@@ -1109,6 +1110,28 @@ out_err:
 	return err;
 }
 
+static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags)
+{
+	u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
+			 TCA_CSUM_UPDATE_FLAG_UDP;
+
+	/*  The HW recalcs checksums only if re-writing headers */
+	if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
+		netdev_warn(priv->netdev,
+			    "TC csum action is only offloaded with pedit\n");
+		return false;
+	}
+
+	if (update_flags & ~prot_flags) {
+		netdev_warn(priv->netdev,
+			    "can't offload TC csum action for some header/s - flags %#x\n",
+			    update_flags);
+		return false;
+	}
+
+	return true;
+}
+
 static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 				struct mlx5e_tc_flow_parse_attr *parse_attr,
 				struct mlx5e_tc_flow *flow)
@@ -1149,6 +1172,14 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 			continue;
 		}
 
+		if (is_tcf_csum(a)) {
+			if (csum_offload_supported(priv, attr->action,
+						   tcf_csum_update_flags(a)))
+				continue;
+
+			return -EOPNOTSUPP;
+		}
+
 		if (is_tcf_skbedit_mark(a)) {
 			u32 mark = tcf_skbedit_mark(a);
 
@@ -1651,6 +1682,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 			continue;
 		}
 
+		if (is_tcf_csum(a)) {
+			if (csum_offload_supported(priv, attr->action,
+						   tcf_csum_update_flags(a)))
+				continue;
+
+			return -EOPNOTSUPP;
+		}
+
 		if (is_tcf_mirred_egress_redirect(a)) {
 			int ifindex = tcf_mirred_ifindex(a);
 			struct net_device *out_dev, *encap_dev = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index ea5d8d37a75c..33eae5ad2fb0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -422,7 +422,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 			break;
 
 		case MLX5_EVENT_TYPE_CMD:
-			mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector));
+			mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
 			break;
 
 		case MLX5_EVENT_TYPE_PORT_CHANGE:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index f991f669047e..a53e982a6863 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -906,21 +906,34 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
 	return 0;
 }
 
-int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+static int mlx5_devlink_eswitch_check(struct devlink *devlink)
 {
-	struct mlx5_core_dev *dev;
-	u16 cur_mlx5_mode, mlx5_mode = 0;
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
 
-	dev = devlink_priv(devlink);
+	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return -EOPNOTSUPP;
 
 	if (!MLX5_CAP_GEN(dev, vport_group_manager))
 		return -EOPNOTSUPP;
 
-	cur_mlx5_mode = dev->priv.eswitch->mode;
-
-	if (cur_mlx5_mode == SRIOV_NONE)
+	if (dev->priv.eswitch->mode == SRIOV_NONE)
 		return -EOPNOTSUPP;
 
+	return 0;
+}
+
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	u16 cur_mlx5_mode, mlx5_mode = 0;
+	int err;
+
+	err = mlx5_devlink_eswitch_check(devlink);
+	if (err)
+		return err;
+
+	cur_mlx5_mode = dev->priv.eswitch->mode;
+
 	if (esw_mode_from_devlink(mode, &mlx5_mode))
 		return -EINVAL;
 
@@ -937,15 +950,12 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 
 int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 {
-	struct mlx5_core_dev *dev;
-
-	dev = devlink_priv(devlink);
-
-	if (!MLX5_CAP_GEN(dev, vport_group_manager))
-		return -EOPNOTSUPP;
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	int err;
 
-	if (dev->priv.eswitch->mode == SRIOV_NONE)
-		return -EOPNOTSUPP;
+	err = mlx5_devlink_eswitch_check(devlink);
+	if (err)
+		return err;
 
 	return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
 }
@@ -954,15 +964,12 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
-	int num_vports = esw->enabled_vports;
 	int err, vport;
 	u8 mlx5_mode;
 
-	if (!MLX5_CAP_GEN(dev, vport_group_manager))
-		return -EOPNOTSUPP;
-
-	if (esw->mode == SRIOV_NONE)
-		return -EOPNOTSUPP;
+	err = mlx5_devlink_eswitch_check(devlink);
+	if (err)
+		return err;
 
 	switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
 	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
@@ -985,7 +992,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
 	if (err)
 		goto out;
 
-	for (vport = 1; vport < num_vports; vport++) {
+	for (vport = 1; vport < esw->enabled_vports; vport++) {
 		err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
 		if (err) {
 			esw_warn(dev, "Failed to set min inline on vport %d\n",
@@ -1010,12 +1017,11 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	int err;
 
-	if (!MLX5_CAP_GEN(dev, vport_group_manager))
-		return -EOPNOTSUPP;
-
-	if (esw->mode == SRIOV_NONE)
-		return -EOPNOTSUPP;
+	err = mlx5_devlink_eswitch_check(devlink);
+	if (err)
+		return err;
 
 	return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
 }
@@ -1062,11 +1068,9 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap)
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
 	int err;
 
-	if (!MLX5_CAP_GEN(dev, vport_group_manager))
-		return -EOPNOTSUPP;
-
-	if (esw->mode == SRIOV_NONE)
-		return -EOPNOTSUPP;
+	err = mlx5_devlink_eswitch_check(devlink);
+	if (err)
+		return err;
 
 	if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
 	    (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) ||
@@ -1105,12 +1109,11 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	int err;
 
-	if (!MLX5_CAP_GEN(dev, vport_group_manager))
-		return -EOPNOTSUPP;
-
-	if (esw->mode == SRIOV_NONE)
-		return -EOPNOTSUPP;
+	err = mlx5_devlink_eswitch_check(devlink);
+	if (err)
+		return err;
 
 	*encap = esw->offloads.encap;
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 19e3d2fc2099..fcec7bedd3cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -40,28 +40,25 @@
 #include "eswitch.h"
 
 int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
-			    struct mlx5_flow_table *ft)
+			    struct mlx5_flow_table *ft, u32 underlay_qpn)
 {
 	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
 
 	if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
-	    ft->underlay_qpn == 0)
+	    underlay_qpn == 0)
 		return 0;
 
 	MLX5_SET(set_flow_table_root_in, in, opcode,
 		 MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
 	MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
 	MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
+	MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn);
 	if (ft->vport) {
 		MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport);
 		MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
 	}
 
-	if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
-	    ft->underlay_qpn != 0)
-		MLX5_SET(set_flow_table_root_in, in, underlay_qpn, ft->underlay_qpn);
-
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 8fad80688536..0f98a7cf4877 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -71,7 +71,8 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
 			unsigned int index);
 
 int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
-			    struct mlx5_flow_table *ft);
+			    struct mlx5_flow_table *ft,
+			    u32 underlay_qpn);
 
 int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id);
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index b8a176503d38..8f5125ccd8d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -650,7 +650,7 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
 	if (ft->level >= min_level)
 		return 0;
 
-	err = mlx5_cmd_update_root_ft(root->dev, ft);
+	err = mlx5_cmd_update_root_ft(root->dev, ft, root->underlay_qpn);
 	if (err)
 		mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
 			       ft->id);
@@ -818,8 +818,6 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 		goto unlock_root;
 	}
 
-	ft->underlay_qpn = ft_attr->underlay_qpn;
-
 	tree_init_node(&ft->node, 1, del_flow_table);
 	log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
 	next_ft = find_next_chained_ft(fs_prio);
@@ -864,7 +862,7 @@ struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace
 	ft_attr.level   = level;
 	ft_attr.prio    = prio;
 
-	return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, 0);
+	return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, vport);
 }
 
 struct mlx5_flow_table*
@@ -1489,7 +1487,8 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft)
 
 	new_root_ft = find_next_ft(ft);
 	if (new_root_ft) {
-		int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft);
+		int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft,
+						  root->underlay_qpn);
 
 		if (err) {
 			mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
@@ -2062,3 +2061,21 @@ err:
 	mlx5_cleanup_fs(dev);
 	return err;
 }
+
+int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
+{
+	struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
+
+	root->underlay_qpn = underlay_qpn;
+	return 0;
+}
+EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn);
+
+int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
+{
+	struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
+
+	root->underlay_qpn = 0;
+	return 0;
+}
+EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 81eafc7b9dd9..990acee6fb09 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -118,7 +118,6 @@ struct mlx5_flow_table {
 	/* FWD rules that point on this flow table */
 	struct list_head		fwd_rules;
 	u32				flags;
-	u32				underlay_qpn;
 };
 
 struct mlx5_fc_cache {
@@ -195,6 +194,7 @@ struct mlx5_flow_root_namespace {
 	struct mlx5_flow_table		*root_ft;
 	/* Should be held when chaining flow tables */
 	struct mutex			chain_lock;
+	u32				underlay_qpn;
 };
 
 int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index d0515391d33b..f27f84ffbc85 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -90,7 +90,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev)
 	spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
 
 	mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
-	mlx5_cmd_comp_handler(dev, vector);
+	mlx5_cmd_comp_handler(dev, vector, true);
 	return;
 
 no_trig:
@@ -275,10 +275,8 @@ static void poll_health(unsigned long data)
 	struct mlx5_core_health *health = &dev->priv.health;
 	u32 count;
 
-	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-		mod_timer(&health->timer, get_next_poll_jiffies());
-		return;
-	}
+	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+		goto out;
 
 	count = ioread32be(health->health_counter);
 	if (count == health->prev)
@@ -290,8 +288,6 @@ static void poll_health(unsigned long data)
 	if (health->miss_counter == MAX_MISSES) {
 		dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n");
 		print_health_info(dev);
-	} else {
-		mod_timer(&health->timer, get_next_poll_jiffies());
 	}
 
 	if (in_fatal(dev) && !health->sick) {
@@ -305,6 +301,9 @@ static void poll_health(unsigned long data)
 				"new health works are not permitted at this stage\n");
 		spin_unlock(&health->wq_lock);
 	}
+
+out:
+	mod_timer(&health->timer, get_next_poll_jiffies());
 }
 
 void mlx5_start_health_poll(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
index 019c230da498..cc1858752e70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
@@ -66,6 +66,10 @@ static void mlx5i_init(struct mlx5_core_dev *mdev,
 
 	mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev));
 
+	/* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
+	mlx5e_set_rq_type_params(mdev, &priv->channels.params, MLX5_WQ_TYPE_LINKED_LIST);
+	priv->channels.params.lro_en = false;
+
 	mutex_init(&priv->state_lock);
 
 	netdev->hw_features    |= NETIF_F_SG;
@@ -156,6 +160,8 @@ out:
 
 static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
 {
+	mlx5_fs_remove_rx_underlay_qpn(mdev, qp->qpn);
+
 	mlx5_core_destroy_qp(mdev, qp);
 }
 
@@ -170,6 +176,8 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv)
 		return err;
 	}
 
+	mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
+
 	err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]);
 	if (err) {
 		mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
@@ -189,7 +197,6 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
 
 static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
 {
-	struct mlx5i_priv *ipriv = priv->ppriv;
 	int err;
 
 	priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
@@ -205,7 +212,7 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
 		priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
 	}
 
-	err = mlx5e_create_ttc_table(priv, ipriv->qp.qpn);
+	err = mlx5e_create_ttc_table(priv);
 	if (err) {
 		netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
 			   err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 0c123d571b4c..13be264587f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -175,8 +175,9 @@ static struct mlx5_profile profile[] = {
 	},
 };
 
-#define FW_INIT_TIMEOUT_MILI	2000
-#define FW_INIT_WAIT_MS		2
+#define FW_INIT_TIMEOUT_MILI		2000
+#define FW_INIT_WAIT_MS			2
+#define FW_PRE_INIT_TIMEOUT_MILI	10000
 
 static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
 {
@@ -537,8 +538,10 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 	/* disable cmdif checksum */
 	MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);
 
-	/* If the HCA supports 4K UARs use it */
-	if (MLX5_CAP_GEN_MAX(dev, uar_4k))
+	/* Enable 4K UAR only when HCA supports it and page size is bigger
+	 * than 4K.
+	 */
+	if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096)
 		MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1);
 
 	MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
@@ -612,7 +615,6 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
 	struct mlx5_priv *priv  = &mdev->priv;
 	struct msix_entry *msix = priv->msix_arr;
 	int irq                 = msix[i + MLX5_EQ_VEC_COMP_BASE].vector;
-	int err;
 
 	if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
 		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
@@ -622,18 +624,11 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
 	cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
 			priv->irq_info[i].mask);
 
-	err = irq_set_affinity_hint(irq, priv->irq_info[i].mask);
-	if (err) {
-		mlx5_core_warn(mdev, "irq_set_affinity_hint failed,irq 0x%.4x",
-			       irq);
-		goto err_clear_mask;
-	}
+	if (IS_ENABLED(CONFIG_SMP) &&
+	    irq_set_affinity_hint(irq, priv->irq_info[i].mask))
+		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
 
 	return 0;
-
-err_clear_mask:
-	free_cpumask_var(priv->irq_info[i].mask);
-	return err;
 }
 
 static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
@@ -1019,6 +1014,15 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	 */
 	dev->state = MLX5_DEVICE_STATE_UP;
 
+	/* wait for firmware to accept initialization segments configurations
+	 */
+	err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
+	if (err) {
+		dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n",
+			FW_PRE_INIT_TIMEOUT_MILI);
+		goto out;
+	}
+
 	err = mlx5_cmd_init(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index ea56f6ade6b4..5f0a7bc692a4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -199,10 +199,11 @@ static int mlxsw_sp_erif_entry_get(struct mlxsw_sp *mlxsw_sp,
 
 	entry->counter_valid = false;
 	entry->counter = 0;
+	entry->index = mlxsw_sp_rif_index(rif);
+
 	if (!counters_enabled)
 		return 0;
 
-	entry->index = mlxsw_sp_rif_index(rif);
 	err = mlxsw_sp_rif_counter_value_get(mlxsw_sp, rif,
 					     MLXSW_SP_RIF_COUNTER_EGRESS,
 					     &cnt);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 33cec1cc1642..0744452a0b18 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -206,6 +206,9 @@ void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
 {
 	unsigned int *p_counter_index;
 
+	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
+		return;
+
 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
 	if (WARN_ON(!p_counter_index))
 		return;
@@ -3331,6 +3334,9 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
 	u16 vid = vlan_dev_vlan_id(vlan_dev);
 
+	if (netif_is_bridge_port(vlan_dev))
+		return 0;
+
 	if (mlxsw_sp_port_dev_check(real_dev))
 		return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event,
 						     vid);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 0d8411f1f954..f4bb0c0b7c1d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1497,8 +1497,7 @@ do_fdb_op:
 	err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid,
 				      adding, true);
 	if (err) {
-		if (net_ratelimit())
-			netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n");
+		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to set FDB entry\n");
 		return;
 	}
 
@@ -1558,8 +1557,7 @@ do_fdb_op:
 	err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid,
 					  adding, true);
 	if (err) {
-		if (net_ratelimit())
-			netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n");
+		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to set FDB entry\n");
 		return;
 	}
 
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
index b8d5270359cd..e30676515529 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
@@ -247,7 +247,7 @@ nx_fw_cmd_set_mtu(struct netxen_adapter *adapter, int mtu)
 	cmd.req.arg3 = 0;
 
 	if (recv_ctx->state == NX_HOST_CTX_STATE_ACTIVE)
-		netxen_issue_cmd(adapter, &cmd);
+		rcode = netxen_issue_cmd(adapter, &cmd);
 
 	if (rcode != NX_RCODE_SUCCESS)
 		return -EIO;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index 483241b4b05d..a672f6a860dc 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -2956,7 +2956,7 @@ static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn,
 				qed_wr(p_hwfn,
 				       p_ptt,
 				       s_storm_defs[storm_id].cm_ctx_wr_addr,
-				       BIT(9) | lid);
+				       (i << 9) | lid);
 				*(dump_buf + offset) = qed_rd(p_hwfn,
 							      p_ptt,
 							      rd_reg_addr);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
index 67200c5498ab..0a8fde629991 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
@@ -983,7 +983,7 @@ void qed_set_rfs_mode_disable(struct qed_hwfn *p_hwfn,
 	memset(&camline, 0, sizeof(union gft_cam_line_union));
 	qed_wr(p_hwfn, p_ptt, PRS_REG_GFT_CAM + CAM_LINE_SIZE * pf_id,
 	       camline.cam_line_mapped.camline);
-	memset(&ramline, 0, sizeof(union gft_cam_line_union));
+	memset(&ramline, 0, sizeof(ramline));
 
 	for (i = 0; i < RAM_LINE_SIZE / REG_SIZE; i++) {
 		u32 hw_addr = PRS_REG_GFT_PROFILE_MASK_RAM;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 537d1236a4fe..715b3aaf83ac 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1730,7 +1730,8 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
 		qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats);
 		break;
 	default:
-		DP_ERR(cdev, "Invalid protocol type = %d\n", type);
+		DP_VERBOSE(cdev, QED_MSG_SP,
+			   "Invalid protocol type = %d\n", type);
 		return;
 	}
 }
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 49bad00a0f8f..81312924df14 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -37,8 +37,8 @@
 
 #define _QLCNIC_LINUX_MAJOR 5
 #define _QLCNIC_LINUX_MINOR 3
-#define _QLCNIC_LINUX_SUBVERSION 65
-#define QLCNIC_LINUX_VERSIONID  "5.3.65"
+#define _QLCNIC_LINUX_SUBVERSION 66
+#define QLCNIC_LINUX_VERSIONID  "5.3.66"
 #define QLCNIC_DRV_IDC_VER  0x01
 #define QLCNIC_DRIVER_VERSION  ((_QLCNIC_LINUX_MAJOR << 16) |\
 		 (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION))
@@ -1824,22 +1824,44 @@ struct qlcnic_hardware_ops {
 	u32 (*get_cap_size)(void *, int);
 	void (*set_sys_info)(void *, int, u32);
 	void (*store_cap_mask)(void *, u32);
+	bool (*encap_rx_offload) (struct qlcnic_adapter *adapter);
+	bool (*encap_tx_offload) (struct qlcnic_adapter *adapter);
 };
 
 extern struct qlcnic_nic_template qlcnic_vf_ops;
 
-static inline bool qlcnic_encap_tx_offload(struct qlcnic_adapter *adapter)
+static inline bool qlcnic_83xx_encap_tx_offload(struct qlcnic_adapter *adapter)
 {
 	return adapter->ahw->extra_capability[0] &
 	       QLCNIC_83XX_FW_CAPAB_ENCAP_TX_OFFLOAD;
 }
 
-static inline bool qlcnic_encap_rx_offload(struct qlcnic_adapter *adapter)
+static inline bool qlcnic_83xx_encap_rx_offload(struct qlcnic_adapter *adapter)
 {
 	return adapter->ahw->extra_capability[0] &
 	       QLCNIC_83XX_FW_CAPAB_ENCAP_RX_OFFLOAD;
 }
 
+static inline bool qlcnic_82xx_encap_tx_offload(struct qlcnic_adapter *adapter)
+{
+	return false;
+}
+
+static inline bool qlcnic_82xx_encap_rx_offload(struct qlcnic_adapter *adapter)
+{
+        return false;
+}
+
+static inline bool qlcnic_encap_rx_offload(struct qlcnic_adapter *adapter)
+{
+        return adapter->ahw->hw_ops->encap_rx_offload(adapter);
+}
+
+static inline bool qlcnic_encap_tx_offload(struct qlcnic_adapter *adapter)
+{
+        return adapter->ahw->hw_ops->encap_tx_offload(adapter);
+}
+
 static inline int qlcnic_start_firmware(struct qlcnic_adapter *adapter)
 {
 	return adapter->nic_ops->start_firmware(adapter);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index 718bf58a7da6..f7080d0ab874 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -242,6 +242,8 @@ static struct qlcnic_hardware_ops qlcnic_83xx_hw_ops = {
 	.get_cap_size			= qlcnic_83xx_get_cap_size,
 	.set_sys_info			= qlcnic_83xx_set_sys_info,
 	.store_cap_mask			= qlcnic_83xx_store_cap_mask,
+	.encap_rx_offload		= qlcnic_83xx_encap_rx_offload,
+	.encap_tx_offload		= qlcnic_83xx_encap_tx_offload,
 };
 
 static struct qlcnic_nic_template qlcnic_83xx_ops = {
@@ -3168,6 +3170,40 @@ int qlcnic_83xx_flash_read32(struct qlcnic_adapter *adapter, u32 flash_addr,
 	return 0;
 }
 
+void qlcnic_83xx_get_port_type(struct qlcnic_adapter *adapter)
+{
+	struct qlcnic_hardware_context *ahw = adapter->ahw;
+	struct qlcnic_cmd_args cmd;
+	u32 config;
+	int err;
+
+	err = qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_GET_LINK_STATUS);
+	if (err)
+		return;
+
+	err = qlcnic_issue_cmd(adapter, &cmd);
+	if (err) {
+		dev_info(&adapter->pdev->dev,
+			 "Get Link Status Command failed: 0x%x\n", err);
+		goto out;
+	} else {
+		config = cmd.rsp.arg[3];
+
+		switch (QLC_83XX_SFP_MODULE_TYPE(config)) {
+		case QLC_83XX_MODULE_FIBRE_1000BASE_SX:
+		case QLC_83XX_MODULE_FIBRE_1000BASE_LX:
+		case QLC_83XX_MODULE_FIBRE_1000BASE_CX:
+		case QLC_83XX_MODULE_TP_1000BASE_T:
+			ahw->port_type = QLCNIC_GBE;
+			break;
+		default:
+			ahw->port_type = QLCNIC_XGBE;
+		}
+	}
+out:
+	qlcnic_free_mbx_args(&cmd);
+}
+
 int qlcnic_83xx_test_link(struct qlcnic_adapter *adapter)
 {
 	u8 pci_func;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
index 3dfe8e27b51c..b75a81246856 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
@@ -637,6 +637,7 @@ void qlcnic_83xx_get_pauseparam(struct qlcnic_adapter *,
 int qlcnic_83xx_set_pauseparam(struct qlcnic_adapter *,
 			       struct ethtool_pauseparam *);
 int qlcnic_83xx_test_link(struct qlcnic_adapter *);
+void qlcnic_83xx_get_port_type(struct qlcnic_adapter *adapter);
 int qlcnic_83xx_reg_test(struct qlcnic_adapter *);
 int qlcnic_83xx_get_regs_len(struct qlcnic_adapter *);
 int qlcnic_83xx_get_registers(struct qlcnic_adapter *, u32 *);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index 9a869c15d8bf..7f7deeaf1cf0 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -486,6 +486,9 @@ static int qlcnic_set_link_ksettings(struct net_device *dev,
 	u32 ret = 0;
 	struct qlcnic_adapter *adapter = netdev_priv(dev);
 
+	if (qlcnic_83xx_check(adapter))
+		qlcnic_83xx_get_port_type(adapter);
+
 	if (adapter->ahw->port_type != QLCNIC_GBE)
 		return -EOPNOTSUPP;
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index 838cc0ceafd8..7848cf04b29a 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -341,7 +341,7 @@ qlcnic_pcie_sem_lock(struct qlcnic_adapter *adapter, int sem, u32 id_reg)
 			}
 			return -EIO;
 		}
-		usleep_range(1000, 1500);
+		udelay(1200);
 	}
 
 	if (id_reg)
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index b6628aaa6e4a..1b5f7d57b6f8 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -632,6 +632,8 @@ static struct qlcnic_hardware_ops qlcnic_hw_ops = {
 	.get_cap_size			= qlcnic_82xx_get_cap_size,
 	.set_sys_info			= qlcnic_82xx_set_sys_info,
 	.store_cap_mask			= qlcnic_82xx_store_cap_mask,
+	.encap_rx_offload               = qlcnic_82xx_encap_rx_offload,
+	.encap_tx_offload               = qlcnic_82xx_encap_tx_offload,
 };
 
 static int qlcnic_check_multi_tx_capability(struct qlcnic_adapter *adapter)
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index 2f656f395f39..c58180f40844 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -77,6 +77,8 @@ static struct qlcnic_hardware_ops qlcnic_sriov_vf_hw_ops = {
 	.free_mac_list			= qlcnic_sriov_vf_free_mac_list,
 	.enable_sds_intr		= qlcnic_83xx_enable_sds_intr,
 	.disable_sds_intr		= qlcnic_83xx_disable_sds_intr,
+	.encap_rx_offload               = qlcnic_83xx_encap_rx_offload,
+	.encap_tx_offload               = qlcnic_83xx_encap_tx_offload,
 };
 
 static struct qlcnic_nic_template qlcnic_sriov_vf_ops = {
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index cc065ffbe4b5..bcd4708b3745 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -931,7 +931,7 @@ int emac_mac_up(struct emac_adapter *adpt)
 	emac_mac_config(adpt);
 	emac_mac_rx_descs_refill(adpt, &adpt->rx_q);
 
-	adpt->phydev->irq = PHY_IGNORE_INTERRUPT;
+	adpt->phydev->irq = PHY_POLL;
 	ret = phy_connect_direct(netdev, adpt->phydev, emac_adjust_link,
 				 PHY_INTERFACE_MODE_SGMII);
 	if (ret) {
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
index 441c19366489..18461fcb9815 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-phy.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
@@ -13,15 +13,11 @@
 /* Qualcomm Technologies, Inc. EMAC PHY Controller driver.
  */
 
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_net.h>
 #include <linux/of_mdio.h>
 #include <linux/phy.h>
 #include <linux/iopoll.h>
 #include <linux/acpi.h>
 #include "emac.h"
-#include "emac-mac.h"
 
 /* EMAC base register offsets */
 #define EMAC_MDIO_CTRL                                        0x001414
@@ -52,62 +48,10 @@
 
 #define MDIO_WAIT_TIMES                                           1000
 
-#define EMAC_LINK_SPEED_DEFAULT (\
-		EMAC_LINK_SPEED_10_HALF  |\
-		EMAC_LINK_SPEED_10_FULL  |\
-		EMAC_LINK_SPEED_100_HALF |\
-		EMAC_LINK_SPEED_100_FULL |\
-		EMAC_LINK_SPEED_1GB_FULL)
-
-/**
- * emac_phy_mdio_autopoll_disable() - disable mdio autopoll
- * @adpt: the emac adapter
- *
- * The autopoll feature takes over the MDIO bus.  In order for
- * the PHY driver to be able to talk to the PHY over the MDIO
- * bus, we need to temporarily disable the autopoll feature.
- */
-static int emac_phy_mdio_autopoll_disable(struct emac_adapter *adpt)
-{
-	u32 val;
-
-	/* disable autopoll */
-	emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, MDIO_AP_EN, 0);
-
-	/* wait for any mdio polling to complete */
-	if (!readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, val,
-				!(val & MDIO_BUSY), 100, MDIO_WAIT_TIMES * 100))
-		return 0;
-
-	/* failed to disable; ensure it is enabled before returning */
-	emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, 0, MDIO_AP_EN);
-
-	return -EBUSY;
-}
-
-/**
- * emac_phy_mdio_autopoll_disable() - disable mdio autopoll
- * @adpt: the emac adapter
- *
- * The EMAC has the ability to poll the external PHY on the MDIO
- * bus for link state changes.  This eliminates the need for the
- * driver to poll the phy.  If if the link state does change,
- * the EMAC issues an interrupt on behalf of the PHY.
- */
-static void emac_phy_mdio_autopoll_enable(struct emac_adapter *adpt)
-{
-	emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, 0, MDIO_AP_EN);
-}
-
 static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum)
 {
 	struct emac_adapter *adpt = bus->priv;
 	u32 reg;
-	int ret;
-
-	ret = emac_phy_mdio_autopoll_disable(adpt);
-	if (ret)
-		return ret;
 
 	emac_reg_update32(adpt->base + EMAC_PHY_STS, PHY_ADDR_BMSK,
 			  (addr << PHY_ADDR_SHFT));
@@ -122,24 +66,15 @@ static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum)
 	if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg,
 			       !(reg & (MDIO_START | MDIO_BUSY)),
 			       100, MDIO_WAIT_TIMES * 100))
-		ret = -EIO;
-	else
-		ret = (reg >> MDIO_DATA_SHFT) & MDIO_DATA_BMSK;
+		return -EIO;
 
-	emac_phy_mdio_autopoll_enable(adpt);
-
-	return ret;
+	return (reg >> MDIO_DATA_SHFT) & MDIO_DATA_BMSK;
 }
 
 static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val)
 {
 	struct emac_adapter *adpt = bus->priv;
 	u32 reg;
-	int ret;
-
-	ret = emac_phy_mdio_autopoll_disable(adpt);
-	if (ret)
-		return ret;
 
 	emac_reg_update32(adpt->base + EMAC_PHY_STS, PHY_ADDR_BMSK,
 			  (addr << PHY_ADDR_SHFT));
@@ -155,11 +90,9 @@ static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val)
 	if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg,
 			       !(reg & (MDIO_START | MDIO_BUSY)), 100,
 			       MDIO_WAIT_TIMES * 100))
-		ret = -EIO;
+		return -EIO;
 
-	emac_phy_mdio_autopoll_enable(adpt);
-
-	return ret;
+	return 0;
 }
 
 /* Configure the MDIO bus and connect the external PHY */
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 28a8cdc36485..98a326faea29 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -50,19 +50,7 @@
 #define DMAR_DLY_CNT_DEF				    15
 #define DMAW_DLY_CNT_DEF				     4
 
-#define IMR_NORMAL_MASK         (\
-		ISR_ERROR       |\
-		ISR_GPHY_LINK   |\
-		ISR_TX_PKT      |\
-		GPHY_WAKEUP_INT)
-
-#define IMR_EXTENDED_MASK       (\
-		SW_MAN_INT      |\
-		ISR_OVER        |\
-		ISR_ERROR       |\
-		ISR_GPHY_LINK   |\
-		ISR_TX_PKT      |\
-		GPHY_WAKEUP_INT)
+#define IMR_NORMAL_MASK		(ISR_ERROR | ISR_OVER | ISR_TX_PKT)
 
 #define ISR_TX_PKT      (\
 	TX_PKT_INT      |\
@@ -70,10 +58,6 @@
 	TX_PKT_INT2     |\
 	TX_PKT_INT3)
 
-#define ISR_GPHY_LINK        (\
-	GPHY_LINK_UP_INT     |\
-	GPHY_LINK_DOWN_INT)
-
 #define ISR_OVER        (\
 	RFD0_UR_INT     |\
 	RFD1_UR_INT     |\
@@ -187,10 +171,6 @@ irqreturn_t emac_isr(int _irq, void *data)
 	if (status & ISR_OVER)
 		net_warn_ratelimited("warning: TX/RX overflow\n");
 
-	/* link event */
-	if (status & ISR_GPHY_LINK)
-		phy_mac_interrupt(adpt->phydev, !!(status & GPHY_LINK_UP_INT));
-
 exit:
 	/* enable the interrupt */
 	writel(irq->mask, adpt->base + EMAC_INT_MASK);
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 513e6c74e199..24ca7df15d07 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -296,8 +296,9 @@ qcaspi_receive(struct qcaspi *qca)
 
 	/* Allocate rx SKB if we don't have one available. */
 	if (!qca->rx_skb) {
-		qca->rx_skb = netdev_alloc_skb(net_dev,
-					       net_dev->mtu + VLAN_ETH_HLEN);
+		qca->rx_skb = netdev_alloc_skb_ip_align(net_dev,
+							net_dev->mtu +
+							VLAN_ETH_HLEN);
 		if (!qca->rx_skb) {
 			netdev_dbg(net_dev, "out of RX resources\n");
 			qca->stats.out_of_mem++;
@@ -377,7 +378,7 @@ qcaspi_receive(struct qcaspi *qca)
 					qca->rx_skb, qca->rx_skb->dev);
 				qca->rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
 				netif_rx_ni(qca->rx_skb);
-				qca->rx_skb = netdev_alloc_skb(net_dev,
+				qca->rx_skb = netdev_alloc_skb_ip_align(net_dev,
 					net_dev->mtu + VLAN_ETH_HLEN);
 				if (!qca->rx_skb) {
 					netdev_dbg(net_dev, "out of RX resources\n");
@@ -759,7 +760,8 @@ qcaspi_netdev_init(struct net_device *dev)
 	if (!qca->rx_buffer)
 		return -ENOBUFS;
 
-	qca->rx_skb = netdev_alloc_skb(dev, qca->net_dev->mtu + VLAN_ETH_HLEN);
+	qca->rx_skb = netdev_alloc_skb_ip_align(dev, qca->net_dev->mtu +
+						VLAN_ETH_HLEN);
 	if (!qca->rx_skb) {
 		kfree(qca->rx_buffer);
 		netdev_info(qca->net_dev, "Failed to allocate RX sk_buff.\n");
diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c
index 9bcd4aefc9c5..bed34684994f 100644
--- a/drivers/net/ethernet/realtek/atp.c
+++ b/drivers/net/ethernet/realtek/atp.c
@@ -151,8 +151,8 @@ MODULE_LICENSE("GPL");
 
 module_param(max_interrupt_work, int, 0);
 module_param(debug, int, 0);
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_array(xcvr, int, NULL, 0);
 MODULE_PARM_DESC(max_interrupt_work, "ATP maximum events handled per interrupt");
 MODULE_PARM_DESC(debug, "ATP debug level (0-7)");
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 3cd7989c007d..784782da3a85 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -230,18 +230,6 @@ static void ravb_ring_free(struct net_device *ndev, int q)
 	int ring_size;
 	int i;
 
-	/* Free RX skb ringbuffer */
-	if (priv->rx_skb[q]) {
-		for (i = 0; i < priv->num_rx_ring[q]; i++)
-			dev_kfree_skb(priv->rx_skb[q][i]);
-	}
-	kfree(priv->rx_skb[q]);
-	priv->rx_skb[q] = NULL;
-
-	/* Free aligned TX buffers */
-	kfree(priv->tx_align[q]);
-	priv->tx_align[q] = NULL;
-
 	if (priv->rx_ring[q]) {
 		for (i = 0; i < priv->num_rx_ring[q]; i++) {
 			struct ravb_ex_rx_desc *desc = &priv->rx_ring[q][i];
@@ -270,6 +258,18 @@ static void ravb_ring_free(struct net_device *ndev, int q)
 		priv->tx_ring[q] = NULL;
 	}
 
+	/* Free RX skb ringbuffer */
+	if (priv->rx_skb[q]) {
+		for (i = 0; i < priv->num_rx_ring[q]; i++)
+			dev_kfree_skb(priv->rx_skb[q][i]);
+	}
+	kfree(priv->rx_skb[q]);
+	priv->rx_skb[q] = NULL;
+
+	/* Free aligned TX buffers */
+	kfree(priv->tx_align[q]);
+	priv->tx_align[q] = NULL;
+
 	/* Free TX skb ringbuffer.
 	 * SKBs are freed by ravb_tx_free() call above.
 	 */
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index f68c4db656ed..2d686ccf971b 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3220,7 +3220,8 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 	/* MDIO bus init */
 	ret = sh_mdio_init(mdp, pd);
 	if (ret) {
-		dev_err(&ndev->dev, "failed to initialise MDIO\n");
+		if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "MDIO init failed: %d\n", ret);
 		goto out_release;
 	}
 
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 2ae852454780..a9ce82d3e9cf 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -1505,8 +1505,8 @@ static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port,
 		*index = entry->index;
 		resolved = false;
 	} else if (removing) {
-		ofdpa_neigh_del(trans, found);
 		*index = found->index;
+		ofdpa_neigh_del(trans, found);
 	} else if (updating) {
 		ofdpa_neigh_update(found, trans, NULL, false);
 		resolved = !is_zero_ether_addr(found->eth_dst);
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 78efb2822b86..78f9e43420e0 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -4172,7 +4172,7 @@ found:
 	 * recipients
 	 */
 	if (is_mc_recip) {
-		MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+		MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
 		unsigned int depth, i;
 
 		memset(inbuf, 0, sizeof(inbuf));
@@ -4320,7 +4320,7 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
 			efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
 		} else {
 			efx_mcdi_display_error(efx, MC_CMD_FILTER_OP,
-					       MC_CMD_FILTER_OP_IN_LEN,
+					       MC_CMD_FILTER_OP_EXT_IN_LEN,
 					       NULL, 0, rc);
 		}
 	}
@@ -4453,7 +4453,7 @@ static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx,
 				      struct efx_filter_spec *spec)
 {
 	struct efx_ef10_filter_table *table = efx->filter_state;
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
 	struct efx_filter_spec *saved_spec;
 	unsigned int hash, i, depth = 1;
 	bool replacing = false;
@@ -4940,7 +4940,7 @@ not_restored:
 static void efx_ef10_filter_table_remove(struct efx_nic *efx)
 {
 	struct efx_ef10_filter_table *table = efx->filter_state;
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
 	struct efx_filter_spec *spec;
 	unsigned int filter_idx;
 	int rc;
@@ -5105,6 +5105,7 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
 
 	/* Insert/renew filters */
 	for (i = 0; i < addr_count; i++) {
+		EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID);
 		efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
 		efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr);
 		rc = efx_ef10_filter_insert(efx, &spec, true);
@@ -5122,11 +5123,11 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
 				}
 				return rc;
 			} else {
-				/* mark as not inserted, and carry on */
-				rc = EFX_EF10_FILTER_ID_INVALID;
+				/* keep invalid ID, and carry on */
 			}
+		} else {
+			ids[i] = efx_ef10_filter_get_unsafe_id(rc);
 		}
-		ids[i] = efx_ef10_filter_get_unsafe_id(rc);
 	}
 
 	if (multicast && rollback) {
diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c
index b7e4345c990d..019cef1d3cf7 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.c
+++ b/drivers/net/ethernet/sfc/ef10_sriov.c
@@ -661,8 +661,6 @@ restore_filters:
 		up_write(&vf->efx->filter_sem);
 		mutex_unlock(&vf->efx->mac_lock);
 
-		up_write(&vf->efx->filter_sem);
-
 		rc2 = efx_net_open(vf->efx->net_dev);
 		if (rc2)
 			goto reset_nic;
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 7b916aa21bde..4d7fb8af880d 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -18,8 +18,12 @@
 #include "mcdi.h"
 
 enum {
-	EFX_REV_SIENA_A0 = 0,
-	EFX_REV_HUNT_A0 = 1,
+	/* Revisions 0-2 were Falcon A0, A1 and B0 respectively.
+	 * They are not supported by this driver but these revision numbers
+	 * form part of the ethtool API for register dumping.
+	 */
+	EFX_REV_SIENA_A0 = 3,
+	EFX_REV_HUNT_A0 = 4,
 };
 
 static inline int efx_nic_rev(struct efx_nic *efx)
diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c
index c8d84679ede7..d3bb2ba51f40 100644
--- a/drivers/net/ethernet/smsc/smc9194.c
+++ b/drivers/net/ethernet/smsc/smc9194.c
@@ -1501,8 +1501,8 @@ static void smc_set_multicast_list(struct net_device *dev)
 static struct net_device *devSMC9194;
 MODULE_LICENSE("GPL");
 
-module_param(io, int, 0);
-module_param(irq, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
 module_param(ifport, int, 0);
 MODULE_PARM_DESC(io, "SMC 99194 I/O base address");
 MODULE_PARM_DESC(irq, "SMC 99194 IRQ number");
diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
index 489ef146201e..6a9c954492f2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
@@ -37,6 +37,7 @@
 #define TSE_PCS_CONTROL_AN_EN_MASK			BIT(12)
 #define TSE_PCS_CONTROL_REG				0x00
 #define TSE_PCS_CONTROL_RESTART_AN_MASK			BIT(9)
+#define TSE_PCS_CTRL_AUTONEG_SGMII			0x1140
 #define TSE_PCS_IF_MODE_REG				0x28
 #define TSE_PCS_LINK_TIMER_0_REG			0x24
 #define TSE_PCS_LINK_TIMER_1_REG			0x26
@@ -65,6 +66,7 @@
 #define TSE_PCS_SW_RESET_TIMEOUT			100
 #define TSE_PCS_USE_SGMII_AN_MASK			BIT(1)
 #define TSE_PCS_USE_SGMII_ENA				BIT(0)
+#define TSE_PCS_IF_USE_SGMII				0x03
 
 #define SGMII_ADAPTER_CTRL_REG				0x00
 #define SGMII_ADAPTER_DISABLE				0x0001
@@ -101,7 +103,9 @@ int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs)
 {
 	int ret = 0;
 
-	writew(TSE_PCS_USE_SGMII_ENA, base + TSE_PCS_IF_MODE_REG);
+	writew(TSE_PCS_IF_USE_SGMII, base + TSE_PCS_IF_MODE_REG);
+
+	writew(TSE_PCS_CTRL_AUTONEG_SGMII, base + TSE_PCS_CONTROL_REG);
 
 	writew(TSE_PCS_SGMII_LINK_TIMER_0, base + TSE_PCS_LINK_TIMER_0_REG);
 	writew(TSE_PCS_SGMII_LINK_TIMER_1, base + TSE_PCS_LINK_TIMER_1_REG);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index aa6476439aee..e0ef02f9503b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -214,13 +214,13 @@ static int dwmac4_wrback_get_tx_timestamp_status(struct dma_desc *p)
 {
 	/* Context type from W/B descriptor must be zero */
 	if (le32_to_cpu(p->des3) & TDES3_CONTEXT_TYPE)
-		return -EINVAL;
+		return 0;
 
 	/* Tx Timestamp Status is 1 so des0 and des1'll have valid values */
 	if (le32_to_cpu(p->des3) & TDES3_TIMESTAMP_STATUS)
-		return 0;
+		return 1;
 
-	return 1;
+	return 0;
 }
 
 static inline u64 dwmac4_get_timestamp(void *desc, u32 ats)
@@ -282,7 +282,10 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
 		}
 	}
 exit:
-	return ret;
+	if (likely(ret == 0))
+		return 1;
+
+	return 0;
 }
 
 static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index cd8c60132390..6e4cbc6ce0ef 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -434,14 +434,14 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
 		return;
 
 	/* check tx tstamp status */
-	if (!priv->hw->desc->get_tx_timestamp_status(p)) {
+	if (priv->hw->desc->get_tx_timestamp_status(p)) {
 		/* get the valid tstamp */
 		ns = priv->hw->desc->get_timestamp(p, priv->adv_ts);
 
 		memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
 		shhwtstamp.hwtstamp = ns_to_ktime(ns);
 
-		netdev_info(priv->dev, "get valid TX hw timestamp %llu\n", ns);
+		netdev_dbg(priv->dev, "get valid TX hw timestamp %llu\n", ns);
 		/* pass tstamp to stack */
 		skb_tstamp_tx(skb, &shhwtstamp);
 	}
@@ -468,19 +468,19 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
 		return;
 
 	/* Check if timestamp is available */
-	if (!priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) {
+	if (priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) {
 		/* For GMAC4, the valid timestamp is from CTX next desc. */
 		if (priv->plat->has_gmac4)
 			ns = priv->hw->desc->get_timestamp(np, priv->adv_ts);
 		else
 			ns = priv->hw->desc->get_timestamp(p, priv->adv_ts);
 
-		netdev_info(priv->dev, "get valid RX hw timestamp %llu\n", ns);
+		netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
 		shhwtstamp = skb_hwtstamps(skb);
 		memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
 		shhwtstamp->hwtstamp = ns_to_ktime(ns);
 	} else  {
-		netdev_err(priv->dev, "cannot get RX hw timestamp\n");
+		netdev_dbg(priv->dev, "cannot get RX hw timestamp\n");
 	}
 }
 
@@ -546,7 +546,10 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
 			/* PTP v1, UDP, any kind of event packet */
 			config.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
 			/* take time stamp for all event messages */
-			snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+			if (priv->plat->has_gmac4)
+				snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
+			else
+				snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
 
 			ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
 			ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
@@ -578,7 +581,10 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
 			config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
 			ptp_v2 = PTP_TCR_TSVER2ENA;
 			/* take time stamp for all event messages */
-			snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+			if (priv->plat->has_gmac4)
+				snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
+			else
+				snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
 
 			ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
 			ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
@@ -612,7 +618,10 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
 			config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 			ptp_v2 = PTP_TCR_TSVER2ENA;
 			/* take time stamp for all event messages */
-			snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+			if (priv->plat->has_gmac4)
+				snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
+			else
+				snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
 
 			ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
 			ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
@@ -1208,7 +1217,7 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 	u32 rx_count = priv->plat->rx_queues_to_use;
 	unsigned int bfsize = 0;
 	int ret = -ENOMEM;
-	u32 queue;
+	int queue;
 	int i;
 
 	if (priv->hw->mode->set_16kib_bfsize)
@@ -2724,7 +2733,7 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
 
 		priv->hw->desc->prepare_tso_tx_desc(desc, 0, buff_size,
 			0, 1,
-			(last_segment) && (buff_size < TSO_MAX_BUFF_SIZE),
+			(last_segment) && (tmp_len <= TSO_MAX_BUFF_SIZE),
 			0, 0);
 
 		tmp_len -= TSO_MAX_BUFF_SIZE;
@@ -2822,7 +2831,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tx_q->tx_skbuff_dma[first_entry].buf = des;
 	tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
-	tx_q->tx_skbuff[first_entry] = skb;
 
 	first->des0 = cpu_to_le32(des);
 
@@ -2856,6 +2864,14 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = true;
 
+	/* Only the last descriptor gets to point to the skb. */
+	tx_q->tx_skbuff[tx_q->cur_tx] = skb;
+
+	/* We've used all descriptors we need for this skb, however,
+	 * advance cur_tx so that it references a fresh descriptor.
+	 * ndo_start_xmit will fill this descriptor the next time it's
+	 * called and stmmac_tx_clean may clean up to this descriptor.
+	 */
 	tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
 
 	if (unlikely(stmmac_tx_avail(priv, queue) <= (MAX_SKB_FRAGS + 1))) {
@@ -2947,7 +2963,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	int i, csum_insertion = 0, is_jumbo = 0;
 	u32 queue = skb_get_queue_mapping(skb);
 	int nfrags = skb_shinfo(skb)->nr_frags;
-	unsigned int entry, first_entry;
+	int entry;
+	unsigned int first_entry;
 	struct dma_desc *desc, *first;
 	struct stmmac_tx_queue *tx_q;
 	unsigned int enh_desc;
@@ -2988,8 +3005,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	first = desc;
 
-	tx_q->tx_skbuff[first_entry] = skb;
-
 	enh_desc = priv->plat->enh_desc;
 	/* To program the descriptors according to the size of the frame */
 	if (enh_desc)
@@ -3037,8 +3052,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 						skb->len);
 	}
 
-	entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+	/* Only the last descriptor gets to point to the skb. */
+	tx_q->tx_skbuff[entry] = skb;
 
+	/* We've used all descriptors we need for this skb, however,
+	 * advance cur_tx so that it references a fresh descriptor.
+	 * ndo_start_xmit will fill this descriptor the next time it's
+	 * called and stmmac_tx_clean may clean up to this descriptor.
+	 */
+	entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
 	tx_q->cur_tx = entry;
 
 	if (netif_msg_pktdata(priv)) {
@@ -3725,7 +3747,7 @@ static void sysfs_display_ring(void *head, int size, int extend_desc,
 			ep++;
 		} else {
 			seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
-				   i, (unsigned int)virt_to_phys(ep),
+				   i, (unsigned int)virt_to_phys(p),
 				   le32_to_cpu(p->des0), le32_to_cpu(p->des1),
 				   le32_to_cpu(p->des2), le32_to_cpu(p->des3));
 			p++;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
index 48fb72fc423c..f4b31d69f60e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
@@ -59,7 +59,8 @@
 /* Enable Snapshot for Messages Relevant to Master */
 #define	PTP_TCR_TSMSTRENA	BIT(15)
 /* Select PTP packets for Taking Snapshots */
-#define	PTP_TCR_SNAPTYPSEL_1	GENMASK(17, 16)
+#define	PTP_TCR_SNAPTYPSEL_1	BIT(16)
+#define	PTP_GMAC4_TCR_SNAPTYPSEL_1	GENMASK(17, 16)
 /* Enable MAC address for PTP Frame Filtering */
 #define	PTP_TCR_TSENMACADDR	BIT(18)
 
diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
index 5a90fed06260..5b56c24b6ed2 100644
--- a/drivers/net/ethernet/sun/ldmvsw.c
+++ b/drivers/net/ethernet/sun/ldmvsw.c
@@ -411,13 +411,14 @@ static int vsw_port_remove(struct vio_dev *vdev)
 
 	if (port) {
 		del_timer_sync(&port->vio.timer);
+		del_timer_sync(&port->clean_timer);
 
 		napi_disable(&port->napi);
+		unregister_netdev(port->dev);
 
 		list_del_rcu(&port->list);
 
 		synchronize_rcu();
-		del_timer_sync(&port->clean_timer);
 		spin_lock_irqsave(&port->vp->lock, flags);
 		sunvnet_port_rm_txq_common(port);
 		spin_unlock_irqrestore(&port->vp->lock, flags);
@@ -427,7 +428,6 @@ static int vsw_port_remove(struct vio_dev *vdev)
 
 		dev_set_drvdata(&vdev->dev, NULL);
 
-		unregister_netdev(port->dev);
 		free_netdev(port->dev);
 	}
 
diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c
index 1562ab4151e1..56ba411421f0 100644
--- a/drivers/net/ethernet/ti/cpsw-common.c
+++ b/drivers/net/ethernet/ti/cpsw-common.c
@@ -90,7 +90,7 @@ int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr)
 	if (of_device_is_compatible(dev->of_node, "ti,dm816-emac"))
 		return cpsw_am33xx_cm_get_macid(dev, 0x30, slave, mac_addr);
 
-	if (of_machine_is_compatible("ti,am4372"))
+	if (of_machine_is_compatible("ti,am43"))
 		return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr);
 
 	if (of_machine_is_compatible("ti,dra7"))
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 729a7da90b5b..e6222e535019 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1353,9 +1353,10 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe)
 
 	tx_pipe->dma_channel = knav_dma_open_channel(dev,
 				tx_pipe->dma_chan_name, &config);
-	if (IS_ERR_OR_NULL(tx_pipe->dma_channel)) {
+	if (IS_ERR(tx_pipe->dma_channel)) {
 		dev_err(dev, "failed opening tx chan(%s)\n",
 			tx_pipe->dma_chan_name);
+		ret = PTR_ERR(tx_pipe->dma_channel);
 		goto err;
 	}
 
@@ -1673,9 +1674,10 @@ static int netcp_setup_navigator_resources(struct net_device *ndev)
 
 	netcp->rx_channel = knav_dma_open_channel(netcp->netcp_device->device,
 					netcp->dma_chan_name, &config);
-	if (IS_ERR_OR_NULL(netcp->rx_channel)) {
+	if (IS_ERR(netcp->rx_channel)) {
 		dev_err(netcp->ndev_dev, "failed opening rx chan(%s\n",
 			netcp->dma_chan_name);
+		ret = PTR_ERR(netcp->rx_channel);
 		goto fail;
 	}
 
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 897176fc5043..dd92950a4615 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -2651,7 +2651,6 @@ static int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *ifr)
 	case HWTSTAMP_FILTER_NONE:
 		cpts_rx_enable(cpts, 0);
 		break;
-	case HWTSTAMP_FILTER_ALL:
 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
 	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index dec5d563ab19..199459bd6961 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1007,7 +1007,7 @@ static void geneve_setup(struct net_device *dev)
 
 	dev->netdev_ops = &geneve_netdev_ops;
 	dev->ethtool_ops = &geneve_ethtool_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 
 	SET_NETDEV_DEVTYPE(dev, &geneve_type);
 
@@ -1133,7 +1133,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
 
 	/* make enough headroom for basic scenario */
 	encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
-	if (ip_tunnel_info_af(info) == AF_INET) {
+	if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
 		encap_len += sizeof(struct iphdr);
 		dev->max_mtu -= sizeof(struct iphdr);
 	} else {
@@ -1293,7 +1293,7 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
 		goto nla_put_failure;
 
-	if (ip_tunnel_info_af(info) == AF_INET) {
+	if (rtnl_dereference(geneve->sock4)) {
 		if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
 				    info->key.u.ipv4.dst))
 			goto nla_put_failure;
@@ -1302,8 +1302,10 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			       !!(info->key.tun_flags & TUNNEL_CSUM)))
 			goto nla_put_failure;
 
+	}
+
 #if IS_ENABLED(CONFIG_IPV6)
-	} else {
+	if (rtnl_dereference(geneve->sock6)) {
 		if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
 				     &info->key.u.ipv6.dst))
 			goto nla_put_failure;
@@ -1315,8 +1317,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
 		if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
 			       !geneve->use_udp6_rx_checksums))
 			goto nla_put_failure;
-#endif
 	}
+#endif
 
 	if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
 	    nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 4fea1b3dfbb4..ca110cd2a4e4 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -611,7 +611,7 @@ static const struct net_device_ops gtp_netdev_ops = {
 static void gtp_link_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &gtp_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 
 	dev->hard_header_len = 0;
 	dev->addr_len = 0;
@@ -873,7 +873,7 @@ static struct gtp_dev *gtp_find_dev(struct net *src_net, struct nlattr *nla[])
 
 	/* Check if there's an existing gtpX device to configure */
 	dev = dev_get_by_index_rcu(net, nla_get_u32(nla[GTPA_LINK]));
-	if (dev->netdev_ops == &gtp_netdev_ops)
+	if (dev && dev->netdev_ops == &gtp_netdev_ops)
 		gtp = netdev_priv(dev);
 
 	put_net(net);
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 922bf440e9f1..021a8ec411ab 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -311,7 +311,7 @@ static void sp_setup(struct net_device *dev)
 {
 	/* Finish setting up the DEVICE info. */
 	dev->netdev_ops		= &sp_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->mtu		= SIXP_MTU;
 	dev->hard_header_len	= AX25_MAX_HEADER_LEN;
 	dev->header_ops 	= &ax25_header_ops;
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 594fa1407e29..1503f10122f7 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -1176,7 +1176,7 @@ static int iobase[NR_PORTS] = { 0x378, };
 
 module_param_array(mode, charp, NULL, 0);
 MODULE_PARM_DESC(mode, "baycom operating mode");
-module_param_array(iobase, int, NULL, 0);
+module_param_hw_array(iobase, int, ioport, NULL, 0);
 MODULE_PARM_DESC(iobase, "baycom io base address");
 
 MODULE_AUTHOR("Thomas M. Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu");
diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c
index 809dc25909d1..92b13b39f426 100644
--- a/drivers/net/hamradio/baycom_par.c
+++ b/drivers/net/hamradio/baycom_par.c
@@ -481,7 +481,7 @@ static int iobase[NR_PORTS] = { 0x378, };
 
 module_param_array(mode, charp, NULL, 0);
 MODULE_PARM_DESC(mode, "baycom operating mode; eg. par96 or picpar");
-module_param_array(iobase, int, NULL, 0);
+module_param_hw_array(iobase, int, ioport, NULL, 0);
 MODULE_PARM_DESC(iobase, "baycom io base address");
 
 MODULE_AUTHOR("Thomas M. Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu");
diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c
index ebc06822fd4d..d9a646acca20 100644
--- a/drivers/net/hamradio/baycom_ser_fdx.c
+++ b/drivers/net/hamradio/baycom_ser_fdx.c
@@ -614,9 +614,9 @@ static int baud[NR_PORTS] = { [0 ... NR_PORTS-1] = 1200 };
 
 module_param_array(mode, charp, NULL, 0);
 MODULE_PARM_DESC(mode, "baycom operating mode; * for software DCD");
-module_param_array(iobase, int, NULL, 0);
+module_param_hw_array(iobase, int, ioport, NULL, 0);
 MODULE_PARM_DESC(iobase, "baycom io base address");
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "baycom irq number");
 module_param_array(baud, int, NULL, 0);
 MODULE_PARM_DESC(baud, "baycom baud rate (300 to 4800)");
diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c
index 60fcf512c208..f1c8a9ff3891 100644
--- a/drivers/net/hamradio/baycom_ser_hdx.c
+++ b/drivers/net/hamradio/baycom_ser_hdx.c
@@ -642,9 +642,9 @@ static int irq[NR_PORTS] = { 4, };
 
 module_param_array(mode, charp, NULL, 0);
 MODULE_PARM_DESC(mode, "baycom operating mode; * for software DCD");
-module_param_array(iobase, int, NULL, 0);
+module_param_hw_array(iobase, int, ioport, NULL, 0);
 MODULE_PARM_DESC(iobase, "baycom io base address");
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "baycom irq number");
 
 MODULE_AUTHOR("Thomas M. Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu");
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index f62e7f325cf9..78a6414c5fd9 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -476,7 +476,7 @@ static const struct net_device_ops bpq_netdev_ops = {
 static void bpq_setup(struct net_device *dev)
 {
 	dev->netdev_ops	     = &bpq_netdev_ops;
-	dev->destructor	     = free_netdev;
+	dev->needs_free_netdev = true;
 
 	memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN);
 	memcpy(dev->dev_addr,  &ax25_defaddr, AX25_ADDR_LEN);
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index 2479072981a1..dec6b76bc0fb 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -274,7 +274,7 @@ static unsigned long rand;
 
 MODULE_AUTHOR("Klaus Kudielka");
 MODULE_DESCRIPTION("Driver for high-speed SCC boards");
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_LICENSE("GPL");
 
 static void __exit dmascc_exit(void)
diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c
index 8c3633c1d078..97e3bc60c3e7 100644
--- a/drivers/net/hamradio/hdlcdrv.c
+++ b/drivers/net/hamradio/hdlcdrv.c
@@ -576,6 +576,8 @@ static int hdlcdrv_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	case HDLCDRVCTL_CALIBRATE:
 		if(!capable(CAP_SYS_RAWIO))
 			return -EPERM;
+		if (s->par.bitrate <= 0)
+			return -EINVAL;
 		if (bi.data.calibrate > INT_MAX / s->par.bitrate)
 			return -EINVAL;
 		s->hdlctx.calibrate = bi.data.calibrate * s->par.bitrate / 16;
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 262b2ea576a3..6066f1bcaf2d 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -171,6 +171,8 @@ struct rndis_device {
 	spinlock_t request_lock;
 	struct list_head req_list;
 
+	struct work_struct mcast_work;
+
 	u8 hw_mac_adr[ETH_ALEN];
 	u8 rss_key[NETVSC_HASH_KEYLEN];
 	u16 ind_table[ITAB_NUM];
@@ -201,6 +203,7 @@ int rndis_filter_open(struct netvsc_device *nvdev);
 int rndis_filter_close(struct netvsc_device *nvdev);
 int rndis_filter_device_add(struct hv_device *dev,
 			    struct netvsc_device_info *info);
+void rndis_filter_update(struct netvsc_device *nvdev);
 void rndis_filter_device_remove(struct hv_device *dev,
 				struct netvsc_device *nvdev);
 int rndis_filter_set_rss_param(struct rndis_device *rdev,
@@ -211,7 +214,6 @@ int rndis_filter_receive(struct net_device *ndev,
 			 struct vmbus_channel *channel,
 			 void *data, u32 buflen);
 
-int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter);
 int rndis_filter_set_device_mac(struct net_device *ndev, char *mac);
 
 void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
@@ -696,7 +698,6 @@ struct net_device_context {
 	/* list protection */
 	spinlock_t lock;
 
-	struct work_struct work;
 	u32 msg_enable; /* debug level */
 
 	u32 tx_checksum_mask;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 4421a6d00375..643c539a08ba 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -56,37 +56,12 @@ static int debug = -1;
 module_param(debug, int, S_IRUGO);
 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
 
-static void do_set_multicast(struct work_struct *w)
-{
-	struct net_device_context *ndevctx =
-		container_of(w, struct net_device_context, work);
-	struct hv_device *device_obj = ndevctx->device_ctx;
-	struct net_device *ndev = hv_get_drvdata(device_obj);
-	struct netvsc_device *nvdev = rcu_dereference(ndevctx->nvdev);
-	struct rndis_device *rdev;
-
-	if (!nvdev)
-		return;
-
-	rdev = nvdev->extension;
-	if (rdev == NULL)
-		return;
-
-	if (ndev->flags & IFF_PROMISC)
-		rndis_filter_set_packet_filter(rdev,
-			NDIS_PACKET_TYPE_PROMISCUOUS);
-	else
-		rndis_filter_set_packet_filter(rdev,
-			NDIS_PACKET_TYPE_BROADCAST |
-			NDIS_PACKET_TYPE_ALL_MULTICAST |
-			NDIS_PACKET_TYPE_DIRECTED);
-}
-
 static void netvsc_set_multicast_list(struct net_device *net)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(net);
+	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 
-	schedule_work(&net_device_ctx->work);
+	rndis_filter_update(nvdev);
 }
 
 static int netvsc_open(struct net_device *net)
@@ -123,8 +98,6 @@ static int netvsc_close(struct net_device *net)
 
 	netif_tx_disable(net);
 
-	/* Make sure netvsc_set_multicast_list doesn't re-enable filter! */
-	cancel_work_sync(&net_device_ctx->work);
 	ret = rndis_filter_close(nvdev);
 	if (ret != 0) {
 		netdev_err(net, "unable to close device (ret %d).\n", ret);
@@ -803,7 +776,7 @@ static int netvsc_set_channels(struct net_device *net,
 	    channels->rx_count || channels->tx_count || channels->other_count)
 		return -EINVAL;
 
-	if (count > net->num_tx_queues || count > net->num_rx_queues)
+	if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX)
 		return -EINVAL;
 
 	if (!nvdev || nvdev->destroy)
@@ -1028,7 +1001,7 @@ static const struct {
 static int netvsc_get_sset_count(struct net_device *dev, int string_set)
 {
 	struct net_device_context *ndc = netdev_priv(dev);
-	struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 
 	if (!nvdev)
 		return -ENODEV;
@@ -1158,11 +1131,22 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-static void netvsc_poll_controller(struct net_device *net)
+static void netvsc_poll_controller(struct net_device *dev)
 {
-	/* As netvsc_start_xmit() works synchronous we don't have to
-	 * trigger anything here.
-	 */
+	struct net_device_context *ndc = netdev_priv(dev);
+	struct netvsc_device *ndev;
+	int i;
+
+	rcu_read_lock();
+	ndev = rcu_dereference(ndc->nvdev);
+	if (ndev) {
+		for (i = 0; i < ndev->num_chn; i++) {
+			struct netvsc_channel *nvchan = &ndev->chan_table[i];
+
+			napi_schedule(&nvchan->napi);
+		}
+	}
+	rcu_read_unlock();
 }
 #endif
 
@@ -1219,7 +1203,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
 	rndis_dev = ndev->extension;
 	if (indir) {
 		for (i = 0; i < ITAB_NUM; i++)
-			if (indir[i] >= dev->num_rx_queues)
+			if (indir[i] >= VRSS_CHANNEL_MAX)
 				return -EINVAL;
 
 		for (i = 0; i < ITAB_NUM; i++)
@@ -1552,7 +1536,6 @@ static int netvsc_probe(struct hv_device *dev,
 	hv_set_drvdata(dev, net);
 
 	INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
-	INIT_WORK(&net_device_ctx->work, do_set_multicast);
 
 	spin_lock_init(&net_device_ctx->lock);
 	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
@@ -1622,7 +1605,6 @@ static int netvsc_remove(struct hv_device *dev)
 	netif_device_detach(net);
 
 	cancel_delayed_work_sync(&ndev_ctx->dwork);
-	cancel_work_sync(&ndev_ctx->work);
 
 	/*
 	 * Call to the vsc driver to let it know that the device is being
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index f9d5b0b8209a..cb79cd081f42 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -31,6 +31,7 @@
 
 #include "hyperv_net.h"
 
+static void rndis_set_multicast(struct work_struct *w);
 
 #define RNDIS_EXT_LEN PAGE_SIZE
 struct rndis_request {
@@ -76,6 +77,7 @@ static struct rndis_device *get_rndis_device(void)
 	spin_lock_init(&device->request_lock);
 
 	INIT_LIST_HEAD(&device->req_list);
+	INIT_WORK(&device->mcast_work, rndis_set_multicast);
 
 	device->state = RNDIS_DEV_UNINITIALIZED;
 
@@ -815,7 +817,8 @@ static int rndis_filter_query_link_speed(struct rndis_device *dev)
 	return ret;
 }
 
-int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
+static int rndis_filter_set_packet_filter(struct rndis_device *dev,
+					  u32 new_filter)
 {
 	struct rndis_request *request;
 	struct rndis_set_request *set;
@@ -846,6 +849,28 @@ int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
 	return ret;
 }
 
+static void rndis_set_multicast(struct work_struct *w)
+{
+	struct rndis_device *rdev
+		= container_of(w, struct rndis_device, mcast_work);
+
+	if (rdev->ndev->flags & IFF_PROMISC)
+		rndis_filter_set_packet_filter(rdev,
+					       NDIS_PACKET_TYPE_PROMISCUOUS);
+	else
+		rndis_filter_set_packet_filter(rdev,
+					       NDIS_PACKET_TYPE_BROADCAST |
+					       NDIS_PACKET_TYPE_ALL_MULTICAST |
+					       NDIS_PACKET_TYPE_DIRECTED);
+}
+
+void rndis_filter_update(struct netvsc_device *nvdev)
+{
+	struct rndis_device *rdev = nvdev->extension;
+
+	schedule_work(&rdev->mcast_work);
+}
+
 static int rndis_filter_init_device(struct rndis_device *dev)
 {
 	struct rndis_request *request;
@@ -973,6 +998,9 @@ static int rndis_filter_close_device(struct rndis_device *dev)
 	if (dev->state != RNDIS_DEV_DATAINITIALIZED)
 		return 0;
 
+	/* Make sure rndis_set_multicast doesn't re-enable filter! */
+	cancel_work_sync(&dev->mcast_work);
+
 	ret = rndis_filter_set_packet_filter(dev, 0);
 	if (ret == -ENODEV)
 		ret = 0;
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 312fce7302d3..144ea5ae8ab4 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -207,7 +207,6 @@ static void ifb_dev_free(struct net_device *dev)
 		__skb_queue_purge(&txp->tq);
 	}
 	kfree(dp->tx_private);
-	free_netdev(dev);
 }
 
 static void ifb_setup(struct net_device *dev)
@@ -230,7 +229,8 @@ static void ifb_setup(struct net_device *dev)
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	netif_keep_dst(dev);
 	eth_hw_addr_random(dev);
-	dev->destructor = ifb_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ifb_dev_free;
 }
 
 static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 618ed88fad0f..7c7680c8f0e3 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -632,7 +632,7 @@ void ipvlan_link_setup(struct net_device *dev)
 	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 	dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
 	dev->netdev_ops = &ipvlan_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->header_ops = &ipvlan_header_ops;
 	dev->ethtool_ops = &ipvlan_ethtool_ops;
 }
diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index c285eafd3f1c..35f198d83701 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -2207,11 +2207,11 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:" ALI_IRCC_DRIVER_NAME);
 
 
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_PARM_DESC(io, "Base I/O addresses");
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "IRQ lines");
-module_param_array(dma, int, NULL, 0);
+module_param_hw_array(dma, int, dma, NULL, 0);
 MODULE_PARM_DESC(dma, "DMA channels");
 
 module_init(ali_ircc_init);
diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c
index 8716b8c07feb..6f3c805f7211 100644
--- a/drivers/net/irda/irda-usb.c
+++ b/drivers/net/irda/irda-usb.c
@@ -1077,7 +1077,7 @@ static int stir421x_patch_device(struct irda_usb_cb *self)
          * are "42101001.sb" or "42101002.sb"
          */
         sprintf(stir421x_fw_name, "4210%4X.sb",
-                self->usbdev->descriptor.bcdDevice);
+		le16_to_cpu(self->usbdev->descriptor.bcdDevice));
         ret = request_firmware(&fw, stir421x_fw_name, &self->usbdev->dev);
         if (ret < 0)
                 return ret;
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index aaecc3baaf30..7beae147be11 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -2396,11 +2396,11 @@ MODULE_LICENSE("GPL");
 
 module_param(qos_mtt_bits, int, 0);
 MODULE_PARM_DESC(qos_mtt_bits, "Minimum Turn Time");
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_PARM_DESC(io, "Base I/O addresses");
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "IRQ lines");
-module_param_array(dma, int, NULL, 0);
+module_param_hw_array(dma, int, dma, NULL, 0);
 MODULE_PARM_DESC(dma, "DMA channels");
 module_param(dongle_id, int, 0);
 MODULE_PARM_DESC(dongle_id, "Type-id of used dongle");
diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
index dcf92ba80872..23ed89ae5ddc 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/net/irda/smsc-ircc2.c
@@ -82,24 +82,24 @@ MODULE_PARM_DESC(nopnp, "Do not use PNP to detect controller settings, defaults
 
 #define DMA_INVAL 255
 static int ircc_dma = DMA_INVAL;
-module_param(ircc_dma, int, 0);
+module_param_hw(ircc_dma, int, dma, 0);
 MODULE_PARM_DESC(ircc_dma, "DMA channel");
 
 #define IRQ_INVAL 255
 static int ircc_irq = IRQ_INVAL;
-module_param(ircc_irq, int, 0);
+module_param_hw(ircc_irq, int, irq, 0);
 MODULE_PARM_DESC(ircc_irq, "IRQ line");
 
 static int ircc_fir;
-module_param(ircc_fir, int, 0);
+module_param_hw(ircc_fir, int, ioport, 0);
 MODULE_PARM_DESC(ircc_fir, "FIR Base Address");
 
 static int ircc_sir;
-module_param(ircc_sir, int, 0);
+module_param_hw(ircc_sir, int, ioport, 0);
 MODULE_PARM_DESC(ircc_sir, "SIR Base Address");
 
 static int ircc_cfg;
-module_param(ircc_cfg, int, 0);
+module_param_hw(ircc_cfg, int, ioport, 0);
 MODULE_PARM_DESC(ircc_cfg, "Configuration register base address");
 
 static int ircc_transceiver;
diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index 8d5b903d1d9d..282b6c9ae05b 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c
@@ -1263,9 +1263,9 @@ MODULE_LICENSE("GPL");
 
 module_param(qos_mtt_bits, int, 0);
 MODULE_PARM_DESC(qos_mtt_bits, "Mimimum Turn Time");
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_PARM_DESC(io, "Base I/O addresses");
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "IRQ lines");
 
 /*
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 224f65cb576b..30612497643c 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -159,7 +159,6 @@ static void loopback_dev_free(struct net_device *dev)
 {
 	dev_net(dev)->loopback_dev = NULL;
 	free_percpu(dev->lstats);
-	free_netdev(dev);
 }
 
 static const struct net_device_ops loopback_ops = {
@@ -196,7 +195,8 @@ static void loopback_setup(struct net_device *dev)
 	dev->ethtool_ops	= &loopback_ethtool_ops;
 	dev->header_ops		= &eth_header_ops;
 	dev->netdev_ops		= &loopback_ops;
-	dev->destructor		= loopback_dev_free;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= loopback_dev_free;
 }
 
 /* Setup and register the loopback device. */
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index cdc347be68f2..79411675f0e6 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -2996,7 +2996,6 @@ static void macsec_free_netdev(struct net_device *dev)
 	free_percpu(macsec->secy.tx_sc.stats);
 
 	dev_put(real_dev);
-	free_netdev(dev);
 }
 
 static void macsec_setup(struct net_device *dev)
@@ -3006,7 +3005,8 @@ static void macsec_setup(struct net_device *dev)
 	dev->max_mtu = ETH_MAX_MTU;
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->netdev_ops = &macsec_netdev_ops;
-	dev->destructor = macsec_free_netdev;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = macsec_free_netdev;
 	SET_NETDEV_DEVTYPE(dev, &macsec_type);
 
 	eth_zero_addr(dev->broadcast);
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index b34eaaae03fd..72b801803aa4 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -39,16 +39,20 @@
 #define MACVLAN_HASH_SIZE	(1<<MACVLAN_HASH_BITS)
 #define MACVLAN_BC_QUEUE_LEN	1000
 
+#define MACVLAN_F_PASSTHRU	1
+#define MACVLAN_F_ADDRCHANGE	2
+
 struct macvlan_port {
 	struct net_device	*dev;
 	struct hlist_head	vlan_hash[MACVLAN_HASH_SIZE];
 	struct list_head	vlans;
 	struct sk_buff_head	bc_queue;
 	struct work_struct	bc_work;
-	bool 			passthru;
+	u32			flags;
 	int			count;
 	struct hlist_head	vlan_source_hash[MACVLAN_HASH_SIZE];
 	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
+	unsigned char           perm_addr[ETH_ALEN];
 };
 
 struct macvlan_source_entry {
@@ -66,6 +70,31 @@ struct macvlan_skb_cb {
 
 static void macvlan_port_destroy(struct net_device *dev);
 
+static inline bool macvlan_passthru(const struct macvlan_port *port)
+{
+	return port->flags & MACVLAN_F_PASSTHRU;
+}
+
+static inline void macvlan_set_passthru(struct macvlan_port *port)
+{
+	port->flags |= MACVLAN_F_PASSTHRU;
+}
+
+static inline bool macvlan_addr_change(const struct macvlan_port *port)
+{
+	return port->flags & MACVLAN_F_ADDRCHANGE;
+}
+
+static inline void macvlan_set_addr_change(struct macvlan_port *port)
+{
+	port->flags |= MACVLAN_F_ADDRCHANGE;
+}
+
+static inline void macvlan_clear_addr_change(struct macvlan_port *port)
+{
+	port->flags &= ~MACVLAN_F_ADDRCHANGE;
+}
+
 /* Hash Ethernet address */
 static u32 macvlan_eth_hash(const unsigned char *addr)
 {
@@ -181,11 +210,12 @@ static void macvlan_hash_change_addr(struct macvlan_dev *vlan,
 static bool macvlan_addr_busy(const struct macvlan_port *port,
 			      const unsigned char *addr)
 {
-	/* Test to see if the specified multicast address is
+	/* Test to see if the specified address is
 	 * currently in use by the underlying device or
 	 * another macvlan.
 	 */
-	if (ether_addr_equal_64bits(port->dev->dev_addr, addr))
+	if (!macvlan_passthru(port) && !macvlan_addr_change(port) &&
+	    ether_addr_equal_64bits(port->dev->dev_addr, addr))
 		return true;
 
 	if (macvlan_hash_lookup(port, addr))
@@ -445,7 +475,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 	}
 
 	macvlan_forward_source(skb, port, eth->h_source);
-	if (port->passthru)
+	if (macvlan_passthru(port))
 		vlan = list_first_or_null_rcu(&port->vlans,
 					      struct macvlan_dev, list);
 	else
@@ -574,7 +604,7 @@ static int macvlan_open(struct net_device *dev)
 	struct net_device *lowerdev = vlan->lowerdev;
 	int err;
 
-	if (vlan->port->passthru) {
+	if (macvlan_passthru(vlan->port)) {
 		if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC)) {
 			err = dev_set_promiscuity(lowerdev, 1);
 			if (err < 0)
@@ -649,7 +679,7 @@ static int macvlan_stop(struct net_device *dev)
 	dev_uc_unsync(lowerdev, dev);
 	dev_mc_unsync(lowerdev, dev);
 
-	if (vlan->port->passthru) {
+	if (macvlan_passthru(vlan->port)) {
 		if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC))
 			dev_set_promiscuity(lowerdev, -1);
 		goto hash_del;
@@ -672,6 +702,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct net_device *lowerdev = vlan->lowerdev;
+	struct macvlan_port *port = vlan->port;
 	int err;
 
 	if (!(dev->flags & IFF_UP)) {
@@ -682,7 +713,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
 		if (macvlan_addr_busy(vlan->port, addr))
 			return -EBUSY;
 
-		if (!vlan->port->passthru) {
+		if (!macvlan_passthru(port)) {
 			err = dev_uc_add(lowerdev, addr);
 			if (err)
 				return err;
@@ -692,6 +723,15 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
 
 		macvlan_hash_change_addr(vlan, addr);
 	}
+	if (macvlan_passthru(port) && !macvlan_addr_change(port)) {
+		/* Since addr_change isn't set, we are here due to lower
+		 * device change.  Save the lower-dev address so we can
+		 * restore it later.
+		 */
+		ether_addr_copy(vlan->port->perm_addr,
+				lowerdev->dev_addr);
+	}
+	macvlan_clear_addr_change(port);
 	return 0;
 }
 
@@ -703,7 +743,12 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p)
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
+	/* If the addresses are the same, this is a no-op */
+	if (ether_addr_equal(dev->dev_addr, addr->sa_data))
+		return 0;
+
 	if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
+		macvlan_set_addr_change(vlan->port);
 		dev_set_mac_address(vlan->lowerdev, addr);
 		return 0;
 	}
@@ -789,10 +834,12 @@ static int macvlan_change_mtu(struct net_device *dev, int new_mtu)
  */
 static struct lock_class_key macvlan_netdev_addr_lock_key;
 
-#define ALWAYS_ON_FEATURES \
-	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | NETIF_F_LLTX | \
+#define ALWAYS_ON_OFFLOADS \
+	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \
 	 NETIF_F_GSO_ROBUST)
 
+#define ALWAYS_ON_FEATURES (ALWAYS_ON_OFFLOADS | NETIF_F_LLTX)
+
 #define MACVLAN_FEATURES \
 	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
 	 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_LRO | \
@@ -827,6 +874,7 @@ static int macvlan_init(struct net_device *dev)
 	dev->features		|= ALWAYS_ON_FEATURES;
 	dev->hw_features	|= NETIF_F_LRO;
 	dev->vlan_features	= lowerdev->vlan_features & MACVLAN_FEATURES;
+	dev->vlan_features	|= ALWAYS_ON_OFFLOADS;
 	dev->gso_max_size	= lowerdev->gso_max_size;
 	dev->gso_max_segs	= lowerdev->gso_max_segs;
 	dev->hard_header_len	= lowerdev->hard_header_len;
@@ -925,7 +973,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	/* Support unicast filter only on passthru devices.
 	 * Multicast filter should be allowed on all devices.
 	 */
-	if (!vlan->port->passthru && is_unicast_ether_addr(addr))
+	if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr))
 		return -EOPNOTSUPP;
 
 	if (flags & NLM_F_REPLACE)
@@ -949,7 +997,7 @@ static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 	/* Support unicast filter only on passthru devices.
 	 * Multicast filter should be allowed on all devices.
 	 */
-	if (!vlan->port->passthru && is_unicast_ether_addr(addr))
+	if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr))
 		return -EOPNOTSUPP;
 
 	if (is_unicast_ether_addr(addr))
@@ -1089,7 +1137,7 @@ void macvlan_common_setup(struct net_device *dev)
 	netif_keep_dst(dev);
 	dev->priv_flags	       |= IFF_UNICAST_FLT;
 	dev->netdev_ops		= &macvlan_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->header_ops		= &macvlan_hard_header_ops;
 	dev->ethtool_ops	= &macvlan_ethtool_ops;
 }
@@ -1117,8 +1165,8 @@ static int macvlan_port_create(struct net_device *dev)
 	if (port == NULL)
 		return -ENOMEM;
 
-	port->passthru = false;
 	port->dev = dev;
+	ether_addr_copy(port->perm_addr, dev->dev_addr);
 	INIT_LIST_HEAD(&port->vlans);
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
@@ -1158,6 +1206,18 @@ static void macvlan_port_destroy(struct net_device *dev)
 		kfree_skb(skb);
 	}
 
+	/* If the lower device address has been changed by passthru
+	 * macvlan, put it back.
+	 */
+	if (macvlan_passthru(port) &&
+	    !ether_addr_equal(port->dev->dev_addr, port->perm_addr)) {
+		struct sockaddr sa;
+
+		sa.sa_family = port->dev->type;
+		memcpy(&sa.sa_data, port->perm_addr, port->dev->addr_len);
+		dev_set_mac_address(port->dev, &sa);
+	}
+
 	kfree(port);
 }
 
@@ -1323,7 +1383,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	port = macvlan_port_get_rtnl(lowerdev);
 
 	/* Only 1 macvlan device can be created in passthru mode */
-	if (port->passthru) {
+	if (macvlan_passthru(port)) {
 		/* The macvlan port must be not created this time,
 		 * still goto destroy_macvlan_port for readability.
 		 */
@@ -1349,7 +1409,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 			err = -EINVAL;
 			goto destroy_macvlan_port;
 		}
-		port->passthru = true;
+		macvlan_set_passthru(port);
 		eth_hw_addr_inherit(dev, lowerdev);
 	}
 
@@ -1431,7 +1491,7 @@ static int macvlan_changelink(struct net_device *dev,
 	if (data && data[IFLA_MACVLAN_FLAGS]) {
 		__u16 flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
 		bool promisc = (flags ^ vlan->flags) & MACVLAN_FLAG_NOPROMISC;
-		if (vlan->port->passthru && promisc) {
+		if (macvlan_passthru(vlan->port) && promisc) {
 			int err;
 
 			if (flags & MACVLAN_FLAG_NOPROMISC)
@@ -1594,7 +1654,7 @@ static int macvlan_device_event(struct notifier_block *unused,
 		}
 		break;
 	case NETDEV_CHANGEADDR:
-		if (!port->passthru)
+		if (!macvlan_passthru(port))
 			return NOTIFY_DONE;
 
 		vlan = list_first_entry_or_null(&port->vlans,
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 06ee6395117f..0e27920c2b6b 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -358,7 +358,7 @@ static ssize_t enabled_store(struct config_item *item,
 		if (err)
 			goto out_unlock;
 
-		pr_info("netconsole: network logging started\n");
+		pr_info("network logging started\n");
 	} else {	/* false */
 		/* We need to disable the netconsole before cleaning it up
 		 * otherwise we might end up in write_msg() with
diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c
index b91603835d26..c4b3362da4a2 100644
--- a/drivers/net/nlmon.c
+++ b/drivers/net/nlmon.c
@@ -113,7 +113,7 @@ static void nlmon_setup(struct net_device *dev)
 
 	dev->netdev_ops	= &nlmon_ops;
 	dev->ethtool_ops = &nlmon_ethtool_ops;
-	dev->destructor	= free_netdev;
+	dev->needs_free_netdev = true;
 
 	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
 			NETIF_F_HIGHDMA | NETIF_F_LLTX;
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 60ffc9da6a28..3ab6c58d4be6 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -108,7 +108,7 @@ config MDIO_MOXART
 config MDIO_OCTEON
 	tristate "Octeon and some ThunderX SOCs MDIO buses"
 	depends on 64BIT
-	depends on HAS_IOMEM
+	depends on HAS_IOMEM && OF_MDIO
 	select MDIO_CAVIUM
 	help
 	  This module provides a driver for the Octeon and ThunderX MDIO
@@ -127,6 +127,7 @@ config MDIO_THUNDER
 	tristate "ThunderX SOCs MDIO buses"
 	depends on 64BIT
 	depends on PCI
+	depends on !(MDIO_DEVICE=y && PHYLIB=m)
 	select MDIO_CAVIUM
 	help
 	  This driver supports the MDIO interfaces found on Cavium
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index ed0d10f54f26..c3065236ffcc 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -908,7 +908,7 @@ static void decode_txts(struct dp83640_private *dp83640,
 	if (overflow) {
 		pr_debug("tx timestamp queue overflow, count %d\n", overflow);
 		while (skb) {
-			skb_complete_tx_timestamp(skb, NULL);
+			kfree_skb(skb);
 			skb = skb_dequeue(&dp83640->tx_queue);
 		}
 		return;
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 272b051a0199..57297ba23987 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -255,34 +255,6 @@ static int marvell_config_aneg(struct phy_device *phydev)
 {
 	int err;
 
-	/* The Marvell PHY has an errata which requires
-	 * that certain registers get written in order
-	 * to restart autonegotiation */
-	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
-
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1d, 0x1f);
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1e, 0x200c);
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1d, 0x5);
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1e, 0);
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1e, 0x100);
-	if (err < 0)
-		return err;
-
 	err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
 	if (err < 0)
 		return err;
@@ -316,6 +288,42 @@ static int marvell_config_aneg(struct phy_device *phydev)
 	return 0;
 }
 
+static int m88e1101_config_aneg(struct phy_device *phydev)
+{
+	int err;
+
+	/* This Marvell PHY has an errata which requires
+	 * that certain registers get written in order
+	 * to restart autonegotiation
+	 */
+	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1d, 0x1f);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1e, 0x200c);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1d, 0x5);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1e, 0);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1e, 0x100);
+	if (err < 0)
+		return err;
+
+	return marvell_config_aneg(phydev);
+}
+
 static int m88e1111_config_aneg(struct phy_device *phydev)
 {
 	int err;
@@ -1119,8 +1127,6 @@ static int marvell_read_status_page(struct phy_device *phydev, int page)
 		if (adv < 0)
 			return adv;
 
-		lpa &= adv;
-
 		if (status & MII_M1011_PHY_STATUS_FULLDUPLEX)
 			phydev->duplex = DUPLEX_FULL;
 		else
@@ -1892,7 +1898,7 @@ static struct phy_driver marvell_drivers[] = {
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
 		.config_init = &marvell_config_init,
-		.config_aneg = &marvell_config_aneg,
+		.config_aneg = &m88e1101_config_aneg,
 		.read_status = &genphy_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
index 963838d4fac1..599ce24c514f 100644
--- a/drivers/net/phy/mdio-mux.c
+++ b/drivers/net/phy/mdio-mux.c
@@ -122,10 +122,9 @@ int mdio_mux_init(struct device *dev,
 	pb = devm_kzalloc(dev, sizeof(*pb), GFP_KERNEL);
 	if (pb == NULL) {
 		ret_val = -ENOMEM;
-		goto err_parent_bus;
+		goto err_pb_kz;
 	}
 
-
 	pb->switch_data = data;
 	pb->switch_fn = switch_fn;
 	pb->current_child = -1;
@@ -154,6 +153,7 @@ int mdio_mux_init(struct device *dev,
 		cb->mii_bus = mdiobus_alloc();
 		if (!cb->mii_bus) {
 			ret_val = -ENOMEM;
+			devm_kfree(dev, cb);
 			of_node_put(child_bus_node);
 			break;
 		}
@@ -170,7 +170,6 @@ int mdio_mux_init(struct device *dev,
 			mdiobus_free(cb->mii_bus);
 			devm_kfree(dev, cb);
 		} else {
-			of_node_get(child_bus_node);
 			cb->next = pb->children;
 			pb->children = cb;
 		}
@@ -181,9 +180,11 @@ int mdio_mux_init(struct device *dev,
 		return 0;
 	}
 
+	devm_kfree(dev, pb);
+err_pb_kz:
 	/* balance the reference of_mdio_find_bus() took */
-	put_device(&pb->mii_bus->dev);
-
+	if (!mux_bus)
+		put_device(&parent_bus->dev);
 err_parent_bus:
 	of_node_put(parent_bus_node);
 	return ret_val;
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index a898e5c4ef1b..f99c21f78b63 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -364,9 +364,6 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
 
 	mutex_init(&bus->mdio_lock);
 
-	if (bus->reset)
-		bus->reset(bus);
-
 	/* de-assert bus level PHY GPIO resets */
 	if (bus->num_reset_gpios > 0) {
 		bus->reset_gpiod = devm_kcalloc(&bus->dev,
@@ -396,6 +393,9 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
 		}
 	}
 
+	if (bus->reset)
+		bus->reset(bus);
+
 	for (i = 0; i < PHY_MAX_ADDR; i++) {
 		if ((bus->phy_mask & (1 << i)) == 0) {
 			struct phy_device *phydev;
@@ -658,6 +658,18 @@ static int mdio_bus_match(struct device *dev, struct device_driver *drv)
 	return 0;
 }
 
+static int mdio_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	int rc;
+
+	/* Some devices have extra OF data and an OF-style MODALIAS */
+	rc = of_device_uevent_modalias(dev, env);
+	if (rc != -ENODEV)
+		return rc;
+
+	return 0;
+}
+
 #ifdef CONFIG_PM
 static int mdio_bus_suspend(struct device *dev)
 {
@@ -708,6 +720,7 @@ static const struct dev_pm_ops mdio_bus_pm_ops = {
 struct bus_type mdio_bus_type = {
 	.name		= "mdio_bus",
 	.match		= mdio_bus_match,
+	.uevent		= mdio_uevent,
 	.pm		= MDIO_BUS_PM_OPS,
 };
 EXPORT_SYMBOL(mdio_bus_type);
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 6a5fd18f062c..8b2038844ba9 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -268,23 +268,12 @@ out:
 	return ret;
 }
 
-static int kszphy_config_init(struct phy_device *phydev)
+/* Some config bits need to be set again on resume, handle them here. */
+static int kszphy_config_reset(struct phy_device *phydev)
 {
 	struct kszphy_priv *priv = phydev->priv;
-	const struct kszphy_type *type;
 	int ret;
 
-	if (!priv)
-		return 0;
-
-	type = priv->type;
-
-	if (type->has_broadcast_disable)
-		kszphy_broadcast_disable(phydev);
-
-	if (type->has_nand_tree_disable)
-		kszphy_nand_tree_disable(phydev);
-
 	if (priv->rmii_ref_clk_sel) {
 		ret = kszphy_rmii_clk_sel(phydev, priv->rmii_ref_clk_sel_val);
 		if (ret) {
@@ -295,11 +284,30 @@ static int kszphy_config_init(struct phy_device *phydev)
 	}
 
 	if (priv->led_mode >= 0)
-		kszphy_setup_led(phydev, type->led_mode_reg, priv->led_mode);
+		kszphy_setup_led(phydev, priv->type->led_mode_reg, priv->led_mode);
 
 	return 0;
 }
 
+static int kszphy_config_init(struct phy_device *phydev)
+{
+	struct kszphy_priv *priv = phydev->priv;
+	const struct kszphy_type *type;
+
+	if (!priv)
+		return 0;
+
+	type = priv->type;
+
+	if (type->has_broadcast_disable)
+		kszphy_broadcast_disable(phydev);
+
+	if (type->has_nand_tree_disable)
+		kszphy_nand_tree_disable(phydev);
+
+	return kszphy_config_reset(phydev);
+}
+
 static int ksz8041_config_init(struct phy_device *phydev)
 {
 	struct device_node *of_node = phydev->mdio.dev.of_node;
@@ -611,6 +619,8 @@ static int ksz9031_read_status(struct phy_device *phydev)
 	if ((regval & 0xFF) == 0xFF) {
 		phy_init_hw(phydev);
 		phydev->link = 0;
+		if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
+			phydev->drv->config_intr(phydev);
 	}
 
 	return 0;
@@ -700,8 +710,14 @@ static int kszphy_suspend(struct phy_device *phydev)
 
 static int kszphy_resume(struct phy_device *phydev)
 {
+	int ret;
+
 	genphy_resume(phydev);
 
+	ret = kszphy_config_reset(phydev);
+	if (ret)
+		return ret;
+
 	/* Enable PHY Interrupts */
 	if (phy_interrupt_is_valid(phydev)) {
 		phydev->interrupts = PHY_INTERRUPT_ENABLED;
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 82ab8fb82587..eebb0e1c70ff 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -54,6 +54,8 @@ static const char *phy_speed_to_str(int speed)
 		return "5Gbps";
 	case SPEED_10000:
 		return "10Gbps";
+	case SPEED_14000:
+		return "14Gbps";
 	case SPEED_20000:
 		return "20Gbps";
 	case SPEED_25000:
@@ -241,7 +243,7 @@ static const struct phy_setting settings[] = {
  * phy_lookup_setting - lookup a PHY setting
  * @speed: speed to match
  * @duplex: duplex to match
- * @feature: allowed link modes
+ * @features: allowed link modes
  * @exact: an exact match is required
  *
  * Search the settings array for a setting that matches the speed and
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index 1da31dc47f86..74b907206aa7 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -629,7 +629,7 @@ static void sl_uninit(struct net_device *dev)
 static void sl_free_netdev(struct net_device *dev)
 {
 	int i = dev->base_addr;
-	free_netdev(dev);
+
 	slip_devs[i] = NULL;
 }
 
@@ -651,7 +651,8 @@ static const struct net_device_ops sl_netdev_ops = {
 static void sl_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &sl_netdev_ops;
-	dev->destructor		= sl_free_netdev;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= sl_free_netdev;
 
 	dev->hard_header_len	= 0;
 	dev->addr_len		= 0;
@@ -1369,8 +1370,6 @@ static void __exit slip_exit(void)
 		if (sl->tty) {
 			printk(KERN_ERR "%s: tty discipline still running\n",
 			       dev->name);
-			/* Intentionally leak the control block. */
-			dev->destructor = NULL;
 		}
 
 		unregister_netdev(dev);
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 6c5d5ef46f75..fba8c136aa7c 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1643,7 +1643,6 @@ static void team_destructor(struct net_device *dev)
 	struct team *team = netdev_priv(dev);
 
 	free_percpu(team->pcpu_stats);
-	free_netdev(dev);
 }
 
 static int team_open(struct net_device *dev)
@@ -2079,7 +2078,8 @@ static void team_setup(struct net_device *dev)
 
 	dev->netdev_ops = &team_netdev_ops;
 	dev->ethtool_ops = &team_ethtool_ops;
-	dev->destructor	= team_destructor;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = team_destructor;
 	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->priv_flags |= IFF_TEAM;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index bbd707b9ef7a..9ee7d4275640 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1560,7 +1560,6 @@ static void tun_free_netdev(struct net_device *dev)
 	free_percpu(tun->pcpu_stats);
 	tun_flow_uninit(tun);
 	security_tun_dev_free_security(tun->security);
-	free_netdev(dev);
 }
 
 static void tun_setup(struct net_device *dev)
@@ -1571,7 +1570,8 @@ static void tun_setup(struct net_device *dev)
 	tun->group = INVALID_GID;
 
 	dev->ethtool_ops = &tun_ethtool_ops;
-	dev->destructor = tun_free_netdev;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = tun_free_netdev;
 	/* We prefer our own queue length */
 	dev->tx_queue_len = TUN_READQ_SIZE;
 }
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 51cf60092a18..4037ab27734a 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1722,6 +1722,18 @@ static const struct driver_info lenovo_info = {
 	.tx_fixup = ax88179_tx_fixup,
 };
 
+static const struct driver_info belkin_info = {
+	.description = "Belkin USB Ethernet Adapter",
+	.bind	= ax88179_bind,
+	.unbind = ax88179_unbind,
+	.status = ax88179_status,
+	.link_reset = ax88179_link_reset,
+	.reset	= ax88179_reset,
+	.flags	= FLAG_ETHER | FLAG_FRAMING_AX,
+	.rx_fixup = ax88179_rx_fixup,
+	.tx_fixup = ax88179_tx_fixup,
+};
+
 static const struct usb_device_id products[] = {
 {
 	/* ASIX AX88179 10/100/1000 */
@@ -1751,6 +1763,10 @@ static const struct usb_device_id products[] = {
 	/* Lenovo OneLinkDock Gigabit LAN */
 	USB_DEVICE(0x17ef, 0x304b),
 	.driver_info = (unsigned long)&lenovo_info,
+}, {
+	/* Belkin B2B128 USB 3.0 Hub + Gigabit Ethernet Adapter */
+	USB_DEVICE(0x050d, 0x0128),
+	.driver_info = (unsigned long)&belkin_info,
 },
 	{ },
 };
diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c
index eb52de8205f0..c7a350bbaaa7 100644
--- a/drivers/net/usb/cdc-phonet.c
+++ b/drivers/net/usb/cdc-phonet.c
@@ -298,7 +298,7 @@ static void usbpn_setup(struct net_device *dev)
 	dev->addr_len		= 1;
 	dev->tx_queue_len	= 3;
 
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 /*
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index f3ae88fdf332..8ab281b478f2 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -310,6 +310,26 @@ skip:
 		return -ENODEV;
 	}
 
+	return 0;
+
+bad_desc:
+	dev_info(&dev->udev->dev, "bad CDC descriptors\n");
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind);
+
+
+/* like usbnet_generic_cdc_bind() but handles filter initialization
+ * correctly
+ */
+int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
+{
+	int rv;
+
+	rv = usbnet_generic_cdc_bind(dev, intf);
+	if (rv < 0)
+		goto bail_out;
+
 	/* Some devices don't initialise properly. In particular
 	 * the packet filter is not reset. There are devices that
 	 * don't do reset all the way. So the packet filter should
@@ -317,13 +337,10 @@ skip:
 	 */
 	usbnet_cdc_update_filter(dev);
 
-	return 0;
-
-bad_desc:
-	dev_info(&dev->udev->dev, "bad CDC descriptors\n");
-	return -ENODEV;
+bail_out:
+	return rv;
 }
-EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind);
+EXPORT_SYMBOL_GPL(usbnet_ether_cdc_bind);
 
 void usbnet_cdc_unbind(struct usbnet *dev, struct usb_interface *intf)
 {
@@ -417,7 +434,7 @@ int usbnet_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
 	BUILD_BUG_ON((sizeof(((struct usbnet *)0)->data)
 			< sizeof(struct cdc_state)));
 
-	status = usbnet_generic_cdc_bind(dev, intf);
+	status = usbnet_ether_cdc_bind(dev, intf);
 	if (status < 0)
 		return status;
 
diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c
index c4f1c363e24b..9df3c1ffff35 100644
--- a/drivers/net/usb/ch9200.c
+++ b/drivers/net/usb/ch9200.c
@@ -310,8 +310,8 @@ static int get_mac_address(struct usbnet *dev, unsigned char *data)
 	int rd_mac_len = 0;
 
 	netdev_dbg(dev->net, "get_mac_address:\n\tusbnet VID:%0x PID:%0x\n",
-		   dev->udev->descriptor.idVendor,
-		   dev->udev->descriptor.idProduct);
+		   le16_to_cpu(dev->udev->descriptor.idVendor),
+		   le16_to_cpu(dev->udev->descriptor.idProduct));
 
 	memset(mac_addr, 0, sizeof(mac_addr));
 	rd_mac_len = control_read(dev, REQUEST_READ, 0,
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index d7165767ca9d..32a22f4e8356 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -123,7 +123,7 @@ static void qmimux_setup(struct net_device *dev)
 	dev->addr_len        = 0;
 	dev->flags           = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
 	dev->netdev_ops      = &qmimux_netdev_ops;
-	dev->destructor      = free_netdev;
+	dev->needs_free_netdev = true;
 }
 
 static struct net_device *qmimux_find_dev(struct usbnet *dev, u8 mux_id)
@@ -1192,10 +1192,14 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x1199, 0x9056, 8)},	/* Sierra Wireless Modem */
 	{QMI_FIXED_INTF(0x1199, 0x9057, 8)},
 	{QMI_FIXED_INTF(0x1199, 0x9061, 8)},	/* Sierra Wireless Modem */
+	{QMI_FIXED_INTF(0x1199, 0x9063, 8)},	/* Sierra Wireless EM7305 */
+	{QMI_FIXED_INTF(0x1199, 0x9063, 10)},	/* Sierra Wireless EM7305 */
 	{QMI_FIXED_INTF(0x1199, 0x9071, 8)},	/* Sierra Wireless MC74xx */
 	{QMI_FIXED_INTF(0x1199, 0x9071, 10)},	/* Sierra Wireless MC74xx */
 	{QMI_FIXED_INTF(0x1199, 0x9079, 8)},	/* Sierra Wireless EM74xx */
 	{QMI_FIXED_INTF(0x1199, 0x9079, 10)},	/* Sierra Wireless EM74xx */
+	{QMI_FIXED_INTF(0x1199, 0x907b, 8)},	/* Sierra Wireless EM74xx */
+	{QMI_FIXED_INTF(0x1199, 0x907b, 10)},	/* Sierra Wireless EM74xx */
 	{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)},	/* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
 	{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)},	/* Alcatel L800MA */
 	{QMI_FIXED_INTF(0x2357, 0x0201, 4)},	/* TP-LINK HSUPA Modem MA180 */
@@ -1204,6 +1208,8 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x1bc7, 0x1100, 3)},	/* Telit ME910 */
 	{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},	/* Telit LE920 */
 	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)},	/* Telit LE920, LE920A4 */
+	{QMI_FIXED_INTF(0x1c9e, 0x9801, 3)},	/* Telewell TW-3G HSPA+ */
+	{QMI_FIXED_INTF(0x1c9e, 0x9803, 4)},	/* Telewell TW-3G HSPA+ */
 	{QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)},	/* XS Stick W100-2 from 4G Systems */
 	{QMI_FIXED_INTF(0x0b3c, 0xc000, 4)},	/* Olivetti Olicard 100 */
 	{QMI_FIXED_INTF(0x0b3c, 0xc001, 4)},	/* Olivetti Olicard 120 */
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index ddc62cb69be8..1a419a45e2a2 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -4368,6 +4368,8 @@ static u8 rtl_get_version(struct usb_interface *intf)
 		break;
 	}
 
+	dev_dbg(&intf->dev, "Detected version 0x%04x\n", version);
+
 	return version;
 }
 
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 765400b62168..2dfca96a63b6 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -681,7 +681,7 @@ static int smsc95xx_set_features(struct net_device *netdev,
 	if (ret < 0)
 		return ret;
 
-	if (features & NETIF_F_HW_CSUM)
+	if (features & NETIF_F_IP_CSUM)
 		read_buf |= Tx_COE_EN_;
 	else
 		read_buf &= ~Tx_COE_EN_;
@@ -1279,12 +1279,19 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
 
 	spin_lock_init(&pdata->mac_cr_lock);
 
+	/* LAN95xx devices do not alter the computed checksum of 0 to 0xffff.
+	 * RFC 2460, ipv6 UDP calculated checksum yields a result of zero must
+	 * be changed to 0xffff. RFC 768, ipv4 UDP computed checksum is zero,
+	 * it is transmitted as all ones. The zero transmitted checksum means
+	 * transmitter generated no checksum. Hence, enable csum offload only
+	 * for ipv4 packets.
+	 */
 	if (DEFAULT_TX_CSUM_ENABLE)
-		dev->net->features |= NETIF_F_HW_CSUM;
+		dev->net->features |= NETIF_F_IP_CSUM;
 	if (DEFAULT_RX_CSUM_ENABLE)
 		dev->net->features |= NETIF_F_RXCSUM;
 
-	dev->net->hw_features = NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
+	dev->net->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
 
 	smsc95xx_init_mac_address(dev);
 
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 38f0f03a29c8..364fa9d11d1a 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -222,7 +222,6 @@ static int veth_dev_init(struct net_device *dev)
 static void veth_dev_free(struct net_device *dev)
 {
 	free_percpu(dev->vstats);
-	free_netdev(dev);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -317,7 +316,8 @@ static void veth_setup(struct net_device *dev)
 			       NETIF_F_HW_VLAN_STAG_TX |
 			       NETIF_F_HW_VLAN_CTAG_RX |
 			       NETIF_F_HW_VLAN_STAG_RX);
-	dev->destructor = veth_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = veth_dev_free;
 	dev->max_mtu = ETH_MAX_MTU;
 
 	dev->hw_features = VETH_FEATURES;
@@ -383,7 +383,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 		tbp = tb;
 	}
 
-	if (tbp[IFLA_IFNAME]) {
+	if (ifmp && tbp[IFLA_IFNAME]) {
 		nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
 		name_assign_type = NET_NAME_USER;
 	} else {
@@ -402,7 +402,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 		return PTR_ERR(peer);
 	}
 
-	if (tbp[IFLA_ADDRESS] == NULL)
+	if (!ifmp || !tbp[IFLA_ADDRESS])
 		eth_hw_addr_random(peer);
 
 	if (ifmp && (dev->ifindex != 0))
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1c6d3923c224..143d8a95a60d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/average.h>
+#include <net/route.h>
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -54,17 +55,6 @@ module_param(napi_tx, bool, 0644);
  */
 DECLARE_EWMA(pkt_len, 0, 64)
 
-/* With mergeable buffers we align buffer address and use the low bits to
- * encode its true size. Buffer size is up to 1 page so we need to align to
- * square root of page size to ensure we reserve enough bits to encode the true
- * size.
- */
-#define MERGEABLE_BUFFER_MIN_ALIGN_SHIFT ((PAGE_SHIFT + 1) / 2)
-
-/* Minimum alignment for mergeable packet buffers. */
-#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, \
-				   1 << MERGEABLE_BUFFER_MIN_ALIGN_SHIFT)
-
 #define VIRTNET_DRIVER_VERSION "1.0.0"
 
 struct virtnet_stats {
@@ -112,6 +102,9 @@ struct receive_queue {
 	/* RX: fragments + linear part + virtio header */
 	struct scatterlist sg[MAX_SKB_FRAGS + 2];
 
+	/* Min single buffer size for mergeable buffers case. */
+	unsigned int min_buf_len;
+
 	/* Name of this receive queue: input.$index */
 	char name[40];
 };
@@ -277,24 +270,6 @@ static void skb_xmit_done(struct virtqueue *vq)
 		netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
-static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
-{
-	unsigned int truesize = mrg_ctx & (MERGEABLE_BUFFER_ALIGN - 1);
-	return (truesize + 1) * MERGEABLE_BUFFER_ALIGN;
-}
-
-static void *mergeable_ctx_to_buf_address(unsigned long mrg_ctx)
-{
-	return (void *)(mrg_ctx & -MERGEABLE_BUFFER_ALIGN);
-
-}
-
-static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize)
-{
-	unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN;
-	return (unsigned long)buf | (size - 1);
-}
-
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 				   struct receive_queue *rq,
@@ -538,15 +513,13 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 
 	while (--*num_buf) {
 		unsigned int buflen;
-		unsigned long ctx;
 		void *buf;
 		int off;
 
-		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &buflen);
-		if (unlikely(!ctx))
+		buf = virtqueue_get_buf(rq->vq, &buflen);
+		if (unlikely(!buf))
 			goto err_buf;
 
-		buf = mergeable_ctx_to_buf_address(ctx);
 		p = virt_to_head_page(buf);
 		off = buf - page_address(p);
 
@@ -575,10 +548,10 @@ err_buf:
 static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 struct virtnet_info *vi,
 					 struct receive_queue *rq,
-					 unsigned long ctx,
+					 void *buf,
+					 void *ctx,
 					 unsigned int len)
 {
-	void *buf = mergeable_ctx_to_buf_address(ctx);
 	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
 	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
 	struct page *page = virt_to_head_page(buf);
@@ -666,7 +639,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	}
 	rcu_read_unlock();
 
-	truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+	if (unlikely(len > (unsigned long)ctx)) {
+		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
+			 dev->name, len, (unsigned long)ctx);
+		dev->stats.rx_length_errors++;
+		goto err_skb;
+	}
+	truesize = (unsigned long)ctx;
 	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
 	curr_skb = head_skb;
 
@@ -675,7 +654,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	while (--num_buf) {
 		int num_skb_frags;
 
-		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+		buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
 		if (unlikely(!ctx)) {
 			pr_debug("%s: rx error: %d buffers out of %d missing\n",
 				 dev->name, num_buf,
@@ -685,8 +664,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			goto err_buf;
 		}
 
-		buf = mergeable_ctx_to_buf_address(ctx);
 		page = virt_to_head_page(buf);
+		if (unlikely(len > (unsigned long)ctx)) {
+			pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
+				 dev->name, len, (unsigned long)ctx);
+			dev->stats.rx_length_errors++;
+			goto err_skb;
+		}
+		truesize = (unsigned long)ctx;
 
 		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
 		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
@@ -702,7 +687,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			head_skb->truesize += nskb->truesize;
 			num_skb_frags = 0;
 		}
-		truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
 		if (curr_skb != head_skb) {
 			head_skb->data_len += len;
 			head_skb->len += len;
@@ -727,14 +711,14 @@ err_xdp:
 err_skb:
 	put_page(page);
 	while (--num_buf) {
-		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
-		if (unlikely(!ctx)) {
+		buf = virtqueue_get_buf(rq->vq, &len);
+		if (unlikely(!buf)) {
 			pr_debug("%s: rx error: %d buffers missing\n",
 				 dev->name, num_buf);
 			dev->stats.rx_length_errors++;
 			break;
 		}
-		page = virt_to_head_page(mergeable_ctx_to_buf_address(ctx));
+		page = virt_to_head_page(buf);
 		put_page(page);
 	}
 err_buf:
@@ -745,7 +729,7 @@ xdp_xmit:
 }
 
 static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
-		       void *buf, unsigned int len)
+		       void *buf, unsigned int len, void **ctx)
 {
 	struct net_device *dev = vi->dev;
 	struct sk_buff *skb;
@@ -756,9 +740,7 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
 		pr_debug("%s: short packet %i\n", dev->name, len);
 		dev->stats.rx_length_errors++;
 		if (vi->mergeable_rx_bufs) {
-			unsigned long ctx = (unsigned long)buf;
-			void *base = mergeable_ctx_to_buf_address(ctx);
-			put_page(virt_to_head_page(base));
+			put_page(virt_to_head_page(buf));
 		} else if (vi->big_packets) {
 			give_pages(rq, buf);
 		} else {
@@ -768,7 +750,7 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
 	}
 
 	if (vi->mergeable_rx_bufs)
-		skb = receive_mergeable(dev, vi, rq, (unsigned long)buf, len);
+		skb = receive_mergeable(dev, vi, rq, buf, ctx, len);
 	else if (vi->big_packets)
 		skb = receive_big(dev, vi, rq, buf, len);
 	else
@@ -880,14 +862,15 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 	return err;
 }
 
-static unsigned int get_mergeable_buf_len(struct ewma_pkt_len *avg_pkt_len)
+static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
+					  struct ewma_pkt_len *avg_pkt_len)
 {
 	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	unsigned int len;
 
 	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
-			GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
-	return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
+				rq->min_buf_len, PAGE_SIZE - hdr_len);
+	return ALIGN(len, L1_CACHE_BYTES);
 }
 
 static int add_recvbuf_mergeable(struct virtnet_info *vi,
@@ -896,17 +879,17 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 	struct page_frag *alloc_frag = &rq->alloc_frag;
 	unsigned int headroom = virtnet_get_headroom(vi);
 	char *buf;
-	unsigned long ctx;
+	void *ctx;
 	int err;
 	unsigned int len, hole;
 
-	len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
+	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
 	if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
 		return -ENOMEM;
 
 	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
 	buf += headroom; /* advance address leaving hole at front of pkt */
-	ctx = mergeable_buf_to_ctx(buf, len);
+	ctx = (void *)(unsigned long)len;
 	get_page(alloc_frag->page);
 	alloc_frag->offset += len + headroom;
 	hole = alloc_frag->size - alloc_frag->offset;
@@ -921,7 +904,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 	}
 
 	sg_init_one(rq->sg, buf, len);
-	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp);
+	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
 	if (err < 0)
 		put_page(virt_to_head_page(buf));
 
@@ -1032,10 +1015,20 @@ static int virtnet_receive(struct receive_queue *rq, int budget)
 	void *buf;
 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 
-	while (received < budget &&
-	       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
-		bytes += receive_buf(vi, rq, buf, len);
-		received++;
+	if (vi->mergeable_rx_bufs) {
+		void *ctx;
+
+		while (received < budget &&
+		       (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
+			bytes += receive_buf(vi, rq, buf, len, ctx);
+			received++;
+		}
+	} else {
+		while (received < budget &&
+		       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
+			bytes += receive_buf(vi, rq, buf, len, NULL);
+			received++;
+		}
 	}
 
 	if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
@@ -1804,6 +1797,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
 	flush_work(&vi->config_work);
 
 	netif_device_detach(vi->dev);
+	netif_tx_disable(vi->dev);
 	cancel_delayed_work_sync(&vi->refill);
 
 	if (netif_running(vi->dev)) {
@@ -1854,7 +1848,6 @@ static int virtnet_reset(struct virtnet_info *vi, int curr_qp, int xdp_qp)
 	virtnet_freeze_down(dev);
 	_remove_vq_common(vi);
 
-	dev->config->reset(dev);
 	virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 
@@ -1997,6 +1990,7 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_poll_controller = virtnet_netpoll,
 #endif
 	.ndo_xdp		= virtnet_xdp,
+	.ndo_features_check	= passthru_features_check,
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)
@@ -2118,9 +2112,7 @@ static void free_unused_bufs(struct virtnet_info *vi)
 
 		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
 			if (vi->mergeable_rx_bufs) {
-				unsigned long ctx = (unsigned long)buf;
-				void *base = mergeable_ctx_to_buf_address(ctx);
-				put_page(virt_to_head_page(base));
+				put_page(virt_to_head_page(buf));
 			} else if (vi->big_packets) {
 				give_pages(&vi->rq[i], buf);
 			} else {
@@ -2141,6 +2133,22 @@ static void virtnet_del_vqs(struct virtnet_info *vi)
 	virtnet_free_queues(vi);
 }
 
+/* How large should a single buffer be so a queue full of these can fit at
+ * least one full packet?
+ * Logic below assumes the mergeable buffer header is used.
+ */
+static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
+{
+	const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	unsigned int rq_size = virtqueue_get_vring_size(vq);
+	unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
+	unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
+	unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
+
+	return max(max(min_buf_len, hdr_len) - hdr_len,
+		   (unsigned int)GOOD_PACKET_LEN);
+}
+
 static int virtnet_find_vqs(struct virtnet_info *vi)
 {
 	vq_callback_t **callbacks;
@@ -2148,6 +2156,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 	int ret = -ENOMEM;
 	int i, total_vqs;
 	const char **names;
+	bool *ctx;
 
 	/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
 	 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
@@ -2166,6 +2175,13 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 	names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
 	if (!names)
 		goto err_names;
+	if (vi->mergeable_rx_bufs) {
+		ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL);
+		if (!ctx)
+			goto err_ctx;
+	} else {
+		ctx = NULL;
+	}
 
 	/* Parameters for control virtqueue, if any */
 	if (vi->has_cvq) {
@@ -2181,10 +2197,12 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 		sprintf(vi->sq[i].name, "output.%d", i);
 		names[rxq2vq(i)] = vi->rq[i].name;
 		names[txq2vq(i)] = vi->sq[i].name;
+		if (ctx)
+			ctx[rxq2vq(i)] = true;
 	}
 
 	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
-					 names, NULL);
+					 names, ctx, NULL);
 	if (ret)
 		goto err_find;
 
@@ -2196,6 +2214,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		vi->rq[i].vq = vqs[rxq2vq(i)];
+		vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
 		vi->sq[i].vq = vqs[txq2vq(i)];
 	}
 
@@ -2206,6 +2225,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 	return 0;
 
 err_find:
+	kfree(ctx);
+err_ctx:
 	kfree(names);
 err_names:
 	kfree(callbacks);
@@ -2282,7 +2303,8 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
 
 	BUG_ON(queue_index >= vi->max_queue_pairs);
 	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
-	return sprintf(buf, "%u\n", get_mergeable_buf_len(avg));
+	return sprintf(buf, "%u\n",
+		       get_mergeable_buf_len(&vi->rq[queue_index], avg));
 }
 
 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 25bc764ae7dc..d1c7029ded7c 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2962,6 +2962,11 @@ vmxnet3_force_close(struct vmxnet3_adapter *adapter)
 	/* we need to enable NAPI, otherwise dev_close will deadlock */
 	for (i = 0; i < adapter->num_rx_queues; i++)
 		napi_enable(&adapter->rx_queue[i].napi);
+	/*
+	 * Need to clear the quiesce bit to ensure that vmxnet3_close
+	 * can quiesce the device properly
+	 */
+	clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
 	dev_close(adapter->netdev);
 }
 
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index ceda5861da78..022c0b5f9844 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -36,12 +36,14 @@
 #include <net/addrconf.h>
 #include <net/l3mdev.h>
 #include <net/fib_rules.h>
+#include <net/netns/generic.h>
 
 #define DRV_NAME	"vrf"
 #define DRV_VERSION	"1.0"
 
 #define FIB_RULE_PREF  1000       /* default preference for FIB rules */
-static bool add_fib_rules = true;
+
+static unsigned int vrf_net_id;
 
 struct net_vrf {
 	struct rtable __rcu	*rth;
@@ -989,6 +991,7 @@ static u32 vrf_fib_table(const struct net_device *dev)
 
 static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+	kfree_skb(skb);
 	return 0;
 }
 
@@ -998,7 +1001,7 @@ static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook,
 {
 	struct net *net = dev_net(dev);
 
-	if (NF_HOOK(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) < 0)
+	if (nf_hook(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) != 1)
 		skb = NULL;    /* kfree_skb(skb) handled by nf code */
 
 	return skb;
@@ -1347,7 +1350,7 @@ static void vrf_setup(struct net_device *dev)
 	dev->netdev_ops = &vrf_netdev_ops;
 	dev->l3mdev_ops = &vrf_l3mdev_ops;
 	dev->ethtool_ops = &vrf_ethtool_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 
 	/* Fill in device structure with ethernet-generic values. */
 	eth_hw_addr_random(dev);
@@ -1393,6 +1396,8 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
 		       struct nlattr *tb[], struct nlattr *data[])
 {
 	struct net_vrf *vrf = netdev_priv(dev);
+	bool *add_fib_rules;
+	struct net *net;
 	int err;
 
 	if (!data || !data[IFLA_VRF_TABLE])
@@ -1408,13 +1413,15 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
 	if (err)
 		goto out;
 
-	if (add_fib_rules) {
+	net = dev_net(dev);
+	add_fib_rules = net_generic(net, vrf_net_id);
+	if (*add_fib_rules) {
 		err = vrf_add_fib_rules(dev);
 		if (err) {
 			unregister_netdevice(dev);
 			goto out;
 		}
-		add_fib_rules = false;
+		*add_fib_rules = false;
 	}
 
 out:
@@ -1497,16 +1504,38 @@ static struct notifier_block vrf_notifier_block __read_mostly = {
 	.notifier_call = vrf_device_event,
 };
 
+/* Initialize per network namespace state */
+static int __net_init vrf_netns_init(struct net *net)
+{
+	bool *add_fib_rules = net_generic(net, vrf_net_id);
+
+	*add_fib_rules = true;
+
+	return 0;
+}
+
+static struct pernet_operations vrf_net_ops __net_initdata = {
+	.init = vrf_netns_init,
+	.id   = &vrf_net_id,
+	.size = sizeof(bool),
+};
+
 static int __init vrf_init_module(void)
 {
 	int rc;
 
 	register_netdevice_notifier(&vrf_notifier_block);
 
-	rc = rtnl_link_register(&vrf_link_ops);
+	rc = register_pernet_subsys(&vrf_net_ops);
 	if (rc < 0)
 		goto error;
 
+	rc = rtnl_link_register(&vrf_link_ops);
+	if (rc < 0) {
+		unregister_pernet_subsys(&vrf_net_ops);
+		goto error;
+	}
+
 	return 0;
 
 error:
diff --git a/drivers/net/vsockmon.c b/drivers/net/vsockmon.c
index 7f0136f2dd9d..c28bdce14fd5 100644
--- a/drivers/net/vsockmon.c
+++ b/drivers/net/vsockmon.c
@@ -135,7 +135,7 @@ static void vsockmon_setup(struct net_device *dev)
 
 	dev->netdev_ops	= &vsockmon_ops;
 	dev->ethtool_ops = &vsockmon_ethtool_ops;
-	dev->destructor	= free_netdev;
+	dev->needs_free_netdev = true;
 
 	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
 			NETIF_F_HIGHDMA | NETIF_F_LLTX;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 328b4712683c..5fa798a5c9a6 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -59,6 +59,8 @@ static const u8 all_zeros_mac[ETH_ALEN + 2];
 
 static int vxlan_sock_add(struct vxlan_dev *vxlan);
 
+static void vxlan_vs_del_dev(struct vxlan_dev *vxlan);
+
 /* per-network namespace private data for this module */
 struct vxlan_net {
 	struct list_head  vxlan_list;
@@ -740,6 +742,22 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
 	call_rcu(&f->rcu, vxlan_fdb_free);
 }
 
+static void vxlan_dst_free(struct rcu_head *head)
+{
+	struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
+
+	dst_cache_destroy(&rd->dst_cache);
+	kfree(rd);
+}
+
+static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
+				  struct vxlan_rdst *rd)
+{
+	list_del_rcu(&rd->list);
+	vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
+	call_rcu(&rd->rcu, vxlan_dst_free);
+}
+
 static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
 			   union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
 			   __be32 *vni, u32 *ifindex)
@@ -864,9 +882,7 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
 	 * otherwise destroy the fdb entry
 	 */
 	if (rd && !list_is_singular(&f->remotes)) {
-		list_del_rcu(&rd->list);
-		vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
-		kfree_rcu(rd, rcu);
+		vxlan_fdb_dst_destroy(vxlan, f, rd);
 		goto out;
 	}
 
@@ -1067,6 +1083,8 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan)
 	rcu_assign_pointer(vxlan->vn4_sock, NULL);
 	synchronize_net();
 
+	vxlan_vs_del_dev(vxlan);
+
 	if (__vxlan_sock_release_prep(sock4)) {
 		udp_tunnel_sock_release(sock4->sock);
 		kfree(sock4);
@@ -2342,6 +2360,15 @@ static void vxlan_cleanup(unsigned long arg)
 	mod_timer(&vxlan->age_timer, next_timer);
 }
 
+static void vxlan_vs_del_dev(struct vxlan_dev *vxlan)
+{
+	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+
+	spin_lock(&vn->sock_lock);
+	hlist_del_init_rcu(&vxlan->hlist);
+	spin_unlock(&vn->sock_lock);
+}
+
 static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
 {
 	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
@@ -2584,7 +2611,7 @@ static void vxlan_setup(struct net_device *dev)
 	eth_hw_addr_random(dev);
 	ether_setup(dev);
 
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	SET_NETDEV_DEVTYPE(dev, &vxlan_type);
 
 	dev->features	|= NETIF_F_LLTX;
@@ -3286,15 +3313,9 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
 static void vxlan_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
-	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
 
 	vxlan_flush(vxlan, true);
 
-	spin_lock(&vn->sock_lock);
-	if (!hlist_unhashed(&vxlan->hlist))
-		hlist_del_rcu(&vxlan->hlist);
-	spin_unlock(&vn->sock_lock);
-
 	gro_cells_destroy(&vxlan->gro_cells);
 	list_del(&vxlan->next);
 	unregister_netdevice_queue(dev, head);
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 4ca71bca39ac..6ea16260ec76 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -232,11 +232,11 @@ static int irq[MAX_CARDS+1] = { -1, -1, -1, -1, -1, -1, 0, };
 static struct class *cosa_class;
 
 #ifdef MODULE
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_PARM_DESC(io, "The I/O bases of the COSA or SRP cards");
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "The IRQ lines of the COSA or SRP cards");
-module_param_array(dma, int, NULL, 0);
+module_param_hw_array(dma, int, dma, NULL, 0);
 MODULE_PARM_DESC(dma, "The DMA channels of the COSA or SRP cards");
 
 MODULE_AUTHOR("Jan \"Yenya\" Kasprzak, <kas@fi.muni.cz>");
diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c
index 65ee2a6f248c..a0d76f70c428 100644
--- a/drivers/net/wan/dlci.c
+++ b/drivers/net/wan/dlci.c
@@ -475,7 +475,7 @@ static void dlci_setup(struct net_device *dev)
 	dev->flags		= 0;
 	dev->header_ops		= &dlci_header_ops;
 	dev->netdev_ops		= &dlci_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 
 	dlp->receive		= dlci_receive;
 
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index eb915281197e..78596e42a3f3 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1106,7 +1106,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
 		return -EIO;
 	}
 
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	*get_dev_p(pvc, type) = dev;
 	if (!used) {
 		state(hdlc)->dce_changed = 1;
diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index dd6bb3364ad2..4de0737fbf8a 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -324,11 +324,11 @@ static void sv11_shutdown(struct z8530_dev *dev)
 static int io = 0x200;
 static int irq = 9;
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, "The I/O base of the Comtrol Hostess SV11 card");
-module_param(dma, int, 0);
+module_param_hw(dma, int, dma, 0);
 MODULE_PARM_DESC(dma, "Set this to 1 to use DMA1/DMA3 for TX/RX");
-module_param(irq, int, 0);
+module_param_hw(irq, int, irq, 0);
 MODULE_PARM_DESC(irq, "The interrupt line setting for the Comtrol Hostess SV11 card");
 
 MODULE_AUTHOR("Alan Cox");
diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index 9df9ed62beff..63f749078a1f 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -306,7 +306,7 @@ static const struct net_device_ops lapbeth_netdev_ops = {
 static void lapbeth_setup(struct net_device *dev)
 {
 	dev->netdev_ops	     = &lapbeth_netdev_ops;
-	dev->destructor	     = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->type            = ARPHRD_X25;
 	dev->hard_header_len = 3;
 	dev->mtu             = 1000;
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index 3ca3419c54a0..bde8c0339831 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -1463,8 +1463,8 @@ set_multicast_list( struct net_device  *dev )
 
 
 #ifdef MODULE
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_array(baud, int, NULL, 0);
 module_param_array(rxl, int, NULL, 0);
 module_param_array(mac, int, NULL, 0);
diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index fbb5aa2c4d8f..c56f2c252113 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -363,13 +363,13 @@ static int rxdma=3;
 static int irq=5;
 static bool slow=false;
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, "The I/O base of the Sealevel card");
-module_param(txdma, int, 0);
+module_param_hw(txdma, int, dma, 0);
 MODULE_PARM_DESC(txdma, "Transmit DMA channel");
-module_param(rxdma, int, 0);
+module_param_hw(rxdma, int, dma, 0);
 MODULE_PARM_DESC(rxdma, "Receive DMA channel");
-module_param(irq, int, 0);
+module_param_hw(irq, int, irq, 0);
 MODULE_PARM_DESC(irq, "The interrupt line setting for the SeaLevel card");
 module_param(slow, bool, 0);
 MODULE_PARM_DESC(slow, "Set this for an older Sealevel card such as the 4012");
diff --git a/drivers/net/wireless/ath/ath6kl/main.c b/drivers/net/wireless/ath/ath6kl/main.c
index 91ee542de3d7..b90c77ef792e 100644
--- a/drivers/net/wireless/ath/ath6kl/main.c
+++ b/drivers/net/wireless/ath/ath6kl/main.c
@@ -1287,7 +1287,7 @@ void init_netdev(struct net_device *dev)
 	struct ath6kl *ar = ath6kl_priv(dev);
 
 	dev->netdev_ops = &ath6kl_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->watchdog_timeo = ATH6KL_TX_TIMEOUT;
 
 	dev->needed_headroom = ETH_HLEN;
diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index d5e993dc9b23..517a315e259b 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1271,6 +1271,8 @@ static int wcn36xx_remove(struct platform_device *pdev)
 	qcom_smem_state_put(wcn->tx_enable_state);
 	qcom_smem_state_put(wcn->tx_rings_empty_state);
 
+	rpmsg_destroy_ept(wcn->smd_channel);
+
 	iounmap(wcn->dxe_base);
 	iounmap(wcn->ccu_base);
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index cd1d6730eab7..617199c0e5a0 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -5225,7 +5225,6 @@ void brcmf_cfg80211_free_netdev(struct net_device *ndev)
 
 	if (vif)
 		brcmf_free_vif(vif);
-	free_netdev(ndev);
 }
 
 static bool brcmf_is_linkup(const struct brcmf_event_msg *e)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index a3d82368f1a9..511d190c6cca 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -624,7 +624,8 @@ struct brcmf_if *brcmf_add_if(struct brcmf_pub *drvr, s32 bsscfgidx, s32 ifidx,
 		if (!ndev)
 			return ERR_PTR(-ENOMEM);
 
-		ndev->destructor = brcmf_cfg80211_free_netdev;
+		ndev->needs_free_netdev = true;
+		ndev->priv_destructor = brcmf_cfg80211_free_netdev;
 		ifp = netdev_priv(ndev);
 		ifp->ndev = ndev;
 		/* store mapping ifidx to bsscfgidx */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
index c7c1e9906500..d231042f19d6 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
@@ -442,7 +442,7 @@ struct brcmf_fw {
 	const char *nvram_name;
 	u16 domain_nr;
 	u16 bus_nr;
-	void (*done)(struct device *dev, const struct firmware *fw,
+	void (*done)(struct device *dev, int err, const struct firmware *fw,
 		     void *nvram_image, u32 nvram_len);
 };
 
@@ -477,52 +477,51 @@ static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx)
 	if (!nvram && !(fwctx->flags & BRCMF_FW_REQ_NV_OPTIONAL))
 		goto fail;
 
-	fwctx->done(fwctx->dev, fwctx->code, nvram, nvram_length);
+	fwctx->done(fwctx->dev, 0, fwctx->code, nvram, nvram_length);
 	kfree(fwctx);
 	return;
 
 fail:
 	brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev));
 	release_firmware(fwctx->code);
-	device_release_driver(fwctx->dev);
+	fwctx->done(fwctx->dev, -ENOENT, NULL, NULL, 0);
 	kfree(fwctx);
 }
 
 static void brcmf_fw_request_code_done(const struct firmware *fw, void *ctx)
 {
 	struct brcmf_fw *fwctx = ctx;
-	int ret;
+	int ret = 0;
 
 	brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(fwctx->dev));
-	if (!fw)
+	if (!fw) {
+		ret = -ENOENT;
 		goto fail;
-
-	/* only requested code so done here */
-	if (!(fwctx->flags & BRCMF_FW_REQUEST_NVRAM)) {
-		fwctx->done(fwctx->dev, fw, NULL, 0);
-		kfree(fwctx);
-		return;
 	}
+	/* only requested code so done here */
+	if (!(fwctx->flags & BRCMF_FW_REQUEST_NVRAM))
+		goto done;
+
 	fwctx->code = fw;
 	ret = request_firmware_nowait(THIS_MODULE, true, fwctx->nvram_name,
 				      fwctx->dev, GFP_KERNEL, fwctx,
 				      brcmf_fw_request_nvram_done);
 
-	if (!ret)
-		return;
-
-	brcmf_fw_request_nvram_done(NULL, fwctx);
+	/* pass NULL to nvram callback for bcm47xx fallback */
+	if (ret)
+		brcmf_fw_request_nvram_done(NULL, fwctx);
 	return;
 
 fail:
 	brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev));
-	device_release_driver(fwctx->dev);
+done:
+	fwctx->done(fwctx->dev, ret, fw, NULL, 0);
 	kfree(fwctx);
 }
 
 int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags,
 				const char *code, const char *nvram,
-				void (*fw_cb)(struct device *dev,
+				void (*fw_cb)(struct device *dev, int err,
 					      const struct firmware *fw,
 					      void *nvram_image, u32 nvram_len),
 				u16 domain_nr, u16 bus_nr)
@@ -555,7 +554,7 @@ int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags,
 
 int brcmf_fw_get_firmwares(struct device *dev, u16 flags,
 			   const char *code, const char *nvram,
-			   void (*fw_cb)(struct device *dev,
+			   void (*fw_cb)(struct device *dev, int err,
 					 const struct firmware *fw,
 					 void *nvram_image, u32 nvram_len))
 {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
index d3c9f0d52ae3..8fa4b7e1ab3d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
@@ -73,13 +73,13 @@ void brcmf_fw_nvram_free(void *nvram);
  */
 int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags,
 				const char *code, const char *nvram,
-				void (*fw_cb)(struct device *dev,
+				void (*fw_cb)(struct device *dev, int err,
 					      const struct firmware *fw,
 					      void *nvram_image, u32 nvram_len),
 				u16 domain_nr, u16 bus_nr);
 int brcmf_fw_get_firmwares(struct device *dev, u16 flags,
 			   const char *code, const char *nvram,
-			   void (*fw_cb)(struct device *dev,
+			   void (*fw_cb)(struct device *dev, int err,
 					 const struct firmware *fw,
 					 void *nvram_image, u32 nvram_len));
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
index 72373e59308e..f59642b2c935 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
@@ -2145,7 +2145,7 @@ void brcmf_fws_add_interface(struct brcmf_if *ifp)
 	struct brcmf_fws_info *fws = drvr_to_fws(ifp->drvr);
 	struct brcmf_fws_mac_descriptor *entry;
 
-	if (!ifp->ndev || fws->fcmode == BRCMF_FWS_FCMODE_NONE)
+	if (!ifp->ndev || !brcmf_fws_queue_skbs(fws))
 		return;
 
 	entry = &fws->desc.iface[ifp->ifidx];
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index f36b96dc6acd..f878706613e6 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -1650,16 +1650,23 @@ static const struct brcmf_buscore_ops brcmf_pcie_buscore_ops = {
 	.write32 = brcmf_pcie_buscore_write32,
 };
 
-static void brcmf_pcie_setup(struct device *dev, const struct firmware *fw,
+static void brcmf_pcie_setup(struct device *dev, int ret,
+			     const struct firmware *fw,
 			     void *nvram, u32 nvram_len)
 {
-	struct brcmf_bus *bus = dev_get_drvdata(dev);
-	struct brcmf_pciedev *pcie_bus_dev = bus->bus_priv.pcie;
-	struct brcmf_pciedev_info *devinfo = pcie_bus_dev->devinfo;
+	struct brcmf_bus *bus;
+	struct brcmf_pciedev *pcie_bus_dev;
+	struct brcmf_pciedev_info *devinfo;
 	struct brcmf_commonring **flowrings;
-	int ret;
 	u32 i;
 
+	/* check firmware loading result */
+	if (ret)
+		goto fail;
+
+	bus = dev_get_drvdata(dev);
+	pcie_bus_dev = bus->bus_priv.pcie;
+	devinfo = pcie_bus_dev->devinfo;
 	brcmf_pcie_attach(devinfo);
 
 	/* Some of the firmwares have the size of the memory of the device
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index fc64b8913aa6..5653d6dd38f6 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -3422,7 +3422,7 @@ static int brcmf_sdio_bus_preinit(struct device *dev)
 		/* otherwise, set txglomalign */
 		value = sdiodev->settings->bus.sdio.sd_sgentry_align;
 		/* SDIO ADMA requires at least 32 bit alignment */
-		value = max_t(u32, value, 4);
+		value = max_t(u32, value, ALIGNMENT);
 		err = brcmf_iovar_data_set(dev, "bus:txglomalign", &value,
 					   sizeof(u32));
 	}
@@ -3982,21 +3982,26 @@ static const struct brcmf_bus_ops brcmf_sdio_bus_ops = {
 	.get_memdump = brcmf_sdio_bus_get_memdump,
 };
 
-static void brcmf_sdio_firmware_callback(struct device *dev,
+static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 					 const struct firmware *code,
 					 void *nvram, u32 nvram_len)
 {
-	struct brcmf_bus *bus_if = dev_get_drvdata(dev);
-	struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
-	struct brcmf_sdio *bus = sdiodev->bus;
-	int err = 0;
+	struct brcmf_bus *bus_if;
+	struct brcmf_sdio_dev *sdiodev;
+	struct brcmf_sdio *bus;
 	u8 saveclk;
 
-	brcmf_dbg(TRACE, "Enter: dev=%s\n", dev_name(dev));
+	brcmf_dbg(TRACE, "Enter: dev=%s, err=%d\n", dev_name(dev), err);
+	bus_if = dev_get_drvdata(dev);
+	sdiodev = bus_if->bus_priv.sdio;
+	if (err)
+		goto fail;
 
 	if (!bus_if->drvr)
 		return;
 
+	bus = sdiodev->bus;
+
 	/* try to download image and nvram to the dongle */
 	bus->alp_only = true;
 	err = brcmf_sdio_download_firmware(bus, code, nvram, nvram_len);
@@ -4083,6 +4088,7 @@ release:
 fail:
 	brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), err);
 	device_release_driver(dev);
+	device_release_driver(&sdiodev->func[2]->dev);
 }
 
 struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index e4d545f9edee..0eea48e73331 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1159,17 +1159,18 @@ fail:
 	return ret;
 }
 
-static void brcmf_usb_probe_phase2(struct device *dev,
+static void brcmf_usb_probe_phase2(struct device *dev, int ret,
 				   const struct firmware *fw,
 				   void *nvram, u32 nvlen)
 {
 	struct brcmf_bus *bus = dev_get_drvdata(dev);
-	struct brcmf_usbdev_info *devinfo;
-	int ret;
+	struct brcmf_usbdev_info *devinfo = bus->bus_priv.usb->devinfo;
+
+	if (ret)
+		goto error;
 
 	brcmf_dbg(USB, "Start fw downloading\n");
 
-	devinfo = bus->bus_priv.usb->devinfo;
 	ret = check_file(fw->data);
 	if (ret < 0) {
 		brcmf_err("invalid firmware\n");
diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index 4b040451a9b8..6a13303af2b7 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -246,8 +246,8 @@ MODULE_DESCRIPTION("Support for Cisco/Aironet 802.11 wireless ethernet cards.  "
 		   "Direct support for ISA/PCI/MPI cards and support for PCMCIA when used with airo_cs.");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_SUPPORTED_DEVICE("Aironet 4500, 4800 and Cisco 340/350");
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_array(rates, int, NULL, 0);
 module_param_array(ssids, charp, NULL, 0);
 module_param(auto_wep, int, 0);
@@ -3066,7 +3066,7 @@ static int airo_thread(void *data) {
 		if (ai->jobs) {
 			locked = down_interruptible(&ai->sem);
 		} else {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 
 			init_waitqueue_entry(&wait, current);
 			add_wait_queue(&ai->thr_wait, &wait);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c
index 3b3e076571d6..45e2efc70d19 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c
@@ -79,8 +79,8 @@
 /* Lowest firmware API version supported */
 #define IWL7260_UCODE_API_MIN	17
 #define IWL7265_UCODE_API_MIN	17
-#define IWL7265D_UCODE_API_MIN	17
-#define IWL3168_UCODE_API_MIN	20
+#define IWL7265D_UCODE_API_MIN	22
+#define IWL3168_UCODE_API_MIN	22
 
 /* NVM versions */
 #define IWL7260_NVM_VERSION		0x0a1d
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
index b9718c0cf174..89137717c1fc 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
@@ -74,8 +74,8 @@
 #define IWL8265_UCODE_API_MAX	30
 
 /* Lowest firmware API version supported */
-#define IWL8000_UCODE_API_MIN	17
-#define IWL8265_UCODE_API_MIN	20
+#define IWL8000_UCODE_API_MIN	22
+#define IWL8265_UCODE_API_MIN	22
 
 /* NVM versions */
 #define IWL8000_NVM_VERSION		0x0a1d
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
index 306bc967742e..77efbb78e867 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
@@ -370,6 +370,7 @@
 #define MON_DMARB_RD_DATA_ADDR		(0xa03c5c)
 
 #define DBGC_IN_SAMPLE			(0xa03c00)
+#define DBGC_OUT_CTRL			(0xa03c0c)
 
 /* enable the ID buf for read */
 #define WFPM_PS_CTL_CLR			0xA0300C
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rs.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rs.h
index 1b7d265ffb0a..a10c6aae9ab9 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rs.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rs.h
@@ -307,6 +307,11 @@ enum {
 /* Bit 1-3: LQ command color. Used to match responses to LQ commands */
 #define LQ_FLAG_COLOR_POS               1
 #define LQ_FLAG_COLOR_MSK               (7 << LQ_FLAG_COLOR_POS)
+#define LQ_FLAG_COLOR_GET(_f)		(((_f) & LQ_FLAG_COLOR_MSK) >>\
+					 LQ_FLAG_COLOR_POS)
+#define LQ_FLAGS_COLOR_INC(_c)		((((_c) + 1) << LQ_FLAG_COLOR_POS) &\
+					 LQ_FLAG_COLOR_MSK)
+#define LQ_FLAG_COLOR_SET(_f, _c)	((_c) | ((_f) & ~LQ_FLAG_COLOR_MSK))
 
 /* Bit 4-5: Tx RTS BW Signalling
  * (0) No RTS BW signalling
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
index 81b98915b1a4..1360ebfdc51b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
@@ -519,8 +519,11 @@ struct agg_tx_status {
  * bit-7 invalid rate indication
  */
 #define TX_RES_INIT_RATE_INDEX_MSK 0x0f
+#define TX_RES_RATE_TABLE_COLOR_POS 4
 #define TX_RES_RATE_TABLE_COLOR_MSK 0x70
 #define TX_RES_INV_RATE_INDEX_MSK 0x80
+#define TX_RES_RATE_TABLE_COL_GET(_f) (((_f) & TX_RES_RATE_TABLE_COLOR_MSK) >>\
+				       TX_RES_RATE_TABLE_COLOR_POS)
 
 #define IWL_MVM_TX_RES_GET_TID(_ra_tid) ((_ra_tid) & 0x0f)
 #define IWL_MVM_TX_RES_GET_RA(_ra_tid) ((_ra_tid) >> 4)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
index 7b86a4f1b574..c8712e6eea74 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
@@ -1002,14 +1002,6 @@ int iwl_mvm_fw_dbg_collect_trig(struct iwl_mvm *mvm,
 	return 0;
 }
 
-static inline void iwl_mvm_restart_early_start(struct iwl_mvm *mvm)
-{
-	if (mvm->cfg->device_family == IWL_DEVICE_FAMILY_7000)
-		iwl_clear_bits_prph(mvm->trans, MON_BUFF_SAMPLE_CTL, 0x100);
-	else
-		iwl_write_prph(mvm->trans, DBGC_IN_SAMPLE, 1);
-}
-
 int iwl_mvm_start_fw_dbg_conf(struct iwl_mvm *mvm, u8 conf_id)
 {
 	u8 *ptr;
@@ -1023,10 +1015,8 @@ int iwl_mvm_start_fw_dbg_conf(struct iwl_mvm *mvm, u8 conf_id)
 	/* EARLY START - firmware's configuration is hard coded */
 	if ((!mvm->fw->dbg_conf_tlv[conf_id] ||
 	     !mvm->fw->dbg_conf_tlv[conf_id]->num_of_hcmds) &&
-	    conf_id == FW_DBG_START_FROM_ALIVE) {
-		iwl_mvm_restart_early_start(mvm);
+	    conf_id == FW_DBG_START_FROM_ALIVE)
 		return 0;
-	}
 
 	if (!mvm->fw->dbg_conf_tlv[conf_id])
 		return -EINVAL;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
index 0f1831b41915..fd2fc46e2fe5 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -1040,7 +1040,7 @@ static int iwl_mvm_mac_ctxt_send_beacon(struct iwl_mvm *mvm,
 		struct iwl_mac_beacon_cmd_v6 beacon_cmd_v6;
 		struct iwl_mac_beacon_cmd_v7 beacon_cmd;
 	} u = {};
-	struct iwl_mac_beacon_cmd beacon_cmd;
+	struct iwl_mac_beacon_cmd beacon_cmd = {};
 	struct ieee80211_tx_info *info;
 	u32 beacon_skb_len;
 	u32 rate, tx_flags;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 4e74a6b90e70..52f8d7a6a7dc 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1730,8 +1730,11 @@ int iwl_mvm_find_free_queue(struct iwl_mvm *mvm, u8 sta_id, u8 minq, u8 maxq);
  */
 static inline u32 iwl_mvm_flushable_queues(struct iwl_mvm *mvm)
 {
+	u32 cmd_queue = iwl_mvm_is_dqa_supported(mvm) ? IWL_MVM_DQA_CMD_QUEUE :
+		IWL_MVM_CMD_QUEUE;
+
 	return ((BIT(mvm->cfg->base_params->num_of_queues) - 1) &
-		~BIT(IWL_MVM_CMD_QUEUE));
+		~BIT(cmd_queue));
 }
 
 static inline
@@ -1753,6 +1756,7 @@ static inline void iwl_mvm_stop_device(struct iwl_mvm *mvm)
 	if (!iwl_mvm_has_new_tx_api(mvm))
 		iwl_free_fw_paging(mvm);
 	mvm->ucode_loaded = false;
+	mvm->fw_dbg_conf = FW_DBG_INVALID;
 	iwl_trans_stop_device(mvm->trans);
 }
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 9ffff6ed8133..3da5ec40aaea 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -1149,21 +1149,37 @@ static void iwl_mvm_fw_error_dump_wk(struct work_struct *work)
 
 	mutex_lock(&mvm->mutex);
 
-	/* stop recording */
 	if (mvm->cfg->device_family == IWL_DEVICE_FAMILY_7000) {
+		/* stop recording */
 		iwl_set_bits_prph(mvm->trans, MON_BUFF_SAMPLE_CTL, 0x100);
+
+		iwl_mvm_fw_error_dump(mvm);
+
+		/* start recording again if the firmware is not crashed */
+		if (!test_bit(STATUS_FW_ERROR, &mvm->trans->status) &&
+		    mvm->fw->dbg_dest_tlv)
+			iwl_clear_bits_prph(mvm->trans,
+					    MON_BUFF_SAMPLE_CTL, 0x100);
 	} else {
+		u32 in_sample = iwl_read_prph(mvm->trans, DBGC_IN_SAMPLE);
+		u32 out_ctrl = iwl_read_prph(mvm->trans, DBGC_OUT_CTRL);
+
+		/* stop recording */
 		iwl_write_prph(mvm->trans, DBGC_IN_SAMPLE, 0);
-		/* wait before we collect the data till the DBGC stop */
 		udelay(100);
-	}
+		iwl_write_prph(mvm->trans, DBGC_OUT_CTRL, 0);
+		/* wait before we collect the data till the DBGC stop */
+		udelay(500);
 
-	iwl_mvm_fw_error_dump(mvm);
+		iwl_mvm_fw_error_dump(mvm);
 
-	/* start recording again if the firmware is not crashed */
-	WARN_ON_ONCE((!test_bit(STATUS_FW_ERROR, &mvm->trans->status)) &&
-		     mvm->fw->dbg_dest_tlv &&
-		     iwl_mvm_start_fw_dbg_conf(mvm, mvm->fw_dbg_conf));
+		/* start recording again if the firmware is not crashed */
+		if (!test_bit(STATUS_FW_ERROR, &mvm->trans->status) &&
+		    mvm->fw->dbg_dest_tlv) {
+			iwl_write_prph(mvm->trans, DBGC_IN_SAMPLE, in_sample);
+			iwl_write_prph(mvm->trans, DBGC_OUT_CTRL, out_ctrl);
+		}
+	}
 
 	mutex_unlock(&mvm->mutex);
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
index 7788eefcd2bd..aa785cf3cf68 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
@@ -2,7 +2,7 @@
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
- * Copyright(c) 2016 Intel Deutschland GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -1083,34 +1083,6 @@ static void rs_get_lower_rate_down_column(struct iwl_lq_sta *lq_sta,
 		rs_get_lower_rate_in_column(lq_sta, rate);
 }
 
-/* Check if both rates are identical
- * allow_ant_mismatch enables matching a SISO rate on ANT_A or ANT_B
- * with a rate indicating STBC/BFER and ANT_AB.
- */
-static inline bool rs_rate_equal(struct rs_rate *a,
-				 struct rs_rate *b,
-				 bool allow_ant_mismatch)
-
-{
-	bool ant_match = (a->ant == b->ant) && (a->stbc == b->stbc) &&
-		(a->bfer == b->bfer);
-
-	if (allow_ant_mismatch) {
-		if (a->stbc || a->bfer) {
-			WARN_ONCE(a->ant != ANT_AB, "stbc %d bfer %d ant %d",
-				  a->stbc, a->bfer, a->ant);
-			ant_match |= (b->ant == ANT_A || b->ant == ANT_B);
-		} else if (b->stbc || b->bfer) {
-			WARN_ONCE(b->ant != ANT_AB, "stbc %d bfer %d ant %d",
-				  b->stbc, b->bfer, b->ant);
-			ant_match |= (a->ant == ANT_A || a->ant == ANT_B);
-		}
-	}
-
-	return (a->type == b->type) && (a->bw == b->bw) && (a->sgi == b->sgi) &&
-		(a->ldpc == b->ldpc) && (a->index == b->index) && ant_match;
-}
-
 /* Check if both rates share the same column */
 static inline bool rs_rate_column_match(struct rs_rate *a,
 					struct rs_rate *b)
@@ -1182,12 +1154,12 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
 	u32 lq_hwrate;
 	struct rs_rate lq_rate, tx_resp_rate;
 	struct iwl_scale_tbl_info *curr_tbl, *other_tbl, *tmp_tbl;
-	u8 reduced_txp = (uintptr_t)info->status.status_driver_data[0];
+	u32 tlc_info = (uintptr_t)info->status.status_driver_data[0];
+	u8 reduced_txp = tlc_info & RS_DRV_DATA_TXP_MSK;
+	u8 lq_color = RS_DRV_DATA_LQ_COLOR_GET(tlc_info);
 	u32 tx_resp_hwrate = (uintptr_t)info->status.status_driver_data[1];
 	struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
 	struct iwl_lq_sta *lq_sta = &mvmsta->lq_sta;
-	bool allow_ant_mismatch = fw_has_api(&mvm->fw->ucode_capa,
-					     IWL_UCODE_TLV_API_LQ_SS_PARAMS);
 
 	/* Treat uninitialized rate scaling data same as non-existing. */
 	if (!lq_sta) {
@@ -1262,10 +1234,10 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
 	rs_rate_from_ucode_rate(lq_hwrate, info->band, &lq_rate);
 
 	/* Here we actually compare this rate to the latest LQ command */
-	if (!rs_rate_equal(&tx_resp_rate, &lq_rate, allow_ant_mismatch)) {
+	if (lq_color != LQ_FLAG_COLOR_GET(table->flags)) {
 		IWL_DEBUG_RATE(mvm,
-			       "initial tx resp rate 0x%x does not match 0x%x\n",
-			       tx_resp_hwrate, lq_hwrate);
+			       "tx resp color 0x%x does not match 0x%x\n",
+			       lq_color, LQ_FLAG_COLOR_GET(table->flags));
 
 		/*
 		 * Since rates mis-match, the last LQ command may have failed.
@@ -3326,6 +3298,7 @@ static void rs_build_rates_table(struct iwl_mvm *mvm,
 	u8 valid_tx_ant = 0;
 	struct iwl_lq_cmd *lq_cmd = &lq_sta->lq;
 	bool toggle_ant = false;
+	u32 color;
 
 	memcpy(&rate, initial_rate, sizeof(rate));
 
@@ -3380,6 +3353,9 @@ static void rs_build_rates_table(struct iwl_mvm *mvm,
 				 num_rates, num_retries, valid_tx_ant,
 				 toggle_ant);
 
+	/* update the color of the LQ command (as a counter at bits 1-3) */
+	color = LQ_FLAGS_COLOR_INC(LQ_FLAG_COLOR_GET(lq_cmd->flags));
+	lq_cmd->flags = LQ_FLAG_COLOR_SET(lq_cmd->flags, color);
 }
 
 struct rs_bfer_active_iter_data {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.h b/drivers/net/wireless/intel/iwlwifi/mvm/rs.h
index ee207f2c0a90..3abde1cb0303 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.h
@@ -2,6 +2,7 @@
  *
  * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2017 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -357,6 +358,20 @@ struct iwl_lq_sta {
 	} pers;
 };
 
+/* ieee80211_tx_info's status_driver_data[0] is packed with lq color and txp
+ * Note, it's iwlmvm <-> mac80211 interface.
+ * bits 0-7: reduced tx power
+ * bits 8-10: LQ command's color
+ */
+#define RS_DRV_DATA_TXP_MSK 0xff
+#define RS_DRV_DATA_LQ_COLOR_POS 8
+#define RS_DRV_DATA_LQ_COLOR_MSK (7 << RS_DRV_DATA_LQ_COLOR_POS)
+#define RS_DRV_DATA_LQ_COLOR_GET(_f) (((_f) & RS_DRV_DATA_LQ_COLOR_MSK) >>\
+				      RS_DRV_DATA_LQ_COLOR_POS)
+#define RS_DRV_DATA_PACK(_c, _p) ((void *)(uintptr_t)\
+				  (((uintptr_t)_p) |\
+				   ((_c) << RS_DRV_DATA_LQ_COLOR_POS)))
+
 /* Initialize station's rate scaling information after adding station */
 void iwl_mvm_rs_rate_init(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
 			  enum nl80211_band band, bool init);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index f5c786ddc526..614d67810d05 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -2120,7 +2120,8 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 	if (!iwl_mvm_is_dqa_supported(mvm))
 		return 0;
 
-	if (WARN_ON(vif->type != NL80211_IFTYPE_AP))
+	if (WARN_ON(vif->type != NL80211_IFTYPE_AP &&
+		    vif->type != NL80211_IFTYPE_ADHOC))
 		return -ENOTSUPP;
 
 	/*
@@ -2155,6 +2156,16 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 		mvmvif->cab_queue = queue;
 	} else if (!fw_has_api(&mvm->fw->ucode_capa,
 			       IWL_UCODE_TLV_API_STA_TYPE)) {
+		/*
+		 * In IBSS, ieee80211_check_queues() sets the cab_queue to be
+		 * invalid, so make sure we use the queue we want.
+		 * Note that this is done here as we want to avoid making DQA
+		 * changes in mac80211 layer.
+		 */
+		if (vif->type == NL80211_IFTYPE_ADHOC) {
+			vif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE;
+			mvmvif->cab_queue = vif->cab_queue;
+		}
 		iwl_mvm_enable_txq(mvm, vif->cab_queue, vif->cab_queue, 0,
 				   &cfg, timeout);
 	}
@@ -3321,18 +3332,15 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm,
 
 	/* Get the station from the mvm local station table */
 	mvm_sta = iwl_mvm_get_key_sta(mvm, vif, sta);
-	if (!mvm_sta) {
-		IWL_ERR(mvm, "Failed to find station\n");
-		return -EINVAL;
-	}
-	sta_id = mvm_sta->sta_id;
+	if (mvm_sta)
+		sta_id = mvm_sta->sta_id;
 
 	IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n",
 		      keyconf->keyidx, sta_id);
 
-	if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC ||
-	    keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 ||
-	    keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256)
+	if (mvm_sta && (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC ||
+			keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 ||
+			keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256))
 		return iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, true);
 
 	if (!__test_and_clear_bit(keyconf->hw_key_idx, mvm->fw_key_table)) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
index 2716cb5483bf..ad62b67dceb2 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
@@ -313,6 +313,7 @@ enum iwl_mvm_agg_state {
  *	This is basically (last acked packet++).
  * @rate_n_flags: Rate at which Tx was attempted. Holds the data between the
  *	Tx response (TX_CMD), and the block ack notification (COMPRESSED_BA).
+ * @lq_color: the color of the LQ command as it appears in tx response.
  * @amsdu_in_ampdu_allowed: true if A-MSDU in A-MPDU is allowed.
  * @state: state of the BA agreement establishment / tear down.
  * @txq_id: Tx queue used by the BA session / DQA
@@ -331,6 +332,7 @@ struct iwl_mvm_tid_data {
 	u16 next_reclaimed;
 	/* The rest is Tx AGG related */
 	u32 rate_n_flags;
+	u8 lq_color;
 	bool amsdu_in_ampdu_allowed;
 	enum iwl_mvm_agg_state state;
 	u16 txq_id;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
index f9cbd197246f..506d58104e1c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
@@ -790,11 +790,13 @@ static int iwl_mvm_tcool_set_cur_state(struct thermal_cooling_device *cdev,
 	struct iwl_mvm *mvm = (struct iwl_mvm *)(cdev->devdata);
 	int ret;
 
-	if (!mvm->ucode_loaded || !(mvm->cur_ucode == IWL_UCODE_REGULAR))
-		return -EIO;
-
 	mutex_lock(&mvm->mutex);
 
+	if (!mvm->ucode_loaded || !(mvm->cur_ucode == IWL_UCODE_REGULAR)) {
+		ret = -EIO;
+		goto unlock;
+	}
+
 	if (new_state >= ARRAY_SIZE(iwl_mvm_cdev_budgets)) {
 		ret = -EINVAL;
 		goto unlock;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index bcaceb64a6e8..f21901cd4a4f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -1323,6 +1323,7 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
 	struct iwl_mvm_sta *mvmsta;
 	struct sk_buff_head skbs;
 	u8 skb_freed = 0;
+	u8 lq_color;
 	u16 next_reclaimed, seq_ctl;
 	bool is_ndp = false;
 
@@ -1405,8 +1406,9 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
 		info->status.tx_time =
 			le16_to_cpu(tx_resp->wireless_media_time);
 		BUILD_BUG_ON(ARRAY_SIZE(info->status.status_driver_data) < 1);
+		lq_color = TX_RES_RATE_TABLE_COL_GET(tx_resp->tlc_info);
 		info->status.status_driver_data[0] =
-				(void *)(uintptr_t)tx_resp->reduced_tpc;
+			RS_DRV_DATA_PACK(lq_color, tx_resp->reduced_tpc);
 
 		ieee80211_tx_status(mvm->hw, skb);
 	}
@@ -1638,6 +1640,9 @@ static void iwl_mvm_rx_tx_cmd_agg(struct iwl_mvm *mvm,
 			le32_to_cpu(tx_resp->initial_rate);
 		mvmsta->tid_data[tid].tx_time =
 			le16_to_cpu(tx_resp->wireless_media_time);
+		mvmsta->tid_data[tid].lq_color =
+			(tx_resp->tlc_info & TX_RES_RATE_TABLE_COLOR_MSK) >>
+			TX_RES_RATE_TABLE_COLOR_POS;
 	}
 
 	rcu_read_unlock();
@@ -1707,6 +1712,11 @@ static void iwl_mvm_tx_reclaim(struct iwl_mvm *mvm, int sta_id, int tid,
 	iwl_mvm_check_ratid_empty(mvm, sta, tid);
 
 	freed = 0;
+
+	/* pack lq color from tid_data along the reduced txp */
+	ba_info->status.status_driver_data[0] =
+		RS_DRV_DATA_PACK(tid_data->lq_color,
+				 ba_info->status.status_driver_data[0]);
 	ba_info->status.status_driver_data[1] = (void *)(uintptr_t)rate;
 
 	skb_queue_walk(&reclaimed_skbs, skb) {
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 70acf850a9f1..93cbc7a69bcd 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -2803,7 +2803,8 @@ static struct iwl_trans_dump_data
 #ifdef CONFIG_PM_SLEEP
 static int iwl_trans_pcie_suspend(struct iwl_trans *trans)
 {
-	if (trans->runtime_pm_mode == IWL_PLAT_PM_MODE_D0I3)
+	if (trans->runtime_pm_mode == IWL_PLAT_PM_MODE_D0I3 &&
+	    (trans->system_pm_mode == IWL_PLAT_PM_MODE_D0I3))
 		return iwl_pci_fw_enter_d0i3(trans);
 
 	return 0;
@@ -2811,7 +2812,8 @@ static int iwl_trans_pcie_suspend(struct iwl_trans *trans)
 
 static void iwl_trans_pcie_resume(struct iwl_trans *trans)
 {
-	if (trans->runtime_pm_mode == IWL_PLAT_PM_MODE_D0I3)
+	if (trans->runtime_pm_mode == IWL_PLAT_PM_MODE_D0I3 &&
+	    (trans->system_pm_mode == IWL_PLAT_PM_MODE_D0I3))
 		iwl_pci_fw_exit_d0i3(trans);
 }
 #endif /* CONFIG_PM_SLEEP */
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 9fb46a6f47cf..9c9bfbbabdf1 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -906,7 +906,7 @@ int iwl_trans_pcie_dyn_txq_alloc(struct iwl_trans *trans,
 
 	if (WARN_ON(iwl_rx_packet_payload_len(hcmd.resp_pkt) != sizeof(*rsp))) {
 		ret = -EINVAL;
-		goto error;
+		goto error_free_resp;
 	}
 
 	rsp = (void *)hcmd.resp_pkt->data;
@@ -915,13 +915,13 @@ int iwl_trans_pcie_dyn_txq_alloc(struct iwl_trans *trans,
 	if (qid > ARRAY_SIZE(trans_pcie->txq)) {
 		WARN_ONCE(1, "queue index %d unsupported", qid);
 		ret = -EIO;
-		goto error;
+		goto error_free_resp;
 	}
 
 	if (test_and_set_bit(qid, trans_pcie->queue_used)) {
 		WARN_ONCE(1, "queue %d already used", qid);
 		ret = -EIO;
-		goto error;
+		goto error_free_resp;
 	}
 
 	txq->id = qid;
@@ -934,8 +934,11 @@ int iwl_trans_pcie_dyn_txq_alloc(struct iwl_trans *trans,
 			   (txq->write_ptr) | (qid << 16));
 	IWL_DEBUG_TX_QUEUES(trans, "Activate queue %d\n", qid);
 
+	iwl_free_resp(&hcmd);
 	return qid;
 
+error_free_resp:
+	iwl_free_resp(&hcmd);
 error:
 	iwl_pcie_gen2_txq_free_memory(trans, txq);
 	return ret;
diff --git a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
index b2c6b065b542..ff153ce29539 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
@@ -2544,7 +2544,7 @@ static int prism2_ioctl_priv_prism2_param(struct net_device *dev,
 			ret = -EINVAL;
 		}
 		if (local->iw_mode == IW_MODE_MASTER) {
-			wait_queue_t __wait;
+			wait_queue_entry_t __wait;
 			init_waitqueue_entry(&__wait, current);
 			add_wait_queue(&local->hostscan_wq, &__wait);
 			set_current_state(TASK_INTERRUPTIBLE);
diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c
index 544fc09dcb62..1372b20f931e 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_main.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_main.c
@@ -73,7 +73,7 @@ struct net_device * hostap_add_interface(struct local_info *local,
 	dev->mem_end = mdev->mem_end;
 
 	hostap_setup_dev(dev, local, type);
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 
 	sprintf(dev->name, "%s%s", prefix, name);
 	if (!rtnl_locked)
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 002b25cff5b6..c854a557998b 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2861,7 +2861,7 @@ static const struct net_device_ops hwsim_netdev_ops = {
 static void hwsim_mon_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &hwsim_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	ether_setup(dev);
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->type = ARPHRD_IEEE80211_RADIOTAP;
diff --git a/drivers/net/wireless/marvell/libertas/main.c b/drivers/net/wireless/marvell/libertas/main.c
index e3500203715c..dde065d0d5c1 100644
--- a/drivers/net/wireless/marvell/libertas/main.c
+++ b/drivers/net/wireless/marvell/libertas/main.c
@@ -453,7 +453,7 @@ static int lbs_thread(void *data)
 {
 	struct net_device *dev = data;
 	struct lbs_private *priv = dev->ml_priv;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	lbs_deb_enter(LBS_DEB_THREAD);
 
diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index dd87b9ff64c3..39b6b5e3f6e0 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -1280,7 +1280,7 @@ void mwifiex_init_priv_params(struct mwifiex_private *priv,
 			      struct net_device *dev)
 {
 	dev->netdev_ops = &mwifiex_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	/* Initialize private structure */
 	priv->current_key_index = 0;
 	priv->media_connected = false;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 530586be05b4..5b1d2e8402d9 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -199,6 +199,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
 	unsigned long   remaining_credit;
 	struct timer_list credit_timeout;
 	u64 credit_window_start;
+	bool rate_limited;
 
 	/* Statistics */
 	struct xenvif_stats stats;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 8397f6c92451..e322a862ddfe 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -106,7 +106,11 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
 
 	if (work_done < budget) {
 		napi_complete_done(napi, work_done);
-		xenvif_napi_schedule_or_enable_events(queue);
+		/* If the queue is rate-limited, it shall be
+		 * rescheduled in the timer callback.
+		 */
+		if (likely(!queue->rate_limited))
+			xenvif_napi_schedule_or_enable_events(queue);
 	}
 
 	return work_done;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 602d408fa25e..5042ff8d449a 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -180,6 +180,7 @@ static void tx_add_credit(struct xenvif_queue *queue)
 		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
 
 	queue->remaining_credit = min(max_credit, max_burst);
+	queue->rate_limited = false;
 }
 
 void xenvif_tx_credit_callback(unsigned long data)
@@ -686,8 +687,10 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
 		msecs_to_jiffies(queue->credit_usec / 1000);
 
 	/* Timer could already be pending in rare cases. */
-	if (timer_pending(&queue->credit_timeout))
+	if (timer_pending(&queue->credit_timeout)) {
+		queue->rate_limited = true;
 		return true;
+	}
 
 	/* Passed the point where we can replenish credit? */
 	if (time_after_eq64(now, next_credit)) {
@@ -702,6 +705,7 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
 		mod_timer(&queue->credit_timeout,
 			  next_credit);
 		queue->credit_window_start = next_credit;
+		queue->rate_limited = true;
 
 		return true;
 	}
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 6ffc482550c1..7b61adb6270c 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1934,8 +1934,7 @@ abort_transaction_no_dev_fatal:
 	xennet_disconnect_backend(info);
 	xennet_destroy_queues(info);
  out:
-	unregister_netdev(info->netdev);
-	xennet_free_netdev(info->netdev);
+	device_unregister(&dev->dev);
 	return err;
 }
 
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index c00238491673..7b3b6fd63d7d 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -2878,7 +2878,7 @@ static const struct intel_ntb_reg skx_reg = {
 	.link_is_up		= xeon_link_is_up,
 	.db_ioread		= skx_db_ioread,
 	.db_iowrite		= skx_db_iowrite,
-	.db_size		= sizeof(u64),
+	.db_size		= sizeof(u32),
 	.ntb_ctl		= SKX_NTBCNTL_OFFSET,
 	.mw_bar			= {2, 4},
 };
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 02ca45fdd892..10e5bf460139 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -177,14 +177,12 @@ struct ntb_transport_qp {
 	u64 rx_err_ver;
 	u64 rx_memcpy;
 	u64 rx_async;
-	u64 dma_rx_prep_err;
 	u64 tx_bytes;
 	u64 tx_pkts;
 	u64 tx_ring_full;
 	u64 tx_err_no_buf;
 	u64 tx_memcpy;
 	u64 tx_async;
-	u64 dma_tx_prep_err;
 };
 
 struct ntb_transport_mw {
@@ -254,8 +252,6 @@ enum {
 #define QP_TO_MW(nt, qp)	((qp) % nt->mw_count)
 #define NTB_QP_DEF_NUM_ENTRIES	100
 #define NTB_LINK_DOWN_TIMEOUT	10
-#define DMA_RETRIES		20
-#define DMA_OUT_RESOURCE_TO	msecs_to_jiffies(50)
 
 static void ntb_transport_rxc_db(unsigned long data);
 static const struct ntb_ctx_ops ntb_transport_ops;
@@ -516,12 +512,6 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
 			       "free tx - \t%u\n",
 			       ntb_transport_tx_free_entry(qp));
-	out_offset += snprintf(buf + out_offset, out_count - out_offset,
-			       "DMA tx prep err - \t%llu\n",
-			       qp->dma_tx_prep_err);
-	out_offset += snprintf(buf + out_offset, out_count - out_offset,
-			       "DMA rx prep err - \t%llu\n",
-			       qp->dma_rx_prep_err);
 
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
 			       "\n");
@@ -623,7 +613,7 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
 	if (!mw->virt_addr)
 		return -ENOMEM;
 
-	if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count)
+	if (mw_num < qp_count % mw_count)
 		num_qps_mw = qp_count / mw_count + 1;
 	else
 		num_qps_mw = qp_count / mw_count;
@@ -768,8 +758,6 @@ static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
 	qp->tx_err_no_buf = 0;
 	qp->tx_memcpy = 0;
 	qp->tx_async = 0;
-	qp->dma_tx_prep_err = 0;
-	qp->dma_rx_prep_err = 0;
 }
 
 static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
@@ -1000,7 +988,7 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt,
 	qp->event_handler = NULL;
 	ntb_qp_link_down_reset(qp);
 
-	if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count)
+	if (mw_num < qp_count % mw_count)
 		num_qps_mw = qp_count / mw_count + 1;
 	else
 		num_qps_mw = qp_count / mw_count;
@@ -1128,8 +1116,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 	qp_count = ilog2(qp_bitmap);
 	if (max_num_clients && max_num_clients < qp_count)
 		qp_count = max_num_clients;
-	else if (mw_count < qp_count)
-		qp_count = mw_count;
+	else if (nt->mw_count < qp_count)
+		qp_count = nt->mw_count;
 
 	qp_bitmap &= BIT_ULL(qp_count) - 1;
 
@@ -1317,7 +1305,6 @@ static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset)
 	struct dmaengine_unmap_data *unmap;
 	dma_cookie_t cookie;
 	void *buf = entry->buf;
-	int retries = 0;
 
 	len = entry->len;
 	device = chan->device;
@@ -1346,22 +1333,11 @@ static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset)
 
 	unmap->from_cnt = 1;
 
-	for (retries = 0; retries < DMA_RETRIES; retries++) {
-		txd = device->device_prep_dma_memcpy(chan,
-						     unmap->addr[1],
-						     unmap->addr[0], len,
-						     DMA_PREP_INTERRUPT);
-		if (txd)
-			break;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(DMA_OUT_RESOURCE_TO);
-	}
-
-	if (!txd) {
-		qp->dma_rx_prep_err++;
+	txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+					     unmap->addr[0], len,
+					     DMA_PREP_INTERRUPT);
+	if (!txd)
 		goto err_get_unmap;
-	}
 
 	txd->callback_result = ntb_rx_copy_callback;
 	txd->callback_param = entry;
@@ -1606,7 +1582,6 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
 	struct dmaengine_unmap_data *unmap;
 	dma_addr_t dest;
 	dma_cookie_t cookie;
-	int retries = 0;
 
 	device = chan->device;
 	dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index;
@@ -1628,21 +1603,10 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
 
 	unmap->to_cnt = 1;
 
-	for (retries = 0; retries < DMA_RETRIES; retries++) {
-		txd = device->device_prep_dma_memcpy(chan, dest,
-						     unmap->addr[0], len,
-						     DMA_PREP_INTERRUPT);
-		if (txd)
-			break;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(DMA_OUT_RESOURCE_TO);
-	}
-
-	if (!txd) {
-		qp->dma_tx_prep_err++;
+	txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
+					     DMA_PREP_INTERRUPT);
+	if (!txd)
 		goto err_get_unmap;
-	}
 
 	txd->callback_result = ntb_tx_copy_callback;
 	txd->callback_param = entry;
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 434e1d474f33..5cab2831ce99 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -90,11 +90,11 @@ MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");
 
 static unsigned int seg_order = 19; /* 512K */
 module_param(seg_order, uint, 0644);
-MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing");
+MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");
 
 static unsigned int run_order = 32; /* 4G */
 module_param(run_order, uint, 0644);
-MODULE_PARM_DESC(run_order, "size order [n^2] of total data to transfer");
+MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");
 
 static bool use_dma; /* default to 0 */
 module_param(use_dma, bool, 0644);
diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c
index 77a48a5164ff..df431e8a0631 100644
--- a/drivers/nubus/nubus.c
+++ b/drivers/nubus/nubus.c
@@ -13,7 +13,6 @@
 #include <linux/nubus.h>
 #include <linux/errno.h>
 #include <linux/init.h>
-#include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <asm/setup.h>
@@ -34,14 +33,6 @@ extern void oss_nubus_init(void);
 
 #define NUBUS_TEST_PATTERN 0x5A932BC7
 
-/* Define this if you like to live dangerously - it is known not to
-   work on pretty much every machine except the Quadra 630 and the LC
-   III. */
-#undef I_WANT_TO_PROBE_SLOT_ZERO
-
-/* This sometimes helps combat failure to boot */
-#undef TRY_TO_DODGE_WSOD
-
 /* Globals */
 
 struct nubus_dev *nubus_devices;
@@ -101,9 +92,6 @@ static void nubus_rewind(unsigned char **ptr, int len, int map)
 {
 	unsigned char *p = *ptr;
 
-	/* Sanity check */
-	if (len > 65536)
-		pr_err("rewind of 0x%08x!\n", len);
 	while (len) {
 		do {
 			p--;
@@ -117,8 +105,6 @@ static void nubus_advance(unsigned char **ptr, int len, int map)
 {
 	unsigned char *p = *ptr;
 
-	if (len > 65536)
-		pr_err("advance of 0x%08x!\n", len);
 	while (len) {
 		while (not_useful(p, map))
 			p++;
@@ -130,10 +116,15 @@ static void nubus_advance(unsigned char **ptr, int len, int map)
 
 static void nubus_move(unsigned char **ptr, int len, int map)
 {
+	unsigned long slot_space = (unsigned long)*ptr & 0xFF000000;
+
 	if (len > 0)
 		nubus_advance(ptr, len, map);
 	else if (len < 0)
 		nubus_rewind(ptr, -len, map);
+
+	if (((unsigned long)*ptr & 0xFF000000) != slot_space)
+		pr_err("%s: moved out of slot address space!\n", __func__);
 }
 
 /* Now, functions to read the sResource tree */
@@ -454,10 +445,6 @@ nubus_get_functional_resource(struct nubus_board *board, int slot,
 	pr_info("  Function 0x%02x:\n", parent->type);
 	nubus_get_subdir(parent, &dir);
 
-	/* Apple seems to have botched the ROM on the IIx */
-	if (slot == 0 && (unsigned long)dir.base % 2)
-		dir.base += 1;
-
 	pr_debug("%s: parent is 0x%p, dir is 0x%p\n",
 	         __func__, parent->base, dir.base);
 
@@ -691,83 +678,6 @@ static int __init nubus_get_board_resource(struct nubus_board *board, int slot,
 	return 0;
 }
 
-/* Attempt to bypass the somewhat non-obvious arrangement of
-   sResources in the motherboard ROM */
-static void __init nubus_find_rom_dir(struct nubus_board* board)
-{
-	unsigned char *rp;
-	unsigned char *romdir;
-	struct nubus_dir dir;
-	struct nubus_dirent ent;
-
-	/* Check for the extra directory just under the format block */
-	rp = board->fblock;
-	nubus_rewind(&rp, 4, board->lanes);
-	if (nubus_get_rom(&rp, 4, board->lanes) != NUBUS_TEST_PATTERN) {
-		/* OK, the ROM was telling the truth */
-		board->directory = board->fblock;
-		nubus_move(&board->directory,
-			   nubus_expand32(board->doffset),
-			   board->lanes);
-		return;
-	}
-
-	/* On "slot zero", you have to walk down a few more
-	   directories to get to the equivalent of a real card's root
-	   directory.  We don't know what they were smoking when they
-	   came up with this. */
-	romdir = nubus_rom_addr(board->slot);
-	nubus_rewind(&romdir, ROM_DIR_OFFSET, board->lanes);
-	dir.base = dir.ptr = romdir;
-	dir.done = 0;
-	dir.mask = board->lanes;
-
-	/* This one points to an "Unknown Macintosh" directory */
-	if (nubus_readdir(&dir, &ent) == -1)
-		goto badrom;
-
-	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
-		printk(KERN_INFO "nubus_get_rom_dir: entry %02x %06x\n", ent.type, ent.data);
-	/* This one takes us to where we want to go. */
-	if (nubus_readdir(&dir, &ent) == -1)
-		goto badrom;
-	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
-		printk(KERN_DEBUG "nubus_get_rom_dir: entry %02x %06x\n", ent.type, ent.data);
-	nubus_get_subdir(&ent, &dir);
-
-	/* Resource ID 01, also an "Unknown Macintosh" */
-	if (nubus_readdir(&dir, &ent) == -1)
-		goto badrom;
-	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
-		printk(KERN_DEBUG "nubus_get_rom_dir: entry %02x %06x\n", ent.type, ent.data);
-
-	/* FIXME: the first one is *not* always the right one.  We
-	   suspect this has something to do with the ROM revision.
-	   "The HORROR ROM" (LC-series) uses 0x7e, while "The HORROR
-	   Continues" (Q630) uses 0x7b.  The DAFB Macs evidently use
-	   something else.  Please run "Slots" on your Mac (see
-	   include/linux/nubus.h for where to get this program) and
-	   tell us where the 'SiDirPtr' for Slot 0 is.  If you feel
-	   brave, you should also use MacsBug to walk down the ROM
-	   directories like this function does and try to find the
-	   path to that address... */
-	if (nubus_readdir(&dir, &ent) == -1)
-		goto badrom;
-	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
-		printk(KERN_DEBUG "nubus_get_rom_dir: entry %02x %06x\n", ent.type, ent.data);
-
-	/* Bwahahahaha... */
-	nubus_get_subdir(&ent, &dir);
-	board->directory = dir.base;
-	return;
-
-	/* Even more evil laughter... */
- badrom:
-	board->directory = board->fblock;
-	nubus_move(&board->directory, nubus_expand32(board->doffset), board->lanes);
-	printk(KERN_ERR "nubus_get_rom_dir: ROM weirdness!  Notify the developers...\n");
-}
-
 /* Add a board (might be many devices) to the list */
 static struct nubus_board * __init nubus_add_board(int slot, int bytelanes)
 {
@@ -828,8 +738,11 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes)
 	 * since the initial Macintosh ROM releases skipped the check.
 	 */
 
-	/* Attempt to work around slot zero weirdness */
-	nubus_find_rom_dir(board);
+	/* Set up the directory pointer */
+	board->directory = board->fblock;
+	nubus_move(&board->directory, nubus_expand32(board->doffset),
+	           board->lanes);
+
 	nubus_get_root_dir(board, &dir);
 
 	/* We're ready to rock */
@@ -849,9 +762,6 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes)
 		nubus_get_board_resource(board, slot, &ent);
 	}
 
-	/* Aaaarrrrgghh!  The LC III motherboard has *two* board
-	   resources.  I have no idea WTF to do about this. */
-
 	while (nubus_readdir(&dir, &ent) != -1) {
 		struct nubus_dev *dev;
 		struct nubus_dev **devp;
@@ -898,8 +808,6 @@ void __init nubus_probe_slot(int slot)
 			continue;
 
 		dp = *rp;
-		if(dp == 0)
-			continue;
 
 		/* The last byte of the format block consists of two
 		   nybbles which are "mirror images" of each other.
@@ -908,7 +816,7 @@ void __init nubus_probe_slot(int slot)
 			continue;
 		/* Check that this value is actually *on* one of the
 		   bytelanes it claims are valid! */
-		if ((dp & 0x0F) >= (1 << i))
+		if (not_useful(rp, dp))
 			continue;
 
 		/* Looks promising.  Let's put it on the list. */
@@ -922,10 +830,6 @@ void __init nubus_scan_bus(void)
 {
 	int slot;
 
-	/* This might not work on your machine */
-#ifdef I_WANT_TO_PROBE_SLOT_ZERO
-	nubus_probe_slot(0);
-#endif
 	for (slot = 9; slot < 15; slot++) {
 		nubus_probe_slot(slot);
 	}
@@ -943,13 +847,6 @@ static int __init nubus_init(void)
 		via_nubus_init();
 	}
 
-#ifdef TRY_TO_DODGE_WSOD
-	/* Rogue Ethernet interrupts can kill the machine if we don't
-	   do this.  Obviously this is bogus.  Hopefully the local VIA
-	   gurus can fix the real cause of the problem. */
-	mdelay(1000);
-#endif
-
 	/* And probe */
 	pr_info("NuBus: Scanning NuBus slots.\n");
 	nubus_devices = NULL;
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 9faaa9694d87..f12d23c49771 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -186,7 +186,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 	 * another kernel subsystem, and we just pass it through.
 	 */
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		goto out;
 	}
 
@@ -205,7 +205,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 					"io error in %s sector %lld, len %d,\n",
 					(rw == READ) ? "READ" : "WRITE",
 					(unsigned long long) iter.bi_sector, len);
-			bio->bi_error = err;
+			bio->bi_status = errno_to_blk_status(err);
 			break;
 		}
 	}
@@ -218,7 +218,8 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 }
 
 static int nsblk_rw_bytes(struct nd_namespace_common *ndns,
-		resource_size_t offset, void *iobuf, size_t n, int rw)
+		resource_size_t offset, void *iobuf, size_t n, int rw,
+		unsigned long flags)
 {
 	struct nd_namespace_blk *nsblk = to_nd_namespace_blk(&ndns->dev);
 	struct nd_blk_region *ndbr = to_ndbr(nsblk);
@@ -272,7 +273,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
 
 	blk_queue_make_request(q, nd_blk_make_request);
 	blk_queue_max_hw_sectors(q, UINT_MAX);
-	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 	blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
 	q->queuedata = nsblk;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 368795aad5c9..b6ba0618ea46 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -32,45 +32,53 @@ enum log_ent_request {
 };
 
 static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
-		void *buf, size_t n)
+		void *buf, size_t n, unsigned long flags)
 {
 	struct nd_btt *nd_btt = arena->nd_btt;
 	struct nd_namespace_common *ndns = nd_btt->ndns;
 
 	/* arena offsets are 4K from the base of the device */
 	offset += SZ_4K;
-	return nvdimm_read_bytes(ndns, offset, buf, n);
+	return nvdimm_read_bytes(ndns, offset, buf, n, flags);
 }
 
 static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
-		void *buf, size_t n)
+		void *buf, size_t n, unsigned long flags)
 {
 	struct nd_btt *nd_btt = arena->nd_btt;
 	struct nd_namespace_common *ndns = nd_btt->ndns;
 
 	/* arena offsets are 4K from the base of the device */
 	offset += SZ_4K;
-	return nvdimm_write_bytes(ndns, offset, buf, n);
+	return nvdimm_write_bytes(ndns, offset, buf, n, flags);
 }
 
 static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
 {
 	int ret;
 
+	/*
+	 * infooff and info2off should always be at least 512B aligned.
+	 * We rely on that to make sure rw_bytes does error clearing
+	 * correctly, so make sure that is the case.
+	 */
+	WARN_ON_ONCE(!IS_ALIGNED(arena->infooff, 512));
+	WARN_ON_ONCE(!IS_ALIGNED(arena->info2off, 512));
+
 	ret = arena_write_bytes(arena, arena->info2off, super,
-			sizeof(struct btt_sb));
+			sizeof(struct btt_sb), 0);
 	if (ret)
 		return ret;
 
 	return arena_write_bytes(arena, arena->infooff, super,
-			sizeof(struct btt_sb));
+			sizeof(struct btt_sb), 0);
 }
 
 static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
 {
 	WARN_ON(!super);
 	return arena_read_bytes(arena, arena->infooff, super,
-			sizeof(struct btt_sb));
+			sizeof(struct btt_sb), 0);
 }
 
 /*
@@ -79,16 +87,17 @@ static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
  *   mapping is in little-endian
  *   mapping contains 'E' and 'Z' flags as desired
  */
-static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping)
+static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping,
+		unsigned long flags)
 {
 	u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
 
 	WARN_ON(lba >= arena->external_nlba);
-	return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE);
+	return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE, flags);
 }
 
 static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
-			u32 z_flag, u32 e_flag)
+			u32 z_flag, u32 e_flag, unsigned long rwb_flags)
 {
 	u32 ze;
 	__le32 mapping_le;
@@ -127,11 +136,11 @@ static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
 	}
 
 	mapping_le = cpu_to_le32(mapping);
-	return __btt_map_write(arena, lba, mapping_le);
+	return __btt_map_write(arena, lba, mapping_le, rwb_flags);
 }
 
 static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
-			int *trim, int *error)
+			int *trim, int *error, unsigned long rwb_flags)
 {
 	int ret;
 	__le32 in;
@@ -140,7 +149,7 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
 
 	WARN_ON(lba >= arena->external_nlba);
 
-	ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE);
+	ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE, rwb_flags);
 	if (ret)
 		return ret;
 
@@ -189,7 +198,7 @@ static int btt_log_read_pair(struct arena_info *arena, u32 lane,
 	WARN_ON(!ent);
 	return arena_read_bytes(arena,
 			arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
-			2 * LOG_ENT_SIZE);
+			2 * LOG_ENT_SIZE, 0);
 }
 
 static struct dentry *debugfs_root;
@@ -335,7 +344,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
  * btt_flog_write is the wrapper for updating the freelist elements
  */
 static int __btt_log_write(struct arena_info *arena, u32 lane,
-			u32 sub, struct log_entry *ent)
+			u32 sub, struct log_entry *ent, unsigned long flags)
 {
 	int ret;
 	/*
@@ -350,13 +359,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane,
 	void *src = ent;
 
 	/* split the 16B write into atomic, durable halves */
-	ret = arena_write_bytes(arena, ns_off, src, log_half);
+	ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
 	if (ret)
 		return ret;
 
 	ns_off += log_half;
 	src += log_half;
-	return arena_write_bytes(arena, ns_off, src, log_half);
+	return arena_write_bytes(arena, ns_off, src, log_half, flags);
 }
 
 static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
@@ -364,7 +373,7 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
 {
 	int ret;
 
-	ret = __btt_log_write(arena, lane, sub, ent);
+	ret = __btt_log_write(arena, lane, sub, ent, NVDIMM_IO_ATOMIC);
 	if (ret)
 		return ret;
 
@@ -393,11 +402,19 @@ static int btt_map_init(struct arena_info *arena)
 	if (!zerobuf)
 		return -ENOMEM;
 
+	/*
+	 * mapoff should always be at least 512B  aligned. We rely on that to
+	 * make sure rw_bytes does error clearing correctly, so make sure that
+	 * is the case.
+	 */
+	WARN_ON_ONCE(!IS_ALIGNED(arena->mapoff, 512));
+
 	while (mapsize) {
 		size_t size = min(mapsize, chunk_size);
 
+		WARN_ON_ONCE(size < 512);
 		ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
-				size);
+				size, 0);
 		if (ret)
 			goto free;
 
@@ -417,26 +434,50 @@ static int btt_map_init(struct arena_info *arena)
  */
 static int btt_log_init(struct arena_info *arena)
 {
+	size_t logsize = arena->info2off - arena->logoff;
+	size_t chunk_size = SZ_4K, offset = 0;
+	struct log_entry log;
+	void *zerobuf;
 	int ret;
 	u32 i;
-	struct log_entry log, zerolog;
 
-	memset(&zerolog, 0, sizeof(zerolog));
+	zerobuf = kzalloc(chunk_size, GFP_KERNEL);
+	if (!zerobuf)
+		return -ENOMEM;
+	/*
+	 * logoff should always be at least 512B  aligned. We rely on that to
+	 * make sure rw_bytes does error clearing correctly, so make sure that
+	 * is the case.
+	 */
+	WARN_ON_ONCE(!IS_ALIGNED(arena->logoff, 512));
+
+	while (logsize) {
+		size_t size = min(logsize, chunk_size);
+
+		WARN_ON_ONCE(size < 512);
+		ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf,
+				size, 0);
+		if (ret)
+			goto free;
+
+		offset += size;
+		logsize -= size;
+		cond_resched();
+	}
 
 	for (i = 0; i < arena->nfree; i++) {
 		log.lba = cpu_to_le32(i);
 		log.old_map = cpu_to_le32(arena->external_nlba + i);
 		log.new_map = cpu_to_le32(arena->external_nlba + i);
 		log.seq = cpu_to_le32(LOG_SEQ_INIT);
-		ret = __btt_log_write(arena, i, 0, &log);
-		if (ret)
-			return ret;
-		ret = __btt_log_write(arena, i, 1, &zerolog);
+		ret = __btt_log_write(arena, i, 0, &log, 0);
 		if (ret)
-			return ret;
+			goto free;
 	}
 
-	return 0;
+ free:
+	kfree(zerobuf);
+	return ret;
 }
 
 static int btt_freelist_init(struct arena_info *arena)
@@ -470,7 +511,7 @@ static int btt_freelist_init(struct arena_info *arena)
 
 		/* Check if map recovery is needed */
 		ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry,
-				NULL, NULL);
+				NULL, NULL, 0);
 		if (ret)
 			return ret;
 		if ((le32_to_cpu(log_new.new_map) != map_entry) &&
@@ -480,7 +521,7 @@ static int btt_freelist_init(struct arena_info *arena)
 			 * to complete the map write. So fix up the map.
 			 */
 			ret = btt_map_write(arena, le32_to_cpu(log_new.lba),
-					le32_to_cpu(log_new.new_map), 0, 0);
+					le32_to_cpu(log_new.new_map), 0, 0, 0);
 			if (ret)
 				return ret;
 		}
@@ -875,7 +916,7 @@ static int btt_data_read(struct arena_info *arena, struct page *page,
 	u64 nsoff = to_namespace_offset(arena, lba);
 	void *mem = kmap_atomic(page);
 
-	ret = arena_read_bytes(arena, nsoff, mem + off, len);
+	ret = arena_read_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC);
 	kunmap_atomic(mem);
 
 	return ret;
@@ -888,7 +929,7 @@ static int btt_data_write(struct arena_info *arena, u32 lba,
 	u64 nsoff = to_namespace_offset(arena, lba);
 	void *mem = kmap_atomic(page);
 
-	ret = arena_write_bytes(arena, nsoff, mem + off, len);
+	ret = arena_write_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC);
 	kunmap_atomic(mem);
 
 	return ret;
@@ -931,10 +972,12 @@ static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
 		mem = kmap_atomic(bv.bv_page);
 		if (rw)
 			ret = arena_write_bytes(arena, meta_nsoff,
-					mem + bv.bv_offset, cur_len);
+					mem + bv.bv_offset, cur_len,
+					NVDIMM_IO_ATOMIC);
 		else
 			ret = arena_read_bytes(arena, meta_nsoff,
-					mem + bv.bv_offset, cur_len);
+					mem + bv.bv_offset, cur_len,
+					NVDIMM_IO_ATOMIC);
 
 		kunmap_atomic(mem);
 		if (ret)
@@ -976,7 +1019,8 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
 
 		cur_len = min(btt->sector_size, len);
 
-		ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag);
+		ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag,
+				NVDIMM_IO_ATOMIC);
 		if (ret)
 			goto out_lane;
 
@@ -1006,7 +1050,7 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
 			barrier();
 
 			ret = btt_map_read(arena, premap, &new_map, &t_flag,
-						&e_flag);
+						&e_flag, NVDIMM_IO_ATOMIC);
 			if (ret)
 				goto out_rtt;
 
@@ -1093,7 +1137,8 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
 		}
 
 		lock_map(arena, premap);
-		ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL);
+		ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL,
+				NVDIMM_IO_ATOMIC);
 		if (ret)
 			goto out_map;
 		if (old_postmap >= arena->internal_nlba) {
@@ -1110,7 +1155,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
 		if (ret)
 			goto out_map;
 
-		ret = btt_map_write(arena, premap, new_postmap, 0, 0);
+		ret = btt_map_write(arena, premap, new_postmap, 0, 0, 0);
 		if (ret)
 			goto out_map;
 
@@ -1165,7 +1210,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 	 * another kernel subsystem, and we just pass it through.
 	 */
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		goto out;
 	}
 
@@ -1187,7 +1232,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 					(op_is_write(bio_op(bio))) ? "WRITE" :
 					"READ",
 					(unsigned long long) iter.bi_sector, len);
-			bio->bi_error = err;
+			bio->bi_status = errno_to_blk_status(err);
 			break;
 		}
 	}
@@ -1252,7 +1297,6 @@ static int btt_blk_init(struct btt *btt)
 	blk_queue_make_request(btt->btt_queue, btt_make_request);
 	blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
 	blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
-	blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
 	btt->btt_queue->queuedata = btt;
 
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 4b76af2b8715..4c989bb9a8a0 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -222,13 +222,6 @@ struct device *nd_btt_create(struct nd_region *nd_region)
 	return dev;
 }
 
-static bool uuid_is_null(u8 *uuid)
-{
-	static const u8 null_uuid[16];
-
-	return (memcmp(uuid, null_uuid, 16) == 0);
-}
-
 /**
  * nd_btt_arena_is_valid - check if the metadata layout is valid
  * @nd_btt:	device with BTT geometry and backing device info
@@ -249,7 +242,7 @@ bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super)
 	if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0)
 		return false;
 
-	if (!uuid_is_null(super->parent_uuid))
+	if (!guid_is_null((guid_t *)&super->parent_uuid))
 		if (memcmp(super->parent_uuid, parent_uuid, 16) != 0)
 			return false;
 
@@ -273,7 +266,7 @@ static int __nd_btt_probe(struct nd_btt *nd_btt,
 	if (!btt_sb || !ndns || !nd_btt)
 		return -ENODEV;
 
-	if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb)))
+	if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb), 0))
 		return -ENXIO;
 
 	if (nvdimm_namespace_capacity(ndns) < SZ_16M)
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 93d128da1c92..7ceb5fa4f2a1 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -228,7 +228,8 @@ u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb)
 EXPORT_SYMBOL(nd_sb_checksum);
 
 static int nsio_rw_bytes(struct nd_namespace_common *ndns,
-		resource_size_t offset, void *buf, size_t size, int rw)
+		resource_size_t offset, void *buf, size_t size, int rw,
+		unsigned long flags)
 {
 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
 	unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
@@ -259,7 +260,8 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 		 * work around this collision.
 		 */
 		if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
-				&& (!ndns->claim || !is_nd_btt(ndns->claim))) {
+				&& !(flags & NVDIMM_IO_ATOMIC)
+				&& !ndns->claim) {
 			long cleared;
 
 			cleared = nvdimm_clear_poison(&ndns->dev,
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 77d032192bf7..03852d738eec 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -31,6 +31,7 @@ enum {
 	ND_MAX_LANES = 256,
 	SECTOR_SHIFT = 9,
 	INT_LBASIZE_ALIGNMENT = 64,
+	NVDIMM_IO_ATOMIC = 1,
 };
 
 struct nd_poison {
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 335c8175410b..a6c403600d19 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -357,7 +357,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 	if (!is_nd_pmem(nd_pfn->dev.parent))
 		return -ENODEV;
 
-	if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb)))
+	if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0))
 		return -ENXIO;
 
 	if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0)
@@ -662,7 +662,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
 	checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
 	pfn_sb->checksum = cpu_to_le64(checksum);
 
-	return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
+	return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0);
 }
 
 /*
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index c544d466ea51..6b577afb1d44 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -49,19 +49,19 @@ static struct nd_region *to_region(struct pmem_device *pmem)
 	return to_nd_region(to_dev(pmem)->parent);
 }
 
-static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
-		unsigned int len)
+static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
+		phys_addr_t offset, unsigned int len)
 {
 	struct device *dev = to_dev(pmem);
 	sector_t sector;
 	long cleared;
-	int rc = 0;
+	blk_status_t rc = BLK_STS_OK;
 
 	sector = (offset - pmem->data_offset) / 512;
 
 	cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
 	if (cleared < len)
-		rc = -EIO;
+		rc = BLK_STS_IOERR;
 	if (cleared > 0 && cleared / 512) {
 		cleared /= 512;
 		dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__,
@@ -84,7 +84,7 @@ static void write_pmem(void *pmem_addr, struct page *page,
 	kunmap_atomic(mem);
 }
 
-static int read_pmem(struct page *page, unsigned int off,
+static blk_status_t read_pmem(struct page *page, unsigned int off,
 		void *pmem_addr, unsigned int len)
 {
 	int rc;
@@ -93,15 +93,15 @@ static int read_pmem(struct page *page, unsigned int off,
 	rc = memcpy_mcsafe(mem + off, pmem_addr, len);
 	kunmap_atomic(mem);
 	if (rc)
-		return -EIO;
-	return 0;
+		return BLK_STS_IOERR;
+	return BLK_STS_OK;
 }
 
-static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
+static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 			unsigned int len, unsigned int off, bool is_write,
 			sector_t sector)
 {
-	int rc = 0;
+	blk_status_t rc = BLK_STS_OK;
 	bool bad_pmem = false;
 	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
 	void *pmem_addr = pmem->virt_addr + pmem_off;
@@ -111,7 +111,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 
 	if (!is_write) {
 		if (unlikely(bad_pmem))
-			rc = -EIO;
+			rc = BLK_STS_IOERR;
 		else {
 			rc = read_pmem(page, off, pmem_addr, len);
 			flush_dcache_page(page);
@@ -149,7 +149,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 
 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 {
-	int rc = 0;
+	blk_status_t rc = 0;
 	bool do_acct;
 	unsigned long start;
 	struct bio_vec bvec;
@@ -166,7 +166,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 				bvec.bv_offset, op_is_write(bio_op(bio)),
 				iter.bi_sector);
 		if (rc) {
-			bio->bi_error = rc;
+			bio->bi_status = rc;
 			break;
 		}
 	}
@@ -184,7 +184,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 		       struct page *page, bool is_write)
 {
 	struct pmem_device *pmem = bdev->bd_queue->queuedata;
-	int rc;
+	blk_status_t rc;
 
 	rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector);
 
@@ -197,7 +197,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	if (rc == 0)
 		page_endio(page, is_write, 0);
 
-	return rc;
+	return blk_status_to_errno(rc);
 }
 
 /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
@@ -343,7 +343,6 @@ static int pmem_attach_disk(struct device *dev,
 	blk_queue_make_request(q, pmem_make_request);
 	blk_queue_physical_block_size(q, PAGE_SIZE);
 	blk_queue_max_hw_sectors(q, UINT_MAX);
-	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
 	queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
 	q->queuedata = pmem;
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 90745a616df7..46d6cb1e03bd 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -13,18 +13,6 @@ config BLK_DEV_NVME
 	  To compile this driver as a module, choose M here: the
 	  module will be called nvme.
 
-config BLK_DEV_NVME_SCSI
-	bool "SCSI emulation for NVMe device nodes"
-	depends on NVME_CORE
-	---help---
-	  This adds support for the SG_IO ioctl on the NVMe character
-	  and block devices nodes, as well as a translation for a small
-	  number of selected SCSI commands to NVMe commands to the NVMe
-	  driver.  If you don't know what this means you probably want
-	  to say N here, unless you run a distro that abuses the SCSI
-	  emulation to provide stable device names for mount by id, like
-	  some OpenSuSE and SLES versions.
-
 config NVME_FABRICS
 	tristate
 
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index f1a7d945fbb6..cc0aacb4c8b4 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -5,7 +5,6 @@ obj-$(CONFIG_NVME_RDMA)			+= nvme-rdma.o
 obj-$(CONFIG_NVME_FC)			+= nvme-fc.o
 
 nvme-core-y				:= core.o
-nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI)	+= scsi.o
 nvme-core-$(CONFIG_NVM)			+= lightnvm.o
 
 nvme-y					+= pci.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d5e0906262ea..d70df1d0072d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -27,7 +27,6 @@
 #include <linux/nvme_ioctl.h>
 #include <linux/t10-pi.h>
 #include <linux/pm_qos.h>
-#include <scsi/sg.h>
 #include <asm/unaligned.h>
 
 #include "nvme.h"
@@ -45,7 +44,7 @@ module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
 MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
 EXPORT_SYMBOL_GPL(nvme_io_timeout);
 
-unsigned char shutdown_timeout = 5;
+static unsigned char shutdown_timeout = 5;
 module_param(shutdown_timeout, byte, 0644);
 MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
 
@@ -56,7 +55,7 @@ MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
 static int nvme_char_major;
 module_param(nvme_char_major, int, 0);
 
-static unsigned long default_ps_max_latency_us = 25000;
+static unsigned long default_ps_max_latency_us = 100000;
 module_param(default_ps_max_latency_us, ulong, 0644);
 MODULE_PARM_DESC(default_ps_max_latency_us,
 		 "max power saving latency for new devices; use PM QOS to change per device");
@@ -65,34 +64,53 @@ static bool force_apst;
 module_param(force_apst, bool, 0644);
 MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
 
+static bool streams;
+module_param(streams, bool, 0644);
+MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
+
+struct workqueue_struct *nvme_wq;
+EXPORT_SYMBOL_GPL(nvme_wq);
+
 static LIST_HEAD(nvme_ctrl_list);
 static DEFINE_SPINLOCK(dev_list_lock);
 
 static struct class *nvme_class;
 
-static int nvme_error_status(struct request *req)
+int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
+{
+	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+		return -EBUSY;
+	if (!queue_work(nvme_wq, &ctrl->reset_work))
+		return -EBUSY;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
+
+static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
+{
+	int ret;
+
+	ret = nvme_reset_ctrl(ctrl);
+	if (!ret)
+		flush_work(&ctrl->reset_work);
+	return ret;
+}
+
+static blk_status_t nvme_error_status(struct request *req)
 {
 	switch (nvme_req(req)->status & 0x7ff) {
 	case NVME_SC_SUCCESS:
-		return 0;
+		return BLK_STS_OK;
 	case NVME_SC_CAP_EXCEEDED:
-		return -ENOSPC;
-	default:
-		return -EIO;
-
-	/*
-	 * XXX: these errors are a nasty side-band protocol to
-	 * drivers/md/dm-mpath.c:noretry_error() that aren't documented
-	 * anywhere..
-	 */
-	case NVME_SC_CMD_SEQ_ERROR:
-		return -EILSEQ;
+		return BLK_STS_NOSPC;
 	case NVME_SC_ONCS_NOT_SUPPORTED:
-		return -EOPNOTSUPP;
+		return BLK_STS_NOTSUPP;
 	case NVME_SC_WRITE_FAULT:
 	case NVME_SC_READ_ERROR:
 	case NVME_SC_UNWRITTEN_BLOCK:
-		return -ENODATA;
+		return BLK_STS_MEDIUM;
+	default:
+		return BLK_STS_IOERR;
 	}
 }
 
@@ -165,7 +183,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		switch (old_state) {
 		case NVME_CTRL_NEW:
 		case NVME_CTRL_LIVE:
-		case NVME_CTRL_RECONNECTING:
 			changed = true;
 			/* FALLTHRU */
 		default:
@@ -283,6 +300,105 @@ struct request *nvme_alloc_request(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(nvme_alloc_request);
 
+static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
+{
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+
+	c.directive.opcode = nvme_admin_directive_send;
+	c.directive.nsid = cpu_to_le32(0xffffffff);
+	c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
+	c.directive.dtype = NVME_DIR_IDENTIFY;
+	c.directive.tdtype = NVME_DIR_STREAMS;
+	c.directive.endir = enable ? NVME_DIR_ENDIR : 0;
+
+	return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0);
+}
+
+static int nvme_disable_streams(struct nvme_ctrl *ctrl)
+{
+	return nvme_toggle_streams(ctrl, false);
+}
+
+static int nvme_enable_streams(struct nvme_ctrl *ctrl)
+{
+	return nvme_toggle_streams(ctrl, true);
+}
+
+static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
+				  struct streams_directive_params *s, u32 nsid)
+{
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	memset(s, 0, sizeof(*s));
+
+	c.directive.opcode = nvme_admin_directive_recv;
+	c.directive.nsid = cpu_to_le32(nsid);
+	c.directive.numd = sizeof(*s);
+	c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
+	c.directive.dtype = NVME_DIR_STREAMS;
+
+	return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s));
+}
+
+static int nvme_configure_directives(struct nvme_ctrl *ctrl)
+{
+	struct streams_directive_params s;
+	int ret;
+
+	if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
+		return 0;
+	if (!streams)
+		return 0;
+
+	ret = nvme_enable_streams(ctrl);
+	if (ret)
+		return ret;
+
+	ret = nvme_get_stream_params(ctrl, &s, 0xffffffff);
+	if (ret)
+		return ret;
+
+	ctrl->nssa = le16_to_cpu(s.nssa);
+	if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
+		dev_info(ctrl->device, "too few streams (%u) available\n",
+					ctrl->nssa);
+		nvme_disable_streams(ctrl);
+		return 0;
+	}
+
+	ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
+	dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
+	return 0;
+}
+
+/*
+ * Check if 'req' has a write hint associated with it. If it does, assign
+ * a valid namespace stream to the write.
+ */
+static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
+				     struct request *req, u16 *control,
+				     u32 *dsmgmt)
+{
+	enum rw_hint streamid = req->write_hint;
+
+	if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
+		streamid = 0;
+	else {
+		streamid--;
+		if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
+			return;
+
+		*control |= NVME_RW_DTYPE_STREAMS;
+		*dsmgmt |= streamid << 16;
+	}
+
+	if (streamid < ARRAY_SIZE(req->q->write_hints))
+		req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
+}
+
 static inline void nvme_setup_flush(struct nvme_ns *ns,
 		struct nvme_command *cmnd)
 {
@@ -291,7 +407,7 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
 	cmnd->common.nsid = cpu_to_le32(ns->ns_id);
 }
 
-static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
+static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmnd)
 {
 	unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
@@ -300,7 +416,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 
 	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
 	if (!range)
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 
 	__rq_for_each_bio(bio, req) {
 		u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
@@ -314,7 +430,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 
 	if (WARN_ON_ONCE(n != segments)) {
 		kfree(range);
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 
 	memset(cmnd, 0, sizeof(*cmnd));
@@ -328,15 +444,26 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 	req->special_vec.bv_len = sizeof(*range) * segments;
 	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
-static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
-		struct nvme_command *cmnd)
+static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
+		struct request *req, struct nvme_command *cmnd)
 {
+	struct nvme_ctrl *ctrl = ns->ctrl;
 	u16 control = 0;
 	u32 dsmgmt = 0;
 
+	/*
+	 * If formated with metadata, require the block layer provide a buffer
+	 * unless this namespace is formated such that the metadata can be
+	 * stripped/generated by the controller with PRACT=1.
+	 */
+	if (ns && ns->ms &&
+	    (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) &&
+	    !blk_integrity_rq(req) && !blk_rq_is_passthrough(req))
+		return BLK_STS_NOTSUPP;
+
 	if (req->cmd_flags & REQ_FUA)
 		control |= NVME_RW_FUA;
 	if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
@@ -351,6 +478,9 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
 	cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
 	cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
 
+	if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
+		nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
+
 	if (ns->ms) {
 		switch (ns->pi_type) {
 		case NVME_NS_DPS_PI_TYPE3:
@@ -370,12 +500,13 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
 
 	cmnd->rw.control = cpu_to_le16(control);
 	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+	return 0;
 }
 
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmd)
 {
-	int ret = BLK_MQ_RQ_QUEUE_OK;
+	blk_status_t ret = BLK_STS_OK;
 
 	if (!(req->rq_flags & RQF_DONTPREP)) {
 		nvme_req(req)->retries = 0;
@@ -398,11 +529,11 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		break;
 	case REQ_OP_READ:
 	case REQ_OP_WRITE:
-		nvme_setup_rw(ns, req, cmd);
+		ret = nvme_setup_rw(ns, req, cmd);
 		break;
 	default:
 		WARN_ON_ONCE(1);
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 
 	cmd->common.command_id = req->tag;
@@ -555,15 +686,16 @@ int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 			result, timeout);
 }
 
-static void nvme_keep_alive_end_io(struct request *rq, int error)
+static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
 {
 	struct nvme_ctrl *ctrl = rq->end_io_data;
 
 	blk_mq_free_request(rq);
 
-	if (error) {
+	if (status) {
 		dev_err(ctrl->device,
-			"failed nvme_keep_alive_end_io error=%d\n", error);
+			"failed nvme_keep_alive_end_io error=%d\n",
+				status);
 		return;
 	}
 
@@ -599,7 +731,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
 	if (nvme_keep_alive(ctrl)) {
 		/* allocation failure, reset the controller */
 		dev_err(ctrl->device, "keep-alive failed\n");
-		ctrl->ops->reset_ctrl(ctrl);
+		nvme_reset_ctrl(ctrl);
 		return;
 	}
 }
@@ -623,7 +755,7 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
 
-int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
+static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 {
 	struct nvme_command c = { };
 	int error;
@@ -643,6 +775,77 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 	return error;
 }
 
+static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
+{
+	struct nvme_command c = { };
+	int status;
+	void *data;
+	int pos;
+	int len;
+
+	c.identify.opcode = nvme_admin_identify;
+	c.identify.nsid = cpu_to_le32(nsid);
+	c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
+
+	data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data,
+				      NVME_IDENTIFY_DATA_SIZE);
+	if (status)
+		goto free_data;
+
+	for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
+		struct nvme_ns_id_desc *cur = data + pos;
+
+		if (cur->nidl == 0)
+			break;
+
+		switch (cur->nidt) {
+		case NVME_NIDT_EUI64:
+			if (cur->nidl != NVME_NIDT_EUI64_LEN) {
+				dev_warn(ns->ctrl->device,
+					 "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
+					 cur->nidl);
+				goto free_data;
+			}
+			len = NVME_NIDT_EUI64_LEN;
+			memcpy(ns->eui, data + pos + sizeof(*cur), len);
+			break;
+		case NVME_NIDT_NGUID:
+			if (cur->nidl != NVME_NIDT_NGUID_LEN) {
+				dev_warn(ns->ctrl->device,
+					 "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
+					 cur->nidl);
+				goto free_data;
+			}
+			len = NVME_NIDT_NGUID_LEN;
+			memcpy(ns->nguid, data + pos + sizeof(*cur), len);
+			break;
+		case NVME_NIDT_UUID:
+			if (cur->nidl != NVME_NIDT_UUID_LEN) {
+				dev_warn(ns->ctrl->device,
+					 "ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
+					 cur->nidl);
+				goto free_data;
+			}
+			len = NVME_NIDT_UUID_LEN;
+			uuid_copy(&ns->uuid, data + pos + sizeof(*cur));
+			break;
+		default:
+			/* Skip unnkown types */
+			len = cur->nidl;
+			break;
+		}
+
+		len += sizeof(*cur);
+	}
+free_data:
+	kfree(data);
+	return status;
+}
+
 static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
 {
 	struct nvme_command c = { };
@@ -653,7 +856,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
 	return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
 }
 
-int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
+static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
 		struct nvme_id_ns **id)
 {
 	struct nvme_command c = { };
@@ -675,26 +878,7 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
 	return error;
 }
 
-int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
-		      void *buffer, size_t buflen, u32 *result)
-{
-	struct nvme_command c;
-	union nvme_result res;
-	int ret;
-
-	memset(&c, 0, sizeof(c));
-	c.features.opcode = nvme_admin_get_features;
-	c.features.nsid = cpu_to_le32(nsid);
-	c.features.fid = cpu_to_le32(fid);
-
-	ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0,
-			NVME_QID_ANY, 0, 0);
-	if (ret >= 0 && result)
-		*result = le32_to_cpu(res.u32);
-	return ret;
-}
-
-int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
+static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 		      void *buffer, size_t buflen, u32 *result)
 {
 	struct nvme_command c;
@@ -713,28 +897,6 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 	return ret;
 }
 
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
-{
-	struct nvme_command c = { };
-	int error;
-
-	c.common.opcode = nvme_admin_get_log_page,
-	c.common.nsid = cpu_to_le32(0xFFFFFFFF),
-	c.common.cdw10[0] = cpu_to_le32(
-			(((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
-			 NVME_LOG_SMART),
-
-	*log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
-	if (!*log)
-		return -ENOMEM;
-
-	error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
-			sizeof(struct nvme_smart_log));
-	if (error)
-		kfree(*log);
-	return error;
-}
-
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
 {
 	u32 q_count = (*count - 1) | ((*count - 1) << 16);
@@ -752,7 +914,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
 	 * access to the admin queue, as that might be only way to fix them up.
 	 */
 	if (status > 0) {
-		dev_err(ctrl->dev, "Could not set queue count (%d)\n", status);
+		dev_err(ctrl->device, "Could not set queue count (%d)\n", status);
 		*count = 0;
 	} else {
 		nr_io_queues = min(result & 0xffff, result >> 16) + 1;
@@ -870,12 +1032,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 		return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
 	case NVME_IOCTL_SUBMIT_IO:
 		return nvme_submit_io(ns, (void __user *)arg);
-#ifdef CONFIG_BLK_DEV_NVME_SCSI
-	case SG_GET_VERSION_NUM:
-		return nvme_sg_get_version_num((void __user *)arg);
-	case SG_IO:
-		return nvme_sg_io(ns, (void __user *)arg);
-#endif
 	default:
 #ifdef CONFIG_NVM
 		if (ns->ndev)
@@ -892,10 +1048,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
 			unsigned int cmd, unsigned long arg)
 {
-	switch (cmd) {
-	case SG_IO:
-		return -ENOIOCTLCMD;
-	}
 	return nvme_ioctl(bdev, mode, cmd, arg);
 }
 #else
@@ -925,6 +1077,29 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
+static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
+		u16 bs)
+{
+	struct nvme_ns *ns = disk->private_data;
+	u16 old_ms = ns->ms;
+	u8 pi_type = 0;
+
+	ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
+	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
+
+	/* PI implementation requires metadata equal t10 pi tuple size */
+	if (ns->ms == sizeof(struct t10_pi_tuple))
+		pi_type = id->dps & NVME_NS_DPS_PI_MASK;
+
+	if (blk_get_integrity(disk) &&
+	    (ns->pi_type != pi_type || ns->ms != old_ms ||
+	     bs != queue_logical_block_size(disk->queue) ||
+	     (ns->ms && ns->ext)))
+		blk_integrity_unregister(disk);
+
+	ns->pi_type = pi_type;
+}
+
 static void nvme_init_integrity(struct nvme_ns *ns)
 {
 	struct blk_integrity integrity;
@@ -951,11 +1126,21 @@ static void nvme_init_integrity(struct nvme_ns *ns)
 	blk_queue_max_integrity_segments(ns->queue, 1);
 }
 #else
+static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
+		u16 bs)
+{
+}
 static void nvme_init_integrity(struct nvme_ns *ns)
 {
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
+static void nvme_set_chunk_size(struct nvme_ns *ns)
+{
+	u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
+	blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
+}
+
 static void nvme_config_discard(struct nvme_ns *ns)
 {
 	struct nvme_ctrl *ctrl = ns->ctrl;
@@ -964,8 +1149,15 @@ static void nvme_config_discard(struct nvme_ns *ns)
 	BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
 			NVME_DSM_MAX_RANGES);
 
-	ns->queue->limits.discard_alignment = logical_block_size;
-	ns->queue->limits.discard_granularity = logical_block_size;
+	if (ctrl->nr_streams && ns->sws && ns->sgs) {
+		unsigned int sz = logical_block_size * ns->sws * ns->sgs;
+
+		ns->queue->limits.discard_alignment = sz;
+		ns->queue->limits.discard_granularity = sz;
+	} else {
+		ns->queue->limits.discard_alignment = logical_block_size;
+		ns->queue->limits.discard_granularity = logical_block_size;
+	}
 	blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
 	blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
@@ -989,7 +1181,15 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
 	if (ns->ctrl->vs >= NVME_VS(1, 1, 0))
 		memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui));
 	if (ns->ctrl->vs >= NVME_VS(1, 2, 0))
-		memcpy(ns->uuid, (*id)->nguid, sizeof(ns->uuid));
+		memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid));
+	if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) {
+		 /* Don't treat error as fatal we potentially
+		  * already have a NGUID or EUI-64
+		  */
+		if (nvme_identify_ns_descs(ns, ns->ns_id))
+			dev_warn(ns->ctrl->device,
+				 "%s: Identify Descriptors failed\n", __func__);
+	}
 
 	return 0;
 }
@@ -997,37 +1197,26 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
 static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 {
 	struct nvme_ns *ns = disk->private_data;
-	u8 lbaf, pi_type;
-	u16 old_ms;
-	unsigned short bs;
-
-	old_ms = ns->ms;
-	lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
-	ns->lba_shift = id->lbaf[lbaf].ds;
-	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
-	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
+	struct nvme_ctrl *ctrl = ns->ctrl;
+	u16 bs;
 
 	/*
 	 * If identify namespace failed, use default 512 byte block size so
 	 * block layer can use before failing read/write for 0 capacity.
 	 */
+	ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
 	if (ns->lba_shift == 0)
 		ns->lba_shift = 9;
 	bs = 1 << ns->lba_shift;
-	/* XXX: PI implementation requires metadata equal t10 pi tuple size */
-	pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
-					id->dps & NVME_NS_DPS_PI_MASK : 0;
+	ns->noiob = le16_to_cpu(id->noiob);
 
 	blk_mq_freeze_queue(disk->queue);
-	if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
-				ns->ms != old_ms ||
-				bs != queue_logical_block_size(disk->queue) ||
-				(ns->ms && ns->ext)))
-		blk_integrity_unregister(disk);
 
-	ns->pi_type = pi_type;
+	if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
+		nvme_prep_integrity(disk, id, bs);
 	blk_queue_logical_block_size(ns->queue, bs);
-
+	if (ns->noiob)
+		nvme_set_chunk_size(ns);
 	if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
 		nvme_init_integrity(ns);
 	if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
@@ -1035,7 +1224,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 	else
 		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
 
-	if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+	if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
 		nvme_config_discard(ns);
 	blk_mq_unfreeze_queue(disk->queue);
 }
@@ -1271,7 +1460,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
 
 int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
 {
-	unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
+	unsigned long timeout = jiffies + (shutdown_timeout * HZ);
 	u32 csts;
 	int ret;
 
@@ -1330,7 +1519,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 	 * transitioning between power states.  Therefore, when running
 	 * in any given state, we will enter the next lower-power
 	 * non-operational state after waiting 50 * (enlat + exlat)
-	 * microseconds, as long as that state's total latency is under
+	 * microseconds, as long as that state's exit latency is under
 	 * the requested maximum latency.
 	 *
 	 * We will not autonomously enter any non-operational state for
@@ -1360,7 +1549,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 	if (!table)
 		return;
 
-	if (ctrl->ps_max_latency_us == 0) {
+	if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) {
 		/* Turn off APST. */
 		apste = 0;
 		dev_dbg(ctrl->device, "APST disabled\n");
@@ -1375,7 +1564,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 		 * lowest-power state, not the number of states.
 		 */
 		for (state = (int)ctrl->npss; state >= 0; state--) {
-			u64 total_latency_us, transition_ms;
+			u64 total_latency_us, exit_latency_us, transition_ms;
 
 			if (target)
 				table->entries[state] = target;
@@ -1396,12 +1585,15 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 			      NVME_PS_FLAGS_NON_OP_STATE))
 				continue;
 
-			total_latency_us =
-				(u64)le32_to_cpu(ctrl->psd[state].entry_lat) +
-				+ le32_to_cpu(ctrl->psd[state].exit_lat);
-			if (total_latency_us > ctrl->ps_max_latency_us)
+			exit_latency_us =
+				(u64)le32_to_cpu(ctrl->psd[state].exit_lat);
+			if (exit_latency_us > ctrl->ps_max_latency_us)
 				continue;
 
+			total_latency_us =
+				exit_latency_us +
+				le32_to_cpu(ctrl->psd[state].entry_lat);
+
 			/*
 			 * This state is good.  Use it as the APST idle
 			 * target for higher power states.
@@ -1513,6 +1705,31 @@ static bool quirk_matches(const struct nvme_id_ctrl *id,
 		string_matches(id->fr, q->fr, sizeof(id->fr));
 }
 
+static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+	size_t nqnlen;
+	int off;
+
+	nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
+	if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
+		strcpy(ctrl->subnqn, id->subnqn);
+		return;
+	}
+
+	if (ctrl->vs >= NVME_VS(1, 2, 1))
+		dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
+
+	/* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
+	off = snprintf(ctrl->subnqn, NVMF_NQN_SIZE,
+			"nqn.2014.08.org.nvmexpress:%4x%4x",
+			le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
+	memcpy(ctrl->subnqn + off, id->sn, sizeof(id->sn));
+	off += sizeof(id->sn);
+	memcpy(ctrl->subnqn + off, id->mn, sizeof(id->mn));
+	off += sizeof(id->mn);
+	memset(ctrl->subnqn + off, 0, sizeof(ctrl->subnqn) - off);
+}
+
 /*
  * Initialize the cached copies of the Identify data and various controller
  * register in our nvme_ctrl structure.  This should be called as soon as
@@ -1524,7 +1741,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	u64 cap;
 	int ret, page_shift;
 	u32 max_hw_sectors;
-	u8 prev_apsta;
+	bool prev_apst_enabled;
 
 	ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
 	if (ret) {
@@ -1548,6 +1765,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 		return -EIO;
 	}
 
+	nvme_init_subnqn(ctrl, id);
+
 	if (!ctrl->identified) {
 		/*
 		 * Check for quirks.  Quirk can depend on firmware version,
@@ -1567,7 +1786,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	}
 
 	if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
-		dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
+		dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
 		ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
 	}
 
@@ -1592,20 +1811,21 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	ctrl->kas = le16_to_cpu(id->kas);
 
 	ctrl->npss = id->npss;
-	prev_apsta = ctrl->apsta;
+	ctrl->apsta = id->apsta;
+	prev_apst_enabled = ctrl->apst_enabled;
 	if (ctrl->quirks & NVME_QUIRK_NO_APST) {
 		if (force_apst && id->apsta) {
-			dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
-			ctrl->apsta = 1;
+			dev_warn(ctrl->device, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
+			ctrl->apst_enabled = true;
 		} else {
-			ctrl->apsta = 0;
+			ctrl->apst_enabled = false;
 		}
 	} else {
-		ctrl->apsta = id->apsta;
+		ctrl->apst_enabled = id->apsta;
 	}
 	memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
 
-	if (ctrl->ops->is_fabrics) {
+	if (ctrl->ops->flags & NVME_F_FABRICS) {
 		ctrl->icdoff = le16_to_cpu(id->icdoff);
 		ctrl->ioccsz = le32_to_cpu(id->ioccsz);
 		ctrl->iorcsz = le32_to_cpu(id->iorcsz);
@@ -1619,22 +1839,25 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 			ret = -EINVAL;
 
 		if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
-			dev_err(ctrl->dev,
+			dev_err(ctrl->device,
 				"keep-alive support is mandatory for fabrics\n");
 			ret = -EINVAL;
 		}
 	} else {
 		ctrl->cntlid = le16_to_cpu(id->cntlid);
+		ctrl->hmpre = le32_to_cpu(id->hmpre);
+		ctrl->hmmin = le32_to_cpu(id->hmmin);
 	}
 
 	kfree(id);
 
-	if (ctrl->apsta && !prev_apsta)
+	if (ctrl->apst_enabled && !prev_apst_enabled)
 		dev_pm_qos_expose_latency_tolerance(ctrl->device);
-	else if (!ctrl->apsta && prev_apsta)
+	else if (!ctrl->apst_enabled && prev_apst_enabled)
 		dev_pm_qos_hide_latency_tolerance(ctrl->device);
 
 	nvme_configure_apst(ctrl);
+	nvme_configure_directives(ctrl);
 
 	ctrl->identified = true;
 
@@ -1720,7 +1943,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
 		return nvme_dev_user_cmd(ctrl, argp);
 	case NVME_IOCTL_RESET:
 		dev_warn(ctrl->device, "resetting controller\n");
-		return ctrl->ops->reset_ctrl(ctrl);
+		return nvme_reset_ctrl_sync(ctrl);
 	case NVME_IOCTL_SUBSYS_RESET:
 		return nvme_reset_subsystem(ctrl);
 	case NVME_IOCTL_RESCAN:
@@ -1746,7 +1969,7 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
 	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
 	int ret;
 
-	ret = ctrl->ops->reset_ctrl(ctrl);
+	ret = nvme_reset_ctrl_sync(ctrl);
 	if (ret < 0)
 		return ret;
 	return count;
@@ -1772,8 +1995,8 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
 	int serial_len = sizeof(ctrl->serial);
 	int model_len = sizeof(ctrl->model);
 
-	if (memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
-		return sprintf(buf, "eui.%16phN\n", ns->uuid);
+	if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+		return sprintf(buf, "eui.%16phN\n", ns->nguid);
 
 	if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
 		return sprintf(buf, "eui.%8phN\n", ns->eui);
@@ -1788,11 +2011,28 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);
 
+static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+	return sprintf(buf, "%pU\n", ns->nguid);
+}
+static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL);
+
 static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
 								char *buf)
 {
 	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
-	return sprintf(buf, "%pU\n", ns->uuid);
+
+	/* For backward compatibility expose the NGUID to userspace if
+	 * we have no UUID set
+	 */
+	if (uuid_is_null(&ns->uuid)) {
+		printk_ratelimited(KERN_WARNING
+				   "No UUID available providing old NGUID\n");
+		return sprintf(buf, "%pU\n", ns->nguid);
+	}
+	return sprintf(buf, "%pU\n", &ns->uuid);
 }
 static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
 
@@ -1815,6 +2055,7 @@ static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
 static struct attribute *nvme_ns_attrs[] = {
 	&dev_attr_wwid.attr,
 	&dev_attr_uuid.attr,
+	&dev_attr_nguid.attr,
 	&dev_attr_eui.attr,
 	&dev_attr_nsid.attr,
 	NULL,
@@ -1827,7 +2068,12 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
 	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
 
 	if (a == &dev_attr_uuid.attr) {
-		if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
+		if (uuid_is_null(&ns->uuid) ||
+		    !memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+			return 0;
+	}
+	if (a == &dev_attr_nguid.attr) {
+		if (!memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
 			return 0;
 	}
 	if (a == &dev_attr_eui.attr) {
@@ -1916,8 +2162,7 @@ static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
 {
 	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
 
-	return snprintf(buf, PAGE_SIZE, "%s\n",
-			ctrl->ops->get_subsysnqn(ctrl));
+	return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subnqn);
 }
 static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
 
@@ -1946,24 +2191,16 @@ static struct attribute *nvme_dev_attrs[] = {
 	NULL
 };
 
-#define CHECK_ATTR(ctrl, a, name)		\
-	if ((a) == &dev_attr_##name.attr &&	\
-	    !(ctrl)->ops->get_##name)		\
-		return 0
-
 static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
 		struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
 
-	if (a == &dev_attr_delete_controller.attr) {
-		if (!ctrl->ops->delete_ctrl)
-			return 0;
-	}
-
-	CHECK_ATTR(ctrl, a, subsysnqn);
-	CHECK_ATTR(ctrl, a, address);
+	if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl)
+		return 0;
+	if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
+		return 0;
 
 	return a->mode;
 }
@@ -2004,6 +2241,32 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	return ret;
 }
 
+static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
+{
+	struct streams_directive_params s;
+	int ret;
+
+	if (!ctrl->nr_streams)
+		return 0;
+
+	ret = nvme_get_stream_params(ctrl, &s, ns->ns_id);
+	if (ret)
+		return ret;
+
+	ns->sws = le32_to_cpu(s.sws);
+	ns->sgs = le16_to_cpu(s.sgs);
+
+	if (ns->sws) {
+		unsigned int bs = 1 << ns->lba_shift;
+
+		blk_queue_io_min(ns->queue, bs * ns->sws);
+		if (ns->sgs)
+			blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
+	}
+
+	return 0;
+}
+
 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
 	struct nvme_ns *ns;
@@ -2033,6 +2296,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
 	nvme_set_queue_limits(ctrl, ns->queue);
+	nvme_setup_streams_ns(ctrl, ns);
 
 	sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
 
@@ -2041,7 +2305,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	if (nvme_nvm_ns_supported(ns, id) &&
 				nvme_nvm_register(ns, disk_name, node)) {
-		dev_warn(ctrl->dev, "%s: LightNVM init failure\n", __func__);
+		dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
 		goto out_free_id;
 	}
 
@@ -2098,7 +2362,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		if (ns->ndev)
 			nvme_nvm_unregister_sysfs(ns);
 		del_gendisk(ns->disk);
-		blk_mq_abort_requeue_list(ns->queue);
 		blk_cleanup_queue(ns->queue);
 	}
 
@@ -2217,7 +2480,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
 	 * removal.
 	 */
 	if (ctrl->state == NVME_CTRL_LIVE)
-		schedule_work(&ctrl->scan_work);
+		queue_work(nvme_wq, &ctrl->scan_work);
 }
 EXPORT_SYMBOL_GPL(nvme_queue_scan);
 
@@ -2272,7 +2535,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		/*FALLTHRU*/
 	case NVME_SC_ABORT_REQ:
 		++ctrl->event_limit;
-		schedule_work(&ctrl->async_event_work);
+		queue_work(nvme_wq, &ctrl->async_event_work);
 		break;
 	default:
 		break;
@@ -2295,7 +2558,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
 void nvme_queue_async_events(struct nvme_ctrl *ctrl)
 {
 	ctrl->event_limit = NVME_NR_AERS;
-	schedule_work(&ctrl->async_event_work);
+	queue_work(nvme_wq, &ctrl->async_event_work);
 }
 EXPORT_SYMBOL_GPL(nvme_queue_async_events);
 
@@ -2427,6 +2690,13 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 	struct nvme_ns *ns;
 
 	mutex_lock(&ctrl->namespaces_mutex);
+
+	/* Forcibly unquiesce queues to avoid blocking dispatch */
+	blk_mq_unquiesce_queue(ctrl->admin_q);
+
+	/* Forcibly start all queues to avoid having stuck requests */
+	blk_mq_start_hw_queues(ctrl->admin_q);
+
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		/*
 		 * Revalidating a dead namespace sets capacity to 0. This will
@@ -2436,8 +2706,19 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 			continue;
 		revalidate_disk(ns->disk);
 		blk_set_queue_dying(ns->queue);
-		blk_mq_abort_requeue_list(ns->queue);
-		blk_mq_start_stopped_hw_queues(ns->queue, true);
+
+		/* Forcibly unquiesce queues to avoid blocking dispatch */
+		blk_mq_unquiesce_queue(ns->queue);
+
+		/*
+		 * Forcibly start all queues to avoid having stuck requests.
+		 * Note that we must ensure the queues are not stopped
+		 * when the final removal happens.
+		 */
+		blk_mq_start_hw_queues(ns->queue);
+
+		/* draining requests in requeue list */
+		blk_mq_kick_requeue_list(ns->queue);
 	}
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
@@ -2507,7 +2788,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 
 	mutex_lock(&ctrl->namespaces_mutex);
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
-		blk_mq_start_stopped_hw_queues(ns->queue, true);
+		blk_mq_unquiesce_queue(ns->queue);
 		blk_mq_kick_requeue_list(ns->queue);
 	}
 	mutex_unlock(&ctrl->namespaces_mutex);
@@ -2518,10 +2799,15 @@ int __init nvme_core_init(void)
 {
 	int result;
 
+	nvme_wq = alloc_workqueue("nvme-wq",
+			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+	if (!nvme_wq)
+		return -ENOMEM;
+
 	result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
 							&nvme_dev_fops);
 	if (result < 0)
-		return result;
+		goto destroy_wq;
 	else if (result > 0)
 		nvme_char_major = result;
 
@@ -2533,8 +2819,10 @@ int __init nvme_core_init(void)
 
 	return 0;
 
- unregister_chrdev:
+unregister_chrdev:
 	__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+destroy_wq:
+	destroy_workqueue(nvme_wq);
 	return result;
 }
 
@@ -2542,6 +2830,7 @@ void nvme_core_exit(void)
 {
 	class_destroy(nvme_class);
 	__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+	destroy_workqueue(nvme_wq);
 }
 
 MODULE_LICENSE("GPL");
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 990e6fb32a63..2e582a240943 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -58,7 +58,6 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn)
 
 	kref_init(&host->ref);
 	memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
-	uuid_be_gen(&host->id);
 
 	list_add_tail(&host->list, &nvmf_hosts);
 out_unlock:
@@ -75,7 +74,6 @@ static struct nvmf_host *nvmf_host_default(void)
 		return NULL;
 
 	kref_init(&host->ref);
-	uuid_be_gen(&host->id);
 	snprintf(host->nqn, NVMF_NQN_SIZE,
 		"nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id);
 
@@ -128,16 +126,6 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
 EXPORT_SYMBOL_GPL(nvmf_get_address);
 
 /**
- * nvmf_get_subsysnqn() - Get subsystem NQN
- * @ctrl:	Host NVMe controller instance which we got the NQN
- */
-const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl)
-{
-	return ctrl->opts->subsysnqn;
-}
-EXPORT_SYMBOL_GPL(nvmf_get_subsysnqn);
-
-/**
  * nvmf_reg_read32() -  NVMe Fabrics "Property Get" API function.
  * @ctrl:	Host NVMe controller instance maintaining the admin
  *		queue used to submit the property read command to
@@ -337,6 +325,24 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
 			}
 		}
 		break;
+
+	case NVME_SC_CONNECT_INVALID_HOST:
+		dev_err(ctrl->device,
+			"Connect for subsystem %s is not allowed, hostnqn: %s\n",
+			data->subsysnqn, data->hostnqn);
+		break;
+
+	case NVME_SC_CONNECT_CTRL_BUSY:
+		dev_err(ctrl->device,
+			"Connect command failed: controller is busy or not available\n");
+		break;
+
+	case NVME_SC_CONNECT_FORMAT:
+		dev_err(ctrl->device,
+			"Connect incompatible format: %d",
+			cmd->connect.recfmt);
+		break;
+
 	default:
 		dev_err(ctrl->device,
 			"Connect command failed, error wo/DNR bit: %d\n",
@@ -376,13 +382,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 	cmd.connect.opcode = nvme_fabrics_command;
 	cmd.connect.fctype = nvme_fabrics_type_connect;
 	cmd.connect.qid = 0;
-
-	/*
-	 * fabrics spec sets a minimum of depth 32 for admin queue,
-	 * so set the queue with this depth always until
-	 * justification otherwise.
-	 */
-	cmd.connect.sqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+	cmd.connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
 
 	/*
 	 * Set keep-alive timeout in seconds granularity (ms * 1000)
@@ -395,7 +395,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 	if (!data)
 		return -ENOMEM;
 
-	memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be));
+	uuid_copy(&data->hostid, &ctrl->opts->host->id);
 	data->cntlid = cpu_to_le16(0xffff);
 	strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
 	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
@@ -454,7 +454,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
 	if (!data)
 		return -ENOMEM;
 
-	memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be));
+	uuid_copy(&data->hostid, &ctrl->opts->host->id);
 	data->cntlid = cpu_to_le16(ctrl->cntlid);
 	strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
 	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
@@ -474,7 +474,7 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
 {
 	if (ctrl->opts->max_reconnects != -1 &&
-	    ctrl->opts->nr_reconnects < ctrl->opts->max_reconnects)
+	    ctrl->nr_reconnects < ctrl->opts->max_reconnects)
 		return true;
 
 	return false;
@@ -547,6 +547,7 @@ static const match_table_t opt_tokens = {
 	{ NVMF_OPT_KATO,		"keep_alive_tmo=%d"	},
 	{ NVMF_OPT_HOSTNQN,		"hostnqn=%s"		},
 	{ NVMF_OPT_HOST_TRADDR,		"host_traddr=%s"	},
+	{ NVMF_OPT_HOST_ID,		"hostid=%s"		},
 	{ NVMF_OPT_ERR,			NULL			}
 };
 
@@ -558,6 +559,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	int token, ret = 0;
 	size_t nqnlen  = 0;
 	int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
+	uuid_t hostid;
 
 	/* Set defaults */
 	opts->queue_size = NVMF_DEF_QUEUE_SIZE;
@@ -568,6 +570,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	if (!options)
 		return -ENOMEM;
 
+	uuid_gen(&hostid);
+
 	while ((p = strsep(&o, ",\n")) != NULL) {
 		if (!*p)
 			continue;
@@ -724,6 +728,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 			}
 			opts->host_traddr = p;
 			break;
+		case NVMF_OPT_HOST_ID:
+			p = match_strdup(args);
+			if (!p) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			if (uuid_parse(p, &hostid)) {
+				ret = -EINVAL;
+				goto out;
+			}
+			break;
 		default:
 			pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
 				p);
@@ -743,6 +758,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 		opts->host = nvmf_default_host;
 	}
 
+	uuid_copy(&opts->host->id, &hostid);
+
 out:
 	if (!opts->discovery_nqn && !opts->kato)
 		opts->kato = NVME_DEFAULT_KATO;
@@ -803,7 +820,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
 
 #define NVMF_REQUIRED_OPTS	(NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
 #define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
-				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN)
+				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
+				 NVMF_OPT_HOST_ID)
 
 static struct nvme_ctrl *
 nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
@@ -854,6 +872,15 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
 		goto out_unlock;
 	}
 
+	if (strcmp(ctrl->subnqn, opts->subsysnqn)) {
+		dev_warn(ctrl->device,
+			"controller returned incorrect NQN: \"%s\".\n",
+			ctrl->subnqn);
+		mutex_unlock(&nvmf_transports_mutex);
+		ctrl->ops->delete_ctrl(ctrl);
+		return ERR_PTR(-EINVAL);
+	}
+
 	mutex_unlock(&nvmf_transports_mutex);
 	return ctrl;
 
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index f5a9c1fb186f..bf33663218cd 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -36,7 +36,7 @@ struct nvmf_host {
 	struct kref		ref;
 	struct list_head	list;
 	char			nqn[NVMF_NQN_SIZE];
-	uuid_be			id;
+	uuid_t			id;
 };
 
 /**
@@ -56,6 +56,7 @@ enum {
 	NVMF_OPT_RECONNECT_DELAY = 1 << 9,
 	NVMF_OPT_HOST_TRADDR	= 1 << 10,
 	NVMF_OPT_CTRL_LOSS_TMO	= 1 << 11,
+	NVMF_OPT_HOST_ID	= 1 << 12,
 };
 
 /**
@@ -80,7 +81,6 @@ enum {
  * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN.
  * @kato:	Keep-alive timeout.
  * @host:	Virtual NVMe host, contains the NQN and Host ID.
- * @nr_reconnects: number of reconnect attempted since the last ctrl failure
  * @max_reconnects: maximum number of allowed reconnect attempts before removing
  *              the controller, (-1) means reconnect forever, zero means remove
  *              immediately;
@@ -98,7 +98,6 @@ struct nvmf_ctrl_options {
 	bool			discovery_nqn;
 	unsigned int		kato;
 	struct nvmf_host	*host;
-	int			nr_reconnects;
 	int			max_reconnects;
 };
 
@@ -140,7 +139,6 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
 int nvmf_register_transport(struct nvmf_transport_ops *ops);
 void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
 void nvmf_free_options(struct nvmf_ctrl_options *opts);
-const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);
 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
 
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 70e689bf1cad..ed87214fdc0e 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -36,7 +36,7 @@
  */
 #define NVME_FC_NR_AEN_COMMANDS	1
 #define NVME_FC_AQ_BLKMQ_DEPTH	\
-	(NVMF_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
+	(NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
 #define AEN_CMDID_BASE		(NVME_FC_AQ_BLKMQ_DEPTH + 1)
 
 enum nvme_fc_queue_flags {
@@ -45,8 +45,6 @@ enum nvme_fc_queue_flags {
 
 #define NVMEFC_QUEUE_DELAY	3		/* ms units */
 
-#define NVME_FC_MAX_CONNECT_ATTEMPTS	1
-
 struct nvme_fc_queue {
 	struct nvme_fc_ctrl	*ctrl;
 	struct device		*dev;
@@ -163,14 +161,12 @@ struct nvme_fc_ctrl {
 	struct blk_mq_tag_set	tag_set;
 
 	struct work_struct	delete_work;
-	struct work_struct	reset_work;
 	struct delayed_work	connect_work;
-	int			reconnect_delay;
-	int			connect_attempts;
 
 	struct kref		ref;
 	u32			flags;
 	u32			iocnt;
+	wait_queue_head_t	ioabort_wait;
 
 	struct nvme_fc_fcp_op	aen_ops[NVME_FC_NR_AEN_COMMANDS];
 
@@ -218,7 +214,6 @@ static LIST_HEAD(nvme_fc_lport_list);
 static DEFINE_IDA(nvme_fc_local_port_cnt);
 static DEFINE_IDA(nvme_fc_ctrl_cnt);
 
-static struct workqueue_struct *nvme_fc_wq;
 
 
 
@@ -882,8 +877,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
 	assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize);
 	/* Linux supports only Dynamic controllers */
 	assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
-	memcpy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id,
-		min_t(size_t, FCNVME_ASSOC_HOSTID_LEN, sizeof(uuid_be)));
+	uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
 	strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn,
 		min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE));
 	strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn,
@@ -1143,6 +1137,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 /* *********************** NVME Ctrl Routines **************************** */
 
 static void __nvme_fc_final_op_cleanup(struct request *rq);
+static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
 
 static int
 nvme_fc_reinit_request(void *data, struct request *rq)
@@ -1245,8 +1240,10 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
 
 	spin_lock_irqsave(&ctrl->lock, flags);
 	if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
-		if (ctrl->flags & FCCTRL_TERMIO)
-			ctrl->iocnt--;
+		if (ctrl->flags & FCCTRL_TERMIO) {
+			if (!--ctrl->iocnt)
+				wake_up(&ctrl->ioabort_wait);
+		}
 	}
 	if (op->flags & FCOP_FLAGS_RELEASED)
 		complete_rq = true;
@@ -1269,7 +1266,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	struct nvme_command *sqe = &op->cmd_iu.sqe;
 	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
 	union nvme_result result;
-	bool complete_rq;
+	bool complete_rq, terminate_assoc = true;
 
 	/*
 	 * WARNING:
@@ -1298,6 +1295,14 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	 * fabricate a CQE, the following fields will not be set as they
 	 * are not referenced:
 	 *      cqe.sqid,  cqe.sqhd,  cqe.command_id
+	 *
+	 * Failure or error of an individual i/o, in a transport
+	 * detected fashion unrelated to the nvme completion status,
+	 * potentially cause the initiator and target sides to get out
+	 * of sync on SQ head/tail (aka outstanding io count allowed).
+	 * Per FC-NVME spec, failure of an individual command requires
+	 * the connection to be terminated, which in turn requires the
+	 * association to be terminated.
 	 */
 
 	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
@@ -1363,6 +1368,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 		goto done;
 	}
 
+	terminate_assoc = false;
+
 done:
 	if (op->flags & FCOP_FLAGS_AEN) {
 		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
@@ -1370,19 +1377,23 @@ done:
 		atomic_set(&op->state, FCPOP_STATE_IDLE);
 		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
 		nvme_fc_ctrl_put(ctrl);
-		return;
+		goto check_error;
 	}
 
 	complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
 	if (!complete_rq) {
 		if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
-			status = cpu_to_le16(NVME_SC_ABORT_REQ);
+			status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
 			if (blk_queue_dying(rq->q))
-				status |= cpu_to_le16(NVME_SC_DNR);
+				status |= cpu_to_le16(NVME_SC_DNR << 1);
 		}
 		nvme_end_request(rq, status, result);
 	} else
 		__nvme_fc_final_op_cleanup(rq);
+
+check_error:
+	if (terminate_assoc)
+		nvme_fc_error_recovery(ctrl, "transport detected io error");
 }
 
 static int
@@ -1439,18 +1450,8 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
 {
 	struct nvme_fc_ctrl *ctrl = set->driver_data;
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
-	struct nvme_fc_queue *queue = &ctrl->queues[hctx_idx+1];
-
-	return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
-}
-
-static int
-nvme_fc_init_admin_request(struct blk_mq_tag_set *set, struct request *rq,
-		unsigned int hctx_idx, unsigned int numa_node)
-{
-	struct nvme_fc_ctrl *ctrl = set->driver_data;
-	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
-	struct nvme_fc_queue *queue = &ctrl->queues[0];
+	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
+	struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
 
 	return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
 }
@@ -1748,10 +1749,14 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
 static void
 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
 {
+	/* only proceed if in LIVE state - e.g. on first error */
+	if (ctrl->ctrl.state != NVME_CTRL_LIVE)
+		return;
+
 	dev_warn(ctrl->ctrl.device,
 		"NVME-FC{%d}: transport association error detected: %s\n",
 		ctrl->cnum, errmsg);
-	dev_info(ctrl->ctrl.device,
+	dev_warn(ctrl->ctrl.device,
 		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
 
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
@@ -1761,10 +1766,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
 		return;
 	}
 
-	if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
-		dev_err(ctrl->ctrl.device,
-			"NVME-FC{%d}: error_recovery: Failed to schedule "
-			"reset work\n", ctrl->cnum);
+	nvme_reset_ctrl(&ctrl->ctrl);
 }
 
 static enum blk_eh_timer_return
@@ -1873,7 +1875,7 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
  * level FC exchange resource that is also outstanding. This must be
  * considered in all cleanup operations.
  */
-static int
+static blk_status_t
 nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	struct nvme_fc_fcp_op *op, u32 data_len,
 	enum nvmefc_fcp_datadir	io_dir)
@@ -1888,10 +1890,10 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	 * the target device is present
 	 */
 	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 
 	if (!nvme_fc_ctrl_get(ctrl))
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 
 	/* format the FC-NVME CMD IU and fcp_req */
 	cmdiu->connection_id = cpu_to_be64(queue->connection_id);
@@ -1939,8 +1941,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 		if (ret < 0) {
 			nvme_cleanup_cmd(op->rq);
 			nvme_fc_ctrl_put(ctrl);
-			return (ret == -ENOMEM || ret == -EAGAIN) ?
-				BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+			if (ret == -ENOMEM || ret == -EAGAIN)
+				return BLK_STS_RESOURCE;
+			return BLK_STS_IOERR;
 		}
 	}
 
@@ -1957,28 +1960,26 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 					queue->lldd_handle, &op->fcp_req);
 
 	if (ret) {
-		if (op->rq) {			/* normal request */
+		if (op->rq)			/* normal request */
 			nvme_fc_unmap_data(ctrl, op->rq, op);
-			nvme_cleanup_cmd(op->rq);
-		}
 		/* else - aen. no cleanup needed */
 
 		nvme_fc_ctrl_put(ctrl);
 
 		if (ret != -EBUSY)
-			return BLK_MQ_RQ_QUEUE_ERROR;
+			return BLK_STS_IOERR;
 
 		if (op->rq) {
 			blk_mq_stop_hw_queues(op->rq->q);
 			blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
 		}
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 	}
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
-static int
+static blk_status_t
 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 			const struct blk_mq_queue_data *bd)
 {
@@ -1991,7 +1992,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_command *sqe = &cmdiu->sqe;
 	enum nvmefc_fcp_datadir	io_dir;
 	u32 data_len;
-	int ret;
+	blk_status_t ret;
 
 	ret = nvme_setup_cmd(ns, rq, sqe);
 	if (ret)
@@ -2046,7 +2047,7 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 	struct nvme_fc_fcp_op *aen_op;
 	unsigned long flags;
 	bool terminating = false;
-	int ret;
+	blk_status_t ret;
 
 	if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
 		return;
@@ -2078,7 +2079,6 @@ __nvme_fc_final_op_cleanup(struct request *rq)
 	op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
 			FCOP_FLAGS_COMPLETE);
 
-	nvme_cleanup_cmd(rq);
 	nvme_fc_unmap_data(ctrl, rq, op);
 	nvme_complete_rq(rq);
 	nvme_fc_ctrl_put(ctrl);
@@ -2191,9 +2191,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (!opts->nr_io_queues)
 		return 0;
 
-	dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
-			opts->nr_io_queues);
-
 	nvme_fc_init_io_queues(ctrl);
 
 	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
@@ -2264,9 +2261,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (ctrl->queue_count == 1)
 		return 0;
 
-	dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n",
-			opts->nr_io_queues);
-
 	nvme_fc_init_io_queues(ctrl);
 
 	ret = blk_mq_reinit_tagset(&ctrl->tag_set);
@@ -2302,7 +2296,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	int ret;
 	bool changed;
 
-	ctrl->connect_attempts++;
+	++ctrl->ctrl.nr_reconnects;
 
 	/*
 	 * Create the admin queue
@@ -2399,9 +2393,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-	ctrl->connect_attempts = 0;
-
-	kref_get(&ctrl->ctrl.kref);
+	ctrl->ctrl.nr_reconnects = 0;
 
 	if (ctrl->queue_count > 1) {
 		nvme_start_queues(&ctrl->ctrl);
@@ -2487,11 +2479,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 
 	/* wait for all io that had to be aborted */
 	spin_lock_irqsave(&ctrl->lock, flags);
-	while (ctrl->iocnt) {
-		spin_unlock_irqrestore(&ctrl->lock, flags);
-		msleep(1000);
-		spin_lock_irqsave(&ctrl->lock, flags);
-	}
+	wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
 	ctrl->flags &= ~FCCTRL_TERMIO;
 	spin_unlock_irqrestore(&ctrl->lock, flags);
 
@@ -2521,7 +2509,7 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
 	struct nvme_fc_ctrl *ctrl =
 		container_of(work, struct nvme_fc_ctrl, delete_work);
 
-	cancel_work_sync(&ctrl->reset_work);
+	cancel_work_sync(&ctrl->ctrl.reset_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
 
 	/*
@@ -2532,26 +2520,32 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
 
 	/*
 	 * tear down the controller
-	 * This will result in the last reference on the nvme ctrl to
-	 * expire, calling the transport nvme_fc_nvme_ctrl_freed() callback.
-	 * From there, the transport will tear down it's logical queues and
-	 * association.
+	 * After the last reference on the nvme ctrl is removed,
+	 * the transport nvme_fc_nvme_ctrl_freed() callback will be
+	 * invoked. From there, the transport will tear down it's
+	 * logical queues and association.
 	 */
 	nvme_uninit_ctrl(&ctrl->ctrl);
 
 	nvme_put_ctrl(&ctrl->ctrl);
 }
 
-static int
-__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
+static bool
+__nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
 {
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
-		return -EBUSY;
+		return true;
 
-	if (!queue_work(nvme_fc_wq, &ctrl->delete_work))
-		return -EBUSY;
+	if (!queue_work(nvme_wq, &ctrl->delete_work))
+		return true;
 
-	return 0;
+	return false;
+}
+
+static int
+__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
+{
+	return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0;
 }
 
 /*
@@ -2569,7 +2563,7 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
 	ret = __nvme_fc_del_ctrl(ctrl);
 
 	if (!ret)
-		flush_workqueue(nvme_fc_wq);
+		flush_workqueue(nvme_wq);
 
 	nvme_put_ctrl(&ctrl->ctrl);
 
@@ -2577,83 +2571,62 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
 }
 
 static void
-nvme_fc_reset_ctrl_work(struct work_struct *work)
+nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
 {
-	struct nvme_fc_ctrl *ctrl =
-			container_of(work, struct nvme_fc_ctrl, reset_work);
-	int ret;
+	/* If we are resetting/deleting then do nothing */
+	if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
+		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
+			ctrl->ctrl.state == NVME_CTRL_LIVE);
+		return;
+	}
 
-	/* will block will waiting for io to terminate */
-	nvme_fc_delete_association(ctrl);
+	dev_info(ctrl->ctrl.device,
+		"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
+		ctrl->cnum, status);
 
-	ret = nvme_fc_create_association(ctrl);
-	if (ret) {
+	if (nvmf_should_reconnect(&ctrl->ctrl)) {
+		dev_info(ctrl->ctrl.device,
+			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
+			ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
+		queue_delayed_work(nvme_wq, &ctrl->connect_work,
+				ctrl->ctrl.opts->reconnect_delay * HZ);
+	} else {
 		dev_warn(ctrl->ctrl.device,
-			"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
-			ctrl->cnum, ret);
-		if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
-			dev_warn(ctrl->ctrl.device,
 				"NVME-FC{%d}: Max reconnect attempts (%d) "
 				"reached. Removing controller\n",
-				ctrl->cnum, ctrl->connect_attempts);
-
-			if (!nvme_change_ctrl_state(&ctrl->ctrl,
-				NVME_CTRL_DELETING)) {
-				dev_err(ctrl->ctrl.device,
-					"NVME-FC{%d}: failed to change state "
-					"to DELETING\n", ctrl->cnum);
-				return;
-			}
-
-			WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
-			return;
-		}
-
-		dev_warn(ctrl->ctrl.device,
-			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
-			ctrl->cnum, ctrl->reconnect_delay);
-		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
-				ctrl->reconnect_delay * HZ);
-	} else
-		dev_info(ctrl->ctrl.device,
-			"NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
+				ctrl->cnum, ctrl->ctrl.nr_reconnects);
+		WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
+	}
 }
 
-/*
- * called by the nvme core layer, for sysfs interface that requests
- * a reset of the nvme controller
- */
-static int
-nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
+static void
+nvme_fc_reset_ctrl_work(struct work_struct *work)
 {
-	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
-
-	dev_warn(ctrl->ctrl.device,
-		"NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum);
-
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
-		return -EBUSY;
-
-	if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
-		return -EBUSY;
+	struct nvme_fc_ctrl *ctrl =
+		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
+	int ret;
 
-	flush_work(&ctrl->reset_work);
+	/* will block will waiting for io to terminate */
+	nvme_fc_delete_association(ctrl);
 
-	return 0;
+	ret = nvme_fc_create_association(ctrl);
+	if (ret)
+		nvme_fc_reconnect_or_delete(ctrl, ret);
+	else
+		dev_info(ctrl->ctrl.device,
+			"NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
 }
 
 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
 	.name			= "fc",
 	.module			= THIS_MODULE,
-	.is_fabrics		= true,
+	.flags			= NVME_F_FABRICS,
 	.reg_read32		= nvmf_reg_read32,
 	.reg_read64		= nvmf_reg_read64,
 	.reg_write32		= nvmf_reg_write32,
-	.reset_ctrl		= nvme_fc_reset_nvme_ctrl,
 	.free_ctrl		= nvme_fc_nvme_ctrl_freed,
 	.submit_async_event	= nvme_fc_submit_async_event,
 	.delete_ctrl		= nvme_fc_del_nvme_ctrl,
-	.get_subsysnqn		= nvmf_get_subsysnqn,
 	.get_address		= nvmf_get_address,
 };
 
@@ -2667,34 +2640,9 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
 				struct nvme_fc_ctrl, connect_work);
 
 	ret = nvme_fc_create_association(ctrl);
-	if (ret) {
-		dev_warn(ctrl->ctrl.device,
-			"NVME-FC{%d}: Reconnect attempt failed (%d)\n",
-			ctrl->cnum, ret);
-		if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
-			dev_warn(ctrl->ctrl.device,
-				"NVME-FC{%d}: Max reconnect attempts (%d) "
-				"reached. Removing controller\n",
-				ctrl->cnum, ctrl->connect_attempts);
-
-			if (!nvme_change_ctrl_state(&ctrl->ctrl,
-				NVME_CTRL_DELETING)) {
-				dev_err(ctrl->ctrl.device,
-					"NVME-FC{%d}: failed to change state "
-					"to DELETING\n", ctrl->cnum);
-				return;
-			}
-
-			WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
-			return;
-		}
-
-		dev_warn(ctrl->ctrl.device,
-			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
-			ctrl->cnum, ctrl->reconnect_delay);
-		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
-				ctrl->reconnect_delay * HZ);
-	} else
+	if (ret)
+		nvme_fc_reconnect_or_delete(ctrl, ret);
+	else
 		dev_info(ctrl->ctrl.device,
 			"NVME-FC{%d}: controller reconnect complete\n",
 			ctrl->cnum);
@@ -2704,7 +2652,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
 static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
 	.queue_rq	= nvme_fc_queue_rq,
 	.complete	= nvme_fc_complete_rq,
-	.init_request	= nvme_fc_init_admin_request,
+	.init_request	= nvme_fc_init_request,
 	.exit_request	= nvme_fc_exit_request,
 	.reinit_request	= nvme_fc_reinit_request,
 	.init_hctx	= nvme_fc_init_admin_hctx,
@@ -2720,6 +2668,12 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	unsigned long flags;
 	int ret, idx;
 
+	if (!(rport->remoteport.port_role &
+	    (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
+		ret = -EBADR;
+		goto out_fail;
+	}
+
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 	if (!ctrl) {
 		ret = -ENOMEM;
@@ -2743,9 +2697,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	kref_init(&ctrl->ref);
 
 	INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
-	INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work);
+	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
 	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
-	ctrl->reconnect_delay = opts->reconnect_delay;
 	spin_lock_init(&ctrl->lock);
 
 	/* io queue count */
@@ -2811,6 +2764,9 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 		nvme_uninit_ctrl(&ctrl->ctrl);
 		nvme_put_ctrl(&ctrl->ctrl);
 
+		/* Remove core ctrl ref. */
+		nvme_put_ctrl(&ctrl->ctrl);
+
 		/* as we're past the point where we transition to the ref
 		 * counting teardown path, if we return a bad pointer here,
 		 * the calling routine, thinking it's prior to the
@@ -2825,6 +2781,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 		return ERR_PTR(ret);
 	}
 
+	kref_get(&ctrl->ctrl.kref);
+
 	dev_info(ctrl->ctrl.device,
 		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
 		ctrl->cnum, ctrl->ctrl.opts->subsysnqn);
@@ -2961,26 +2919,13 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
 static struct nvmf_transport_ops nvme_fc_transport = {
 	.name		= "fc",
 	.required_opts	= NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR,
-	.allowed_opts	= NVMF_OPT_RECONNECT_DELAY,
+	.allowed_opts	= NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO,
 	.create_ctrl	= nvme_fc_create_ctrl,
 };
 
 static int __init nvme_fc_init_module(void)
 {
-	int ret;
-
-	nvme_fc_wq = create_workqueue("nvme_fc_wq");
-	if (!nvme_fc_wq)
-		return -ENOMEM;
-
-	ret = nvmf_register_transport(&nvme_fc_transport);
-	if (ret)
-		goto err;
-
-	return 0;
-err:
-	destroy_workqueue(nvme_fc_wq);
-	return ret;
+	return nvmf_register_transport(&nvme_fc_transport);
 }
 
 static void __exit nvme_fc_exit_module(void)
@@ -2991,8 +2936,6 @@ static void __exit nvme_fc_exit_module(void)
 
 	nvmf_unregister_transport(&nvme_fc_transport);
 
-	destroy_workqueue(nvme_fc_wq);
-
 	ida_destroy(&nvme_fc_local_port_cnt);
 	ida_destroy(&nvme_fc_ctrl_cnt);
 }
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 8c4adac6fafc..be8541335e31 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -242,7 +242,7 @@ static inline void _nvme_nvm_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
-	BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
 }
 
@@ -367,7 +367,8 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
 
 		if (unlikely(elba > nvmdev->total_secs)) {
 			pr_err("nvm: L2P data from device is out of bounds!\n");
-			return -EINVAL;
+			ret = -EINVAL;
+			goto out;
 		}
 
 		/* Transform physical address to target address space */
@@ -464,8 +465,8 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
 	return ret;
 }
 
-static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
-				struct nvme_ns *ns, struct nvme_nvm_command *c)
+static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
+				    struct nvme_nvm_command *c)
 {
 	c->ph_rw.opcode = rqd->opcode;
 	c->ph_rw.nsid = cpu_to_le32(ns->ns_id);
@@ -479,7 +480,7 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
 					rqd->bio->bi_iter.bi_sector));
 }
 
-static void nvme_nvm_end_io(struct request *rq, int error)
+static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
 {
 	struct nvm_rq *rqd = rq->end_io_data;
 
@@ -503,12 +504,12 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	if (!cmd)
 		return -ENOMEM;
 
-	nvme_nvm_rqtocmd(rq, rqd, ns, cmd);
+	nvme_nvm_rqtocmd(rqd, ns, cmd);
 
 	rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
 	if (IS_ERR(rq)) {
 		kfree(cmd);
-		return -ENOMEM;
+		return PTR_ERR(rq);
 	}
 	rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
 
@@ -570,13 +571,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
 	.max_phys_sect		= 64,
 };
 
-static void nvme_nvm_end_user_vio(struct request *rq, int error)
-{
-	struct completion *waiting = rq->end_io_data;
-
-	complete(waiting);
-}
-
 static int nvme_nvm_submit_user_cmd(struct request_queue *q,
 				struct nvme_ns *ns,
 				struct nvme_nvm_command *vcmd,
@@ -607,7 +601,6 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
 	rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;
 
 	rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
-	rq->end_io_data = &wait;
 
 	if (ppa_buf && ppa_len) {
 		ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
@@ -661,9 +654,7 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
 	}
 
 submit:
-	blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_user_vio);
-
-	wait_for_completion_io(&wait);
+	blk_execute_rq(q, NULL, rq, 0);
 
 	if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
 		ret = -EINTR;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 29c708ca9621..d70ff0fdd36b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -27,12 +27,11 @@ extern unsigned char nvme_io_timeout;
 extern unsigned char admin_timeout;
 #define ADMIN_TIMEOUT	(admin_timeout * HZ)
 
-extern unsigned char shutdown_timeout;
-#define SHUTDOWN_TIMEOUT	(shutdown_timeout * HZ)
-
 #define NVME_DEFAULT_KATO	5
 #define NVME_KATO_GRACE		10
 
+extern struct workqueue_struct *nvme_wq;
+
 enum {
 	NVME_NS_LBA		= 0,
 	NVME_NS_LIGHTNVM	= 1,
@@ -131,6 +130,7 @@ struct nvme_ctrl {
 	struct device *device;	/* char device */
 	struct list_head node;
 	struct ida ns_ida;
+	struct work_struct reset_work;
 
 	struct opal_dev *opal_dev;
 
@@ -138,6 +138,7 @@ struct nvme_ctrl {
 	char serial[20];
 	char model[40];
 	char firmware_rev[8];
+	char subnqn[NVMF_NQN_SIZE];
 	u16 cntlid;
 
 	u32 ctrl_config;
@@ -147,6 +148,8 @@ struct nvme_ctrl {
 	u16 oncs;
 	u16 vid;
 	u16 oacs;
+	u16 nssa;
+	u16 nr_streams;
 	atomic_t abort_limit;
 	u8 event_limit;
 	u8 vwc;
@@ -165,6 +168,10 @@ struct nvme_ctrl {
 
 	/* Power saving configuration */
 	u64 ps_max_latency_us;
+	bool apst_enabled;
+
+	u32 hmpre;
+	u32 hmmin;
 
 	/* Fabrics only */
 	u16 sqsize;
@@ -172,12 +179,10 @@ struct nvme_ctrl {
 	u32 iorcsz;
 	u16 icdoff;
 	u16 maxcmd;
+	int nr_reconnects;
 	struct nvmf_ctrl_options *opts;
 };
 
-/*
- * An NVM Express namespace is equivalent to a SCSI LUN
- */
 struct nvme_ns {
 	struct list_head list;
 
@@ -189,14 +194,18 @@ struct nvme_ns {
 	int instance;
 
 	u8 eui[8];
-	u8 uuid[16];
+	u8 nguid[16];
+	uuid_t uuid;
 
 	unsigned ns_id;
 	int lba_shift;
 	u16 ms;
+	u16 sgs;
+	u32 sws;
 	bool ext;
 	u8 pi_type;
 	unsigned long flags;
+	u16 noiob;
 
 #define NVME_NS_REMOVING 0
 #define NVME_NS_DEAD     1
@@ -208,15 +217,15 @@ struct nvme_ns {
 struct nvme_ctrl_ops {
 	const char *name;
 	struct module *module;
-	bool is_fabrics;
+	unsigned int flags;
+#define NVME_F_FABRICS			(1 << 0)
+#define NVME_F_METADATA_SUPPORTED	(1 << 1)
 	int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
 	int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
 	int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
-	int (*reset_ctrl)(struct nvme_ctrl *ctrl);
 	void (*free_ctrl)(struct nvme_ctrl *ctrl);
 	void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx);
 	int (*delete_ctrl)(struct nvme_ctrl *ctrl);
-	const char *(*get_subsysnqn)(struct nvme_ctrl *ctrl);
 	int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
 };
 
@@ -294,7 +303,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, unsigned int flags, int qid);
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmd);
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void *buf, unsigned bufflen);
@@ -308,23 +317,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void __user *ubuffer, unsigned bufflen,
 		void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
 		u32 *result, unsigned timeout);
-int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
-int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
-		struct nvme_id_ns **id);
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
-int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
-		      void *buffer, size_t buflen, u32 *result);
-int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
-		      void *buffer, size_t buflen, u32 *result);
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
 void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
 void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
-
-struct sg_io_hdr;
-
-int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
-int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
-int nvme_sg_get_version_num(int __user *ip);
+int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
 
 #ifdef CONFIG_NVM
 int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index fed803232edc..33c3b9db7d36 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -17,28 +17,15 @@
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-mq-pci.h>
-#include <linux/cpu.h>
-#include <linux/delay.h>
 #include <linux/dmi.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/hdreg.h>
-#include <linux/idr.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/kdev_t.h>
-#include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/poison.h>
-#include <linux/ptrace.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
 #include <linux/t10-pi.h>
 #include <linux/timer.h>
 #include <linux/types.h>
@@ -49,7 +36,6 @@
 #include "nvme.h"
 
 #define NVME_Q_DEPTH		1024
-#define NVME_AQ_DEPTH		256
 #define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
 
@@ -66,12 +52,14 @@ static bool use_cmb_sqes = true;
 module_param(use_cmb_sqes, bool, 0644);
 MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
 
-static struct workqueue_struct *nvme_workq;
+static unsigned int max_host_mem_size_mb = 128;
+module_param(max_host_mem_size_mb, uint, 0444);
+MODULE_PARM_DESC(max_host_mem_size_mb,
+	"Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
 
 struct nvme_dev;
 struct nvme_queue;
 
-static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
 
@@ -92,9 +80,8 @@ struct nvme_dev {
 	int q_depth;
 	u32 db_stride;
 	void __iomem *bar;
-	struct work_struct reset_work;
+	unsigned long bar_mapped_size;
 	struct work_struct remove_work;
-	struct timer_list watchdog_timer;
 	struct mutex shutdown_lock;
 	bool subsystem;
 	void __iomem *cmb;
@@ -104,10 +91,18 @@ struct nvme_dev {
 	u32 cmbloc;
 	struct nvme_ctrl ctrl;
 	struct completion ioq_wait;
+
+	/* shadow doorbell buffer support: */
 	u32 *dbbuf_dbs;
 	dma_addr_t dbbuf_dbs_dma_addr;
 	u32 *dbbuf_eis;
 	dma_addr_t dbbuf_eis_dma_addr;
+
+	/* host memory buffer support: */
+	u64 host_mem_size;
+	u32 nr_host_mem_descs;
+	struct nvme_host_mem_buf_desc *host_mem_descs;
+	void **host_mem_desc_bufs;
 };
 
 static inline unsigned int sq_idx(unsigned int qid, u32 stride)
@@ -185,8 +180,8 @@ static inline void _nvme_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
-	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
 	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
 	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
@@ -263,7 +258,7 @@ static void nvme_dbbuf_set(struct nvme_dev *dev)
 	c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr);
 
 	if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) {
-		dev_warn(dev->dev, "unable to set dbbuf\n");
+		dev_warn(dev->ctrl.device, "unable to set dbbuf\n");
 		/* Free memory and continue on */
 		nvme_dbbuf_dma_free(dev);
 	}
@@ -350,19 +345,6 @@ static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_i
 	nvmeq->tags = NULL;
 }
 
-static int nvme_admin_init_request(struct blk_mq_tag_set *set,
-		struct request *req, unsigned int hctx_idx,
-		unsigned int numa_node)
-{
-	struct nvme_dev *dev = set->driver_data;
-	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	struct nvme_queue *nvmeq = dev->queues[0];
-
-	BUG_ON(!nvmeq);
-	iod->nvmeq = nvmeq;
-	return 0;
-}
-
 static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 			  unsigned int hctx_idx)
 {
@@ -382,7 +364,8 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
 {
 	struct nvme_dev *dev = set->driver_data;
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
+	int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
+	struct nvme_queue *nvmeq = dev->queues[queue_idx];
 
 	BUG_ON(!nvmeq);
 	iod->nvmeq = nvmeq;
@@ -427,7 +410,7 @@ static __le64 **iod_list(struct request *req)
 	return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
 }
 
-static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
+static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
 	int nseg = blk_rq_nr_phys_segments(rq);
@@ -436,7 +419,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 	if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
 		iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
 		if (!iod->sg)
-			return BLK_MQ_RQ_QUEUE_BUSY;
+			return BLK_STS_RESOURCE;
 	} else {
 		iod->sg = iod->inline_sg;
 	}
@@ -446,7 +429,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 	iod->nents = 0;
 	iod->length = size;
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
@@ -616,21 +599,21 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 	return true;
 }
 
-static int nvme_map_data(struct nvme_dev *dev, struct request *req,
+static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct nvme_command *cmnd)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct request_queue *q = req->q;
 	enum dma_data_direction dma_dir = rq_data_dir(req) ?
 			DMA_TO_DEVICE : DMA_FROM_DEVICE;
-	int ret = BLK_MQ_RQ_QUEUE_ERROR;
+	blk_status_t ret = BLK_STS_IOERR;
 
 	sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
 	iod->nents = blk_rq_map_sg(q, req, iod->sg);
 	if (!iod->nents)
 		goto out;
 
-	ret = BLK_MQ_RQ_QUEUE_BUSY;
+	ret = BLK_STS_RESOURCE;
 	if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
 				DMA_ATTR_NO_WARN))
 		goto out;
@@ -638,7 +621,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
 	if (!nvme_setup_prps(dev, req))
 		goto out_unmap;
 
-	ret = BLK_MQ_RQ_QUEUE_ERROR;
+	ret = BLK_STS_IOERR;
 	if (blk_integrity_rq(req)) {
 		if (blk_rq_count_integrity_sg(q, req->bio) != 1)
 			goto out_unmap;
@@ -658,7 +641,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
 	cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma);
 	if (blk_integrity_rq(req))
 		cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 
 out_unmap:
 	dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
@@ -688,7 +671,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 /*
  * NOTE: ns is NULL when called on the admin queue.
  */
-static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct nvme_ns *ns = hctx->queue->queuedata;
@@ -696,47 +679,34 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *req = bd->rq;
 	struct nvme_command cmnd;
-	int ret = BLK_MQ_RQ_QUEUE_OK;
-
-	/*
-	 * If formated with metadata, require the block layer provide a buffer
-	 * unless this namespace is formated such that the metadata can be
-	 * stripped/generated by the controller with PRACT=1.
-	 */
-	if (ns && ns->ms && !blk_integrity_rq(req)) {
-		if (!(ns->pi_type && ns->ms == 8) &&
-		    !blk_rq_is_passthrough(req)) {
-			blk_mq_end_request(req, -EFAULT);
-			return BLK_MQ_RQ_QUEUE_OK;
-		}
-	}
+	blk_status_t ret;
 
 	ret = nvme_setup_cmd(ns, req, &cmnd);
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret)
 		return ret;
 
 	ret = nvme_init_iod(req, dev);
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret)
 		goto out_free_cmd;
 
-	if (blk_rq_nr_phys_segments(req))
+	if (blk_rq_nr_phys_segments(req)) {
 		ret = nvme_map_data(dev, req, &cmnd);
-
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
-		goto out_cleanup_iod;
+		if (ret)
+			goto out_cleanup_iod;
+	}
 
 	blk_mq_start_request(req);
 
 	spin_lock_irq(&nvmeq->q_lock);
 	if (unlikely(nvmeq->cq_vector < 0)) {
-		ret = BLK_MQ_RQ_QUEUE_ERROR;
+		ret = BLK_STS_IOERR;
 		spin_unlock_irq(&nvmeq->q_lock);
 		goto out_cleanup_iod;
 	}
 	__nvme_submit_cmd(nvmeq, &cmnd);
 	nvme_process_cq(nvmeq);
 	spin_unlock_irq(&nvmeq->q_lock);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 out_cleanup_iod:
 	nvme_free_iod(dev, req);
 out_free_cmd:
@@ -759,65 +729,75 @@ static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
 	return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
 }
 
-static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
+static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
 {
-	u16 head, phase;
-
-	head = nvmeq->cq_head;
-	phase = nvmeq->cq_phase;
-
-	while (nvme_cqe_valid(nvmeq, head, phase)) {
-		struct nvme_completion cqe = nvmeq->cqes[head];
-		struct request *req;
-
-		if (++head == nvmeq->q_depth) {
-			head = 0;
-			phase = !phase;
-		}
-
-		if (tag && *tag == cqe.command_id)
-			*tag = -1;
+	u16 head = nvmeq->cq_head;
 
-		if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
-			dev_warn(nvmeq->dev->ctrl.device,
-				"invalid id %d completed on queue %d\n",
-				cqe.command_id, le16_to_cpu(cqe.sq_id));
-			continue;
-		}
+	if (likely(nvmeq->cq_vector >= 0)) {
+		if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
+						      nvmeq->dbbuf_cq_ei))
+			writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+	}
+}
 
-		/*
-		 * AEN requests are special as they don't time out and can
-		 * survive any kind of queue freeze and often don't respond to
-		 * aborts.  We don't even bother to allocate a struct request
-		 * for them but rather special case them here.
-		 */
-		if (unlikely(nvmeq->qid == 0 &&
-				cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) {
-			nvme_complete_async_event(&nvmeq->dev->ctrl,
-					cqe.status, &cqe.result);
-			continue;
-		}
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
+		struct nvme_completion *cqe)
+{
+	struct request *req;
 
-		req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
-		nvme_end_request(req, cqe.status, cqe.result);
+	if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
+		dev_warn(nvmeq->dev->ctrl.device,
+			"invalid id %d completed on queue %d\n",
+			cqe->command_id, le16_to_cpu(cqe->sq_id));
+		return;
 	}
 
-	if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
+	/*
+	 * AEN requests are special as they don't time out and can
+	 * survive any kind of queue freeze and often don't respond to
+	 * aborts.  We don't even bother to allocate a struct request
+	 * for them but rather special case them here.
+	 */
+	if (unlikely(nvmeq->qid == 0 &&
+			cqe->command_id >= NVME_AQ_BLKMQ_DEPTH)) {
+		nvme_complete_async_event(&nvmeq->dev->ctrl,
+				cqe->status, &cqe->result);
 		return;
+	}
 
-	if (likely(nvmeq->cq_vector >= 0))
-		if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
-						      nvmeq->dbbuf_cq_ei))
-			writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
-	nvmeq->cq_head = head;
-	nvmeq->cq_phase = phase;
+	req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
+	nvme_end_request(req, cqe->status, cqe->result);
+}
 
-	nvmeq->cqe_seen = 1;
+static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
+		struct nvme_completion *cqe)
+{
+	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
+		*cqe = nvmeq->cqes[nvmeq->cq_head];
+
+		if (++nvmeq->cq_head == nvmeq->q_depth) {
+			nvmeq->cq_head = 0;
+			nvmeq->cq_phase = !nvmeq->cq_phase;
+		}
+		return true;
+	}
+	return false;
 }
 
 static void nvme_process_cq(struct nvme_queue *nvmeq)
 {
-	__nvme_process_cq(nvmeq, NULL);
+	struct nvme_completion cqe;
+	int consumed = 0;
+
+	while (nvme_read_cqe(nvmeq, &cqe)) {
+		nvme_handle_cqe(nvmeq, &cqe);
+		consumed++;
+	}
+
+	if (consumed) {
+		nvme_ring_cq_doorbell(nvmeq);
+		nvmeq->cqe_seen = 1;
+	}
 }
 
 static irqreturn_t nvme_irq(int irq, void *data)
@@ -842,16 +822,28 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 
 static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
 {
-	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
-		spin_lock_irq(&nvmeq->q_lock);
-		__nvme_process_cq(nvmeq, &tag);
-		spin_unlock_irq(&nvmeq->q_lock);
+	struct nvme_completion cqe;
+	int found = 0, consumed = 0;
 
-		if (tag == -1)
-			return 1;
-	}
+	if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+		return 0;
 
-	return 0;
+	spin_lock_irq(&nvmeq->q_lock);
+	while (nvme_read_cqe(nvmeq, &cqe)) {
+		nvme_handle_cqe(nvmeq, &cqe);
+		consumed++;
+
+		if (tag == cqe.command_id) {
+			found = 1;
+			break;
+		}
+       }
+
+	if (consumed)
+		nvme_ring_cq_doorbell(nvmeq);
+	spin_unlock_irq(&nvmeq->q_lock);
+
+	return found;
 }
 
 static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
@@ -939,7 +931,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
 	return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
 }
 
-static void abort_endio(struct request *req, int error)
+static void abort_endio(struct request *req, blk_status_t error)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct nvme_queue *nvmeq = iod->nvmeq;
@@ -950,6 +942,51 @@ static void abort_endio(struct request *req, int error)
 	blk_mq_free_request(req);
 }
 
+static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
+{
+
+	/* If true, indicates loss of adapter communication, possibly by a
+	 * NVMe Subsystem reset.
+	 */
+	bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
+
+	/* If there is a reset ongoing, we shouldn't reset again. */
+	if (dev->ctrl.state == NVME_CTRL_RESETTING)
+		return false;
+
+	/* We shouldn't reset unless the controller is on fatal error state
+	 * _or_ if we lost the communication with it.
+	 */
+	if (!(csts & NVME_CSTS_CFS) && !nssro)
+		return false;
+
+	/* If PCI error recovery process is happening, we cannot reset or
+	 * the recovery mechanism will surely fail.
+	 */
+	if (pci_channel_offline(to_pci_dev(dev->dev)))
+		return false;
+
+	return true;
+}
+
+static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
+{
+	/* Read a config register to help see what died. */
+	u16 pci_status;
+	int result;
+
+	result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
+				      &pci_status);
+	if (result == PCIBIOS_SUCCESSFUL)
+		dev_warn(dev->ctrl.device,
+			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
+			 csts, pci_status);
+	else
+		dev_warn(dev->ctrl.device,
+			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
+			 csts, result);
+}
+
 static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -957,6 +994,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *abort_req;
 	struct nvme_command cmd;
+	u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+	/*
+	 * Reset immediately if the controller is failed
+	 */
+	if (nvme_should_reset(dev, csts)) {
+		nvme_warn_reset(dev, csts);
+		nvme_dev_disable(dev, false);
+		nvme_reset_ctrl(&dev->ctrl);
+		return BLK_EH_HANDLED;
+	}
 
 	/*
 	 * Did we miss an interrupt?
@@ -993,7 +1041,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 			 "I/O %d QID %d timeout, reset controller\n",
 			 req->tag, nvmeq->qid);
 		nvme_dev_disable(dev, false);
-		nvme_reset(dev);
+		nvme_reset_ctrl(&dev->ctrl);
 
 		/*
 		 * Mark the request as handled, since the inline shutdown
@@ -1247,7 +1295,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
 	.complete	= nvme_pci_complete_rq,
 	.init_hctx	= nvme_admin_init_hctx,
 	.exit_hctx      = nvme_admin_exit_hctx,
-	.init_request	= nvme_admin_init_request,
+	.init_request	= nvme_init_request,
 	.timeout	= nvme_timeout,
 };
 
@@ -1311,6 +1359,32 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 	return 0;
 }
 
+static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+{
+	return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
+}
+
+static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
+{
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+	if (size <= dev->bar_mapped_size)
+		return 0;
+	if (size > pci_resource_len(pdev, 0))
+		return -ENOMEM;
+	if (dev->bar)
+		iounmap(dev->bar);
+	dev->bar = ioremap(pci_resource_start(pdev, 0), size);
+	if (!dev->bar) {
+		dev->bar_mapped_size = 0;
+		return -ENOMEM;
+	}
+	dev->bar_mapped_size = size;
+	dev->dbs = dev->bar + NVME_REG_DBS;
+
+	return 0;
+}
+
 static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
@@ -1318,6 +1392,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 	struct nvme_queue *nvmeq;
 
+	result = nvme_remap_bar(dev, db_bar_size(dev, 0));
+	if (result < 0)
+		return result;
+
 	dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
 						NVME_CAP_NSSRC(cap) : 0;
 
@@ -1358,66 +1436,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	return result;
 }
 
-static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
-{
-
-	/* If true, indicates loss of adapter communication, possibly by a
-	 * NVMe Subsystem reset.
-	 */
-	bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
-
-	/* If there is a reset ongoing, we shouldn't reset again. */
-	if (work_busy(&dev->reset_work))
-		return false;
-
-	/* We shouldn't reset unless the controller is on fatal error state
-	 * _or_ if we lost the communication with it.
-	 */
-	if (!(csts & NVME_CSTS_CFS) && !nssro)
-		return false;
-
-	/* If PCI error recovery process is happening, we cannot reset or
-	 * the recovery mechanism will surely fail.
-	 */
-	if (pci_channel_offline(to_pci_dev(dev->dev)))
-		return false;
-
-	return true;
-}
-
-static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
-{
-	/* Read a config register to help see what died. */
-	u16 pci_status;
-	int result;
-
-	result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
-				      &pci_status);
-	if (result == PCIBIOS_SUCCESSFUL)
-		dev_warn(dev->dev,
-			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
-			 csts, pci_status);
-	else
-		dev_warn(dev->dev,
-			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
-			 csts, result);
-}
-
-static void nvme_watchdog_timer(unsigned long data)
-{
-	struct nvme_dev *dev = (struct nvme_dev *)data;
-	u32 csts = readl(dev->bar + NVME_REG_CSTS);
-
-	/* Skip controllers under certain specific conditions. */
-	if (nvme_should_reset(dev, csts)) {
-		if (!nvme_reset(dev))
-			nvme_warn_reset(dev, csts);
-		return;
-	}
-
-	mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-}
-
 static int nvme_create_io_queues(struct nvme_dev *dev)
 {
 	unsigned i, max;
@@ -1506,19 +1524,176 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
 	if (dev->cmb) {
 		iounmap(dev->cmb);
 		dev->cmb = NULL;
+		if (dev->cmbsz) {
+			sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
+						     &dev_attr_cmb.attr, NULL);
+			dev->cmbsz = 0;
+		}
 	}
 }
 
-static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
 {
-	return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
+	size_t len = dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs);
+	struct nvme_command c;
+	u64 dma_addr;
+	int ret;
+
+	dma_addr = dma_map_single(dev->dev, dev->host_mem_descs, len,
+			DMA_TO_DEVICE);
+	if (dma_mapping_error(dev->dev, dma_addr))
+		return -ENOMEM;
+
+	memset(&c, 0, sizeof(c));
+	c.features.opcode	= nvme_admin_set_features;
+	c.features.fid		= cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
+	c.features.dword11	= cpu_to_le32(bits);
+	c.features.dword12	= cpu_to_le32(dev->host_mem_size >>
+					      ilog2(dev->ctrl.page_size));
+	c.features.dword13	= cpu_to_le32(lower_32_bits(dma_addr));
+	c.features.dword14	= cpu_to_le32(upper_32_bits(dma_addr));
+	c.features.dword15	= cpu_to_le32(dev->nr_host_mem_descs);
+
+	ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
+	if (ret) {
+		dev_warn(dev->ctrl.device,
+			 "failed to set host mem (err %d, flags %#x).\n",
+			 ret, bits);
+	}
+	dma_unmap_single(dev->dev, dma_addr, len, DMA_TO_DEVICE);
+	return ret;
+}
+
+static void nvme_free_host_mem(struct nvme_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < dev->nr_host_mem_descs; i++) {
+		struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
+		size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size;
+
+		dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i],
+				le64_to_cpu(desc->addr));
+	}
+
+	kfree(dev->host_mem_desc_bufs);
+	dev->host_mem_desc_bufs = NULL;
+	kfree(dev->host_mem_descs);
+	dev->host_mem_descs = NULL;
+}
+
+static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
+{
+	struct nvme_host_mem_buf_desc *descs;
+	u32 chunk_size, max_entries, i = 0;
+	void **bufs;
+	u64 size, tmp;
+
+	/* start big and work our way down */
+	chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
+retry:
+	tmp = (preferred + chunk_size - 1);
+	do_div(tmp, chunk_size);
+	max_entries = tmp;
+	descs = kcalloc(max_entries, sizeof(*descs), GFP_KERNEL);
+	if (!descs)
+		goto out;
+
+	bufs = kcalloc(max_entries, sizeof(*bufs), GFP_KERNEL);
+	if (!bufs)
+		goto out_free_descs;
+
+	for (size = 0; size < preferred; size += chunk_size) {
+		u32 len = min_t(u64, chunk_size, preferred - size);
+		dma_addr_t dma_addr;
+
+		bufs[i] = dma_alloc_attrs(dev->dev, len, &dma_addr, GFP_KERNEL,
+				DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
+		if (!bufs[i])
+			break;
+
+		descs[i].addr = cpu_to_le64(dma_addr);
+		descs[i].size = cpu_to_le32(len / dev->ctrl.page_size);
+		i++;
+	}
+
+	if (!size || (min && size < min)) {
+		dev_warn(dev->ctrl.device,
+			"failed to allocate host memory buffer.\n");
+		goto out_free_bufs;
+	}
+
+	dev_info(dev->ctrl.device,
+		"allocated %lld MiB host memory buffer.\n",
+		size >> ilog2(SZ_1M));
+	dev->nr_host_mem_descs = i;
+	dev->host_mem_size = size;
+	dev->host_mem_descs = descs;
+	dev->host_mem_desc_bufs = bufs;
+	return 0;
+
+out_free_bufs:
+	while (--i >= 0) {
+		size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size;
+
+		dma_free_coherent(dev->dev, size, bufs[i],
+				le64_to_cpu(descs[i].addr));
+	}
+
+	kfree(bufs);
+out_free_descs:
+	kfree(descs);
+out:
+	/* try a smaller chunk size if we failed early */
+	if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
+		chunk_size /= 2;
+		goto retry;
+	}
+	dev->host_mem_descs = NULL;
+	return -ENOMEM;
+}
+
+static void nvme_setup_host_mem(struct nvme_dev *dev)
+{
+	u64 max = (u64)max_host_mem_size_mb * SZ_1M;
+	u64 preferred = (u64)dev->ctrl.hmpre * 4096;
+	u64 min = (u64)dev->ctrl.hmmin * 4096;
+	u32 enable_bits = NVME_HOST_MEM_ENABLE;
+
+	preferred = min(preferred, max);
+	if (min > max) {
+		dev_warn(dev->ctrl.device,
+			"min host memory (%lld MiB) above limit (%d MiB).\n",
+			min >> ilog2(SZ_1M), max_host_mem_size_mb);
+		nvme_free_host_mem(dev);
+		return;
+	}
+
+	/*
+	 * If we already have a buffer allocated check if we can reuse it.
+	 */
+	if (dev->host_mem_descs) {
+		if (dev->host_mem_size >= min)
+			enable_bits |= NVME_HOST_MEM_RETURN;
+		else
+			nvme_free_host_mem(dev);
+	}
+
+	if (!dev->host_mem_descs) {
+		if (nvme_alloc_host_mem(dev, min, preferred))
+			return;
+	}
+
+	if (nvme_set_host_mem(dev, enable_bits))
+		nvme_free_host_mem(dev);
 }
 
 static int nvme_setup_io_queues(struct nvme_dev *dev)
 {
 	struct nvme_queue *adminq = dev->queues[0];
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
-	int result, nr_io_queues, size;
+	int result, nr_io_queues;
+	unsigned long size;
 
 	nr_io_queues = num_online_cpus();
 	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
@@ -1537,20 +1712,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 			nvme_release_cmb(dev);
 	}
 
-	size = db_bar_size(dev, nr_io_queues);
-	if (size > 8192) {
-		iounmap(dev->bar);
-		do {
-			dev->bar = ioremap(pci_resource_start(pdev, 0), size);
-			if (dev->bar)
-				break;
-			if (!--nr_io_queues)
-				return -ENOMEM;
-			size = db_bar_size(dev, nr_io_queues);
-		} while (1);
-		dev->dbs = dev->bar + 4096;
-		adminq->q_db = dev->dbs;
-	}
+	do {
+		size = db_bar_size(dev, nr_io_queues);
+		result = nvme_remap_bar(dev, size);
+		if (!result)
+			break;
+		if (!--nr_io_queues)
+			return -ENOMEM;
+	} while (1);
+	adminq->q_db = dev->dbs;
 
 	/* Deregister the admin queue's interrupt */
 	pci_free_irq(pdev, 0, adminq);
@@ -1581,7 +1751,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	return nvme_create_io_queues(dev);
 }
 
-static void nvme_del_queue_end(struct request *req, int error)
+static void nvme_del_queue_end(struct request *req, blk_status_t error)
 {
 	struct nvme_queue *nvmeq = req->end_io_data;
 
@@ -1589,7 +1759,7 @@ static void nvme_del_queue_end(struct request *req, int error)
 	complete(&nvmeq->dev->ioq_wait);
 }
 
-static void nvme_del_cq_end(struct request *req, int error)
+static void nvme_del_cq_end(struct request *req, blk_status_t error)
 {
 	struct nvme_queue *nvmeq = req->end_io_data;
 
@@ -1735,8 +1905,8 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	 */
 	if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) {
 		dev->q_depth = 2;
-		dev_warn(dev->dev, "detected Apple NVMe controller, set "
-			"queue depth=%u to work around controller resets\n",
+		dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
+			"set queue depth=%u to work around controller resets\n",
 			dev->q_depth);
 	}
 
@@ -1754,7 +1924,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 		if (dev->cmbsz) {
 			if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
 						    &dev_attr_cmb.attr, NULL))
-				dev_warn(dev->dev,
+				dev_warn(dev->ctrl.device,
 					 "failed to add sysfs attribute for CMB\n");
 		}
 	}
@@ -1779,6 +1949,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
+	nvme_release_cmb(dev);
 	pci_free_irq_vectors(pdev);
 
 	if (pci_is_enabled(pdev)) {
@@ -1793,13 +1964,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	bool dead = true;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
-	del_timer_sync(&dev->watchdog_timer);
-
 	mutex_lock(&dev->shutdown_lock);
 	if (pci_is_enabled(pdev)) {
 		u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
-		if (dev->ctrl.state == NVME_CTRL_LIVE)
+		if (dev->ctrl.state == NVME_CTRL_LIVE ||
+		    dev->ctrl.state == NVME_CTRL_RESETTING)
 			nvme_start_freeze(&dev->ctrl);
 		dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
 			pdev->error_state  != pci_channel_io_normal);
@@ -1809,8 +1979,20 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	 * Give the controller a chance to complete all entered requests if
 	 * doing a safe shutdown.
 	 */
-	if (!dead && shutdown)
-		nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+	if (!dead) {
+		if (shutdown)
+			nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+
+		/*
+		 * If the controller is still alive tell it to stop using the
+		 * host memory buffer.  In theory the shutdown / reset should
+		 * make sure that it doesn't access the host memoery anymore,
+		 * but I'd rather be safe than sorry..
+		 */
+		if (dev->host_mem_descs)
+			nvme_set_host_mem(dev, 0);
+
+	}
 	nvme_stop_queues(&dev->ctrl);
 
 	queues = dev->online_queues - 1;
@@ -1893,11 +2075,12 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
 
 static void nvme_reset_work(struct work_struct *work)
 {
-	struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
+	struct nvme_dev *dev =
+		container_of(work, struct nvme_dev, ctrl.reset_work);
 	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
 	int result = -ENODEV;
 
-	if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING))
+	if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING))
 		goto out;
 
 	/*
@@ -1907,9 +2090,6 @@ static void nvme_reset_work(struct work_struct *work)
 	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
 		nvme_dev_disable(dev, false);
 
-	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
-		goto out;
-
 	result = nvme_pci_enable(dev);
 	if (result)
 		goto out;
@@ -1945,6 +2125,9 @@ static void nvme_reset_work(struct work_struct *work)
 				 "unable to allocate dma for dbbuf\n");
 	}
 
+	if (dev->ctrl.hmpre)
+		nvme_setup_host_mem(dev);
+
 	result = nvme_setup_io_queues(dev);
 	if (result)
 		goto out;
@@ -1958,8 +2141,6 @@ static void nvme_reset_work(struct work_struct *work)
 	if (dev->online_queues > 1)
 		nvme_queue_async_events(&dev->ctrl);
 
-	mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-
 	/*
 	 * Keep the controller around but remove all namespaces if we don't have
 	 * any working I/O queue.
@@ -1999,17 +2180,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
 	nvme_put_ctrl(&dev->ctrl);
 }
 
-static int nvme_reset(struct nvme_dev *dev)
-{
-	if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
-		return -ENODEV;
-	if (work_busy(&dev->reset_work))
-		return -ENODEV;
-	if (!queue_work(nvme_workq, &dev->reset_work))
-		return -EBUSY;
-	return 0;
-}
-
 static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
 {
 	*val = readl(to_nvme_dev(ctrl)->bar + off);
@@ -2028,23 +2198,13 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
 	return 0;
 }
 
-static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
-{
-	struct nvme_dev *dev = to_nvme_dev(ctrl);
-	int ret = nvme_reset(dev);
-
-	if (!ret)
-		flush_work(&dev->reset_work);
-	return ret;
-}
-
 static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
 	.name			= "pcie",
 	.module			= THIS_MODULE,
+	.flags			= NVME_F_METADATA_SUPPORTED,
 	.reg_read32		= nvme_pci_reg_read32,
 	.reg_write32		= nvme_pci_reg_write32,
 	.reg_read64		= nvme_pci_reg_read64,
-	.reset_ctrl		= nvme_pci_reset_ctrl,
 	.free_ctrl		= nvme_pci_free_ctrl,
 	.submit_async_event	= nvme_pci_submit_async_event,
 };
@@ -2056,8 +2216,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
 	if (pci_request_mem_regions(pdev, "nvme"))
 		return -ENODEV;
 
-	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
-	if (!dev->bar)
+	if (nvme_remap_bar(dev, NVME_REG_DBS + 4096))
 		goto release;
 
 	return 0;
@@ -2111,10 +2270,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto free;
 
-	INIT_WORK(&dev->reset_work, nvme_reset_work);
+	INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
 	INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
-	setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
-		(unsigned long)dev);
 	mutex_init(&dev->shutdown_lock);
 	init_completion(&dev->ioq_wait);
 
@@ -2129,9 +2286,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto release_pools;
 
+	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
 	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
 
-	queue_work(nvme_workq, &dev->reset_work);
+	queue_work(nvme_wq, &dev->ctrl.reset_work);
 	return 0;
 
  release_pools:
@@ -2152,7 +2310,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
 	if (prepare)
 		nvme_dev_disable(dev, false);
 	else
-		nvme_reset(dev);
+		nvme_reset_ctrl(&dev->ctrl);
 }
 
 static void nvme_shutdown(struct pci_dev *pdev)
@@ -2172,6 +2330,7 @@ static void nvme_remove(struct pci_dev *pdev)
 
 	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 
+	cancel_work_sync(&dev->ctrl.reset_work);
 	pci_set_drvdata(pdev, NULL);
 
 	if (!pci_device_is_present(pdev)) {
@@ -2179,12 +2338,12 @@ static void nvme_remove(struct pci_dev *pdev)
 		nvme_dev_disable(dev, false);
 	}
 
-	flush_work(&dev->reset_work);
+	flush_work(&dev->ctrl.reset_work);
 	nvme_uninit_ctrl(&dev->ctrl);
 	nvme_dev_disable(dev, true);
+	nvme_free_host_mem(dev);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
-	nvme_release_cmb(dev);
 	nvme_release_prp_pools(dev);
 	nvme_dev_unmap(dev);
 	nvme_put_ctrl(&dev->ctrl);
@@ -2223,7 +2382,7 @@ static int nvme_resume(struct device *dev)
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct nvme_dev *ndev = pci_get_drvdata(pdev);
 
-	nvme_reset(ndev);
+	nvme_reset_ctrl(&ndev->ctrl);
 	return 0;
 }
 #endif
@@ -2262,7 +2421,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
 
 	dev_info(dev->ctrl.device, "restart after slot reset\n");
 	pci_restore_state(pdev);
-	nvme_reset(dev);
+	nvme_reset_ctrl(&dev->ctrl);
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
@@ -2288,6 +2447,8 @@ static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_VDEVICE(INTEL, 0x0a54),
 		.driver_data = NVME_QUIRK_STRIPE_SIZE |
 				NVME_QUIRK_DEALLOCATE_ZEROES, },
+	{ PCI_VDEVICE(INTEL, 0xf1a5),	/* Intel 600P/P3100 */
+		.driver_data = NVME_QUIRK_NO_DEEPEST_PS },
 	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */
 		.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
 	{ PCI_DEVICE(0x1c58, 0x0003),	/* HGST adapter */
@@ -2316,22 +2477,12 @@ static struct pci_driver nvme_driver = {
 
 static int __init nvme_init(void)
 {
-	int result;
-
-	nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
-	if (!nvme_workq)
-		return -ENOMEM;
-
-	result = pci_register_driver(&nvme_driver);
-	if (result)
-		destroy_workqueue(nvme_workq);
-	return result;
+	return pci_register_driver(&nvme_driver);
 }
 
 static void __exit nvme_exit(void)
 {
 	pci_unregister_driver(&nvme_driver);
-	destroy_workqueue(nvme_workq);
 	_nvme_check_size();
 }
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index dd1c6deef82f..6d4119dfbdaa 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -48,7 +48,7 @@
  */
 #define NVME_RDMA_NR_AEN_COMMANDS      1
 #define NVME_RDMA_AQ_BLKMQ_DEPTH       \
-	(NVMF_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
+	(NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
 
 struct nvme_rdma_device {
 	struct ib_device       *dev;
@@ -80,10 +80,8 @@ struct nvme_rdma_request {
 };
 
 enum nvme_rdma_queue_flags {
-	NVME_RDMA_Q_CONNECTED = (1 << 0),
-	NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
-	NVME_RDMA_Q_DELETING = (1 << 2),
-	NVME_RDMA_Q_LIVE = (1 << 3),
+	NVME_RDMA_Q_LIVE		= 0,
+	NVME_RDMA_Q_DELETING		= 1,
 };
 
 struct nvme_rdma_queue {
@@ -103,9 +101,6 @@ struct nvme_rdma_queue {
 };
 
 struct nvme_rdma_ctrl {
-	/* read and written in the hot path */
-	spinlock_t		lock;
-
 	/* read only in the hot path */
 	struct nvme_rdma_queue	*queues;
 	u32			queue_count;
@@ -113,7 +108,6 @@ struct nvme_rdma_ctrl {
 	/* other member variables */
 	struct blk_mq_tag_set	tag_set;
 	struct work_struct	delete_work;
-	struct work_struct	reset_work;
 	struct work_struct	err_work;
 
 	struct nvme_rdma_qe	async_event_sqe;
@@ -145,8 +139,6 @@ static DEFINE_MUTEX(device_list_mutex);
 static LIST_HEAD(nvme_rdma_ctrl_list);
 static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
 
-static struct workqueue_struct *nvme_rdma_wq;
-
 /*
  * Disabling this option makes small I/O goes faster, but is fundamentally
  * unsafe.  With it turned off we will have to register a global rkey that
@@ -301,10 +293,12 @@ out:
 	return ret;
 }
 
-static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl,
-		struct request *rq, unsigned int queue_idx)
+static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
+		struct request *rq, unsigned int hctx_idx)
 {
+	struct nvme_rdma_ctrl *ctrl = set->driver_data;
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
 	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
 	struct nvme_rdma_device *dev = queue->device;
 
@@ -315,22 +309,13 @@ static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl,
 			DMA_TO_DEVICE);
 }
 
-static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
-		struct request *rq, unsigned int hctx_idx)
-{
-	return __nvme_rdma_exit_request(set->driver_data, rq, hctx_idx + 1);
-}
-
-static void nvme_rdma_exit_admin_request(struct blk_mq_tag_set *set,
-		struct request *rq, unsigned int hctx_idx)
-{
-	return __nvme_rdma_exit_request(set->driver_data, rq, 0);
-}
-
-static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl,
-		struct request *rq, unsigned int queue_idx)
+static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
+		struct request *rq, unsigned int hctx_idx,
+		unsigned int numa_node)
 {
+	struct nvme_rdma_ctrl *ctrl = set->driver_data;
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
 	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
 	struct nvme_rdma_device *dev = queue->device;
 	struct ib_device *ibdev = dev->dev;
@@ -358,20 +343,6 @@ out_free_qe:
 	return -ENOMEM;
 }
 
-static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
-		struct request *rq, unsigned int hctx_idx,
-		unsigned int numa_node)
-{
-	return __nvme_rdma_init_request(set->driver_data, rq, hctx_idx + 1);
-}
-
-static int nvme_rdma_init_admin_request(struct blk_mq_tag_set *set,
-		struct request *rq, unsigned int hctx_idx,
-		unsigned int numa_node)
-{
-	return __nvme_rdma_init_request(set->driver_data, rq, 0);
-}
-
 static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 		unsigned int hctx_idx)
 {
@@ -469,9 +440,6 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 	struct nvme_rdma_device *dev;
 	struct ib_device *ibdev;
 
-	if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags))
-		return;
-
 	dev = queue->device;
 	ibdev = dev->dev;
 	rdma_destroy_qp(queue->cm_id);
@@ -483,17 +451,21 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 	nvme_rdma_dev_put(dev);
 }
 
-static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
-		struct nvme_rdma_device *dev)
+static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 {
-	struct ib_device *ibdev = dev->dev;
+	struct ib_device *ibdev;
 	const int send_wr_factor = 3;			/* MR, SEND, INV */
 	const int cq_factor = send_wr_factor + 1;	/* + RECV */
 	int comp_vector, idx = nvme_rdma_queue_idx(queue);
-
 	int ret;
 
-	queue->device = dev;
+	queue->device = nvme_rdma_find_get_device(queue->cm_id);
+	if (!queue->device) {
+		dev_err(queue->cm_id->device->dev.parent,
+			"no client data found!\n");
+		return -ECONNREFUSED;
+	}
+	ibdev = queue->device->dev;
 
 	/*
 	 * The admin queue is barely used once the controller is live, so don't
@@ -506,12 +478,12 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
 
 
 	/* +1 for ib_stop_cq */
-	queue->ib_cq = ib_alloc_cq(dev->dev, queue,
-				cq_factor * queue->queue_size + 1, comp_vector,
-				IB_POLL_SOFTIRQ);
+	queue->ib_cq = ib_alloc_cq(ibdev, queue,
+				cq_factor * queue->queue_size + 1,
+				comp_vector, IB_POLL_SOFTIRQ);
 	if (IS_ERR(queue->ib_cq)) {
 		ret = PTR_ERR(queue->ib_cq);
-		goto out;
+		goto out_put_dev;
 	}
 
 	ret = nvme_rdma_create_qp(queue, send_wr_factor);
@@ -524,7 +496,6 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
 		ret = -ENOMEM;
 		goto out_destroy_qp;
 	}
-	set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags);
 
 	return 0;
 
@@ -532,7 +503,8 @@ out_destroy_qp:
 	ib_destroy_qp(queue->qp);
 out_destroy_ib_cq:
 	ib_free_cq(queue->ib_cq);
-out:
+out_put_dev:
+	nvme_rdma_dev_put(queue->device);
 	return ret;
 }
 
@@ -583,12 +555,10 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
 	}
 
 	clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
-	set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags);
 
 	return 0;
 
 out_destroy_cm_id:
-	nvme_rdma_destroy_queue_ib(queue);
 	rdma_destroy_id(queue->cm_id);
 	return ret;
 }
@@ -718,11 +688,11 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
 	if (nvmf_should_reconnect(&ctrl->ctrl)) {
 		dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
 			ctrl->ctrl.opts->reconnect_delay);
-		queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
+		queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
 				ctrl->ctrl.opts->reconnect_delay * HZ);
 	} else {
 		dev_info(ctrl->ctrl.device, "Removing controller...\n");
-		queue_work(nvme_rdma_wq, &ctrl->delete_work);
+		queue_work(nvme_wq, &ctrl->delete_work);
 	}
 }
 
@@ -733,7 +703,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	bool changed;
 	int ret;
 
-	++ctrl->ctrl.opts->nr_reconnects;
+	++ctrl->ctrl.nr_reconnects;
 
 	if (ctrl->queue_count > 1) {
 		nvme_rdma_free_io_queues(ctrl);
@@ -749,40 +719,37 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	if (ret)
 		goto requeue;
 
-	ret = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH);
+	ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
 	if (ret)
 		goto requeue;
 
-	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
-
 	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
 	if (ret)
-		goto stop_admin_q;
+		goto requeue;
 
 	set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
 	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
 	if (ret)
-		goto stop_admin_q;
+		goto requeue;
 
 	nvme_start_keep_alive(&ctrl->ctrl);
 
 	if (ctrl->queue_count > 1) {
 		ret = nvme_rdma_init_io_queues(ctrl);
 		if (ret)
-			goto stop_admin_q;
+			goto requeue;
 
 		ret = nvme_rdma_connect_io_queues(ctrl);
 		if (ret)
-			goto stop_admin_q;
+			goto requeue;
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
-	ctrl->ctrl.opts->nr_reconnects = 0;
+	ctrl->ctrl.nr_reconnects = 0;
 
 	if (ctrl->queue_count > 1) {
-		nvme_start_queues(&ctrl->ctrl);
 		nvme_queue_scan(&ctrl->ctrl);
 		nvme_queue_async_events(&ctrl->ctrl);
 	}
@@ -791,11 +758,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 	return;
 
-stop_admin_q:
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
 requeue:
 	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
-			ctrl->ctrl.opts->nr_reconnects);
+			ctrl->ctrl.nr_reconnects);
 	nvme_rdma_reconnect_or_remove(ctrl);
 }
 
@@ -807,10 +772,8 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
 	nvme_stop_keep_alive(&ctrl->ctrl);
 
-	for (i = 0; i < ctrl->queue_count; i++) {
-		clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags);
+	for (i = 0; i < ctrl->queue_count; i++)
 		clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
-	}
 
 	if (ctrl->queue_count > 1)
 		nvme_stop_queues(&ctrl->ctrl);
@@ -823,6 +786,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
 
+	/*
+	 * queues are not a live anymore, so restart the queues to fail fast
+	 * new IO
+	 */
+	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+	nvme_start_queues(&ctrl->ctrl);
+
 	nvme_rdma_reconnect_or_remove(ctrl);
 }
 
@@ -831,7 +801,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING))
 		return;
 
-	queue_work(nvme_rdma_wq, &ctrl->err_work);
+	queue_work(nvme_wq, &ctrl->err_work);
 }
 
 static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
@@ -1038,6 +1008,19 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 		nvme_rdma_wr_error(cq, wc, "SEND");
 }
 
+static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
+{
+	int sig_limit;
+
+	/*
+	 * We signal completion every queue depth/2 and also handle the
+	 * degenerated case of a  device with queue_depth=1, where we
+	 * would need to signal every message.
+	 */
+	sig_limit = max(queue->queue_size / 2, 1);
+	return (++queue->sig_count % sig_limit) == 0;
+}
+
 static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 		struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
 		struct ib_send_wr *first, bool flush)
@@ -1065,9 +1048,6 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 	 * Would have been way to obvious to handle this in hardware or
 	 * at least the RDMA stack..
 	 *
-	 * This messy and racy code sniplet is copy and pasted from the iSER
-	 * initiator, and the magic '32' comes from there as well.
-	 *
 	 * Always signal the flushes. The magic request used for the flush
 	 * sequencer is not allocated in our driver's tagset and it's
 	 * triggered to be freed by blk_cleanup_queue(). So we need to
@@ -1075,7 +1055,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 	 * embedded in request's payload, is not freed when __ib_process_cq()
 	 * calls wr_cqe->done().
 	 */
-	if ((++queue->sig_count % 32) == 0 || flush)
+	if (nvme_rdma_queue_sig_limit(queue) || flush)
 		wr.send_flags |= IB_SEND_SIGNALED;
 
 	if (first)
@@ -1266,21 +1246,11 @@ static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
 
 static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
 {
-	struct nvme_rdma_device *dev;
 	int ret;
 
-	dev = nvme_rdma_find_get_device(queue->cm_id);
-	if (!dev) {
-		dev_err(queue->cm_id->device->dev.parent,
-			"no client data found!\n");
-		return -ECONNREFUSED;
-	}
-
-	ret = nvme_rdma_create_queue_ib(queue, dev);
-	if (ret) {
-		nvme_rdma_dev_put(dev);
-		goto out;
-	}
+	ret = nvme_rdma_create_queue_ib(queue);
+	if (ret)
+		return ret;
 
 	ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS);
 	if (ret) {
@@ -1294,7 +1264,6 @@ static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
 
 out_destroy_queue:
 	nvme_rdma_destroy_queue_ib(queue);
-out:
 	return ret;
 }
 
@@ -1322,8 +1291,8 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
 	 * specified by the Fabrics standard.
 	 */
 	if (priv.qid == 0) {
-		priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH);
-		priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+		priv.hrqsize = cpu_to_le16(NVME_AQ_DEPTH);
+		priv.hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
 	} else {
 		/*
 		 * current interpretation of the fabrics spec
@@ -1371,12 +1340,14 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		complete(&queue->cm_done);
 		return 0;
 	case RDMA_CM_EVENT_REJECTED:
+		nvme_rdma_destroy_queue_ib(queue);
 		cm_error = nvme_rdma_conn_rejected(queue, ev);
 		break;
-	case RDMA_CM_EVENT_ADDR_ERROR:
 	case RDMA_CM_EVENT_ROUTE_ERROR:
 	case RDMA_CM_EVENT_CONNECT_ERROR:
 	case RDMA_CM_EVENT_UNREACHABLE:
+		nvme_rdma_destroy_queue_ib(queue);
+	case RDMA_CM_EVENT_ADDR_ERROR:
 		dev_dbg(queue->ctrl->ctrl.device,
 			"CM error event %d\n", ev->event);
 		cm_error = -ECONNRESET;
@@ -1423,22 +1394,32 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
 /*
  * We cannot accept any other command until the Connect command has completed.
  */
-static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
-		struct request *rq)
+static inline blk_status_t
+nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
 {
 	if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
 		struct nvme_command *cmd = nvme_req(rq)->cmd;
 
 		if (!blk_rq_is_passthrough(rq) ||
 		    cmd->common.opcode != nvme_fabrics_command ||
-		    cmd->fabrics.fctype != nvme_fabrics_type_connect)
-			return false;
+		    cmd->fabrics.fctype != nvme_fabrics_type_connect) {
+			/*
+			 * reconnecting state means transport disruption, which
+			 * can take a long time and even might fail permanently,
+			 * so we can't let incoming I/O be requeued forever.
+			 * fail it fast to allow upper layers a chance to
+			 * failover.
+			 */
+			if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
+				return BLK_STS_IOERR;
+			return BLK_STS_RESOURCE; /* try again later */
+		}
 	}
 
-	return true;
+	return 0;
 }
 
-static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
 	struct nvme_ns *ns = hctx->queue->queuedata;
@@ -1449,27 +1430,29 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_command *c = sqe->data;
 	bool flush = false;
 	struct ib_device *dev;
-	int ret;
+	blk_status_t ret;
+	int err;
 
 	WARN_ON_ONCE(rq->tag < 0);
 
-	if (!nvme_rdma_queue_is_ready(queue, rq))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+	ret = nvme_rdma_queue_is_ready(queue, rq);
+	if (unlikely(ret))
+		return ret;
 
 	dev = queue->device->dev;
 	ib_dma_sync_single_for_cpu(dev, sqe->dma,
 			sizeof(struct nvme_command), DMA_TO_DEVICE);
 
 	ret = nvme_setup_cmd(ns, rq, c);
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret)
 		return ret;
 
 	blk_mq_start_request(rq);
 
-	ret = nvme_rdma_map_data(queue, rq, c);
-	if (ret < 0) {
+	err = nvme_rdma_map_data(queue, rq, c);
+	if (err < 0) {
 		dev_err(queue->ctrl->ctrl.device,
-			     "Failed to map data (%d)\n", ret);
+			     "Failed to map data (%d)\n", err);
 		nvme_cleanup_cmd(rq);
 		goto err;
 	}
@@ -1479,17 +1462,18 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	if (req_op(rq) == REQ_OP_FLUSH)
 		flush = true;
-	ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
+	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
 			req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
-	if (ret) {
+	if (err) {
 		nvme_rdma_unmap_data(queue, rq);
 		goto err;
 	}
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 err:
-	return (ret == -ENOMEM || ret == -EAGAIN) ?
-		BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+	if (err == -ENOMEM || err == -EAGAIN)
+		return BLK_STS_RESOURCE;
+	return BLK_STS_IOERR;
 }
 
 static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
@@ -1499,7 +1483,6 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 	struct ib_wc wc;
 	int found = 0;
 
-	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 	while (ib_poll_cq(cq, 1, &wc) > 0) {
 		struct ib_cqe *cqe = wc.wr_cqe;
 
@@ -1536,8 +1519,8 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = {
 static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
 	.queue_rq	= nvme_rdma_queue_rq,
 	.complete	= nvme_rdma_complete_rq,
-	.init_request	= nvme_rdma_init_admin_request,
-	.exit_request	= nvme_rdma_exit_admin_request,
+	.init_request	= nvme_rdma_init_request,
+	.exit_request	= nvme_rdma_exit_request,
 	.reinit_request	= nvme_rdma_reinit_request,
 	.init_hctx	= nvme_rdma_init_admin_hctx,
 	.timeout	= nvme_rdma_timeout,
@@ -1547,7 +1530,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 {
 	int error;
 
-	error = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH);
+	error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
 	if (error)
 		return error;
 
@@ -1648,7 +1631,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
 		nvme_rdma_free_io_queues(ctrl);
 	}
 
-	if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
+	if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
 		nvme_shutdown_ctrl(&ctrl->ctrl);
 
 	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1685,7 +1668,7 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
 		return -EBUSY;
 
-	if (!queue_work(nvme_rdma_wq, &ctrl->delete_work))
+	if (!queue_work(nvme_wq, &ctrl->delete_work))
 		return -EBUSY;
 
 	return 0;
@@ -1719,8 +1702,8 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
 
 static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 {
-	struct nvme_rdma_ctrl *ctrl = container_of(work,
-					struct nvme_rdma_ctrl, reset_work);
+	struct nvme_rdma_ctrl *ctrl =
+		container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
 	int ret;
 	bool changed;
 
@@ -1761,36 +1744,19 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 del_dead_ctrl:
 	/* Deleting this dead controller... */
 	dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
-	WARN_ON(!queue_work(nvme_rdma_wq, &ctrl->delete_work));
-}
-
-static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl)
-{
-	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
-		return -EBUSY;
-
-	if (!queue_work(nvme_rdma_wq, &ctrl->reset_work))
-		return -EBUSY;
-
-	flush_work(&ctrl->reset_work);
-
-	return 0;
+	WARN_ON(!queue_work(nvme_wq, &ctrl->delete_work));
 }
 
 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.name			= "rdma",
 	.module			= THIS_MODULE,
-	.is_fabrics		= true,
+	.flags			= NVME_F_FABRICS,
 	.reg_read32		= nvmf_reg_read32,
 	.reg_read64		= nvmf_reg_read64,
 	.reg_write32		= nvmf_reg_write32,
-	.reset_ctrl		= nvme_rdma_reset_ctrl,
 	.free_ctrl		= nvme_rdma_free_ctrl,
 	.submit_async_event	= nvme_rdma_submit_async_event,
 	.delete_ctrl		= nvme_rdma_del_ctrl,
-	.get_subsysnqn		= nvmf_get_subsysnqn,
 	.get_address		= nvmf_get_address,
 };
 
@@ -1895,8 +1861,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 			nvme_rdma_reconnect_ctrl_work);
 	INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
 	INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
-	INIT_WORK(&ctrl->reset_work, nvme_rdma_reset_ctrl_work);
-	spin_lock_init(&ctrl->lock);
+	INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
 
 	ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -1915,12 +1880,14 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	/* sanity check icdoff */
 	if (ctrl->ctrl.icdoff) {
 		dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
+		ret = -EINVAL;
 		goto out_remove_admin_queue;
 	}
 
 	/* sanity check keyed sgls */
 	if (!(ctrl->ctrl.sgls & (1 << 20))) {
 		dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not support\n");
+		ret = -EINVAL;
 		goto out_remove_admin_queue;
 	}
 
@@ -2009,7 +1976,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
 	}
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-	flush_workqueue(nvme_rdma_wq);
+	flush_workqueue(nvme_wq);
 }
 
 static struct ib_client nvme_rdma_ib_client = {
@@ -2022,13 +1989,9 @@ static int __init nvme_rdma_init_module(void)
 {
 	int ret;
 
-	nvme_rdma_wq = create_workqueue("nvme_rdma_wq");
-	if (!nvme_rdma_wq)
-		return -ENOMEM;
-
 	ret = ib_register_client(&nvme_rdma_ib_client);
 	if (ret)
-		goto err_destroy_wq;
+		return ret;
 
 	ret = nvmf_register_transport(&nvme_rdma_transport);
 	if (ret)
@@ -2038,8 +2001,6 @@ static int __init nvme_rdma_init_module(void)
 
 err_unreg_client:
 	ib_unregister_client(&nvme_rdma_ib_client);
-err_destroy_wq:
-	destroy_workqueue(nvme_rdma_wq);
 	return ret;
 }
 
@@ -2047,7 +2008,6 @@ static void __exit nvme_rdma_cleanup_module(void)
 {
 	nvmf_unregister_transport(&nvme_rdma_transport);
 	ib_unregister_client(&nvme_rdma_ib_client);
-	destroy_workqueue(nvme_rdma_wq);
 }
 
 module_init(nvme_rdma_init_module);
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
deleted file mode 100644
index 1f7671e631dd..000000000000
--- a/drivers/nvme/host/scsi.c
+++ /dev/null
@@ -1,2460 +0,0 @@
-/*
- * NVM Express device driver
- * Copyright (c) 2011-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-/*
- * Refer to the SCSI-NVMe Translation spec for details on how
- * each command is translated.
- */
-
-#include <linux/bio.h>
-#include <linux/bitops.h>
-#include <linux/blkdev.h>
-#include <linux/compat.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/idr.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/kdev_t.h>
-#include <linux/kthread.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/poison.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <asm/unaligned.h>
-#include <scsi/sg.h>
-#include <scsi/scsi.h>
-#include <scsi/scsi_request.h>
-
-#include "nvme.h"
-
-static int sg_version_num = 30534;	/* 2 digits for each component */
-
-/* VPD Page Codes */
-#define VPD_SUPPORTED_PAGES				0x00
-#define VPD_SERIAL_NUMBER				0x80
-#define VPD_DEVICE_IDENTIFIERS				0x83
-#define VPD_EXTENDED_INQUIRY				0x86
-#define VPD_BLOCK_LIMITS				0xB0
-#define VPD_BLOCK_DEV_CHARACTERISTICS			0xB1
-
-/* format unit paramter list offsets */
-#define FORMAT_UNIT_SHORT_PARM_LIST_LEN			4
-#define FORMAT_UNIT_LONG_PARM_LIST_LEN			8
-#define FORMAT_UNIT_PROT_INT_OFFSET			3
-#define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET		0
-#define FORMAT_UNIT_PROT_FIELD_USAGE_MASK		0x07
-
-/* Misc. defines */
-#define FIXED_SENSE_DATA				0x70
-#define DESC_FORMAT_SENSE_DATA				0x72
-#define FIXED_SENSE_DATA_ADD_LENGTH			10
-#define LUN_ENTRY_SIZE					8
-#define LUN_DATA_HEADER_SIZE				8
-#define ALL_LUNS_RETURNED				0x02
-#define ALL_WELL_KNOWN_LUNS_RETURNED			0x01
-#define RESTRICTED_LUNS_RETURNED			0x00
-#define DOWNLOAD_SAVE_ACTIVATE				0x05
-#define DOWNLOAD_SAVE_DEFER_ACTIVATE			0x0E
-#define ACTIVATE_DEFERRED_MICROCODE			0x0F
-#define FORMAT_UNIT_IMMED_MASK				0x2
-#define FORMAT_UNIT_IMMED_OFFSET			1
-#define KELVIN_TEMP_FACTOR				273
-#define FIXED_FMT_SENSE_DATA_SIZE			18
-#define DESC_FMT_SENSE_DATA_SIZE			8
-
-/* SCSI/NVMe defines and bit masks */
-#define INQ_STANDARD_INQUIRY_PAGE			0x00
-#define INQ_SUPPORTED_VPD_PAGES_PAGE			0x00
-#define INQ_UNIT_SERIAL_NUMBER_PAGE			0x80
-#define INQ_DEVICE_IDENTIFICATION_PAGE			0x83
-#define INQ_EXTENDED_INQUIRY_DATA_PAGE			0x86
-#define INQ_BDEV_LIMITS_PAGE				0xB0
-#define INQ_BDEV_CHARACTERISTICS_PAGE			0xB1
-#define INQ_SERIAL_NUMBER_LENGTH			0x14
-#define INQ_NUM_SUPPORTED_VPD_PAGES			6
-#define VERSION_SPC_4					0x06
-#define ACA_UNSUPPORTED					0
-#define STANDARD_INQUIRY_LENGTH				36
-#define ADDITIONAL_STD_INQ_LENGTH			31
-#define EXTENDED_INQUIRY_DATA_PAGE_LENGTH		0x3C
-#define RESERVED_FIELD					0
-
-/* Mode Sense/Select defines */
-#define MODE_PAGE_INFO_EXCEP				0x1C
-#define MODE_PAGE_CACHING				0x08
-#define MODE_PAGE_CONTROL				0x0A
-#define MODE_PAGE_POWER_CONDITION			0x1A
-#define MODE_PAGE_RETURN_ALL				0x3F
-#define MODE_PAGE_BLK_DES_LEN				0x08
-#define MODE_PAGE_LLBAA_BLK_DES_LEN			0x10
-#define MODE_PAGE_CACHING_LEN				0x14
-#define MODE_PAGE_CONTROL_LEN				0x0C
-#define MODE_PAGE_POW_CND_LEN				0x28
-#define MODE_PAGE_INF_EXC_LEN				0x0C
-#define MODE_PAGE_ALL_LEN				0x54
-#define MODE_SENSE6_MPH_SIZE				4
-#define MODE_SENSE_PAGE_CONTROL_MASK			0xC0
-#define MODE_SENSE_PAGE_CODE_OFFSET			2
-#define MODE_SENSE_PAGE_CODE_MASK			0x3F
-#define MODE_SENSE_LLBAA_MASK				0x10
-#define MODE_SENSE_LLBAA_SHIFT				4
-#define MODE_SENSE_DBD_MASK				8
-#define MODE_SENSE_DBD_SHIFT				3
-#define MODE_SENSE10_MPH_SIZE				8
-#define MODE_SELECT_CDB_PAGE_FORMAT_MASK		0x10
-#define MODE_SELECT_CDB_SAVE_PAGES_MASK			0x1
-#define MODE_SELECT_6_BD_OFFSET				3
-#define MODE_SELECT_10_BD_OFFSET			6
-#define MODE_SELECT_10_LLBAA_OFFSET			4
-#define MODE_SELECT_10_LLBAA_MASK			1
-#define MODE_SELECT_6_MPH_SIZE				4
-#define MODE_SELECT_10_MPH_SIZE				8
-#define CACHING_MODE_PAGE_WCE_MASK			0x04
-#define MODE_SENSE_BLK_DESC_ENABLED			0
-#define MODE_SENSE_BLK_DESC_COUNT			1
-#define MODE_SELECT_PAGE_CODE_MASK			0x3F
-#define SHORT_DESC_BLOCK				8
-#define LONG_DESC_BLOCK					16
-#define MODE_PAGE_POW_CND_LEN_FIELD			0x26
-#define MODE_PAGE_INF_EXC_LEN_FIELD			0x0A
-#define MODE_PAGE_CACHING_LEN_FIELD			0x12
-#define MODE_PAGE_CONTROL_LEN_FIELD			0x0A
-#define MODE_SENSE_PC_CURRENT_VALUES			0
-
-/* Log Sense defines */
-#define LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE		0x00
-#define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH		0x07
-#define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE		0x2F
-#define LOG_PAGE_TEMPERATURE_PAGE			0x0D
-#define LOG_SENSE_CDB_SP_NOT_ENABLED			0
-#define LOG_SENSE_CDB_PC_MASK				0xC0
-#define LOG_SENSE_CDB_PC_SHIFT				6
-#define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES		1
-#define LOG_SENSE_CDB_PAGE_CODE_MASK			0x3F
-#define REMAINING_INFO_EXCP_PAGE_LENGTH			0x8
-#define LOG_INFO_EXCP_PAGE_LENGTH			0xC
-#define REMAINING_TEMP_PAGE_LENGTH			0xC
-#define LOG_TEMP_PAGE_LENGTH				0x10
-#define LOG_TEMP_UNKNOWN				0xFF
-#define SUPPORTED_LOG_PAGES_PAGE_LENGTH			0x3
-
-/* Read Capacity defines */
-#define READ_CAP_10_RESP_SIZE				8
-#define READ_CAP_16_RESP_SIZE				32
-
-/* NVMe Namespace and Command Defines */
-#define BYTES_TO_DWORDS					4
-#define NVME_MAX_FIRMWARE_SLOT				7
-
-/* Report LUNs defines */
-#define REPORT_LUNS_FIRST_LUN_OFFSET			8
-
-/* SCSI ADDITIONAL SENSE Codes */
-
-#define SCSI_ASC_NO_SENSE				0x00
-#define SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT		0x03
-#define SCSI_ASC_LUN_NOT_READY				0x04
-#define SCSI_ASC_WARNING				0x0B
-#define SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED		0x10
-#define SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED		0x10
-#define SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED		0x10
-#define SCSI_ASC_UNRECOVERED_READ_ERROR			0x11
-#define SCSI_ASC_MISCOMPARE_DURING_VERIFY		0x1D
-#define SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID		0x20
-#define SCSI_ASC_ILLEGAL_COMMAND			0x20
-#define SCSI_ASC_ILLEGAL_BLOCK				0x21
-#define SCSI_ASC_INVALID_CDB				0x24
-#define SCSI_ASC_INVALID_LUN				0x25
-#define SCSI_ASC_INVALID_PARAMETER			0x26
-#define SCSI_ASC_FORMAT_COMMAND_FAILED			0x31
-#define SCSI_ASC_INTERNAL_TARGET_FAILURE		0x44
-
-/* SCSI ADDITIONAL SENSE Code Qualifiers */
-
-#define SCSI_ASCQ_CAUSE_NOT_REPORTABLE			0x00
-#define SCSI_ASCQ_FORMAT_COMMAND_FAILED			0x01
-#define SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED		0x01
-#define SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED		0x02
-#define SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED		0x03
-#define SCSI_ASCQ_FORMAT_IN_PROGRESS			0x04
-#define SCSI_ASCQ_POWER_LOSS_EXPECTED			0x08
-#define SCSI_ASCQ_INVALID_LUN_ID			0x09
-
-/* copied from drivers/usb/gadget/function/storage_common.h */
-static inline u32 get_unaligned_be24(u8 *buf)
-{
-	return 0xffffff & (u32) get_unaligned_be32(buf - 1);
-}
-
-/* Struct to gather data that needs to be extracted from a SCSI CDB.
-   Not conforming to any particular CDB variant, but compatible with all. */
-
-struct nvme_trans_io_cdb {
-	u8 fua;
-	u8 prot_info;
-	u64 lba;
-	u32 xfer_len;
-};
-
-
-/* Internal Helper Functions */
-
-
-/* Copy data to userspace memory */
-
-static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
-								unsigned long n)
-{
-	int i;
-	void *index = from;
-	size_t remaining = n;
-	size_t xfer_len;
-
-	if (hdr->iovec_count > 0) {
-		struct sg_iovec sgl;
-
-		for (i = 0; i < hdr->iovec_count; i++) {
-			if (copy_from_user(&sgl, hdr->dxferp +
-						i * sizeof(struct sg_iovec),
-						sizeof(struct sg_iovec)))
-				return -EFAULT;
-			xfer_len = min(remaining, sgl.iov_len);
-			if (copy_to_user(sgl.iov_base, index, xfer_len))
-				return -EFAULT;
-
-			index += xfer_len;
-			remaining -= xfer_len;
-			if (remaining == 0)
-				break;
-		}
-		return 0;
-	}
-
-	if (copy_to_user(hdr->dxferp, from, n))
-		return -EFAULT;
-	return 0;
-}
-
-/* Copy data from userspace memory */
-
-static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
-								unsigned long n)
-{
-	int i;
-	void *index = to;
-	size_t remaining = n;
-	size_t xfer_len;
-
-	if (hdr->iovec_count > 0) {
-		struct sg_iovec sgl;
-
-		for (i = 0; i < hdr->iovec_count; i++) {
-			if (copy_from_user(&sgl, hdr->dxferp +
-						i * sizeof(struct sg_iovec),
-						sizeof(struct sg_iovec)))
-				return -EFAULT;
-			xfer_len = min(remaining, sgl.iov_len);
-			if (copy_from_user(index, sgl.iov_base, xfer_len))
-				return -EFAULT;
-			index += xfer_len;
-			remaining -= xfer_len;
-			if (remaining == 0)
-				break;
-		}
-		return 0;
-	}
-
-	if (copy_from_user(to, hdr->dxferp, n))
-		return -EFAULT;
-	return 0;
-}
-
-/* Status/Sense Buffer Writeback */
-
-static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
-				 u8 asc, u8 ascq)
-{
-	u8 xfer_len;
-	u8 resp[DESC_FMT_SENSE_DATA_SIZE];
-
-	if (scsi_status_is_good(status)) {
-		hdr->status = SAM_STAT_GOOD;
-		hdr->masked_status = GOOD;
-		hdr->host_status = DID_OK;
-		hdr->driver_status = DRIVER_OK;
-		hdr->sb_len_wr = 0;
-	} else {
-		hdr->status = status;
-		hdr->masked_status = status >> 1;
-		hdr->host_status = DID_OK;
-		hdr->driver_status = DRIVER_OK;
-
-		memset(resp, 0, DESC_FMT_SENSE_DATA_SIZE);
-		resp[0] = DESC_FORMAT_SENSE_DATA;
-		resp[1] = sense_key;
-		resp[2] = asc;
-		resp[3] = ascq;
-
-		xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE);
-		hdr->sb_len_wr = xfer_len;
-		if (copy_to_user(hdr->sbp, resp, xfer_len) > 0)
-			return -EFAULT;
-	}
-
-	return 0;
-}
-
-/*
- * Take a status code from a lowlevel routine, and if it was a positive NVMe
- * error code update the sense data based on it.  In either case the passed
- * in value is returned again, unless an -EFAULT from copy_to_user overrides
- * it.
- */
-static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
-{
-	u8 status, sense_key, asc, ascq;
-	int res;
-
-	/* For non-nvme (Linux) errors, simply return the error code */
-	if (nvme_sc < 0)
-		return nvme_sc;
-
-	/* Mask DNR, More, and reserved fields */
-	switch (nvme_sc & 0x7FF) {
-	/* Generic Command Status */
-	case NVME_SC_SUCCESS:
-		status = SAM_STAT_GOOD;
-		sense_key = NO_SENSE;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_INVALID_OPCODE:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_ILLEGAL_COMMAND;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_INVALID_FIELD:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_INVALID_CDB;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_DATA_XFER_ERROR:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MEDIUM_ERROR;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_POWER_LOSS:
-		status = SAM_STAT_TASK_ABORTED;
-		sense_key = ABORTED_COMMAND;
-		asc = SCSI_ASC_WARNING;
-		ascq = SCSI_ASCQ_POWER_LOSS_EXPECTED;
-		break;
-	case NVME_SC_INTERNAL:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = HARDWARE_ERROR;
-		asc = SCSI_ASC_INTERNAL_TARGET_FAILURE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_ABORT_REQ:
-		status = SAM_STAT_TASK_ABORTED;
-		sense_key = ABORTED_COMMAND;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_ABORT_QUEUE:
-		status = SAM_STAT_TASK_ABORTED;
-		sense_key = ABORTED_COMMAND;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_FUSED_FAIL:
-		status = SAM_STAT_TASK_ABORTED;
-		sense_key = ABORTED_COMMAND;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_FUSED_MISSING:
-		status = SAM_STAT_TASK_ABORTED;
-		sense_key = ABORTED_COMMAND;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_INVALID_NS:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
-		ascq = SCSI_ASCQ_INVALID_LUN_ID;
-		break;
-	case NVME_SC_LBA_RANGE:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_ILLEGAL_BLOCK;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_CAP_EXCEEDED:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MEDIUM_ERROR;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_NS_NOT_READY:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = NOT_READY;
-		asc = SCSI_ASC_LUN_NOT_READY;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-
-	/* Command Specific Status */
-	case NVME_SC_INVALID_FORMAT:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_FORMAT_COMMAND_FAILED;
-		ascq = SCSI_ASCQ_FORMAT_COMMAND_FAILED;
-		break;
-	case NVME_SC_BAD_ATTRIBUTES:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_INVALID_CDB;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-
-	/* Media Errors */
-	case NVME_SC_WRITE_FAULT:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MEDIUM_ERROR;
-		asc = SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_READ_ERROR:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MEDIUM_ERROR;
-		asc = SCSI_ASC_UNRECOVERED_READ_ERROR;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_GUARD_CHECK:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MEDIUM_ERROR;
-		asc = SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED;
-		ascq = SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED;
-		break;
-	case NVME_SC_APPTAG_CHECK:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MEDIUM_ERROR;
-		asc = SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED;
-		ascq = SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED;
-		break;
-	case NVME_SC_REFTAG_CHECK:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MEDIUM_ERROR;
-		asc = SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED;
-		ascq = SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED;
-		break;
-	case NVME_SC_COMPARE_FAILED:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MISCOMPARE;
-		asc = SCSI_ASC_MISCOMPARE_DURING_VERIFY;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_ACCESS_DENIED:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
-		ascq = SCSI_ASCQ_INVALID_LUN_ID;
-		break;
-
-	/* Unspecified/Default */
-	case NVME_SC_CMDID_CONFLICT:
-	case NVME_SC_CMD_SEQ_ERROR:
-	case NVME_SC_CQ_INVALID:
-	case NVME_SC_QID_INVALID:
-	case NVME_SC_QUEUE_SIZE:
-	case NVME_SC_ABORT_LIMIT:
-	case NVME_SC_ABORT_MISSING:
-	case NVME_SC_ASYNC_LIMIT:
-	case NVME_SC_FIRMWARE_SLOT:
-	case NVME_SC_FIRMWARE_IMAGE:
-	case NVME_SC_INVALID_VECTOR:
-	case NVME_SC_INVALID_LOG_PAGE:
-	default:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	}
-
-	res = nvme_trans_completion(hdr, status, sense_key, asc, ascq);
-	return res ? res : nvme_sc;
-}
-
-/* INQUIRY Helper Functions */
-
-static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *inq_response,
-					int alloc_len)
-{
-	struct nvme_ctrl *ctrl = ns->ctrl;
-	struct nvme_id_ns *id_ns;
-	int res;
-	int nvme_sc;
-	int xfer_len;
-	u8 resp_data_format = 0x02;
-	u8 protect;
-	u8 cmdque = 0x01 << 1;
-	u8 fw_offset = sizeof(ctrl->firmware_rev);
-
-	/* nvme ns identify - use DPS value for PROTECT field */
-	nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;
-
-	if (id_ns->dps)
-		protect = 0x01;
-	else
-		protect = 0;
-	kfree(id_ns);
-
-	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
-	inq_response[2] = VERSION_SPC_4;
-	inq_response[3] = resp_data_format;	/*normaca=0 | hisup=0 */
-	inq_response[4] = ADDITIONAL_STD_INQ_LENGTH;
-	inq_response[5] = protect;	/* sccs=0 | acc=0 | tpgs=0 | pc3=0 */
-	inq_response[7] = cmdque;	/* wbus16=0 | sync=0 | vs=0 */
-	strncpy(&inq_response[8], "NVMe    ", 8);
-	strncpy(&inq_response[16], ctrl->model, 16);
-
-	while (ctrl->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
-		fw_offset--;
-	fw_offset -= 4;
-	strncpy(&inq_response[32], ctrl->firmware_rev + fw_offset, 4);
-
-	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *inq_response,
-					int alloc_len)
-{
-	int xfer_len;
-
-	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
-	inq_response[1] = INQ_SUPPORTED_VPD_PAGES_PAGE;   /* Page Code */
-	inq_response[3] = INQ_NUM_SUPPORTED_VPD_PAGES;    /* Page Length */
-	inq_response[4] = INQ_SUPPORTED_VPD_PAGES_PAGE;
-	inq_response[5] = INQ_UNIT_SERIAL_NUMBER_PAGE;
-	inq_response[6] = INQ_DEVICE_IDENTIFICATION_PAGE;
-	inq_response[7] = INQ_EXTENDED_INQUIRY_DATA_PAGE;
-	inq_response[8] = INQ_BDEV_CHARACTERISTICS_PAGE;
-	inq_response[9] = INQ_BDEV_LIMITS_PAGE;
-
-	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *inq_response,
-					int alloc_len)
-{
-	int xfer_len;
-
-	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
-	inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */
-	inq_response[3] = INQ_SERIAL_NUMBER_LENGTH;    /* Page Length */
-	strncpy(&inq_response[4], ns->ctrl->serial, INQ_SERIAL_NUMBER_LENGTH);
-
-	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-		u8 *inq_response, int alloc_len)
-{
-	struct nvme_id_ns *id_ns;
-	int nvme_sc, res;
-	size_t len;
-	void *eui;
-
-	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;
-
-	eui = id_ns->eui64;
-	len = sizeof(id_ns->eui64);
-
-	if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) {
-		if (bitmap_empty(eui, len * 8)) {
-			eui = id_ns->nguid;
-			len = sizeof(id_ns->nguid);
-		}
-	}
-
-	if (bitmap_empty(eui, len * 8)) {
-		res = -EOPNOTSUPP;
-		goto out_free_id;
-	}
-
-	memset(inq_response, 0, alloc_len);
-	inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
-	inq_response[3] = 4 + len; /* Page Length */
-
-	/* Designation Descriptor start */
-	inq_response[4] = 0x01;	/* Proto ID=0h | Code set=1h */
-	inq_response[5] = 0x02;	/* PIV=0b | Asso=00b | Designator Type=2h */
-	inq_response[6] = 0x00;	/* Rsvd */
-	inq_response[7] = len;	/* Designator Length */
-	memcpy(&inq_response[8], eui, len);
-
-	res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
-out_free_id:
-	kfree(id_ns);
-	return res;
-}
-
-static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
-		struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
-{
-	struct nvme_ctrl *ctrl = ns->ctrl;
-	struct nvme_id_ctrl *id_ctrl;
-	int nvme_sc, res;
-
-	if (alloc_len < 72) {
-		return nvme_trans_completion(hdr,
-				SAM_STAT_CHECK_CONDITION,
-				ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-	}
-
-	nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;
-
-	memset(inq_response, 0, alloc_len);
-	inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
-	inq_response[3] = 0x48;	/* Page Length */
-
-	/* Designation Descriptor start */
-	inq_response[4] = 0x03;	/* Proto ID=0h | Code set=3h */
-	inq_response[5] = 0x08;	/* PIV=0b | Asso=00b | Designator Type=8h */
-	inq_response[6] = 0x00;	/* Rsvd */
-	inq_response[7] = 0x44;	/* Designator Length */
-
-	sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid));
-	memcpy(&inq_response[12], ctrl->model, sizeof(ctrl->model));
-	sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
-	memcpy(&inq_response[56], ctrl->serial, sizeof(ctrl->serial));
-
-	res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
-	kfree(id_ctrl);
-	return res;
-}
-
-static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					u8 *resp, int alloc_len)
-{
-	int res;
-
-	if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) {
-		res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
-		if (res != -EOPNOTSUPP)
-			return res;
-	}
-
-	return nvme_fill_device_id_scsi_string(ns, hdr, resp, alloc_len);
-}
-
-static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					int alloc_len)
-{
-	u8 *inq_response;
-	int res;
-	int nvme_sc;
-	struct nvme_ctrl *ctrl = ns->ctrl;
-	struct nvme_id_ctrl *id_ctrl;
-	struct nvme_id_ns *id_ns;
-	int xfer_len;
-	u8 microcode = 0x80;
-	u8 spt;
-	u8 spt_lut[8] = {0, 0, 2, 1, 4, 6, 5, 7};
-	u8 grd_chk, app_chk, ref_chk, protect;
-	u8 uask_sup = 0x20;
-	u8 v_sup;
-	u8 luiclr = 0x01;
-
-	inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
-	if (inq_response == NULL)
-		return -ENOMEM;
-
-	nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		goto out_free_inq;
-
-	spt = spt_lut[id_ns->dpc & 0x07] << 3;
-	if (id_ns->dps)
-		protect = 0x01;
-	else
-		protect = 0;
-	kfree(id_ns);
-
-	grd_chk = protect << 2;
-	app_chk = protect << 1;
-	ref_chk = protect;
-
-	nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		goto out_free_inq;
-
-	v_sup = id_ctrl->vwc;
-	kfree(id_ctrl);
-
-	memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
-	inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE;    /* Page Code */
-	inq_response[2] = 0x00;    /* Page Length MSB */
-	inq_response[3] = 0x3C;    /* Page Length LSB */
-	inq_response[4] = microcode | spt | grd_chk | app_chk | ref_chk;
-	inq_response[5] = uask_sup;
-	inq_response[6] = v_sup;
-	inq_response[7] = luiclr;
-	inq_response[8] = 0;
-	inq_response[9] = 0;
-
-	xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- out_free_inq:
-	kfree(inq_response);
-	return res;
-}
-
-static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					u8 *inq_response, int alloc_len)
-{
-	__be32 max_sectors = cpu_to_be32(
-		nvme_block_nr(ns, queue_max_hw_sectors(ns->queue)));
-	__be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors);
-	__be32 discard_desc_count = cpu_to_be32(0x100);
-
-	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
-	inq_response[1] = VPD_BLOCK_LIMITS;
-	inq_response[3] = 0x3c; /* Page Length */
-	memcpy(&inq_response[8], &max_sectors, sizeof(u32));
-	memcpy(&inq_response[20], &max_discard, sizeof(u32));
-
-	if (max_discard)
-		memcpy(&inq_response[24], &discard_desc_count, sizeof(u32));
-
-	return nvme_trans_copy_to_user(hdr, inq_response, 0x3c);
-}
-
-static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					int alloc_len)
-{
-	u8 *inq_response;
-	int res;
-	int xfer_len;
-
-	inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
-	if (inq_response == NULL) {
-		res = -ENOMEM;
-		goto out_mem;
-	}
-
-	inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE;    /* Page Code */
-	inq_response[2] = 0x00;    /* Page Length MSB */
-	inq_response[3] = 0x3C;    /* Page Length LSB */
-	inq_response[4] = 0x00;    /* Medium Rotation Rate MSB */
-	inq_response[5] = 0x01;    /* Medium Rotation Rate LSB */
-	inq_response[6] = 0x00;    /* Form Factor */
-
-	xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
-	kfree(inq_response);
- out_mem:
-	return res;
-}
-
-/* LOG SENSE Helper Functions */
-
-static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					int alloc_len)
-{
-	int res;
-	int xfer_len;
-	u8 *log_response;
-
-	log_response = kzalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL);
-	if (log_response == NULL) {
-		res = -ENOMEM;
-		goto out_mem;
-	}
-
-	log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
-	/* Subpage=0x00, Page Length MSB=0 */
-	log_response[3] = SUPPORTED_LOG_PAGES_PAGE_LENGTH;
-	log_response[4] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
-	log_response[5] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
-	log_response[6] = LOG_PAGE_TEMPERATURE_PAGE;
-
-	xfer_len = min(alloc_len, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
-	kfree(log_response);
- out_mem:
-	return res;
-}
-
-static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, int alloc_len)
-{
-	int res;
-	int xfer_len;
-	u8 *log_response;
-	struct nvme_smart_log *smart_log;
-	u8 temp_c;
-	u16 temp_k;
-
-	log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
-	if (log_response == NULL)
-		return -ENOMEM;
-
-	res = nvme_get_log_page(ns->ctrl, &smart_log);
-	if (res < 0)
-		goto out_free_response;
-
-	if (res != NVME_SC_SUCCESS) {
-		temp_c = LOG_TEMP_UNKNOWN;
-	} else {
-		temp_k = (smart_log->temperature[1] << 8) +
-				(smart_log->temperature[0]);
-		temp_c = temp_k - KELVIN_TEMP_FACTOR;
-	}
-	kfree(smart_log);
-
-	log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
-	/* Subpage=0x00, Page Length MSB=0 */
-	log_response[3] = REMAINING_INFO_EXCP_PAGE_LENGTH;
-	/* Informational Exceptions Log Parameter 1 Start */
-	/* Parameter Code=0x0000 bytes 4,5 */
-	log_response[6] = 0x23; /* DU=0, TSD=1, ETC=0, TMC=0, FMT_AND_LNK=11b */
-	log_response[7] = 0x04; /* PARAMETER LENGTH */
-	/* Add sense Code and qualifier = 0x00 each */
-	/* Use Temperature from NVMe Get Log Page, convert to C from K */
-	log_response[10] = temp_c;
-
-	xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- out_free_response:
-	kfree(log_response);
-	return res;
-}
-
-static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					int alloc_len)
-{
-	int res;
-	int xfer_len;
-	u8 *log_response;
-	struct nvme_smart_log *smart_log;
-	u32 feature_resp;
-	u8 temp_c_cur, temp_c_thresh;
-	u16 temp_k;
-
-	log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
-	if (log_response == NULL)
-		return -ENOMEM;
-
-	res = nvme_get_log_page(ns->ctrl, &smart_log);
-	if (res < 0)
-		goto out_free_response;
-
-	if (res != NVME_SC_SUCCESS) {
-		temp_c_cur = LOG_TEMP_UNKNOWN;
-	} else {
-		temp_k = (smart_log->temperature[1] << 8) +
-				(smart_log->temperature[0]);
-		temp_c_cur = temp_k - KELVIN_TEMP_FACTOR;
-	}
-	kfree(smart_log);
-
-	/* Get Features for Temp Threshold */
-	res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, NULL, 0,
-								&feature_resp);
-	if (res != NVME_SC_SUCCESS)
-		temp_c_thresh = LOG_TEMP_UNKNOWN;
-	else
-		temp_c_thresh = (feature_resp & 0xFFFF) - KELVIN_TEMP_FACTOR;
-
-	log_response[0] = LOG_PAGE_TEMPERATURE_PAGE;
-	/* Subpage=0x00, Page Length MSB=0 */
-	log_response[3] = REMAINING_TEMP_PAGE_LENGTH;
-	/* Temperature Log Parameter 1 (Temperature) Start */
-	/* Parameter Code = 0x0000 */
-	log_response[6] = 0x01;		/* Format and Linking = 01b */
-	log_response[7] = 0x02;		/* Parameter Length */
-	/* Use Temperature from NVMe Get Log Page, convert to C from K */
-	log_response[9] = temp_c_cur;
-	/* Temperature Log Parameter 2 (Reference Temperature) Start */
-	log_response[11] = 0x01;	/* Parameter Code = 0x0001 */
-	log_response[12] = 0x01;	/* Format and Linking = 01b */
-	log_response[13] = 0x02;	/* Parameter Length */
-	/* Use Temperature Thresh from NVMe Get Log Page, convert to C from K */
-	log_response[15] = temp_c_thresh;
-
-	xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- out_free_response:
-	kfree(log_response);
-	return res;
-}
-
-/* MODE SENSE Helper Functions */
-
-static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa,
-					u16 mode_data_length, u16 blk_desc_len)
-{
-	/* Quick check to make sure I don't stomp on my own memory... */
-	if ((cdb10 && len < 8) || (!cdb10 && len < 4))
-		return -EINVAL;
-
-	if (cdb10) {
-		resp[0] = (mode_data_length & 0xFF00) >> 8;
-		resp[1] = (mode_data_length & 0x00FF);
-		resp[3] = 0x10 /* DPOFUA */;
-		resp[4] = llbaa;
-		resp[5] = RESERVED_FIELD;
-		resp[6] = (blk_desc_len & 0xFF00) >> 8;
-		resp[7] = (blk_desc_len & 0x00FF);
-	} else {
-		resp[0] = (mode_data_length & 0x00FF);
-		resp[2] = 0x10 /* DPOFUA */;
-		resp[3] = (blk_desc_len & 0x00FF);
-	}
-
-	return 0;
-}
-
-static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-				    u8 *resp, int len, u8 llbaa)
-{
-	int res;
-	int nvme_sc;
-	struct nvme_id_ns *id_ns;
-	u8 flbas;
-	u32 lba_length;
-
-	if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN)
-		return -EINVAL;
-	else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
-		return -EINVAL;
-
-	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;
-
-	flbas = (id_ns->flbas) & 0x0F;
-	lba_length = (1 << (id_ns->lbaf[flbas].ds));
-
-	if (llbaa == 0) {
-		__be32 tmp_cap = cpu_to_be32(le64_to_cpu(id_ns->ncap));
-		/* Byte 4 is reserved */
-		__be32 tmp_len = cpu_to_be32(lba_length & 0x00FFFFFF);
-
-		memcpy(resp, &tmp_cap, sizeof(u32));
-		memcpy(&resp[4], &tmp_len, sizeof(u32));
-	} else {
-		__be64 tmp_cap = cpu_to_be64(le64_to_cpu(id_ns->ncap));
-		__be32 tmp_len = cpu_to_be32(lba_length);
-
-		memcpy(resp, &tmp_cap, sizeof(u64));
-		/* Bytes 8, 9, 10, 11 are reserved */
-		memcpy(&resp[12], &tmp_len, sizeof(u32));
-	}
-
-	kfree(id_ns);
-	return res;
-}
-
-static int nvme_trans_fill_control_page(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *resp,
-					int len)
-{
-	if (len < MODE_PAGE_CONTROL_LEN)
-		return -EINVAL;
-
-	resp[0] = MODE_PAGE_CONTROL;
-	resp[1] = MODE_PAGE_CONTROL_LEN_FIELD;
-	resp[2] = 0x0E;		/* TST=000b, TMF_ONLY=0, DPICZ=1,
-				 * D_SENSE=1, GLTSD=1, RLEC=0 */
-	resp[3] = 0x12;		/* Q_ALGO_MODIFIER=1h, NUAR=0, QERR=01b */
-	/* Byte 4:  VS=0, RAC=0, UA_INT=0, SWP=0 */
-	resp[5] = 0x40;		/* ATO=0, TAS=1, ATMPE=0, RWWP=0, AUTOLOAD=0 */
-	/* resp[6] and [7] are obsolete, thus zero */
-	resp[8] = 0xFF;		/* Busy timeout period = 0xffff */
-	resp[9] = 0xFF;
-	/* Bytes 10,11: Extended selftest completion time = 0x0000 */
-
-	return 0;
-}
-
-static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr,
-					u8 *resp, int len)
-{
-	int res = 0;
-	int nvme_sc;
-	u32 feature_resp;
-	u8 vwc;
-
-	if (len < MODE_PAGE_CACHING_LEN)
-		return -EINVAL;
-
-	nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, NULL, 0,
-								&feature_resp);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;
-
-	vwc = feature_resp & 0x00000001;
-
-	resp[0] = MODE_PAGE_CACHING;
-	resp[1] = MODE_PAGE_CACHING_LEN_FIELD;
-	resp[2] = vwc << 2;
-	return 0;
-}
-
-static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *resp,
-					int len)
-{
-	if (len < MODE_PAGE_POW_CND_LEN)
-		return -EINVAL;
-
-	resp[0] = MODE_PAGE_POWER_CONDITION;
-	resp[1] = MODE_PAGE_POW_CND_LEN_FIELD;
-	/* All other bytes are zero */
-
-	return 0;
-}
-
-static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *resp,
-					int len)
-{
-	if (len < MODE_PAGE_INF_EXC_LEN)
-		return -EINVAL;
-
-	resp[0] = MODE_PAGE_INFO_EXCEP;
-	resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD;
-	resp[2] = 0x88;
-	/* All other bytes are zero */
-
-	return 0;
-}
-
-static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-				     u8 *resp, int len)
-{
-	int res;
-	u16 mode_pages_offset_1 = 0;
-	u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4;
-
-	mode_pages_offset_2 = mode_pages_offset_1 + MODE_PAGE_CACHING_LEN;
-	mode_pages_offset_3 = mode_pages_offset_2 + MODE_PAGE_CONTROL_LEN;
-	mode_pages_offset_4 = mode_pages_offset_3 + MODE_PAGE_POW_CND_LEN;
-
-	res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1],
-					MODE_PAGE_CACHING_LEN);
-	if (res)
-		return res;
-	res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2],
-					MODE_PAGE_CONTROL_LEN);
-	if (res)
-		return res;
-	res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3],
-					MODE_PAGE_POW_CND_LEN);
-	if (res)
-		return res;
-	return nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
-					MODE_PAGE_INF_EXC_LEN);
-}
-
-static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa)
-{
-	if (dbd == MODE_SENSE_BLK_DESC_ENABLED) {
-		/* SPC-4: len = 8 x Num_of_descriptors if llbaa = 0, 16x if 1 */
-		return 8 * (llbaa + 1) * MODE_SENSE_BLK_DESC_COUNT;
-	} else {
-		return 0;
-	}
-}
-
-static int nvme_trans_mode_page_create(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *cmd,
-					u16 alloc_len, u8 cdb10,
-					int (*mode_page_fill_func)
-					(struct nvme_ns *,
-					struct sg_io_hdr *hdr, u8 *, int),
-					u16 mode_pages_tot_len)
-{
-	int res;
-	int xfer_len;
-	u8 *response;
-	u8 dbd, llbaa;
-	u16 resp_size;
-	int mph_size;
-	u16 mode_pages_offset_1;
-	u16 blk_desc_len, blk_desc_offset, mode_data_length;
-
-	dbd = (cmd[1] & MODE_SENSE_DBD_MASK) >> MODE_SENSE_DBD_SHIFT;
-	llbaa = (cmd[1] & MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT;
-	mph_size = cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE;
-
-	blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa);
-
-	resp_size = mph_size + blk_desc_len + mode_pages_tot_len;
-	/* Refer spc4r34 Table 440 for calculation of Mode data Length field */
-	mode_data_length = 3 + (3 * cdb10) + blk_desc_len + mode_pages_tot_len;
-
-	blk_desc_offset = mph_size;
-	mode_pages_offset_1 = blk_desc_offset + blk_desc_len;
-
-	response = kzalloc(resp_size, GFP_KERNEL);
-	if (response == NULL) {
-		res = -ENOMEM;
-		goto out_mem;
-	}
-
-	res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
-					llbaa, mode_data_length, blk_desc_len);
-	if (res)
-		goto out_free;
-	if (blk_desc_len > 0) {
-		res = nvme_trans_fill_blk_desc(ns, hdr,
-					       &response[blk_desc_offset],
-					       blk_desc_len, llbaa);
-		if (res)
-			goto out_free;
-	}
-	res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1],
-					mode_pages_tot_len);
-	if (res)
-		goto out_free;
-
-	xfer_len = min(alloc_len, resp_size);
-	res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- out_free:
-	kfree(response);
- out_mem:
-	return res;
-}
-
-/* Read Capacity Helper Functions */
-
-static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns,
-								u8 cdb16)
-{
-	u8 flbas;
-	u32 lba_length;
-	u64 rlba;
-	u8 prot_en;
-	u8 p_type_lut[4] = {0, 0, 1, 2};
-	__be64 tmp_rlba;
-	__be32 tmp_rlba_32;
-	__be32 tmp_len;
-
-	flbas = (id_ns->flbas) & 0x0F;
-	lba_length = (1 << (id_ns->lbaf[flbas].ds));
-	rlba = le64_to_cpup(&id_ns->nsze) - 1;
-	(id_ns->dps) ? (prot_en = 0x01) : (prot_en = 0);
-
-	if (!cdb16) {
-		if (rlba > 0xFFFFFFFF)
-			rlba = 0xFFFFFFFF;
-		tmp_rlba_32 = cpu_to_be32(rlba);
-		tmp_len = cpu_to_be32(lba_length);
-		memcpy(response, &tmp_rlba_32, sizeof(u32));
-		memcpy(&response[4], &tmp_len, sizeof(u32));
-	} else {
-		tmp_rlba = cpu_to_be64(rlba);
-		tmp_len = cpu_to_be32(lba_length);
-		memcpy(response, &tmp_rlba, sizeof(u64));
-		memcpy(&response[8], &tmp_len, sizeof(u32));
-		response[12] = (p_type_lut[id_ns->dps & 0x3] << 1) | prot_en;
-		/* P_I_Exponent = 0x0 | LBPPBE = 0x0 */
-		/* LBPME = 0 | LBPRZ = 0 | LALBA = 0x00 */
-		/* Bytes 16-31 - Reserved */
-	}
-}
-
-/* Start Stop Unit Helper Functions */
-
-static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					u8 buffer_id)
-{
-	struct nvme_command c;
-	int nvme_sc;
-
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_admin_activate_fw;
-	c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV);
-
-	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
-	return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					u8 opcode, u32 tot_len, u32 offset,
-					u8 buffer_id)
-{
-	int nvme_sc;
-	struct nvme_command c;
-
-	if (hdr->iovec_count > 0) {
-		/* Assuming SGL is not allowed for this command */
-		return nvme_trans_completion(hdr,
-					SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST,
-					SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-	}
-
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_admin_download_fw;
-	c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
-	c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
-
-	nvme_sc = nvme_submit_user_cmd(ns->ctrl->admin_q, &c,
-			hdr->dxferp, tot_len, NULL, 0);
-	return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-/* Mode Select Helper Functions */
-
-static inline void nvme_trans_modesel_get_bd_len(u8 *parm_list, u8 cdb10,
-						u16 *bd_len, u8 *llbaa)
-{
-	if (cdb10) {
-		/* 10 Byte CDB */
-		*bd_len = (parm_list[MODE_SELECT_10_BD_OFFSET] << 8) +
-			parm_list[MODE_SELECT_10_BD_OFFSET + 1];
-		*llbaa = parm_list[MODE_SELECT_10_LLBAA_OFFSET] &
-				MODE_SELECT_10_LLBAA_MASK;
-	} else {
-		/* 6 Byte CDB */
-		*bd_len = parm_list[MODE_SELECT_6_BD_OFFSET];
-	}
-}
-
-static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
-					u16 idx, u16 bd_len, u8 llbaa)
-{
-	/* Store block descriptor info if a FORMAT UNIT comes later */
-	/* TODO Saving 1st BD info; what to do if multiple BD received? */
-	if (llbaa == 0) {
-		/* Standard Block Descriptor - spc4r34 7.5.5.1 */
-		ns->mode_select_num_blocks =
-				(parm_list[idx + 1] << 16) +
-				(parm_list[idx + 2] << 8) +
-				(parm_list[idx + 3]);
-
-		ns->mode_select_block_len =
-				(parm_list[idx + 5] << 16) +
-				(parm_list[idx + 6] << 8) +
-				(parm_list[idx + 7]);
-	} else {
-		/* Long LBA Block Descriptor - sbc3r27 6.4.2.3 */
-		ns->mode_select_num_blocks =
-				(((u64)parm_list[idx + 0]) << 56) +
-				(((u64)parm_list[idx + 1]) << 48) +
-				(((u64)parm_list[idx + 2]) << 40) +
-				(((u64)parm_list[idx + 3]) << 32) +
-				(((u64)parm_list[idx + 4]) << 24) +
-				(((u64)parm_list[idx + 5]) << 16) +
-				(((u64)parm_list[idx + 6]) << 8) +
-				((u64)parm_list[idx + 7]);
-
-		ns->mode_select_block_len =
-				(parm_list[idx + 12] << 24) +
-				(parm_list[idx + 13] << 16) +
-				(parm_list[idx + 14] << 8) +
-				(parm_list[idx + 15]);
-	}
-}
-
-static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					u8 *mode_page, u8 page_code)
-{
-	int res = 0;
-	int nvme_sc;
-	unsigned dword11;
-
-	switch (page_code) {
-	case MODE_PAGE_CACHING:
-		dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0);
-		nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC,
-					    dword11, NULL, 0, NULL);
-		res = nvme_trans_status_code(hdr, nvme_sc);
-		break;
-	case MODE_PAGE_CONTROL:
-		break;
-	case MODE_PAGE_POWER_CONDITION:
-		/* Verify the OS is not trying to set timers */
-		if ((mode_page[2] & 0x01) != 0 || (mode_page[3] & 0x0F) != 0) {
-			res = nvme_trans_completion(hdr,
-						SAM_STAT_CHECK_CONDITION,
-						ILLEGAL_REQUEST,
-						SCSI_ASC_INVALID_PARAMETER,
-						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			break;
-		}
-		break;
-	default:
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		break;
-	}
-
-	return res;
-}
-
-static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					u8 *cmd, u16 parm_list_len, u8 pf,
-					u8 sp, u8 cdb10)
-{
-	int res;
-	u8 *parm_list;
-	u16 bd_len;
-	u8 llbaa = 0;
-	u16 index, saved_index;
-	u8 page_code;
-	u16 mp_size;
-
-	/* Get parm list from data-in/out buffer */
-	parm_list = kmalloc(parm_list_len, GFP_KERNEL);
-	if (parm_list == NULL) {
-		res = -ENOMEM;
-		goto out;
-	}
-
-	res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len);
-	if (res)
-		goto out_mem;
-
-	nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa);
-	index = (cdb10) ? (MODE_SELECT_10_MPH_SIZE) : (MODE_SELECT_6_MPH_SIZE);
-
-	if (bd_len != 0) {
-		/* Block Descriptors present, parse */
-		nvme_trans_modesel_save_bd(ns, parm_list, index, bd_len, llbaa);
-		index += bd_len;
-	}
-	saved_index = index;
-
-	/* Multiple mode pages may be present; iterate through all */
-	/* In 1st Iteration, don't do NVME Command, only check for CDB errors */
-	do {
-		page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
-		mp_size = parm_list[index + 1] + 2;
-		if ((page_code != MODE_PAGE_CACHING) &&
-		    (page_code != MODE_PAGE_CONTROL) &&
-		    (page_code != MODE_PAGE_POWER_CONDITION)) {
-			res = nvme_trans_completion(hdr,
-						SAM_STAT_CHECK_CONDITION,
-						ILLEGAL_REQUEST,
-						SCSI_ASC_INVALID_CDB,
-						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			goto out_mem;
-		}
-		index += mp_size;
-	} while (index < parm_list_len);
-
-	/* In 2nd Iteration, do the NVME Commands */
-	index = saved_index;
-	do {
-		page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
-		mp_size = parm_list[index + 1] + 2;
-		res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index],
-								page_code);
-		if (res)
-			break;
-		index += mp_size;
-	} while (index < parm_list_len);
-
- out_mem:
-	kfree(parm_list);
- out:
-	return res;
-}
-
-/* Format Unit Helper Functions */
-
-static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
-					     struct sg_io_hdr *hdr)
-{
-	int res = 0;
-	int nvme_sc;
-	u8 flbas;
-
-	/*
-	 * SCSI Expects a MODE SELECT would have been issued prior to
-	 * a FORMAT UNIT, and the block size and number would be used
-	 * from the block descriptor in it. If a MODE SELECT had not
-	 * been issued, FORMAT shall use the current values for both.
-	 */
-
-	if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
-		struct nvme_id_ns *id_ns;
-
-		nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-		res = nvme_trans_status_code(hdr, nvme_sc);
-		if (res)
-			return res;
-
-		if (ns->mode_select_num_blocks == 0)
-			ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap);
-		if (ns->mode_select_block_len == 0) {
-			flbas = (id_ns->flbas) & 0x0F;
-			ns->mode_select_block_len =
-						(1 << (id_ns->lbaf[flbas].ds));
-		}
-
-		kfree(id_ns);
-	}
-
-	return 0;
-}
-
-static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
-					u8 format_prot_info, u8 *nvme_pf_code)
-{
-	int res;
-	u8 *parm_list;
-	u8 pf_usage, pf_code;
-
-	parm_list = kmalloc(len, GFP_KERNEL);
-	if (parm_list == NULL) {
-		res = -ENOMEM;
-		goto out;
-	}
-	res = nvme_trans_copy_from_user(hdr, parm_list, len);
-	if (res)
-		goto out_mem;
-
-	if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] &
-				FORMAT_UNIT_IMMED_MASK) != 0) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out_mem;
-	}
-
-	if (len == FORMAT_UNIT_LONG_PARM_LIST_LEN &&
-	    (parm_list[FORMAT_UNIT_PROT_INT_OFFSET] & 0x0F) != 0) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out_mem;
-	}
-	pf_usage = parm_list[FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET] &
-			FORMAT_UNIT_PROT_FIELD_USAGE_MASK;
-	pf_code = (pf_usage << 2) | format_prot_info;
-	switch (pf_code) {
-	case 0:
-		*nvme_pf_code = 0;
-		break;
-	case 2:
-		*nvme_pf_code = 1;
-		break;
-	case 3:
-		*nvme_pf_code = 2;
-		break;
-	case 7:
-		*nvme_pf_code = 3;
-		break;
-	default:
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		break;
-	}
-
- out_mem:
-	kfree(parm_list);
- out:
-	return res;
-}
-
-static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-				   u8 prot_info)
-{
-	int res;
-	int nvme_sc;
-	struct nvme_id_ns *id_ns;
-	u8 i;
-	u8 nlbaf;
-	u8 selected_lbaf = 0xFF;
-	u32 cdw10 = 0;
-	struct nvme_command c;
-
-	/* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
-	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;
-
-	nlbaf = id_ns->nlbaf;
-
-	for (i = 0; i < nlbaf; i++) {
-		if (ns->mode_select_block_len == (1 << (id_ns->lbaf[i].ds))) {
-			selected_lbaf = i;
-			break;
-		}
-	}
-	if (selected_lbaf > 0x0F) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-				ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
-				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-	}
-	if (ns->mode_select_num_blocks != le64_to_cpu(id_ns->ncap)) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-				ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
-				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-	}
-
-	cdw10 |= prot_info << 5;
-	cdw10 |= selected_lbaf & 0x0F;
-	memset(&c, 0, sizeof(c));
-	c.format.opcode = nvme_admin_format_nvm;
-	c.format.nsid = cpu_to_le32(ns->ns_id);
-	c.format.cdw10 = cpu_to_le32(cdw10);
-
-	nvme_sc = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-
-	kfree(id_ns);
-	return res;
-}
-
-static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
-					struct nvme_trans_io_cdb *cdb_info,
-					u32 max_blocks)
-{
-	/* If using iovecs, send one nvme command per vector */
-	if (hdr->iovec_count > 0)
-		return hdr->iovec_count;
-	else if (cdb_info->xfer_len > max_blocks)
-		return ((cdb_info->xfer_len - 1) / max_blocks) + 1;
-	else
-		return 1;
-}
-
-static u16 nvme_trans_io_get_control(struct nvme_ns *ns,
-					struct nvme_trans_io_cdb *cdb_info)
-{
-	u16 control = 0;
-
-	/* When Protection information support is added, implement here */
-
-	if (cdb_info->fua > 0)
-		control |= NVME_RW_FUA;
-
-	return control;
-}
-
-static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-				struct nvme_trans_io_cdb *cdb_info, u8 is_write)
-{
-	int nvme_sc = NVME_SC_SUCCESS;
-	u32 num_cmds;
-	u64 unit_len;
-	u64 unit_num_blocks;	/* Number of blocks to xfer in each nvme cmd */
-	u32 retcode;
-	u32 i = 0;
-	u64 nvme_offset = 0;
-	void __user *next_mapping_addr;
-	struct nvme_command c;
-	u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read);
-	u16 control;
-	u32 max_blocks = queue_max_hw_sectors(ns->queue) >> (ns->lba_shift - 9);
-
-	num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks);
-
-	/*
-	 * This loop handles two cases.
-	 * First, when an SGL is used in the form of an iovec list:
-	 *   - Use iov_base as the next mapping address for the nvme command_id
-	 *   - Use iov_len as the data transfer length for the command.
-	 * Second, when we have a single buffer
-	 *   - If larger than max_blocks, split into chunks, offset
-	 *        each nvme command accordingly.
-	 */
-	for (i = 0; i < num_cmds; i++) {
-		memset(&c, 0, sizeof(c));
-		if (hdr->iovec_count > 0) {
-			struct sg_iovec sgl;
-
-			retcode = copy_from_user(&sgl, hdr->dxferp +
-					i * sizeof(struct sg_iovec),
-					sizeof(struct sg_iovec));
-			if (retcode)
-				return -EFAULT;
-			unit_len = sgl.iov_len;
-			unit_num_blocks = unit_len >> ns->lba_shift;
-			next_mapping_addr = sgl.iov_base;
-		} else {
-			unit_num_blocks = min((u64)max_blocks,
-					(cdb_info->xfer_len - nvme_offset));
-			unit_len = unit_num_blocks << ns->lba_shift;
-			next_mapping_addr = hdr->dxferp +
-					((1 << ns->lba_shift) * nvme_offset);
-		}
-
-		c.rw.opcode = opcode;
-		c.rw.nsid = cpu_to_le32(ns->ns_id);
-		c.rw.slba = cpu_to_le64(cdb_info->lba + nvme_offset);
-		c.rw.length = cpu_to_le16(unit_num_blocks - 1);
-		control = nvme_trans_io_get_control(ns, cdb_info);
-		c.rw.control = cpu_to_le16(control);
-
-		if (get_capacity(ns->disk) - unit_num_blocks <
-				cdb_info->lba + nvme_offset) {
-			nvme_sc = NVME_SC_LBA_RANGE;
-			break;
-		}
-		nvme_sc = nvme_submit_user_cmd(ns->queue, &c,
-				next_mapping_addr, unit_len, NULL, 0);
-		if (nvme_sc)
-			break;
-
-		nvme_offset += unit_num_blocks;
-	}
-
-	return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-
-/* SCSI Command Translation Functions */
-
-static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
-							u8 *cmd)
-{
-	int res = 0;
-	struct nvme_trans_io_cdb cdb_info = { 0, };
-	u8 opcode = cmd[0];
-	u64 xfer_bytes;
-	u64 sum_iov_len = 0;
-	struct sg_iovec sgl;
-	int i;
-	size_t not_copied;
-
-	/*
-	 * The FUA and WPROTECT fields are not supported in 6-byte CDBs,
-	 * but always in the same place for all others.
-	 */
-	switch (opcode) {
-	case WRITE_6:
-	case READ_6:
-		break;
-	default:
-		cdb_info.fua = cmd[1] & 0x8;
-		cdb_info.prot_info = (cmd[1] & 0xe0) >> 5;
-		if (cdb_info.prot_info && !ns->pi_type) {
-			return nvme_trans_completion(hdr,
-					SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST,
-					SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		}
-	}
-
-	switch (opcode) {
-	case WRITE_6:
-	case READ_6:
-		cdb_info.lba = get_unaligned_be24(&cmd[1]);
-		cdb_info.xfer_len = cmd[4];
-		if (cdb_info.xfer_len == 0)
-			cdb_info.xfer_len = 256;
-		break;
-	case WRITE_10:
-	case READ_10:
-		cdb_info.lba = get_unaligned_be32(&cmd[2]);
-		cdb_info.xfer_len = get_unaligned_be16(&cmd[7]);
-		break;
-	case WRITE_12:
-	case READ_12:
-		cdb_info.lba = get_unaligned_be32(&cmd[2]);
-		cdb_info.xfer_len = get_unaligned_be32(&cmd[6]);
-		break;
-	case WRITE_16:
-	case READ_16:
-		cdb_info.lba = get_unaligned_be64(&cmd[2]);
-		cdb_info.xfer_len = get_unaligned_be32(&cmd[10]);
-		break;
-	default:
-		/* Will never really reach here */
-		res = -EIO;
-		goto out;
-	}
-
-	/* Calculate total length of transfer (in bytes) */
-	if (hdr->iovec_count > 0) {
-		for (i = 0; i < hdr->iovec_count; i++) {
-			not_copied = copy_from_user(&sgl, hdr->dxferp +
-						i * sizeof(struct sg_iovec),
-						sizeof(struct sg_iovec));
-			if (not_copied)
-				return -EFAULT;
-			sum_iov_len += sgl.iov_len;
-			/* IO vector sizes should be multiples of block size */
-			if (sgl.iov_len % (1 << ns->lba_shift) != 0) {
-				res = nvme_trans_completion(hdr,
-						SAM_STAT_CHECK_CONDITION,
-						ILLEGAL_REQUEST,
-						SCSI_ASC_INVALID_PARAMETER,
-						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-				goto out;
-			}
-		}
-	} else {
-		sum_iov_len = hdr->dxfer_len;
-	}
-
-	/* As Per sg ioctl howto, if the lengths differ, use the lower one */
-	xfer_bytes = min(((u64)hdr->dxfer_len), sum_iov_len);
-
-	/* If block count and actual data buffer size dont match, error out */
-	if (xfer_bytes != (cdb_info.xfer_len << ns->lba_shift)) {
-		res = -EINVAL;
-		goto out;
-	}
-
-	/* Check for 0 length transfer - it is not illegal */
-	if (cdb_info.xfer_len == 0)
-		goto out;
-
-	/* Send NVMe IO Command(s) */
-	res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write);
-	if (res)
-		goto out;
-
- out:
-	return res;
-}
-
-static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	int res = 0;
-	u8 evpd;
-	u8 page_code;
-	int alloc_len;
-	u8 *inq_response;
-
-	evpd = cmd[1] & 0x01;
-	page_code = cmd[2];
-	alloc_len = get_unaligned_be16(&cmd[3]);
-
-	inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH),
-				GFP_KERNEL);
-	if (inq_response == NULL) {
-		res = -ENOMEM;
-		goto out_mem;
-	}
-
-	if (evpd == 0) {
-		if (page_code == INQ_STANDARD_INQUIRY_PAGE) {
-			res = nvme_trans_standard_inquiry_page(ns, hdr,
-						inq_response, alloc_len);
-		} else {
-			res = nvme_trans_completion(hdr,
-						SAM_STAT_CHECK_CONDITION,
-						ILLEGAL_REQUEST,
-						SCSI_ASC_INVALID_CDB,
-						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		}
-	} else {
-		switch (page_code) {
-		case VPD_SUPPORTED_PAGES:
-			res = nvme_trans_supported_vpd_pages(ns, hdr,
-						inq_response, alloc_len);
-			break;
-		case VPD_SERIAL_NUMBER:
-			res = nvme_trans_unit_serial_page(ns, hdr, inq_response,
-								alloc_len);
-			break;
-		case VPD_DEVICE_IDENTIFIERS:
-			res = nvme_trans_device_id_page(ns, hdr, inq_response,
-								alloc_len);
-			break;
-		case VPD_EXTENDED_INQUIRY:
-			res = nvme_trans_ext_inq_page(ns, hdr, alloc_len);
-			break;
-		case VPD_BLOCK_LIMITS:
-			res = nvme_trans_bdev_limits_page(ns, hdr, inq_response,
-								alloc_len);
-			break;
-		case VPD_BLOCK_DEV_CHARACTERISTICS:
-			res = nvme_trans_bdev_char_page(ns, hdr, alloc_len);
-			break;
-		default:
-			res = nvme_trans_completion(hdr,
-						SAM_STAT_CHECK_CONDITION,
-						ILLEGAL_REQUEST,
-						SCSI_ASC_INVALID_CDB,
-						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			break;
-		}
-	}
-	kfree(inq_response);
- out_mem:
-	return res;
-}
-
-static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	int res;
-	u16 alloc_len;
-	u8 pc;
-	u8 page_code;
-
-	if (cmd[1] != LOG_SENSE_CDB_SP_NOT_ENABLED) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out;
-	}
-
-	page_code = cmd[2] & LOG_SENSE_CDB_PAGE_CODE_MASK;
-	pc = (cmd[2] & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
-	if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out;
-	}
-	alloc_len = get_unaligned_be16(&cmd[7]);
-	switch (page_code) {
-	case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE:
-		res = nvme_trans_log_supp_pages(ns, hdr, alloc_len);
-		break;
-	case LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE:
-		res = nvme_trans_log_info_exceptions(ns, hdr, alloc_len);
-		break;
-	case LOG_PAGE_TEMPERATURE_PAGE:
-		res = nvme_trans_log_temperature(ns, hdr, alloc_len);
-		break;
-	default:
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		break;
-	}
-
- out:
-	return res;
-}
-
-static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	u8 cdb10 = 0;
-	u16 parm_list_len;
-	u8 page_format;
-	u8 save_pages;
-
-	page_format = cmd[1] & MODE_SELECT_CDB_PAGE_FORMAT_MASK;
-	save_pages = cmd[1] & MODE_SELECT_CDB_SAVE_PAGES_MASK;
-
-	if (cmd[0] == MODE_SELECT) {
-		parm_list_len = cmd[4];
-	} else {
-		parm_list_len = cmd[7];
-		cdb10 = 1;
-	}
-
-	if (parm_list_len != 0) {
-		/*
-		 * According to SPC-4 r24, a paramter list length field of 0
-		 * shall not be considered an error
-		 */
-		return nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
-						page_format, save_pages, cdb10);
-	}
-
-	return 0;
-}
-
-static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	int res = 0;
-	u16 alloc_len;
-	u8 cdb10 = 0;
-
-	if (cmd[0] == MODE_SENSE) {
-		alloc_len = cmd[4];
-	} else {
-		alloc_len = get_unaligned_be16(&cmd[7]);
-		cdb10 = 1;
-	}
-
-	if ((cmd[2] & MODE_SENSE_PAGE_CONTROL_MASK) !=
-			MODE_SENSE_PC_CURRENT_VALUES) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out;
-	}
-
-	switch (cmd[2] & MODE_SENSE_PAGE_CODE_MASK) {
-	case MODE_PAGE_CACHING:
-		res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-						cdb10,
-						&nvme_trans_fill_caching_page,
-						MODE_PAGE_CACHING_LEN);
-		break;
-	case MODE_PAGE_CONTROL:
-		res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-						cdb10,
-						&nvme_trans_fill_control_page,
-						MODE_PAGE_CONTROL_LEN);
-		break;
-	case MODE_PAGE_POWER_CONDITION:
-		res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-						cdb10,
-						&nvme_trans_fill_pow_cnd_page,
-						MODE_PAGE_POW_CND_LEN);
-		break;
-	case MODE_PAGE_INFO_EXCEP:
-		res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-						cdb10,
-						&nvme_trans_fill_inf_exc_page,
-						MODE_PAGE_INF_EXC_LEN);
-		break;
-	case MODE_PAGE_RETURN_ALL:
-		res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-						cdb10,
-						&nvme_trans_fill_all_pages,
-						MODE_PAGE_ALL_LEN);
-		break;
-	default:
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		break;
-	}
-
- out:
-	return res;
-}
-
-static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd, u8 cdb16)
-{
-	int res;
-	int nvme_sc;
-	u32 alloc_len;
-	u32 resp_size;
-	u32 xfer_len;
-	struct nvme_id_ns *id_ns;
-	u8 *response;
-
-	if (cdb16) {
-		alloc_len = get_unaligned_be32(&cmd[10]);
-		resp_size = READ_CAP_16_RESP_SIZE;
-	} else {
-		alloc_len = READ_CAP_10_RESP_SIZE;
-		resp_size = READ_CAP_10_RESP_SIZE;
-	}
-
-	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;	
-
-	response = kzalloc(resp_size, GFP_KERNEL);
-	if (response == NULL) {
-		res = -ENOMEM;
-		goto out_free_id;
-	}
-	nvme_trans_fill_read_cap(response, id_ns, cdb16);
-
-	xfer_len = min(alloc_len, resp_size);
-	res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
-	kfree(response);
- out_free_id:
-	kfree(id_ns);
-	return res;
-}
-
-static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	int res;
-	int nvme_sc;
-	u32 alloc_len, xfer_len, resp_size;
-	u8 *response;
-	struct nvme_id_ctrl *id_ctrl;
-	u32 ll_length, lun_id;
-	u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
-	__be32 tmp_len;
-
-	switch (cmd[2]) {
-	default:
-		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-	case ALL_LUNS_RETURNED:
-	case ALL_WELL_KNOWN_LUNS_RETURNED:
-	case RESTRICTED_LUNS_RETURNED:
-		nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
-		res = nvme_trans_status_code(hdr, nvme_sc);
-		if (res)
-			return res;
-
-		ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE;
-		resp_size = ll_length + LUN_DATA_HEADER_SIZE;
-
-		alloc_len = get_unaligned_be32(&cmd[6]);
-		if (alloc_len < resp_size) {
-			res = nvme_trans_completion(hdr,
-					SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			goto out_free_id;
-		}
-
-		response = kzalloc(resp_size, GFP_KERNEL);
-		if (response == NULL) {
-			res = -ENOMEM;
-			goto out_free_id;
-		}
-
-		/* The first LUN ID will always be 0 per the SAM spec */
-		for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) {
-			/*
-			 * Set the LUN Id and then increment to the next LUN
-			 * location in the parameter data.
-			 */
-			__be64 tmp_id = cpu_to_be64(lun_id);
-			memcpy(&response[lun_id_offset], &tmp_id, sizeof(u64));
-			lun_id_offset += LUN_ENTRY_SIZE;
-		}
-		tmp_len = cpu_to_be32(ll_length);
-		memcpy(response, &tmp_len, sizeof(u32));
-	}
-
-	xfer_len = min(alloc_len, resp_size);
-	res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
-	kfree(response);
- out_free_id:
-	kfree(id_ctrl);
-	return res;
-}
-
-static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	int res;
-	u8 alloc_len, xfer_len, resp_size;
-	u8 desc_format;
-	u8 *response;
-
-	desc_format = cmd[1] & 0x01;
-	alloc_len = cmd[4];
-
-	resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
-					(FIXED_FMT_SENSE_DATA_SIZE));
-	response = kzalloc(resp_size, GFP_KERNEL);
-	if (response == NULL) {
-		res = -ENOMEM;
-		goto out;
-	}
-
-	if (desc_format) {
-		/* Descriptor Format Sense Data */
-		response[0] = DESC_FORMAT_SENSE_DATA;
-		response[1] = NO_SENSE;
-		/* TODO How is LOW POWER CONDITION ON handled? (byte 2) */
-		response[2] = SCSI_ASC_NO_SENSE;
-		response[3] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		/* SDAT_OVFL = 0 | Additional Sense Length = 0 */
-	} else {
-		/* Fixed Format Sense Data */
-		response[0] = FIXED_SENSE_DATA;
-		/* Byte 1 = Obsolete */
-		response[2] = NO_SENSE; /* FM, EOM, ILI, SDAT_OVFL = 0 */
-		/* Bytes 3-6 - Information - set to zero */
-		response[7] = FIXED_SENSE_DATA_ADD_LENGTH;
-		/* Bytes 8-11 - Cmd Specific Information - set to zero */
-		response[12] = SCSI_ASC_NO_SENSE;
-		response[13] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		/* Byte 14 = Field Replaceable Unit Code = 0 */
-		/* Bytes 15-17 - SKSV=0; Sense Key Specific = 0 */
-	}
-
-	xfer_len = min(alloc_len, resp_size);
-	res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
-	kfree(response);
- out:
-	return res;
-}
-
-static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr)
-{
-	int nvme_sc;
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_cmd_flush;
-	c.common.nsid = cpu_to_le32(ns->ns_id);
-
-	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
-	return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	int res;
-	u8 parm_hdr_len = 0;
-	u8 nvme_pf_code = 0;
-	u8 format_prot_info, long_list, format_data;
-
-	format_prot_info = (cmd[1] & 0xc0) >> 6;
-	long_list = cmd[1] & 0x20;
-	format_data = cmd[1] & 0x10;
-
-	if (format_data != 0) {
-		if (format_prot_info != 0) {
-			if (long_list == 0)
-				parm_hdr_len = FORMAT_UNIT_SHORT_PARM_LIST_LEN;
-			else
-				parm_hdr_len = FORMAT_UNIT_LONG_PARM_LIST_LEN;
-		}
-	} else if (format_data == 0 && format_prot_info != 0) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out;
-	}
-
-	/* Get parm header from data-in/out buffer */
-	/*
-	 * According to the translation spec, the only fields in the parameter
-	 * list we are concerned with are in the header. So allocate only that.
-	 */
-	if (parm_hdr_len > 0) {
-		res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len,
-					format_prot_info, &nvme_pf_code);
-		if (res)
-			goto out;
-	}
-
-	/* Attempt to activate any previously downloaded firmware image */
-	res = nvme_trans_send_activate_fw_cmd(ns, hdr, 0);
-
-	/* Determine Block size and count and send format command */
-	res = nvme_trans_fmt_set_blk_size_count(ns, hdr);
-	if (res)
-		goto out;
-
-	res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code);
-
- out:
-	return res;
-}
-
-static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr,
-					u8 *cmd)
-{
-	if (nvme_ctrl_ready(ns->ctrl))
-		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					    NOT_READY, SCSI_ASC_LUN_NOT_READY,
-					    SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-	else
-		return nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
-}
-
-static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	int res = 0;
-	u32 buffer_offset, parm_list_length;
-	u8 buffer_id, mode;
-
-	parm_list_length = get_unaligned_be24(&cmd[6]);
-	if (parm_list_length % BYTES_TO_DWORDS != 0) {
-		/* NVMe expects Firmware file to be a whole number of DWORDS */
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out;
-	}
-	buffer_id = cmd[2];
-	if (buffer_id > NVME_MAX_FIRMWARE_SLOT) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out;
-	}
-	mode = cmd[1] & 0x1f;
-	buffer_offset = get_unaligned_be24(&cmd[3]);
-
-	switch (mode) {
-	case DOWNLOAD_SAVE_ACTIVATE:
-		res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
-						parm_list_length, buffer_offset,
-						buffer_id);
-		if (res)
-			goto out;
-		res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
-		break;
-	case DOWNLOAD_SAVE_DEFER_ACTIVATE:
-		res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
-						parm_list_length, buffer_offset,
-						buffer_id);
-		break;
-	case ACTIVATE_DEFERRED_MICROCODE:
-		res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
-		break;
-	default:
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		break;
-	}
-
- out:
-	return res;
-}
-
-struct scsi_unmap_blk_desc {
-	__be64	slba;
-	__be32	nlb;
-	u32	resv;
-};
-
-struct scsi_unmap_parm_list {
-	__be16	unmap_data_len;
-	__be16	unmap_blk_desc_data_len;
-	u32	resv;
-	struct scsi_unmap_blk_desc desc[0];
-};
-
-static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	struct scsi_unmap_parm_list *plist;
-	struct nvme_dsm_range *range;
-	struct nvme_command c;
-	int i, nvme_sc, res;
-	u16 ndesc, list_len;
-
-	list_len = get_unaligned_be16(&cmd[7]);
-	if (!list_len)
-		return -EINVAL;
-
-	plist = kmalloc(list_len, GFP_KERNEL);
-	if (!plist)
-		return -ENOMEM;
-
-	res = nvme_trans_copy_from_user(hdr, plist, list_len);
-	if (res)
-		goto out;
-
-	ndesc = be16_to_cpu(plist->unmap_blk_desc_data_len) >> 4;
-	if (!ndesc || ndesc > 256) {
-		res = -EINVAL;
-		goto out;
-	}
-
-	range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL);
-	if (!range) {
-		res = -ENOMEM;
-		goto out;
-	}
-
-	for (i = 0; i < ndesc; i++) {
-		range[i].nlb = cpu_to_le32(be32_to_cpu(plist->desc[i].nlb));
-		range[i].slba = cpu_to_le64(be64_to_cpu(plist->desc[i].slba));
-		range[i].cattr = 0;
-	}
-
-	memset(&c, 0, sizeof(c));
-	c.dsm.opcode = nvme_cmd_dsm;
-	c.dsm.nsid = cpu_to_le32(ns->ns_id);
-	c.dsm.nr = cpu_to_le32(ndesc - 1);
-	c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
-
-	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range,
-			ndesc * sizeof(*range));
-	res = nvme_trans_status_code(hdr, nvme_sc);
-
-	kfree(range);
- out:
-	kfree(plist);
-	return res;
-}
-
-static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
-{
-	u8 cmd[16];
-	int retcode;
-	unsigned int opcode;
-
-	if (hdr->cmdp == NULL)
-		return -EMSGSIZE;
-	if (hdr->cmd_len > sizeof(cmd))
-		return -EINVAL;
-	if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len))
-		return -EFAULT;
-
-	/*
-	 * Prime the hdr with good status for scsi commands that don't require
-	 * an nvme command for translation.
-	 */
-	retcode = nvme_trans_status_code(hdr, NVME_SC_SUCCESS);
-	if (retcode)
-		return retcode;
-
-	opcode = cmd[0];
-
-	switch (opcode) {
-	case READ_6:
-	case READ_10:
-	case READ_12:
-	case READ_16:
-		retcode = nvme_trans_io(ns, hdr, 0, cmd);
-		break;
-	case WRITE_6:
-	case WRITE_10:
-	case WRITE_12:
-	case WRITE_16:
-		retcode = nvme_trans_io(ns, hdr, 1, cmd);
-		break;
-	case INQUIRY:
-		retcode = nvme_trans_inquiry(ns, hdr, cmd);
-		break;
-	case LOG_SENSE:
-		retcode = nvme_trans_log_sense(ns, hdr, cmd);
-		break;
-	case MODE_SELECT:
-	case MODE_SELECT_10:
-		retcode = nvme_trans_mode_select(ns, hdr, cmd);
-		break;
-	case MODE_SENSE:
-	case MODE_SENSE_10:
-		retcode = nvme_trans_mode_sense(ns, hdr, cmd);
-		break;
-	case READ_CAPACITY:
-		retcode = nvme_trans_read_capacity(ns, hdr, cmd, 0);
-		break;
-	case SERVICE_ACTION_IN_16:
-		switch (cmd[1]) {
-		case SAI_READ_CAPACITY_16:
-			retcode = nvme_trans_read_capacity(ns, hdr, cmd, 1);
-			break;
-		default:
-			goto out;
-		}
-		break;
-	case REPORT_LUNS:
-		retcode = nvme_trans_report_luns(ns, hdr, cmd);
-		break;
-	case REQUEST_SENSE:
-		retcode = nvme_trans_request_sense(ns, hdr, cmd);
-		break;
-	case SYNCHRONIZE_CACHE:
-		retcode = nvme_trans_synchronize_cache(ns, hdr);
-		break;
-	case FORMAT_UNIT:
-		retcode = nvme_trans_format_unit(ns, hdr, cmd);
-		break;
-	case TEST_UNIT_READY:
-		retcode = nvme_trans_test_unit_ready(ns, hdr, cmd);
-		break;
-	case WRITE_BUFFER:
-		retcode = nvme_trans_write_buffer(ns, hdr, cmd);
-		break;
-	case UNMAP:
-		retcode = nvme_trans_unmap(ns, hdr, cmd);
-		break;
-	default:
- out:
-		retcode = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-				ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND,
-				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		break;
-	}
-	return retcode;
-}
-
-int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
-{
-	struct sg_io_hdr hdr;
-	int retcode;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-	if (copy_from_user(&hdr, u_hdr, sizeof(hdr)))
-		return -EFAULT;
-	if (hdr.interface_id != 'S')
-		return -EINVAL;
-
-	/*
-	 * A positive return code means a NVMe status, which has been
-	 * translated to sense data.
-	 */
-	retcode = nvme_scsi_translate(ns, &hdr);
-	if (retcode < 0)
-		return retcode;
-	if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0)
-		return -EFAULT;
-	return 0;
-}
-
-int nvme_sg_get_version_num(int __user *ip)
-{
-	return put_user(sg_version_num, ip);
-}
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index ff1f97006322..35f930db3c02 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -336,7 +336,7 @@ out:
 
 static void nvmet_execute_identify_nslist(struct nvmet_req *req)
 {
-	static const int buf_size = 4096;
+	static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	struct nvmet_ns *ns;
 	u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
@@ -367,6 +367,64 @@ out:
 	nvmet_req_complete(req, status);
 }
 
+static u16 nvmet_copy_ns_identifier(struct nvmet_req *req, u8 type, u8 len,
+				    void *id, off_t *off)
+{
+	struct nvme_ns_id_desc desc = {
+		.nidt = type,
+		.nidl = len,
+	};
+	u16 status;
+
+	status = nvmet_copy_to_sgl(req, *off, &desc, sizeof(desc));
+	if (status)
+		return status;
+	*off += sizeof(desc);
+
+	status = nvmet_copy_to_sgl(req, *off, id, len);
+	if (status)
+		return status;
+	*off += len;
+
+	return 0;
+}
+
+static void nvmet_execute_identify_desclist(struct nvmet_req *req)
+{
+	struct nvmet_ns *ns;
+	u16 status = 0;
+	off_t off = 0;
+
+	ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
+	if (!ns) {
+		status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+		goto out;
+	}
+
+	if (memchr_inv(&ns->uuid, 0, sizeof(ns->uuid))) {
+		status = nvmet_copy_ns_identifier(req, NVME_NIDT_UUID,
+						  NVME_NIDT_UUID_LEN,
+						  &ns->uuid, &off);
+		if (status)
+			goto out_put_ns;
+	}
+	if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) {
+		status = nvmet_copy_ns_identifier(req, NVME_NIDT_NGUID,
+						  NVME_NIDT_NGUID_LEN,
+						  &ns->nguid, &off);
+		if (status)
+			goto out_put_ns;
+	}
+
+	if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
+			off) != NVME_IDENTIFY_DATA_SIZE - off)
+		status = NVME_SC_INTERNAL | NVME_SC_DNR;
+out_put_ns:
+	nvmet_put_namespace(ns);
+out:
+	nvmet_req_complete(req, status);
+}
+
 /*
  * A "mimimum viable" abort implementation: the command is mandatory in the
  * spec, but we are not required to do any useful work.  We couldn't really
@@ -504,7 +562,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 		}
 		break;
 	case nvme_admin_identify:
-		req->data_len = 4096;
+		req->data_len = NVME_IDENTIFY_DATA_SIZE;
 		switch (cmd->identify.cns) {
 		case NVME_ID_CNS_NS:
 			req->execute = nvmet_execute_identify_ns;
@@ -515,6 +573,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 		case NVME_ID_CNS_NS_ACTIVE_LIST:
 			req->execute = nvmet_execute_identify_nslist;
 			return 0;
+		case NVME_ID_CNS_NS_DESC_LIST:
+			req->execute = nvmet_execute_identify_desclist;
+			return 0;
 		}
 		break;
 	case nvme_admin_abort_cmd:
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index be8c800078e2..a358ecd93e11 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -305,11 +305,41 @@ out_unlock:
 
 CONFIGFS_ATTR(nvmet_ns_, device_path);
 
+static ssize_t nvmet_ns_device_uuid_show(struct config_item *item, char *page)
+{
+	return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->uuid);
+}
+
+static ssize_t nvmet_ns_device_uuid_store(struct config_item *item,
+					  const char *page, size_t count)
+{
+	struct nvmet_ns *ns = to_nvmet_ns(item);
+	struct nvmet_subsys *subsys = ns->subsys;
+	int ret = 0;
+
+
+	mutex_lock(&subsys->lock);
+	if (ns->enabled) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+
+	if (uuid_parse(page, &ns->uuid))
+		ret = -EINVAL;
+
+out_unlock:
+	mutex_unlock(&subsys->lock);
+	return ret ? ret : count;
+}
+
 static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page)
 {
 	return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid);
 }
 
+CONFIGFS_ATTR(nvmet_ns_, device_uuid);
+
 static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
 		const char *page, size_t count)
 {
@@ -379,6 +409,7 @@ CONFIGFS_ATTR(nvmet_ns_, enable);
 static struct configfs_attribute *nvmet_ns_attrs[] = {
 	&nvmet_ns_attr_device_path,
 	&nvmet_ns_attr_device_nguid,
+	&nvmet_ns_attr_device_uuid,
 	&nvmet_ns_attr_enable,
 	NULL,
 };
@@ -619,8 +650,45 @@ out_unlock:
 
 CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host);
 
+static ssize_t nvmet_subsys_version_show(struct config_item *item,
+					      char *page)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+
+	if (NVME_TERTIARY(subsys->ver))
+		return snprintf(page, PAGE_SIZE, "%d.%d.%d\n",
+				(int)NVME_MAJOR(subsys->ver),
+				(int)NVME_MINOR(subsys->ver),
+				(int)NVME_TERTIARY(subsys->ver));
+	else
+		return snprintf(page, PAGE_SIZE, "%d.%d\n",
+				(int)NVME_MAJOR(subsys->ver),
+				(int)NVME_MINOR(subsys->ver));
+}
+
+static ssize_t nvmet_subsys_version_store(struct config_item *item,
+					       const char *page, size_t count)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+	int major, minor, tertiary = 0;
+	int ret;
+
+
+	ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
+	if (ret != 2 && ret != 3)
+		return -EINVAL;
+
+	down_write(&nvmet_config_sem);
+	subsys->ver = NVME_VS(major, minor, tertiary);
+	up_write(&nvmet_config_sem);
+
+	return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, version);
+
 static struct configfs_attribute *nvmet_subsys_attrs[] = {
 	&nvmet_subsys_attr_attr_allow_any_host,
+	&nvmet_subsys_attr_version,
 	NULL,
 };
 
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index cf90713043da..b5b4ac103748 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -380,6 +380,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
 
 	ns->nsid = nsid;
 	ns->subsys = subsys;
+	uuid_gen(&ns->uuid);
 
 	return ns;
 }
@@ -529,6 +530,12 @@ fail:
 }
 EXPORT_SYMBOL_GPL(nvmet_req_init);
 
+void nvmet_req_uninit(struct nvmet_req *req)
+{
+	percpu_ref_put(&req->sq->ref);
+}
+EXPORT_SYMBOL_GPL(nvmet_req_uninit);
+
 static inline bool nvmet_cc_en(u32 cc)
 {
 	return cc & 0x1;
@@ -920,7 +927,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 	if (!subsys)
 		return NULL;
 
-	subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */
+	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
 
 	switch (type) {
 	case NVME_NQN_NVME:
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 1aaf597e81fc..8f3b57b4c97b 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -53,7 +53,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
 	e->portid = port->disc_addr.portid;
 	/* we support only dynamic controllers */
 	e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC);
-	e->asqsz = cpu_to_le16(NVMF_AQ_DEPTH);
+	e->asqsz = cpu_to_le16(NVME_AQ_DEPTH);
 	e->subtype = type;
 	memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
 	memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
@@ -185,7 +185,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 		}
 	case nvme_admin_identify:
-		req->data_len = 4096;
+		req->data_len = NVME_IDENTIFY_DATA_SIZE;
 		switch (cmd->identify.cns) {
 		case NVME_ID_CNS_CTRL:
 			req->execute =
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 62eba29c85fb..7692a96c9065 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -517,9 +517,7 @@ nvmet_fc_queue_to_cpu(struct nvmet_fc_tgtport *tgtport, int qid)
 {
 	int cpu, idx, cnt;
 
-	if (!(tgtport->ops->target_features &
-			NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED) ||
-	    tgtport->ops->max_hw_queues == 1)
+	if (tgtport->ops->max_hw_queues == 1)
 		return WORK_CPU_UNBOUND;
 
 	/* Simple cpu selection based on qid modulo active cpu count */
@@ -2098,20 +2096,22 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
 	/* clear any response payload */
 	memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf));
 
+	fod->data_sg = NULL;
+	fod->data_sg_cnt = 0;
+
 	ret = nvmet_req_init(&fod->req,
 				&fod->queue->nvme_cq,
 				&fod->queue->nvme_sq,
 				&nvmet_fc_tgt_fcp_ops);
-	if (!ret) {	/* bad SQE content or invalid ctrl state */
-		nvmet_fc_abort_op(tgtport, fod);
+	if (!ret) {
+		/* bad SQE content or invalid ctrl state */
+		/* nvmet layer has already called op done to send rsp. */
 		return;
 	}
 
 	/* keep a running counter of tail position */
 	atomic_inc(&fod->queue->sqtail);
 
-	fod->data_sg = NULL;
-	fod->data_sg_cnt = 0;
 	if (fod->total_length) {
 		ret = nvmet_fc_alloc_tgt_pgs(fod);
 		if (ret) {
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 15551ef79c8c..1bb9d5b311b1 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -569,7 +569,6 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
 			struct nvmefc_tgt_fcp_req *tgt_fcpreq)
 {
 	struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
-	int active;
 
 	/*
 	 * mark aborted only in case there were 2 threads in transport
@@ -577,7 +576,6 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
 	 * after the abort request
 	 */
 	spin_lock(&tfcp_req->reqlock);
-	active = tfcp_req->active;
 	tfcp_req->aborted = true;
 	spin_unlock(&tfcp_req->reqlock);
 
@@ -698,7 +696,6 @@ static struct nvmet_fc_target_template tgttemplate = {
 	.dma_boundary		= FCLOOP_DMABOUND_4G,
 	/* optional features */
 	.target_features	= NVMET_FCTGTFEAT_CMD_IN_ISR |
-				  NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED |
 				  NVMET_FCTGTFEAT_OPDONE_IN_ISR,
 	/* sizes of additional private data for data structures */
 	.target_priv_sz		= sizeof(struct fcloop_tport),
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index c77940d80fc8..40128793e613 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -21,7 +21,7 @@ static void nvmet_bio_done(struct bio *bio)
 	struct nvmet_req *req = bio->bi_private;
 
 	nvmet_req_complete(req,
-		bio->bi_error ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+		bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
 
 	if (bio != &req->inline_bio)
 		bio_put(bio);
@@ -145,7 +145,7 @@ static void nvmet_execute_discard(struct nvmet_req *req)
 		bio->bi_private = req;
 		bio->bi_end_io = nvmet_bio_done;
 		if (status) {
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
 		} else {
 			submit_bio(bio);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index feb497134aee..5f55c683b338 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -21,8 +21,6 @@
 #include "../host/nvme.h"
 #include "../host/fabrics.h"
 
-#define NVME_LOOP_AQ_DEPTH		256
-
 #define NVME_LOOP_MAX_SEGMENTS		256
 
 /*
@@ -31,7 +29,7 @@
  */
 #define NVME_LOOP_NR_AEN_COMMANDS	1
 #define NVME_LOOP_AQ_BLKMQ_DEPTH	\
-	(NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
+	(NVME_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
 
 struct nvme_loop_iod {
 	struct nvme_request	nvme_req;
@@ -45,7 +43,6 @@ struct nvme_loop_iod {
 };
 
 struct nvme_loop_ctrl {
-	spinlock_t		lock;
 	struct nvme_loop_queue	*queues;
 	u32			queue_count;
 
@@ -59,7 +56,6 @@ struct nvme_loop_ctrl {
 
 	struct nvmet_ctrl	*target_ctrl;
 	struct work_struct	delete_work;
-	struct work_struct	reset_work;
 };
 
 static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
@@ -151,7 +147,7 @@ nvme_loop_timeout(struct request *rq, bool reserved)
 	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq);
 
 	/* queue error recovery */
-	schedule_work(&iod->queue->ctrl->reset_work);
+	nvme_reset_ctrl(&iod->queue->ctrl->ctrl);
 
 	/* fail with DNR on admin cmd timeout */
 	nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
@@ -159,17 +155,17 @@ nvme_loop_timeout(struct request *rq, bool reserved)
 	return BLK_EH_HANDLED;
 }
 
-static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
 	struct nvme_ns *ns = hctx->queue->queuedata;
 	struct nvme_loop_queue *queue = hctx->driver_data;
 	struct request *req = bd->rq;
 	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
-	int ret;
+	blk_status_t ret;
 
 	ret = nvme_setup_cmd(ns, req, &iod->cmd);
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret)
 		return ret;
 
 	iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
@@ -179,16 +175,15 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		nvme_cleanup_cmd(req);
 		blk_mq_start_request(req);
 		nvme_loop_queue_response(&iod->req);
-		return BLK_MQ_RQ_QUEUE_OK;
+		return BLK_STS_OK;
 	}
 
 	if (blk_rq_bytes(req)) {
 		iod->sg_table.sgl = iod->first_sgl;
-		ret = sg_alloc_table_chained(&iod->sg_table,
+		if (sg_alloc_table_chained(&iod->sg_table,
 				blk_rq_nr_phys_segments(req),
-				iod->sg_table.sgl);
-		if (ret)
-			return BLK_MQ_RQ_QUEUE_BUSY;
+				iod->sg_table.sgl))
+			return BLK_STS_RESOURCE;
 
 		iod->req.sg = iod->sg_table.sgl;
 		iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
@@ -197,7 +192,7 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_mq_start_request(req);
 
 	schedule_work(&iod->work);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
@@ -234,15 +229,10 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set,
 		struct request *req, unsigned int hctx_idx,
 		unsigned int numa_node)
 {
-	return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req),
-			hctx_idx + 1);
-}
+	struct nvme_loop_ctrl *ctrl = set->driver_data;
 
-static int nvme_loop_init_admin_request(struct blk_mq_tag_set *set,
-		struct request *req, unsigned int hctx_idx,
-		unsigned int numa_node)
-{
-	return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), 0);
+	return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req),
+			(set == &ctrl->tag_set) ? hctx_idx + 1 : 0);
 }
 
 static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -280,7 +270,7 @@ static const struct blk_mq_ops nvme_loop_mq_ops = {
 static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
 	.queue_rq	= nvme_loop_queue_rq,
 	.complete	= nvme_loop_complete_rq,
-	.init_request	= nvme_loop_init_admin_request,
+	.init_request	= nvme_loop_init_request,
 	.init_hctx	= nvme_loop_init_admin_hctx,
 	.timeout	= nvme_loop_timeout,
 };
@@ -467,7 +457,7 @@ static int __nvme_loop_del_ctrl(struct nvme_loop_ctrl *ctrl)
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
 		return -EBUSY;
 
-	if (!schedule_work(&ctrl->delete_work))
+	if (!queue_work(nvme_wq, &ctrl->delete_work))
 		return -EBUSY;
 
 	return 0;
@@ -501,8 +491,8 @@ static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl)
 
 static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 {
-	struct nvme_loop_ctrl *ctrl = container_of(work,
-					struct nvme_loop_ctrl, reset_work);
+	struct nvme_loop_ctrl *ctrl =
+		container_of(work, struct nvme_loop_ctrl, ctrl.reset_work);
 	bool changed;
 	int ret;
 
@@ -540,33 +530,16 @@ out_disable:
 	nvme_put_ctrl(&ctrl->ctrl);
 }
 
-static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl)
-{
-	struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
-
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
-		return -EBUSY;
-
-	if (!schedule_work(&ctrl->reset_work))
-		return -EBUSY;
-
-	flush_work(&ctrl->reset_work);
-
-	return 0;
-}
-
 static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
 	.name			= "loop",
 	.module			= THIS_MODULE,
-	.is_fabrics		= true,
+	.flags			= NVME_F_FABRICS,
 	.reg_read32		= nvmf_reg_read32,
 	.reg_read64		= nvmf_reg_read64,
 	.reg_write32		= nvmf_reg_write32,
-	.reset_ctrl		= nvme_loop_reset_ctrl,
 	.free_ctrl		= nvme_loop_free_ctrl,
 	.submit_async_event	= nvme_loop_submit_async_event,
 	.delete_ctrl		= nvme_loop_del_ctrl,
-	.get_subsysnqn		= nvmf_get_subsysnqn,
 };
 
 static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
@@ -629,15 +602,13 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 	INIT_LIST_HEAD(&ctrl->list);
 
 	INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work);
-	INIT_WORK(&ctrl->reset_work, nvme_loop_reset_ctrl_work);
+	INIT_WORK(&ctrl->ctrl.reset_work, nvme_loop_reset_ctrl_work);
 
 	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
 				0 /* no quirks, we're perfect! */);
 	if (ret)
 		goto out_put_ctrl;
 
-	spin_lock_init(&ctrl->lock);
-
 	ret = -ENOMEM;
 
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -766,7 +737,7 @@ static void __exit nvme_loop_cleanup_module(void)
 		__nvme_loop_del_ctrl(ctrl);
 	mutex_unlock(&nvme_loop_ctrl_mutex);
 
-	flush_scheduled_work();
+	flush_workqueue(nvme_wq);
 }
 
 module_init(nvme_loop_init_module);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 7cb77ba5993b..747bbdb4f9c6 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -21,6 +21,7 @@
 #include <linux/percpu-refcount.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
+#include <linux/uuid.h>
 #include <linux/nvme.h>
 #include <linux/configfs.h>
 #include <linux/rcupdate.h>
@@ -46,6 +47,7 @@ struct nvmet_ns {
 	u32			blksize_shift;
 	loff_t			size;
 	u8			nguid[16];
+	uuid_t			uuid;
 
 	bool			enabled;
 	struct nvmet_subsys	*subsys;
@@ -261,6 +263,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
 
 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 		struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops);
+void nvmet_req_uninit(struct nvmet_req *req);
 void nvmet_req_complete(struct nvmet_req *req, u16 status);
 
 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 99c69018a35f..56a4cba690b5 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -567,6 +567,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
 	rsp->n_rdma = 0;
 
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		nvmet_req_uninit(&rsp->req);
 		nvmet_rdma_release_rsp(rsp);
 		if (wc->status != IB_WC_WR_FLUSH_ERR) {
 			pr_info("RDMA READ for CQE 0x%p failed with status %s (%d).\n",
@@ -1026,7 +1027,7 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
 	queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
 	queue->send_queue_size = le16_to_cpu(req->hrqsize);
 
-	if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH)
+	if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH)
 		return NVME_RDMA_CM_INVALID_HSQSIZE;
 
 	/* XXX: Should we enforce some kind of max for IO queues? */
@@ -1306,53 +1307,44 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
 
 /**
  * nvme_rdma_device_removal() - Handle RDMA device removal
+ * @cm_id:	rdma_cm id, used for nvmet port
  * @queue:      nvmet rdma queue (cm id qp_context)
- * @addr:	nvmet address (cm_id context)
  *
  * DEVICE_REMOVAL event notifies us that the RDMA device is about
- * to unplug so we should take care of destroying our RDMA resources.
- * This event will be generated for each allocated cm_id.
+ * to unplug. Note that this event can be generated on a normal
+ * queue cm_id and/or a device bound listener cm_id (where in this
+ * case queue will be null).
  *
- * Note that this event can be generated on a normal queue cm_id
- * and/or a device bound listener cm_id (where in this case
- * queue will be null).
- *
- * we claim ownership on destroying the cm_id. For queues we move
- * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
+ * We registered an ib_client to handle device removal for queues,
+ * so we only need to handle the listening port cm_ids. In this case
  * we nullify the priv to prevent double cm_id destruction and destroying
  * the cm_id implicitely by returning a non-zero rc to the callout.
  */
 static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
 		struct nvmet_rdma_queue *queue)
 {
-	unsigned long flags;
-
-	if (!queue) {
-		struct nvmet_port *port = cm_id->context;
+	struct nvmet_port *port;
 
+	if (queue) {
 		/*
-		 * This is a listener cm_id. Make sure that
-		 * future remove_port won't invoke a double
-		 * cm_id destroy. use atomic xchg to make sure
-		 * we don't compete with remove_port.
-		 */
-		if (xchg(&port->priv, NULL) != cm_id)
-			return 0;
-	} else {
-		/*
-		 * This is a queue cm_id. Make sure that
-		 * release queue will not destroy the cm_id
-		 * and schedule all ctrl queues removal (only
-		 * if the queue is not disconnecting already).
+		 * This is a queue cm_id. we have registered
+		 * an ib_client to handle queues removal
+		 * so don't interfear and just return.
 		 */
-		spin_lock_irqsave(&queue->state_lock, flags);
-		if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
-			queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
-		spin_unlock_irqrestore(&queue->state_lock, flags);
-		nvmet_rdma_queue_disconnect(queue);
-		flush_scheduled_work();
+		return 0;
 	}
 
+	port = cm_id->context;
+
+	/*
+	 * This is a listener cm_id. Make sure that
+	 * future remove_port won't invoke a double
+	 * cm_id destroy. use atomic xchg to make sure
+	 * we don't compete with remove_port.
+	 */
+	if (xchg(&port->priv, NULL) != cm_id)
+		return 0;
+
 	/*
 	 * We need to return 1 so that the core will destroy
 	 * it's own ID.  What a great API design..
@@ -1518,9 +1510,51 @@ static struct nvmet_fabrics_ops nvmet_rdma_ops = {
 	.delete_ctrl		= nvmet_rdma_delete_ctrl,
 };
 
+static void nvmet_rdma_add_one(struct ib_device *ib_device)
+{
+}
+
+static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
+{
+	struct nvmet_rdma_queue *queue;
+
+	/* Device is being removed, delete all queues using this device */
+	mutex_lock(&nvmet_rdma_queue_mutex);
+	list_for_each_entry(queue, &nvmet_rdma_queue_list, queue_list) {
+		if (queue->dev->device != ib_device)
+			continue;
+
+		pr_info("Removing queue %d\n", queue->idx);
+		__nvmet_rdma_queue_disconnect(queue);
+	}
+	mutex_unlock(&nvmet_rdma_queue_mutex);
+
+	flush_scheduled_work();
+}
+
+static struct ib_client nvmet_rdma_ib_client = {
+	.name   = "nvmet_rdma",
+	.add = nvmet_rdma_add_one,
+	.remove = nvmet_rdma_remove_one
+};
+
 static int __init nvmet_rdma_init(void)
 {
-	return nvmet_register_transport(&nvmet_rdma_ops);
+	int ret;
+
+	ret = ib_register_client(&nvmet_rdma_ib_client);
+	if (ret)
+		return ret;
+
+	ret = nvmet_register_transport(&nvmet_rdma_ops);
+	if (ret)
+		goto err_ib_client;
+
+	return 0;
+
+err_ib_client:
+	ib_unregister_client(&nvmet_rdma_ib_client);
+	return ret;
 }
 
 static void __exit nvmet_rdma_exit(void)
@@ -1543,6 +1577,7 @@ static void __exit nvmet_rdma_exit(void)
 	mutex_unlock(&nvmet_rdma_queue_mutex);
 
 	flush_scheduled_work();
+	ib_unregister_client(&nvmet_rdma_ib_client);
 	ida_destroy(&nvmet_rdma_queue_ida);
 }
 
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 9416d052cb89..28c38c756f92 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -144,8 +144,8 @@ int of_dma_configure(struct device *dev, struct device_node *np)
 		coherent ? " " : " not ");
 
 	iommu = of_iommu_configure(dev, np);
-	if (IS_ERR(iommu))
-		return PTR_ERR(iommu);
+	if (IS_ERR(iommu) && PTR_ERR(iommu) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
 
 	dev_dbg(dev, "device is%sbehind an iommu\n",
 		iommu ? " " : " not ");
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index a0972219ccfc..43bd69dceabf 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -507,6 +507,9 @@ void *__unflatten_device_tree(const void *blob,
 
 	/* Allocate memory for the expanded device tree */
 	mem = dt_alloc(size + 4, __alignof__(struct device_node));
+	if (!mem)
+		return NULL;
+
 	memset(mem, 0, size);
 
 	*(__be32 *)(mem + size) = cpu_to_be32(0xdeadbeef);
@@ -756,6 +759,36 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
 }
 
 /**
+ * of_scan_flat_dt_subnodes - scan sub-nodes of a node call callback on each.
+ * @it: callback function
+ * @data: context data pointer
+ *
+ * This function is used to scan sub-nodes of a node.
+ */
+int __init of_scan_flat_dt_subnodes(unsigned long parent,
+				    int (*it)(unsigned long node,
+					      const char *uname,
+					      void *data),
+				    void *data)
+{
+	const void *blob = initial_boot_params;
+	int node;
+
+	fdt_for_each_subnode(node, blob, parent) {
+		const char *pathp;
+		int rc;
+
+		pathp = fdt_get_name(blob, node, NULL);
+		if (*pathp == '/')
+			pathp = kbasename(pathp);
+		rc = it(node, pathp, data);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+/**
  * of_get_flat_dt_subnode_by_name - get the subnode by given name
  *
  * @node: the parent node
@@ -814,6 +847,14 @@ int __init of_flat_dt_match(unsigned long node, const char *const *compat)
 	return of_fdt_match(initial_boot_params, node, compat);
 }
 
+/**
+ * of_get_flat_dt_prop - Given a node in the flat blob, return the phandle
+ */
+uint32_t __init of_get_flat_dt_phandle(unsigned long node)
+{
+	return fdt_get_phandle(initial_boot_params, node);
+}
+
 struct fdt_scan_status {
 	const char *name;
 	int namelen;
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 4dec07ea510f..d507c3569a88 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -197,7 +197,7 @@ static int __init __reserved_mem_init_node(struct reserved_mem *rmem)
 	const struct of_device_id *i;
 
 	for (i = __reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) {
-		int const (*initfn)(struct reserved_mem *rmem) = i->data;
+		reservedmem_of_init_fn initfn = i->data;
 		const char *compat = i->compatible;
 
 		if (!of_flat_dt_is_compatible(rmem->fdt_node, compat))
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 71fecc2debfc..703a42118ffc 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -523,7 +523,7 @@ static int __init of_platform_default_populate_init(void)
 arch_initcall_sync(of_platform_default_populate_init);
 #endif
 
-static int of_platform_device_destroy(struct device *dev, void *data)
+int of_platform_device_destroy(struct device *dev, void *data)
 {
 	/* Do not touch devices not populated from the device tree */
 	if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED))
@@ -544,6 +544,7 @@ static int of_platform_device_destroy(struct device *dev, void *data)
 	of_node_clear_flag(dev->of_node, OF_POPULATED_BUS);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(of_platform_device_destroy);
 
 /**
  * of_platform_depopulate() - Remove devices populated from device tree
diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index 9d42dfe65d44..5548193a28a6 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -3150,13 +3150,13 @@ static char *irq[PARPORT_PC_MAX_PORTS];
 static char *dma[PARPORT_PC_MAX_PORTS];
 
 MODULE_PARM_DESC(io, "Base I/O address (SPP regs)");
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_PARM_DESC(io_hi, "Base I/O address (ECR)");
-module_param_array(io_hi, int, NULL, 0);
+module_param_hw_array(io_hi, int, ioport, NULL, 0);
 MODULE_PARM_DESC(irq, "IRQ line");
-module_param_array(irq, charp, NULL, 0);
+module_param_hw_array(irq, charp, irq, NULL, 0);
 MODULE_PARM_DESC(dma, "DMA channel");
-module_param_array(dma, charp, NULL, 0);
+module_param_hw_array(dma, charp, dma, NULL, 0);
 #if defined(CONFIG_PARPORT_PC_SUPERIO) || \
        (defined(CONFIG_PARPORT_1284) && defined(CONFIG_PARPORT_PC_FIFO))
 MODULE_PARM_DESC(verbose_probing, "Log chit-chat during initialisation");
diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 74cf5fffb1e1..c80e37a69305 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -896,7 +896,7 @@ int pci_read_config_byte(const struct pci_dev *dev, int where, u8 *val)
 {
 	if (pci_dev_is_disconnected(dev)) {
 		*val = ~0;
-		return -ENODEV;
+		return PCIBIOS_DEVICE_NOT_FOUND;
 	}
 	return pci_bus_read_config_byte(dev->bus, dev->devfn, where, val);
 }
@@ -906,7 +906,7 @@ int pci_read_config_word(const struct pci_dev *dev, int where, u16 *val)
 {
 	if (pci_dev_is_disconnected(dev)) {
 		*val = ~0;
-		return -ENODEV;
+		return PCIBIOS_DEVICE_NOT_FOUND;
 	}
 	return pci_bus_read_config_word(dev->bus, dev->devfn, where, val);
 }
@@ -917,7 +917,7 @@ int pci_read_config_dword(const struct pci_dev *dev, int where,
 {
 	if (pci_dev_is_disconnected(dev)) {
 		*val = ~0;
-		return -ENODEV;
+		return PCIBIOS_DEVICE_NOT_FOUND;
 	}
 	return pci_bus_read_config_dword(dev->bus, dev->devfn, where, val);
 }
@@ -926,7 +926,7 @@ EXPORT_SYMBOL(pci_read_config_dword);
 int pci_write_config_byte(const struct pci_dev *dev, int where, u8 val)
 {
 	if (pci_dev_is_disconnected(dev))
-		return -ENODEV;
+		return PCIBIOS_DEVICE_NOT_FOUND;
 	return pci_bus_write_config_byte(dev->bus, dev->devfn, where, val);
 }
 EXPORT_SYMBOL(pci_write_config_byte);
@@ -934,7 +934,7 @@ EXPORT_SYMBOL(pci_write_config_byte);
 int pci_write_config_word(const struct pci_dev *dev, int where, u16 val)
 {
 	if (pci_dev_is_disconnected(dev))
-		return -ENODEV;
+		return PCIBIOS_DEVICE_NOT_FOUND;
 	return pci_bus_write_config_word(dev->bus, dev->devfn, where, val);
 }
 EXPORT_SYMBOL(pci_write_config_word);
@@ -943,7 +943,7 @@ int pci_write_config_dword(const struct pci_dev *dev, int where,
 					 u32 val)
 {
 	if (pci_dev_is_disconnected(dev))
-		return -ENODEV;
+		return PCIBIOS_DEVICE_NOT_FOUND;
 	return pci_bus_write_config_dword(dev->bus, dev->devfn, where, val);
 }
 EXPORT_SYMBOL(pci_write_config_dword);
diff --git a/drivers/pci/dwc/pci-imx6.c b/drivers/pci/dwc/pci-imx6.c
index a98cba55c7f0..19a289b8cc94 100644
--- a/drivers/pci/dwc/pci-imx6.c
+++ b/drivers/pci/dwc/pci-imx6.c
@@ -252,7 +252,34 @@ static void imx6_pcie_reset_phy(struct imx6_pcie *imx6_pcie)
 static int imx6q_pcie_abort_handler(unsigned long addr,
 		unsigned int fsr, struct pt_regs *regs)
 {
-	return 0;
+	unsigned long pc = instruction_pointer(regs);
+	unsigned long instr = *(unsigned long *)pc;
+	int reg = (instr >> 12) & 15;
+
+	/*
+	 * If the instruction being executed was a read,
+	 * make it look like it read all-ones.
+	 */
+	if ((instr & 0x0c100000) == 0x04100000) {
+		unsigned long val;
+
+		if (instr & 0x00400000)
+			val = 255;
+		else
+			val = -1;
+
+		regs->uregs[reg] = val;
+		regs->ARM_pc += 4;
+		return 0;
+	}
+
+	if ((instr & 0x0e100090) == 0x00100090) {
+		regs->uregs[reg] = -1;
+		regs->ARM_pc += 4;
+		return 0;
+	}
+
+	return 1;
 }
 
 static void imx6_pcie_assert_core_reset(struct imx6_pcie *imx6_pcie)
@@ -819,8 +846,8 @@ static int __init imx6_pcie_init(void)
 	 * we can install the handler here without risking it
 	 * accessing some uninitialized driver state.
 	 */
-	hook_fault_code(16 + 6, imx6q_pcie_abort_handler, SIGBUS, 0,
-			"imprecise external abort");
+	hook_fault_code(8, imx6q_pcie_abort_handler, SIGBUS, 0,
+			"external abort on non-linefetch");
 
 	return platform_driver_register(&imx6_pcie_driver);
 }
diff --git a/drivers/pci/endpoint/Kconfig b/drivers/pci/endpoint/Kconfig
index c23f146fb5a6..c09623ca8c3b 100644
--- a/drivers/pci/endpoint/Kconfig
+++ b/drivers/pci/endpoint/Kconfig
@@ -6,6 +6,7 @@ menu "PCI Endpoint"
 
 config PCI_ENDPOINT
 	bool "PCI Endpoint Support"
+	depends on HAS_DMA
 	help
 	   Enable this configuration option to support configurable PCI
 	   endpoint. This should be enabled if the platform has a PCI
diff --git a/drivers/pci/endpoint/functions/Kconfig b/drivers/pci/endpoint/functions/Kconfig
index 175edad42d2f..2942066607e0 100644
--- a/drivers/pci/endpoint/functions/Kconfig
+++ b/drivers/pci/endpoint/functions/Kconfig
@@ -5,6 +5,7 @@
 config PCI_EPF_TEST
 	tristate "PCI Endpoint Test driver"
 	depends on PCI_ENDPOINT
+	select CRC32
 	help
 	   Enable this configuration option to enable the test driver
 	   for PCI Endpoint.
diff --git a/drivers/pci/hotplug/cpcihp_generic.c b/drivers/pci/hotplug/cpcihp_generic.c
index 88a44a707b96..bbf9cf8aeaad 100644
--- a/drivers/pci/hotplug/cpcihp_generic.c
+++ b/drivers/pci/hotplug/cpcihp_generic.c
@@ -220,7 +220,7 @@ module_param(first_slot, byte, 0);
 MODULE_PARM_DESC(first_slot, "Hotswap bus first slot number");
 module_param(last_slot, byte, 0);
 MODULE_PARM_DESC(last_slot, "Hotswap bus last slot number");
-module_param(port, ushort, 0);
+module_param_hw(port, ushort, ioport, 0);
 MODULE_PARM_DESC(port, "#ENUM signal I/O port");
 module_param(enum_bit, uint, 0);
 MODULE_PARM_DESC(enum_bit, "#ENUM signal bit (0-7)");
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 001860361434..47070cff508c 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -21,13 +21,12 @@
 #include "pci.h"
 
 /*
- * The UUID is defined in the PCI Firmware Specification available here:
+ * The GUID is defined in the PCI Firmware Specification available here:
  * https://www.pcisig.com/members/downloads/pcifw_r3_1_13Dec10.pdf
  */
-const u8 pci_acpi_dsm_uuid[] = {
-	0xd0, 0x37, 0xc9, 0xe5, 0x53, 0x35, 0x7a, 0x4d,
-	0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d
-};
+const guid_t pci_acpi_dsm_guid =
+	GUID_INIT(0xe5c937d0, 0x3553, 0x4d7a,
+		  0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d);
 
 #if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64)
 static int acpi_get_rc_addr(struct acpi_device *adev, struct resource *res)
@@ -680,7 +679,7 @@ void acpi_pci_add_bus(struct pci_bus *bus)
 	if (!pci_is_root_bus(bus))
 		return;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), pci_acpi_dsm_uuid, 3,
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), &pci_acpi_dsm_guid, 3,
 				RESET_DELAY_DSM, NULL);
 	if (!obj)
 		return;
@@ -745,7 +744,7 @@ static void pci_acpi_optimize_delay(struct pci_dev *pdev,
 	if (bridge->ignore_reset_delay)
 		pdev->d3cold_delay = 0;
 
-	obj = acpi_evaluate_dsm(handle, pci_acpi_dsm_uuid, 3,
+	obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 3,
 				FUNCTION_DELAY_DSM, NULL);
 	if (!obj)
 		return;
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
index 51357377efbc..2d8db3ead6e8 100644
--- a/drivers/pci/pci-label.c
+++ b/drivers/pci/pci-label.c
@@ -172,7 +172,7 @@ static int dsm_get_label(struct device *dev, char *buf,
 	if (!handle)
 		return -1;
 
-	obj = acpi_evaluate_dsm(handle, pci_acpi_dsm_uuid, 0x2,
+	obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 0x2,
 				DEVICE_LABEL_DSM, NULL);
 	if (!obj)
 		return -1;
@@ -212,7 +212,7 @@ static bool device_has_dsm(struct device *dev)
 	if (!handle)
 		return false;
 
-	return !!acpi_check_dsm(handle, pci_acpi_dsm_uuid, 0x2,
+	return !!acpi_check_dsm(handle, &pci_acpi_dsm_guid, 0x2,
 				1 << DEVICE_LABEL_DSM);
 }
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b01bd5bba8e6..563901cd9c06 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2144,7 +2144,8 @@ bool pci_dev_keep_suspended(struct pci_dev *pci_dev)
 
 	if (!pm_runtime_suspended(dev)
 	    || pci_target_state(pci_dev) != pci_dev->current_state
-	    || platform_pci_need_resume(pci_dev))
+	    || platform_pci_need_resume(pci_dev)
+	    || (pci_dev->dev_flags & PCI_DEV_FLAGS_NEEDS_RESUME))
 		return false;
 
 	/*
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index cc6e085008fb..f6a63406c76e 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -1291,7 +1291,6 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev)
 	cdev = &stdev->cdev;
 	cdev_init(cdev, &switchtec_fops);
 	cdev->owner = THIS_MODULE;
-	cdev->kobj.parent = &dev->kobj;
 
 	return stdev;
 
@@ -1442,12 +1441,15 @@ static int switchtec_init_pci(struct switchtec_dev *stdev,
 	stdev->mmio_sys_info = stdev->mmio + SWITCHTEC_GAS_SYS_INFO_OFFSET;
 	stdev->mmio_flash_info = stdev->mmio + SWITCHTEC_GAS_FLASH_INFO_OFFSET;
 	stdev->mmio_ntb = stdev->mmio + SWITCHTEC_GAS_NTB_OFFSET;
-	stdev->partition = ioread8(&stdev->mmio_ntb->partition_id);
+	stdev->partition = ioread8(&stdev->mmio_sys_info->partition_id);
 	stdev->partition_count = ioread8(&stdev->mmio_ntb->partition_count);
 	stdev->mmio_part_cfg_all = stdev->mmio + SWITCHTEC_GAS_PART_CFG_OFFSET;
 	stdev->mmio_part_cfg = &stdev->mmio_part_cfg_all[stdev->partition];
 	stdev->mmio_pff_csr = stdev->mmio + SWITCHTEC_GAS_PFF_CSR_OFFSET;
 
+	if (stdev->partition_count < 1)
+		stdev->partition_count = 1;
+
 	init_pff(stdev);
 
 	pci_set_drvdata(pdev, stdev);
@@ -1479,11 +1481,7 @@ static int switchtec_pci_probe(struct pci_dev *pdev,
 		  SWITCHTEC_EVENT_EN_IRQ,
 		  &stdev->mmio_part_cfg->mrpc_comp_hdr);
 
-	rc = cdev_add(&stdev->cdev, stdev->dev.devt, 1);
-	if (rc)
-		goto err_put;
-
-	rc = device_add(&stdev->dev);
+	rc = cdev_device_add(&stdev->cdev, &stdev->dev);
 	if (rc)
 		goto err_devadd;
 
@@ -1492,7 +1490,6 @@ static int switchtec_pci_probe(struct pci_dev *pdev,
 	return 0;
 
 err_devadd:
-	cdev_del(&stdev->cdev);
 	stdev_kill(stdev);
 err_put:
 	ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt));
@@ -1506,8 +1503,7 @@ static void switchtec_pci_remove(struct pci_dev *pdev)
 
 	pci_set_drvdata(pdev, NULL);
 
-	device_del(&stdev->dev);
-	cdev_del(&stdev->cdev);
+	cdev_device_del(&stdev->cdev, &stdev->dev);
 	ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt));
 	dev_info(&stdev->dev, "unregistered.\n");
 
diff --git a/drivers/pcmcia/i82365.c b/drivers/pcmcia/i82365.c
index eb0d80a429e4..fb38cc01859f 100644
--- a/drivers/pcmcia/i82365.c
+++ b/drivers/pcmcia/i82365.c
@@ -108,12 +108,12 @@ static int async_clock = -1;
 static int cable_mode = -1;
 static int wakeup = 0;
 
-module_param(i365_base, ulong, 0444);
+module_param_hw(i365_base, ulong, ioport, 0444);
 module_param(ignore, int, 0444);
 module_param(extra_sockets, int, 0444);
-module_param(irq_mask, int, 0444);
-module_param_array(irq_list, int, &irq_list_count, 0444);
-module_param(cs_irq, int, 0444);
+module_param_hw(irq_mask, int, other, 0444);
+module_param_hw_array(irq_list, int, irq, &irq_list_count, 0444);
+module_param_hw(cs_irq, int, irq, 0444);
 module_param(async_clock, int, 0444);
 module_param(cable_mode, int, 0444);
 module_param(wakeup, int, 0444);
diff --git a/drivers/pcmcia/tcic.c b/drivers/pcmcia/tcic.c
index 1ee63e5f0550..a1ac72d51d70 100644
--- a/drivers/pcmcia/tcic.c
+++ b/drivers/pcmcia/tcic.c
@@ -85,12 +85,12 @@ static int poll_quick = HZ/20;
 /* CCLK external clock time, in nanoseconds.  70 ns = 14.31818 MHz */
 static int cycle_time = 70;
 
-module_param(tcic_base, ulong, 0444);
+module_param_hw(tcic_base, ulong, ioport, 0444);
 module_param(ignore, int, 0444);
 module_param(do_scan, int, 0444);
-module_param(irq_mask, int, 0444);
-module_param_array(irq_list, int, &irq_list_count, 0444);
-module_param(cs_irq, int, 0444);
+module_param_hw(irq_mask, int, other, 0444);
+module_param_hw_array(irq_list, int, irq, &irq_list_count, 0444);
+module_param_hw(cs_irq, int, irq, 0444);
 module_param(poll_interval, int, 0444);
 module_param(poll_quick, int, 0444);
 module_param(cycle_time, int, 0444);
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 34c862f213c7..0a9b78705ee8 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -29,6 +29,17 @@ static int arm_pmu_acpi_register_irq(int cpu)
 		return -EINVAL;
 
 	gsi = gicc->performance_interrupt;
+
+	/*
+	 * Per the ACPI spec, the MADT cannot describe a PMU that doesn't
+	 * have an interrupt. QEMU advertises this by using a GSI of zero,
+	 * which is not known to be valid on any hardware despite being
+	 * valid per the spec. Take the pragmatic approach and reject a
+	 * GSI of zero for now.
+	 */
+	if (!gsi)
+		return 0;
+
 	if (gicc->flags & ACPI_MADT_PERFORMANCE_IRQ_MODE)
 		trigger = ACPI_EDGE_SENSITIVE;
 	else
diff --git a/drivers/phy/phy-qcom-qmp.c b/drivers/phy/phy-qcom-qmp.c
index 727e23be7cac..78ca62897784 100644
--- a/drivers/phy/phy-qcom-qmp.c
+++ b/drivers/phy/phy-qcom-qmp.c
@@ -844,7 +844,7 @@ static int qcom_qmp_phy_vreg_init(struct device *dev)
 	int num = qmp->cfg->num_vregs;
 	int i;
 
-	qmp->vregs = devm_kcalloc(dev, num, sizeof(qmp->vregs), GFP_KERNEL);
+	qmp->vregs = devm_kcalloc(dev, num, sizeof(*qmp->vregs), GFP_KERNEL);
 	if (!qmp->vregs)
 		return -ENOMEM;
 
@@ -983,16 +983,16 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id)
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
 	 */
 	qphy->tx = of_iomap(np, 0);
-	if (IS_ERR(qphy->tx))
-		return PTR_ERR(qphy->tx);
+	if (!qphy->tx)
+		return -ENOMEM;
 
 	qphy->rx = of_iomap(np, 1);
-	if (IS_ERR(qphy->rx))
-		return PTR_ERR(qphy->rx);
+	if (!qphy->rx)
+		return -ENOMEM;
 
 	qphy->pcs = of_iomap(np, 2);
-	if (IS_ERR(qphy->pcs))
-		return PTR_ERR(qphy->pcs);
+	if (!qphy->pcs)
+		return -ENOMEM;
 
 	/*
 	 * Get PHY's Pipe clock, if any. USB3 and PCIe are PIPE3
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 1653cbda6a82..bd459a93b0e7 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -680,30 +680,16 @@ EXPORT_SYMBOL_GPL(pinctrl_generic_remove_group);
  * pinctrl_generic_free_groups() - removes all pin groups
  * @pctldev: pin controller device
  *
- * Note that the caller must take care of locking.
+ * Note that the caller must take care of locking. The pinctrl groups
+ * are allocated with devm_kzalloc() so no need to free them here.
  */
 static void pinctrl_generic_free_groups(struct pinctrl_dev *pctldev)
 {
 	struct radix_tree_iter iter;
-	struct group_desc *group;
-	unsigned long *indices;
 	void **slot;
-	int i = 0;
-
-	indices = devm_kzalloc(pctldev->dev, sizeof(*indices) *
-			       pctldev->num_groups, GFP_KERNEL);
-	if (!indices)
-		return;
 
 	radix_tree_for_each_slot(slot, &pctldev->pin_group_tree, &iter, 0)
-		indices[i++] = iter.index;
-
-	for (i = 0; i < pctldev->num_groups; i++) {
-		group = radix_tree_lookup(&pctldev->pin_group_tree,
-					  indices[i]);
-		radix_tree_delete(&pctldev->pin_group_tree, indices[i]);
-		devm_kfree(pctldev->dev, group);
-	}
+		radix_tree_delete(&pctldev->pin_group_tree, iter.index);
 
 	pctldev->num_groups = 0;
 }
diff --git a/drivers/pinctrl/freescale/pinctrl-mxs.c b/drivers/pinctrl/freescale/pinctrl-mxs.c
index 41b5b07d5a2b..6852010a6d70 100644
--- a/drivers/pinctrl/freescale/pinctrl-mxs.c
+++ b/drivers/pinctrl/freescale/pinctrl-mxs.c
@@ -194,6 +194,16 @@ static int mxs_pinctrl_get_func_groups(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
+static void mxs_pinctrl_rmwl(u32 value, u32 mask, u8 shift, void __iomem *reg)
+{
+	u32 tmp;
+
+	tmp = readl(reg);
+	tmp &= ~(mask << shift);
+	tmp |= value << shift;
+	writel(tmp, reg);
+}
+
 static int mxs_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned selector,
 			       unsigned group)
 {
@@ -211,8 +221,7 @@ static int mxs_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned selector,
 		reg += bank * 0x20 + pin / 16 * 0x10;
 		shift = pin % 16 * 2;
 
-		writel(0x3 << shift, reg + CLR);
-		writel(g->muxsel[i] << shift, reg + SET);
+		mxs_pinctrl_rmwl(g->muxsel[i], 0x3, shift, reg);
 	}
 
 	return 0;
@@ -279,8 +288,7 @@ static int mxs_pinconf_group_set(struct pinctrl_dev *pctldev,
 			/* mA */
 			if (config & MA_PRESENT) {
 				shift = pin % 8 * 4;
-				writel(0x3 << shift, reg + CLR);
-				writel(ma << shift, reg + SET);
+				mxs_pinctrl_rmwl(ma, 0x3, shift, reg);
 			}
 
 			/* vol */
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index 2debba62fac9..20f1b4493994 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1539,15 +1539,29 @@ static void chv_gpio_irq_handler(struct irq_desc *desc)
  * is not listed below.
  */
 static const struct dmi_system_id chv_no_valid_mask[] = {
+	/* See https://bugzilla.kernel.org/show_bug.cgi?id=194945 */
 	{
-		/* See https://bugzilla.kernel.org/show_bug.cgi?id=194945 */
-		.ident = "Acer Chromebook (CYAN)",
+		.ident = "Intel_Strago based Chromebooks (All models)",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "Edgar"),
-			DMI_MATCH(DMI_BIOS_DATE, "05/21/2016"),
+			DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_Strago"),
 		},
-	}
+	},
+	{
+		.ident = "Acer Chromebook R11 (Cyan)",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Cyan"),
+		},
+	},
+	{
+		.ident = "Samsung Chromebook 3 (Celes)",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Celes"),
+		},
+	},
+	{}
 };
 
 static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index 0d6b7f4b82af..720a19fd38d2 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -35,7 +35,6 @@ static const struct pin_config_item conf_items[] = {
 	PCONFDUMP(PIN_CONFIG_BIAS_PULL_PIN_DEFAULT,
 				"input bias pull to pin specific state", NULL, false),
 	PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", NULL, false),
-	PCONFDUMP(PIN_CONFIG_BIDIRECTIONAL, "bi-directional pin operations", NULL, false),
 	PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL, false),
 	PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL, false),
 	PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL, false),
@@ -161,7 +160,6 @@ static const struct pinconf_generic_params dt_params[] = {
 	{ "bias-pull-up", PIN_CONFIG_BIAS_PULL_UP, 1 },
 	{ "bias-pull-pin-default", PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, 1 },
 	{ "bias-pull-down", PIN_CONFIG_BIAS_PULL_DOWN, 1 },
-	{ "bi-directional", PIN_CONFIG_BIDIRECTIONAL, 1 },
 	{ "drive-open-drain", PIN_CONFIG_DRIVE_OPEN_DRAIN, 0 },
 	{ "drive-open-source", PIN_CONFIG_DRIVE_OPEN_SOURCE, 0 },
 	{ "drive-push-pull", PIN_CONFIG_DRIVE_PUSH_PULL, 0 },
@@ -174,7 +172,6 @@ static const struct pinconf_generic_params dt_params[] = {
 	{ "input-schmitt-enable", PIN_CONFIG_INPUT_SCHMITT_ENABLE, 1 },
 	{ "low-power-disable", PIN_CONFIG_LOW_POWER_MODE, 0 },
 	{ "low-power-enable", PIN_CONFIG_LOW_POWER_MODE, 1 },
-	{ "output-enable", PIN_CONFIG_OUTPUT, 1, },
 	{ "output-high", PIN_CONFIG_OUTPUT, 1, },
 	{ "output-low", PIN_CONFIG_OUTPUT, 0, },
 	{ "power-source", PIN_CONFIG_POWER_SOURCE, 0 },
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 1482d132fbb8..e432ec887479 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -495,64 +495,54 @@ static struct irq_chip amd_gpio_irqchip = {
 	.flags        = IRQCHIP_SKIP_SET_WAKE,
 };
 
-static void amd_gpio_irq_handler(struct irq_desc *desc)
+#define PIN_IRQ_PENDING	(BIT(INTERRUPT_STS_OFF) | BIT(WAKE_STS_OFF))
+
+static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id)
 {
-	u32 i;
-	u32 off;
-	u32 reg;
-	u32 pin_reg;
-	u64 reg64;
-	int handled = 0;
-	unsigned int irq;
+	struct amd_gpio *gpio_dev = dev_id;
+	struct gpio_chip *gc = &gpio_dev->gc;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned int i, irqnr;
 	unsigned long flags;
-	struct irq_chip *chip = irq_desc_get_chip(desc);
-	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
-	struct amd_gpio *gpio_dev = gpiochip_get_data(gc);
+	u32 *regs, regval;
+	u64 status, mask;
 
-	chained_irq_enter(chip, desc);
-	/*enable GPIO interrupt again*/
+	/* Read the wake status */
 	raw_spin_lock_irqsave(&gpio_dev->lock, flags);
-	reg = readl(gpio_dev->base + WAKE_INT_STATUS_REG1);
-	reg64 = reg;
-	reg64 = reg64 << 32;
-
-	reg = readl(gpio_dev->base + WAKE_INT_STATUS_REG0);
-	reg64 |= reg;
+	status = readl(gpio_dev->base + WAKE_INT_STATUS_REG1);
+	status <<= 32;
+	status |= readl(gpio_dev->base + WAKE_INT_STATUS_REG0);
 	raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
 
-	/*
-	 * first 46 bits indicates interrupt status.
-	 * one bit represents four interrupt sources.
-	*/
-	for (off = 0; off < 46 ; off++) {
-		if (reg64 & BIT(off)) {
-			for (i = 0; i < 4; i++) {
-				pin_reg = readl(gpio_dev->base +
-						(off * 4 + i) * 4);
-				if ((pin_reg & BIT(INTERRUPT_STS_OFF)) ||
-					(pin_reg & BIT(WAKE_STS_OFF))) {
-					irq = irq_find_mapping(gc->irqdomain,
-								off * 4 + i);
-					generic_handle_irq(irq);
-					writel(pin_reg,
-						gpio_dev->base
-						+ (off * 4 + i) * 4);
-					handled++;
-				}
-			}
+	/* Bit 0-45 contain the relevant status bits */
+	status &= (1ULL << 46) - 1;
+	regs = gpio_dev->base;
+	for (mask = 1, irqnr = 0; status; mask <<= 1, regs += 4, irqnr += 4) {
+		if (!(status & mask))
+			continue;
+		status &= ~mask;
+
+		/* Each status bit covers four pins */
+		for (i = 0; i < 4; i++) {
+			regval = readl(regs + i);
+			if (!(regval & PIN_IRQ_PENDING))
+				continue;
+			irq = irq_find_mapping(gc->irqdomain, irqnr + i);
+			generic_handle_irq(irq);
+			/* Clear interrupt */
+			writel(regval, regs + i);
+			ret = IRQ_HANDLED;
 		}
 	}
 
-	if (handled == 0)
-		handle_bad_irq(desc);
-
+	/* Signal EOI to the GPIO unit */
 	raw_spin_lock_irqsave(&gpio_dev->lock, flags);
-	reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG);
-	reg |= EOI_MASK;
-	writel(reg, gpio_dev->base + WAKE_INT_MASTER_REG);
+	regval = readl(gpio_dev->base + WAKE_INT_MASTER_REG);
+	regval |= EOI_MASK;
+	writel(regval, gpio_dev->base + WAKE_INT_MASTER_REG);
 	raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
 
-	chained_irq_exit(chip, desc);
+	return ret;
 }
 
 static int amd_get_groups_count(struct pinctrl_dev *pctldev)
@@ -821,10 +811,11 @@ static int amd_gpio_probe(struct platform_device *pdev)
 		goto out2;
 	}
 
-	gpiochip_set_chained_irqchip(&gpio_dev->gc,
-				 &amd_gpio_irqchip,
-				 irq_base,
-				 amd_gpio_irq_handler);
+	ret = devm_request_irq(&pdev->dev, irq_base, amd_gpio_irq_handler, 0,
+			       KBUILD_MODNAME, gpio_dev);
+	if (ret)
+		goto out2;
+
 	platform_set_drvdata(pdev, gpio_dev);
 
 	dev_dbg(&pdev->dev, "amd gpio driver loaded\n");
diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c
index f141aa0430b1..9dd981ddbb17 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -143,9 +143,6 @@ struct rockchip_drv {
  * @gpio_chip: gpiolib chip
  * @grange: gpio range
  * @slock: spinlock for the gpio bank
- * @irq_lock: bus lock for irq chip
- * @new_irqs: newly configured irqs which must be muxed as GPIOs in
- *	irq_bus_sync_unlock()
  */
 struct rockchip_pin_bank {
 	void __iomem			*reg_base;
@@ -168,8 +165,6 @@ struct rockchip_pin_bank {
 	struct pinctrl_gpio_range	grange;
 	raw_spinlock_t			slock;
 	u32				toggle_edge_mode;
-	struct mutex			irq_lock;
-	u32				new_irqs;
 };
 
 #define PIN_BANK(id, pins, label)			\
@@ -2134,12 +2129,11 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
 	int ret;
 
 	/* make sure the pin is configured as gpio input */
-	ret = rockchip_verify_mux(bank, d->hwirq, RK_FUNC_GPIO);
+	ret = rockchip_set_mux(bank, d->hwirq, RK_FUNC_GPIO);
 	if (ret < 0)
 		return ret;
 
-	bank->new_irqs |= mask;
-
+	clk_enable(bank->clk);
 	raw_spin_lock_irqsave(&bank->slock, flags);
 
 	data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
@@ -2197,6 +2191,7 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
 	default:
 		irq_gc_unlock(gc);
 		raw_spin_unlock_irqrestore(&bank->slock, flags);
+		clk_disable(bank->clk);
 		return -EINVAL;
 	}
 
@@ -2205,6 +2200,7 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
 
 	irq_gc_unlock(gc);
 	raw_spin_unlock_irqrestore(&bank->slock, flags);
+	clk_disable(bank->clk);
 
 	return 0;
 }
@@ -2248,34 +2244,6 @@ static void rockchip_irq_disable(struct irq_data *d)
 	clk_disable(bank->clk);
 }
 
-static void rockchip_irq_bus_lock(struct irq_data *d)
-{
-	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
-	struct rockchip_pin_bank *bank = gc->private;
-
-	clk_enable(bank->clk);
-	mutex_lock(&bank->irq_lock);
-}
-
-static void rockchip_irq_bus_sync_unlock(struct irq_data *d)
-{
-	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
-	struct rockchip_pin_bank *bank = gc->private;
-
-	while (bank->new_irqs) {
-		unsigned int irq = __ffs(bank->new_irqs);
-		int ret;
-
-		ret = rockchip_set_mux(bank, irq, RK_FUNC_GPIO);
-		WARN_ON(ret < 0);
-
-		bank->new_irqs &= ~BIT(irq);
-	}
-
-	mutex_unlock(&bank->irq_lock);
-	clk_disable(bank->clk);
-}
-
 static int rockchip_interrupts_register(struct platform_device *pdev,
 						struct rockchip_pinctrl *info)
 {
@@ -2342,9 +2310,6 @@ static int rockchip_interrupts_register(struct platform_device *pdev,
 		gc->chip_types[0].chip.irq_suspend = rockchip_irq_suspend;
 		gc->chip_types[0].chip.irq_resume = rockchip_irq_resume;
 		gc->chip_types[0].chip.irq_set_type = rockchip_irq_set_type;
-		gc->chip_types[0].chip.irq_bus_lock = rockchip_irq_bus_lock;
-		gc->chip_types[0].chip.irq_bus_sync_unlock =
-						rockchip_irq_bus_sync_unlock;
 		gc->wake_enabled = IRQ_MSK(bank->nr_pins);
 
 		irq_set_chained_handler_and_data(bank->irq,
@@ -2518,7 +2483,6 @@ static struct rockchip_pin_ctrl *rockchip_pinctrl_get_soc_data(
 		int bank_pins = 0;
 
 		raw_spin_lock_init(&bank->slock);
-		mutex_init(&bank->irq_lock);
 		bank->drvdata = d;
 		bank->pin_base = ctrl->nr_pins;
 		ctrl->nr_pins += bank->nr_pins;
diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c
index 9fd6d9087dc5..16b3ae5e4f44 100644
--- a/drivers/pinctrl/pinmux.c
+++ b/drivers/pinctrl/pinmux.c
@@ -826,30 +826,17 @@ EXPORT_SYMBOL_GPL(pinmux_generic_remove_function);
  * pinmux_generic_free_functions() - removes all functions
  * @pctldev: pin controller device
  *
- * Note that the caller must take care of locking.
+ * Note that the caller must take care of locking. The pinctrl
+ * functions are allocated with devm_kzalloc() so no need to free
+ * them here.
  */
 void pinmux_generic_free_functions(struct pinctrl_dev *pctldev)
 {
 	struct radix_tree_iter iter;
-	struct function_desc *function;
-	unsigned long *indices;
 	void **slot;
-	int i = 0;
-
-	indices = devm_kzalloc(pctldev->dev, sizeof(*indices) *
-			       pctldev->num_functions, GFP_KERNEL);
-	if (!indices)
-		return;
 
 	radix_tree_for_each_slot(slot, &pctldev->pin_function_tree, &iter, 0)
-		indices[i++] = iter.index;
-
-	for (i = 0; i < pctldev->num_functions; i++) {
-		function = radix_tree_lookup(&pctldev->pin_function_tree,
-					     indices[i]);
-		radix_tree_delete(&pctldev->pin_function_tree, indices[i]);
-		devm_kfree(pctldev->dev, function);
-	}
+		radix_tree_delete(&pctldev->pin_function_tree, iter.index);
 
 	pctldev->num_functions = 0;
 }
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index d3c5f5dfbbd7..222b6685b09f 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -798,7 +798,7 @@ static int stm32_pconf_parse_conf(struct pinctrl_dev *pctldev,
 		break;
 	case PIN_CONFIG_OUTPUT:
 		__stm32_gpio_set(bank, offset, arg);
-		ret = stm32_pmx_gpio_set_direction(pctldev, NULL, pin, false);
+		ret = stm32_pmx_gpio_set_direction(pctldev, range, pin, false);
 		break;
 	default:
 		ret = -EINVAL;
diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c
index 9aec1d2232dd..6624499eae72 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c
@@ -394,7 +394,7 @@ static const struct sunxi_desc_pin sun8i_a83t_pins[] = {
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 18),
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
 		  SUNXI_FUNCTION(0x1, "gpio_out"),
-		  SUNXI_FUNCTION(0x3, "owa")),		/* DOUT */
+		  SUNXI_FUNCTION(0x3, "spdif")),	/* DOUT */
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 19),
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
 		  SUNXI_FUNCTION(0x1, "gpio_out")),
diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c
index 2de1e603bd2b..5f3672153b12 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -704,7 +704,7 @@ static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev)
 		/* Reallocate the array */
 		u32 new_capacity = 2 * dev->pipes_capacity;
 		struct goldfish_pipe **pipes =
-			kcalloc(new_capacity, sizeof(*pipes), GFP_KERNEL);
+			kcalloc(new_capacity, sizeof(*pipes), GFP_ATOMIC);
 		if (!pipes)
 			return -ENOMEM;
 		memcpy(pipes, dev->pipes, sizeof(*pipes) * dev->pipes_capacity);
diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c
index ef29f18b1951..4cc2f4ea0a25 100644
--- a/drivers/platform/x86/intel_telemetry_debugfs.c
+++ b/drivers/platform/x86/intel_telemetry_debugfs.c
@@ -97,11 +97,9 @@
 	} \
 }
 
-#ifdef CONFIG_PM_SLEEP
 static u8 suspend_prep_ok;
 static u32 suspend_shlw_ctr_temp, suspend_deep_ctr_temp;
 static u64 suspend_shlw_res_temp, suspend_deep_res_temp;
-#endif
 
 struct telemetry_susp_stats {
 	u32 shlw_swake_ctr;
@@ -807,7 +805,6 @@ static const struct file_operations telem_ioss_trc_verb_ops = {
 	.release	= single_release,
 };
 
-#ifdef CONFIG_PM_SLEEP
 static int pm_suspend_prep_cb(void)
 {
 	struct telemetry_evtlog evtlog[TELEM_MAX_OS_ALLOCATED_EVENTS];
@@ -937,7 +934,6 @@ static int pm_notification(struct notifier_block *this,
 static struct notifier_block pm_notifier = {
 	.notifier_call = pm_notification,
 };
-#endif /* CONFIG_PM_SLEEP */
 
 static int __init telemetry_debugfs_init(void)
 {
@@ -960,14 +956,13 @@ static int __init telemetry_debugfs_init(void)
 	if (err < 0)
 		return -EINVAL;
 
-
-#ifdef CONFIG_PM_SLEEP
 	register_pm_notifier(&pm_notifier);
-#endif /* CONFIG_PM_SLEEP */
 
 	debugfs_conf->telemetry_dbg_dir = debugfs_create_dir("telemetry", NULL);
-	if (!debugfs_conf->telemetry_dbg_dir)
-		return -ENOMEM;
+	if (!debugfs_conf->telemetry_dbg_dir) {
+		err = -ENOMEM;
+		goto out_pm;
+	}
 
 	f = debugfs_create_file("pss_info", S_IFREG | S_IRUGO,
 				debugfs_conf->telemetry_dbg_dir, NULL,
@@ -1014,6 +1009,8 @@ static int __init telemetry_debugfs_init(void)
 out:
 	debugfs_remove_recursive(debugfs_conf->telemetry_dbg_dir);
 	debugfs_conf->telemetry_dbg_dir = NULL;
+out_pm:
+	unregister_pm_notifier(&pm_notifier);
 
 	return err;
 }
@@ -1022,6 +1019,7 @@ static void __exit telemetry_debugfs_exit(void)
 {
 	debugfs_remove_recursive(debugfs_conf->telemetry_dbg_dir);
 	debugfs_conf->telemetry_dbg_dir = NULL;
+	unregister_pm_notifier(&pm_notifier);
 }
 
 late_initcall(telemetry_debugfs_init);
diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig
index da922756149f..86f40bf37c34 100644
--- a/drivers/power/supply/Kconfig
+++ b/drivers/power/supply/Kconfig
@@ -238,6 +238,26 @@ config CHARGER_AXP20X
 	  This driver can also be built as a module. If so, the module will be
 	  called axp20x_ac_power.
 
+config BATTERY_AXP20X
+	tristate "X-Powers AXP20X battery driver"
+	depends on MFD_AXP20X
+	depends on AXP20X_ADC
+	depends on IIO
+	help
+	  Say Y here to enable support for X-Powers AXP20X PMICs' battery power
+	  supply.
+
+	  This driver can also be built as a module. If so, the module will be
+	  called axp20x_battery.
+
+config AXP20X_POWER
+	tristate "AXP20x power supply driver"
+	depends on MFD_AXP20X
+	depends on IIO
+	help
+	  This driver provides support for the power supply features of
+	  AXP20x PMIC.
+
 config AXP288_CHARGER
 	tristate "X-Powers AXP288 Charger"
 	depends on MFD_AXP20X && EXTCON_AXP288
@@ -541,11 +561,4 @@ config CHARGER_RT9455
 	help
 	  Say Y to enable support for Richtek RT9455 battery charger.
 
-config AXP20X_POWER
-	tristate "AXP20x power supply driver"
-	depends on MFD_AXP20X
-	help
-	  This driver provides support for the power supply features of
-	  AXP20x PMIC.
-
 endif # POWER_SUPPLY
diff --git a/drivers/power/supply/Makefile b/drivers/power/supply/Makefile
index 39fc733e6cc4..a39126d7a6ce 100644
--- a/drivers/power/supply/Makefile
+++ b/drivers/power/supply/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_TEST_POWER)	+= test_power.o
 
 obj-$(CONFIG_BATTERY_88PM860X)	+= 88pm860x_battery.o
 obj-$(CONFIG_BATTERY_ACT8945A)	+= act8945a_charger.o
+obj-$(CONFIG_BATTERY_AXP20X)	+= axp20x_battery.o
 obj-$(CONFIG_CHARGER_AXP20X)	+= axp20x_ac_power.o
 obj-$(CONFIG_BATTERY_DS2760)	+= ds2760_battery.o
 obj-$(CONFIG_BATTERY_DS2780)	+= ds2780_battery.o
diff --git a/drivers/power/supply/ab8500_charger.c b/drivers/power/supply/ab8500_charger.c
index 5cee9aa87aa3..4ebbcce45c48 100644
--- a/drivers/power/supply/ab8500_charger.c
+++ b/drivers/power/supply/ab8500_charger.c
@@ -3238,7 +3238,7 @@ static int ab8500_charger_init_hw_registers(struct ab8500_charger *di)
 			BUS_PP_PRECHG_CURRENT_MASK, 0);
 		if (ret) {
 			dev_err(di->dev,
-				"failed to setup usb power path prechage current\n");
+				"failed to setup usb power path precharge current\n");
 			goto out;
 		}
 	}
diff --git a/drivers/power/supply/axp20x_battery.c b/drivers/power/supply/axp20x_battery.c
new file mode 100644
index 000000000000..5d29b2eab8fc
--- /dev/null
+++ b/drivers/power/supply/axp20x_battery.c
@@ -0,0 +1,502 @@
+/*
+ * Battery power supply driver for X-Powers AXP20X and AXP22X PMICs
+ *
+ * Copyright 2016 Free Electrons NextThing Co.
+ *	Quentin Schulz <quentin.schulz@free-electrons.com>
+ *
+ * This driver is based on a previous upstreaming attempt by:
+ *	Bruno Prémont <bonbons@linux-vserver.org>
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/iio/iio.h>
+#include <linux/iio/consumer.h>
+#include <linux/mfd/axp20x.h>
+
+#define AXP20X_PWR_STATUS_BAT_CHARGING	BIT(2)
+
+#define AXP20X_PWR_OP_BATT_PRESENT	BIT(5)
+#define AXP20X_PWR_OP_BATT_ACTIVATED	BIT(3)
+
+#define AXP209_FG_PERCENT		GENMASK(6, 0)
+#define AXP22X_FG_VALID			BIT(7)
+
+#define AXP20X_CHRG_CTRL1_TGT_VOLT	GENMASK(6, 5)
+#define AXP20X_CHRG_CTRL1_TGT_4_1V	(0 << 5)
+#define AXP20X_CHRG_CTRL1_TGT_4_15V	(1 << 5)
+#define AXP20X_CHRG_CTRL1_TGT_4_2V	(2 << 5)
+#define AXP20X_CHRG_CTRL1_TGT_4_36V	(3 << 5)
+
+#define AXP22X_CHRG_CTRL1_TGT_4_22V	(1 << 5)
+#define AXP22X_CHRG_CTRL1_TGT_4_24V	(3 << 5)
+
+#define AXP20X_CHRG_CTRL1_TGT_CURR	GENMASK(3, 0)
+
+#define AXP20X_V_OFF_MASK		GENMASK(2, 0)
+
+struct axp20x_batt_ps {
+	struct regmap *regmap;
+	struct power_supply *batt;
+	struct device *dev;
+	struct iio_channel *batt_chrg_i;
+	struct iio_channel *batt_dischrg_i;
+	struct iio_channel *batt_v;
+	u8 axp_id;
+};
+
+static int axp20x_battery_get_max_voltage(struct axp20x_batt_ps *axp20x_batt,
+					  int *val)
+{
+	int ret, reg;
+
+	ret = regmap_read(axp20x_batt->regmap, AXP20X_CHRG_CTRL1, &reg);
+	if (ret)
+		return ret;
+
+	switch (reg & AXP20X_CHRG_CTRL1_TGT_VOLT) {
+	case AXP20X_CHRG_CTRL1_TGT_4_1V:
+		*val = 4100000;
+		break;
+	case AXP20X_CHRG_CTRL1_TGT_4_15V:
+		*val = 4150000;
+		break;
+	case AXP20X_CHRG_CTRL1_TGT_4_2V:
+		*val = 4200000;
+		break;
+	case AXP20X_CHRG_CTRL1_TGT_4_36V:
+		*val = 4360000;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int axp22x_battery_get_max_voltage(struct axp20x_batt_ps *axp20x_batt,
+					  int *val)
+{
+	int ret, reg;
+
+	ret = regmap_read(axp20x_batt->regmap, AXP20X_CHRG_CTRL1, &reg);
+	if (ret)
+		return ret;
+
+	switch (reg & AXP20X_CHRG_CTRL1_TGT_VOLT) {
+	case AXP20X_CHRG_CTRL1_TGT_4_1V:
+		*val = 4100000;
+		break;
+	case AXP20X_CHRG_CTRL1_TGT_4_2V:
+		*val = 4200000;
+		break;
+	case AXP22X_CHRG_CTRL1_TGT_4_22V:
+		*val = 4220000;
+		break;
+	case AXP22X_CHRG_CTRL1_TGT_4_24V:
+		*val = 4240000;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void raw_to_constant_charge_current(struct axp20x_batt_ps *axp, int *val)
+{
+	if (axp->axp_id == AXP209_ID)
+		*val = *val * 100000 + 300000;
+	else
+		*val = *val * 150000 + 300000;
+}
+
+static int axp20x_get_constant_charge_current(struct axp20x_batt_ps *axp,
+					      int *val)
+{
+	int ret;
+
+	ret = regmap_read(axp->regmap, AXP20X_CHRG_CTRL1, val);
+	if (ret)
+		return ret;
+
+	*val &= AXP20X_CHRG_CTRL1_TGT_CURR;
+
+	raw_to_constant_charge_current(axp, val);
+
+	return 0;
+}
+
+static int axp20x_battery_get_prop(struct power_supply *psy,
+				   enum power_supply_property psp,
+				   union power_supply_propval *val)
+{
+	struct axp20x_batt_ps *axp20x_batt = power_supply_get_drvdata(psy);
+	struct iio_channel *chan;
+	int ret = 0, reg, val1;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_PRESENT:
+	case POWER_SUPPLY_PROP_ONLINE:
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_PWR_OP_MODE,
+				  &reg);
+		if (ret)
+			return ret;
+
+		val->intval = !!(reg & AXP20X_PWR_OP_BATT_PRESENT);
+		break;
+
+	case POWER_SUPPLY_PROP_STATUS:
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_PWR_INPUT_STATUS,
+				  &reg);
+		if (ret)
+			return ret;
+
+		if (reg & AXP20X_PWR_STATUS_BAT_CHARGING) {
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+			return 0;
+		}
+
+		ret = iio_read_channel_processed(axp20x_batt->batt_dischrg_i,
+						 &val1);
+		if (ret)
+			return ret;
+
+		if (val1) {
+			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+			return 0;
+		}
+
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_FG_RES, &val1);
+		if (ret)
+			return ret;
+
+		/*
+		 * Fuel Gauge data takes 7 bits but the stored value seems to be
+		 * directly the raw percentage without any scaling to 7 bits.
+		 */
+		if ((val1 & AXP209_FG_PERCENT) == 100)
+			val->intval = POWER_SUPPLY_STATUS_FULL;
+		else
+			val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		break;
+
+	case POWER_SUPPLY_PROP_HEALTH:
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_PWR_OP_MODE,
+				  &val1);
+		if (ret)
+			return ret;
+
+		if (val1 & AXP20X_PWR_OP_BATT_ACTIVATED) {
+			val->intval = POWER_SUPPLY_HEALTH_DEAD;
+			return 0;
+		}
+
+		val->intval = POWER_SUPPLY_HEALTH_GOOD;
+		break;
+
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
+		ret = axp20x_get_constant_charge_current(axp20x_batt,
+							 &val->intval);
+		if (ret)
+			return ret;
+		break;
+
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
+		val->intval = AXP20X_CHRG_CTRL1_TGT_CURR;
+		raw_to_constant_charge_current(axp20x_batt, &val->intval);
+
+		break;
+
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_PWR_INPUT_STATUS,
+				  &reg);
+		if (ret)
+			return ret;
+
+		if (reg & AXP20X_PWR_STATUS_BAT_CHARGING)
+			chan = axp20x_batt->batt_chrg_i;
+		else
+			chan = axp20x_batt->batt_dischrg_i;
+
+		ret = iio_read_channel_processed(chan, &val->intval);
+		if (ret)
+			return ret;
+
+		/* IIO framework gives mA but Power Supply framework gives uA */
+		val->intval *= 1000;
+		break;
+
+	case POWER_SUPPLY_PROP_CAPACITY:
+		/* When no battery is present, return capacity is 100% */
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_PWR_OP_MODE,
+				  &reg);
+		if (ret)
+			return ret;
+
+		if (!(reg & AXP20X_PWR_OP_BATT_PRESENT)) {
+			val->intval = 100;
+			return 0;
+		}
+
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_FG_RES, &reg);
+		if (ret)
+			return ret;
+
+		if (axp20x_batt->axp_id == AXP221_ID &&
+		    !(reg & AXP22X_FG_VALID))
+			return -EINVAL;
+
+		/*
+		 * Fuel Gauge data takes 7 bits but the stored value seems to be
+		 * directly the raw percentage without any scaling to 7 bits.
+		 */
+		val->intval = reg & AXP209_FG_PERCENT;
+		break;
+
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+		if (axp20x_batt->axp_id == AXP209_ID)
+			return axp20x_battery_get_max_voltage(axp20x_batt,
+							      &val->intval);
+		return axp22x_battery_get_max_voltage(axp20x_batt,
+						      &val->intval);
+
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_V_OFF, &reg);
+		if (ret)
+			return ret;
+
+		val->intval = 2600000 + 100000 * (reg & AXP20X_V_OFF_MASK);
+		break;
+
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		ret = iio_read_channel_processed(axp20x_batt->batt_v,
+						 &val->intval);
+		if (ret)
+			return ret;
+
+		/* IIO framework gives mV but Power Supply framework gives uV */
+		val->intval *= 1000;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int axp20x_battery_set_max_voltage(struct axp20x_batt_ps *axp20x_batt,
+					  int val)
+{
+	switch (val) {
+	case 4100000:
+		val = AXP20X_CHRG_CTRL1_TGT_4_1V;
+		break;
+
+	case 4150000:
+		if (axp20x_batt->axp_id == AXP221_ID)
+			return -EINVAL;
+
+		val = AXP20X_CHRG_CTRL1_TGT_4_15V;
+		break;
+
+	case 4200000:
+		val = AXP20X_CHRG_CTRL1_TGT_4_2V;
+		break;
+
+	default:
+		/*
+		 * AXP20x max voltage can be set to 4.36V and AXP22X max voltage
+		 * can be set to 4.22V and 4.24V, but these voltages are too
+		 * high for Lithium based batteries (AXP PMICs are supposed to
+		 * be used with these kinds of battery).
+		 */
+		return -EINVAL;
+	}
+
+	return regmap_update_bits(axp20x_batt->regmap, AXP20X_CHRG_CTRL1,
+				  AXP20X_CHRG_CTRL1_TGT_VOLT, val);
+}
+
+static int axp20x_set_constant_charge_current(struct axp20x_batt_ps *axp_batt,
+					      int charge_current)
+{
+	if (axp_batt->axp_id == AXP209_ID)
+		charge_current = (charge_current - 300000) / 100000;
+	else
+		charge_current = (charge_current - 300000) / 150000;
+
+	if (charge_current > AXP20X_CHRG_CTRL1_TGT_CURR || charge_current < 0)
+		return -EINVAL;
+
+	return regmap_update_bits(axp_batt->regmap, AXP20X_CHRG_CTRL1,
+				  AXP20X_CHRG_CTRL1_TGT_CURR, charge_current);
+}
+
+static int axp20x_set_voltage_min_design(struct axp20x_batt_ps *axp_batt,
+					 int min_voltage)
+{
+	int val1 = (min_voltage - 2600000) / 100000;
+
+	if (val1 < 0 || val1 > AXP20X_V_OFF_MASK)
+		return -EINVAL;
+
+	return regmap_update_bits(axp_batt->regmap, AXP20X_V_OFF,
+				  AXP20X_V_OFF_MASK, val1);
+}
+
+static int axp20x_battery_set_prop(struct power_supply *psy,
+				   enum power_supply_property psp,
+				   const union power_supply_propval *val)
+{
+	struct axp20x_batt_ps *axp20x_batt = power_supply_get_drvdata(psy);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		return axp20x_set_voltage_min_design(axp20x_batt, val->intval);
+
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+		return axp20x_battery_set_max_voltage(axp20x_batt, val->intval);
+
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
+		return axp20x_set_constant_charge_current(axp20x_batt,
+							  val->intval);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static enum power_supply_property axp20x_battery_props[] = {
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT,
+	POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX,
+	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_CAPACITY,
+};
+
+static int axp20x_battery_prop_writeable(struct power_supply *psy,
+					 enum power_supply_property psp)
+{
+	return psp == POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN ||
+	       psp == POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN ||
+	       psp == POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT;
+}
+
+static const struct power_supply_desc axp20x_batt_ps_desc = {
+	.name = "axp20x-battery",
+	.type = POWER_SUPPLY_TYPE_BATTERY,
+	.properties = axp20x_battery_props,
+	.num_properties = ARRAY_SIZE(axp20x_battery_props),
+	.property_is_writeable = axp20x_battery_prop_writeable,
+	.get_property = axp20x_battery_get_prop,
+	.set_property = axp20x_battery_set_prop,
+};
+
+static const struct of_device_id axp20x_battery_ps_id[] = {
+	{
+		.compatible = "x-powers,axp209-battery-power-supply",
+		.data = (void *)AXP209_ID,
+	}, {
+		.compatible = "x-powers,axp221-battery-power-supply",
+		.data = (void *)AXP221_ID,
+	}, { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, axp20x_battery_ps_id);
+
+static int axp20x_power_probe(struct platform_device *pdev)
+{
+	struct axp20x_batt_ps *axp20x_batt;
+	struct power_supply_config psy_cfg = {};
+
+	if (!of_device_is_available(pdev->dev.of_node))
+		return -ENODEV;
+
+	axp20x_batt = devm_kzalloc(&pdev->dev, sizeof(*axp20x_batt),
+				   GFP_KERNEL);
+	if (!axp20x_batt)
+		return -ENOMEM;
+
+	axp20x_batt->dev = &pdev->dev;
+
+	axp20x_batt->batt_v = devm_iio_channel_get(&pdev->dev, "batt_v");
+	if (IS_ERR(axp20x_batt->batt_v)) {
+		if (PTR_ERR(axp20x_batt->batt_v) == -ENODEV)
+			return -EPROBE_DEFER;
+		return PTR_ERR(axp20x_batt->batt_v);
+	}
+
+	axp20x_batt->batt_chrg_i = devm_iio_channel_get(&pdev->dev,
+							"batt_chrg_i");
+	if (IS_ERR(axp20x_batt->batt_chrg_i)) {
+		if (PTR_ERR(axp20x_batt->batt_chrg_i) == -ENODEV)
+			return -EPROBE_DEFER;
+		return PTR_ERR(axp20x_batt->batt_chrg_i);
+	}
+
+	axp20x_batt->batt_dischrg_i = devm_iio_channel_get(&pdev->dev,
+							   "batt_dischrg_i");
+	if (IS_ERR(axp20x_batt->batt_dischrg_i)) {
+		if (PTR_ERR(axp20x_batt->batt_dischrg_i) == -ENODEV)
+			return -EPROBE_DEFER;
+		return PTR_ERR(axp20x_batt->batt_dischrg_i);
+	}
+
+	axp20x_batt->regmap = dev_get_regmap(pdev->dev.parent, NULL);
+	platform_set_drvdata(pdev, axp20x_batt);
+
+	psy_cfg.drv_data = axp20x_batt;
+	psy_cfg.of_node = pdev->dev.of_node;
+
+	axp20x_batt->axp_id = (uintptr_t)of_device_get_match_data(&pdev->dev);
+
+	axp20x_batt->batt = devm_power_supply_register(&pdev->dev,
+						       &axp20x_batt_ps_desc,
+						       &psy_cfg);
+	if (IS_ERR(axp20x_batt->batt)) {
+		dev_err(&pdev->dev, "failed to register power supply: %ld\n",
+			PTR_ERR(axp20x_batt->batt));
+		return PTR_ERR(axp20x_batt->batt);
+	}
+
+	return 0;
+}
+
+static struct platform_driver axp20x_batt_driver = {
+	.probe    = axp20x_power_probe,
+	.driver   = {
+		.name  = "axp20x-battery-power-supply",
+		.of_match_table = axp20x_battery_ps_id,
+	},
+};
+
+module_platform_driver(axp20x_batt_driver);
+
+MODULE_DESCRIPTION("Battery power supply driver for AXP20X and AXP22X PMICs");
+MODULE_AUTHOR("Quentin Schulz <quentin.schulz@free-electrons.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c
index bd9e5c3d8cc2..d5a707e14526 100644
--- a/drivers/power/supply/bq24190_charger.c
+++ b/drivers/power/supply/bq24190_charger.c
@@ -533,6 +533,9 @@ static int bq24190_register_reset(struct bq24190_dev_info *bdi)
 	int ret, limit = 100;
 	u8 v;
 
+	if (device_property_read_bool(bdi->dev, "disable-reset"))
+		return 0;
+
 	/* Reset the registers */
 	ret = bq24190_write_mask(bdi, BQ24190_REG_POC,
 			BQ24190_REG_POC_RESET_MASK,
@@ -659,22 +662,25 @@ static int bq24190_charger_get_health(struct bq24190_dev_info *bdi,
 	v = bdi->f_reg;
 	mutex_unlock(&bdi->f_reg_lock);
 
-	if (v & BQ24190_REG_F_BOOST_FAULT_MASK) {
-		/*
-		 * This could be over-current or over-voltage but there's
-		 * no way to tell which.  Return 'OVERVOLTAGE' since there
-		 * isn't an 'OVERCURRENT' value defined that we can return
-		 * even if it was over-current.
-		 */
-		health = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
-	} else {
-		v &= BQ24190_REG_F_CHRG_FAULT_MASK;
-		v >>= BQ24190_REG_F_CHRG_FAULT_SHIFT;
-
-		switch (v) {
-		case 0x0: /* Normal */
-			health = POWER_SUPPLY_HEALTH_GOOD;
+	if (v & BQ24190_REG_F_NTC_FAULT_MASK) {
+		switch (v >> BQ24190_REG_F_NTC_FAULT_SHIFT & 0x7) {
+		case 0x1: /* TS1  Cold */
+		case 0x3: /* TS2  Cold */
+		case 0x5: /* Both Cold */
+			health = POWER_SUPPLY_HEALTH_COLD;
+			break;
+		case 0x2: /* TS1  Hot */
+		case 0x4: /* TS2  Hot */
+		case 0x6: /* Both Hot */
+			health = POWER_SUPPLY_HEALTH_OVERHEAT;
 			break;
+		default:
+			health = POWER_SUPPLY_HEALTH_UNKNOWN;
+		}
+	} else if (v & BQ24190_REG_F_BAT_FAULT_MASK) {
+		health = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
+	} else if (v & BQ24190_REG_F_CHRG_FAULT_MASK) {
+		switch (v >> BQ24190_REG_F_CHRG_FAULT_SHIFT & 0x3) {
 		case 0x1: /* Input Fault (VBUS OVP or VBAT<VBUS<3.8V) */
 			/*
 			 * This could be over-voltage or under-voltage
@@ -691,9 +697,19 @@ static int bq24190_charger_get_health(struct bq24190_dev_info *bdi,
 		case 0x3: /* Charge Safety Timer Expiration */
 			health = POWER_SUPPLY_HEALTH_SAFETY_TIMER_EXPIRE;
 			break;
-		default:
-			health = POWER_SUPPLY_HEALTH_UNKNOWN;
+		default:  /* prevent compiler warning */
+			health = -1;
 		}
+	} else if (v & BQ24190_REG_F_BOOST_FAULT_MASK) {
+		/*
+		 * This could be over-current or over-voltage but there's
+		 * no way to tell which.  Return 'OVERVOLTAGE' since there
+		 * isn't an 'OVERCURRENT' value defined that we can return
+		 * even if it was over-current.
+		 */
+		health = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
+	} else {
+		health = POWER_SUPPLY_HEALTH_GOOD;
 	}
 
 	val->intval = health;
@@ -704,19 +720,59 @@ static int bq24190_charger_get_health(struct bq24190_dev_info *bdi,
 static int bq24190_charger_get_online(struct bq24190_dev_info *bdi,
 		union power_supply_propval *val)
 {
-	u8 v;
+	u8 pg_stat, batfet_disable;
 	int ret;
 
 	ret = bq24190_read_mask(bdi, BQ24190_REG_SS,
 			BQ24190_REG_SS_PG_STAT_MASK,
-			BQ24190_REG_SS_PG_STAT_SHIFT, &v);
+			BQ24190_REG_SS_PG_STAT_SHIFT, &pg_stat);
 	if (ret < 0)
 		return ret;
 
-	val->intval = v;
+	ret = bq24190_read_mask(bdi, BQ24190_REG_MOC,
+			BQ24190_REG_MOC_BATFET_DISABLE_MASK,
+			BQ24190_REG_MOC_BATFET_DISABLE_SHIFT, &batfet_disable);
+	if (ret < 0)
+		return ret;
+
+	val->intval = pg_stat && !batfet_disable;
+
 	return 0;
 }
 
+static int bq24190_battery_set_online(struct bq24190_dev_info *bdi,
+				      const union power_supply_propval *val);
+static int bq24190_battery_get_status(struct bq24190_dev_info *bdi,
+				      union power_supply_propval *val);
+static int bq24190_battery_get_temp_alert_max(struct bq24190_dev_info *bdi,
+					      union power_supply_propval *val);
+static int bq24190_battery_set_temp_alert_max(struct bq24190_dev_info *bdi,
+					      const union power_supply_propval *val);
+
+static int bq24190_charger_set_online(struct bq24190_dev_info *bdi,
+				      const union power_supply_propval *val)
+{
+	return bq24190_battery_set_online(bdi, val);
+}
+
+static int bq24190_charger_get_status(struct bq24190_dev_info *bdi,
+				      union power_supply_propval *val)
+{
+	return bq24190_battery_get_status(bdi, val);
+}
+
+static int bq24190_charger_get_temp_alert_max(struct bq24190_dev_info *bdi,
+					      union power_supply_propval *val)
+{
+	return bq24190_battery_get_temp_alert_max(bdi, val);
+}
+
+static int bq24190_charger_set_temp_alert_max(struct bq24190_dev_info *bdi,
+					      const union power_supply_propval *val)
+{
+	return bq24190_battery_set_temp_alert_max(bdi, val);
+}
+
 static int bq24190_charger_get_current(struct bq24190_dev_info *bdi,
 		union power_supply_propval *val)
 {
@@ -831,6 +887,12 @@ static int bq24190_charger_get_property(struct power_supply *psy,
 	case POWER_SUPPLY_PROP_ONLINE:
 		ret = bq24190_charger_get_online(bdi, val);
 		break;
+	case POWER_SUPPLY_PROP_STATUS:
+		ret = bq24190_charger_get_status(bdi, val);
+		break;
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
+		ret =  bq24190_charger_get_temp_alert_max(bdi, val);
+		break;
 	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
 		ret = bq24190_charger_get_current(bdi, val);
 		break;
@@ -879,6 +941,12 @@ static int bq24190_charger_set_property(struct power_supply *psy,
 		return ret;
 
 	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		ret = bq24190_charger_set_online(bdi, val);
+		break;
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
+		ret = bq24190_charger_set_temp_alert_max(bdi, val);
+		break;
 	case POWER_SUPPLY_PROP_CHARGE_TYPE:
 		ret = bq24190_charger_set_charge_type(bdi, val);
 		break;
@@ -904,6 +972,8 @@ static int bq24190_charger_property_is_writeable(struct power_supply *psy,
 	int ret;
 
 	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
 	case POWER_SUPPLY_PROP_CHARGE_TYPE:
 	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
 	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
@@ -920,6 +990,8 @@ static enum power_supply_property bq24190_charger_properties[] = {
 	POWER_SUPPLY_PROP_CHARGE_TYPE,
 	POWER_SUPPLY_PROP_HEALTH,
 	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_TEMP_ALERT_MAX,
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT,
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX,
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE,
@@ -1093,6 +1165,7 @@ static int bq24190_battery_get_property(struct power_supply *psy,
 	struct bq24190_dev_info *bdi = power_supply_get_drvdata(psy);
 	int ret;
 
+	dev_warn(bdi->dev, "warning: /sys/class/power_supply/bq24190-battery is deprecated\n");
 	dev_dbg(bdi->dev, "prop: %d\n", psp);
 
 	ret = pm_runtime_get_sync(bdi->dev);
@@ -1138,6 +1211,7 @@ static int bq24190_battery_set_property(struct power_supply *psy,
 	struct bq24190_dev_info *bdi = power_supply_get_drvdata(psy);
 	int ret;
 
+	dev_warn(bdi->dev, "warning: /sys/class/power_supply/bq24190-battery is deprecated\n");
 	dev_dbg(bdi->dev, "prop: %d\n", psp);
 
 	ret = pm_runtime_get_sync(bdi->dev);
@@ -1266,9 +1340,9 @@ static void bq24190_check_status(struct bq24190_dev_info *bdi)
 		bdi->ss_reg = ss_reg;
 	}
 
-	if (alert_charger)
+	if (alert_charger || alert_battery)
 		power_supply_changed(bdi->charger);
-	if (alert_battery)
+	if (alert_battery && bdi->battery)
 		power_supply_changed(bdi->battery);
 
 	dev_dbg(bdi->dev, "ss_reg: 0x%02x, f_reg: 0x%02x\n", ss_reg, f_reg);
@@ -1473,19 +1547,23 @@ static int bq24190_probe(struct i2c_client *client,
 		goto out_pmrt;
 	}
 
-	battery_cfg.drv_data = bdi;
-	bdi->battery = power_supply_register(dev, &bq24190_battery_desc,
-						&battery_cfg);
-	if (IS_ERR(bdi->battery)) {
-		dev_err(dev, "Can't register battery\n");
-		ret = PTR_ERR(bdi->battery);
-		goto out_charger;
+	/* the battery class is deprecated and will be removed. */
+	/* in the interim, this property hides it.              */
+	if (!device_property_read_bool(dev, "omit-battery-class")) {
+		battery_cfg.drv_data = bdi;
+		bdi->battery = power_supply_register(dev, &bq24190_battery_desc,
+						     &battery_cfg);
+		if (IS_ERR(bdi->battery)) {
+			dev_err(dev, "Can't register battery\n");
+			ret = PTR_ERR(bdi->battery);
+			goto out_charger;
+		}
 	}
 
 	ret = bq24190_sysfs_create_group(bdi);
 	if (ret) {
 		dev_err(dev, "Can't create sysfs entries\n");
-		goto out_battery;
+		goto out_charger;
 	}
 
 	bdi->initialized = true;
@@ -1523,10 +1601,9 @@ static int bq24190_probe(struct i2c_client *client,
 out_sysfs:
 	bq24190_sysfs_remove_group(bdi);
 
-out_battery:
-	power_supply_unregister(bdi->battery);
-
 out_charger:
+	if (!IS_ERR_OR_NULL(bdi->battery))
+		power_supply_unregister(bdi->battery);
 	power_supply_unregister(bdi->charger);
 
 out_pmrt:
@@ -1549,7 +1626,8 @@ static int bq24190_remove(struct i2c_client *client)
 
 	bq24190_register_reset(bdi);
 	bq24190_sysfs_remove_group(bdi);
-	power_supply_unregister(bdi->battery);
+	if (bdi->battery)
+		power_supply_unregister(bdi->battery);
 	power_supply_unregister(bdi->charger);
 	if (error >= 0)
 		pm_runtime_put_sync(bdi->dev);
@@ -1636,7 +1714,8 @@ static __maybe_unused int bq24190_pm_resume(struct device *dev)
 
 	/* Things may have changed while suspended so alert upper layer */
 	power_supply_changed(bdi->charger);
-	power_supply_changed(bdi->battery);
+	if (bdi->battery)
+		power_supply_changed(bdi->battery);
 
 	return 0;
 }
diff --git a/drivers/power/supply/cpcap-charger.c b/drivers/power/supply/cpcap-charger.c
index 543a1bd21ab9..26a2dc7ac9a2 100644
--- a/drivers/power/supply/cpcap-charger.c
+++ b/drivers/power/supply/cpcap-charger.c
@@ -76,7 +76,7 @@
 #define CPCAP_REG_CRM_VCHRG_4V30	CPCAP_REG_CRM_VCHRG(0x8)
 #define CPCAP_REG_CRM_VCHRG_4V32	CPCAP_REG_CRM_VCHRG(0x9)
 #define CPCAP_REG_CRM_VCHRG_4V34	CPCAP_REG_CRM_VCHRG(0xa)
-#define CPCAP_REG_CRM_VCHRG_4V36	CPCAP_REG_CRM_VCHRG(0xb)
+#define CPCAP_REG_CRM_VCHRG_4V35	CPCAP_REG_CRM_VCHRG(0xb)
 #define CPCAP_REG_CRM_VCHRG_4V38	CPCAP_REG_CRM_VCHRG(0xc)
 #define CPCAP_REG_CRM_VCHRG_4V40	CPCAP_REG_CRM_VCHRG(0xd)
 #define CPCAP_REG_CRM_VCHRG_4V42	CPCAP_REG_CRM_VCHRG(0xe)
@@ -262,7 +262,7 @@ static int cpcap_charger_set_state(struct cpcap_charger_ddata *ddata,
 	bool enable;
 	int error;
 
-	enable = max_voltage && (charge_current || trickle_current);
+	enable = (charge_current || trickle_current);
 	dev_dbg(ddata->dev, "%s enable: %i\n", __func__, enable);
 
 	if (!enable) {
@@ -433,9 +433,8 @@ static void cpcap_usb_detect(struct work_struct *work)
 			max_current = CPCAP_REG_CRM_ICHRG_0A528;
 
 		error = cpcap_charger_set_state(ddata,
-						CPCAP_REG_CRM_VCHRG_4V20,
-						max_current,
-						CPCAP_REG_CRM_TR_0A72);
+						CPCAP_REG_CRM_VCHRG_4V35,
+						max_current, 0);
 		if (error)
 			goto out_err;
 	} else {
@@ -566,7 +565,7 @@ out_err:
 }
 
 static const struct power_supply_desc cpcap_charger_usb_desc = {
-	.name		= "cpcap_usb",
+	.name		= "usb",
 	.type		= POWER_SUPPLY_TYPE_USB,
 	.properties	= cpcap_charger_props,
 	.num_properties	= ARRAY_SIZE(cpcap_charger_props),
diff --git a/drivers/power/supply/generic-adc-battery.c b/drivers/power/supply/generic-adc-battery.c
index edb36bf781b0..37e523374fe0 100644
--- a/drivers/power/supply/generic-adc-battery.c
+++ b/drivers/power/supply/generic-adc-battery.c
@@ -383,8 +383,7 @@ static int gab_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int gab_suspend(struct device *dev)
+static int __maybe_unused gab_suspend(struct device *dev)
 {
 	struct gab *adc_bat = dev_get_drvdata(dev);
 
@@ -393,7 +392,7 @@ static int gab_suspend(struct device *dev)
 	return 0;
 }
 
-static int gab_resume(struct device *dev)
+static int __maybe_unused gab_resume(struct device *dev)
 {
 	struct gab *adc_bat = dev_get_drvdata(dev);
 	struct gab_platform_data *pdata = adc_bat->pdata;
@@ -407,20 +406,12 @@ static int gab_resume(struct device *dev)
 	return 0;
 }
 
-static const struct dev_pm_ops gab_pm_ops = {
-	.suspend        = gab_suspend,
-	.resume         = gab_resume,
-};
-
-#define GAB_PM_OPS       (&gab_pm_ops)
-#else
-#define GAB_PM_OPS       (NULL)
-#endif
+static SIMPLE_DEV_PM_OPS(gab_pm_ops, gab_suspend, gab_resume);
 
 static struct platform_driver gab_driver = {
 	.driver		= {
 		.name	= "generic-adc-battery",
-		.pm	= GAB_PM_OPS
+		.pm	= &gab_pm_ops,
 	},
 	.probe		= gab_probe,
 	.remove		= gab_remove,
diff --git a/drivers/power/supply/isp1704_charger.c b/drivers/power/supply/isp1704_charger.c
index 4cd6899b961e..95af5f305838 100644
--- a/drivers/power/supply/isp1704_charger.c
+++ b/drivers/power/supply/isp1704_charger.c
@@ -418,6 +418,10 @@ static int isp1704_charger_probe(struct platform_device *pdev)
 
 		pdata = devm_kzalloc(&pdev->dev,
 			sizeof(struct isp1704_charger_data), GFP_KERNEL);
+		if (!pdata) {
+			ret = -ENOMEM;
+			goto fail0;
+		}
 		pdata->enable_gpio = gpio;
 
 		dev_info(&pdev->dev, "init gpio %d\n", pdata->enable_gpio);
diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index da7a75f82489..aecaaa2b0586 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -76,15 +76,20 @@ struct max17042_chip {
 };
 
 static enum power_supply_property max17042_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
 	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
 	POWER_SUPPLY_PROP_CYCLE_COUNT,
 	POWER_SUPPLY_PROP_VOLTAGE_MAX,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN,
 	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
 	POWER_SUPPLY_PROP_VOLTAGE_AVG,
 	POWER_SUPPLY_PROP_VOLTAGE_OCV,
 	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
 	POWER_SUPPLY_PROP_CHARGE_FULL,
+	POWER_SUPPLY_PROP_CHARGE_NOW,
 	POWER_SUPPLY_PROP_CHARGE_COUNTER,
 	POWER_SUPPLY_PROP_TEMP,
 	POWER_SUPPLY_PROP_TEMP_ALERT_MIN,
@@ -92,6 +97,7 @@ static enum power_supply_property max17042_battery_props[] = {
 	POWER_SUPPLY_PROP_TEMP_MIN,
 	POWER_SUPPLY_PROP_TEMP_MAX,
 	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_SCOPE,
 	POWER_SUPPLY_PROP_CURRENT_NOW,
 	POWER_SUPPLY_PROP_CURRENT_AVG,
 };
@@ -106,19 +112,53 @@ static int max17042_get_temperature(struct max17042_chip *chip, int *temp)
 	if (ret < 0)
 		return ret;
 
-	*temp = data;
-	/* The value is signed. */
-	if (*temp & 0x8000) {
-		*temp = (0x7fff & ~*temp) + 1;
-		*temp *= -1;
-	}
-
+	*temp = sign_extend32(data, 15);
 	/* The value is converted into deci-centigrade scale */
 	/* Units of LSB = 1 / 256 degree Celsius */
 	*temp = *temp * 10 / 256;
 	return 0;
 }
 
+static int max17042_get_status(struct max17042_chip *chip, int *status)
+{
+	int ret, charge_full, charge_now;
+
+	ret = power_supply_am_i_supplied(chip->battery);
+	if (ret < 0) {
+		*status = POWER_SUPPLY_STATUS_UNKNOWN;
+		return 0;
+	}
+	if (ret == 0) {
+		*status = POWER_SUPPLY_STATUS_DISCHARGING;
+		return 0;
+	}
+
+	/*
+	 * The MAX170xx has builtin end-of-charge detection and will update
+	 * FullCAP to match RepCap when it detects end of charging.
+	 *
+	 * When this cycle the battery gets charged to a higher (calculated)
+	 * capacity then the previous cycle then FullCAP will get updated
+	 * contineously once end-of-charge detection kicks in, so allow the
+	 * 2 to differ a bit.
+	 */
+
+	ret = regmap_read(chip->regmap, MAX17042_FullCAP, &charge_full);
+	if (ret < 0)
+		return ret;
+
+	ret = regmap_read(chip->regmap, MAX17042_RepCap, &charge_now);
+	if (ret < 0)
+		return ret;
+
+	if ((charge_full - charge_now) <= MAX17042_FULL_THRESHOLD)
+		*status = POWER_SUPPLY_STATUS_FULL;
+	else
+		*status = POWER_SUPPLY_STATUS_CHARGING;
+
+	return 0;
+}
+
 static int max17042_get_battery_health(struct max17042_chip *chip, int *health)
 {
 	int temp, vavg, vbatt, ret;
@@ -156,12 +196,12 @@ static int max17042_get_battery_health(struct max17042_chip *chip, int *health)
 	if (ret < 0)
 		goto health_error;
 
-	if (temp <= chip->pdata->temp_min) {
+	if (temp < chip->pdata->temp_min) {
 		*health = POWER_SUPPLY_HEALTH_COLD;
 		goto out;
 	}
 
-	if (temp >= chip->pdata->temp_max) {
+	if (temp > chip->pdata->temp_max) {
 		*health = POWER_SUPPLY_HEALTH_OVERHEAT;
 		goto out;
 	}
@@ -183,11 +223,17 @@ static int max17042_get_property(struct power_supply *psy,
 	struct regmap *map = chip->regmap;
 	int ret;
 	u32 data;
+	u64 data64;
 
 	if (!chip->init_complete)
 		return -EAGAIN;
 
 	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		ret = max17042_get_status(chip, &val->intval);
+		if (ret < 0)
+			return ret;
+		break;
 	case POWER_SUPPLY_PROP_PRESENT:
 		ret = regmap_read(map, MAX17042_STATUS, &data);
 		if (ret < 0)
@@ -198,6 +244,9 @@ static int max17042_get_property(struct power_supply *psy,
 		else
 			val->intval = 1;
 		break;
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = POWER_SUPPLY_TECHNOLOGY_LION;
+		break;
 	case POWER_SUPPLY_PROP_CYCLE_COUNT:
 		ret = regmap_read(map, MAX17042_Cycles, &data);
 		if (ret < 0)
@@ -213,6 +262,13 @@ static int max17042_get_property(struct power_supply *psy,
 		val->intval = data >> 8;
 		val->intval *= 20000; /* Units of LSB = 20mV */
 		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN:
+		ret = regmap_read(map, MAX17042_MinMaxVolt, &data);
+		if (ret < 0)
+			return ret;
+
+		val->intval = (data & 0xff) * 20000; /* Units of 20mV */
+		break;
 	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
 		if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17042)
 			ret = regmap_read(map, MAX17042_V_empty, &data);
@@ -252,12 +308,32 @@ static int max17042_get_property(struct power_supply *psy,
 
 		val->intval = data >> 8;
 		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+		ret = regmap_read(map, MAX17042_DesignCap, &data);
+		if (ret < 0)
+			return ret;
+
+		data64 = data * 5000000ll;
+		do_div(data64, chip->pdata->r_sns);
+		val->intval = data64;
+		break;
 	case POWER_SUPPLY_PROP_CHARGE_FULL:
 		ret = regmap_read(map, MAX17042_FullCAP, &data);
 		if (ret < 0)
 			return ret;
 
-		val->intval = data * 1000 / 2;
+		data64 = data * 5000000ll;
+		do_div(data64, chip->pdata->r_sns);
+		val->intval = data64;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+		ret = regmap_read(map, MAX17042_RepCap, &data);
+		if (ret < 0)
+			return ret;
+
+		data64 = data * 5000000ll;
+		do_div(data64, chip->pdata->r_sns);
+		val->intval = data64;
 		break;
 	case POWER_SUPPLY_PROP_CHARGE_COUNTER:
 		ret = regmap_read(map, MAX17042_QH, &data);
@@ -276,14 +352,14 @@ static int max17042_get_property(struct power_supply *psy,
 		if (ret < 0)
 			return ret;
 		/* LSB is Alert Minimum. In deci-centigrade */
-		val->intval = (data & 0xff) * 10;
+		val->intval = sign_extend32(data & 0xff, 7) * 10;
 		break;
 	case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
 		ret = regmap_read(map, MAX17042_TALRT_Th, &data);
 		if (ret < 0)
 			return ret;
 		/* MSB is Alert Maximum. In deci-centigrade */
-		val->intval = (data >> 8) * 10;
+		val->intval = sign_extend32(data >> 8, 7) * 10;
 		break;
 	case POWER_SUPPLY_PROP_TEMP_MIN:
 		val->intval = chip->pdata->temp_min;
@@ -296,19 +372,16 @@ static int max17042_get_property(struct power_supply *psy,
 		if (ret < 0)
 			return ret;
 		break;
+	case POWER_SUPPLY_PROP_SCOPE:
+		val->intval = POWER_SUPPLY_SCOPE_SYSTEM;
+		break;
 	case POWER_SUPPLY_PROP_CURRENT_NOW:
 		if (chip->pdata->enable_current_sense) {
 			ret = regmap_read(map, MAX17042_Current, &data);
 			if (ret < 0)
 				return ret;
 
-			val->intval = data;
-			if (val->intval & 0x8000) {
-				/* Negative */
-				val->intval = ~val->intval & 0x7fff;
-				val->intval++;
-				val->intval *= -1;
-			}
+			val->intval = sign_extend32(data, 15);
 			val->intval *= 1562500 / chip->pdata->r_sns;
 		} else {
 			return -EINVAL;
@@ -320,13 +393,7 @@ static int max17042_get_property(struct power_supply *psy,
 			if (ret < 0)
 				return ret;
 
-			val->intval = data;
-			if (val->intval & 0x8000) {
-				/* Negative */
-				val->intval = ~val->intval & 0x7fff;
-				val->intval++;
-				val->intval *= -1;
-			}
+			val->intval = sign_extend32(data, 15);
 			val->intval *= 1562500 / chip->pdata->r_sns;
 		} else {
 			return -EINVAL;
@@ -401,6 +468,11 @@ static int max17042_property_is_writeable(struct power_supply *psy,
 	return ret;
 }
 
+static void max17042_external_power_changed(struct power_supply *psy)
+{
+	power_supply_changed(psy);
+}
+
 static int max17042_write_verify_reg(struct regmap *map, u8 reg, u32 value)
 {
 	int retries = 8;
@@ -790,8 +862,9 @@ static void max17042_init_worker(struct work_struct *work)
 
 #ifdef CONFIG_OF
 static struct max17042_platform_data *
-max17042_get_pdata(struct device *dev)
+max17042_get_pdata(struct max17042_chip *chip)
 {
+	struct device *dev = &chip->client->dev;
 	struct device_node *np = dev->of_node;
 	u32 prop;
 	struct max17042_platform_data *pdata;
@@ -824,10 +897,55 @@ max17042_get_pdata(struct device *dev)
 	return pdata;
 }
 #else
+static struct max17042_reg_data max17047_default_pdata_init_regs[] = {
+	/*
+	 * Some firmwares do not set FullSOCThr, Enable End-of-Charge Detection
+	 * when the voltage FG reports 95%, as recommended in the datasheet.
+	 */
+	{ MAX17047_FullSOCThr, MAX17042_BATTERY_FULL << 8 },
+};
+
 static struct max17042_platform_data *
-max17042_get_pdata(struct device *dev)
+max17042_get_pdata(struct max17042_chip *chip)
 {
-	return dev->platform_data;
+	struct device *dev = &chip->client->dev;
+	struct max17042_platform_data *pdata;
+	int ret, misc_cfg;
+
+	if (dev->platform_data)
+		return dev->platform_data;
+
+	/*
+	 * The MAX17047 gets used on x86 where we might not have pdata, assume
+	 * the firmware will already have initialized the fuel-gauge and provide
+	 * default values for the non init bits to make things work.
+	 */
+	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return pdata;
+
+	if (chip->chip_type != MAXIM_DEVICE_TYPE_MAX17042) {
+		pdata->init_data = max17047_default_pdata_init_regs;
+		pdata->num_init_data =
+			ARRAY_SIZE(max17047_default_pdata_init_regs);
+	}
+
+	ret = regmap_read(chip->regmap, MAX17042_MiscCFG, &misc_cfg);
+	if (ret < 0)
+		return NULL;
+
+	/* If bits 0-1 are set to 3 then only Voltage readings are used */
+	if ((misc_cfg & 0x3) == 0x3)
+		pdata->enable_current_sense = false;
+	else
+		pdata->enable_current_sense = true;
+
+	pdata->vmin = MAX17042_DEFAULT_VMIN;
+	pdata->vmax = MAX17042_DEFAULT_VMAX;
+	pdata->temp_min = MAX17042_DEFAULT_TEMP_MIN;
+	pdata->temp_max = MAX17042_DEFAULT_TEMP_MAX;
+
+	return pdata;
 }
 #endif
 
@@ -843,6 +961,7 @@ static const struct power_supply_desc max17042_psy_desc = {
 	.get_property	= max17042_get_property,
 	.set_property	= max17042_set_property,
 	.property_is_writeable	= max17042_property_is_writeable,
+	.external_power_changed	= max17042_external_power_changed,
 	.properties	= max17042_battery_props,
 	.num_properties	= ARRAY_SIZE(max17042_battery_props),
 };
@@ -876,20 +995,20 @@ static int max17042_probe(struct i2c_client *client,
 		return -ENOMEM;
 
 	chip->client = client;
+	chip->chip_type = id->driver_data;
 	chip->regmap = devm_regmap_init_i2c(client, &max17042_regmap_config);
 	if (IS_ERR(chip->regmap)) {
 		dev_err(&client->dev, "Failed to initialize regmap\n");
 		return -EINVAL;
 	}
 
-	chip->pdata = max17042_get_pdata(&client->dev);
+	chip->pdata = max17042_get_pdata(chip);
 	if (!chip->pdata) {
 		dev_err(&client->dev, "no platform data provided\n");
 		return -EINVAL;
 	}
 
 	i2c_set_clientdata(client, chip);
-	chip->chip_type = id->driver_data;
 	psy_cfg.drv_data = chip;
 
 	/* When current is not measured,
diff --git a/drivers/power/supply/pda_power.c b/drivers/power/supply/pda_power.c
index dfe1ee89f7c7..922a86787c5c 100644
--- a/drivers/power/supply/pda_power.c
+++ b/drivers/power/supply/pda_power.c
@@ -30,9 +30,9 @@ static inline unsigned int get_irq_flags(struct resource *res)
 static struct device *dev;
 static struct pda_power_pdata *pdata;
 static struct resource *ac_irq, *usb_irq;
-static struct timer_list charger_timer;
-static struct timer_list supply_timer;
-static struct timer_list polling_timer;
+static struct delayed_work charger_work;
+static struct delayed_work polling_work;
+static struct delayed_work supply_work;
 static int polling;
 static struct power_supply *pda_psy_ac, *pda_psy_usb;
 
@@ -140,7 +140,7 @@ static void update_charger(void)
 	}
 }
 
-static void supply_timer_func(unsigned long unused)
+static void supply_work_func(struct work_struct *work)
 {
 	if (ac_status == PDA_PSY_TO_CHANGE) {
 		ac_status = new_ac_status;
@@ -161,11 +161,12 @@ static void psy_changed(void)
 	 * Okay, charger set. Now wait a bit before notifying supplicants,
 	 * charge power should stabilize.
 	 */
-	mod_timer(&supply_timer,
-		  jiffies + msecs_to_jiffies(pdata->wait_for_charger));
+	cancel_delayed_work(&supply_work);
+	schedule_delayed_work(&supply_work,
+			      msecs_to_jiffies(pdata->wait_for_charger));
 }
 
-static void charger_timer_func(unsigned long unused)
+static void charger_work_func(struct work_struct *work)
 {
 	update_status();
 	psy_changed();
@@ -184,13 +185,14 @@ static irqreturn_t power_changed_isr(int irq, void *power_supply)
 	 * Wait a bit before reading ac/usb line status and setting charger,
 	 * because ac/usb status readings may lag from irq.
 	 */
-	mod_timer(&charger_timer,
-		  jiffies + msecs_to_jiffies(pdata->wait_for_status));
+	cancel_delayed_work(&charger_work);
+	schedule_delayed_work(&charger_work,
+			      msecs_to_jiffies(pdata->wait_for_status));
 
 	return IRQ_HANDLED;
 }
 
-static void polling_timer_func(unsigned long unused)
+static void polling_work_func(struct work_struct *work)
 {
 	int changed = 0;
 
@@ -211,8 +213,9 @@ static void polling_timer_func(unsigned long unused)
 	if (changed)
 		psy_changed();
 
-	mod_timer(&polling_timer,
-		  jiffies + msecs_to_jiffies(pdata->polling_interval));
+	cancel_delayed_work(&polling_work);
+	schedule_delayed_work(&polling_work,
+			      msecs_to_jiffies(pdata->polling_interval));
 }
 
 #if IS_ENABLED(CONFIG_USB_PHY)
@@ -250,8 +253,9 @@ static int otg_handle_notification(struct notifier_block *nb,
 	 * Wait a bit before reading ac/usb line status and setting charger,
 	 * because ac/usb status readings may lag from irq.
 	 */
-	mod_timer(&charger_timer,
-		  jiffies + msecs_to_jiffies(pdata->wait_for_status));
+	cancel_delayed_work(&charger_work);
+	schedule_delayed_work(&charger_work,
+			      msecs_to_jiffies(pdata->wait_for_status));
 
 	return NOTIFY_OK;
 }
@@ -300,8 +304,8 @@ static int pda_power_probe(struct platform_device *pdev)
 	if (!pdata->ac_max_uA)
 		pdata->ac_max_uA = 500000;
 
-	setup_timer(&charger_timer, charger_timer_func, 0);
-	setup_timer(&supply_timer, supply_timer_func, 0);
+	INIT_DELAYED_WORK(&charger_work, charger_work_func);
+	INIT_DELAYED_WORK(&supply_work, supply_work_func);
 
 	ac_irq = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "ac");
 	usb_irq = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "usb");
@@ -385,9 +389,10 @@ static int pda_power_probe(struct platform_device *pdev)
 
 	if (polling) {
 		dev_dbg(dev, "will poll for status\n");
-		setup_timer(&polling_timer, polling_timer_func, 0);
-		mod_timer(&polling_timer,
-			  jiffies + msecs_to_jiffies(pdata->polling_interval));
+		INIT_DELAYED_WORK(&polling_work, polling_work_func);
+		cancel_delayed_work(&polling_work);
+		schedule_delayed_work(&polling_work,
+				      msecs_to_jiffies(pdata->polling_interval));
 	}
 
 	if (ac_irq || usb_irq)
@@ -433,9 +438,9 @@ static int pda_power_remove(struct platform_device *pdev)
 		free_irq(ac_irq->start, pda_psy_ac);
 
 	if (polling)
-		del_timer_sync(&polling_timer);
-	del_timer_sync(&charger_timer);
-	del_timer_sync(&supply_timer);
+		cancel_delayed_work_sync(&polling_work);
+	cancel_delayed_work_sync(&charger_work);
+	cancel_delayed_work_sync(&supply_work);
 
 	if (pdata->is_usb_online)
 		power_supply_unregister(pda_psy_usb);
diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index 1e0960b646e8..7ec7c7c202bd 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -280,13 +280,19 @@ static inline int power_supply_check_supplies(struct power_supply *psy)
 }
 #endif
 
-static int __power_supply_am_i_supplied(struct device *dev, void *data)
+struct psy_am_i_supplied_data {
+	struct power_supply *psy;
+	unsigned int count;
+};
+
+static int __power_supply_am_i_supplied(struct device *dev, void *_data)
 {
 	union power_supply_propval ret = {0,};
-	struct power_supply *psy = data;
 	struct power_supply *epsy = dev_get_drvdata(dev);
+	struct psy_am_i_supplied_data *data = _data;
 
-	if (__power_supply_is_supplied_by(epsy, psy))
+	data->count++;
+	if (__power_supply_is_supplied_by(epsy, data->psy))
 		if (!epsy->desc->get_property(epsy, POWER_SUPPLY_PROP_ONLINE,
 					&ret))
 			return ret.intval;
@@ -296,12 +302,16 @@ static int __power_supply_am_i_supplied(struct device *dev, void *data)
 
 int power_supply_am_i_supplied(struct power_supply *psy)
 {
+	struct psy_am_i_supplied_data data = { psy, 0 };
 	int error;
 
-	error = class_for_each_device(power_supply_class, NULL, psy,
+	error = class_for_each_device(power_supply_class, NULL, &data,
 				      __power_supply_am_i_supplied);
 
-	dev_dbg(&psy->dev, "%s %d\n", __func__, error);
+	dev_dbg(&psy->dev, "%s count %u err %d\n", __func__, data.count, error);
+
+	if (data.count == 0)
+		return -ENODEV;
 
 	return error;
 }
diff --git a/drivers/power/supply/rx51_battery.c b/drivers/power/supply/rx51_battery.c
index af9383d23d12..5654708b1279 100644
--- a/drivers/power/supply/rx51_battery.c
+++ b/drivers/power/supply/rx51_battery.c
@@ -23,7 +23,6 @@
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 #include <linux/slab.h>
-#include <linux/i2c/twl4030-madc.h>
 #include <linux/iio/consumer.h>
 #include <linux/of.h>
 
diff --git a/drivers/power/supply/sbs-battery.c b/drivers/power/supply/sbs-battery.c
index 8bb2eb38eb1c..e3a114e60f1a 100644
--- a/drivers/power/supply/sbs-battery.c
+++ b/drivers/power/supply/sbs-battery.c
@@ -171,7 +171,6 @@ struct sbs_info {
 	u32				i2c_retry_count;
 	u32				poll_retry_count;
 	struct delayed_work		work;
-	int				ignore_changes;
 };
 
 static char model_name[I2C_SMBUS_BLOCK_MAX + 1];
@@ -296,6 +295,31 @@ static int sbs_write_word_data(struct i2c_client *client, u8 address,
 	return 0;
 }
 
+static int sbs_status_correct(struct i2c_client *client, int *intval)
+{
+	int ret;
+
+	ret = sbs_read_word_data(client, sbs_data[REG_CURRENT].addr);
+	if (ret < 0)
+		return ret;
+
+	ret = (s16)ret;
+
+	/* Not drawing current means full (cannot be not charging) */
+	if (ret == 0)
+		*intval = POWER_SUPPLY_STATUS_FULL;
+
+	if (*intval == POWER_SUPPLY_STATUS_FULL) {
+		/* Drawing or providing current when full */
+		if (ret > 0)
+			*intval = POWER_SUPPLY_STATUS_CHARGING;
+		else if (ret < 0)
+			*intval = POWER_SUPPLY_STATUS_DISCHARGING;
+	}
+
+	return 0;
+}
+
 static int sbs_get_battery_presence_and_health(
 	struct i2c_client *client, enum power_supply_property psp,
 	union power_supply_propval *val)
@@ -402,6 +426,8 @@ static int sbs_get_battery_property(struct i2c_client *client,
 		else
 			val->intval = POWER_SUPPLY_STATUS_CHARGING;
 
+		sbs_status_correct(client, &val->intval);
+
 		if (chip->poll_time == 0)
 			chip->last_state = val->intval;
 		else if (chip->last_state != val->intval) {
@@ -675,30 +701,34 @@ done:
 	return 0;
 }
 
-static irqreturn_t sbs_irq(int irq, void *devid)
+static void sbs_supply_changed(struct sbs_info *chip)
 {
-	struct sbs_info *chip = devid;
 	struct power_supply *battery = chip->power_supply;
 	int ret;
 
 	ret = gpiod_get_value_cansleep(chip->gpio_detect);
 	if (ret < 0)
-		return ret;
+		return;
 	chip->is_present = ret;
 	power_supply_changed(battery);
+}
 
+static irqreturn_t sbs_irq(int irq, void *devid)
+{
+	sbs_supply_changed(devid);
 	return IRQ_HANDLED;
 }
 
+static void sbs_alert(struct i2c_client *client, enum i2c_alert_protocol prot,
+	unsigned int data)
+{
+	sbs_supply_changed(i2c_get_clientdata(client));
+}
+
 static void sbs_external_power_changed(struct power_supply *psy)
 {
 	struct sbs_info *chip = power_supply_get_drvdata(psy);
 
-	if (chip->ignore_changes > 0) {
-		chip->ignore_changes--;
-		return;
-	}
-
 	/* cancel outstanding work */
 	cancel_delayed_work_sync(&chip->work);
 
@@ -727,6 +757,8 @@ static void sbs_delayed_work(struct work_struct *work)
 	else
 		ret = POWER_SUPPLY_STATUS_CHARGING;
 
+	sbs_status_correct(chip->client, &ret);
+
 	if (chip->last_state != ret) {
 		chip->poll_time = 0;
 		power_supply_changed(chip->power_supply);
@@ -775,10 +807,6 @@ static int sbs_probe(struct i2c_client *client,
 	chip->enable_detection = false;
 	psy_cfg.of_node = client->dev.of_node;
 	psy_cfg.drv_data = chip;
-	/* ignore first notification of external change, it is generated
-	 * from the power_supply_register call back
-	 */
-	chip->ignore_changes = 1;
 	chip->last_state = POWER_SUPPLY_STATUS_UNKNOWN;
 
 	/* use pdata if available, fall back to DT properties,
@@ -820,7 +848,7 @@ static int sbs_probe(struct i2c_client *client,
 	}
 
 	rc = devm_request_threaded_irq(&client->dev, irq, NULL, sbs_irq,
-		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 		dev_name(&client->dev), chip);
 	if (rc) {
 		dev_warn(&client->dev, "Failed to request irq: %d\n", rc);
@@ -917,6 +945,7 @@ MODULE_DEVICE_TABLE(of, sbs_dt_ids);
 static struct i2c_driver sbs_battery_driver = {
 	.probe		= sbs_probe,
 	.remove		= sbs_remove,
+	.alert		= sbs_alert,
 	.id_table	= sbs_id,
 	.driver = {
 		.name	= "sbs-battery",
diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c
index 990ff3d218bc..2f82d0e9ec1b 100644
--- a/drivers/power/supply/twl4030_charger.c
+++ b/drivers/power/supply/twl4030_charger.c
@@ -206,35 +206,6 @@ static int twl4030bci_read_adc_val(u8 reg)
 }
 
 /*
- * Check if Battery Pack was present
- */
-static int twl4030_is_battery_present(struct twl4030_bci *bci)
-{
-	int ret;
-	u8 val = 0;
-
-	/* Battery presence in Main charge? */
-	ret = twl_i2c_read_u8(TWL_MODULE_MAIN_CHARGE, &val, TWL4030_BCIMFSTS3);
-	if (ret)
-		return ret;
-	if (val & TWL4030_BATSTSMCHG)
-		return 0;
-
-	/*
-	 * OK, It could be that bootloader did not enable main charger,
-	 * pre-charge is h/w auto. So, Battery presence in Pre-charge?
-	 */
-	ret = twl_i2c_read_u8(TWL4030_MODULE_PRECHARGE, &val,
-			      TWL4030_BCIMFSTS1);
-	if (ret)
-		return ret;
-	if (val & TWL4030_BATSTSPCHG)
-		return 0;
-
-	return -ENODEV;
-}
-
-/*
  * TI provided formulas:
  * CGAIN == 0: ICHG = (BCIICHG * 1.7) / (2^10 - 1) - 0.85
  * CGAIN == 1: ICHG = (BCIICHG * 3.4) / (2^10 - 1) - 1.7
@@ -922,6 +893,28 @@ static int twl4030_bci_get_property(struct power_supply *psy,
 			twl4030_bci_state_to_status(state) !=
 				POWER_SUPPLY_STATUS_NOT_CHARGING;
 		break;
+	case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+		val->intval = -1;
+		if (psy->desc->type != POWER_SUPPLY_TYPE_USB) {
+			if (!bci->ac_is_active)
+				val->intval = bci->ac_cur;
+		} else {
+			if (bci->ac_is_active)
+				val->intval = bci->usb_cur_target;
+		}
+		if (val->intval < 0) {
+			u8 bcictl1;
+
+			val->intval = twl4030bci_read_adc_val(TWL4030_BCIIREF1);
+			if (val->intval < 0)
+				return val->intval;
+			ret = twl4030_bci_read(TWL4030_BCICTL1, &bcictl1);
+			if (ret < 0)
+				return ret;
+			val->intval = regval2ua(val->intval, bcictl1 &
+							TWL4030_CGAIN);
+		}
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -929,11 +922,44 @@ static int twl4030_bci_get_property(struct power_supply *psy,
 	return 0;
 }
 
+static int twl4030_bci_set_property(struct power_supply *psy,
+				    enum power_supply_property psp,
+				    const union power_supply_propval *val)
+{
+	struct twl4030_bci *bci = dev_get_drvdata(psy->dev.parent);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+		if (psy->desc->type == POWER_SUPPLY_TYPE_USB)
+			bci->usb_cur_target = val->intval;
+		else
+			bci->ac_cur = val->intval;
+		twl4030_charger_update_current(bci);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int twl4030_bci_property_is_writeable(struct power_supply *psy,
+				      enum power_supply_property psp)
+{
+	switch (psp) {
+	case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+		return true;
+	default:
+		return false;
+	}
+}
+
 static enum power_supply_property twl4030_charger_props[] = {
 	POWER_SUPPLY_PROP_STATUS,
 	POWER_SUPPLY_PROP_ONLINE,
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
 	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT,
 };
 
 #ifdef CONFIG_OF
@@ -970,6 +996,8 @@ static const struct power_supply_desc twl4030_bci_ac_desc = {
 	.properties	= twl4030_charger_props,
 	.num_properties	= ARRAY_SIZE(twl4030_charger_props),
 	.get_property	= twl4030_bci_get_property,
+	.set_property	= twl4030_bci_set_property,
+	.property_is_writeable	= twl4030_bci_property_is_writeable,
 };
 
 static const struct power_supply_desc twl4030_bci_usb_desc = {
@@ -978,6 +1006,8 @@ static const struct power_supply_desc twl4030_bci_usb_desc = {
 	.properties	= twl4030_charger_props,
 	.num_properties	= ARRAY_SIZE(twl4030_charger_props),
 	.get_property	= twl4030_bci_get_property,
+	.set_property	= twl4030_bci_set_property,
+	.property_is_writeable	= twl4030_bci_property_is_writeable,
 };
 
 static int twl4030_bci_probe(struct platform_device *pdev)
@@ -1009,13 +1039,6 @@ static int twl4030_bci_probe(struct platform_device *pdev)
 	bci->irq_chg = platform_get_irq(pdev, 0);
 	bci->irq_bci = platform_get_irq(pdev, 1);
 
-	/* Only proceed further *IF* battery is physically present */
-	ret = twl4030_is_battery_present(bci);
-	if  (ret) {
-		dev_crit(&pdev->dev, "Battery was not detected:%d\n", ret);
-		return ret;
-	}
-
 	platform_set_drvdata(pdev, bci);
 
 	bci->ac = devm_power_supply_register(&pdev->dev, &twl4030_bci_ac_desc,
diff --git a/drivers/power/supply/twl4030_madc_battery.c b/drivers/power/supply/twl4030_madc_battery.c
index f5817e422d64..4d41acb98576 100644
--- a/drivers/power/supply/twl4030_madc_battery.c
+++ b/drivers/power/supply/twl4030_madc_battery.c
@@ -17,7 +17,6 @@
 #include <linux/power_supply.h>
 #include <linux/slab.h>
 #include <linux/sort.h>
-#include <linux/i2c/twl4030-madc.h>
 #include <linux/power/twl4030_madc_battery.h>
 #include <linux/iio/consumer.h>
 
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 9a25110c4a46..9ddad0815ba9 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -1164,6 +1164,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
 	RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD,	rapl_defaults_tng),
 	RAPL_CPU(INTEL_FAM6_ATOM_MOOREFIELD,	rapl_defaults_ann),
 	RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT,	rapl_defaults_core),
+	RAPL_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE,	rapl_defaults_core),
 	RAPL_CPU(INTEL_FAM6_ATOM_DENVERTON,	rapl_defaults_core),
 
 	RAPL_CPU(INTEL_FAM6_XEON_PHI_KNL,	rapl_defaults_hsw_server),
diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c
index 14bde0db8c24..5b10b50f8686 100644
--- a/drivers/powercap/powercap_sys.c
+++ b/drivers/powercap/powercap_sys.c
@@ -538,6 +538,7 @@ struct powercap_zone *powercap_register_zone(
 
 	power_zone->id = result;
 	idr_init(&power_zone->idr);
+	result = -ENOMEM;
 	power_zone->name = kstrdup(name, GFP_KERNEL);
 	if (!power_zone->name)
 		goto err_name_alloc;
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index 0142cc3f0c91..294634836b32 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -71,7 +71,7 @@ EXPORT_SYMBOL(rproc_vq_interrupt);
 static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 				    unsigned int id,
 				    void (*callback)(struct virtqueue *vq),
-				    const char *name)
+				    const char *name, bool ctx)
 {
 	struct rproc_vdev *rvdev = vdev_to_rvdev(vdev);
 	struct rproc *rproc = vdev_to_rproc(vdev);
@@ -103,8 +103,8 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 	 * Create the new vq, and tell virtio we're not interested in
 	 * the 'weak' smp barriers, since we're talking with a real device.
 	 */
-	vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, addr,
-				 rproc_virtio_notify, callback, name);
+	vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, ctx,
+				 addr, rproc_virtio_notify, callback, name);
 	if (!vq) {
 		dev_err(dev, "vring_new_virtqueue %s failed\n", name);
 		rproc_free_vring(rvring);
@@ -138,12 +138,14 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 				 struct virtqueue *vqs[],
 				 vq_callback_t *callbacks[],
 				 const char * const names[],
+				 const bool * ctx,
 				 struct irq_affinity *desc)
 {
 	int i, ret;
 
 	for (i = 0; i < nvqs; ++i) {
-		vqs[i] = rp_find_vq(vdev, i, callbacks[i], names[i]);
+		vqs[i] = rp_find_vq(vdev, i, callbacks[i], names[i],
+				    ctx ? ctx[i] : false);
 		if (IS_ERR(vqs[i])) {
 			ret = PTR_ERR(vqs[i]);
 			goto error;
diff --git a/drivers/reset/hisilicon/hi6220_reset.c b/drivers/reset/hisilicon/hi6220_reset.c
index 35ce53edabf9..d5e5229308f2 100644
--- a/drivers/reset/hisilicon/hi6220_reset.c
+++ b/drivers/reset/hisilicon/hi6220_reset.c
@@ -155,3 +155,5 @@ static int __init hi6220_reset_init(void)
 }
 
 postcore_initcall(hi6220_reset_init);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 5e66e081027e..f7cade09d38a 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -869,7 +869,7 @@ static int rpmsg_probe(struct virtio_device *vdev)
 	init_waitqueue_head(&vrp->sendq);
 
 	/* We expect two virtqueues, rx and tx (and in this order) */
-	err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names, NULL);
+	err = virtio_find_vqs(vdev, 2, vqs, vq_cbs, names, NULL);
 	if (err)
 		goto free_vrp;
 
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index ee1b0e9dde79..8d3b95728326 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -1303,10 +1303,10 @@ config RTC_DRV_SA1100
 
 config RTC_DRV_SH
 	tristate "SuperH On-Chip RTC"
-	depends on SUPERH && HAVE_CLK
+	depends on SUPERH || ARCH_RENESAS
 	help
 	  Say Y here to enable support for the on-chip RTC found in
-	  most SuperH processors.
+	  most SuperH processors. This RTC is also found in RZ/A SoCs.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called rtc-sh.
@@ -1731,6 +1731,13 @@ config RTC_DRV_STM32
 	   This driver can also be built as a module, if so, the module
 	   will be called "rtc-stm32".
 
+config RTC_DRV_CPCAP
+	depends on MFD_CPCAP
+	tristate "Motorola CPCAP RTC"
+	help
+	   Say y here for CPCAP rtc found on some Motorola phones
+	   and tablets such as Droid 4.
+
 comment "HID Sensor RTC drivers"
 
 config RTC_DRV_HID_SENSOR_TIME
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index f07297b1460a..13857d2fce09 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_RTC_DRV_BQ32K)	+= rtc-bq32k.o
 obj-$(CONFIG_RTC_DRV_BQ4802)	+= rtc-bq4802.o
 obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc-cmos.o
 obj-$(CONFIG_RTC_DRV_COH901331)	+= rtc-coh901331.o
+obj-$(CONFIG_RTC_DRV_CPCAP)	+= rtc-cpcap.o
 obj-$(CONFIG_RTC_DRV_DA9052)	+= rtc-da9052.o
 obj-$(CONFIG_RTC_DRV_DA9055)	+= rtc-da9055.o
 obj-$(CONFIG_RTC_DRV_DA9063)	+= rtc-da9063.o
diff --git a/drivers/rtc/rtc-bq32k.c b/drivers/rtc/rtc-bq32k.c
index 2b223935001f..98ac8d5c7901 100644
--- a/drivers/rtc/rtc-bq32k.c
+++ b/drivers/rtc/rtc-bq32k.c
@@ -310,9 +310,16 @@ static const struct i2c_device_id bq32k_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, bq32k_id);
 
+static const struct of_device_id bq32k_of_match[] = {
+	{ .compatible = "ti,bq32000" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, bq32k_of_match);
+
 static struct i2c_driver bq32k_driver = {
 	.driver = {
 		.name	= "bq32k",
+		.of_match_table = of_match_ptr(bq32k_of_match),
 	},
 	.probe		= bq32k_probe,
 	.remove		= bq32k_remove,
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index f4a96dbdabf2..9dca53df3584 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -41,6 +41,9 @@
 #include <linux/pm.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
+#ifdef CONFIG_X86
+#include <asm/i8259.h>
+#endif
 
 /* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */
 #include <linux/mc146818rtc.h>
@@ -1085,7 +1088,7 @@ static u32 rtc_handler(void *context)
 	}
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
-	pm_wakeup_event(dev, 0);
+	pm_wakeup_hard_event(dev);
 	acpi_clear_event(ACPI_EVENT_RTC);
 	acpi_disable_event(ACPI_EVENT_RTC, 0);
 	return ACPI_INTERRUPT_HANDLED;
@@ -1193,17 +1196,23 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
 {
 	cmos_wake_setup(&pnp->dev);
 
-	if (pnp_port_start(pnp, 0) == 0x70 && !pnp_irq_valid(pnp, 0))
+	if (pnp_port_start(pnp, 0) == 0x70 && !pnp_irq_valid(pnp, 0)) {
+		unsigned int irq = 0;
+#ifdef CONFIG_X86
 		/* Some machines contain a PNP entry for the RTC, but
 		 * don't define the IRQ. It should always be safe to
-		 * hardcode it in these cases
+		 * hardcode it on systems with a legacy PIC.
 		 */
+		if (nr_legacy_irqs())
+			irq = 8;
+#endif
 		return cmos_do_probe(&pnp->dev,
-				pnp_get_resource(pnp, IORESOURCE_IO, 0), 8);
-	else
+				pnp_get_resource(pnp, IORESOURCE_IO, 0), irq);
+	} else {
 		return cmos_do_probe(&pnp->dev,
 				pnp_get_resource(pnp, IORESOURCE_IO, 0),
 				pnp_irq(pnp, 0));
+	}
 }
 
 static void cmos_pnp_remove(struct pnp_dev *pnp)
diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c
new file mode 100644
index 000000000000..3a0333e1f21a
--- /dev/null
+++ b/drivers/rtc/rtc-cpcap.c
@@ -0,0 +1,330 @@
+/*
+ * Motorola CPCAP PMIC RTC driver
+ *
+ * Based on cpcap-regulator.c from Motorola Linux kernel tree
+ * Copyright (C) 2009 Motorola, Inc.
+ *
+ * Rewritten for mainline kernel
+ *  - use DT
+ *  - use regmap
+ *  - use standard interrupt framework
+ *  - use managed device resources
+ *  - remove custom "secure clock daemon" helpers
+ *
+ * Copyright (C) 2017 Sebastian Reichel <sre@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+#include <linux/err.h>
+#include <linux/regmap.h>
+#include <linux/mfd/motorola-cpcap.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#define SECS_PER_DAY 86400
+#define DAY_MASK  0x7FFF
+#define TOD1_MASK 0x00FF
+#define TOD2_MASK 0x01FF
+
+struct cpcap_time {
+	int day;
+	int tod1;
+	int tod2;
+};
+
+struct cpcap_rtc {
+	struct regmap *regmap;
+	struct rtc_device *rtc_dev;
+	u16 vendor;
+	int alarm_irq;
+	bool alarm_enabled;
+	int update_irq;
+	bool update_enabled;
+};
+
+static void cpcap2rtc_time(struct rtc_time *rtc, struct cpcap_time *cpcap)
+{
+	unsigned long int tod;
+	unsigned long int time;
+
+	tod = (cpcap->tod1 & TOD1_MASK) | ((cpcap->tod2 & TOD2_MASK) << 8);
+	time = tod + ((cpcap->day & DAY_MASK) * SECS_PER_DAY);
+
+	rtc_time_to_tm(time, rtc);
+}
+
+static void rtc2cpcap_time(struct cpcap_time *cpcap, struct rtc_time *rtc)
+{
+	unsigned long time;
+
+	rtc_tm_to_time(rtc, &time);
+
+	cpcap->day = time / SECS_PER_DAY;
+	time %= SECS_PER_DAY;
+	cpcap->tod2 = (time >> 8) & TOD2_MASK;
+	cpcap->tod1 = time & TOD1_MASK;
+}
+
+static int cpcap_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+	struct cpcap_rtc *rtc = dev_get_drvdata(dev);
+
+	if (rtc->alarm_enabled == enabled)
+		return 0;
+
+	if (enabled)
+		enable_irq(rtc->alarm_irq);
+	else
+		disable_irq(rtc->alarm_irq);
+
+	rtc->alarm_enabled = !!enabled;
+
+	return 0;
+}
+
+static int cpcap_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct cpcap_rtc *rtc;
+	struct cpcap_time cpcap_tm;
+	int temp_tod2;
+	int ret;
+
+	rtc = dev_get_drvdata(dev);
+
+	ret = regmap_read(rtc->regmap, CPCAP_REG_TOD2, &temp_tod2);
+	ret |= regmap_read(rtc->regmap, CPCAP_REG_DAY, &cpcap_tm.day);
+	ret |= regmap_read(rtc->regmap, CPCAP_REG_TOD1, &cpcap_tm.tod1);
+	ret |= regmap_read(rtc->regmap, CPCAP_REG_TOD2, &cpcap_tm.tod2);
+
+	if (temp_tod2 > cpcap_tm.tod2)
+		ret |= regmap_read(rtc->regmap, CPCAP_REG_DAY, &cpcap_tm.day);
+
+	if (ret) {
+		dev_err(dev, "Failed to read time\n");
+		return -EIO;
+	}
+
+	cpcap2rtc_time(tm, &cpcap_tm);
+
+	return rtc_valid_tm(tm);
+}
+
+static int cpcap_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct cpcap_rtc *rtc;
+	struct cpcap_time cpcap_tm;
+	int ret = 0;
+
+	rtc = dev_get_drvdata(dev);
+
+	rtc2cpcap_time(&cpcap_tm, tm);
+
+	if (rtc->alarm_enabled)
+		disable_irq(rtc->alarm_irq);
+	if (rtc->update_enabled)
+		disable_irq(rtc->update_irq);
+
+	if (rtc->vendor == CPCAP_VENDOR_ST) {
+		/* The TOD1 and TOD2 registers MUST be written in this order
+		 * for the change to properly set.
+		 */
+		ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_TOD1,
+					  TOD1_MASK, cpcap_tm.tod1);
+		ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_TOD2,
+					  TOD2_MASK, cpcap_tm.tod2);
+		ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_DAY,
+					  DAY_MASK, cpcap_tm.day);
+	} else {
+		/* Clearing the upper lower 8 bits of the TOD guarantees that
+		 * the upper half of TOD (TOD2) will not increment for 0xFF RTC
+		 * ticks (255 seconds).  During this time we can safely write
+		 * to DAY, TOD2, then TOD1 (in that order) and expect RTC to be
+		 * synchronized to the exact time requested upon the final write
+		 * to TOD1.
+		 */
+		ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_TOD1,
+					  TOD1_MASK, 0);
+		ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_DAY,
+					  DAY_MASK, cpcap_tm.day);
+		ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_TOD2,
+					  TOD2_MASK, cpcap_tm.tod2);
+		ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_TOD1,
+					  TOD1_MASK, cpcap_tm.tod1);
+	}
+
+	if (rtc->update_enabled)
+		enable_irq(rtc->update_irq);
+	if (rtc->alarm_enabled)
+		enable_irq(rtc->alarm_irq);
+
+	return ret;
+}
+
+static int cpcap_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct cpcap_rtc *rtc;
+	struct cpcap_time cpcap_tm;
+	int ret;
+
+	rtc = dev_get_drvdata(dev);
+
+	alrm->enabled = rtc->alarm_enabled;
+
+	ret = regmap_read(rtc->regmap, CPCAP_REG_DAYA, &cpcap_tm.day);
+	ret |= regmap_read(rtc->regmap, CPCAP_REG_TODA2, &cpcap_tm.tod2);
+	ret |= regmap_read(rtc->regmap, CPCAP_REG_TODA1, &cpcap_tm.tod1);
+
+	if (ret) {
+		dev_err(dev, "Failed to read time\n");
+		return -EIO;
+	}
+
+	cpcap2rtc_time(&alrm->time, &cpcap_tm);
+	return rtc_valid_tm(&alrm->time);
+}
+
+static int cpcap_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct cpcap_rtc *rtc;
+	struct cpcap_time cpcap_tm;
+	int ret;
+
+	rtc = dev_get_drvdata(dev);
+
+	rtc2cpcap_time(&cpcap_tm, &alrm->time);
+
+	if (rtc->alarm_enabled)
+		disable_irq(rtc->alarm_irq);
+
+	ret = regmap_update_bits(rtc->regmap, CPCAP_REG_DAYA, DAY_MASK,
+				 cpcap_tm.day);
+	ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_TODA2, TOD2_MASK,
+				  cpcap_tm.tod2);
+	ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_TODA1, TOD1_MASK,
+				  cpcap_tm.tod1);
+
+	if (!ret) {
+		enable_irq(rtc->alarm_irq);
+		rtc->alarm_enabled = true;
+	}
+
+	return ret;
+}
+
+static const struct rtc_class_ops cpcap_rtc_ops = {
+	.read_time		= cpcap_rtc_read_time,
+	.set_time		= cpcap_rtc_set_time,
+	.read_alarm		= cpcap_rtc_read_alarm,
+	.set_alarm		= cpcap_rtc_set_alarm,
+	.alarm_irq_enable	= cpcap_rtc_alarm_irq_enable,
+};
+
+static irqreturn_t cpcap_rtc_alarm_irq(int irq, void *data)
+{
+	struct cpcap_rtc *rtc = data;
+
+	rtc_update_irq(rtc->rtc_dev, 1, RTC_AF | RTC_IRQF);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t cpcap_rtc_update_irq(int irq, void *data)
+{
+	struct cpcap_rtc *rtc = data;
+
+	rtc_update_irq(rtc->rtc_dev, 1, RTC_UF | RTC_IRQF);
+	return IRQ_HANDLED;
+}
+
+static int cpcap_rtc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct cpcap_rtc *rtc;
+	int err;
+
+	rtc = devm_kzalloc(dev, sizeof(*rtc), GFP_KERNEL);
+	if (!rtc)
+		return -ENOMEM;
+
+	rtc->regmap = dev_get_regmap(dev->parent, NULL);
+	if (!rtc->regmap)
+		return -ENODEV;
+
+	platform_set_drvdata(pdev, rtc);
+	rtc->rtc_dev = devm_rtc_device_register(dev, "cpcap_rtc",
+						&cpcap_rtc_ops, THIS_MODULE);
+
+	if (IS_ERR(rtc->rtc_dev))
+		return PTR_ERR(rtc->rtc_dev);
+
+	err = cpcap_get_vendor(dev, rtc->regmap, &rtc->vendor);
+	if (err)
+		return err;
+
+	rtc->alarm_irq = platform_get_irq(pdev, 0);
+	err = devm_request_threaded_irq(dev, rtc->alarm_irq, NULL,
+					cpcap_rtc_alarm_irq, IRQF_TRIGGER_NONE,
+					"rtc_alarm", rtc);
+	if (err) {
+		dev_err(dev, "Could not request alarm irq: %d\n", err);
+		return err;
+	}
+	disable_irq(rtc->alarm_irq);
+
+	/* Stock Android uses the 1 Hz interrupt for "secure clock daemon",
+	 * which is not supported by the mainline kernel. The mainline kernel
+	 * does not use the irq at the moment, but we explicitly request and
+	 * disable it, so that its masked and does not wake up the processor
+	 * every second.
+	 */
+	rtc->update_irq = platform_get_irq(pdev, 1);
+	err = devm_request_threaded_irq(dev, rtc->update_irq, NULL,
+					cpcap_rtc_update_irq, IRQF_TRIGGER_NONE,
+					"rtc_1hz", rtc);
+	if (err) {
+		dev_err(dev, "Could not request update irq: %d\n", err);
+		return err;
+	}
+	disable_irq(rtc->update_irq);
+
+	err = device_init_wakeup(dev, 1);
+	if (err) {
+		dev_err(dev, "wakeup initialization failed (%d)\n", err);
+		/* ignore error and continue without wakeup support */
+	}
+
+	return 0;
+}
+
+static const struct of_device_id cpcap_rtc_of_match[] = {
+	{ .compatible = "motorola,cpcap-rtc", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, cpcap_rtc_of_match);
+
+static struct platform_driver cpcap_rtc_driver = {
+	.probe		= cpcap_rtc_probe,
+	.driver		= {
+		.name	= "cpcap-rtc",
+		.of_match_table = cpcap_rtc_of_match,
+	},
+};
+
+module_platform_driver(cpcap_rtc_driver);
+
+MODULE_ALIAS("platform:cpcap-rtc");
+MODULE_DESCRIPTION("CPCAP RTC driver");
+MODULE_AUTHOR("Sebastian Reichel <sre@kernel.org>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 4ad97be48043..77339b3d50a1 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -16,6 +16,7 @@
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/rtc/ds1307.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
@@ -38,6 +39,7 @@ enum ds_type {
 	ds_1340,
 	ds_1388,
 	ds_3231,
+	m41t0,
 	m41t00,
 	mcp794xx,
 	rx_8025,
@@ -52,6 +54,7 @@ enum ds_type {
 #	define DS1340_BIT_nEOSC		0x80
 #	define MCP794XX_BIT_ST		0x80
 #define DS1307_REG_MIN		0x01	/* 00-59 */
+#	define M41T0_BIT_OF		0x80
 #define DS1307_REG_HOUR		0x02	/* 00-23, or 1-12{am,pm} */
 #	define DS1307_BIT_12HR		0x40	/* in REG_HOUR */
 #	define DS1307_BIT_PM		0x20	/* in REG_HOUR */
@@ -182,6 +185,7 @@ static const struct i2c_device_id ds1307_id[] = {
 	{ "ds1388", ds_1388 },
 	{ "ds1340", ds_1340 },
 	{ "ds3231", ds_3231 },
+	{ "m41t0", m41t0 },
 	{ "m41t00", m41t00 },
 	{ "mcp7940x", mcp794xx },
 	{ "mcp7941x", mcp794xx },
@@ -192,6 +196,69 @@ static const struct i2c_device_id ds1307_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ds1307_id);
 
+#ifdef CONFIG_OF
+static const struct of_device_id ds1307_of_match[] = {
+	{
+		.compatible = "dallas,ds1307",
+		.data = (void *)ds_1307
+	},
+	{
+		.compatible = "dallas,ds1337",
+		.data = (void *)ds_1337
+	},
+	{
+		.compatible = "dallas,ds1338",
+		.data = (void *)ds_1338
+	},
+	{
+		.compatible = "dallas,ds1339",
+		.data = (void *)ds_1339
+	},
+	{
+		.compatible = "dallas,ds1388",
+		.data = (void *)ds_1388
+	},
+	{
+		.compatible = "dallas,ds1340",
+		.data = (void *)ds_1340
+	},
+	{
+		.compatible = "maxim,ds3231",
+		.data = (void *)ds_3231
+	},
+	{
+		.compatible = "st,m41t0",
+		.data = (void *)m41t00
+	},
+	{
+		.compatible = "st,m41t00",
+		.data = (void *)m41t00
+	},
+	{
+		.compatible = "microchip,mcp7940x",
+		.data = (void *)mcp794xx
+	},
+	{
+		.compatible = "microchip,mcp7941x",
+		.data = (void *)mcp794xx
+	},
+	{
+		.compatible = "pericom,pt7c4338",
+		.data = (void *)ds_1307
+	},
+	{
+		.compatible = "epson,rx8025",
+		.data = (void *)rx_8025
+	},
+	{
+		.compatible = "isil,isl12057",
+		.data = (void *)ds_1337
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ds1307_of_match);
+#endif
+
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id ds1307_acpi_ids[] = {
 	{ .id = "DS1307", .driver_data = ds_1307 },
@@ -201,6 +268,7 @@ static const struct acpi_device_id ds1307_acpi_ids[] = {
 	{ .id = "DS1388", .driver_data = ds_1388 },
 	{ .id = "DS1340", .driver_data = ds_1340 },
 	{ .id = "DS3231", .driver_data = ds_3231 },
+	{ .id = "M41T0", .driver_data = m41t0 },
 	{ .id = "M41T00", .driver_data = m41t00 },
 	{ .id = "MCP7940X", .driver_data = mcp794xx },
 	{ .id = "MCP7941X", .driver_data = mcp794xx },
@@ -396,6 +464,13 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
 
 	dev_dbg(dev, "%s: %7ph\n", "read", ds1307->regs);
 
+	/* if oscillator fail bit is set, no data can be trusted */
+	if (ds1307->type == m41t0 &&
+	    ds1307->regs[DS1307_REG_MIN] & M41T0_BIT_OF) {
+		dev_warn_once(dev, "oscillator failed, set time!\n");
+		return -EINVAL;
+	}
+
 	t->tm_sec = bcd2bin(ds1307->regs[DS1307_REG_SECS] & 0x7f);
 	t->tm_min = bcd2bin(ds1307->regs[DS1307_REG_MIN] & 0x7f);
 	tmp = ds1307->regs[DS1307_REG_HOUR] & 0x3f;
@@ -1318,7 +1393,12 @@ static int ds1307_probe(struct i2c_client *client,
 	i2c_set_clientdata(client, ds1307);
 
 	ds1307->client	= client;
-	if (id) {
+
+	if (client->dev.of_node) {
+		ds1307->type = (enum ds_type)
+			of_device_get_match_data(&client->dev);
+		chip = &chips[ds1307->type];
+	} else if (id) {
 		chip = &chips[id->driver_data];
 		ds1307->type = id->driver_data;
 	} else {
@@ -1513,6 +1593,7 @@ read_rtc:
 	tmp = ds1307->regs[DS1307_REG_SECS];
 	switch (ds1307->type) {
 	case ds_1307:
+	case m41t0:
 	case m41t00:
 		/* clock halted?  turn it on, so clock can tick. */
 		if (tmp & DS1307_BIT_CH) {
@@ -1577,6 +1658,7 @@ read_rtc:
 	tmp = ds1307->regs[DS1307_REG_HOUR];
 	switch (ds1307->type) {
 	case ds_1340:
+	case m41t0:
 	case m41t00:
 		/*
 		 * NOTE: ignores century bits; fix before deploying
@@ -1711,6 +1793,7 @@ static int ds1307_remove(struct i2c_client *client)
 static struct i2c_driver ds1307_driver = {
 	.driver = {
 		.name	= "rtc-ds1307",
+		.of_match_table = of_match_ptr(ds1307_of_match),
 		.acpi_match_table = ACPI_PTR(ds1307_acpi_ids),
 	},
 	.probe		= ds1307_probe,
diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c
index 52429f0a57cc..38a2e9e684df 100644
--- a/drivers/rtc/rtc-ds1374.c
+++ b/drivers/rtc/rtc-ds1374.c
@@ -525,6 +525,10 @@ static long ds1374_wdt_ioctl(struct file *file, unsigned int cmd,
 		if (get_user(new_margin, (int __user *)arg))
 			return -EFAULT;
 
+		/* the hardware's tick rate is 4096 Hz, so
+		 * the counter value needs to be scaled accordingly
+		 */
+		new_margin <<= 12;
 		if (new_margin < 1 || new_margin > 16777216)
 			return -EINVAL;
 
@@ -533,7 +537,8 @@ static long ds1374_wdt_ioctl(struct file *file, unsigned int cmd,
 		ds1374_wdt_ping();
 		/* fallthrough */
 	case WDIOC_GETTIMEOUT:
-		return put_user(wdt_margin, (int __user *)arg);
+		/* when returning ... inverse is true */
+		return put_user((wdt_margin >> 12), (int __user *)arg);
 	case WDIOC_SETOPTIONS:
 		if (copy_from_user(&options, (int __user *)arg, sizeof(int)))
 			return -EFAULT;
@@ -541,14 +546,15 @@ static long ds1374_wdt_ioctl(struct file *file, unsigned int cmd,
 		if (options & WDIOS_DISABLECARD) {
 			pr_info("disable watchdog\n");
 			ds1374_wdt_disable();
+			return 0;
 		}
 
 		if (options & WDIOS_ENABLECARD) {
 			pr_info("enable watchdog\n");
 			ds1374_wdt_settimeout(wdt_margin);
 			ds1374_wdt_ping();
+			return 0;
 		}
-
 		return -EINVAL;
 	}
 	return -ENOTTY;
@@ -704,6 +710,7 @@ static SIMPLE_DEV_PM_OPS(ds1374_pm, ds1374_suspend, ds1374_resume);
 static struct i2c_driver ds1374_driver = {
 	.driver = {
 		.name = "rtc-ds1374",
+		.of_match_table = of_match_ptr(ds1374_of_match),
 		.pm = &ds1374_pm,
 	},
 	.probe = ds1374_probe,
diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c
index 5c18ac7394c4..7bf46bfe11a4 100644
--- a/drivers/rtc/rtc-ds1672.c
+++ b/drivers/rtc/rtc-ds1672.c
@@ -196,10 +196,17 @@ static struct i2c_device_id ds1672_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ds1672_id);
 
+static const struct of_device_id ds1672_of_match[] = {
+	{ .compatible = "dallas,ds1672" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ds1672_of_match);
+
 static struct i2c_driver ds1672_driver = {
 	.driver = {
 		   .name = "rtc-ds1672",
-		   },
+		   .of_match_table = of_match_ptr(ds1672_of_match),
+	},
 	.probe = &ds1672_probe,
 	.id_table = ds1672_id,
 };
diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c
index 9bb39a06b994..deff431a37c4 100644
--- a/drivers/rtc/rtc-ds3232.c
+++ b/drivers/rtc/rtc-ds3232.c
@@ -442,9 +442,16 @@ static const struct i2c_device_id ds3232_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ds3232_id);
 
+static const struct of_device_id ds3232_of_match[] = {
+	{ .compatible = "dallas,ds3232" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ds3232_of_match);
+
 static struct i2c_driver ds3232_driver = {
 	.driver = {
 		.name = "rtc-ds3232",
+		.of_match_table = of_match_ptr(ds3232_of_match),
 		.pm	= &ds3232_pm_ops,
 	},
 	.probe = ds3232_i2c_probe,
diff --git a/drivers/rtc/rtc-gemini.c b/drivers/rtc/rtc-gemini.c
index ccf0dbadb62d..5279390bb42d 100644
--- a/drivers/rtc/rtc-gemini.c
+++ b/drivers/rtc/rtc-gemini.c
@@ -139,6 +139,8 @@ static int gemini_rtc_probe(struct platform_device *pdev)
 
 	rtc->rtc_base = devm_ioremap(dev, res->start,
 				     resource_size(res));
+	if (!rtc->rtc_base)
+		return -ENOMEM;
 
 	ret = devm_request_irq(dev, rtc->rtc_irq, gemini_rtc_interrupt,
 			       IRQF_SHARED, pdev->name, dev);
diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c
index c398f74234c6..2751dba850c6 100644
--- a/drivers/rtc/rtc-hid-sensor-time.c
+++ b/drivers/rtc/rtc-hid-sensor-time.c
@@ -291,9 +291,9 @@ static int hid_time_probe(struct platform_device *pdev)
 					"hid-sensor-time", &hid_time_rtc_ops,
 					THIS_MODULE);
 
-	if (IS_ERR_OR_NULL(time_state->rtc)) {
+	if (IS_ERR(time_state->rtc)) {
 		hid_device_io_stop(hsdev->hdev);
-		ret = time_state->rtc ? PTR_ERR(time_state->rtc) : -ENODEV;
+		ret = PTR_ERR(time_state->rtc);
 		time_state->rtc = NULL;
 		dev_err(&pdev->dev, "rtc device register failed!\n");
 		goto err_rtc;
diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c
index 6b54f6c24c5f..80931114c899 100644
--- a/drivers/rtc/rtc-imxdi.c
+++ b/drivers/rtc/rtc-imxdi.c
@@ -709,7 +709,7 @@ static irqreturn_t dryice_irq(int irq, void *dev_id)
 		/*If the write wait queue is empty then there is no pending
 		  operations. It means the interrupt is for DryIce -Security.
 		  IRQ must be returned as none.*/
-		if (list_empty_careful(&imxdi->write_wait.task_list))
+		if (list_empty_careful(&imxdi->write_wait.head))
 			return rc;
 
 		/* DSR_WCF clears itself on DSR read */
diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c
index 2893785f0eba..8dd299c6a1f3 100644
--- a/drivers/rtc/rtc-isl1208.c
+++ b/drivers/rtc/rtc-isl1208.c
@@ -687,10 +687,18 @@ static const struct i2c_device_id isl1208_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, isl1208_id);
 
+static const struct of_device_id isl1208_of_match[] = {
+	{ .compatible = "isil,isl1208" },
+	{ .compatible = "isil,isl1218" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, isl1208_of_match);
+
 static struct i2c_driver isl1208_driver = {
 	.driver = {
-		   .name = "rtc-isl1208",
-		   },
+		.name = "rtc-isl1208",
+		.of_match_table = of_match_ptr(isl1208_of_match),
+	},
 	.probe = isl1208_probe,
 	.remove = isl1208_remove,
 	.id_table = isl1208_id,
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 58698d21c2c3..5ec4653022ff 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
@@ -86,8 +87,66 @@ static const struct i2c_device_id m41t80_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, m41t80_id);
 
+static const struct of_device_id m41t80_of_match[] = {
+	{
+		.compatible = "st,m41t62",
+		.data = (void *)(M41T80_FEATURE_SQ | M41T80_FEATURE_SQ_ALT)
+	},
+	{
+		.compatible = "st,m41t65",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_WD)
+	},
+	{
+		.compatible = "st,m41t80",
+		.data = (void *)(M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,m41t81",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,m41t81s",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,m41t82",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,m41t83",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,m41t84",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,m41t85",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,m41t87",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "microcrystal,rv4162",
+		.data = (void *)(M41T80_FEATURE_SQ | M41T80_FEATURE_WD | M41T80_FEATURE_SQ_ALT)
+	},
+	/* DT compatibility only, do not use compatibles below: */
+	{
+		.compatible = "st,rv4162",
+		.data = (void *)(M41T80_FEATURE_SQ | M41T80_FEATURE_WD | M41T80_FEATURE_SQ_ALT)
+	},
+	{
+		.compatible = "rv4162",
+		.data = (void *)(M41T80_FEATURE_SQ | M41T80_FEATURE_WD | M41T80_FEATURE_SQ_ALT)
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, m41t80_of_match);
+
 struct m41t80_data {
-	u8 features;
+	unsigned long features;
 	struct rtc_device *rtc;
 };
 
@@ -786,7 +845,11 @@ static int m41t80_probe(struct i2c_client *client,
 	if (!m41t80_data)
 		return -ENOMEM;
 
-	m41t80_data->features = id->driver_data;
+	if (client->dev.of_node)
+		m41t80_data->features = (unsigned long)
+			of_device_get_match_data(&client->dev);
+	else
+		m41t80_data->features = id->driver_data;
 	i2c_set_clientdata(client, m41t80_data);
 
 	if (client->irq > 0) {
@@ -894,6 +957,7 @@ static int m41t80_remove(struct i2c_client *client)
 static struct i2c_driver m41t80_driver = {
 	.driver = {
 		.name = "rtc-m41t80",
+		.of_match_table = of_match_ptr(m41t80_of_match),
 		.pm = &m41t80_pm,
 	},
 	.probe = m41t80_probe,
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index 73594f38c453..13f7cd11c07e 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -844,7 +844,7 @@ err:
 	return ret;
 }
 
-static int __exit omap_rtc_remove(struct platform_device *pdev)
+static int omap_rtc_remove(struct platform_device *pdev)
 {
 	struct omap_rtc *rtc = platform_get_drvdata(pdev);
 	u8 reg;
@@ -882,8 +882,7 @@ static int __exit omap_rtc_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int omap_rtc_suspend(struct device *dev)
+static int __maybe_unused omap_rtc_suspend(struct device *dev)
 {
 	struct omap_rtc *rtc = dev_get_drvdata(dev);
 
@@ -906,7 +905,7 @@ static int omap_rtc_suspend(struct device *dev)
 	return 0;
 }
 
-static int omap_rtc_resume(struct device *dev)
+static int __maybe_unused omap_rtc_resume(struct device *dev)
 {
 	struct omap_rtc *rtc = dev_get_drvdata(dev);
 
@@ -921,10 +920,8 @@ static int omap_rtc_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
-#ifdef CONFIG_PM
-static int omap_rtc_runtime_suspend(struct device *dev)
+static int __maybe_unused omap_rtc_runtime_suspend(struct device *dev)
 {
 	struct omap_rtc *rtc = dev_get_drvdata(dev);
 
@@ -934,16 +931,9 @@ static int omap_rtc_runtime_suspend(struct device *dev)
 	return 0;
 }
 
-static int omap_rtc_runtime_resume(struct device *dev)
-{
-	return 0;
-}
-#endif
-
 static const struct dev_pm_ops omap_rtc_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(omap_rtc_suspend, omap_rtc_resume)
-	SET_RUNTIME_PM_OPS(omap_rtc_runtime_suspend,
-			   omap_rtc_runtime_resume, NULL)
+	SET_RUNTIME_PM_OPS(omap_rtc_runtime_suspend, NULL, NULL)
 };
 
 static void omap_rtc_shutdown(struct platform_device *pdev)
@@ -964,7 +954,7 @@ static void omap_rtc_shutdown(struct platform_device *pdev)
 
 static struct platform_driver omap_rtc_driver = {
 	.probe		= omap_rtc_probe,
-	.remove		= __exit_p(omap_rtc_remove),
+	.remove		= omap_rtc_remove,
 	.shutdown	= omap_rtc_shutdown,
 	.driver		= {
 		.name	= "omap_rtc",
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index c8c757466783..d4eff8d7131f 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -15,6 +15,7 @@
 #include <linux/bcd.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 
 /*
  * Ricoh has a family of I2C based RTCs, which differ only slightly from
@@ -83,6 +84,35 @@ static const struct i2c_device_id rs5c372_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, rs5c372_id);
 
+static const struct of_device_id rs5c372_of_match[] = {
+	{
+		.compatible = "ricoh,r2025sd",
+		.data = (void *)rtc_r2025sd
+	},
+	{
+		.compatible = "ricoh,r2221tl",
+		.data = (void *)rtc_r2221tl
+	},
+	{
+		.compatible = "ricoh,rs5c372a",
+		.data = (void *)rtc_rs5c372a
+	},
+	{
+		.compatible = "ricoh,rs5c372b",
+		.data = (void *)rtc_rs5c372b
+	},
+	{
+		.compatible = "ricoh,rv5c386",
+		.data = (void *)rtc_rv5c386
+	},
+	{
+		.compatible = "ricoh,rv5c387a",
+		.data = (void *)rtc_rv5c387a
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, rs5c372_of_match);
+
 /* REVISIT:  this assumes that:
  *  - we're in the 21st century, so it's safe to ignore the century
  *    bit for rv5c38[67] (REG_MONTH bit 7);
@@ -581,7 +611,11 @@ static int rs5c372_probe(struct i2c_client *client,
 
 	rs5c372->client = client;
 	i2c_set_clientdata(client, rs5c372);
-	rs5c372->type = id->driver_data;
+	if (client->dev.of_node)
+		rs5c372->type = (enum rtc_type)
+			of_device_get_match_data(&client->dev);
+	else
+		rs5c372->type = id->driver_data;
 
 	/* we read registers 0x0f then 0x00-0x0f; skip the first one */
 	rs5c372->regs = &rs5c372->buf[1];
@@ -673,6 +707,7 @@ static int rs5c372_remove(struct i2c_client *client)
 static struct i2c_driver rs5c372_driver = {
 	.driver		= {
 		.name	= "rtc-rs5c372",
+		.of_match_table = of_match_ptr(rs5c372_of_match),
 	},
 	.probe		= rs5c372_probe,
 	.remove		= rs5c372_remove,
diff --git a/drivers/rtc/rtc-rv3029c2.c b/drivers/rtc/rtc-rv3029c2.c
index 1f9f7b4bf3fb..85fa1da03762 100644
--- a/drivers/rtc/rtc-rv3029c2.c
+++ b/drivers/rtc/rtc-rv3029c2.c
@@ -875,9 +875,18 @@ static struct i2c_device_id rv3029_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, rv3029_id);
 
+static const struct of_device_id rv3029_of_match[] = {
+	{ .compatible = "rv3029" },
+	{ .compatible = "rv3029c2" },
+	{ .compatible = "mc,rv3029c2" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, rv3029_of_match);
+
 static struct i2c_driver rv3029_driver = {
 	.driver = {
 		.name = "rtc-rv3029c2",
+		.of_match_table = of_match_ptr(rv3029_of_match),
 	},
 	.probe		= rv3029_i2c_probe,
 	.id_table	= rv3029_id,
diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
index f9277e536f7e..9ad97ab29866 100644
--- a/drivers/rtc/rtc-rv8803.c
+++ b/drivers/rtc/rtc-rv8803.c
@@ -18,6 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/rtc.h>
 
 #define RV8803_I2C_TRY_COUNT		4
@@ -556,7 +557,11 @@ static int rv8803_probe(struct i2c_client *client,
 
 	mutex_init(&rv8803->flags_lock);
 	rv8803->client = client;
-	rv8803->type = id->driver_data;
+	if (client->dev.of_node)
+		rv8803->type = (enum rv8803_type)
+			of_device_get_match_data(&client->dev);
+	else
+		rv8803->type = id->driver_data;
 	i2c_set_clientdata(client, rv8803);
 
 	flags = rv8803_read_reg(client, RV8803_FLAG);
@@ -627,9 +632,23 @@ static const struct i2c_device_id rv8803_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, rv8803_id);
 
+static const struct of_device_id rv8803_of_match[] = {
+	{
+		.compatible = "microcrystal,rv8803",
+		.data = (void *)rx_8900
+	},
+	{
+		.compatible = "epson,rx8900",
+		.data = (void *)rx_8900
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, rv8803_of_match);
+
 static struct i2c_driver rv8803_driver = {
 	.driver = {
 		.name = "rtc-rv8803",
+		.of_match_table = of_match_ptr(rv8803_of_match),
 	},
 	.probe		= rv8803_probe,
 	.remove		= rv8803_remove,
diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c
index d08da371912c..1ed3403ff8ac 100644
--- a/drivers/rtc/rtc-rx8010.c
+++ b/drivers/rtc/rtc-rx8010.c
@@ -59,6 +59,12 @@ static const struct i2c_device_id rx8010_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, rx8010_id);
 
+static const struct of_device_id rx8010_of_match[] = {
+	{ .compatible = "epson,rx8010" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, rx8010_of_match);
+
 struct rx8010_data {
 	struct i2c_client *client;
 	struct rtc_device *rtc;
@@ -487,6 +493,7 @@ static int rx8010_probe(struct i2c_client *client,
 static struct i2c_driver rx8010_driver = {
 	.driver = {
 		.name = "rtc-rx8010",
+		.of_match_table = of_match_ptr(rx8010_of_match),
 	},
 	.probe		= rx8010_probe,
 	.id_table	= rx8010_id,
diff --git a/drivers/rtc/rtc-rx8581.c b/drivers/rtc/rtc-rx8581.c
index 0c362a3d1f17..9998d7937688 100644
--- a/drivers/rtc/rtc-rx8581.c
+++ b/drivers/rtc/rtc-rx8581.c
@@ -308,9 +308,16 @@ static const struct i2c_device_id rx8581_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, rx8581_id);
 
+static const struct of_device_id rx8581_of_match[] = {
+	{ .compatible = "epson,rx8581" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, rx8581_of_match);
+
 static struct i2c_driver rx8581_driver = {
 	.driver		= {
 		.name	= "rtc-rx8581",
+		.of_match_table = of_match_ptr(rx8581_of_match),
 	},
 	.probe		= rx8581_probe,
 	.id_table	= rx8581_id,
diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index 5dab4665ca3b..449820eeefe8 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -58,6 +58,13 @@ static const struct i2c_device_id s35390a_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, s35390a_id);
 
+static const struct of_device_id s35390a_of_match[] = {
+	{ .compatible = "s35390a" },
+	{ .compatible = "sii,s35390a" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, s35390a_of_match);
+
 struct s35390a {
 	struct i2c_client *client[8];
 	struct rtc_device *rtc;
@@ -502,6 +509,7 @@ static int s35390a_remove(struct i2c_client *client)
 static struct i2c_driver s35390a_driver = {
 	.driver		= {
 		.name	= "rtc-s35390a",
+		.of_match_table = of_match_ptr(s35390a_of_match),
 	},
 	.probe		= s35390a_probe,
 	.remove		= s35390a_remove,
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index c626e43a9cbb..6c2d3989f967 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -27,7 +27,15 @@
 #include <linux/log2.h>
 #include <linux/clk.h>
 #include <linux/slab.h>
+#ifdef CONFIG_SUPERH
 #include <asm/rtc.h>
+#else
+/* Default values for RZ/A RTC */
+#define rtc_reg_size		sizeof(u16)
+#define RTC_BIT_INVERTED        0	/* no chip bugs */
+#define RTC_CAP_4_DIGIT_YEAR    (1 << 0)
+#define RTC_DEF_CAPABILITIES    RTC_CAP_4_DIGIT_YEAR
+#endif
 
 #define DRV_NAME	"sh-rtc"
 
@@ -570,6 +578,8 @@ static int __init sh_rtc_probe(struct platform_device *pdev)
 	rtc->alarm_irq = platform_get_irq(pdev, 2);
 
 	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+	if (!res)
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (unlikely(res == NULL)) {
 		dev_err(&pdev->dev, "No IO resource\n");
 		return -ENOENT;
@@ -587,12 +597,15 @@ static int __init sh_rtc_probe(struct platform_device *pdev)
 	if (unlikely(!rtc->regbase))
 		return -EINVAL;
 
-	clk_id = pdev->id;
-	/* With a single device, the clock id is still "rtc0" */
-	if (clk_id < 0)
-		clk_id = 0;
+	if (!pdev->dev.of_node) {
+		clk_id = pdev->id;
+		/* With a single device, the clock id is still "rtc0" */
+		if (clk_id < 0)
+			clk_id = 0;
 
-	snprintf(clk_name, sizeof(clk_name), "rtc%d", clk_id);
+		snprintf(clk_name, sizeof(clk_name), "rtc%d", clk_id);
+	} else
+		snprintf(clk_name, sizeof(clk_name), "fck");
 
 	rtc->clk = devm_clk_get(&pdev->dev, clk_name);
 	if (IS_ERR(rtc->clk)) {
@@ -608,6 +621,8 @@ static int __init sh_rtc_probe(struct platform_device *pdev)
 	clk_enable(rtc->clk);
 
 	rtc->capabilities = RTC_DEF_CAPABILITIES;
+
+#ifdef CONFIG_SUPERH
 	if (dev_get_platdata(&pdev->dev)) {
 		struct sh_rtc_platform_info *pinfo =
 			dev_get_platdata(&pdev->dev);
@@ -618,6 +633,7 @@ static int __init sh_rtc_probe(struct platform_device *pdev)
 		 */
 		rtc->capabilities |= pinfo->capabilities;
 	}
+#endif
 
 	if (rtc->carry_irq <= 0) {
 		/* register shared periodic/carry/alarm irq */
@@ -718,8 +734,7 @@ static void sh_rtc_set_irq_wake(struct device *dev, int enabled)
 	}
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int sh_rtc_suspend(struct device *dev)
+static int __maybe_unused sh_rtc_suspend(struct device *dev)
 {
 	if (device_may_wakeup(dev))
 		sh_rtc_set_irq_wake(dev, 1);
@@ -727,21 +742,27 @@ static int sh_rtc_suspend(struct device *dev)
 	return 0;
 }
 
-static int sh_rtc_resume(struct device *dev)
+static int __maybe_unused sh_rtc_resume(struct device *dev)
 {
 	if (device_may_wakeup(dev))
 		sh_rtc_set_irq_wake(dev, 0);
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(sh_rtc_pm_ops, sh_rtc_suspend, sh_rtc_resume);
 
+static const struct of_device_id sh_rtc_of_match[] = {
+	{ .compatible = "renesas,sh-rtc", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sh_rtc_of_match);
+
 static struct platform_driver sh_rtc_platform_driver = {
 	.driver		= {
 		.name	= DRV_NAME,
 		.pm	= &sh_rtc_pm_ops,
+		.of_match_table = sh_rtc_of_match,
 	},
 	.remove		= __exit_p(sh_rtc_remove),
 };
diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c
index d51b07d620f7..d8ef9e052c4f 100644
--- a/drivers/rtc/rtc-snvs.c
+++ b/drivers/rtc/rtc-snvs.c
@@ -258,7 +258,7 @@ static int snvs_rtc_probe(struct platform_device *pdev)
 		of_property_read_u32(pdev->dev.of_node, "offset", &data->offset);
 	}
 
-	if (!data->regmap) {
+	if (IS_ERR(data->regmap)) {
 		dev_err(&pdev->dev, "Can't find snvs syscon\n");
 		return -ENODEV;
 	}
diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c
index fa247deb9cf4..483c7993516b 100644
--- a/drivers/rtc/rtc-wm8350.c
+++ b/drivers/rtc/rtc-wm8350.c
@@ -30,8 +30,6 @@
 #define WM8350_SET_TIME_RETRIES	5
 #define WM8350_GET_TIME_RETRIES	5
 
-#define to_wm8350_from_rtc_dev(d) container_of(d, struct wm8350, rtc.pdev.dev)
-
 /*
  * Read current time and date in RTC
  */
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 6fb3fd5efc11..b7cbd5d2cdea 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2672,7 +2672,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 	 */
 	if (basedev->state < DASD_STATE_READY) {
 		while ((req = blk_fetch_request(block->request_queue)))
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 		return;
 	}
 
@@ -2692,7 +2692,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "Rejecting write request %p",
 				      req);
 			blk_start_request(req);
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 			continue;
 		}
 		if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) &&
@@ -2702,7 +2702,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "Rejecting failfast request %p",
 				      req);
 			blk_start_request(req);
-			__blk_end_request_all(req, -ETIMEDOUT);
+			__blk_end_request_all(req, BLK_STS_TIMEOUT);
 			continue;
 		}
 		cqr = basedev->discipline->build_cp(basedev, block, req);
@@ -2734,7 +2734,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "on request %p",
 				      PTR_ERR(cqr), req);
 			blk_start_request(req);
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 			continue;
 		}
 		/*
@@ -2755,21 +2755,29 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
 {
 	struct request *req;
 	int status;
-	int error = 0;
+	blk_status_t error = BLK_STS_OK;
 
 	req = (struct request *) cqr->callback_data;
 	dasd_profile_end(cqr->block, cqr, req);
+
 	status = cqr->block->base->discipline->free_cp(cqr, req);
 	if (status < 0)
-		error = status;
+		error = errno_to_blk_status(status);
 	else if (status == 0) {
-		if (cqr->intrc == -EPERM)
-			error = -EBADE;
-		else if (cqr->intrc == -ENOLINK ||
-			 cqr->intrc == -ETIMEDOUT)
-			error = cqr->intrc;
-		else
-			error = -EIO;
+		switch (cqr->intrc) {
+		case -EPERM:
+			error = BLK_STS_NEXUS;
+			break;
+		case -ENOLINK:
+			error = BLK_STS_TRANSPORT;
+			break;
+		case -ETIMEDOUT:
+			error = BLK_STS_TIMEOUT;
+			break;
+		default:
+			error = BLK_STS_IOERR;
+			break;
+		}
 	}
 	__blk_end_request_all(req, error);
 }
@@ -3190,7 +3198,7 @@ static void dasd_flush_request_queue(struct dasd_block *block)
 
 	spin_lock_irq(&block->request_queue_lock);
 	while ((req = blk_fetch_request(block->request_queue)))
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 	spin_unlock_irq(&block->request_queue_lock);
 }
 
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 36e5280af3e4..06eb1de52d1c 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -845,7 +845,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
 	unsigned long source_addr;
 	unsigned long bytes_done;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	bytes_done = 0;
 	dev_info = bio->bi_bdev->bd_disk->private_data;
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 152de6817875..3c2c84b72877 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -231,7 +231,7 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
 	aob->request.data = (u64) aobrq;
 	scmrq->bdev = bdev;
 	scmrq->retries = 4;
-	scmrq->error = 0;
+	scmrq->error = BLK_STS_OK;
 	/* We don't use all msbs - place aidaws at the end of the aob page. */
 	scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
 	scm_request_cluster_init(scmrq);
@@ -364,7 +364,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
 {
 	struct aob *aob = scmrq->aob;
 
-	if (scmrq->error == -ETIMEDOUT)
+	if (scmrq->error == BLK_STS_TIMEOUT)
 		SCM_LOG(1, "Request timeout");
 	else {
 		SCM_LOG(1, "Request error");
@@ -377,7 +377,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
 		       scmrq->error);
 }
 
-void scm_blk_irq(struct scm_device *scmdev, void *data, int error)
+void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
 {
 	struct scm_request *scmrq = data;
 	struct scm_blk_dev *bdev = scmrq->bdev;
@@ -397,7 +397,7 @@ static void scm_blk_handle_error(struct scm_request *scmrq)
 	struct scm_blk_dev *bdev = scmrq->bdev;
 	unsigned long flags;
 
-	if (scmrq->error != -EIO)
+	if (scmrq->error != BLK_STS_IOERR)
 		goto restart;
 
 	/* For -EIO the response block is valid. */
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index 09218cdc5129..cd598d1a4eae 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -35,7 +35,7 @@ struct scm_request {
 	struct aob *aob;
 	struct list_head list;
 	u8 retries;
-	int error;
+	blk_status_t error;
 #ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE
 	struct {
 		enum {CLUSTER_NONE, CLUSTER_READ, CLUSTER_WRITE} state;
@@ -50,7 +50,7 @@ struct scm_request {
 int scm_blk_dev_setup(struct scm_blk_dev *, struct scm_device *);
 void scm_blk_dev_cleanup(struct scm_blk_dev *);
 void scm_blk_set_available(struct scm_blk_dev *);
-void scm_blk_irq(struct scm_device *, void *, int);
+void scm_blk_irq(struct scm_device *, void *, blk_status_t);
 
 void scm_request_finish(struct scm_request *);
 void scm_request_requeue(struct scm_request *);
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index b9d7e755c8a3..a48f0d40c1d2 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -190,7 +190,7 @@ static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio)
 	unsigned long page_addr;
 	unsigned long bytes;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if ((bio->bi_iter.bi_sector & 7) != 0 ||
 	    (bio->bi_iter.bi_size & 4095) != 0)
diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index e443b0d0b236..34b9ad6b3143 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -35,7 +35,7 @@ static struct bus_type ccwgroup_bus_type;
 static void __ccwgroup_remove_symlinks(struct ccwgroup_device *gdev)
 {
 	int i;
-	char str[8];
+	char str[16];
 
 	for (i = 0; i < gdev->count; i++) {
 		sprintf(str, "cdev%d", i);
@@ -238,7 +238,7 @@ static void ccwgroup_release(struct device *dev)
 
 static int __ccwgroup_create_symlinks(struct ccwgroup_device *gdev)
 {
-	char str[8];
+	char str[16];
 	int i, rc;
 
 	for (i = 0; i < gdev->count; i++) {
diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c
index b3f44bc7f644..0f11f3bcac82 100644
--- a/drivers/s390/cio/eadm_sch.c
+++ b/drivers/s390/cio/eadm_sch.c
@@ -135,7 +135,7 @@ static void eadm_subchannel_irq(struct subchannel *sch)
 	struct eadm_private *private = get_eadm_private(sch);
 	struct eadm_scsw *scsw = &sch->schib.scsw.eadm;
 	struct irb *irb = this_cpu_ptr(&cio_irb);
-	int error = 0;
+	blk_status_t error = BLK_STS_OK;
 
 	EADM_LOG(6, "irq");
 	EADM_LOG_HEX(6, irb, sizeof(*irb));
@@ -144,10 +144,10 @@ static void eadm_subchannel_irq(struct subchannel *sch)
 
 	if ((scsw->stctl & (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND))
 	    && scsw->eswf == 1 && irb->esw.eadm.erw.r)
-		error = -EIO;
+		error = BLK_STS_IOERR;
 
 	if (scsw->fctl & SCSW_FCTL_CLEAR_FUNC)
-		error = -ETIMEDOUT;
+		error = BLK_STS_TIMEOUT;
 
 	eadm_subchannel_set_timeout(sch, 0);
 
diff --git a/drivers/s390/cio/qdio_debug.h b/drivers/s390/cio/qdio_debug.h
index f33ce8577619..1d595d17bf11 100644
--- a/drivers/s390/cio/qdio_debug.h
+++ b/drivers/s390/cio/qdio_debug.h
@@ -11,7 +11,7 @@
 #include "qdio.h"
 
 /* that gives us 15 characters in the text event views */
-#define QDIO_DBF_LEN	16
+#define QDIO_DBF_LEN	32
 
 extern debug_info_t *qdio_dbf_setup;
 extern debug_info_t *qdio_dbf_error;
diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c
index 15268edc54ae..1fa53ecdc2aa 100644
--- a/drivers/s390/cio/scm.c
+++ b/drivers/s390/cio/scm.c
@@ -71,7 +71,7 @@ void scm_driver_unregister(struct scm_driver *scmdrv)
 }
 EXPORT_SYMBOL_GPL(scm_driver_unregister);
 
-void scm_irq_handler(struct aob *aob, int error)
+void scm_irq_handler(struct aob *aob, blk_status_t error)
 {
 	struct aob_rq_header *aobrq = (void *) aob->request.data;
 	struct scm_device *scmdev = aobrq->scmdev;
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index e72abbc18ee3..a66a317f3e4f 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -70,14 +70,14 @@ static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
 {
 	return sprintf(buf, "I/O subchannel (Non-QDIO)\n");
 }
-MDEV_TYPE_ATTR_RO(name);
+static MDEV_TYPE_ATTR_RO(name);
 
 static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
 			       char *buf)
 {
 	return sprintf(buf, "%s\n", VFIO_DEVICE_API_CCW_STRING);
 }
-MDEV_TYPE_ATTR_RO(device_api);
+static MDEV_TYPE_ATTR_RO(device_api);
 
 static ssize_t available_instances_show(struct kobject *kobj,
 					struct device *dev, char *buf)
@@ -86,7 +86,7 @@ static ssize_t available_instances_show(struct kobject *kobj,
 
 	return sprintf(buf, "%d\n", atomic_read(&private->avail));
 }
-MDEV_TYPE_ATTR_RO(available_instances);
+static MDEV_TYPE_ATTR_RO(available_instances);
 
 static struct attribute *mdev_types_attrs[] = {
 	&mdev_type_attr_name.attr,
@@ -100,7 +100,7 @@ static struct attribute_group mdev_type_group = {
 	.attrs = mdev_types_attrs,
 };
 
-struct attribute_group *mdev_type_groups[] = {
+static struct attribute_group *mdev_type_groups[] = {
 	&mdev_type_group,
 	NULL,
 };
@@ -152,7 +152,7 @@ static int vfio_ccw_mdev_open(struct mdev_device *mdev)
 				      &events, &private->nb);
 }
 
-void vfio_ccw_mdev_release(struct mdev_device *mdev)
+static void vfio_ccw_mdev_release(struct mdev_device *mdev)
 {
 	struct vfio_ccw_private *private =
 		dev_get_drvdata(mdev_parent_dev(mdev));
@@ -233,7 +233,7 @@ static int vfio_ccw_mdev_get_region_info(struct vfio_region_info *info,
 	}
 }
 
-int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
+static int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
 {
 	if (info->index != VFIO_CCW_IO_IRQ_INDEX)
 		return -EINVAL;
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 9be4596d8a08..ea099910b4e9 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -668,10 +668,28 @@ static int ap_device_probe(struct device *dev)
 	struct ap_driver *ap_drv = to_ap_drv(dev->driver);
 	int rc;
 
+	/* Add queue/card to list of active queues/cards */
+	spin_lock_bh(&ap_list_lock);
+	if (is_card_dev(dev))
+		list_add(&to_ap_card(dev)->list, &ap_card_list);
+	else
+		list_add(&to_ap_queue(dev)->list,
+			 &to_ap_queue(dev)->card->queues);
+	spin_unlock_bh(&ap_list_lock);
+
 	ap_dev->drv = ap_drv;
 	rc = ap_drv->probe ? ap_drv->probe(ap_dev) : -ENODEV;
-	if (rc)
+
+	if (rc) {
+		spin_lock_bh(&ap_list_lock);
+		if (is_card_dev(dev))
+			list_del_init(&to_ap_card(dev)->list);
+		else
+			list_del_init(&to_ap_queue(dev)->list);
+		spin_unlock_bh(&ap_list_lock);
 		ap_dev->drv = NULL;
+	}
+
 	return rc;
 }
 
@@ -680,14 +698,17 @@ static int ap_device_remove(struct device *dev)
 	struct ap_device *ap_dev = to_ap_dev(dev);
 	struct ap_driver *ap_drv = ap_dev->drv;
 
+	if (ap_drv->remove)
+		ap_drv->remove(ap_dev);
+
+	/* Remove queue/card from list of active queues/cards */
 	spin_lock_bh(&ap_list_lock);
 	if (is_card_dev(dev))
 		list_del_init(&to_ap_card(dev)->list);
 	else
 		list_del_init(&to_ap_queue(dev)->list);
 	spin_unlock_bh(&ap_list_lock);
-	if (ap_drv->remove)
-		ap_drv->remove(ap_dev);
+
 	return 0;
 }
 
@@ -1056,10 +1077,6 @@ static void ap_scan_bus(struct work_struct *unused)
 				}
 				/* get it and thus adjust reference counter */
 				get_device(&ac->ap_dev.device);
-				/* Add card device to card list */
-				spin_lock_bh(&ap_list_lock);
-				list_add(&ac->list, &ap_card_list);
-				spin_unlock_bh(&ap_list_lock);
 			}
 			/* now create the new queue device */
 			aq = ap_queue_create(qid, type);
@@ -1070,10 +1087,6 @@ static void ap_scan_bus(struct work_struct *unused)
 			aq->ap_dev.device.parent = &ac->ap_dev.device;
 			dev_set_name(&aq->ap_dev.device,
 				     "%02x.%04x", id, dom);
-			/* Add queue device to card queue list */
-			spin_lock_bh(&ap_list_lock);
-			list_add(&aq->list, &ac->queues);
-			spin_unlock_bh(&ap_list_lock);
 			/* Start with a device reset */
 			spin_lock_bh(&aq->lock);
 			ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
@@ -1081,9 +1094,6 @@ static void ap_scan_bus(struct work_struct *unused)
 			/* Register device */
 			rc = device_register(&aq->ap_dev.device);
 			if (rc) {
-				spin_lock_bh(&ap_list_lock);
-				list_del_init(&aq->list);
-				spin_unlock_bh(&ap_list_lock);
 				put_device(&aq->ap_dev.device);
 				continue;
 			}
diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c
index cfa161ccc74e..836efac96813 100644
--- a/drivers/s390/crypto/ap_card.c
+++ b/drivers/s390/crypto/ap_card.c
@@ -160,7 +160,14 @@ static struct device_type ap_card_type = {
 
 static void ap_card_device_release(struct device *dev)
 {
-	kfree(to_ap_card(dev));
+	struct ap_card *ac = to_ap_card(dev);
+
+	if (!list_empty(&ac->list)) {
+		spin_lock_bh(&ap_list_lock);
+		list_del_init(&ac->list);
+		spin_unlock_bh(&ap_list_lock);
+	}
+	kfree(ac);
 }
 
 struct ap_card *ap_card_create(int id, int queue_depth, int device_type,
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
index 480c58a63769..0f1a5d02acb0 100644
--- a/drivers/s390/crypto/ap_queue.c
+++ b/drivers/s390/crypto/ap_queue.c
@@ -584,7 +584,14 @@ static struct device_type ap_queue_type = {
 
 static void ap_queue_device_release(struct device *dev)
 {
-	kfree(to_ap_queue(dev));
+	struct ap_queue *aq = to_ap_queue(dev);
+
+	if (!list_empty(&aq->list)) {
+		spin_lock_bh(&ap_list_lock);
+		list_del_init(&aq->list);
+		spin_unlock_bh(&ap_list_lock);
+	}
+	kfree(aq);
 }
 
 struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type)
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index dba94b486f05..fa732bd86729 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -1954,7 +1954,6 @@ static void netiucv_free_netdevice(struct net_device *dev)
 		privptr->conn = NULL; privptr->fsm = NULL;
 		/* privptr gets freed by free_netdev() */
 	}
-	free_netdev(dev);
 }
 
 /**
@@ -1972,7 +1971,8 @@ static void netiucv_setup_netdevice(struct net_device *dev)
 	dev->mtu	         = NETIUCV_MTU_DEFAULT;
 	dev->min_mtu		 = 576;
 	dev->max_mtu		 = NETIUCV_MTU_MAX;
-	dev->destructor          = netiucv_free_netdevice;
+	dev->needs_free_netdev   = true;
+	dev->priv_destructor     = netiucv_free_netdevice;
 	dev->hard_header_len     = NETIUCV_HDRLEN;
 	dev->addr_len            = 0;
 	dev->type                = ARPHRD_SLIP;
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index f6aa21176d89..30bc6105aac3 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -701,6 +701,7 @@ enum qeth_discipline_id {
 };
 
 struct qeth_discipline {
+	const struct device_type *devtype;
 	void (*start_poll)(struct ccw_device *, int, unsigned long);
 	qdio_handler_t *input_handler;
 	qdio_handler_t *output_handler;
@@ -875,6 +876,9 @@ extern struct qeth_discipline qeth_l2_discipline;
 extern struct qeth_discipline qeth_l3_discipline;
 extern const struct attribute_group *qeth_generic_attr_groups[];
 extern const struct attribute_group *qeth_osn_attr_groups[];
+extern const struct attribute_group qeth_device_attr_group;
+extern const struct attribute_group qeth_device_blkt_group;
+extern const struct device_type qeth_generic_devtype;
 extern struct workqueue_struct *qeth_wq;
 
 int qeth_card_hw_is_reachable(struct qeth_card *);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 38114a8d56e0..fc6d85f2b38d 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5530,10 +5530,12 @@ void qeth_core_free_discipline(struct qeth_card *card)
 	card->discipline = NULL;
 }
 
-static const struct device_type qeth_generic_devtype = {
+const struct device_type qeth_generic_devtype = {
 	.name = "qeth_generic",
 	.groups = qeth_generic_attr_groups,
 };
+EXPORT_SYMBOL_GPL(qeth_generic_devtype);
+
 static const struct device_type qeth_osn_devtype = {
 	.name = "qeth_osn",
 	.groups = qeth_osn_attr_groups,
@@ -5659,23 +5661,22 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev)
 		goto err_card;
 	}
 
-	if (card->info.type == QETH_CARD_TYPE_OSN)
-		gdev->dev.type = &qeth_osn_devtype;
-	else
-		gdev->dev.type = &qeth_generic_devtype;
-
 	switch (card->info.type) {
 	case QETH_CARD_TYPE_OSN:
 	case QETH_CARD_TYPE_OSM:
 		rc = qeth_core_load_discipline(card, QETH_DISCIPLINE_LAYER2);
 		if (rc)
 			goto err_card;
+
+		gdev->dev.type = (card->info.type != QETH_CARD_TYPE_OSN)
+					? card->discipline->devtype
+					: &qeth_osn_devtype;
 		rc = card->discipline->setup(card->gdev);
 		if (rc)
 			goto err_disc;
-	case QETH_CARD_TYPE_OSD:
-	case QETH_CARD_TYPE_OSX:
+		break;
 	default:
+		gdev->dev.type = &qeth_generic_devtype;
 		break;
 	}
 
@@ -5731,8 +5732,10 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev)
 		if (rc)
 			goto err;
 		rc = card->discipline->setup(card->gdev);
-		if (rc)
+		if (rc) {
+			qeth_core_free_discipline(card);
 			goto err;
+		}
 	}
 	rc = card->discipline->set_online(gdev);
 err:
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index 75b29fd2fcf4..db6a285d41e0 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -413,12 +413,16 @@ static ssize_t qeth_dev_layer2_store(struct device *dev,
 
 	if (card->options.layer2 == newdis)
 		goto out;
-	else {
-		card->info.mac_bits  = 0;
-		if (card->discipline) {
-			card->discipline->remove(card->gdev);
-			qeth_core_free_discipline(card);
-		}
+	if (card->info.type == QETH_CARD_TYPE_OSM) {
+		/* fixed layer, can't switch */
+		rc = -EOPNOTSUPP;
+		goto out;
+	}
+
+	card->info.mac_bits = 0;
+	if (card->discipline) {
+		card->discipline->remove(card->gdev);
+		qeth_core_free_discipline(card);
 	}
 
 	rc = qeth_core_load_discipline(card, newdis);
@@ -426,6 +430,8 @@ static ssize_t qeth_dev_layer2_store(struct device *dev,
 		goto out;
 
 	rc = card->discipline->setup(card->gdev);
+	if (rc)
+		qeth_core_free_discipline(card);
 out:
 	mutex_unlock(&card->discipline_mutex);
 	return rc ? rc : count;
@@ -703,10 +709,11 @@ static struct attribute *qeth_blkt_device_attrs[] = {
 	&dev_attr_inter_jumbo.attr,
 	NULL,
 };
-static struct attribute_group qeth_device_blkt_group = {
+const struct attribute_group qeth_device_blkt_group = {
 	.name = "blkt",
 	.attrs = qeth_blkt_device_attrs,
 };
+EXPORT_SYMBOL_GPL(qeth_device_blkt_group);
 
 static struct attribute *qeth_device_attrs[] = {
 	&dev_attr_state.attr,
@@ -726,9 +733,10 @@ static struct attribute *qeth_device_attrs[] = {
 	&dev_attr_switch_attrs.attr,
 	NULL,
 };
-static struct attribute_group qeth_device_attr_group = {
+const struct attribute_group qeth_device_attr_group = {
 	.attrs = qeth_device_attrs,
 };
+EXPORT_SYMBOL_GPL(qeth_device_attr_group);
 
 const struct attribute_group *qeth_generic_attr_groups[] = {
 	&qeth_device_attr_group,
diff --git a/drivers/s390/net/qeth_l2.h b/drivers/s390/net/qeth_l2.h
index 29d9fb3890ad..0d59f9a45ea9 100644
--- a/drivers/s390/net/qeth_l2.h
+++ b/drivers/s390/net/qeth_l2.h
@@ -8,6 +8,8 @@
 
 #include "qeth_core.h"
 
+extern const struct attribute_group *qeth_l2_attr_groups[];
+
 int qeth_l2_create_device_attributes(struct device *);
 void qeth_l2_remove_device_attributes(struct device *);
 void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card);
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 1b07f382d74c..bd2df62a5cdf 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -880,11 +880,21 @@ static int qeth_l2_stop(struct net_device *dev)
 	return 0;
 }
 
+static const struct device_type qeth_l2_devtype = {
+	.name = "qeth_layer2",
+	.groups = qeth_l2_attr_groups,
+};
+
 static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
 {
 	struct qeth_card *card = dev_get_drvdata(&gdev->dev);
+	int rc;
 
-	qeth_l2_create_device_attributes(&gdev->dev);
+	if (gdev->dev.type == &qeth_generic_devtype) {
+		rc = qeth_l2_create_device_attributes(&gdev->dev);
+		if (rc)
+			return rc;
+	}
 	INIT_LIST_HEAD(&card->vid_list);
 	hash_init(card->mac_htable);
 	card->options.layer2 = 1;
@@ -896,7 +906,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
 {
 	struct qeth_card *card = dev_get_drvdata(&cgdev->dev);
 
-	qeth_l2_remove_device_attributes(&cgdev->dev);
+	if (cgdev->dev.type == &qeth_generic_devtype)
+		qeth_l2_remove_device_attributes(&cgdev->dev);
 	qeth_set_allowed_threads(card, 0, 1);
 	wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
 
@@ -954,7 +965,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 	case QETH_CARD_TYPE_OSN:
 		card->dev = alloc_netdev(0, "osn%d", NET_NAME_UNKNOWN,
 					 ether_setup);
-		card->dev->flags |= IFF_NOARP;
 		break;
 	default:
 		card->dev = alloc_etherdev(0);
@@ -969,9 +979,12 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 	card->dev->min_mtu = 64;
 	card->dev->max_mtu = ETH_MAX_MTU;
 	card->dev->netdev_ops = &qeth_l2_netdev_ops;
-	card->dev->ethtool_ops =
-		(card->info.type != QETH_CARD_TYPE_OSN) ?
-		&qeth_l2_ethtool_ops : &qeth_l2_osn_ops;
+	if (card->info.type == QETH_CARD_TYPE_OSN) {
+		card->dev->ethtool_ops = &qeth_l2_osn_ops;
+		card->dev->flags |= IFF_NOARP;
+	} else {
+		card->dev->ethtool_ops = &qeth_l2_ethtool_ops;
+	}
 	card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 	if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
 		card->dev->hw_features = NETIF_F_SG;
@@ -1269,6 +1282,7 @@ static int qeth_l2_control_event(struct qeth_card *card,
 }
 
 struct qeth_discipline qeth_l2_discipline = {
+	.devtype = &qeth_l2_devtype,
 	.start_poll = qeth_qdio_start_poll,
 	.input_handler = (qdio_handler_t *) qeth_qdio_input_handler,
 	.output_handler = (qdio_handler_t *) qeth_qdio_output_handler,
diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c
index 687972356d6b..9696baa49e2d 100644
--- a/drivers/s390/net/qeth_l2_sys.c
+++ b/drivers/s390/net/qeth_l2_sys.c
@@ -269,3 +269,11 @@ void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card)
 	} else
 		qeth_bridgeport_an_set(card, 0);
 }
+
+const struct attribute_group *qeth_l2_attr_groups[] = {
+	&qeth_device_attr_group,
+	&qeth_device_blkt_group,
+	/* l2 specific, see l2_{create,remove}_device_attributes(): */
+	&qeth_l2_bridgeport_attr_group,
+	NULL,
+};
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 6e0354ef4b86..d8df1e635163 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -3039,8 +3039,13 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 static int qeth_l3_probe_device(struct ccwgroup_device *gdev)
 {
 	struct qeth_card *card = dev_get_drvdata(&gdev->dev);
+	int rc;
 
-	qeth_l3_create_device_attributes(&gdev->dev);
+	rc = qeth_l3_create_device_attributes(&gdev->dev);
+	if (rc)
+		return rc;
+	hash_init(card->ip_htable);
+	hash_init(card->ip_mc_htable);
 	card->options.layer2 = 0;
 	card->info.hwtrap = 0;
 	return 0;
@@ -3306,6 +3311,7 @@ static int qeth_l3_control_event(struct qeth_card *card,
 }
 
 struct qeth_discipline qeth_l3_discipline = {
+	.devtype = &qeth_generic_devtype,
 	.start_poll = qeth_qdio_start_poll,
 	.input_handler = (qdio_handler_t *) qeth_qdio_input_handler,
 	.output_handler = (qdio_handler_t *) qeth_qdio_output_handler,
diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c
index 2ce0b3eb2efe..a99d09a11f05 100644
--- a/drivers/s390/virtio/kvm_virtio.c
+++ b/drivers/s390/virtio/kvm_virtio.c
@@ -189,7 +189,7 @@ static bool kvm_notify(struct virtqueue *vq)
 static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 				     unsigned index,
 				     void (*callback)(struct virtqueue *vq),
-				     const char *name)
+				     const char *name, bool ctx)
 {
 	struct kvm_device *kdev = to_kvmdev(vdev);
 	struct kvm_vqconfig *config;
@@ -211,7 +211,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 		goto out;
 
 	vq = vring_new_virtqueue(index, config->num, KVM_S390_VIRTIO_RING_ALIGN,
-				 vdev, true, (void *) config->address,
+				 vdev, true, ctx, (void *) config->address,
 				 kvm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
@@ -256,6 +256,7 @@ static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 			struct virtqueue *vqs[],
 			vq_callback_t *callbacks[],
 			const char * const names[],
+			const bool *ctx,
 			struct irq_affinity *desc)
 {
 	struct kvm_device *kdev = to_kvmdev(vdev);
@@ -266,7 +267,8 @@ static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		return -ENOENT;
 
 	for (i = 0; i < nvqs; ++i) {
-		vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i]);
+		vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i],
+				     ctx ? ctx[i] : false);
 		if (IS_ERR(vqs[i]))
 			goto error;
 	}
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 0ed209f3d8b0..b18fe2014cf2 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -87,7 +87,7 @@ struct vq_info_block {
 } __packed;
 
 struct virtio_feature_desc {
-	__u32 features;
+	__le32 features;
 	__u8 index;
 } __packed;
 
@@ -484,7 +484,7 @@ static void virtio_ccw_del_vqs(struct virtio_device *vdev)
 
 static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
 					     int i, vq_callback_t *callback,
-					     const char *name,
+					     const char *name, bool ctx,
 					     struct ccw1 *ccw)
 {
 	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
@@ -522,7 +522,7 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
 	}
 
 	vq = vring_new_virtqueue(i, info->num, KVM_VIRTIO_CCW_RING_ALIGN, vdev,
-				 true, info->queue, virtio_ccw_kvm_notify,
+				 true, ctx, info->queue, virtio_ccw_kvm_notify,
 				 callback, name);
 	if (!vq) {
 		/* For now, we fail if we can't get the requested size. */
@@ -629,6 +629,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 			       struct virtqueue *vqs[],
 			       vq_callback_t *callbacks[],
 			       const char * const names[],
+			       const bool *ctx,
 			       struct irq_affinity *desc)
 {
 	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
@@ -642,7 +643,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 
 	for (i = 0; i < nvqs; ++i) {
 		vqs[i] = virtio_ccw_setup_vq(vdev, i, callbacks[i], names[i],
-					     ccw);
+					     ctx ? ctx[i] : false, ccw);
 		if (IS_ERR(vqs[i])) {
 			ret = PTR_ERR(vqs[i]);
 			vqs[i] = NULL;
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 62fed9dc893e..14f377ac1280 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -214,7 +214,7 @@ static void jsfd_request(void)
 		struct jsfd_part *jdp = req->rq_disk->private_data;
 		unsigned long offset = blk_rq_pos(req) << 9;
 		size_t len = blk_rq_cur_bytes(req);
-		int err = -EIO;
+		blk_status_t err = BLK_STS_IOERR;
 
 		if ((offset + len) > jdp->dsize)
 			goto end;
@@ -230,7 +230,7 @@ static void jsfd_request(void)
 		}
 
 		jsfd_read(bio_data(req->bio), jdp->dbase + offset, len);
-		err = 0;
+		err = BLK_STS_OK;
 	end:
 		if (!__blk_end_request_cur(req, err))
 			req = jsfd_next_request();
@@ -592,6 +592,7 @@ static int jsfd_init(void)
 			put_disk(disk);
 			goto out;
 		}
+		blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
 		jsfd_disk[i] = disk;
 	}
 
diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index f44d0487236e..ce5dc73d85bb 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -331,11 +331,11 @@ MODULE_LICENSE("GPL");
 #if !defined(PCMCIA)
 #if defined(MODULE)
 static int io[] = {0, 0};
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_PARM_DESC(io,"base io address of controller");
 
 static int irq[] = {0, 0};
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq,"interrupt for controller");
 
 static int scsiid[] = {7, 7};
diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c
index 7db448ec8beb..a23cc9ac5acd 100644
--- a/drivers/scsi/aha1542.c
+++ b/drivers/scsi/aha1542.c
@@ -31,7 +31,7 @@ module_param(isapnp, bool, 0);
 MODULE_PARM_DESC(isapnp, "enable PnP support (default=1)");
 
 static int io[MAXBOARDS] = { 0x330, 0x334, 0, 0 };
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_PARM_DESC(io, "base IO address of controller (0x130,0x134,0x230,0x234,0x330,0x334, default=0x330,0x334)");
 
 /* time AHA spends on the AT-bus during data transfer */
diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h
index 4fc8ed5fe067..1f424e40afdf 100644
--- a/drivers/scsi/bnx2fc/bnx2fc.h
+++ b/drivers/scsi/bnx2fc/bnx2fc.h
@@ -191,6 +191,7 @@ struct bnx2fc_hba {
 	struct bnx2fc_cmd_mgr *cmd_mgr;
 	spinlock_t hba_lock;
 	struct mutex hba_mutex;
+	struct mutex hba_stats_mutex;
 	unsigned long adapter_state;
 		#define ADAPTER_STATE_UP		0
 		#define ADAPTER_STATE_GOING_DOWN	1
diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 93b5a0012417..902722dc4ce3 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -663,15 +663,17 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost)
 	if (!fw_stats)
 		return NULL;
 
+	mutex_lock(&hba->hba_stats_mutex);
+
 	bnx2fc_stats = fc_get_host_stats(shost);
 
 	init_completion(&hba->stat_req_done);
 	if (bnx2fc_send_stat_req(hba))
-		return bnx2fc_stats;
+		goto unlock_stats_mutex;
 	rc = wait_for_completion_timeout(&hba->stat_req_done, (2 * HZ));
 	if (!rc) {
 		BNX2FC_HBA_DBG(lport, "FW stat req timed out\n");
-		return bnx2fc_stats;
+		goto unlock_stats_mutex;
 	}
 	BNX2FC_STATS(hba, rx_stat2, fc_crc_cnt);
 	bnx2fc_stats->invalid_crc_count += hba->bfw_stats.fc_crc_cnt;
@@ -693,6 +695,9 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost)
 
 	memcpy(&hba->prev_stats, hba->stats_buffer,
 	       sizeof(struct fcoe_statistics_params));
+
+unlock_stats_mutex:
+	mutex_unlock(&hba->hba_stats_mutex);
 	return bnx2fc_stats;
 }
 
@@ -1340,6 +1345,7 @@ static struct bnx2fc_hba *bnx2fc_hba_create(struct cnic_dev *cnic)
 	}
 	spin_lock_init(&hba->hba_lock);
 	mutex_init(&hba->hba_mutex);
+	mutex_init(&hba->hba_stats_mutex);
 
 	hba->cnic = cnic;
 
diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c
index 622bdabc8894..dab195f04da7 100644
--- a/drivers/scsi/csiostor/csio_hw.c
+++ b/drivers/scsi/csiostor/csio_hw.c
@@ -1769,7 +1769,6 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param)
 		goto bye;
 	}
 
-	mempool_free(mbp, hw->mb_mempool);
 	if (finicsum != cfcsum) {
 		csio_warn(hw,
 		      "Config File checksum mismatch: csum=%#x, computed=%#x\n",
@@ -1780,6 +1779,10 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param)
 	rv = csio_hw_validate_caps(hw, mbp);
 	if (rv != 0)
 		goto bye;
+
+	mempool_free(mbp, hw->mb_mempool);
+	mbp = NULL;
+
 	/*
 	 * Note that we're operating with parameters
 	 * not supplied by the driver, rather than from hard-wired
diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
index 1076c1578322..0aae094ab91c 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
@@ -1595,7 +1595,6 @@ static void release_offload_resources(struct cxgbi_sock *csk)
 		cxgbi_sock_put(csk);
 	}
 	csk->dst = NULL;
-	csk->cdev = NULL;
 }
 
 static int init_act_open(struct cxgbi_sock *csk)
diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c
index bd7d39ecbd24..e4c83b7c96a8 100644
--- a/drivers/scsi/cxgbi/libcxgbi.c
+++ b/drivers/scsi/cxgbi/libcxgbi.c
@@ -867,7 +867,8 @@ static void need_active_close(struct cxgbi_sock *csk)
 	log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
 		csk, (csk)->state, (csk)->flags, (csk)->tid);
 	spin_lock_bh(&csk->lock);
-	dst_confirm(csk->dst);
+	if (csk->dst)
+		dst_confirm(csk->dst);
 	data_lost = skb_queue_len(&csk->receive_queue);
 	__skb_queue_purge(&csk->receive_queue);
 
@@ -882,7 +883,8 @@ static void need_active_close(struct cxgbi_sock *csk)
 	}
 
 	if (close_req) {
-		if (data_lost)
+		if (!cxgbi_sock_flag(csk, CTPF_LOGOUT_RSP_RCVD) ||
+		    data_lost)
 			csk->cdev->csk_send_abort_req(csk);
 		else
 			csk->cdev->csk_send_close_req(csk);
@@ -1186,9 +1188,10 @@ static int cxgbi_sock_send_pdus(struct cxgbi_sock *csk, struct sk_buff *skb)
 				cxgbi_ulp_extra_len(cxgbi_skcb_ulp_mode(skb));
 		skb = next;
 	}
-done:
+
 	if (likely(skb_queue_len(&csk->write_queue)))
 		cdev->csk_push_tx_frames(csk, 1);
+done:
 	spin_unlock_bh(&csk->lock);
 	return copied;
 
@@ -1568,9 +1571,12 @@ static inline int read_pdu_skb(struct iscsi_conn *conn,
 	}
 }
 
-static int skb_read_pdu_bhs(struct iscsi_conn *conn, struct sk_buff *skb)
+static int
+skb_read_pdu_bhs(struct cxgbi_sock *csk, struct iscsi_conn *conn,
+		 struct sk_buff *skb)
 {
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	int err;
 
 	log_debug(1 << CXGBI_DBG_PDU_RX,
 		"conn 0x%p, skb 0x%p, len %u, flag 0x%lx.\n",
@@ -1608,7 +1614,16 @@ static int skb_read_pdu_bhs(struct iscsi_conn *conn, struct sk_buff *skb)
 		}
 	}
 
-	return read_pdu_skb(conn, skb, 0, 0);
+	err = read_pdu_skb(conn, skb, 0, 0);
+	if (likely(err >= 0)) {
+		struct iscsi_hdr *hdr = (struct iscsi_hdr *)skb->data;
+		u8 opcode = hdr->opcode & ISCSI_OPCODE_MASK;
+
+		if (unlikely(opcode == ISCSI_OP_LOGOUT_RSP))
+			cxgbi_sock_set_flag(csk, CTPF_LOGOUT_RSP_RCVD);
+	}
+
+	return err;
 }
 
 static int skb_read_pdu_data(struct iscsi_conn *conn, struct sk_buff *lskb,
@@ -1713,7 +1728,7 @@ void cxgbi_conn_pdu_ready(struct cxgbi_sock *csk)
 			cxgbi_skcb_rx_pdulen(skb));
 
 		if (cxgbi_skcb_test_flag(skb, SKCBF_RX_COALESCED)) {
-			err = skb_read_pdu_bhs(conn, skb);
+			err = skb_read_pdu_bhs(csk, conn, skb);
 			if (err < 0) {
 				pr_err("coalesced bhs, csk 0x%p, skb 0x%p,%u, "
 					"f 0x%lx, plen %u.\n",
@@ -1731,7 +1746,7 @@ void cxgbi_conn_pdu_ready(struct cxgbi_sock *csk)
 					cxgbi_skcb_flags(skb),
 					cxgbi_skcb_rx_pdulen(skb));
 		} else {
-			err = skb_read_pdu_bhs(conn, skb);
+			err = skb_read_pdu_bhs(csk, conn, skb);
 			if (err < 0) {
 				pr_err("bhs, csk 0x%p, skb 0x%p,%u, "
 					"f 0x%lx, plen %u.\n",
@@ -1873,6 +1888,11 @@ int cxgbi_conn_alloc_pdu(struct iscsi_task *task, u8 opcode)
 	tcp_task->dd_data = tdata;
 	task->hdr = NULL;
 
+	if (tdata->skb) {
+		kfree_skb(tdata->skb);
+		tdata->skb = NULL;
+	}
+
 	if (SKB_MAX_HEAD(cdev->skb_tx_rsvd) > (512 * MAX_SKB_FRAGS) &&
 	    (opcode == ISCSI_OP_SCSI_DATA_OUT ||
 	     (opcode == ISCSI_OP_SCSI_CMD &&
@@ -1890,6 +1910,7 @@ int cxgbi_conn_alloc_pdu(struct iscsi_task *task, u8 opcode)
 		return -ENOMEM;
 	}
 
+	skb_get(tdata->skb);
 	skb_reserve(tdata->skb, cdev->skb_tx_rsvd);
 	task->hdr = (struct iscsi_hdr *)tdata->skb->data;
 	task->hdr_max = SKB_TX_ISCSI_PDU_HEADER_MAX; /* BHS + AHS */
@@ -2035,9 +2056,9 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task)
 	unsigned int datalen;
 	int err;
 
-	if (!skb) {
+	if (!skb || cxgbi_skcb_test_flag(skb, SKCBF_TX_DONE)) {
 		log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
-			"task 0x%p, skb NULL.\n", task);
+			"task 0x%p, skb 0x%p\n", task, skb);
 		return 0;
 	}
 
@@ -2050,7 +2071,6 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task)
 	}
 
 	datalen = skb->data_len;
-	tdata->skb = NULL;
 
 	/* write ppod first if using ofldq to write ppod */
 	if (ttinfo->flags & CXGBI_PPOD_INFO_FLAG_VALID) {
@@ -2078,6 +2098,7 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task)
 			pdulen += ISCSI_DIGEST_SIZE;
 
 		task->conn->txdata_octets += pdulen;
+		cxgbi_skcb_set_flag(skb, SKCBF_TX_DONE);
 		return 0;
 	}
 
@@ -2086,7 +2107,6 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task)
 			"task 0x%p, skb 0x%p, len %u/%u, %d EAGAIN.\n",
 			task, skb, skb->len, skb->data_len, err);
 		/* reset skb to send when we are called again */
-		tdata->skb = skb;
 		return err;
 	}
 
@@ -2094,7 +2114,8 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task)
 		"itt 0x%x, skb 0x%p, len %u/%u, xmit err %d.\n",
 		task->itt, skb, skb->len, skb->data_len, err);
 
-	kfree_skb(skb);
+	__kfree_skb(tdata->skb);
+	tdata->skb = NULL;
 
 	iscsi_conn_printk(KERN_ERR, task->conn, "xmit err %d.\n", err);
 	iscsi_conn_failure(task->conn, ISCSI_ERR_XMIT_FAILED);
@@ -2113,8 +2134,10 @@ void cxgbi_cleanup_task(struct iscsi_task *task)
 
 	tcp_task->dd_data = NULL;
 	/*  never reached the xmit task callout */
-	if (tdata->skb)
-		__kfree_skb(tdata->skb);
+	if (tdata->skb) {
+		kfree_skb(tdata->skb);
+		tdata->skb = NULL;
+	}
 
 	task_release_itt(task, task->hdr_itt);
 	memset(tdata, 0, sizeof(*tdata));
@@ -2714,6 +2737,9 @@ EXPORT_SYMBOL_GPL(cxgbi_attr_is_visible);
 static int __init libcxgbi_init_module(void)
 {
 	pr_info("%s", version);
+
+	BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, cb) <
+		     sizeof(struct cxgbi_skb_cb));
 	return 0;
 }
 
diff --git a/drivers/scsi/cxgbi/libcxgbi.h b/drivers/scsi/cxgbi/libcxgbi.h
index 18e0ea83d361..37f07aaab1e4 100644
--- a/drivers/scsi/cxgbi/libcxgbi.h
+++ b/drivers/scsi/cxgbi/libcxgbi.h
@@ -187,6 +187,7 @@ enum cxgbi_sock_flags {
 	CTPF_HAS_ATID,		/* reserved atid */
 	CTPF_HAS_TID,		/* reserved hw tid */
 	CTPF_OFFLOAD_DOWN,	/* offload function off */
+	CTPF_LOGOUT_RSP_RCVD,   /* received logout response */
 };
 
 struct cxgbi_skb_rx_cb {
@@ -195,7 +196,8 @@ struct cxgbi_skb_rx_cb {
 };
 
 struct cxgbi_skb_tx_cb {
-	void *l2t;
+	void *handle;
+	void *arp_err_handler;
 	struct sk_buff *wr_next;
 };
 
@@ -203,6 +205,7 @@ enum cxgbi_skcb_flags {
 	SKCBF_TX_NEED_HDR,	/* packet needs a header */
 	SKCBF_TX_MEM_WRITE,     /* memory write */
 	SKCBF_TX_FLAG_COMPL,    /* wr completion flag */
+	SKCBF_TX_DONE,		/* skb tx done */
 	SKCBF_RX_COALESCED,	/* received whole pdu */
 	SKCBF_RX_HDR,		/* received pdu header */
 	SKCBF_RX_DATA,		/* received pdu payload */
@@ -215,13 +218,13 @@ enum cxgbi_skcb_flags {
 };
 
 struct cxgbi_skb_cb {
-	unsigned char ulp_mode;
-	unsigned long flags;
-	unsigned int seq;
 	union {
 		struct cxgbi_skb_rx_cb rx;
 		struct cxgbi_skb_tx_cb tx;
 	};
+	unsigned char ulp_mode;
+	unsigned long flags;
+	unsigned int seq;
 };
 
 #define CXGBI_SKB_CB(skb)	((struct cxgbi_skb_cb *)&((skb)->cb[0]))
@@ -374,11 +377,9 @@ static inline void cxgbi_sock_enqueue_wr(struct cxgbi_sock *csk,
 	cxgbi_skcb_tx_wr_next(skb) = NULL;
 	/*
 	 * We want to take an extra reference since both us and the driver
-	 * need to free the packet before it's really freed. We know there's
-	 * just one user currently so we use atomic_set rather than skb_get
-	 * to avoid the atomic op.
+	 * need to free the packet before it's really freed.
 	 */
-	atomic_set(&skb->users, 2);
+	skb_get(skb);
 
 	if (!csk->wr_pending_head)
 		csk->wr_pending_head = skb;
diff --git a/drivers/scsi/cxlflash/Kconfig b/drivers/scsi/cxlflash/Kconfig
index c052104e523e..a011c5dbf214 100644
--- a/drivers/scsi/cxlflash/Kconfig
+++ b/drivers/scsi/cxlflash/Kconfig
@@ -5,6 +5,7 @@
 config CXLFLASH
 	tristate "Support for IBM CAPI Flash"
 	depends on PCI && SCSI && CXL && EEH
+	select IRQ_POLL
 	default m
 	help
 	  Allows CAPI Accelerated IO to Flash
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 3cbab8710e58..2ceff585f189 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -265,18 +265,16 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr,
 				      struct list_head *list,
 				      unsigned char *cdb)
 {
-	struct scsi_device *sdev = ctlr->ms_sdev;
-	struct rdac_dh_data *h = sdev->handler_data;
 	struct rdac_mode_common *common;
 	unsigned data_size;
 	struct rdac_queue_data *qdata;
 	u8 *lun_table;
 
-	if (h->ctlr->use_ms10) {
+	if (ctlr->use_ms10) {
 		struct rdac_pg_expanded *rdac_pg;
 
 		data_size = sizeof(struct rdac_pg_expanded);
-		rdac_pg = &h->ctlr->mode_select.expanded;
+		rdac_pg = &ctlr->mode_select.expanded;
 		memset(rdac_pg, 0, data_size);
 		common = &rdac_pg->common;
 		rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER + 0x40;
@@ -288,7 +286,7 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr,
 		struct rdac_pg_legacy *rdac_pg;
 
 		data_size = sizeof(struct rdac_pg_legacy);
-		rdac_pg = &h->ctlr->mode_select.legacy;
+		rdac_pg = &ctlr->mode_select.legacy;
 		memset(rdac_pg, 0, data_size);
 		common = &rdac_pg->common;
 		rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER;
@@ -304,7 +302,7 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr,
 	}
 
 	/* Prepare the command. */
-	if (h->ctlr->use_ms10) {
+	if (ctlr->use_ms10) {
 		cdb[0] = MODE_SELECT_10;
 		cdb[7] = data_size >> 8;
 		cdb[8] = data_size & 0xff;
diff --git a/drivers/scsi/dpt/dpti_i2o.h b/drivers/scsi/dpt/dpti_i2o.h
index bd9e31e16249..16fc380b5512 100644
--- a/drivers/scsi/dpt/dpti_i2o.h
+++ b/drivers/scsi/dpt/dpti_i2o.h
@@ -48,7 +48,7 @@
 #include <linux/wait.h>
 typedef wait_queue_head_t adpt_wait_queue_head_t;
 #define ADPT_DECLARE_WAIT_QUEUE_HEAD(wait) DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait)
-typedef wait_queue_t adpt_wait_queue_t;
+typedef wait_queue_entry_t adpt_wait_queue_entry_t;
 
 /*
  * message structures
diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 67c8dac321ad..c34fc91ba486 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -85,8 +85,8 @@ static int ncr_53c400;
 static int ncr_53c400a;
 static int dtc_3181e;
 static int hp_c2502;
-module_param(ncr_irq, int, 0);
-module_param(ncr_addr, int, 0);
+module_param_hw(ncr_irq, int, irq, 0);
+module_param_hw(ncr_addr, int, ioport, 0);
 module_param(ncr_5380, int, 0);
 module_param(ncr_53c400, int, 0);
 module_param(ncr_53c400a, int, 0);
@@ -94,11 +94,11 @@ module_param(dtc_3181e, int, 0);
 module_param(hp_c2502, int, 0);
 
 static int irq[] = { -1, -1, -1, -1, -1, -1, -1, -1 };
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "IRQ number(s) (0=none, 254=auto [default])");
 
 static int base[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
-module_param_array(base, int, NULL, 0);
+module_param_hw_array(base, int, ioport, NULL, 0);
 MODULE_PARM_DESC(base, "base address(es)");
 
 static int card[] = { -1, -1, -1, -1, -1, -1, -1, -1 };
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index d020a13646ae..facc7271f932 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -353,7 +353,7 @@ static int probe_eisa_isa = 0;
 static int force_dma32 = 0;
 
 /* parameters for modprobe/insmod */
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param(disable, int, 0);
 module_param(reserve_mode, int, 0);
 module_param_array(reserve_list, int, NULL, 0);
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index 0f807798c624..abf6026645dd 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -1170,6 +1170,9 @@ static struct ibmvscsis_cmd *ibmvscsis_get_free_cmd(struct scsi_info *vscsi)
 		cmd = list_first_entry_or_null(&vscsi->free_cmd,
 					       struct ibmvscsis_cmd, list);
 		if (cmd) {
+			if (cmd->abort_cmd)
+				cmd->abort_cmd = NULL;
+			cmd->flags &= ~(DELAY_SEND);
 			list_del(&cmd->list);
 			cmd->iue = iue;
 			cmd->type = UNSET_TYPE;
@@ -1749,45 +1752,99 @@ static void srp_snd_msg_failed(struct scsi_info *vscsi, long rc)
 static void ibmvscsis_send_messages(struct scsi_info *vscsi)
 {
 	u64 msg_hi = 0;
-	/* note do not attmempt to access the IU_data_ptr with this pointer
+	/* note do not attempt to access the IU_data_ptr with this pointer
 	 * it is not valid
 	 */
 	struct viosrp_crq *crq = (struct viosrp_crq *)&msg_hi;
 	struct ibmvscsis_cmd *cmd, *nxt;
 	struct iu_entry *iue;
 	long rc = ADAPT_SUCCESS;
+	bool retry = false;
 
 	if (!(vscsi->flags & RESPONSE_Q_DOWN)) {
-		list_for_each_entry_safe(cmd, nxt, &vscsi->waiting_rsp, list) {
-			iue = cmd->iue;
+		do {
+			retry = false;
+			list_for_each_entry_safe(cmd, nxt, &vscsi->waiting_rsp,
+						 list) {
+				/*
+				 * Check to make sure abort cmd gets processed
+				 * prior to the abort tmr cmd
+				 */
+				if (cmd->flags & DELAY_SEND)
+					continue;
 
-			crq->valid = VALID_CMD_RESP_EL;
-			crq->format = cmd->rsp.format;
+				if (cmd->abort_cmd) {
+					retry = true;
+					cmd->abort_cmd->flags &= ~(DELAY_SEND);
+					cmd->abort_cmd = NULL;
+				}
 
-			if (cmd->flags & CMD_FAST_FAIL)
-				crq->status = VIOSRP_ADAPTER_FAIL;
+				/*
+				 * If CMD_T_ABORTED w/o CMD_T_TAS scenarios and
+				 * the case where LIO issued a
+				 * ABORT_TASK: Sending TMR_TASK_DOES_NOT_EXIST
+				 * case then we dont send a response, since it
+				 * was already done.
+				 */
+				if (cmd->se_cmd.transport_state & CMD_T_ABORTED &&
+				    !(cmd->se_cmd.transport_state & CMD_T_TAS)) {
+					list_del(&cmd->list);
+					ibmvscsis_free_cmd_resources(vscsi,
+								     cmd);
+					/*
+					 * With a successfully aborted op
+					 * through LIO we want to increment the
+					 * the vscsi credit so that when we dont
+					 * send a rsp to the original scsi abort
+					 * op (h_send_crq), but the tm rsp to
+					 * the abort is sent, the credit is
+					 * correctly sent with the abort tm rsp.
+					 * We would need 1 for the abort tm rsp
+					 * and 1 credit for the aborted scsi op.
+					 * Thus we need to increment here.
+					 * Also we want to increment the credit
+					 * here because we want to make sure
+					 * cmd is actually released first
+					 * otherwise the client will think it
+					 * it can send a new cmd, and we could
+					 * find ourselves short of cmd elements.
+					 */
+					vscsi->credit += 1;
+				} else {
+					iue = cmd->iue;
 
-			crq->IU_length = cpu_to_be16(cmd->rsp.len);
+					crq->valid = VALID_CMD_RESP_EL;
+					crq->format = cmd->rsp.format;
 
-			rc = h_send_crq(vscsi->dma_dev->unit_address,
-					be64_to_cpu(msg_hi),
-					be64_to_cpu(cmd->rsp.tag));
+					if (cmd->flags & CMD_FAST_FAIL)
+						crq->status = VIOSRP_ADAPTER_FAIL;
 
-			pr_debug("send_messages: cmd %p, tag 0x%llx, rc %ld\n",
-				 cmd, be64_to_cpu(cmd->rsp.tag), rc);
+					crq->IU_length = cpu_to_be16(cmd->rsp.len);
 
-			/* if all ok free up the command element resources */
-			if (rc == H_SUCCESS) {
-				/* some movement has occurred */
-				vscsi->rsp_q_timer.timer_pops = 0;
-				list_del(&cmd->list);
+					rc = h_send_crq(vscsi->dma_dev->unit_address,
+							be64_to_cpu(msg_hi),
+							be64_to_cpu(cmd->rsp.tag));
 
-				ibmvscsis_free_cmd_resources(vscsi, cmd);
-			} else {
-				srp_snd_msg_failed(vscsi, rc);
-				break;
+					pr_debug("send_messages: cmd %p, tag 0x%llx, rc %ld\n",
+						 cmd, be64_to_cpu(cmd->rsp.tag), rc);
+
+					/* if all ok free up the command
+					 * element resources
+					 */
+					if (rc == H_SUCCESS) {
+						/* some movement has occurred */
+						vscsi->rsp_q_timer.timer_pops = 0;
+						list_del(&cmd->list);
+
+						ibmvscsis_free_cmd_resources(vscsi,
+									     cmd);
+					} else {
+						srp_snd_msg_failed(vscsi, rc);
+						break;
+					}
+				}
 			}
-		}
+		} while (retry);
 
 		if (!rc) {
 			/*
@@ -2708,6 +2765,7 @@ static int ibmvscsis_alloc_cmds(struct scsi_info *vscsi, int num)
 
 	for (i = 0, cmd = (struct ibmvscsis_cmd *)vscsi->cmd_pool; i < num;
 	     i++, cmd++) {
+		cmd->abort_cmd = NULL;
 		cmd->adapter = vscsi;
 		INIT_WORK(&cmd->work, ibmvscsis_scheduler);
 		list_add_tail(&cmd->list, &vscsi->free_cmd);
@@ -2926,10 +2984,7 @@ static long srp_build_response(struct scsi_info *vscsi,
 
 	rsp->opcode = SRP_RSP;
 
-	if (vscsi->credit > 0 && vscsi->state == SRP_PROCESSING)
-		rsp->req_lim_delta = cpu_to_be32(vscsi->credit);
-	else
-		rsp->req_lim_delta = cpu_to_be32(1 + vscsi->credit);
+	rsp->req_lim_delta = cpu_to_be32(1 + vscsi->credit);
 	rsp->tag = cmd->rsp.tag;
 	rsp->flags = 0;
 
@@ -3579,9 +3634,20 @@ static int ibmvscsis_write_pending(struct se_cmd *se_cmd)
 {
 	struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd,
 						 se_cmd);
+	struct scsi_info *vscsi = cmd->adapter;
 	struct iu_entry *iue = cmd->iue;
 	int rc;
 
+	/*
+	 * If CLIENT_FAILED OR RESPONSE_Q_DOWN, then just return success
+	 * since LIO can't do anything about it, and we dont want to
+	 * attempt an srp_transfer_data.
+	 */
+	if ((vscsi->flags & (CLIENT_FAILED | RESPONSE_Q_DOWN))) {
+		pr_err("write_pending failed since: %d\n", vscsi->flags);
+		return 0;
+	}
+
 	rc = srp_transfer_data(cmd, &vio_iu(iue)->srp.cmd, ibmvscsis_rdma,
 			       1, 1);
 	if (rc) {
@@ -3660,11 +3726,28 @@ static void ibmvscsis_queue_tm_rsp(struct se_cmd *se_cmd)
 	struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd,
 						 se_cmd);
 	struct scsi_info *vscsi = cmd->adapter;
+	struct ibmvscsis_cmd *cmd_itr;
+	struct iu_entry *iue = iue = cmd->iue;
+	struct srp_tsk_mgmt *srp_tsk = &vio_iu(iue)->srp.tsk_mgmt;
+	u64 tag_to_abort = be64_to_cpu(srp_tsk->task_tag);
 	uint len;
 
 	pr_debug("queue_tm_rsp %p, status %d\n",
 		 se_cmd, (int)se_cmd->se_tmr_req->response);
 
+	if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK &&
+	    cmd->se_cmd.se_tmr_req->response == TMR_TASK_DOES_NOT_EXIST) {
+		spin_lock_bh(&vscsi->intr_lock);
+		list_for_each_entry(cmd_itr, &vscsi->active_q, list) {
+			if (tag_to_abort == cmd_itr->se_cmd.tag) {
+				cmd_itr->abort_cmd = cmd;
+				cmd->flags |= DELAY_SEND;
+				break;
+			}
+		}
+		spin_unlock_bh(&vscsi->intr_lock);
+	}
+
 	srp_build_response(vscsi, cmd, &len);
 	cmd->rsp.format = SRP_FORMAT;
 	cmd->rsp.len = len;
@@ -3672,8 +3755,8 @@ static void ibmvscsis_queue_tm_rsp(struct se_cmd *se_cmd)
 
 static void ibmvscsis_aborted_task(struct se_cmd *se_cmd)
 {
-	/* TBD: What (if anything) should we do here? */
-	pr_debug("ibmvscsis_aborted_task %p\n", se_cmd);
+	pr_debug("ibmvscsis_aborted_task %p task_tag: %llu\n",
+		 se_cmd, se_cmd->tag);
 }
 
 static struct se_wwn *ibmvscsis_make_tport(struct target_fabric_configfs *tf,
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h
index 65c6189885ab..b4391a8de456 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h
@@ -168,10 +168,12 @@ struct ibmvscsis_cmd {
 	struct iu_rsp rsp;
 	struct work_struct work;
 	struct scsi_info *adapter;
+	struct ibmvscsis_cmd *abort_cmd;
 	/* Sense buffer that will be mapped into outgoing status */
 	unsigned char sense_buf[TRANSPORT_SENSE_BUFFER];
 	u64 init_time;
 #define CMD_FAST_FAIL	BIT(0)
+#define DELAY_SEND	BIT(1)
 	u32 flags;
 	char type;
 };
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index 3419e1bcdff6..67621308eb9c 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -301,13 +301,13 @@ static uint32_t ips_statupd_copperhead_memio(ips_ha_t *);
 static uint32_t ips_statupd_morpheus(ips_ha_t *);
 static ips_scb_t *ips_getscb(ips_ha_t *);
 static void ips_putq_scb_head(ips_scb_queue_t *, ips_scb_t *);
-static void ips_putq_wait_tail(ips_wait_queue_t *, struct scsi_cmnd *);
+static void ips_putq_wait_tail(ips_wait_queue_entry_t *, struct scsi_cmnd *);
 static void ips_putq_copp_tail(ips_copp_queue_t *,
 				      ips_copp_wait_item_t *);
 static ips_scb_t *ips_removeq_scb_head(ips_scb_queue_t *);
 static ips_scb_t *ips_removeq_scb(ips_scb_queue_t *, ips_scb_t *);
-static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_t *);
-static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_t *,
+static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_entry_t *);
+static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_entry_t *,
 					  struct scsi_cmnd *);
 static ips_copp_wait_item_t *ips_removeq_copp(ips_copp_queue_t *,
 						     ips_copp_wait_item_t *);
@@ -2871,7 +2871,7 @@ ips_removeq_scb(ips_scb_queue_t * queue, ips_scb_t * item)
 /* ASSUMED to be called from within the HA lock                             */
 /*                                                                          */
 /****************************************************************************/
-static void ips_putq_wait_tail(ips_wait_queue_t *queue, struct scsi_cmnd *item)
+static void ips_putq_wait_tail(ips_wait_queue_entry_t *queue, struct scsi_cmnd *item)
 {
 	METHOD_TRACE("ips_putq_wait_tail", 1);
 
@@ -2902,7 +2902,7 @@ static void ips_putq_wait_tail(ips_wait_queue_t *queue, struct scsi_cmnd *item)
 /* ASSUMED to be called from within the HA lock                             */
 /*                                                                          */
 /****************************************************************************/
-static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_t *queue)
+static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_entry_t *queue)
 {
 	struct scsi_cmnd *item;
 
@@ -2936,7 +2936,7 @@ static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_t *queue)
 /* ASSUMED to be called from within the HA lock                             */
 /*                                                                          */
 /****************************************************************************/
-static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_t *queue,
+static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_entry_t *queue,
 					  struct scsi_cmnd *item)
 {
 	struct scsi_cmnd *p;
diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h
index b782bb60baf0..366be3b2f9b4 100644
--- a/drivers/scsi/ips.h
+++ b/drivers/scsi/ips.h
@@ -989,7 +989,7 @@ typedef struct ips_wait_queue {
 	struct scsi_cmnd *head;
 	struct scsi_cmnd *tail;
 	int count;
-} ips_wait_queue_t;
+} ips_wait_queue_entry_t;
 
 typedef struct ips_copp_wait_item {
 	struct scsi_cmnd *scsi_cmd;
@@ -1035,7 +1035,7 @@ typedef struct ips_ha {
    ips_stat_t         sp;                 /* Status packer pointer      */
    struct ips_scb    *scbs;               /* Array of all CCBS          */
    struct ips_scb    *scb_freelist;       /* SCB free list              */
-   ips_wait_queue_t   scb_waitlist;       /* Pending SCB list           */
+   ips_wait_queue_entry_t   scb_waitlist;       /* Pending SCB list           */
    ips_copp_queue_t   copp_waitlist;      /* Pending PT list            */
    ips_scb_queue_t    scb_activelist;     /* Active SCB list            */
    IPS_IO_CMD        *dummy;              /* dummy command              */
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index a808e8ef1d08..234352da5c3c 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -407,11 +407,12 @@ unlock:
  * can_queue. Eventually we will hit the point where we run
  * on all reserved structs.
  */
-static void fc_fcp_can_queue_ramp_down(struct fc_lport *lport)
+static bool fc_fcp_can_queue_ramp_down(struct fc_lport *lport)
 {
 	struct fc_fcp_internal *si = fc_get_scsi_internal(lport);
 	unsigned long flags;
 	int can_queue;
+	bool changed = false;
 
 	spin_lock_irqsave(lport->host->host_lock, flags);
 
@@ -427,9 +428,11 @@ static void fc_fcp_can_queue_ramp_down(struct fc_lport *lport)
 	if (!can_queue)
 		can_queue = 1;
 	lport->host->can_queue = can_queue;
+	changed = true;
 
 unlock:
 	spin_unlock_irqrestore(lport->host->host_lock, flags);
+	return changed;
 }
 
 /*
@@ -1896,11 +1899,11 @@ int fc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *sc_cmd)
 
 	if (!fc_fcp_lport_queue_ready(lport)) {
 		if (lport->qfull) {
-			fc_fcp_can_queue_ramp_down(lport);
-			shost_printk(KERN_ERR, lport->host,
-				     "libfc: queue full, "
-				     "reducing can_queue to %d.\n",
-				     lport->host->can_queue);
+			if (fc_fcp_can_queue_ramp_down(lport))
+				shost_printk(KERN_ERR, lport->host,
+					     "libfc: queue full, "
+					     "reducing can_queue to %d.\n",
+					     lport->host->can_queue);
 		}
 		rc = SCSI_MLQUEUE_HOST_BUSY;
 		goto out;
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index b44c3136eb51..520325867e2b 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -1422,7 +1422,7 @@ static void fc_rport_recv_rtv_req(struct fc_rport_priv *rdata,
 	fp = fc_frame_alloc(lport, sizeof(*rtv));
 	if (!fp) {
 		rjt_data.reason = ELS_RJT_UNAB;
-		rjt_data.reason = ELS_EXPL_INSUF_RES;
+		rjt_data.explan = ELS_EXPL_INSUF_RES;
 		fc_seq_els_rsp_send(in_fp, ELS_LS_RJT, &rjt_data);
 		goto drop;
 	}
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 6d7840b096e6..f2c0ba6ced78 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -141,6 +141,13 @@ struct lpfc_dmabuf {
 	uint32_t   buffer_tag;	/* used for tagged queue ring */
 };
 
+struct lpfc_nvmet_ctxbuf {
+	struct list_head list;
+	struct lpfc_nvmet_rcv_ctx *context;
+	struct lpfc_iocbq *iocbq;
+	struct lpfc_sglq *sglq;
+};
+
 struct lpfc_dma_pool {
 	struct lpfc_dmabuf   *elements;
 	uint32_t    max_count;
@@ -163,9 +170,7 @@ struct rqb_dmabuf {
 	struct lpfc_dmabuf dbuf;
 	uint16_t total_size;
 	uint16_t bytes_recv;
-	void *context;
-	struct lpfc_iocbq *iocbq;
-	struct lpfc_sglq *sglq;
+	uint16_t idx;
 	struct lpfc_queue *hrq;	  /* ptr to associated Header RQ */
 	struct lpfc_queue *drq;	  /* ptr to associated Data RQ */
 };
@@ -670,6 +675,8 @@ struct lpfc_hba {
 					/* INIT_LINK mailbox command */
 #define LS_NPIV_FAB_SUPPORTED 0x2	/* Fabric supports NPIV */
 #define LS_IGNORE_ERATT       0x4	/* intr handler should ignore ERATT */
+#define LS_MDS_LINK_DOWN      0x8	/* MDS Diagnostics Link Down */
+#define LS_MDS_LOOPBACK      0x16	/* MDS Diagnostics Link Up (Loopback) */
 
 	uint32_t hba_flag;	/* hba generic flags */
 #define HBA_ERATT_HANDLED	0x1 /* This flag is set when eratt handled */
@@ -777,7 +784,6 @@ struct lpfc_hba {
 	uint32_t cfg_nvme_oas;
 	uint32_t cfg_nvme_io_channel;
 	uint32_t cfg_nvmet_mrq;
-	uint32_t cfg_nvmet_mrq_post;
 	uint32_t cfg_enable_nvmet;
 	uint32_t cfg_nvme_enable_fb;
 	uint32_t cfg_nvmet_fb_size;
@@ -943,6 +949,7 @@ struct lpfc_hba {
 	struct pci_pool *lpfc_mbuf_pool;
 	struct pci_pool *lpfc_hrb_pool;	/* header receive buffer pool */
 	struct pci_pool *lpfc_drb_pool; /* data receive buffer pool */
+	struct pci_pool *lpfc_nvmet_drb_pool; /* data receive buffer pool */
 	struct pci_pool *lpfc_hbq_pool;	/* SLI3 hbq buffer pool */
 	struct pci_pool *txrdy_payload_pool;
 	struct lpfc_dma_pool lpfc_mbuf_safety_pool;
@@ -1228,7 +1235,11 @@ lpfc_sli_read_hs(struct lpfc_hba *phba)
 static inline struct lpfc_sli_ring *
 lpfc_phba_elsring(struct lpfc_hba *phba)
 {
-	if (phba->sli_rev == LPFC_SLI_REV4)
-		return phba->sli4_hba.els_wq->pring;
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		if (phba->sli4_hba.els_wq)
+			return phba->sli4_hba.els_wq->pring;
+		else
+			return NULL;
+	}
 	return &phba->sli.sli3_ring[LPFC_ELS_RING];
 }
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 4830370bfab1..bb2d9e238225 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -60,9 +60,9 @@
 #define LPFC_MIN_DEVLOSS_TMO	1
 #define LPFC_MAX_DEVLOSS_TMO	255
 
-#define LPFC_DEF_MRQ_POST	256
-#define LPFC_MIN_MRQ_POST	32
-#define LPFC_MAX_MRQ_POST	512
+#define LPFC_DEF_MRQ_POST	512
+#define LPFC_MIN_MRQ_POST	512
+#define LPFC_MAX_MRQ_POST	2048
 
 /*
  * Write key size should be multiple of 4. If write key is changed
@@ -205,8 +205,9 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 				atomic_read(&tgtp->xmt_ls_rsp_error));
 
 		len += snprintf(buf+len, PAGE_SIZE-len,
-				"FCP: Rcv %08x Drop %08x\n",
+				"FCP: Rcv %08x Release %08x Drop %08x\n",
 				atomic_read(&tgtp->rcv_fcp_cmd_in),
+				atomic_read(&tgtp->xmt_fcp_release),
 				atomic_read(&tgtp->rcv_fcp_cmd_drop));
 
 		if (atomic_read(&tgtp->rcv_fcp_cmd_in) !=
@@ -218,15 +219,12 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 		}
 
 		len += snprintf(buf+len, PAGE_SIZE-len,
-				"FCP Rsp: RD %08x rsp %08x WR %08x rsp %08x\n",
+				"FCP Rsp: RD %08x rsp %08x WR %08x rsp %08x "
+				"drop %08x\n",
 				atomic_read(&tgtp->xmt_fcp_read),
 				atomic_read(&tgtp->xmt_fcp_read_rsp),
 				atomic_read(&tgtp->xmt_fcp_write),
-				atomic_read(&tgtp->xmt_fcp_rsp));
-
-		len += snprintf(buf+len, PAGE_SIZE-len,
-				"FCP Rsp: abort %08x drop %08x\n",
-				atomic_read(&tgtp->xmt_fcp_abort),
+				atomic_read(&tgtp->xmt_fcp_rsp),
 				atomic_read(&tgtp->xmt_fcp_drop));
 
 		len += snprintf(buf+len, PAGE_SIZE-len,
@@ -236,10 +234,22 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 				atomic_read(&tgtp->xmt_fcp_rsp_drop));
 
 		len += snprintf(buf+len, PAGE_SIZE-len,
-				"ABORT: Xmt %08x Err %08x Cmpl %08x",
+				"ABORT: Xmt %08x Cmpl %08x\n",
+				atomic_read(&tgtp->xmt_fcp_abort),
+				atomic_read(&tgtp->xmt_fcp_abort_cmpl));
+
+		len += snprintf(buf + len, PAGE_SIZE - len,
+				"ABORT: Sol %08x  Usol %08x Err %08x Cmpl %08x",
+				atomic_read(&tgtp->xmt_abort_sol),
+				atomic_read(&tgtp->xmt_abort_unsol),
 				atomic_read(&tgtp->xmt_abort_rsp),
-				atomic_read(&tgtp->xmt_abort_rsp_error),
-				atomic_read(&tgtp->xmt_abort_cmpl));
+				atomic_read(&tgtp->xmt_abort_rsp_error));
+
+		len += snprintf(buf + len, PAGE_SIZE - len,
+				"IO_CTX: %08x outstanding %08x total %x",
+				phba->sli4_hba.nvmet_ctx_cnt,
+				phba->sli4_hba.nvmet_io_wait_cnt,
+				phba->sli4_hba.nvmet_io_wait_total);
 
 		len +=  snprintf(buf+len, PAGE_SIZE-len, "\n");
 		return len;
@@ -3312,14 +3322,6 @@ LPFC_ATTR_R(nvmet_mrq,
 	    "Specify number of RQ pairs for processing NVMET cmds");
 
 /*
- * lpfc_nvmet_mrq_post: Specify number buffers to post on every MRQ
- *
- */
-LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST,
-	    LPFC_MIN_MRQ_POST, LPFC_MAX_MRQ_POST,
-	    "Specify number of buffers to post on every MRQ");
-
-/*
  * lpfc_enable_fc4_type: Defines what FC4 types are supported.
  * Supported Values:  1 - register just FCP
  *                    3 - register both FCP and NVME
@@ -5154,7 +5156,6 @@ struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_lpfc_suppress_rsp,
 	&dev_attr_lpfc_nvme_io_channel,
 	&dev_attr_lpfc_nvmet_mrq,
-	&dev_attr_lpfc_nvmet_mrq_post,
 	&dev_attr_lpfc_nvme_enable_fb,
 	&dev_attr_lpfc_nvmet_fb_size,
 	&dev_attr_lpfc_enable_bg,
@@ -6194,7 +6195,6 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 
 	lpfc_enable_fc4_type_init(phba, lpfc_enable_fc4_type);
 	lpfc_nvmet_mrq_init(phba, lpfc_nvmet_mrq);
-	lpfc_nvmet_mrq_post_init(phba, lpfc_nvmet_mrq_post);
 
 	/* Initialize first burst. Target vs Initiator are different. */
 	lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb);
@@ -6291,7 +6291,6 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba)
 		/* Not NVME Target mode.  Turn off Target parameters. */
 		phba->nvmet_support = 0;
 		phba->cfg_nvmet_mrq = 0;
-		phba->cfg_nvmet_mrq_post = 0;
 		phba->cfg_nvmet_fb_size = 0;
 	}
 
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 944b32ca4931..da669dce12fe 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -75,6 +75,10 @@ void lpfc_init_vpi_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_cancel_all_vport_retry_delay_timer(struct lpfc_hba *);
 void lpfc_retry_pport_discovery(struct lpfc_hba *);
 void lpfc_release_rpi(struct lpfc_hba *, struct lpfc_vport *, uint16_t);
+int lpfc_init_iocb_list(struct lpfc_hba *phba, int cnt);
+void lpfc_free_iocb_list(struct lpfc_hba *phba);
+int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
+			struct lpfc_queue *drq, int count, int idx);
 
 void lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
@@ -123,7 +127,7 @@ int lpfc_disc_state_machine(struct lpfc_vport *, struct lpfc_nodelist *, void *,
 void lpfc_do_scr_ns_plogi(struct lpfc_hba *, struct lpfc_vport *);
 int lpfc_check_sparm(struct lpfc_vport *, struct lpfc_nodelist *,
 		     struct serv_parm *, uint32_t, int);
-int lpfc_els_abort(struct lpfc_hba *, struct lpfc_nodelist *);
+void lpfc_els_abort(struct lpfc_hba *, struct lpfc_nodelist *);
 void lpfc_more_plogi(struct lpfc_vport *);
 void lpfc_more_adisc(struct lpfc_vport *);
 void lpfc_end_rscn(struct lpfc_vport *);
@@ -246,16 +250,14 @@ struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *);
 void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *);
 struct rqb_dmabuf *lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba);
 void lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab);
-void lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp,
-			struct lpfc_dmabuf *mp);
+void lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba,
+			    struct lpfc_nvmet_ctxbuf *ctxp);
 int lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
 			       struct fc_frame_header *fc_hdr);
 void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *,
 			uint16_t);
 int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 		     struct lpfc_rqe *hrqe, struct lpfc_rqe *drqe);
-int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hq,
-			struct lpfc_queue *dq, int count);
 int lpfc_free_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hq);
 void lpfc_unregister_fcf(struct lpfc_hba *);
 void lpfc_unregister_fcf_rescan(struct lpfc_hba *);
@@ -271,6 +273,7 @@ int lpfc_sli4_fcf_rr_next_proc(struct lpfc_vport *, uint16_t);
 void lpfc_sli4_clear_fcf_rr_bmask(struct lpfc_hba *);
 
 int lpfc_mem_alloc(struct lpfc_hba *, int align);
+int lpfc_nvmet_mem_alloc(struct lpfc_hba *phba);
 int lpfc_mem_alloc_active_rrq_pool_s4(struct lpfc_hba *);
 void lpfc_mem_free(struct lpfc_hba *);
 void lpfc_mem_free_all(struct lpfc_hba *);
@@ -294,6 +297,7 @@ int lpfc_selective_reset(struct lpfc_hba *);
 void lpfc_reset_barrier(struct lpfc_hba *);
 int lpfc_sli_brdready(struct lpfc_hba *, uint32_t);
 int lpfc_sli_brdkill(struct lpfc_hba *);
+int lpfc_sli_chipset_init(struct lpfc_hba *phba);
 int lpfc_sli_brdreset(struct lpfc_hba *);
 int lpfc_sli_brdrestart(struct lpfc_hba *);
 int lpfc_sli_hba_setup(struct lpfc_hba *);
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 1487406aea77..24ce96dcc94d 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -630,7 +630,7 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type,
 						NLP_EVT_DEVICE_RECOVERY);
 			spin_lock_irq(shost->host_lock);
 			ndlp->nlp_flag &= ~NLP_NVMET_RECOV;
-			spin_lock_irq(shost->host_lock);
+			spin_unlock_irq(shost->host_lock);
 		}
 	}
 
@@ -978,9 +978,10 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 					 ndlp, did, ndlp->nlp_fc4_type,
 					 FC_TYPE_FCP, FC_TYPE_NVME);
 			ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
+
+			lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
+			lpfc_issue_els_prli(vport, ndlp, 0);
 		}
-		lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
-		lpfc_issue_els_prli(vport, ndlp, 0);
 	} else
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY,
 				 "3065 GFT_ID failed x%08x\n", irsp->ulpStatus);
@@ -2092,6 +2093,7 @@ lpfc_fdmi_port_attr_fc4type(struct lpfc_vport *vport,
 
 	ae->un.AttrTypes[3] = 0x02; /* Type 1 - ELS */
 	ae->un.AttrTypes[2] = 0x01; /* Type 8 - FCP */
+	ae->un.AttrTypes[6] = 0x01; /* Type 40 - NVME */
 	ae->un.AttrTypes[7] = 0x01; /* Type 32 - CT */
 	size = FOURBYTES + 32;
 	ad->AttrLen = cpu_to_be16(size);
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index fce549a91911..4bcb92c844ca 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -798,21 +798,22 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 				atomic_read(&tgtp->xmt_fcp_rsp));
 
 		len += snprintf(buf + len, size - len,
-				"FCP Rsp: abort %08x drop %08x\n",
-				atomic_read(&tgtp->xmt_fcp_abort),
-				atomic_read(&tgtp->xmt_fcp_drop));
-
-		len += snprintf(buf + len, size - len,
 				"FCP Rsp Cmpl: %08x err %08x drop %08x\n",
 				atomic_read(&tgtp->xmt_fcp_rsp_cmpl),
 				atomic_read(&tgtp->xmt_fcp_rsp_error),
 				atomic_read(&tgtp->xmt_fcp_rsp_drop));
 
 		len += snprintf(buf + len, size - len,
-				"ABORT: Xmt %08x Err %08x Cmpl %08x",
+				"ABORT: Xmt %08x Cmpl %08x\n",
+				atomic_read(&tgtp->xmt_fcp_abort),
+				atomic_read(&tgtp->xmt_fcp_abort_cmpl));
+
+		len += snprintf(buf + len, size - len,
+				"ABORT: Sol %08x  Usol %08x Err %08x Cmpl %08x",
+				atomic_read(&tgtp->xmt_abort_sol),
+				atomic_read(&tgtp->xmt_abort_unsol),
 				atomic_read(&tgtp->xmt_abort_rsp),
-				atomic_read(&tgtp->xmt_abort_rsp_error),
-				atomic_read(&tgtp->xmt_abort_cmpl));
+				atomic_read(&tgtp->xmt_abort_rsp_error));
 
 		len +=  snprintf(buf + len, size - len, "\n");
 
@@ -841,6 +842,12 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 			}
 			spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
 		}
+
+		len += snprintf(buf + len, size - len,
+				"IO_CTX: %08x  outstanding %08x total %08x\n",
+				phba->sli4_hba.nvmet_ctx_cnt,
+				phba->sli4_hba.nvmet_io_wait_cnt,
+				phba->sli4_hba.nvmet_io_wait_total);
 	} else {
 		if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
 			return len;
@@ -1959,6 +1966,7 @@ lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf,
 		atomic_set(&tgtp->rcv_ls_req_out, 0);
 		atomic_set(&tgtp->rcv_ls_req_drop, 0);
 		atomic_set(&tgtp->xmt_ls_abort, 0);
+		atomic_set(&tgtp->xmt_ls_abort_cmpl, 0);
 		atomic_set(&tgtp->xmt_ls_rsp, 0);
 		atomic_set(&tgtp->xmt_ls_drop, 0);
 		atomic_set(&tgtp->xmt_ls_rsp_error, 0);
@@ -1967,19 +1975,22 @@ lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf,
 		atomic_set(&tgtp->rcv_fcp_cmd_in, 0);
 		atomic_set(&tgtp->rcv_fcp_cmd_out, 0);
 		atomic_set(&tgtp->rcv_fcp_cmd_drop, 0);
-		atomic_set(&tgtp->xmt_fcp_abort, 0);
 		atomic_set(&tgtp->xmt_fcp_drop, 0);
 		atomic_set(&tgtp->xmt_fcp_read_rsp, 0);
 		atomic_set(&tgtp->xmt_fcp_read, 0);
 		atomic_set(&tgtp->xmt_fcp_write, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp, 0);
+		atomic_set(&tgtp->xmt_fcp_release, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_cmpl, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_error, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_drop, 0);
 
+		atomic_set(&tgtp->xmt_fcp_abort, 0);
+		atomic_set(&tgtp->xmt_fcp_abort_cmpl, 0);
+		atomic_set(&tgtp->xmt_abort_sol, 0);
+		atomic_set(&tgtp->xmt_abort_unsol, 0);
 		atomic_set(&tgtp->xmt_abort_rsp, 0);
 		atomic_set(&tgtp->xmt_abort_rsp_error, 0);
-		atomic_set(&tgtp->xmt_abort_cmpl, 0);
 	}
 	return nbytes;
 }
@@ -3070,11 +3081,11 @@ __lpfc_idiag_print_wq(struct lpfc_queue *qp, char *wqtype,
 			qp->assoc_qid, qp->q_cnt_1,
 			(unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"\t\tWQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]",
+			"\t\tWQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
 			qp->queue_id, qp->entry_count,
 			qp->entry_size, qp->host_index,
-			qp->hba_index);
+			qp->hba_index, qp->entry_repost);
 	len +=  snprintf(pbuffer + len,
 			LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 	return len;
@@ -3121,11 +3132,11 @@ __lpfc_idiag_print_cq(struct lpfc_queue *qp, char *cqtype,
 			qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2,
 			qp->q_cnt_3, (unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"\tCQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]",
+			"\tCQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
 			qp->queue_id, qp->entry_count,
 			qp->entry_size, qp->host_index,
-			qp->hba_index);
+			qp->hba_index, qp->entry_repost);
 
 	len +=  snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 
@@ -3143,20 +3154,20 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp,
 			"\t\t%s RQ info: ", rqtype);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
 			"AssocCQID[%02d]: RQ-STAT[nopost:x%x nobuf:x%x "
-			"trunc:x%x rcv:x%llx]\n",
+			"posted:x%x rcv:x%llx]\n",
 			qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2,
 			qp->q_cnt_3, (unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"\t\tHQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]\n",
+			"\t\tHQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n",
 			qp->queue_id, qp->entry_count, qp->entry_size,
-			qp->host_index, qp->hba_index);
+			qp->host_index, qp->hba_index, qp->entry_repost);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"\t\tDQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]\n",
+			"\t\tDQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n",
 			datqp->queue_id, datqp->entry_count,
 			datqp->entry_size, datqp->host_index,
-			datqp->hba_index);
+			datqp->hba_index, datqp->entry_repost);
 	return len;
 }
 
@@ -3242,10 +3253,10 @@ __lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype,
 			eqtype, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3,
 			(unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"EQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]",
+			"EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
 			qp->queue_id, qp->entry_count, qp->entry_size,
-			qp->host_index, qp->hba_index);
+			qp->host_index, qp->hba_index, qp->entry_repost);
 	len +=  snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 
 	return len;
@@ -5855,8 +5866,10 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
 			atomic_dec(&lpfc_debugfs_hba_count);
 		}
 
-		debugfs_remove(lpfc_debugfs_root); /* lpfc */
-		lpfc_debugfs_root = NULL;
+		if (atomic_read(&lpfc_debugfs_hba_count) == 0) {
+			debugfs_remove(lpfc_debugfs_root); /* lpfc */
+			lpfc_debugfs_root = NULL;
+		}
 	}
 #endif
 	return;
diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h
index 9d5a379f4b15..094c97b9e5f7 100644
--- a/drivers/scsi/lpfc/lpfc_disc.h
+++ b/drivers/scsi/lpfc/lpfc_disc.h
@@ -90,6 +90,7 @@ struct lpfc_nodelist {
 #define NLP_FCP_INITIATOR  0x10			/* entry is an FCP Initiator */
 #define NLP_NVME_TARGET    0x20			/* entry is a NVME Target */
 #define NLP_NVME_INITIATOR 0x40			/* entry is a NVME Initiator */
+#define NLP_NVME_DISCOVERY 0x80                 /* entry has NVME disc srvc */
 
 	uint16_t	nlp_fc4_type;		/* FC types node supports. */
 						/* Assigned from GID_FF, only
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 67827e397431..8e532b39ae93 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -1047,6 +1047,13 @@ stop_rr_fcf_flogi:
 				 irsp->ulpStatus, irsp->un.ulpWord[4],
 				 irsp->ulpTimeout);
 
+
+		/* If this is not a loop open failure, bail out */
+		if (!(irsp->ulpStatus == IOSTAT_LOCAL_REJECT &&
+		      ((irsp->un.ulpWord[4] & IOERR_PARAM_MASK) ==
+					IOERR_LOOP_OPEN_FAILURE)))
+			goto flogifail;
+
 		/* FLOGI failed, so there is no fabric */
 		spin_lock_irq(shost->host_lock);
 		vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
@@ -2077,16 +2084,19 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 
 	if (irsp->ulpStatus) {
 		/* Check for retry */
+		ndlp->fc4_prli_sent--;
 		if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
 			/* ELS command is being retried */
-			ndlp->fc4_prli_sent--;
 			goto out;
 		}
+
 		/* PRLI failed */
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
-				 "2754 PRLI failure DID:%06X Status:x%x/x%x\n",
+				 "2754 PRLI failure DID:%06X Status:x%x/x%x, "
+				 "data: x%x\n",
 				 ndlp->nlp_DID, irsp->ulpStatus,
-				 irsp->un.ulpWord[4]);
+				 irsp->un.ulpWord[4], ndlp->fc4_prli_sent);
+
 		/* Do not call DSM for lpfc_els_abort'ed ELS cmds */
 		if (lpfc_error_lost_link(irsp))
 			goto out;
@@ -7441,6 +7451,13 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport)
 	 */
 	spin_lock_irq(&phba->hbalock);
 	pring = lpfc_phba_elsring(phba);
+
+	/* Bail out if we've no ELS wq, like in PCI error recovery case. */
+	if (unlikely(!pring)) {
+		spin_unlock_irq(&phba->hbalock);
+		return;
+	}
+
 	if (phba->sli_rev == LPFC_SLI_REV4)
 		spin_lock(&pring->ring_lock);
 
@@ -8667,7 +8684,8 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 		lpfc_do_scr_ns_plogi(phba, vport);
 	goto out;
 fdisc_failed:
-	if (vport->fc_vport->vport_state != FC_VPORT_NO_FABRIC_RSCS)
+	if (vport->fc_vport &&
+	    (vport->fc_vport->vport_state != FC_VPORT_NO_FABRIC_RSCS))
 		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
 	/* Cancel discovery timer */
 	lpfc_can_disctmo(vport);
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 0482c5580331..3ffcd9215ca8 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -693,15 +693,16 @@ lpfc_work_done(struct lpfc_hba *phba)
 	pring = lpfc_phba_elsring(phba);
 	status = (ha_copy & (HA_RXMASK  << (4*LPFC_ELS_RING)));
 	status >>= (4*LPFC_ELS_RING);
-	if ((status & HA_RXMASK) ||
-	    (pring->flag & LPFC_DEFERRED_RING_EVENT) ||
-	    (phba->hba_flag & HBA_SP_QUEUE_EVT)) {
+	if (pring && (status & HA_RXMASK ||
+		      pring->flag & LPFC_DEFERRED_RING_EVENT ||
+		      phba->hba_flag & HBA_SP_QUEUE_EVT)) {
 		if (pring->flag & LPFC_STOP_IOCB_EVENT) {
 			pring->flag |= LPFC_DEFERRED_RING_EVENT;
 			/* Set the lpfc data pending flag */
 			set_bit(LPFC_DATA_READY, &phba->data_flags);
 		} else {
-			if (phba->link_state >= LPFC_LINK_UP) {
+			if (phba->link_state >= LPFC_LINK_UP ||
+			    phba->link_flag & LS_MDS_LOOPBACK) {
 				pring->flag &= ~LPFC_DEFERRED_RING_EVENT;
 				lpfc_sli_handle_slow_ring_event(phba, pring,
 								(status &
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index 1d12f2be36bc..e0a5fce416ae 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -1356,6 +1356,7 @@ struct lpfc_mbx_wq_destroy {
 
 #define LPFC_HDR_BUF_SIZE 128
 #define LPFC_DATA_BUF_SIZE 2048
+#define LPFC_NVMET_DATA_BUF_SIZE 128
 struct rq_context {
 	uint32_t word0;
 #define lpfc_rq_context_rqe_count_SHIFT	16	/* Version 0 Only */
@@ -4420,6 +4421,19 @@ struct fcp_treceive64_wqe {
 };
 #define TXRDY_PAYLOAD_LEN      12
 
+#define CMD_SEND_FRAME	0xE1
+
+struct send_frame_wqe {
+	struct ulp_bde64 bde;          /* words 0-2 */
+	uint32_t frame_len;            /* word 3 */
+	uint32_t fc_hdr_wd0;           /* word 4 */
+	uint32_t fc_hdr_wd1;           /* word 5 */
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t fc_hdr_wd2;           /* word 12 */
+	uint32_t fc_hdr_wd3;           /* word 13 */
+	uint32_t fc_hdr_wd4;           /* word 14 */
+	uint32_t fc_hdr_wd5;           /* word 15 */
+};
 
 union lpfc_wqe {
 	uint32_t words[16];
@@ -4438,7 +4452,7 @@ union lpfc_wqe {
 	struct fcp_trsp64_wqe fcp_trsp;
 	struct fcp_tsend64_wqe fcp_tsend;
 	struct fcp_treceive64_wqe fcp_treceive;
-
+	struct send_frame_wqe send_frame;
 };
 
 union lpfc_wqe128 {
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 90ae354a9c45..9add9473cae5 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1099,7 +1099,7 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
 
 		list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) {
 			ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP);
-			lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+			lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 		}
 	}
 
@@ -3381,7 +3381,7 @@ lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba)
 {
 	struct lpfc_sglq *sglq_entry = NULL, *sglq_entry_next = NULL;
 	uint16_t i, lxri, xri_cnt, els_xri_cnt;
-	uint16_t nvmet_xri_cnt, tot_cnt;
+	uint16_t nvmet_xri_cnt;
 	LIST_HEAD(nvmet_sgl_list);
 	int rc;
 
@@ -3389,15 +3389,9 @@ lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba)
 	 * update on pci function's nvmet xri-sgl list
 	 */
 	els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba);
-	nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post;
-	tot_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt;
-	if (nvmet_xri_cnt > tot_cnt) {
-		phba->cfg_nvmet_mrq_post = tot_cnt / phba->cfg_nvmet_mrq;
-		nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post;
-		lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
-				"6301 NVMET post-sgl count changed to %d\n",
-				phba->cfg_nvmet_mrq_post);
-	}
+
+	/* For NVMET, ALL remaining XRIs are dedicated for IO processing */
+	nvmet_xri_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt;
 
 	if (nvmet_xri_cnt > phba->sli4_hba.nvmet_xri_cnt) {
 		/* els xri-sgl expanded */
@@ -3602,6 +3596,13 @@ lpfc_get_wwpn(struct lpfc_hba *phba)
 	LPFC_MBOXQ_t *mboxq;
 	MAILBOX_t *mb;
 
+	if (phba->sli_rev < LPFC_SLI_REV4) {
+		/* Reset the port first */
+		lpfc_sli_brdrestart(phba);
+		rc = lpfc_sli_chipset_init(phba);
+		if (rc)
+			return (uint64_t)-1;
+	}
 
 	mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
 						GFP_KERNEL);
@@ -4539,6 +4540,19 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc)
 	pmb->vport = phba->pport;
 
 	if (phba->sli4_hba.link_state.status != LPFC_FC_LA_TYPE_LINK_UP) {
+		phba->link_flag &= ~(LS_MDS_LINK_DOWN | LS_MDS_LOOPBACK);
+
+		switch (phba->sli4_hba.link_state.status) {
+		case LPFC_FC_LA_TYPE_MDS_LINK_DOWN:
+			phba->link_flag |= LS_MDS_LINK_DOWN;
+			break;
+		case LPFC_FC_LA_TYPE_MDS_LOOPBACK:
+			phba->link_flag |= LS_MDS_LOOPBACK;
+			break;
+		default:
+			break;
+		}
+
 		/* Parse and translate status field */
 		mb = &pmb->u.mb;
 		mb->mbxStatus = lpfc_sli4_parse_latt_fault(phba,
@@ -5823,6 +5837,9 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
+		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_ctx_list);
+		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_io_wait_list);
+
 		/* Fast-path XRI aborted CQ Event work queue list */
 		INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
 	}
@@ -5830,6 +5847,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 	/* This abort list used by worker thread */
 	spin_lock_init(&phba->sli4_hba.sgl_list_lock);
 	spin_lock_init(&phba->sli4_hba.nvmet_io_lock);
+	spin_lock_init(&phba->sli4_hba.nvmet_io_wait_lock);
 
 	/*
 	 * Initialize driver internal slow-path work queues
@@ -5944,16 +5962,21 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		for (i = 0; i < lpfc_enable_nvmet_cnt; i++) {
 			if (wwn == lpfc_enable_nvmet[i]) {
 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
+				if (lpfc_nvmet_mem_alloc(phba))
+					break;
+
+				phba->nvmet_support = 1; /* a match */
+
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"6017 NVME Target %016llx\n",
 						wwn);
-				phba->nvmet_support = 1; /* a match */
 #else
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"6021 Can't enable NVME Target."
 						" NVME_TARGET_FC infrastructure"
 						" is not in kernel\n");
 #endif
+				break;
 			}
 		}
 	}
@@ -6262,7 +6285,7 @@ lpfc_unset_driver_resource_phase2(struct lpfc_hba *phba)
  *
  * This routine is invoked to free the driver's IOCB list and memory.
  **/
-static void
+void
 lpfc_free_iocb_list(struct lpfc_hba *phba)
 {
 	struct lpfc_iocbq *iocbq_entry = NULL, *iocbq_next = NULL;
@@ -6290,7 +6313,7 @@ lpfc_free_iocb_list(struct lpfc_hba *phba)
  *	0 - successful
  *	other values - error
  **/
-static int
+int
 lpfc_init_iocb_list(struct lpfc_hba *phba, int iocb_count)
 {
 	struct lpfc_iocbq *iocbq_entry = NULL;
@@ -6518,7 +6541,6 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 	uint16_t rpi_limit, curr_rpi_range;
 	struct lpfc_dmabuf *dmabuf;
 	struct lpfc_rpi_hdr *rpi_hdr;
-	uint32_t rpi_count;
 
 	/*
 	 * If the SLI4 port supports extents, posting the rpi header isn't
@@ -6531,8 +6553,7 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 		return NULL;
 
 	/* The limit on the logical index is just the max_rpi count. */
-	rpi_limit = phba->sli4_hba.max_cfg_param.rpi_base +
-	phba->sli4_hba.max_cfg_param.max_rpi - 1;
+	rpi_limit = phba->sli4_hba.max_cfg_param.max_rpi;
 
 	spin_lock_irq(&phba->hbalock);
 	/*
@@ -6543,18 +6564,10 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 	curr_rpi_range = phba->sli4_hba.next_rpi;
 	spin_unlock_irq(&phba->hbalock);
 
-	/*
-	 * The port has a limited number of rpis. The increment here
-	 * is LPFC_RPI_HDR_COUNT - 1 to account for the starting value
-	 * and to allow the full max_rpi range per port.
-	 */
-	if ((curr_rpi_range + (LPFC_RPI_HDR_COUNT - 1)) > rpi_limit)
-		rpi_count = rpi_limit - curr_rpi_range;
-	else
-		rpi_count = LPFC_RPI_HDR_COUNT;
-
-	if (!rpi_count)
+	/* Reached full RPI range */
+	if (curr_rpi_range == rpi_limit)
 		return NULL;
+
 	/*
 	 * First allocate the protocol header region for the port.  The
 	 * port expects a 4KB DMA-mapped memory region that is 4K aligned.
@@ -6588,13 +6601,9 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 
 	/* The rpi_hdr stores the logical index only. */
 	rpi_hdr->start_rpi = curr_rpi_range;
+	rpi_hdr->next_rpi = phba->sli4_hba.next_rpi + LPFC_RPI_HDR_COUNT;
 	list_add_tail(&rpi_hdr->list, &phba->sli4_hba.lpfc_rpi_hdr_list);
 
-	/*
-	 * The next_rpi stores the next logical module-64 rpi value used
-	 * to post physical rpis in subsequent rpi postings.
-	 */
-	phba->sli4_hba.next_rpi += rpi_count;
 	spin_unlock_irq(&phba->hbalock);
 	return rpi_hdr;
 
@@ -8165,7 +8174,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 			/* Create NVMET Receive Queue for header */
 			qdesc = lpfc_sli4_queue_alloc(phba,
 						      phba->sli4_hba.rq_esize,
-						      phba->sli4_hba.rq_ecount);
+						      LPFC_NVMET_RQE_DEF_COUNT);
 			if (!qdesc) {
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"3146 Failed allocate "
@@ -8187,7 +8196,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 			/* Create NVMET Receive Queue for data */
 			qdesc = lpfc_sli4_queue_alloc(phba,
 						      phba->sli4_hba.rq_esize,
-						      phba->sli4_hba.rq_ecount);
+						      LPFC_NVMET_RQE_DEF_COUNT);
 			if (!qdesc) {
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"3156 Failed allocate "
@@ -8319,46 +8328,6 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
 }
 
 int
-lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
-		    struct lpfc_queue *drq, int count)
-{
-	int rc, i;
-	struct lpfc_rqe hrqe;
-	struct lpfc_rqe drqe;
-	struct lpfc_rqb *rqbp;
-	struct rqb_dmabuf *rqb_buffer;
-	LIST_HEAD(rqb_buf_list);
-
-	rqbp = hrq->rqbp;
-	for (i = 0; i < count; i++) {
-		rqb_buffer = (rqbp->rqb_alloc_buffer)(phba);
-		if (!rqb_buffer)
-			break;
-		rqb_buffer->hrq = hrq;
-		rqb_buffer->drq = drq;
-		list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list);
-	}
-	while (!list_empty(&rqb_buf_list)) {
-		list_remove_head(&rqb_buf_list, rqb_buffer, struct rqb_dmabuf,
-				 hbuf.list);
-
-		hrqe.address_lo = putPaddrLow(rqb_buffer->hbuf.phys);
-		hrqe.address_hi = putPaddrHigh(rqb_buffer->hbuf.phys);
-		drqe.address_lo = putPaddrLow(rqb_buffer->dbuf.phys);
-		drqe.address_hi = putPaddrHigh(rqb_buffer->dbuf.phys);
-		rc = lpfc_sli4_rq_put(hrq, drq, &hrqe, &drqe);
-		if (rc < 0) {
-			(rqbp->rqb_free_buffer)(phba, rqb_buffer);
-		} else {
-			list_add_tail(&rqb_buffer->hbuf.list,
-				      &rqbp->rqb_buffer_list);
-			rqbp->buffer_count++;
-		}
-	}
-	return 1;
-}
-
-int
 lpfc_free_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *rq)
 {
 	struct lpfc_rqb *rqbp;
@@ -8777,9 +8746,6 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 		goto out_destroy;
 	}
 
-	lpfc_rq_adjust_repost(phba, phba->sli4_hba.hdr_rq, LPFC_ELS_HBQ);
-	lpfc_rq_adjust_repost(phba, phba->sli4_hba.dat_rq, LPFC_ELS_HBQ);
-
 	rc = lpfc_rq_create(phba, phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq,
 			    phba->sli4_hba.els_cq, LPFC_USOL);
 	if (rc) {
@@ -8847,7 +8813,7 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba)
 		lpfc_wq_destroy(phba, phba->sli4_hba.nvmels_wq);
 
 	/* Unset ELS work queue */
-	if (phba->sli4_hba.els_cq)
+	if (phba->sli4_hba.els_wq)
 		lpfc_wq_destroy(phba, phba->sli4_hba.els_wq);
 
 	/* Unset unsolicited receive queue */
@@ -11103,7 +11069,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	struct lpfc_hba   *phba;
 	struct lpfc_vport *vport = NULL;
 	struct Scsi_Host  *shost = NULL;
-	int error, cnt;
+	int error;
 	uint32_t cfg_mode, intr_mode;
 
 	/* Allocate memory for HBA structure */
@@ -11137,22 +11103,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 		goto out_unset_pci_mem_s4;
 	}
 
-	cnt = phba->cfg_iocb_cnt * 1024;
-	if (phba->nvmet_support)
-		cnt += phba->cfg_nvmet_mrq_post * phba->cfg_nvmet_mrq;
-
-	/* Initialize and populate the iocb list per host */
-	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-			"2821 initialize iocb list %d total %d\n",
-			phba->cfg_iocb_cnt, cnt);
-	error = lpfc_init_iocb_list(phba, cnt);
-
-	if (error) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1413 Failed to initialize iocb list.\n");
-		goto out_unset_driver_resource_s4;
-	}
-
 	INIT_LIST_HEAD(&phba->active_rrq_list);
 	INIT_LIST_HEAD(&phba->fcf.fcf_pri_list);
 
@@ -11161,7 +11111,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"1414 Failed to set up driver resource.\n");
-		goto out_free_iocb_list;
+		goto out_unset_driver_resource_s4;
 	}
 
 	/* Get the default values for Model Name and Description */
@@ -11261,8 +11211,6 @@ out_destroy_shost:
 	lpfc_destroy_shost(phba);
 out_unset_driver_resource:
 	lpfc_unset_driver_resource_phase2(phba);
-out_free_iocb_list:
-	lpfc_free_iocb_list(phba);
 out_unset_driver_resource_s4:
 	lpfc_sli4_driver_resource_unset(phba);
 out_unset_pci_mem_s4:
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index 5986c7957199..fcc05a1517c2 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -214,6 +214,21 @@ fail_free_drb_pool:
 	return -ENOMEM;
 }
 
+int
+lpfc_nvmet_mem_alloc(struct lpfc_hba *phba)
+{
+	phba->lpfc_nvmet_drb_pool =
+		pci_pool_create("lpfc_nvmet_drb_pool",
+				phba->pcidev, LPFC_NVMET_DATA_BUF_SIZE,
+				SGL_ALIGN_SZ, 0);
+	if (!phba->lpfc_nvmet_drb_pool) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"6024 Can't enable NVME Target - no memory\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
 /**
  * lpfc_mem_free - Frees memory allocated by lpfc_mem_alloc
  * @phba: HBA to free memory for
@@ -232,6 +247,9 @@ lpfc_mem_free(struct lpfc_hba *phba)
 
 	/* Free HBQ pools */
 	lpfc_sli_hbqbuf_free_all(phba);
+	if (phba->lpfc_nvmet_drb_pool)
+		pci_pool_destroy(phba->lpfc_nvmet_drb_pool);
+	phba->lpfc_nvmet_drb_pool = NULL;
 	if (phba->lpfc_drb_pool)
 		pci_pool_destroy(phba->lpfc_drb_pool);
 	phba->lpfc_drb_pool = NULL;
@@ -611,8 +629,6 @@ struct rqb_dmabuf *
 lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
 {
 	struct rqb_dmabuf *dma_buf;
-	struct lpfc_iocbq *nvmewqe;
-	union lpfc_wqe128 *wqe;
 
 	dma_buf = kzalloc(sizeof(struct rqb_dmabuf), GFP_KERNEL);
 	if (!dma_buf)
@@ -624,69 +640,15 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
 		kfree(dma_buf);
 		return NULL;
 	}
-	dma_buf->dbuf.virt = pci_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL,
-					    &dma_buf->dbuf.phys);
+	dma_buf->dbuf.virt = pci_pool_alloc(phba->lpfc_nvmet_drb_pool,
+					    GFP_KERNEL, &dma_buf->dbuf.phys);
 	if (!dma_buf->dbuf.virt) {
 		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
 			      dma_buf->hbuf.phys);
 		kfree(dma_buf);
 		return NULL;
 	}
-	dma_buf->total_size = LPFC_DATA_BUF_SIZE;
-
-	dma_buf->context = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx),
-				   GFP_KERNEL);
-	if (!dma_buf->context) {
-		pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt,
-			      dma_buf->dbuf.phys);
-		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
-			      dma_buf->hbuf.phys);
-		kfree(dma_buf);
-		return NULL;
-	}
-
-	dma_buf->iocbq = lpfc_sli_get_iocbq(phba);
-	if (!dma_buf->iocbq) {
-		kfree(dma_buf->context);
-		pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt,
-			      dma_buf->dbuf.phys);
-		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
-			      dma_buf->hbuf.phys);
-		kfree(dma_buf);
-		lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
-				"2621 Ran out of nvmet iocb/WQEs\n");
-		return NULL;
-	}
-	dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
-	nvmewqe = dma_buf->iocbq;
-	wqe = (union lpfc_wqe128 *)&nvmewqe->wqe;
-	/* Initialize WQE */
-	memset(wqe, 0, sizeof(union lpfc_wqe));
-	/* Word 7 */
-	bf_set(wqe_ct, &wqe->generic.wqe_com, SLI4_CT_RPI);
-	bf_set(wqe_class, &wqe->generic.wqe_com, CLASS3);
-	bf_set(wqe_pu, &wqe->generic.wqe_com, 1);
-	/* Word 10 */
-	bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1);
-	bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0);
-	bf_set(wqe_qosd, &wqe->generic.wqe_com, 0);
-
-	dma_buf->iocbq->context1 = NULL;
-	spin_lock(&phba->sli4_hba.sgl_list_lock);
-	dma_buf->sglq = __lpfc_sli_get_nvmet_sglq(phba, dma_buf->iocbq);
-	spin_unlock(&phba->sli4_hba.sgl_list_lock);
-	if (!dma_buf->sglq) {
-		lpfc_sli_release_iocbq(phba, dma_buf->iocbq);
-		kfree(dma_buf->context);
-		pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt,
-			      dma_buf->dbuf.phys);
-		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
-			      dma_buf->hbuf.phys);
-		kfree(dma_buf);
-		lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
-				"6132 Ran out of nvmet XRIs\n");
-		return NULL;
-	}
+	dma_buf->total_size = LPFC_NVMET_DATA_BUF_SIZE;
 	return dma_buf;
 }
 
@@ -705,20 +667,9 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
 void
 lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab)
 {
-	unsigned long flags;
-
-	__lpfc_clear_active_sglq(phba, dmab->sglq->sli4_lxritag);
-	dmab->sglq->state = SGL_FREED;
-	dmab->sglq->ndlp = NULL;
-
-	spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, flags);
-	list_add_tail(&dmab->sglq->list, &phba->sli4_hba.lpfc_nvmet_sgl_list);
-	spin_unlock_irqrestore(&phba->sli4_hba.sgl_list_lock, flags);
-
-	lpfc_sli_release_iocbq(phba, dmab->iocbq);
-	kfree(dmab->context);
 	pci_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys);
-	pci_pool_free(phba->lpfc_drb_pool, dmab->dbuf.virt, dmab->dbuf.phys);
+	pci_pool_free(phba->lpfc_nvmet_drb_pool,
+		      dmab->dbuf.virt, dmab->dbuf.phys);
 	kfree(dmab);
 }
 
@@ -803,6 +754,11 @@ lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp)
 	rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe);
 	if (rc < 0) {
 		(rqbp->rqb_free_buffer)(phba, rqb_entry);
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"6409 Cannot post to RQ %d: %x %x\n",
+				rqb_entry->hrq->queue_id,
+				rqb_entry->hrq->host_index,
+				rqb_entry->hrq->hba_index);
 	} else {
 		list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list);
 		rqbp->buffer_count++;
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 8777c2d5f50d..f74cb0142fd4 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -206,7 +206,7 @@ lpfc_check_elscmpl_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
  * associated with a LPFC_NODELIST entry. This
  * routine effectively results in a "software abort".
  */
-int
+void
 lpfc_els_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 {
 	LIST_HEAD(abort_list);
@@ -215,6 +215,10 @@ lpfc_els_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 
 	pring = lpfc_phba_elsring(phba);
 
+	/* In case of error recovery path, we might have a NULL pring here */
+	if (!pring)
+		return;
+
 	/* Abort outstanding I/O on NPort <nlp_DID> */
 	lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_DISCOVERY,
 			 "2819 Abort outstanding I/O on NPort x%x "
@@ -273,7 +277,6 @@ lpfc_els_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 			      IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED);
 
 	lpfc_cancel_retry_delay_tmo(phba->pport, ndlp);
-	return 0;
 }
 
 static int
@@ -1944,7 +1947,13 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 
 		/* Target driver cannot solicit NVME FB. */
 		if (bf_get_be32(prli_tgt, nvpr)) {
+			/* Complete the nvme target roles.  The transport
+			 * needs to know if the rport is capable of
+			 * discovery in addition to its role.
+			 */
 			ndlp->nlp_type |= NLP_NVME_TARGET;
+			if (bf_get_be32(prli_disc, nvpr))
+				ndlp->nlp_type |= NLP_NVME_DISCOVERY;
 			if ((bf_get_be32(prli_fba, nvpr) == 1) &&
 			    (bf_get_be32(prli_fb_sz, nvpr) > 0) &&
 			    (phba->cfg_nvme_enable_fb) &&
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 94434e621c33..518b15e6f222 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -142,7 +142,7 @@ out:
 }
 
 /**
- * lpfc_nvmet_rq_post - Repost a NVMET RQ DMA buffer and clean up context
+ * lpfc_nvmet_ctxbuf_post - Repost a NVMET RQ DMA buffer and clean up context
  * @phba: HBA buffer is associated with
  * @ctxp: context to clean up
  * @mp: Buffer to free
@@ -155,24 +155,113 @@ out:
  * Returns: None
  **/
 void
-lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp,
-		   struct lpfc_dmabuf *mp)
+lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
 {
-	if (ctxp) {
-		if (ctxp->flag)
-			lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-				"6314 rq_post ctx xri x%x flag x%x\n",
-				ctxp->oxid, ctxp->flag);
-
-		if (ctxp->txrdy) {
-			pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy,
-				      ctxp->txrdy_phys);
-			ctxp->txrdy = NULL;
-			ctxp->txrdy_phys = 0;
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
+	struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context;
+	struct lpfc_nvmet_tgtport *tgtp;
+	struct fc_frame_header *fc_hdr;
+	struct rqb_dmabuf *nvmebuf;
+	struct lpfc_dmabuf *hbufp;
+	uint32_t *payload;
+	uint32_t size, oxid, sid, rc;
+	unsigned long iflag;
+
+	if (ctxp->txrdy) {
+		pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy,
+			      ctxp->txrdy_phys);
+		ctxp->txrdy = NULL;
+		ctxp->txrdy_phys = 0;
+	}
+	ctxp->state = LPFC_NVMET_STE_FREE;
+
+	spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
+	if (phba->sli4_hba.nvmet_io_wait_cnt) {
+		hbufp = &nvmebuf->hbuf;
+		list_remove_head(&phba->sli4_hba.lpfc_nvmet_io_wait_list,
+				 nvmebuf, struct rqb_dmabuf,
+				 hbuf.list);
+		phba->sli4_hba.nvmet_io_wait_cnt--;
+		spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock,
+				       iflag);
+
+		fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt);
+		oxid = be16_to_cpu(fc_hdr->fh_ox_id);
+		tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+		payload = (uint32_t *)(nvmebuf->dbuf.virt);
+		size = nvmebuf->bytes_recv;
+		sid = sli4_sid_from_fc_hdr(fc_hdr);
+
+		ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context;
+		memset(ctxp, 0, sizeof(ctxp->ctx));
+		ctxp->wqeq = NULL;
+		ctxp->txrdy = NULL;
+		ctxp->offset = 0;
+		ctxp->phba = phba;
+		ctxp->size = size;
+		ctxp->oxid = oxid;
+		ctxp->sid = sid;
+		ctxp->state = LPFC_NVMET_STE_RCV;
+		ctxp->entry_cnt = 1;
+		ctxp->flag = 0;
+		ctxp->ctxbuf = ctx_buf;
+		spin_lock_init(&ctxp->ctxlock);
+
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+		if (phba->ktime_on) {
+			ctxp->ts_cmd_nvme = ktime_get_ns();
+			ctxp->ts_isr_cmd = ctxp->ts_cmd_nvme;
+			ctxp->ts_nvme_data = 0;
+			ctxp->ts_data_wqput = 0;
+			ctxp->ts_isr_data = 0;
+			ctxp->ts_data_nvme = 0;
+			ctxp->ts_nvme_status = 0;
+			ctxp->ts_status_wqput = 0;
+			ctxp->ts_isr_status = 0;
+			ctxp->ts_status_nvme = 0;
 		}
-		ctxp->state = LPFC_NVMET_STE_FREE;
+#endif
+		atomic_inc(&tgtp->rcv_fcp_cmd_in);
+		/*
+		 * The calling sequence should be:
+		 * nvmet_fc_rcv_fcp_req->lpfc_nvmet_xmt_fcp_op/cmp- req->done
+		 * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp.
+		 * When we return from nvmet_fc_rcv_fcp_req, all relevant info
+		 * the NVME command / FC header is stored.
+		 * A buffer has already been reposted for this IO, so just free
+		 * the nvmebuf.
+		 */
+		rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req,
+					  payload, size);
+
+		/* Process FCP command */
+		if (rc == 0) {
+			atomic_inc(&tgtp->rcv_fcp_cmd_out);
+			nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf);
+			return;
+		}
+
+		atomic_inc(&tgtp->rcv_fcp_cmd_drop);
+		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
+				"2582 FCP Drop IO x%x: err x%x: x%x x%x x%x\n",
+				ctxp->oxid, rc,
+				atomic_read(&tgtp->rcv_fcp_cmd_in),
+				atomic_read(&tgtp->rcv_fcp_cmd_out),
+				atomic_read(&tgtp->xmt_fcp_release));
+
+		lpfc_nvmet_defer_release(phba, ctxp);
+		lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid);
+		nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf);
+		return;
 	}
-	lpfc_rq_buf_free(phba, mp);
+	spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
+
+	spin_lock_irqsave(&phba->sli4_hba.nvmet_io_lock, iflag);
+	list_add_tail(&ctx_buf->list,
+		      &phba->sli4_hba.lpfc_nvmet_ctx_list);
+	phba->sli4_hba.nvmet_ctx_cnt++;
+	spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_lock, iflag);
+#endif
 }
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -502,6 +591,7 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
 				"6150 LS Drop IO x%x: Prep\n",
 				ctxp->oxid);
 		lpfc_in_buf_free(phba, &nvmebuf->dbuf);
+		atomic_inc(&nvmep->xmt_ls_abort);
 		lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp,
 						ctxp->sid, ctxp->oxid);
 		return -ENOMEM;
@@ -545,6 +635,7 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
 	lpfc_nlp_put(nvmewqeq->context1);
 
 	lpfc_in_buf_free(phba, &nvmebuf->dbuf);
+	atomic_inc(&nvmep->xmt_ls_abort);
 	lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid);
 	return -ENXIO;
 }
@@ -612,9 +703,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 	lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n",
 			 ctxp->oxid, rsp->op, rsp->rsplen);
 
+	ctxp->flag |= LPFC_NVMET_IO_INP;
 	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq);
 	if (rc == WQE_SUCCESS) {
-		ctxp->flag |= LPFC_NVMET_IO_INP;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 		if (!phba->ktime_on)
 			return 0;
@@ -692,6 +783,7 @@ static void
 lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
 			   struct nvmefc_tgt_fcp_req *rsp)
 {
+	struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private;
 	struct lpfc_nvmet_rcv_ctx *ctxp =
 		container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
 	struct lpfc_hba *phba = ctxp->phba;
@@ -707,13 +799,15 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
 	}
 	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
 
-	lpfc_nvmeio_data(phba, "NVMET FCP FREE: xri x%x ste %d\n", ctxp->oxid,
-			 ctxp->state, 0);
+	lpfc_nvmeio_data(phba, "NVMET FCP FREE: xri x%x ste %d abt %d\n", ctxp->oxid,
+			 ctxp->state, aborting);
+
+	atomic_inc(&lpfc_nvmep->xmt_fcp_release);
 
 	if (aborting)
 		return;
 
-	lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+	lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 }
 
 static struct nvmet_fc_target_template lpfc_tgttemplate = {
@@ -734,17 +828,128 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = {
 	.target_priv_sz = sizeof(struct lpfc_nvmet_tgtport),
 };
 
+void
+lpfc_nvmet_cleanup_io_context(struct lpfc_hba *phba)
+{
+	struct lpfc_nvmet_ctxbuf *ctx_buf, *next_ctx_buf;
+	unsigned long flags;
+
+	list_for_each_entry_safe(
+		ctx_buf, next_ctx_buf,
+		&phba->sli4_hba.lpfc_nvmet_ctx_list, list) {
+		spin_lock_irqsave(
+			&phba->sli4_hba.abts_nvme_buf_list_lock, flags);
+		list_del_init(&ctx_buf->list);
+		spin_unlock_irqrestore(
+			&phba->sli4_hba.abts_nvme_buf_list_lock, flags);
+		__lpfc_clear_active_sglq(phba,
+					 ctx_buf->sglq->sli4_lxritag);
+		ctx_buf->sglq->state = SGL_FREED;
+		ctx_buf->sglq->ndlp = NULL;
+
+		spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, flags);
+		list_add_tail(&ctx_buf->sglq->list,
+			      &phba->sli4_hba.lpfc_nvmet_sgl_list);
+		spin_unlock_irqrestore(&phba->sli4_hba.sgl_list_lock,
+				       flags);
+
+		lpfc_sli_release_iocbq(phba, ctx_buf->iocbq);
+		kfree(ctx_buf->context);
+	}
+}
+
+int
+lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
+{
+	struct lpfc_nvmet_ctxbuf *ctx_buf;
+	struct lpfc_iocbq *nvmewqe;
+	union lpfc_wqe128 *wqe;
+	int i;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_NVME,
+			"6403 Allocate NVMET resources for %d XRIs\n",
+			phba->sli4_hba.nvmet_xri_cnt);
+
+	/* For all nvmet xris, allocate resources needed to process a
+	 * received command on a per xri basis.
+	 */
+	for (i = 0; i < phba->sli4_hba.nvmet_xri_cnt; i++) {
+		ctx_buf = kzalloc(sizeof(*ctx_buf), GFP_KERNEL);
+		if (!ctx_buf) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+					"6404 Ran out of memory for NVMET\n");
+			return -ENOMEM;
+		}
+
+		ctx_buf->context = kzalloc(sizeof(*ctx_buf->context),
+					   GFP_KERNEL);
+		if (!ctx_buf->context) {
+			kfree(ctx_buf);
+			lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+					"6405 Ran out of NVMET "
+					"context memory\n");
+			return -ENOMEM;
+		}
+		ctx_buf->context->ctxbuf = ctx_buf;
+
+		ctx_buf->iocbq = lpfc_sli_get_iocbq(phba);
+		if (!ctx_buf->iocbq) {
+			kfree(ctx_buf->context);
+			kfree(ctx_buf);
+			lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+					"6406 Ran out of NVMET iocb/WQEs\n");
+			return -ENOMEM;
+		}
+		ctx_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
+		nvmewqe = ctx_buf->iocbq;
+		wqe = (union lpfc_wqe128 *)&nvmewqe->wqe;
+		/* Initialize WQE */
+		memset(wqe, 0, sizeof(union lpfc_wqe));
+		/* Word 7 */
+		bf_set(wqe_ct, &wqe->generic.wqe_com, SLI4_CT_RPI);
+		bf_set(wqe_class, &wqe->generic.wqe_com, CLASS3);
+		bf_set(wqe_pu, &wqe->generic.wqe_com, 1);
+		/* Word 10 */
+		bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1);
+		bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0);
+		bf_set(wqe_qosd, &wqe->generic.wqe_com, 0);
+
+		ctx_buf->iocbq->context1 = NULL;
+		spin_lock(&phba->sli4_hba.sgl_list_lock);
+		ctx_buf->sglq = __lpfc_sli_get_nvmet_sglq(phba, ctx_buf->iocbq);
+		spin_unlock(&phba->sli4_hba.sgl_list_lock);
+		if (!ctx_buf->sglq) {
+			lpfc_sli_release_iocbq(phba, ctx_buf->iocbq);
+			kfree(ctx_buf->context);
+			kfree(ctx_buf);
+			lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+					"6407 Ran out of NVMET XRIs\n");
+			return -ENOMEM;
+		}
+		spin_lock(&phba->sli4_hba.nvmet_io_lock);
+		list_add_tail(&ctx_buf->list,
+			      &phba->sli4_hba.lpfc_nvmet_ctx_list);
+		spin_unlock(&phba->sli4_hba.nvmet_io_lock);
+	}
+	phba->sli4_hba.nvmet_ctx_cnt = phba->sli4_hba.nvmet_xri_cnt;
+	return 0;
+}
+
 int
 lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 {
 	struct lpfc_vport  *vport = phba->pport;
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct nvmet_fc_port_info pinfo;
-	int error = 0;
+	int error;
 
 	if (phba->targetport)
 		return 0;
 
+	error = lpfc_nvmet_setup_io_context(phba);
+	if (error)
+		return error;
+
 	memset(&pinfo, 0, sizeof(struct nvmet_fc_port_info));
 	pinfo.node_name = wwn_to_u64(vport->fc_nodename.u.wwn);
 	pinfo.port_name = wwn_to_u64(vport->fc_portname.u.wwn);
@@ -764,7 +969,6 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 	lpfc_tgttemplate.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
 	lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel;
 	lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP |
-					   NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED |
 					   NVMET_FCTGTFEAT_CMD_IN_ISR |
 					   NVMET_FCTGTFEAT_OPDONE_IN_ISR;
 
@@ -773,13 +977,16 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 					     &phba->pcidev->dev,
 					     &phba->targetport);
 #else
-	error = -ENOMEM;
+	error = -ENOENT;
 #endif
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
 				"6025 Cannot register NVME targetport "
 				"x%x\n", error);
 		phba->targetport = NULL;
+
+		lpfc_nvmet_cleanup_io_context(phba);
+
 	} else {
 		tgtp = (struct lpfc_nvmet_tgtport *)
 			phba->targetport->private;
@@ -796,6 +1003,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 		atomic_set(&tgtp->rcv_ls_req_out, 0);
 		atomic_set(&tgtp->rcv_ls_req_drop, 0);
 		atomic_set(&tgtp->xmt_ls_abort, 0);
+		atomic_set(&tgtp->xmt_ls_abort_cmpl, 0);
 		atomic_set(&tgtp->xmt_ls_rsp, 0);
 		atomic_set(&tgtp->xmt_ls_drop, 0);
 		atomic_set(&tgtp->xmt_ls_rsp_error, 0);
@@ -803,18 +1011,21 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 		atomic_set(&tgtp->rcv_fcp_cmd_in, 0);
 		atomic_set(&tgtp->rcv_fcp_cmd_out, 0);
 		atomic_set(&tgtp->rcv_fcp_cmd_drop, 0);
-		atomic_set(&tgtp->xmt_fcp_abort, 0);
 		atomic_set(&tgtp->xmt_fcp_drop, 0);
 		atomic_set(&tgtp->xmt_fcp_read_rsp, 0);
 		atomic_set(&tgtp->xmt_fcp_read, 0);
 		atomic_set(&tgtp->xmt_fcp_write, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp, 0);
+		atomic_set(&tgtp->xmt_fcp_release, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_cmpl, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_error, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_drop, 0);
+		atomic_set(&tgtp->xmt_fcp_abort, 0);
+		atomic_set(&tgtp->xmt_fcp_abort_cmpl, 0);
+		atomic_set(&tgtp->xmt_abort_unsol, 0);
+		atomic_set(&tgtp->xmt_abort_sol, 0);
 		atomic_set(&tgtp->xmt_abort_rsp, 0);
 		atomic_set(&tgtp->xmt_abort_rsp_error, 0);
-		atomic_set(&tgtp->xmt_abort_cmpl, 0);
 	}
 	return error;
 }
@@ -865,7 +1076,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 	list_for_each_entry_safe(ctxp, next_ctxp,
 				 &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
 				 list) {
-		if (ctxp->rqb_buffer->sglq->sli4_xritag != xri)
+		if (ctxp->ctxbuf->sglq->sli4_xritag != xri)
 			continue;
 
 		/* Check if we already received a free context call
@@ -886,7 +1097,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 		    (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE ||
 		     ndlp->nlp_state == NLP_STE_MAPPED_NODE)) {
 			lpfc_set_rrq_active(phba, ndlp,
-				ctxp->rqb_buffer->sglq->sli4_lxritag,
+				ctxp->ctxbuf->sglq->sli4_lxritag,
 				rxid, 1);
 			lpfc_sli4_abts_err_handler(phba, ndlp, axri);
 		}
@@ -895,8 +1106,8 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 				"6318 XB aborted %x flg x%x (%x)\n",
 				ctxp->oxid, ctxp->flag, released);
 		if (released)
-			lpfc_nvmet_rq_post(phba, ctxp,
-					   &ctxp->rqb_buffer->hbuf);
+			lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
+
 		if (rrq_empty)
 			lpfc_worker_wake_up(phba);
 		return;
@@ -924,7 +1135,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
 	list_for_each_entry_safe(ctxp, next_ctxp,
 				 &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
 				 list) {
-		if (ctxp->rqb_buffer->sglq->sli4_xritag != xri)
+		if (ctxp->ctxbuf->sglq->sli4_xritag != xri)
 			continue;
 
 		spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
@@ -976,6 +1187,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
 		init_completion(&tgtp->tport_unreg_done);
 		nvmet_fc_unregister_targetport(phba->targetport);
 		wait_for_completion_timeout(&tgtp->tport_unreg_done, 5);
+		lpfc_nvmet_cleanup_io_context(phba);
 	}
 	phba->targetport = NULL;
 #endif
@@ -1011,6 +1223,7 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 		oxid = 0;
 		size = 0;
 		sid = 0;
+		ctxp = NULL;
 		goto dropit;
 	}
 
@@ -1105,39 +1318,71 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	struct lpfc_nvmet_rcv_ctx *ctxp;
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct fc_frame_header *fc_hdr;
+	struct lpfc_nvmet_ctxbuf *ctx_buf;
 	uint32_t *payload;
-	uint32_t size, oxid, sid, rc;
+	uint32_t size, oxid, sid, rc, qno;
+	unsigned long iflag;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	uint32_t id;
 #endif
 
+	ctx_buf = NULL;
 	if (!nvmebuf || !phba->targetport) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-				"6157 FCP Drop IO\n");
+				"6157 NVMET FCP Drop IO\n");
 		oxid = 0;
 		size = 0;
 		sid = 0;
+		ctxp = NULL;
 		goto dropit;
 	}
 
+	spin_lock_irqsave(&phba->sli4_hba.nvmet_io_lock, iflag);
+	if (phba->sli4_hba.nvmet_ctx_cnt) {
+		list_remove_head(&phba->sli4_hba.lpfc_nvmet_ctx_list,
+				 ctx_buf, struct lpfc_nvmet_ctxbuf, list);
+		phba->sli4_hba.nvmet_ctx_cnt--;
+	}
+	spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_lock, iflag);
 
-	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-	payload = (uint32_t *)(nvmebuf->dbuf.virt);
 	fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt);
-	size = nvmebuf->bytes_recv;
 	oxid = be16_to_cpu(fc_hdr->fh_ox_id);
-	sid = sli4_sid_from_fc_hdr(fc_hdr);
+	size = nvmebuf->bytes_recv;
 
-	ctxp = (struct lpfc_nvmet_rcv_ctx *)nvmebuf->context;
-	if (ctxp == NULL) {
-		atomic_inc(&tgtp->rcv_fcp_cmd_drop);
-		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-				"6158 FCP Drop IO x%x: Alloc\n",
-				oxid);
-		lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf);
-		/* Cannot send ABTS without context */
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+	if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) {
+		id = smp_processor_id();
+		if (id < LPFC_CHECK_CPU_CNT)
+			phba->cpucheck_rcv_io[id]++;
+	}
+#endif
+
+	lpfc_nvmeio_data(phba, "NVMET FCP  RCV: xri x%x sz %d CPU %02x\n",
+			 oxid, size, smp_processor_id());
+
+	if (!ctx_buf) {
+		/* Queue this NVME IO to process later */
+		spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
+		list_add_tail(&nvmebuf->hbuf.list,
+			      &phba->sli4_hba.lpfc_nvmet_io_wait_list);
+		phba->sli4_hba.nvmet_io_wait_cnt++;
+		phba->sli4_hba.nvmet_io_wait_total++;
+		spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock,
+				       iflag);
+
+		/* Post a brand new DMA buffer to RQ */
+		qno = nvmebuf->idx;
+		lpfc_post_rq_buffer(
+			phba, phba->sli4_hba.nvmet_mrq_hdr[qno],
+			phba->sli4_hba.nvmet_mrq_data[qno], 1, qno);
 		return;
 	}
+
+	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+	payload = (uint32_t *)(nvmebuf->dbuf.virt);
+	sid = sli4_sid_from_fc_hdr(fc_hdr);
+
+	ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context;
 	memset(ctxp, 0, sizeof(ctxp->ctx));
 	ctxp->wqeq = NULL;
 	ctxp->txrdy = NULL;
@@ -1147,9 +1392,9 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	ctxp->oxid = oxid;
 	ctxp->sid = sid;
 	ctxp->state = LPFC_NVMET_STE_RCV;
-	ctxp->rqb_buffer = nvmebuf;
 	ctxp->entry_cnt = 1;
 	ctxp->flag = 0;
+	ctxp->ctxbuf = ctx_buf;
 	spin_lock_init(&ctxp->ctxlock);
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -1165,22 +1410,16 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 		ctxp->ts_isr_status = 0;
 		ctxp->ts_status_nvme = 0;
 	}
-
-	if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) {
-		id = smp_processor_id();
-		if (id < LPFC_CHECK_CPU_CNT)
-			phba->cpucheck_rcv_io[id]++;
-	}
 #endif
 
-	lpfc_nvmeio_data(phba, "NVMET FCP  RCV: xri x%x sz %d CPU %02x\n",
-			 oxid, size, smp_processor_id());
-
 	atomic_inc(&tgtp->rcv_fcp_cmd_in);
 	/*
 	 * The calling sequence should be:
 	 * nvmet_fc_rcv_fcp_req -> lpfc_nvmet_xmt_fcp_op/cmp -> req->done
 	 * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp.
+	 * When we return from nvmet_fc_rcv_fcp_req, all relevant info in
+	 * the NVME command / FC header is stored, so we are free to repost
+	 * the buffer.
 	 */
 	rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req,
 				  payload, size);
@@ -1188,26 +1427,32 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	/* Process FCP command */
 	if (rc == 0) {
 		atomic_inc(&tgtp->rcv_fcp_cmd_out);
+		lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
 		return;
 	}
 
 	atomic_inc(&tgtp->rcv_fcp_cmd_drop);
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-			"6159 FCP Drop IO x%x: err x%x\n",
-			ctxp->oxid, rc);
+			"6159 FCP Drop IO x%x: err x%x: x%x x%x x%x\n",
+			ctxp->oxid, rc,
+			atomic_read(&tgtp->rcv_fcp_cmd_in),
+			atomic_read(&tgtp->rcv_fcp_cmd_out),
+			atomic_read(&tgtp->xmt_fcp_release));
 dropit:
 	lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n",
 			 oxid, size, sid);
 	if (oxid) {
+		lpfc_nvmet_defer_release(phba, ctxp);
 		lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid);
+		lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
 		return;
 	}
 
-	if (nvmebuf) {
-		nvmebuf->iocbq->hba_wqidx = 0;
-		/* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */
-		lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf);
-	}
+	if (ctx_buf)
+		lpfc_nvmet_ctxbuf_post(phba, ctx_buf);
+
+	if (nvmebuf)
+		lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
 #endif
 }
 
@@ -1259,7 +1504,7 @@ lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba,
 			   uint64_t isr_timestamp)
 {
 	if (phba->nvmet_support == 0) {
-		lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf);
+		lpfc_rq_buf_free(phba, &nvmebuf->hbuf);
 		return;
 	}
 	lpfc_nvmet_unsol_fcp_buffer(phba, pring, nvmebuf,
@@ -1460,7 +1705,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
 	nvmewqe = ctxp->wqeq;
 	if (nvmewqe == NULL) {
 		/* Allocate buffer for  command wqe */
-		nvmewqe = ctxp->rqb_buffer->iocbq;
+		nvmewqe = ctxp->ctxbuf->iocbq;
 		if (nvmewqe == NULL) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
 					"6110 lpfc_nvmet_prep_fcp_wqe: No "
@@ -1487,7 +1732,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
 		return NULL;
 	}
 
-	sgl  = (struct sli4_sge *)ctxp->rqb_buffer->sglq->sgl;
+	sgl  = (struct sli4_sge *)ctxp->ctxbuf->sglq->sgl;
 	switch (rsp->op) {
 	case NVMET_FCOP_READDATA:
 	case NVMET_FCOP_READDATA_RSP:
@@ -1812,7 +2057,8 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	result = wcqe->parameter;
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-	atomic_inc(&tgtp->xmt_abort_cmpl);
+	if (ctxp->flag & LPFC_NVMET_ABORT_OP)
+		atomic_inc(&tgtp->xmt_fcp_abort_cmpl);
 
 	ctxp->state = LPFC_NVMET_STE_DONE;
 
@@ -1827,6 +2073,7 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	}
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+	atomic_inc(&tgtp->xmt_abort_rsp);
 
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
 			"6165 ABORT cmpl: xri x%x flg x%x (%d) "
@@ -1835,15 +2082,16 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 			wcqe->word0, wcqe->total_data_placed,
 			result, wcqe->word3);
 
+	cmdwqe->context2 = NULL;
+	cmdwqe->context3 = NULL;
 	/*
 	 * if transport has released ctx, then can reuse it. Otherwise,
 	 * will be recycled by transport release call.
 	 */
 	if (released)
-		lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+		lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 
-	cmdwqe->context2 = NULL;
-	cmdwqe->context3 = NULL;
+	/* This is the iocbq for the abort, not the command */
 	lpfc_sli_release_iocbq(phba, cmdwqe);
 
 	/* Since iaab/iaar are NOT set, there is no work left.
@@ -1877,7 +2125,8 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	result = wcqe->parameter;
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-	atomic_inc(&tgtp->xmt_abort_cmpl);
+	if (ctxp->flag & LPFC_NVMET_ABORT_OP)
+		atomic_inc(&tgtp->xmt_fcp_abort_cmpl);
 
 	if (!ctxp) {
 		/* if context is clear, related io alrady complete */
@@ -1907,6 +2156,7 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	}
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+	atomic_inc(&tgtp->xmt_abort_rsp);
 
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
 			"6316 ABTS cmpl xri x%x flg x%x (%x) "
@@ -1914,15 +2164,15 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 			ctxp->oxid, ctxp->flag, released,
 			wcqe->word0, wcqe->total_data_placed,
 			result, wcqe->word3);
+
+	cmdwqe->context2 = NULL;
+	cmdwqe->context3 = NULL;
 	/*
 	 * if transport has released ctx, then can reuse it. Otherwise,
 	 * will be recycled by transport release call.
 	 */
 	if (released)
-		lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
-
-	cmdwqe->context2 = NULL;
-	cmdwqe->context3 = NULL;
+		lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 
 	/* Since iaab/iaar are NOT set, there is no work left.
 	 * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted
@@ -1953,7 +2203,7 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	result = wcqe->parameter;
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-	atomic_inc(&tgtp->xmt_abort_cmpl);
+	atomic_inc(&tgtp->xmt_ls_abort_cmpl);
 
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
 			"6083 Abort cmpl: ctx %p WCQE: %08x %08x %08x %08x\n",
@@ -1984,10 +2234,6 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
 			sid, xri, ctxp->wqeq->sli4_xritag);
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-	if (!ctxp->wqeq) {
-		ctxp->wqeq = ctxp->rqb_buffer->iocbq;
-		ctxp->wqeq->hba_wqidx = 0;
-	}
 
 	ndlp = lpfc_findnode_did(phba->pport, sid);
 	if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) ||
@@ -2083,7 +2329,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 	if (!ctxp->wqeq) {
-		ctxp->wqeq = ctxp->rqb_buffer->iocbq;
+		ctxp->wqeq = ctxp->ctxbuf->iocbq;
 		ctxp->wqeq->hba_wqidx = 0;
 	}
 
@@ -2104,6 +2350,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	/* Issue ABTS for this WQE based on iotag */
 	ctxp->abort_wqeq = lpfc_sli_get_iocbq(phba);
 	if (!ctxp->abort_wqeq) {
+		atomic_inc(&tgtp->xmt_abort_rsp_error);
 		lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
 				"6161 ABORT failed: No wqeqs: "
 				"xri: x%x\n", ctxp->oxid);
@@ -2128,6 +2375,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	/* driver queued commands are in process of being flushed */
 	if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) {
 		spin_unlock_irqrestore(&phba->hbalock, flags);
+		atomic_inc(&tgtp->xmt_abort_rsp_error);
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
 				"6163 Driver in reset cleanup - flushing "
 				"NVME Req now. hba_flag x%x oxid x%x\n",
@@ -2140,6 +2388,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	/* Outstanding abort is in progress */
 	if (abts_wqeq->iocb_flag & LPFC_DRIVER_ABORTED) {
 		spin_unlock_irqrestore(&phba->hbalock, flags);
+		atomic_inc(&tgtp->xmt_abort_rsp_error);
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
 				"6164 Outstanding NVME I/O Abort Request "
 				"still pending on oxid x%x\n",
@@ -2190,9 +2439,12 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	abts_wqeq->context2 = ctxp;
 	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
-	if (rc == WQE_SUCCESS)
+	if (rc == WQE_SUCCESS) {
+		atomic_inc(&tgtp->xmt_abort_sol);
 		return 0;
+	}
 
+	atomic_inc(&tgtp->xmt_abort_rsp_error);
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 	lpfc_sli_release_iocbq(phba, abts_wqeq);
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
@@ -2215,7 +2467,7 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 	if (!ctxp->wqeq) {
-		ctxp->wqeq = ctxp->rqb_buffer->iocbq;
+		ctxp->wqeq = ctxp->ctxbuf->iocbq;
 		ctxp->wqeq->hba_wqidx = 0;
 	}
 
@@ -2231,11 +2483,11 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
 	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 	if (rc == WQE_SUCCESS) {
-		atomic_inc(&tgtp->xmt_abort_rsp);
 		return 0;
 	}
 
 aerr:
+	atomic_inc(&tgtp->xmt_abort_rsp_error);
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 	atomic_inc(&tgtp->xmt_abort_rsp_error);
 	lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
@@ -2270,6 +2522,7 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba,
 	}
 	abts_wqeq = ctxp->wqeq;
 	wqe_abts = &abts_wqeq->wqe;
+
 	lpfc_nvmet_unsol_issue_abort(phba, ctxp, sid, xri);
 
 	spin_lock_irqsave(&phba->hbalock, flags);
@@ -2279,7 +2532,7 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba,
 	rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, abts_wqeq);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 	if (rc == WQE_SUCCESS) {
-		atomic_inc(&tgtp->xmt_abort_rsp);
+		atomic_inc(&tgtp->xmt_abort_unsol);
 		return 0;
 	}
 
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h
index 128759fe6650..6eb2f5d8d4ed 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.h
+++ b/drivers/scsi/lpfc/lpfc_nvmet.h
@@ -22,6 +22,7 @@
  ********************************************************************/
 
 #define LPFC_NVMET_DEFAULT_SEGS		(64 + 1)	/* 256K IOs */
+#define LPFC_NVMET_RQE_DEF_COUNT	512
 #define LPFC_NVMET_SUCCESS_LEN	12
 
 /* Used for NVME Target */
@@ -34,6 +35,7 @@ struct lpfc_nvmet_tgtport {
 	atomic_t rcv_ls_req_out;
 	atomic_t rcv_ls_req_drop;
 	atomic_t xmt_ls_abort;
+	atomic_t xmt_ls_abort_cmpl;
 
 	/* Stats counters - lpfc_nvmet_xmt_ls_rsp */
 	atomic_t xmt_ls_rsp;
@@ -47,9 +49,9 @@ struct lpfc_nvmet_tgtport {
 	atomic_t rcv_fcp_cmd_in;
 	atomic_t rcv_fcp_cmd_out;
 	atomic_t rcv_fcp_cmd_drop;
+	atomic_t xmt_fcp_release;
 
 	/* Stats counters - lpfc_nvmet_xmt_fcp_op */
-	atomic_t xmt_fcp_abort;
 	atomic_t xmt_fcp_drop;
 	atomic_t xmt_fcp_read_rsp;
 	atomic_t xmt_fcp_read;
@@ -62,12 +64,13 @@ struct lpfc_nvmet_tgtport {
 	atomic_t xmt_fcp_rsp_drop;
 
 
-	/* Stats counters - lpfc_nvmet_unsol_issue_abort */
+	/* Stats counters - lpfc_nvmet_xmt_fcp_abort */
+	atomic_t xmt_fcp_abort;
+	atomic_t xmt_fcp_abort_cmpl;
+	atomic_t xmt_abort_sol;
+	atomic_t xmt_abort_unsol;
 	atomic_t xmt_abort_rsp;
 	atomic_t xmt_abort_rsp_error;
-
-	/* Stats counters - lpfc_nvmet_xmt_abort_cmp */
-	atomic_t xmt_abort_cmpl;
 };
 
 struct lpfc_nvmet_rcv_ctx {
@@ -103,6 +106,7 @@ struct lpfc_nvmet_rcv_ctx {
 #define LPFC_NVMET_CTX_RLS		0x8  /* ctx free requested */
 #define LPFC_NVMET_ABTS_RCV		0x10  /* ABTS received on exchange */
 	struct rqb_dmabuf *rqb_buffer;
+	struct lpfc_nvmet_ctxbuf *ctxbuf;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	uint64_t ts_isr_cmd;
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index cf19f4976f5f..d6b184839bc2 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -74,6 +74,8 @@ static struct lpfc_iocbq *lpfc_sli4_els_wcqe_to_rspiocbq(struct lpfc_hba *,
 							 struct lpfc_iocbq *);
 static void lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *,
 				      struct hbq_dmabuf *);
+static void lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport,
+					  struct hbq_dmabuf *dmabuf);
 static int lpfc_sli4_fp_handle_cqe(struct lpfc_hba *, struct lpfc_queue *,
 				    struct lpfc_cqe *);
 static int lpfc_sli4_post_sgl_list(struct lpfc_hba *, struct list_head *,
@@ -479,22 +481,23 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 	if (unlikely(!hq) || unlikely(!dq))
 		return -ENOMEM;
 	put_index = hq->host_index;
-	temp_hrqe = hq->qe[hq->host_index].rqe;
+	temp_hrqe = hq->qe[put_index].rqe;
 	temp_drqe = dq->qe[dq->host_index].rqe;
 
 	if (hq->type != LPFC_HRQ || dq->type != LPFC_DRQ)
 		return -EINVAL;
-	if (hq->host_index != dq->host_index)
+	if (put_index != dq->host_index)
 		return -EINVAL;
 	/* If the host has not yet processed the next entry then we are done */
-	if (((hq->host_index + 1) % hq->entry_count) == hq->hba_index)
+	if (((put_index + 1) % hq->entry_count) == hq->hba_index)
 		return -EBUSY;
 	lpfc_sli_pcimem_bcopy(hrqe, temp_hrqe, hq->entry_size);
 	lpfc_sli_pcimem_bcopy(drqe, temp_drqe, dq->entry_size);
 
 	/* Update the host index to point to the next slot */
-	hq->host_index = ((hq->host_index + 1) % hq->entry_count);
+	hq->host_index = ((put_index + 1) % hq->entry_count);
 	dq->host_index = ((dq->host_index + 1) % dq->entry_count);
+	hq->RQ_buf_posted++;
 
 	/* Ring The Header Receive Queue Doorbell */
 	if (!(hq->host_index % hq->entry_repost)) {
@@ -4204,13 +4207,16 @@ lpfc_sli_brdreset(struct lpfc_hba *phba)
 	/* Reset HBA */
 	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
 			"0325 Reset HBA Data: x%x x%x\n",
-			phba->pport->port_state, psli->sli_flag);
+			(phba->pport) ? phba->pport->port_state : 0,
+			psli->sli_flag);
 
 	/* perform board reset */
 	phba->fc_eventTag = 0;
 	phba->link_events = 0;
-	phba->pport->fc_myDID = 0;
-	phba->pport->fc_prevDID = 0;
+	if (phba->pport) {
+		phba->pport->fc_myDID = 0;
+		phba->pport->fc_prevDID = 0;
+	}
 
 	/* Turn off parity checking and serr during the physical reset */
 	pci_read_config_word(phba->pcidev, PCI_COMMAND, &cfg_value);
@@ -4336,7 +4342,8 @@ lpfc_sli_brdrestart_s3(struct lpfc_hba *phba)
 	/* Restart HBA */
 	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
 			"0337 Restart HBA Data: x%x x%x\n",
-			phba->pport->port_state, psli->sli_flag);
+			(phba->pport) ? phba->pport->port_state : 0,
+			psli->sli_flag);
 
 	word0 = 0;
 	mb = (MAILBOX_t *) &word0;
@@ -4350,7 +4357,7 @@ lpfc_sli_brdrestart_s3(struct lpfc_hba *phba)
 	readl(to_slim); /* flush */
 
 	/* Only skip post after fc_ffinit is completed */
-	if (phba->pport->port_state)
+	if (phba->pport && phba->pport->port_state)
 		word0 = 1;	/* This is really setting up word1 */
 	else
 		word0 = 0;	/* This is really setting up word1 */
@@ -4359,7 +4366,8 @@ lpfc_sli_brdrestart_s3(struct lpfc_hba *phba)
 	readl(to_slim); /* flush */
 
 	lpfc_sli_brdreset(phba);
-	phba->pport->stopped = 0;
+	if (phba->pport)
+		phba->pport->stopped = 0;
 	phba->link_state = LPFC_INIT_START;
 	phba->hba_flag = 0;
 	spin_unlock_irq(&phba->hbalock);
@@ -4446,7 +4454,7 @@ lpfc_sli_brdrestart(struct lpfc_hba *phba)
  * iteration, the function will restart the HBA again. The function returns
  * zero if HBA successfully restarted else returns negative error code.
  **/
-static int
+int
 lpfc_sli_chipset_init(struct lpfc_hba *phba)
 {
 	uint32_t status, i = 0;
@@ -5901,7 +5909,7 @@ lpfc_set_features(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox,
 		bf_set(lpfc_mbx_set_feature_mds,
 		       &mbox->u.mqe.un.set_feature, 1);
 		bf_set(lpfc_mbx_set_feature_mds_deep_loopbk,
-		       &mbox->u.mqe.un.set_feature, 0);
+		       &mbox->u.mqe.un.set_feature, 1);
 		mbox->u.mqe.un.set_feature.feature = LPFC_SET_MDS_DIAGS;
 		mbox->u.mqe.un.set_feature.param_len = 8;
 		break;
@@ -6507,6 +6515,50 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
 		 (phba->hba_flag & HBA_FCOE_MODE) ? "FCoE" : "FC");
 }
 
+int
+lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
+		    struct lpfc_queue *drq, int count, int idx)
+{
+	int rc, i;
+	struct lpfc_rqe hrqe;
+	struct lpfc_rqe drqe;
+	struct lpfc_rqb *rqbp;
+	struct rqb_dmabuf *rqb_buffer;
+	LIST_HEAD(rqb_buf_list);
+
+	rqbp = hrq->rqbp;
+	for (i = 0; i < count; i++) {
+		/* IF RQ is already full, don't bother */
+		if (rqbp->buffer_count + i >= rqbp->entry_count - 1)
+			break;
+		rqb_buffer = rqbp->rqb_alloc_buffer(phba);
+		if (!rqb_buffer)
+			break;
+		rqb_buffer->hrq = hrq;
+		rqb_buffer->drq = drq;
+		rqb_buffer->idx = idx;
+		list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list);
+	}
+	while (!list_empty(&rqb_buf_list)) {
+		list_remove_head(&rqb_buf_list, rqb_buffer, struct rqb_dmabuf,
+				 hbuf.list);
+
+		hrqe.address_lo = putPaddrLow(rqb_buffer->hbuf.phys);
+		hrqe.address_hi = putPaddrHigh(rqb_buffer->hbuf.phys);
+		drqe.address_lo = putPaddrLow(rqb_buffer->dbuf.phys);
+		drqe.address_hi = putPaddrHigh(rqb_buffer->dbuf.phys);
+		rc = lpfc_sli4_rq_put(hrq, drq, &hrqe, &drqe);
+		if (rc < 0) {
+			rqbp->rqb_free_buffer(phba, rqb_buffer);
+		} else {
+			list_add_tail(&rqb_buffer->hbuf.list,
+				      &rqbp->rqb_buffer_list);
+			rqbp->buffer_count++;
+		}
+	}
+	return 1;
+}
+
 /**
  * lpfc_sli4_hba_setup - SLI4 device initialization PCI function
  * @phba: Pointer to HBA context object.
@@ -6519,7 +6571,7 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
 int
 lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 {
-	int rc, i;
+	int rc, i, cnt;
 	LPFC_MBOXQ_t *mboxq;
 	struct lpfc_mqe *mqe;
 	uint8_t *vpd;
@@ -6870,6 +6922,21 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 			goto out_destroy_queue;
 		}
 		phba->sli4_hba.nvmet_xri_cnt = rc;
+
+		cnt = phba->cfg_iocb_cnt * 1024;
+		/* We need 1 iocbq for every SGL, for IO processing */
+		cnt += phba->sli4_hba.nvmet_xri_cnt;
+		/* Initialize and populate the iocb list per host */
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"2821 initialize iocb list %d total %d\n",
+				phba->cfg_iocb_cnt, cnt);
+		rc = lpfc_init_iocb_list(phba, cnt);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"1413 Failed to init iocb list.\n");
+			goto out_destroy_queue;
+		}
+
 		lpfc_nvmet_create_targetport(phba);
 	} else {
 		/* update host scsi xri-sgl sizes and mappings */
@@ -6889,28 +6956,34 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 					"and mapping: %d\n", rc);
 			goto out_destroy_queue;
 		}
+
+		cnt = phba->cfg_iocb_cnt * 1024;
+		/* Initialize and populate the iocb list per host */
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"2820 initialize iocb list %d total %d\n",
+				phba->cfg_iocb_cnt, cnt);
+		rc = lpfc_init_iocb_list(phba, cnt);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"6301 Failed to init iocb list.\n");
+			goto out_destroy_queue;
+		}
 	}
 
 	if (phba->nvmet_support && phba->cfg_nvmet_mrq) {
-
 		/* Post initial buffers to all RQs created */
 		for (i = 0; i < phba->cfg_nvmet_mrq; i++) {
 			rqbp = phba->sli4_hba.nvmet_mrq_hdr[i]->rqbp;
 			INIT_LIST_HEAD(&rqbp->rqb_buffer_list);
 			rqbp->rqb_alloc_buffer = lpfc_sli4_nvmet_alloc;
 			rqbp->rqb_free_buffer = lpfc_sli4_nvmet_free;
-			rqbp->entry_count = 256;
+			rqbp->entry_count = LPFC_NVMET_RQE_DEF_COUNT;
 			rqbp->buffer_count = 0;
 
-			/* Divide by 4 and round down to multiple of 16 */
-			rc = (phba->cfg_nvmet_mrq_post >> 2) & 0xfff8;
-			phba->sli4_hba.nvmet_mrq_hdr[i]->entry_repost = rc;
-			phba->sli4_hba.nvmet_mrq_data[i]->entry_repost = rc;
-
 			lpfc_post_rq_buffer(
 				phba, phba->sli4_hba.nvmet_mrq_hdr[i],
 				phba->sli4_hba.nvmet_mrq_data[i],
-				phba->cfg_nvmet_mrq_post);
+				LPFC_NVMET_RQE_DEF_COUNT, i);
 		}
 	}
 
@@ -7077,6 +7150,7 @@ out_unset_queue:
 	/* Unset all the queues set up in this routine when error out */
 	lpfc_sli4_queue_unset(phba);
 out_destroy_queue:
+	lpfc_free_iocb_list(phba);
 	lpfc_sli4_queue_destroy(phba);
 out_stop_timers:
 	lpfc_stop_hba_timers(phba);
@@ -8616,8 +8690,11 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 		memset(wqe, 0, sizeof(union lpfc_wqe128));
 	/* Some of the fields are in the right position already */
 	memcpy(wqe, &iocbq->iocb, sizeof(union lpfc_wqe));
-	wqe->generic.wqe_com.word7 = 0; /* The ct field has moved so reset */
-	wqe->generic.wqe_com.word10 = 0;
+	if (iocbq->iocb.ulpCommand != CMD_SEND_FRAME) {
+		/* The ct field has moved so reset */
+		wqe->generic.wqe_com.word7 = 0;
+		wqe->generic.wqe_com.word10 = 0;
+	}
 
 	abort_tag = (uint32_t) iocbq->iotag;
 	xritag = iocbq->sli4_xritag;
@@ -9111,6 +9188,10 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 		}
 
 		break;
+	case CMD_SEND_FRAME:
+		bf_set(wqe_xri_tag, &wqe->generic.wqe_com, xritag);
+		bf_set(wqe_reqtag, &wqe->generic.wqe_com, iocbq->iotag);
+		return 0;
 	case CMD_XRI_ABORTED_CX:
 	case CMD_CREATE_XRI_CR: /* Do we expect to use this? */
 	case CMD_IOCB_FCP_IBIDIR64_CR: /* bidirectional xfer */
@@ -12783,6 +12864,7 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
 	struct fc_frame_header *fc_hdr;
 	struct lpfc_queue *hrq = phba->sli4_hba.hdr_rq;
 	struct lpfc_queue *drq = phba->sli4_hba.dat_rq;
+	struct lpfc_nvmet_tgtport *tgtp;
 	struct hbq_dmabuf *dma_buf;
 	uint32_t status, rq_id;
 	unsigned long iflags;
@@ -12803,7 +12885,6 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
 	case FC_STATUS_RQ_BUF_LEN_EXCEEDED:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"2537 Receive Frame Truncated!!\n");
-		hrq->RQ_buf_trunc++;
 	case FC_STATUS_RQ_SUCCESS:
 		lpfc_sli4_rq_release(hrq, drq);
 		spin_lock_irqsave(&phba->hbalock, iflags);
@@ -12814,6 +12895,7 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
 			goto out;
 		}
 		hrq->RQ_rcv_buf++;
+		hrq->RQ_buf_posted--;
 		memcpy(&dma_buf->cq_event.cqe.rcqe_cmpl, rcqe, sizeof(*rcqe));
 
 		/* If a NVME LS event (type 0x28), treat it as Fast path */
@@ -12827,8 +12909,21 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
 		spin_unlock_irqrestore(&phba->hbalock, iflags);
 		workposted = true;
 		break;
-	case FC_STATUS_INSUFF_BUF_NEED_BUF:
 	case FC_STATUS_INSUFF_BUF_FRM_DISC:
+		if (phba->nvmet_support) {
+			tgtp = phba->targetport->private;
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_NVME,
+					"6402 RQE Error x%x, posted %d err_cnt "
+					"%d: %x %x %x\n",
+					status, hrq->RQ_buf_posted,
+					hrq->RQ_no_posted_buf,
+					atomic_read(&tgtp->rcv_fcp_cmd_in),
+					atomic_read(&tgtp->rcv_fcp_cmd_out),
+					atomic_read(&tgtp->xmt_fcp_release));
+		}
+		/* fallthrough */
+
+	case FC_STATUS_INSUFF_BUF_NEED_BUF:
 		hrq->RQ_no_posted_buf++;
 		/* Post more buffers if possible */
 		spin_lock_irqsave(&phba->hbalock, iflags);
@@ -12946,7 +13041,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 		while ((cqe = lpfc_sli4_cq_get(cq))) {
 			workposted |= lpfc_sli4_sp_handle_mcqe(phba, cqe);
 			if (!(++ecount % cq->entry_repost))
-				lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+				break;
 			cq->CQ_mbox++;
 		}
 		break;
@@ -12960,7 +13055,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 				workposted |= lpfc_sli4_sp_handle_cqe(phba, cq,
 								      cqe);
 			if (!(++ecount % cq->entry_repost))
-				lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+				break;
 		}
 
 		/* Track the max number of CQEs processed in 1 EQ */
@@ -13130,6 +13225,7 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	struct lpfc_queue *drq;
 	struct rqb_dmabuf *dma_buf;
 	struct fc_frame_header *fc_hdr;
+	struct lpfc_nvmet_tgtport *tgtp;
 	uint32_t status, rq_id;
 	unsigned long iflags;
 	uint32_t fctl, idx;
@@ -13160,8 +13256,6 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	case FC_STATUS_RQ_BUF_LEN_EXCEEDED:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"6126 Receive Frame Truncated!!\n");
-		hrq->RQ_buf_trunc++;
-		break;
 	case FC_STATUS_RQ_SUCCESS:
 		lpfc_sli4_rq_release(hrq, drq);
 		spin_lock_irqsave(&phba->hbalock, iflags);
@@ -13173,6 +13267,7 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 		}
 		spin_unlock_irqrestore(&phba->hbalock, iflags);
 		hrq->RQ_rcv_buf++;
+		hrq->RQ_buf_posted--;
 		fc_hdr = (struct fc_frame_header *)dma_buf->hbuf.virt;
 
 		/* Just some basic sanity checks on FCP Command frame */
@@ -13195,14 +13290,23 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 drop:
 		lpfc_in_buf_free(phba, &dma_buf->dbuf);
 		break;
-	case FC_STATUS_INSUFF_BUF_NEED_BUF:
 	case FC_STATUS_INSUFF_BUF_FRM_DISC:
+		if (phba->nvmet_support) {
+			tgtp = phba->targetport->private;
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_NVME,
+					"6401 RQE Error x%x, posted %d err_cnt "
+					"%d: %x %x %x\n",
+					status, hrq->RQ_buf_posted,
+					hrq->RQ_no_posted_buf,
+					atomic_read(&tgtp->rcv_fcp_cmd_in),
+					atomic_read(&tgtp->rcv_fcp_cmd_out),
+					atomic_read(&tgtp->xmt_fcp_release));
+		}
+		/* fallthrough */
+
+	case FC_STATUS_INSUFF_BUF_NEED_BUF:
 		hrq->RQ_no_posted_buf++;
 		/* Post more buffers if possible */
-		spin_lock_irqsave(&phba->hbalock, iflags);
-		phba->hba_flag |= HBA_POST_RECEIVE_BUFFER;
-		spin_unlock_irqrestore(&phba->hbalock, iflags);
-		workposted = true;
 		break;
 	}
 out:
@@ -13356,7 +13460,7 @@ process_cq:
 	while ((cqe = lpfc_sli4_cq_get(cq))) {
 		workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe);
 		if (!(++ecount % cq->entry_repost))
-			lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+			break;
 	}
 
 	/* Track the max number of CQEs processed in 1 EQ */
@@ -13447,7 +13551,7 @@ lpfc_sli4_fof_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe)
 	while ((cqe = lpfc_sli4_cq_get(cq))) {
 		workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe);
 		if (!(++ecount % cq->entry_repost))
-			lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+			break;
 	}
 
 	/* Track the max number of CQEs processed in 1 EQ */
@@ -13529,7 +13633,7 @@ lpfc_sli4_fof_intr_handler(int irq, void *dev_id)
 	while ((eqe = lpfc_sli4_eq_get(eq))) {
 		lpfc_sli4_fof_handle_eqe(phba, eqe);
 		if (!(++ecount % eq->entry_repost))
-			lpfc_sli4_eq_release(eq, LPFC_QUEUE_NOARM);
+			break;
 		eq->EQ_processed++;
 	}
 
@@ -13646,7 +13750,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 
 		lpfc_sli4_hba_handle_eqe(phba, eqe, hba_eqidx);
 		if (!(++ecount % fpeq->entry_repost))
-			lpfc_sli4_eq_release(fpeq, LPFC_QUEUE_NOARM);
+			break;
 		fpeq->EQ_processed++;
 	}
 
@@ -13827,17 +13931,10 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t entry_size,
 	}
 	queue->entry_size = entry_size;
 	queue->entry_count = entry_count;
-
-	/*
-	 * entry_repost is calculated based on the number of entries in the
-	 * queue. This works out except for RQs. If buffers are NOT initially
-	 * posted for every RQE, entry_repost should be adjusted accordingly.
-	 */
-	queue->entry_repost = (entry_count >> 3);
-	if (queue->entry_repost < LPFC_QUEUE_MIN_REPOST)
-		queue->entry_repost = LPFC_QUEUE_MIN_REPOST;
 	queue->phba = phba;
 
+	/* entry_repost will be set during q creation */
+
 	return queue;
 out_fail:
 	lpfc_sli4_queue_free(queue);
@@ -14068,6 +14165,7 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax)
 		status = -ENXIO;
 	eq->host_index = 0;
 	eq->hba_index = 0;
+	eq->entry_repost = LPFC_EQ_REPOST;
 
 	mempool_free(mbox, phba->mbox_mem_pool);
 	return status;
@@ -14141,9 +14239,9 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"0361 Unsupported CQ count: "
-				"entry cnt %d sz %d pg cnt %d repost %d\n",
+				"entry cnt %d sz %d pg cnt %d\n",
 				cq->entry_count, cq->entry_size,
-				cq->page_count, cq->entry_repost);
+				cq->page_count);
 		if (cq->entry_count < 256) {
 			status = -EINVAL;
 			goto out;
@@ -14196,6 +14294,7 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	cq->assoc_qid = eq->queue_id;
 	cq->host_index = 0;
 	cq->hba_index = 0;
+	cq->entry_repost = LPFC_CQ_REPOST;
 
 out:
 	mempool_free(mbox, phba->mbox_mem_pool);
@@ -14387,6 +14486,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
 		cq->assoc_qid = eq->queue_id;
 		cq->host_index = 0;
 		cq->hba_index = 0;
+		cq->entry_repost = LPFC_CQ_REPOST;
 
 		rc = 0;
 		list_for_each_entry(dmabuf, &cq->page_list, list) {
@@ -14635,6 +14735,7 @@ lpfc_mq_create(struct lpfc_hba *phba, struct lpfc_queue *mq,
 	mq->subtype = subtype;
 	mq->host_index = 0;
 	mq->hba_index = 0;
+	mq->entry_repost = LPFC_MQ_REPOST;
 
 	/* link the mq onto the parent cq child list */
 	list_add_tail(&mq->list, &cq->child_list);
@@ -14860,34 +14961,6 @@ out:
 }
 
 /**
- * lpfc_rq_adjust_repost - Adjust entry_repost for an RQ
- * @phba: HBA structure that indicates port to create a queue on.
- * @rq:   The queue structure to use for the receive queue.
- * @qno:  The associated HBQ number
- *
- *
- * For SLI4 we need to adjust the RQ repost value based on
- * the number of buffers that are initially posted to the RQ.
- */
-void
-lpfc_rq_adjust_repost(struct lpfc_hba *phba, struct lpfc_queue *rq, int qno)
-{
-	uint32_t cnt;
-
-	/* sanity check on queue memory */
-	if (!rq)
-		return;
-	cnt = lpfc_hbq_defs[qno]->entry_count;
-
-	/* Recalc repost for RQs based on buffers initially posted */
-	cnt = (cnt >> 3);
-	if (cnt < LPFC_QUEUE_MIN_REPOST)
-		cnt = LPFC_QUEUE_MIN_REPOST;
-
-	rq->entry_repost = cnt;
-}
-
-/**
  * lpfc_rq_create - Create a Receive Queue on the HBA
  * @phba: HBA structure that indicates port to create a queue on.
  * @hrq: The queue structure to use to create the header receive queue.
@@ -15072,6 +15145,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	hrq->subtype = subtype;
 	hrq->host_index = 0;
 	hrq->hba_index = 0;
+	hrq->entry_repost = LPFC_RQ_REPOST;
 
 	/* now create the data queue */
 	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
@@ -15082,7 +15156,12 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	if (phba->sli4_hba.pc_sli4_params.rqv == LPFC_Q_CREATE_VERSION_1) {
 		bf_set(lpfc_rq_context_rqe_count_1,
 		       &rq_create->u.request.context, hrq->entry_count);
-		rq_create->u.request.context.buffer_size = LPFC_DATA_BUF_SIZE;
+		if (subtype == LPFC_NVMET)
+			rq_create->u.request.context.buffer_size =
+				LPFC_NVMET_DATA_BUF_SIZE;
+		else
+			rq_create->u.request.context.buffer_size =
+				LPFC_DATA_BUF_SIZE;
 		bf_set(lpfc_rq_context_rqe_size, &rq_create->u.request.context,
 		       LPFC_RQE_SIZE_8);
 		bf_set(lpfc_rq_context_page_size, &rq_create->u.request.context,
@@ -15119,8 +15198,14 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 			       LPFC_RQ_RING_SIZE_4096);
 			break;
 		}
-		bf_set(lpfc_rq_context_buf_size, &rq_create->u.request.context,
-		       LPFC_DATA_BUF_SIZE);
+		if (subtype == LPFC_NVMET)
+			bf_set(lpfc_rq_context_buf_size,
+			       &rq_create->u.request.context,
+			       LPFC_NVMET_DATA_BUF_SIZE);
+		else
+			bf_set(lpfc_rq_context_buf_size,
+			       &rq_create->u.request.context,
+			       LPFC_DATA_BUF_SIZE);
 	}
 	bf_set(lpfc_rq_context_cq_id, &rq_create->u.request.context,
 	       cq->queue_id);
@@ -15153,6 +15238,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	drq->subtype = subtype;
 	drq->host_index = 0;
 	drq->hba_index = 0;
+	drq->entry_repost = LPFC_RQ_REPOST;
 
 	/* link the header and data RQs onto the parent cq child list */
 	list_add_tail(&hrq->list, &cq->child_list);
@@ -15265,7 +15351,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 			       cq->queue_id);
 			bf_set(lpfc_rq_context_data_size,
 			       &rq_create->u.request.context,
-			       LPFC_DATA_BUF_SIZE);
+			       LPFC_NVMET_DATA_BUF_SIZE);
 			bf_set(lpfc_rq_context_hdr_size,
 			       &rq_create->u.request.context,
 			       LPFC_HDR_BUF_SIZE);
@@ -15310,6 +15396,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 		hrq->subtype = subtype;
 		hrq->host_index = 0;
 		hrq->hba_index = 0;
+		hrq->entry_repost = LPFC_RQ_REPOST;
 
 		drq->db_format = LPFC_DB_RING_FORMAT;
 		drq->db_regaddr = phba->sli4_hba.RQDBregaddr;
@@ -15318,6 +15405,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 		drq->subtype = subtype;
 		drq->host_index = 0;
 		drq->hba_index = 0;
+		drq->entry_repost = LPFC_RQ_REPOST;
 
 		list_add_tail(&hrq->list, &cq->child_list);
 		list_add_tail(&drq->list, &cq->child_list);
@@ -16058,6 +16146,8 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
 	struct fc_vft_header *fc_vft_hdr;
 	uint32_t *header = (uint32_t *) fc_hdr;
 
+#define FC_RCTL_MDS_DIAGS	0xF4
+
 	switch (fc_hdr->fh_r_ctl) {
 	case FC_RCTL_DD_UNCAT:		/* uncategorized information */
 	case FC_RCTL_DD_SOL_DATA:	/* solicited data */
@@ -16085,6 +16175,7 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
 	case FC_RCTL_F_BSY:	/* fabric busy to data frame */
 	case FC_RCTL_F_BSYL:	/* fabric busy to link control frame */
 	case FC_RCTL_LCR:	/* link credit reset */
+	case FC_RCTL_MDS_DIAGS: /* MDS Diagnostics */
 	case FC_RCTL_END:	/* end */
 		break;
 	case FC_RCTL_VFTH:	/* Virtual Fabric tagging Header */
@@ -16094,12 +16185,16 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
 	default:
 		goto drop;
 	}
+
+#define FC_TYPE_VENDOR_UNIQUE	0xFF
+
 	switch (fc_hdr->fh_type) {
 	case FC_TYPE_BLS:
 	case FC_TYPE_ELS:
 	case FC_TYPE_FCP:
 	case FC_TYPE_CT:
 	case FC_TYPE_NVME:
+	case FC_TYPE_VENDOR_UNIQUE:
 		break;
 	case FC_TYPE_IP:
 	case FC_TYPE_ILS:
@@ -16110,12 +16205,14 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
 			"2538 Received frame rctl:%s (x%x), type:%s (x%x), "
 			"frame Data:%08x %08x %08x %08x %08x %08x %08x\n",
+			(fc_hdr->fh_r_ctl == FC_RCTL_MDS_DIAGS) ? "MDS Diags" :
 			lpfc_rctl_names[fc_hdr->fh_r_ctl], fc_hdr->fh_r_ctl,
-			lpfc_type_names[fc_hdr->fh_type], fc_hdr->fh_type,
-			be32_to_cpu(header[0]), be32_to_cpu(header[1]),
-			be32_to_cpu(header[2]), be32_to_cpu(header[3]),
-			be32_to_cpu(header[4]), be32_to_cpu(header[5]),
-			be32_to_cpu(header[6]));
+			(fc_hdr->fh_type == FC_TYPE_VENDOR_UNIQUE) ?
+			"Vendor Unique" : lpfc_type_names[fc_hdr->fh_type],
+			fc_hdr->fh_type, be32_to_cpu(header[0]),
+			be32_to_cpu(header[1]), be32_to_cpu(header[2]),
+			be32_to_cpu(header[3]), be32_to_cpu(header[4]),
+			be32_to_cpu(header[5]), be32_to_cpu(header[6]));
 	return 0;
 drop:
 	lpfc_printf_log(phba, KERN_WARNING, LOG_ELS,
@@ -16921,6 +17018,96 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport,
 	lpfc_sli_release_iocbq(phba, iocbq);
 }
 
+static void
+lpfc_sli4_mds_loopback_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+			    struct lpfc_iocbq *rspiocb)
+{
+	struct lpfc_dmabuf *pcmd = cmdiocb->context2;
+
+	if (pcmd && pcmd->virt)
+		pci_pool_free(phba->lpfc_drb_pool, pcmd->virt, pcmd->phys);
+	kfree(pcmd);
+	lpfc_sli_release_iocbq(phba, cmdiocb);
+}
+
+static void
+lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport,
+			      struct hbq_dmabuf *dmabuf)
+{
+	struct fc_frame_header *fc_hdr;
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_iocbq *iocbq = NULL;
+	union  lpfc_wqe *wqe;
+	struct lpfc_dmabuf *pcmd = NULL;
+	uint32_t frame_len;
+	int rc;
+
+	fc_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt;
+	frame_len = bf_get(lpfc_rcqe_length, &dmabuf->cq_event.cqe.rcqe_cmpl);
+
+	/* Send the received frame back */
+	iocbq = lpfc_sli_get_iocbq(phba);
+	if (!iocbq)
+		goto exit;
+
+	/* Allocate buffer for command payload */
+	pcmd = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (pcmd)
+		pcmd->virt = pci_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL,
+					    &pcmd->phys);
+	if (!pcmd || !pcmd->virt)
+		goto exit;
+
+	INIT_LIST_HEAD(&pcmd->list);
+
+	/* copyin the payload */
+	memcpy(pcmd->virt, dmabuf->dbuf.virt, frame_len);
+
+	/* fill in BDE's for command */
+	iocbq->iocb.un.xseq64.bdl.addrHigh = putPaddrHigh(pcmd->phys);
+	iocbq->iocb.un.xseq64.bdl.addrLow = putPaddrLow(pcmd->phys);
+	iocbq->iocb.un.xseq64.bdl.bdeFlags = BUFF_TYPE_BDE_64;
+	iocbq->iocb.un.xseq64.bdl.bdeSize = frame_len;
+
+	iocbq->context2 = pcmd;
+	iocbq->vport = vport;
+	iocbq->iocb_flag &= ~LPFC_FIP_ELS_ID_MASK;
+	iocbq->iocb_flag |= LPFC_USE_FCPWQIDX;
+
+	/*
+	 * Setup rest of the iocb as though it were a WQE
+	 * Build the SEND_FRAME WQE
+	 */
+	wqe = (union lpfc_wqe *)&iocbq->iocb;
+
+	wqe->send_frame.frame_len = frame_len;
+	wqe->send_frame.fc_hdr_wd0 = be32_to_cpu(*((uint32_t *)fc_hdr));
+	wqe->send_frame.fc_hdr_wd1 = be32_to_cpu(*((uint32_t *)fc_hdr + 1));
+	wqe->send_frame.fc_hdr_wd2 = be32_to_cpu(*((uint32_t *)fc_hdr + 2));
+	wqe->send_frame.fc_hdr_wd3 = be32_to_cpu(*((uint32_t *)fc_hdr + 3));
+	wqe->send_frame.fc_hdr_wd4 = be32_to_cpu(*((uint32_t *)fc_hdr + 4));
+	wqe->send_frame.fc_hdr_wd5 = be32_to_cpu(*((uint32_t *)fc_hdr + 5));
+
+	iocbq->iocb.ulpCommand = CMD_SEND_FRAME;
+	iocbq->iocb.ulpLe = 1;
+	iocbq->iocb_cmpl = lpfc_sli4_mds_loopback_cmpl;
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, iocbq, 0);
+	if (rc == IOCB_ERROR)
+		goto exit;
+
+	lpfc_in_buf_free(phba, &dmabuf->dbuf);
+	return;
+
+exit:
+	lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+			"2023 Unable to process MDS loopback frame\n");
+	if (pcmd && pcmd->virt)
+		pci_pool_free(phba->lpfc_drb_pool, pcmd->virt, pcmd->phys);
+	kfree(pcmd);
+	lpfc_sli_release_iocbq(phba, iocbq);
+	lpfc_in_buf_free(phba, &dmabuf->dbuf);
+}
+
 /**
  * lpfc_sli4_handle_received_buffer - Handle received buffers from firmware
  * @phba: Pointer to HBA context object.
@@ -16959,6 +17146,13 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba,
 		fcfi = bf_get(lpfc_rcqe_fcf_id,
 			      &dmabuf->cq_event.cqe.rcqe_cmpl);
 
+	if (fc_hdr->fh_r_ctl == 0xF4 && fc_hdr->fh_type == 0xFF) {
+		vport = phba->pport;
+		/* Handle MDS Loopback frames */
+		lpfc_sli4_handle_mds_loopback(vport, dmabuf);
+		return;
+	}
+
 	/* d_id this frame is directed to */
 	did = sli4_did_from_fc_hdr(fc_hdr);
 
@@ -17132,6 +17326,14 @@ lpfc_sli4_post_rpi_hdr(struct lpfc_hba *phba, struct lpfc_rpi_hdr *rpi_page)
 				"status x%x add_status x%x, mbx status x%x\n",
 				shdr_status, shdr_add_status, rc);
 		rc = -ENXIO;
+	} else {
+		/*
+		 * The next_rpi stores the next logical module-64 rpi value used
+		 * to post physical rpis in subsequent rpi postings.
+		 */
+		spin_lock_irq(&phba->hbalock);
+		phba->sli4_hba.next_rpi = rpi_page->next_rpi;
+		spin_unlock_irq(&phba->hbalock);
 	}
 	return rc;
 }
@@ -18712,7 +18914,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
 
 		spin_lock_irqsave(&pring->ring_lock, iflags);
 		ctxp = pwqe->context2;
-		sglq = ctxp->rqb_buffer->sglq;
+		sglq = ctxp->ctxbuf->sglq;
 		if (pwqe->sli4_xritag ==  NO_XRI) {
 			pwqe->sli4_lxritag = sglq->sli4_lxritag;
 			pwqe->sli4_xritag = sglq->sli4_xritag;
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index da46471337c8..cf863db27700 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -24,7 +24,6 @@
 #define LPFC_XRI_EXCH_BUSY_WAIT_TMO		10000
 #define LPFC_XRI_EXCH_BUSY_WAIT_T1   		10
 #define LPFC_XRI_EXCH_BUSY_WAIT_T2              30000
-#define LPFC_RELEASE_NOTIFICATION_INTERVAL	32
 #define LPFC_RPI_LOW_WATER_MARK			10
 
 #define LPFC_UNREG_FCF                          1
@@ -155,7 +154,11 @@ struct lpfc_queue {
 	uint32_t entry_count;	/* Number of entries to support on the queue */
 	uint32_t entry_size;	/* Size of each queue entry. */
 	uint32_t entry_repost;	/* Count of entries before doorbell is rung */
-#define LPFC_QUEUE_MIN_REPOST	8
+#define LPFC_EQ_REPOST		8
+#define LPFC_MQ_REPOST		8
+#define LPFC_CQ_REPOST		64
+#define LPFC_RQ_REPOST		64
+#define LPFC_RELEASE_NOTIFICATION_INTERVAL	32  /* For WQs */
 	uint32_t queue_id;	/* Queue ID assigned by the hardware */
 	uint32_t assoc_qid;     /* Queue ID associated with, for CQ/WQ/MQ */
 	uint32_t page_count;	/* Number of pages allocated for this queue */
@@ -195,7 +198,7 @@ struct lpfc_queue {
 /* defines for RQ stats */
 #define	RQ_no_posted_buf	q_cnt_1
 #define	RQ_no_buf_found		q_cnt_2
-#define	RQ_buf_trunc		q_cnt_3
+#define	RQ_buf_posted		q_cnt_3
 #define	RQ_rcv_buf		q_cnt_4
 
 	uint64_t isr_timestamp;
@@ -617,12 +620,17 @@ struct lpfc_sli4_hba {
 	uint16_t scsi_xri_start;
 	uint16_t els_xri_cnt;
 	uint16_t nvmet_xri_cnt;
+	uint16_t nvmet_ctx_cnt;
+	uint16_t nvmet_io_wait_cnt;
+	uint16_t nvmet_io_wait_total;
 	struct list_head lpfc_els_sgl_list;
 	struct list_head lpfc_abts_els_sgl_list;
 	struct list_head lpfc_nvmet_sgl_list;
 	struct list_head lpfc_abts_nvmet_ctx_list;
 	struct list_head lpfc_abts_scsi_buf_list;
 	struct list_head lpfc_abts_nvme_buf_list;
+	struct list_head lpfc_nvmet_ctx_list;
+	struct list_head lpfc_nvmet_io_wait_list;
 	struct lpfc_sglq **lpfc_sglq_active_list;
 	struct list_head lpfc_rpi_hdr_list;
 	unsigned long *rpi_bmask;
@@ -654,6 +662,7 @@ struct lpfc_sli4_hba {
 	spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */
 	spinlock_t sgl_list_lock; /* list of aborted els IOs */
 	spinlock_t nvmet_io_lock;
+	spinlock_t nvmet_io_wait_lock; /* IOs waiting for ctx resources */
 	uint32_t physical_port;
 
 	/* CPU to vector mapping information */
@@ -661,8 +670,6 @@ struct lpfc_sli4_hba {
 	uint16_t num_online_cpu;
 	uint16_t num_present_cpu;
 	uint16_t curr_disp_cpu;
-
-	uint16_t nvmet_mrq_post_idx;
 };
 
 enum lpfc_sge_type {
@@ -698,6 +705,7 @@ struct lpfc_rpi_hdr {
 	struct lpfc_dmabuf *dmabuf;
 	uint32_t page_count;
 	uint32_t start_rpi;
+	uint16_t next_rpi;
 };
 
 struct lpfc_rsrc_blks {
@@ -762,7 +770,6 @@ int lpfc_rq_create(struct lpfc_hba *, struct lpfc_queue *,
 int lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 			struct lpfc_queue **drqp, struct lpfc_queue **cqp,
 			uint32_t subtype);
-void lpfc_rq_adjust_repost(struct lpfc_hba *, struct lpfc_queue *, int);
 int lpfc_eq_destroy(struct lpfc_hba *, struct lpfc_queue *);
 int lpfc_cq_destroy(struct lpfc_hba *, struct lpfc_queue *);
 int lpfc_mq_destroy(struct lpfc_hba *, struct lpfc_queue *);
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 1c26dc67151b..c2653244221c 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "11.2.0.12"
+#define LPFC_DRIVER_VERSION "11.2.0.14"
 #define LPFC_DRIVER_NAME		"lpfc"
 
 /* Used for SLI 2/3 */
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 8a1b94816419..a4f28b7e4c65 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -446,7 +446,7 @@ static void _put_request(struct request *rq)
 	 *       code paths.
 	 */
 	if (unlikely(rq->bio))
-		blk_end_request(rq, -ENOMEM, blk_rq_bytes(rq));
+		blk_end_request(rq, BLK_STS_IOERR, blk_rq_bytes(rq));
 	else
 		blk_put_request(rq);
 }
@@ -474,10 +474,10 @@ void osd_end_request(struct osd_request *or)
 EXPORT_SYMBOL(osd_end_request);
 
 static void _set_error_resid(struct osd_request *or, struct request *req,
-			     int error)
+			     blk_status_t error)
 {
 	or->async_error = error;
-	or->req_errors = scsi_req(req)->result ? : error;
+	or->req_errors = scsi_req(req)->result;
 	or->sense_len = scsi_req(req)->sense_len;
 	if (or->sense_len)
 		memcpy(or->sense, scsi_req(req)->sense, or->sense_len);
@@ -489,17 +489,19 @@ static void _set_error_resid(struct osd_request *or, struct request *req,
 
 int osd_execute_request(struct osd_request *or)
 {
-	int error;
-
 	blk_execute_rq(or->request->q, NULL, or->request, 0);
-	error = scsi_req(or->request)->result ? -EIO : 0;
 
-	_set_error_resid(or, or->request, error);
-	return error;
+	if (scsi_req(or->request)->result) {
+		_set_error_resid(or, or->request, BLK_STS_IOERR);
+		return -EIO;
+	}
+
+	_set_error_resid(or, or->request, BLK_STS_OK);
+	return 0;
 }
 EXPORT_SYMBOL(osd_execute_request);
 
-static void osd_request_async_done(struct request *req, int error)
+static void osd_request_async_done(struct request *req, blk_status_t error)
 {
 	struct osd_request *or = req->end_io_data;
 
@@ -1572,13 +1574,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
 			flags);
 	if (IS_ERR(req))
 		return req;
-	scsi_req_init(req);
 
 	for_each_bio(bio) {
-		struct bio *bounce_bio = bio;
-
-		blk_queue_bounce(req->q, &bounce_bio);
-		ret = blk_rq_append_bio(req, bounce_bio);
+		ret = blk_rq_append_bio(req, bio);
 		if (ret)
 			return ERR_PTR(ret);
 	}
@@ -1617,7 +1615,6 @@ static int _init_blk_request(struct osd_request *or,
 				ret = PTR_ERR(req);
 				goto out;
 			}
-			scsi_req_init(req);
 			or->in.req = or->request->next_rq = req;
 		}
 	} else if (has_in)
@@ -1914,7 +1911,7 @@ analyze:
 		/* scsi sense is Empty, the request was never issued to target
 		 * linux return code might tell us what happened.
 		 */
-		if (or->async_error == -ENOMEM)
+		if (or->async_error == BLK_STS_RESOURCE)
 			osi->osd_err_pri = OSD_ERR_PRI_RESOURCE;
 		else
 			osi->osd_err_pri = OSD_ERR_PRI_UNREACHABLE;
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 67cbed92f07d..929ee7e88120 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -320,7 +320,7 @@ static int osst_chk_result(struct osst_tape * STp, struct osst_request * SRpnt)
 
 
 /* Wakeup from interrupt */
-static void osst_end_async(struct request *req, int update)
+static void osst_end_async(struct request *req, blk_status_t status)
 {
 	struct scsi_request *rq = scsi_req(req);
 	struct osst_request *SRpnt = req->end_io_data;
@@ -373,7 +373,6 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
 		return DRIVER_ERROR << 24;
 
 	rq = scsi_req(req);
-	scsi_req_init(req);
 	req->rq_flags |= RQF_QUIET;
 
 	SRpnt->bio = NULL;
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index a4aadf5f4dc6..1cc814f1505a 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -3770,9 +3770,6 @@ static long pmcraid_ioctl_passthrough(
 			pmcraid_err("couldn't build passthrough ioadls\n");
 			goto out_free_cmd;
 		}
-	} else if (request_size < 0) {
-		rc = -EINVAL;
-		goto out_free_cmd;
 	}
 
 	/* If data is being written into the device, copy the data from user
diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h
index 40aeb6bb96a2..07ee88200e91 100644
--- a/drivers/scsi/qedf/qedf.h
+++ b/drivers/scsi/qedf/qedf.h
@@ -259,7 +259,7 @@ struct qedf_io_log {
 	uint16_t task_id;
 	uint32_t port_id; /* Remote port fabric ID */
 	int lun;
-	char op; /* SCSI CDB */
+	unsigned char op; /* SCSI CDB */
 	uint8_t lba[4];
 	unsigned int bufflen; /* SCSI buffer length */
 	unsigned int sg_count; /* Number of SG elements */
diff --git a/drivers/scsi/qedf/qedf_els.c b/drivers/scsi/qedf/qedf_els.c
index c505d41f6dc8..90627033bde6 100644
--- a/drivers/scsi/qedf/qedf_els.c
+++ b/drivers/scsi/qedf/qedf_els.c
@@ -109,7 +109,7 @@ retry_els:
 	did = fcport->rdata->ids.port_id;
 	sid = fcport->sid;
 
-	__fc_fill_fc_hdr(fc_hdr, FC_RCTL_ELS_REQ, sid, did,
+	__fc_fill_fc_hdr(fc_hdr, FC_RCTL_ELS_REQ, did, sid,
 			   FC_TYPE_ELS, FC_FC_FIRST_SEQ | FC_FC_END_SEQ |
 			   FC_FC_SEQ_INIT, 0);
 
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index cceddd995a4b..a5c97342fd5d 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2895,7 +2895,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
 	slowpath_params.drv_minor = QEDF_DRIVER_MINOR_VER;
 	slowpath_params.drv_rev = QEDF_DRIVER_REV_VER;
 	slowpath_params.drv_eng = QEDF_DRIVER_ENG_VER;
-	memcpy(slowpath_params.name, "qedf", QED_DRV_VER_STR_SIZE);
+	strncpy(slowpath_params.name, "qedf", QED_DRV_VER_STR_SIZE);
 	rc = qed_ops->common->slowpath_start(qedf->cdev, &slowpath_params);
 	if (rc) {
 		QEDF_ERR(&(qedf->dbg_ctx), "Cannot start slowpath.\n");
diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h
index 5ca3e8c28a3f..32632c9b2276 100644
--- a/drivers/scsi/qedi/qedi.h
+++ b/drivers/scsi/qedi/qedi.h
@@ -38,7 +38,7 @@ struct qedi_endpoint;
 #define QEDI_MAX_ISCSI_TASK		4096
 #define QEDI_MAX_TASK_NUM		0x0FFF
 #define QEDI_MAX_ISCSI_CONNS_PER_HBA	1024
-#define QEDI_ISCSI_MAX_BDS_PER_CMD	256	/* Firmware max BDs is 256 */
+#define QEDI_ISCSI_MAX_BDS_PER_CMD	255	/* Firmware max BDs is 255 */
 #define MAX_OUSTANDING_TASKS_PER_CON	1024
 
 #define QEDI_MAX_BD_LEN		0xffff
@@ -63,6 +63,7 @@ struct qedi_endpoint;
 #define QEDI_PAGE_MASK		(~((QEDI_PAGE_SIZE) - 1))
 
 #define QEDI_PAGE_SIZE		4096
+#define QEDI_HW_DMA_BOUNDARY	0xfff
 #define QEDI_PATH_HANDLE	0xFE0000000UL
 
 struct qedi_uio_ctrl {
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index d6978cbc56f0..507512cc478b 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -870,7 +870,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
 		QEDI_ERR(&qedi->dbg_ctx,
 			 "Delayed or untracked cleanup response, itt=0x%x, tid=0x%x, cid=0x%x, task=%p\n",
 			 protoitt, cqe->itid, qedi_conn->iscsi_conn_id, task);
-		WARN_ON(1);
 	}
 }
 
@@ -1494,6 +1493,8 @@ static int qedi_send_iscsi_tmf(struct qedi_conn *qedi_conn,
 	tmf_hdr = (struct iscsi_tm *)mtask->hdr;
 	qedi_cmd = (struct qedi_cmd *)mtask->dd_data;
 	ep = qedi_conn->ep;
+	if (!ep)
+		return -ENODEV;
 
 	tid = qedi_get_task_idx(qedi);
 	if (tid == -1)
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
index 3548d46f9b27..87f0af358b33 100644
--- a/drivers/scsi/qedi/qedi_iscsi.c
+++ b/drivers/scsi/qedi/qedi_iscsi.c
@@ -59,6 +59,7 @@ struct scsi_host_template qedi_host_template = {
 	.this_id = -1,
 	.sg_tablesize = QEDI_ISCSI_MAX_BDS_PER_CMD,
 	.max_sectors = 0xffff,
+	.dma_boundary = QEDI_HW_DMA_BOUNDARY,
 	.cmd_per_lun = 128,
 	.use_clustering = ENABLE_CLUSTERING,
 	.shost_attrs = qedi_shost_attrs,
@@ -1223,8 +1224,12 @@ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data)
 
 	iscsi_cid = (u32)path_data->handle;
 	qedi_ep = qedi->ep_tbl[iscsi_cid];
-	QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_CONN,
+	QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
 		  "iscsi_cid=0x%x, qedi_ep=%p\n", iscsi_cid, qedi_ep);
+	if (!qedi_ep) {
+		ret = -EINVAL;
+		goto set_path_exit;
+	}
 
 	if (!is_valid_ether_addr(&path_data->mac_addr[0])) {
 		QEDI_NOTICE(&qedi->dbg_ctx, "dst mac NOT VALID\n");
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 92775a8b74b1..879d3b7462f9 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -151,6 +151,11 @@ static int qedi_uio_close(struct uio_info *uinfo, struct inode *inode)
 
 static void __qedi_free_uio_rings(struct qedi_uio_dev *udev)
 {
+	if (udev->uctrl) {
+		free_page((unsigned long)udev->uctrl);
+		udev->uctrl = NULL;
+	}
+
 	if (udev->ll2_ring) {
 		free_page((unsigned long)udev->ll2_ring);
 		udev->ll2_ring = NULL;
@@ -169,7 +174,6 @@ static void __qedi_free_uio(struct qedi_uio_dev *udev)
 	__qedi_free_uio_rings(udev);
 
 	pci_dev_put(udev->pdev);
-	kfree(udev->uctrl);
 	kfree(udev);
 }
 
@@ -208,6 +212,11 @@ static int __qedi_alloc_uio_rings(struct qedi_uio_dev *udev)
 	if (udev->ll2_ring || udev->ll2_buf)
 		return rc;
 
+	/* Memory for control area.  */
+	udev->uctrl = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!udev->uctrl)
+		return -ENOMEM;
+
 	/* Allocating memory for LL2 ring  */
 	udev->ll2_ring_size = QEDI_PAGE_SIZE;
 	udev->ll2_ring = (void *)get_zeroed_page(GFP_KERNEL | __GFP_COMP);
@@ -237,7 +246,6 @@ exit_alloc_ring:
 static int qedi_alloc_uio_rings(struct qedi_ctx *qedi)
 {
 	struct qedi_uio_dev *udev = NULL;
-	struct qedi_uio_ctrl *uctrl = NULL;
 	int rc = 0;
 
 	list_for_each_entry(udev, &qedi_udev_list, list) {
@@ -258,21 +266,14 @@ static int qedi_alloc_uio_rings(struct qedi_ctx *qedi)
 		goto err_udev;
 	}
 
-	uctrl = kzalloc(sizeof(*uctrl), GFP_KERNEL);
-	if (!uctrl) {
-		rc = -ENOMEM;
-		goto err_uctrl;
-	}
-
 	udev->uio_dev = -1;
 
 	udev->qedi = qedi;
 	udev->pdev = qedi->pdev;
-	udev->uctrl = uctrl;
 
 	rc = __qedi_alloc_uio_rings(udev);
 	if (rc)
-		goto err_uio_rings;
+		goto err_uctrl;
 
 	list_add(&udev->list, &qedi_udev_list);
 
@@ -283,8 +284,6 @@ static int qedi_alloc_uio_rings(struct qedi_ctx *qedi)
 	udev->rx_pkt = udev->ll2_buf + LL2_SINGLE_BUF_SIZE;
 	return 0;
 
- err_uio_rings:
-	kfree(uctrl);
  err_uctrl:
 	kfree(udev);
  err_udev:
@@ -828,6 +827,8 @@ static int qedi_set_iscsi_pf_param(struct qedi_ctx *qedi)
 	qedi->pf_params.iscsi_pf_params.num_uhq_pages_in_ring = num_sq_pages;
 	qedi->pf_params.iscsi_pf_params.num_queues = qedi->num_queues;
 	qedi->pf_params.iscsi_pf_params.debug_mode = qedi_fw_debug;
+	qedi->pf_params.iscsi_pf_params.two_msl_timer = 4000;
+	qedi->pf_params.iscsi_pf_params.max_fin_rt = 2;
 
 	for (log_page_size = 0 ; log_page_size < 32 ; log_page_size++) {
 		if ((1 << log_page_size) == PAGE_SIZE)
@@ -1498,11 +1499,9 @@ err_idx:
 
 void qedi_clear_task_idx(struct qedi_ctx *qedi, int idx)
 {
-	if (!test_and_clear_bit(idx, qedi->task_idx_map)) {
+	if (!test_and_clear_bit(idx, qedi->task_idx_map))
 		QEDI_ERR(&qedi->dbg_ctx,
 			 "FW task context, already cleared, tid=0x%x\n", idx);
-		WARN_ON(1);
-	}
 }
 
 void qedi_update_itt_map(struct qedi_ctx *qedi, u32 tid, u32 proto_itt,
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index 16d1cd50feed..ca3420de5a01 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -730,6 +730,8 @@ qla2x00_process_loopback(struct bsg_job *bsg_job)
 		return -EIO;
 	}
 
+	memset(&elreq, 0, sizeof(elreq));
+
 	elreq.req_sg_cnt = dma_map_sg(&ha->pdev->dev,
 		bsg_job->request_payload.sg_list, bsg_job->request_payload.sg_cnt,
 		DMA_TO_DEVICE);
@@ -795,10 +797,9 @@ qla2x00_process_loopback(struct bsg_job *bsg_job)
 
 	if (atomic_read(&vha->loop_state) == LOOP_READY &&
 	    (ha->current_topology == ISP_CFG_F ||
-	    ((IS_QLA81XX(ha) || IS_QLA8031(ha) || IS_QLA8044(ha)) &&
-	    le32_to_cpu(*(uint32_t *)req_data) == ELS_OPCODE_BYTE
-	    && req_data_len == MAX_ELS_FRAME_PAYLOAD)) &&
-		elreq.options == EXTERNAL_LOOPBACK) {
+	    (le32_to_cpu(*(uint32_t *)req_data) == ELS_OPCODE_BYTE &&
+	     req_data_len == MAX_ELS_FRAME_PAYLOAD)) &&
+	    elreq.options == EXTERNAL_LOOPBACK) {
 		type = "FC_BSG_HST_VENDOR_ECHO_DIAG";
 		ql_dbg(ql_dbg_user, vha, 0x701e,
 		    "BSG request type: %s.\n", type);
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 51b4179469d1..88748a6ab73f 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -1131,7 +1131,7 @@ qla24xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked)
 
 	/* Mailbox registers. */
 	mbx_reg = &reg->mailbox0;
-	for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++, dmp_reg++)
+	for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++, mbx_reg++)
 		fw->mailbox_reg[cnt] = htons(RD_REG_WORD(mbx_reg));
 
 	/* Transfer sequence registers. */
@@ -2090,7 +2090,7 @@ qla83xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked)
 
 	/* Mailbox registers. */
 	mbx_reg = &reg->mailbox0;
-	for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++, dmp_reg++)
+	for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++, mbx_reg++)
 		fw->mailbox_reg[cnt] = htons(RD_REG_WORD(mbx_reg));
 
 	/* Transfer sequence registers. */
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index ae119018dfaa..eddbc1218a39 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -3425,6 +3425,7 @@ struct qla_hw_data {
 	uint8_t 	max_req_queues;
 	uint8_t 	max_rsp_queues;
 	uint8_t		max_qpairs;
+	uint8_t		num_qpairs;
 	struct qla_qpair *base_qpair;
 	struct qla_npiv_entry *npiv_info;
 	uint16_t	nvram_npiv_size;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 034743309ada..0391fc317003 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -7543,12 +7543,13 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos, int v
 		/* Assign available que pair id */
 		mutex_lock(&ha->mq_lock);
 		qpair_id = find_first_zero_bit(ha->qpair_qid_map, ha->max_qpairs);
-		if (qpair_id >= ha->max_qpairs) {
+		if (ha->num_qpairs >= ha->max_qpairs) {
 			mutex_unlock(&ha->mq_lock);
 			ql_log(ql_log_warn, vha, 0x0183,
 			    "No resources to create additional q pair.\n");
 			goto fail_qid_map;
 		}
+		ha->num_qpairs++;
 		set_bit(qpair_id, ha->qpair_qid_map);
 		ha->queue_pair_map[qpair_id] = qpair;
 		qpair->id = qpair_id;
@@ -7635,6 +7636,7 @@ fail_rsp:
 fail_msix:
 	ha->queue_pair_map[qpair_id] = NULL;
 	clear_bit(qpair_id, ha->qpair_qid_map);
+	ha->num_qpairs--;
 	mutex_unlock(&ha->mq_lock);
 fail_qid_map:
 	kfree(qpair);
@@ -7660,6 +7662,7 @@ int qla2xxx_delete_qpair(struct scsi_qla_host *vha, struct qla_qpair *qpair)
 	mutex_lock(&ha->mq_lock);
 	ha->queue_pair_map[qpair->id] = NULL;
 	clear_bit(qpair->id, ha->qpair_qid_map);
+	ha->num_qpairs--;
 	list_del(&qpair->qp_list_elem);
 	if (list_empty(&vha->qp_list))
 		vha->flags.qpairs_available = 0;
diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
index 66df6cec59da..c61a6a871c8e 100644
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h
@@ -129,28 +129,16 @@ qla2x00_clear_loop_id(fc_port_t *fcport) {
 }
 
 static inline void
-qla2x00_clean_dsd_pool(struct qla_hw_data *ha, srb_t *sp,
-	struct qla_tgt_cmd *tc)
+qla2x00_clean_dsd_pool(struct qla_hw_data *ha, struct crc_context *ctx)
 {
-	struct dsd_dma *dsd_ptr, *tdsd_ptr;
-	struct crc_context *ctx;
-
-	if (sp)
-		ctx = (struct crc_context *)GET_CMD_CTX_SP(sp);
-	else if (tc)
-		ctx = (struct crc_context *)tc->ctx;
-	else {
-		BUG();
-		return;
-	}
+	struct dsd_dma *dsd, *tdsd;
 
 	/* clean up allocated prev pool */
-	list_for_each_entry_safe(dsd_ptr, tdsd_ptr,
-	    &ctx->dsd_list, list) {
-		dma_pool_free(ha->dl_dma_pool, dsd_ptr->dsd_addr,
-		    dsd_ptr->dsd_list_dma);
-		list_del(&dsd_ptr->list);
-		kfree(dsd_ptr);
+	list_for_each_entry_safe(dsd, tdsd, &ctx->dsd_list, list) {
+		dma_pool_free(ha->dl_dma_pool, dsd->dsd_addr,
+		    dsd->dsd_list_dma);
+		list_del(&dsd->list);
+		kfree(dsd);
 	}
 	INIT_LIST_HEAD(&ctx->dsd_list);
 }
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index aac03504d9a3..2572121b765b 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -3282,7 +3282,7 @@ msix_register_fail:
 	}
 
 	/* Enable MSI-X vector for response queue update for queue 0 */
-	if (IS_QLA83XX(ha) || IS_QLA27XX(ha)) {
+	if (IS_QLA25XX(ha) || IS_QLA83XX(ha) || IS_QLA27XX(ha)) {
 		if (ha->msixbase && ha->mqiobase &&
 		    (ha->max_rsp_queues > 1 || ha->max_req_queues > 1 ||
 		     ql2xmqsupport))
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index a113ab3592a7..cba1fc5e8be9 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -3676,15 +3676,6 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 				qlt_update_host_map(vha, id);
 			}
 
-			fc_host_port_name(vha->host) =
-			    wwn_to_u64(vha->port_name);
-
-			if (qla_ini_mode_enabled(vha))
-				ql_dbg(ql_dbg_mbx, vha, 0x1018,
-				    "FA-WWN portname %016llx (%x)\n",
-				    fc_host_port_name(vha->host),
-				    rptid_entry->vp_status);
-
 			set_bit(REGISTER_FC4_NEEDED, &vha->dpc_flags);
 			set_bit(REGISTER_FDMI_NEEDED, &vha->dpc_flags);
 		} else {
@@ -4821,9 +4812,9 @@ qla2x00_echo_test(scsi_qla_host_t *vha, struct msg_echo_lb *mreq,
 
 	memset(mcp->mb, 0 , sizeof(mcp->mb));
 	mcp->mb[0] = MBC_DIAGNOSTIC_ECHO;
-	mcp->mb[1] = mreq->options | BIT_6;	/* BIT_6 specifies 64bit address */
+	/* BIT_6 specifies 64bit address */
+	mcp->mb[1] = mreq->options | BIT_15 | BIT_6;
 	if (IS_CNA_CAPABLE(ha)) {
-		mcp->mb[1] |= BIT_15;
 		mcp->mb[2] = vha->fcoe_fcf_idx;
 	}
 	mcp->mb[16] = LSW(mreq->rcv_dma);
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 1c7957903283..79f050256c55 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -630,29 +630,34 @@ qla2x00_sp_free_dma(void *ptr)
 		sp->flags &= ~SRB_CRC_PROT_DMA_VALID;
 	}
 
+	if (!ctx)
+		goto end;
+
 	if (sp->flags & SRB_CRC_CTX_DSD_VALID) {
 		/* List assured to be having elements */
-		qla2x00_clean_dsd_pool(ha, sp, NULL);
+		qla2x00_clean_dsd_pool(ha, ctx);
 		sp->flags &= ~SRB_CRC_CTX_DSD_VALID;
 	}
 
 	if (sp->flags & SRB_CRC_CTX_DMA_VALID) {
-		dma_pool_free(ha->dl_dma_pool, ctx,
-		    ((struct crc_context *)ctx)->crc_ctx_dma);
+		struct crc_context *ctx0 = ctx;
+
+		dma_pool_free(ha->dl_dma_pool, ctx0, ctx0->crc_ctx_dma);
 		sp->flags &= ~SRB_CRC_CTX_DMA_VALID;
 	}
 
 	if (sp->flags & SRB_FCP_CMND_DMA_VALID) {
-		struct ct6_dsd *ctx1 = (struct ct6_dsd *)ctx;
+		struct ct6_dsd *ctx1 = ctx;
 
 		dma_pool_free(ha->fcp_cmnd_dma_pool, ctx1->fcp_cmnd,
-			ctx1->fcp_cmnd_dma);
+		    ctx1->fcp_cmnd_dma);
 		list_splice(&ctx1->dsd_list, &ha->gbl_dsd_list);
 		ha->gbl_dsd_inuse -= ctx1->dsd_use_cnt;
 		ha->gbl_dsd_avail += ctx1->dsd_use_cnt;
 		mempool_free(ctx1, ha->ctx_mempool);
 	}
 
+end:
 	CMD_SP(cmd) = NULL;
 	qla2x00_rel_sp(sp);
 }
@@ -699,21 +704,24 @@ qla2xxx_qpair_sp_free_dma(void *ptr)
 		sp->flags &= ~SRB_CRC_PROT_DMA_VALID;
 	}
 
+	if (!ctx)
+		goto end;
+
 	if (sp->flags & SRB_CRC_CTX_DSD_VALID) {
 		/* List assured to be having elements */
-		qla2x00_clean_dsd_pool(ha, sp, NULL);
+		qla2x00_clean_dsd_pool(ha, ctx);
 		sp->flags &= ~SRB_CRC_CTX_DSD_VALID;
 	}
 
 	if (sp->flags & SRB_CRC_CTX_DMA_VALID) {
-		dma_pool_free(ha->dl_dma_pool, ctx,
-		    ((struct crc_context *)ctx)->crc_ctx_dma);
+		struct crc_context *ctx0 = ctx;
+
+		dma_pool_free(ha->dl_dma_pool, ctx, ctx0->crc_ctx_dma);
 		sp->flags &= ~SRB_CRC_CTX_DMA_VALID;
 	}
 
 	if (sp->flags & SRB_FCP_CMND_DMA_VALID) {
-		struct ct6_dsd *ctx1 = (struct ct6_dsd *)ctx;
-
+		struct ct6_dsd *ctx1 = ctx;
 		dma_pool_free(ha->fcp_cmnd_dma_pool, ctx1->fcp_cmnd,
 		    ctx1->fcp_cmnd_dma);
 		list_splice(&ctx1->dsd_list, &ha->gbl_dsd_list);
@@ -721,7 +729,7 @@ qla2xxx_qpair_sp_free_dma(void *ptr)
 		ha->gbl_dsd_avail += ctx1->dsd_use_cnt;
 		mempool_free(ctx1, ha->ctx_mempool);
 	}
-
+end:
 	CMD_SP(cmd) = NULL;
 	qla2xxx_rel_qpair_sp(sp->qpair, sp);
 }
@@ -1632,7 +1640,7 @@ qla2x00_loop_reset(scsi_qla_host_t *vha)
 void
 qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res)
 {
-	int que, cnt;
+	int que, cnt, status;
 	unsigned long flags;
 	srb_t *sp;
 	struct qla_hw_data *ha = vha->hw;
@@ -1662,8 +1670,12 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res)
 					 */
 					sp_get(sp);
 					spin_unlock_irqrestore(&ha->hardware_lock, flags);
-					qla2xxx_eh_abort(GET_CMD_SP(sp));
+					status = qla2xxx_eh_abort(GET_CMD_SP(sp));
 					spin_lock_irqsave(&ha->hardware_lock, flags);
+					/* Get rid of extra reference if immediate exit
+					 * from ql2xxx_eh_abort */
+					if (status == FAILED && (qla2x00_isp_reg_stat(ha)))
+						atomic_dec(&sp->ref_count);
 				}
 				req->outstanding_cmds[cnt] = NULL;
 				sp->done(sp, res);
@@ -2623,10 +2635,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	if (mem_only) {
 		if (pci_enable_device_mem(pdev))
-			goto probe_out;
+			return ret;
 	} else {
 		if (pci_enable_device(pdev))
-			goto probe_out;
+			return ret;
 	}
 
 	/* This may fail but that's ok */
@@ -2636,7 +2648,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (!ha) {
 		ql_log_pci(ql_log_fatal, pdev, 0x0009,
 		    "Unable to allocate memory for ha.\n");
-		goto probe_out;
+		goto disable_device;
 	}
 	ql_dbg_pci(ql_dbg_init, pdev, 0x000a,
 	    "Memory allocated for ha=%p.\n", ha);
@@ -3254,7 +3266,7 @@ iospace_config_failed:
 	pci_release_selected_regions(ha->pdev, ha->bars);
 	kfree(ha);
 
-probe_out:
+disable_device:
 	pci_disable_device(pdev);
 	return ret;
 }
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 0e03ca2ab3e5..e766d8412384 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -2245,11 +2245,13 @@ static void qlt_unmap_sg(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd)
 		pci_unmap_sg(ha->pdev, cmd->prot_sg, cmd->prot_sg_cnt,
 			cmd->dma_data_direction);
 
+	if (!cmd->ctx)
+		return;
+
 	if (cmd->ctx_dsd_alloced)
-		qla2x00_clean_dsd_pool(ha, NULL, cmd);
+		qla2x00_clean_dsd_pool(ha, cmd->ctx);
 
-	if (cmd->ctx)
-		dma_pool_free(ha->dl_dma_pool, cmd->ctx, cmd->ctx->crc_ctx_dma);
+	dma_pool_free(ha->dl_dma_pool, cmd->ctx, cmd->ctx->crc_ctx_dma);
 }
 
 static int qlt_check_reserve_free_req(struct scsi_qla_host *vha,
diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c
index 8a58ef3adab4..c197972a3e2d 100644
--- a/drivers/scsi/qla2xxx/qla_tmpl.c
+++ b/drivers/scsi/qla2xxx/qla_tmpl.c
@@ -371,7 +371,7 @@ qla27xx_fwdt_entry_t262(struct scsi_qla_host *vha,
 		goto done;
 	}
 
-	if (end <= start || start == 0 || end == 0) {
+	if (end < start || start == 0 || end == 0) {
 		ql_dbg(ql_dbg_misc, vha, 0xd023,
 		    "%s: unusable range (start=%x end=%x)\n", __func__,
 		    ent->t262.end_addr, ent->t262.start_addr);
diff --git a/drivers/scsi/qlogicfas.c b/drivers/scsi/qlogicfas.c
index 61cac87fb86f..840823b99e51 100644
--- a/drivers/scsi/qlogicfas.c
+++ b/drivers/scsi/qlogicfas.c
@@ -137,8 +137,8 @@ err:
 static struct qlogicfas408_priv *cards;
 static int iobase[MAX_QLOGICFAS];
 static int irq[MAX_QLOGICFAS] = { [0 ... MAX_QLOGICFAS-1] = -1 };
-module_param_array(iobase, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(iobase, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(iobase, "I/O address");
 MODULE_PARM_DESC(irq, "IRQ");
 
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 7bfbcfa7af40..61cdd99ae41e 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -763,6 +763,8 @@ struct scsi_device *__scsi_device_lookup(struct Scsi_Host *shost,
 	struct scsi_device *sdev;
 
 	list_for_each_entry(sdev, &shost->__devices, siblings) {
+		if (sdev->sdev_state == SDEV_DEL)
+			continue;
 		if (sdev->channel == channel && sdev->id == id &&
 				sdev->lun ==lun)
 			return sdev;
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 17249c3650fe..3be980d47268 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -245,7 +245,7 @@ struct sdebug_dev_info {
 	unsigned int channel;
 	unsigned int target;
 	u64 lun;
-	uuid_be lu_name;
+	uuid_t lu_name;
 	struct sdebug_host_info *sdbg_host;
 	unsigned long uas_bm[1];
 	atomic_t num_in_q;
@@ -965,7 +965,7 @@ static const u64 naa3_comp_c = 0x3111111000000000ULL;
 static int inquiry_vpd_83(unsigned char *arr, int port_group_id,
 			  int target_dev_id, int dev_id_num,
 			  const char *dev_id_str, int dev_id_str_len,
-			  const uuid_be *lu_name)
+			  const uuid_t *lu_name)
 {
 	int num, port_a;
 	char b[32];
@@ -1404,7 +1404,7 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
 	arr[4] = SDEBUG_LONG_INQ_SZ - 5;
 	arr[5] = (int)have_dif_prot;	/* PROTECT bit */
 	if (sdebug_vpd_use_hostno == 0)
-		arr[5] = 0x10; /* claim: implicit TGPS */
+		arr[5] |= 0x10; /* claim: implicit TPGS */
 	arr[6] = 0x10; /* claim: MultiP */
 	/* arr[6] |= 0x40; ... claim: EncServ (enclosure services) */
 	arr[7] = 0xa; /* claim: LINKED + CMDQUE */
@@ -3568,7 +3568,7 @@ static void sdebug_q_cmd_wq_complete(struct work_struct *work)
 }
 
 static bool got_shared_uuid;
-static uuid_be shared_uuid;
+static uuid_t shared_uuid;
 
 static struct sdebug_dev_info *sdebug_device_create(
 			struct sdebug_host_info *sdbg_host, gfp_t flags)
@@ -3578,12 +3578,12 @@ static struct sdebug_dev_info *sdebug_device_create(
 	devip = kzalloc(sizeof(*devip), flags);
 	if (devip) {
 		if (sdebug_uuid_ctl == 1)
-			uuid_be_gen(&devip->lu_name);
+			uuid_gen(&devip->lu_name);
 		else if (sdebug_uuid_ctl == 2) {
 			if (got_shared_uuid)
 				devip->lu_name = shared_uuid;
 			else {
-				uuid_be_gen(&shared_uuid);
+				uuid_gen(&shared_uuid);
 				got_shared_uuid = true;
 				devip->lu_name = shared_uuid;
 			}
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index ecc07dab893d..304a7158540f 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1874,7 +1874,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
 	}
 }
 
-static void eh_lock_door_done(struct request *req, int uptodate)
+static void eh_lock_door_done(struct request *req, blk_status_t status)
 {
 	__blk_put_request(req->q, req);
 }
@@ -1903,7 +1903,6 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
 	if (IS_ERR(req))
 		return;
 	rq = scsi_req(req);
-	scsi_req_init(req);
 
 	rq->cmd[0] = ALLOW_MEDIUM_REMOVAL;
 	rq->cmd[1] = 0;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 814a4bd8405d..550e29f903b7 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -30,6 +30,7 @@
 #include <scsi/scsi_driver.h>
 #include <scsi/scsi_eh.h>
 #include <scsi/scsi_host.h>
+#include <scsi/scsi_transport.h> /* __scsi_init_queue() */
 #include <scsi/scsi_dh.h>
 
 #include <trace/events/scsi.h>
@@ -249,7 +250,6 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	if (IS_ERR(req))
 		return ret;
 	rq = scsi_req(req);
-	scsi_req_init(req);
 
 	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
 					buffer, bufflen, __GFP_RECLAIM))
@@ -634,7 +634,7 @@ static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
 	cmd->request->next_rq->special = NULL;
 }
 
-static bool scsi_end_request(struct request *req, int error,
+static bool scsi_end_request(struct request *req, blk_status_t error,
 		unsigned int bytes, unsigned int bidi_bytes)
 {
 	struct scsi_cmnd *cmd = req->special;
@@ -693,45 +693,28 @@ static bool scsi_end_request(struct request *req, int error,
  * @cmd:	SCSI command (unused)
  * @result:	scsi error code
  *
- * Translate SCSI error code into standard UNIX errno.
- * Return values:
- * -ENOLINK	temporary transport failure
- * -EREMOTEIO	permanent target failure, do not retry
- * -EBADE	permanent nexus failure, retry on other path
- * -ENOSPC	No write space available
- * -ENODATA	Medium error
- * -EIO		unspecified I/O error
+ * Translate SCSI error code into block errors.
  */
-static int __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result)
+static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd,
+		int result)
 {
-	int error = 0;
-
-	switch(host_byte(result)) {
+	switch (host_byte(result)) {
 	case DID_TRANSPORT_FAILFAST:
-		error = -ENOLINK;
-		break;
+		return BLK_STS_TRANSPORT;
 	case DID_TARGET_FAILURE:
 		set_host_byte(cmd, DID_OK);
-		error = -EREMOTEIO;
-		break;
+		return BLK_STS_TARGET;
 	case DID_NEXUS_FAILURE:
-		set_host_byte(cmd, DID_OK);
-		error = -EBADE;
-		break;
+		return BLK_STS_NEXUS;
 	case DID_ALLOC_FAILURE:
 		set_host_byte(cmd, DID_OK);
-		error = -ENOSPC;
-		break;
+		return BLK_STS_NOSPC;
 	case DID_MEDIUM_ERROR:
 		set_host_byte(cmd, DID_OK);
-		error = -ENODATA;
-		break;
+		return BLK_STS_MEDIUM;
 	default:
-		error = -EIO;
-		break;
+		return BLK_STS_IOERR;
 	}
-
-	return error;
 }
 
 /*
@@ -768,7 +751,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	int result = cmd->result;
 	struct request_queue *q = cmd->device->request_queue;
 	struct request *req = cmd->request;
-	int error = 0;
+	blk_status_t error = BLK_STS_OK;
 	struct scsi_sense_hdr sshdr;
 	bool sense_valid = false;
 	int sense_deferred = 0, level = 0;
@@ -807,7 +790,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			 * both sides at once.
 			 */
 			scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
-			if (scsi_end_request(req, 0, blk_rq_bytes(req),
+			if (scsi_end_request(req, BLK_STS_OK, blk_rq_bytes(req),
 					blk_rq_bytes(req->next_rq)))
 				BUG();
 			return;
@@ -849,7 +832,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			scsi_print_sense(cmd);
 		result = 0;
 		/* for passthrough error may be set */
-		error = 0;
+		error = BLK_STS_OK;
 	}
 
 	/*
@@ -921,18 +904,18 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 				action = ACTION_REPREP;
 			} else if (sshdr.asc == 0x10) /* DIX */ {
 				action = ACTION_FAIL;
-				error = -EILSEQ;
+				error = BLK_STS_PROTECTION;
 			/* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
 			} else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
 				action = ACTION_FAIL;
-				error = -EREMOTEIO;
+				error = BLK_STS_TARGET;
 			} else
 				action = ACTION_FAIL;
 			break;
 		case ABORTED_COMMAND:
 			action = ACTION_FAIL;
 			if (sshdr.asc == 0x10) /* DIF */
-				error = -EILSEQ;
+				error = BLK_STS_PROTECTION;
 			break;
 		case NOT_READY:
 			/* If the device is in the process of becoming
@@ -1133,6 +1116,20 @@ err_exit:
 }
 EXPORT_SYMBOL(scsi_init_io);
 
+/**
+ * scsi_initialize_rq - initialize struct scsi_cmnd.req
+ *
+ * Called from inside blk_get_request().
+ */
+void scsi_initialize_rq(struct request *rq)
+{
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+	scsi_req_init(&cmd->req);
+}
+EXPORT_SYMBOL(scsi_initialize_rq);
+
+/* Called after a request has been started. */
 void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
 {
 	void *buf = cmd->sense_buffer;
@@ -1828,15 +1825,15 @@ out_delay:
 		blk_delay_queue(q, SCSI_QUEUE_DELAY);
 }
 
-static inline int prep_to_mq(int ret)
+static inline blk_status_t prep_to_mq(int ret)
 {
 	switch (ret) {
 	case BLKPREP_OK:
-		return BLK_MQ_RQ_QUEUE_OK;
+		return BLK_STS_OK;
 	case BLKPREP_DEFER:
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 	default:
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 }
 
@@ -1850,7 +1847,7 @@ static int scsi_mq_prep_fn(struct request *req)
 
 	/* zero out the cmd, except for the embedded scsi_request */
 	memset((char *)cmd + sizeof(cmd->req), 0,
-		sizeof(*cmd) - sizeof(cmd->req));
+		sizeof(*cmd) - sizeof(cmd->req) + shost->hostt->cmd_size);
 
 	req->special = cmd;
 
@@ -1908,7 +1905,7 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
 	blk_mq_complete_request(cmd->request);
 }
 
-static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct request *req = bd->rq;
@@ -1916,14 +1913,14 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct scsi_device *sdev = q->queuedata;
 	struct Scsi_Host *shost = sdev->host;
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
-	int ret;
+	blk_status_t ret;
 	int reason;
 
 	ret = prep_to_mq(scsi_prep_state_check(sdev, req));
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret != BLK_STS_OK)
 		goto out;
 
-	ret = BLK_MQ_RQ_QUEUE_BUSY;
+	ret = BLK_STS_RESOURCE;
 	if (!get_device(&sdev->sdev_gendev))
 		goto out;
 
@@ -1936,7 +1933,7 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	if (!(req->rq_flags & RQF_DONTPREP)) {
 		ret = prep_to_mq(scsi_mq_prep_fn(req));
-		if (ret != BLK_MQ_RQ_QUEUE_OK)
+		if (ret != BLK_STS_OK)
 			goto out_dec_host_busy;
 		req->rq_flags |= RQF_DONTPREP;
 	} else {
@@ -1954,11 +1951,11 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 	reason = scsi_dispatch_cmd(cmd);
 	if (reason) {
 		scsi_set_blocked(cmd, reason);
-		ret = BLK_MQ_RQ_QUEUE_BUSY;
+		ret = BLK_STS_RESOURCE;
 		goto out_dec_host_busy;
 	}
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 
 out_dec_host_busy:
 	atomic_dec(&shost->host_busy);
@@ -1971,12 +1968,14 @@ out_put_device:
 	put_device(&sdev->sdev_gendev);
 out:
 	switch (ret) {
-	case BLK_MQ_RQ_QUEUE_BUSY:
+	case BLK_STS_OK:
+		break;
+	case BLK_STS_RESOURCE:
 		if (atomic_read(&sdev->device_busy) == 0 &&
 		    !scsi_device_blocked(sdev))
 			blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
 		break;
-	case BLK_MQ_RQ_QUEUE_ERROR:
+	default:
 		/*
 		 * Make sure to release all allocated ressources when
 		 * we hit an error, as we will never see this command
@@ -1985,8 +1984,6 @@ out:
 		if (req->rq_flags & RQF_DONTPREP)
 			scsi_mq_uninit_cmd(cmd);
 		break;
-	default:
-		break;
 	}
 	return ret;
 }
@@ -2056,6 +2053,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 {
 	struct device *dev = shost->dma_dev;
 
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
+
 	/*
 	 * this limit is imposed by hardware restrictions
 	 */
@@ -2138,6 +2137,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
 	q->request_fn = scsi_request_fn;
 	q->init_rq_fn = scsi_init_rq;
 	q->exit_rq_fn = scsi_exit_rq;
+	q->initialize_rq_fn = scsi_initialize_rq;
 
 	if (blk_init_allocated_queue(q) < 0) {
 		blk_cleanup_queue(q);
@@ -2162,6 +2162,7 @@ static const struct blk_mq_ops scsi_mq_ops = {
 #endif
 	.init_request	= scsi_init_request,
 	.exit_request	= scsi_exit_request,
+	.initialize_rq_fn = scsi_initialize_rq,
 	.map_queues	= scsi_map_queues,
 };
 
@@ -2976,7 +2977,7 @@ scsi_internal_device_block(struct scsi_device *sdev, bool wait)
 		if (wait)
 			blk_mq_quiesce_queue(q);
 		else
-			blk_mq_stop_hw_queues(q);
+			blk_mq_quiesce_queue_nowait(q);
 	} else {
 		spin_lock_irqsave(q->queue_lock, flags);
 		blk_stop_queue(q);
@@ -3030,7 +3031,7 @@ scsi_internal_device_unblock(struct scsi_device *sdev,
 		return -EINVAL;
 
 	if (q->mq_ops) {
-		blk_mq_start_stopped_hw_queues(q, false);
+		blk_mq_unquiesce_queue(q);
 	} else {
 		spin_lock_irqsave(q->queue_lock, flags);
 		blk_start_queue(q);
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 0ebe2f1bb908..5006a656e16a 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -33,6 +33,7 @@
 #include <linux/bsg.h>
 
 #include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_request.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
@@ -172,7 +173,7 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
 			    struct sas_rphy *rphy)
 {
 	struct request *req;
-	int ret;
+	blk_status_t ret;
 	int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
 
 	while ((req = blk_fetch_request(q)) != NULL) {
@@ -230,6 +231,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
 	q = blk_alloc_queue(GFP_KERNEL);
 	if (!q)
 		return -ENOMEM;
+	q->initialize_rq_fn = scsi_initialize_rq;
 	q->cmd_size = sizeof(struct scsi_request);
 
 	if (rphy) {
@@ -249,6 +251,11 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
 	if (error)
 		goto out_cleanup_queue;
 
+	/*
+	 * by default assume old behaviour and bounce for any highmem page
+	 */
+	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+
 	error = bsg_register_queue(q, dev, name, release);
 	if (error)
 		goto out_cleanup_queue;
@@ -264,6 +271,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
 		q->queuedata = shost;
 
 	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
 	return 0;
 
 out_cleanup_queue:
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index f9d1432d7cc5..b6bb4e0ce0e3 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -827,21 +827,32 @@ static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
 	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+	int ret;
 
 	if (!(rq->cmd_flags & REQ_NOUNMAP)) {
 		switch (sdkp->zeroing_mode) {
 		case SD_ZERO_WS16_UNMAP:
-			return sd_setup_write_same16_cmnd(cmd, true);
+			ret = sd_setup_write_same16_cmnd(cmd, true);
+			goto out;
 		case SD_ZERO_WS10_UNMAP:
-			return sd_setup_write_same10_cmnd(cmd, true);
+			ret = sd_setup_write_same10_cmnd(cmd, true);
+			goto out;
 		}
 	}
 
 	if (sdp->no_write_same)
 		return BLKPREP_INVALID;
+
 	if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff)
-		return sd_setup_write_same16_cmnd(cmd, false);
-	return sd_setup_write_same10_cmnd(cmd, false);
+		ret = sd_setup_write_same16_cmnd(cmd, false);
+	else
+		ret = sd_setup_write_same10_cmnd(cmd, false);
+
+out:
+	if (sd_is_zoned(sdkp) && ret == BLKPREP_OK)
+		return sd_zbc_write_lock_zone(cmd);
+
+	return ret;
 }
 
 static void sd_config_write_same(struct scsi_disk *sdkp)
@@ -948,6 +959,10 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
 	rq->__data_len = sdp->sector_size;
 	ret = scsi_init_io(cmd);
 	rq->__data_len = nr_bytes;
+
+	if (sd_is_zoned(sdkp) && ret != BLKPREP_OK)
+		sd_zbc_write_unlock_zone(cmd);
+
 	return ret;
 }
 
@@ -1567,17 +1582,21 @@ out:
 	return retval;
 }
 
-static int sd_sync_cache(struct scsi_disk *sdkp)
+static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
 {
 	int retries, res;
 	struct scsi_device *sdp = sdkp->device;
 	const int timeout = sdp->request_queue->rq_timeout
 		* SD_FLUSH_TIMEOUT_MULTIPLIER;
-	struct scsi_sense_hdr sshdr;
+	struct scsi_sense_hdr my_sshdr;
 
 	if (!scsi_device_online(sdp))
 		return -ENODEV;
 
+	/* caller might not be interested in sense, but we need it */
+	if (!sshdr)
+		sshdr = &my_sshdr;
+
 	for (retries = 3; retries > 0; --retries) {
 		unsigned char cmd[10] = { 0 };
 
@@ -1586,7 +1605,7 @@ static int sd_sync_cache(struct scsi_disk *sdkp)
 		 * Leave the rest of the command zero to indicate
 		 * flush everything.
 		 */
-		res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr,
+		res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, sshdr,
 				timeout, SD_MAX_RETRIES, 0, RQF_PM, NULL);
 		if (res == 0)
 			break;
@@ -1596,11 +1615,12 @@ static int sd_sync_cache(struct scsi_disk *sdkp)
 		sd_print_result(sdkp, "Synchronize Cache(10) failed", res);
 
 		if (driver_byte(res) & DRIVER_SENSE)
-			sd_print_sense_hdr(sdkp, &sshdr);
+			sd_print_sense_hdr(sdkp, sshdr);
+
 		/* we need to evaluate the error return  */
-		if (scsi_sense_valid(&sshdr) &&
-			(sshdr.asc == 0x3a ||	/* medium not present */
-			 sshdr.asc == 0x20))	/* invalid command */
+		if (scsi_sense_valid(sshdr) &&
+			(sshdr->asc == 0x3a ||	/* medium not present */
+			 sshdr->asc == 0x20))	/* invalid command */
 				/* this is no error here */
 				return 0;
 
@@ -3444,7 +3464,7 @@ static void sd_shutdown(struct device *dev)
 
 	if (sdkp->WCE && sdkp->media_present) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-		sd_sync_cache(sdkp);
+		sd_sync_cache(sdkp, NULL);
 	}
 
 	if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) {
@@ -3456,6 +3476,7 @@ static void sd_shutdown(struct device *dev)
 static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
 {
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
+	struct scsi_sense_hdr sshdr;
 	int ret = 0;
 
 	if (!sdkp)	/* E.g.: runtime suspend following sd_remove() */
@@ -3463,12 +3484,23 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
 
 	if (sdkp->WCE && sdkp->media_present) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-		ret = sd_sync_cache(sdkp);
+		ret = sd_sync_cache(sdkp, &sshdr);
+
 		if (ret) {
 			/* ignore OFFLINE device */
 			if (ret == -ENODEV)
-				ret = 0;
-			goto done;
+				return 0;
+
+			if (!scsi_sense_valid(&sshdr) ||
+			    sshdr.sense_key != ILLEGAL_REQUEST)
+				return ret;
+
+			/*
+			 * sshdr.sense_key == ILLEGAL_REQUEST means this drive
+			 * doesn't support sync. There's not much to do and
+			 * suspend shouldn't fail.
+			 */
+			 ret = 0;
 		}
 	}
 
@@ -3480,7 +3512,6 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
 			ret = 0;
 	}
 
-done:
 	return ret;
 }
 
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 0a38ba01b7b4..21225d62b0c1 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -177,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */
 } Sg_device;
 
 /* tasklet or soft irq callback */
-static void sg_rq_end_io(struct request *rq, int uptodate);
+static void sg_rq_end_io(struct request *rq, blk_status_t status);
 static int sg_start_req(Sg_request *srp, unsigned char *cmd);
 static int sg_finish_rem_req(Sg_request * srp);
 static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size);
@@ -808,7 +808,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
 	if (atomic_read(&sdp->detaching)) {
 		if (srp->bio) {
 			scsi_req_free_cmd(scsi_req(srp->rq));
-			blk_end_request_all(srp->rq, -EIO);
+			blk_end_request_all(srp->rq, BLK_STS_IOERR);
 			srp->rq = NULL;
 		}
 
@@ -1300,7 +1300,7 @@ sg_rq_end_io_usercontext(struct work_struct *work)
  * level when a command is completed (or has failed).
  */
 static void
-sg_rq_end_io(struct request *rq, int uptodate)
+sg_rq_end_io(struct request *rq, blk_status_t status)
 {
 	struct sg_request *srp = rq->end_io_data;
 	struct scsi_request *req = scsi_req(rq);
@@ -1732,8 +1732,6 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
 	}
 	req = scsi_req(rq);
 
-	scsi_req_init(rq);
-
 	if (hp->cmd_len > BLK_MAX_CDB)
 		req->cmd = long_cmdp;
 	memcpy(req->cmd, cmd, hp->cmd_len);
@@ -2074,11 +2072,12 @@ sg_get_rq_mark(Sg_fd * sfp, int pack_id)
 		if ((1 == resp->done) && (!resp->sg_io_owned) &&
 		    ((-1 == pack_id) || (resp->header.pack_id == pack_id))) {
 			resp->done = 2;	/* guard against other readers */
-			break;
+			write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+			return resp;
 		}
 	}
 	write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
-	return resp;
+	return NULL;
 }
 
 /* always adds to end of list */
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 1ea34d6f5437..8e5013d9cad4 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -511,7 +511,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
 	atomic64_dec(&STp->stats->in_flight);
 }
 
-static void st_scsi_execute_end(struct request *req, int uptodate)
+static void st_scsi_execute_end(struct request *req, blk_status_t status)
 {
 	struct st_request *SRpnt = req->end_io_data;
 	struct scsi_request *rq = scsi_req(req);
@@ -549,7 +549,6 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
 	if (IS_ERR(req))
 		return DRIVER_ERROR << 24;
 	rq = scsi_req(req);
-	scsi_req_init(req);
 	req->rq_flags |= RQF_QUIET;
 
 	mdata->null_mapped = 1;
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index abc7e87937cc..ffe8d8608818 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -7698,6 +7698,12 @@ static inline void ufshcd_add_sysfs_nodes(struct ufs_hba *hba)
 	ufshcd_add_spm_lvl_sysfs_nodes(hba);
 }
 
+static inline void ufshcd_remove_sysfs_nodes(struct ufs_hba *hba)
+{
+	device_remove_file(hba->dev, &hba->rpm_lvl_attr);
+	device_remove_file(hba->dev, &hba->spm_lvl_attr);
+}
+
 /**
  * ufshcd_shutdown - shutdown routine
  * @hba: per adapter instance
@@ -7735,6 +7741,7 @@ EXPORT_SYMBOL(ufshcd_shutdown);
  */
 void ufshcd_remove(struct ufs_hba *hba)
 {
+	ufshcd_remove_sysfs_nodes(hba);
 	scsi_remove_host(hba->host);
 	/* disable interrupts */
 	ufshcd_disable_intr(hba, hba->intr_mask);
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index a29d068b7696..f8dbfeee6c63 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -894,8 +894,7 @@ static int virtscsi_init(struct virtio_device *vdev,
 	}
 
 	/* Discover virtqueues and write information to configuration.  */
-	err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names,
-			&desc);
+	err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
 	if (err)
 		goto out;
 
diff --git a/drivers/soc/bcm/brcmstb/common.c b/drivers/soc/bcm/brcmstb/common.c
index b6195fdf0d00..22e98a90468c 100644
--- a/drivers/soc/bcm/brcmstb/common.c
+++ b/drivers/soc/bcm/brcmstb/common.c
@@ -49,7 +49,7 @@ static const struct of_device_id sun_top_ctrl_match[] = {
 	{ .compatible = "brcm,bcm7420-sun-top-ctrl", },
 	{ .compatible = "brcm,bcm7425-sun-top-ctrl", },
 	{ .compatible = "brcm,bcm7429-sun-top-ctrl", },
-	{ .compatible = "brcm,bcm7425-sun-top-ctrl", },
+	{ .compatible = "brcm,bcm7435-sun-top-ctrl", },
 	{ .compatible = "brcm,brcmstb-sun-top-ctrl", },
 	{ }
 };
diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c
index 3d891db57ee6..18eefc3f1abe 100644
--- a/drivers/soc/fsl/qbman/qman.c
+++ b/drivers/soc/fsl/qbman/qman.c
@@ -1344,6 +1344,7 @@ static void qm_congestion_task(struct work_struct *work)
 	if (!qm_mc_result_timeout(&p->p, &mcr)) {
 		spin_unlock(&p->cgr_lock);
 		dev_crit(p->config->dev, "QUERYCONGESTION timeout\n");
+		qman_p_irqsource_add(p, QM_PIRQ_CSCI);
 		return;
 	}
 	/* mask out the ones I'm not interested in */
@@ -1358,6 +1359,7 @@ static void qm_congestion_task(struct work_struct *work)
 		if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid))
 			cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid));
 	spin_unlock(&p->cgr_lock);
+	qman_p_irqsource_add(p, QM_PIRQ_CSCI);
 }
 
 static void qm_mr_process_task(struct work_struct *work)
@@ -1417,12 +1419,14 @@ static void qm_mr_process_task(struct work_struct *work)
 	}
 
 	qm_mr_cci_consume(&p->p, num);
+	qman_p_irqsource_add(p, QM_PIRQ_MRI);
 	preempt_enable();
 }
 
 static u32 __poll_portal_slow(struct qman_portal *p, u32 is)
 {
 	if (is & QM_PIRQ_CSCI) {
+		qman_p_irqsource_remove(p, QM_PIRQ_CSCI);
 		queue_work_on(smp_processor_id(), qm_portal_wq,
 			      &p->congestion_work);
 	}
@@ -1434,6 +1438,7 @@ static u32 __poll_portal_slow(struct qman_portal *p, u32 is)
 	}
 
 	if (is & QM_PIRQ_MRI) {
+		qman_p_irqsource_remove(p, QM_PIRQ_MRI);
 		queue_work_on(smp_processor_id(), qm_portal_wq,
 			      &p->mr_work);
 	}
diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index ade168f5328e..2ef6fc6487c1 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -66,7 +66,7 @@ static unsigned int qe_num_of_snum;
 
 static phys_addr_t qebase = -1;
 
-phys_addr_t get_qe_base(void)
+static phys_addr_t get_qe_base(void)
 {
 	struct device_node *qe;
 	int ret;
@@ -90,8 +90,6 @@ phys_addr_t get_qe_base(void)
 	return qebase;
 }
 
-EXPORT_SYMBOL(get_qe_base);
-
 void qe_reset(void)
 {
 	if (qe_immr == NULL)
@@ -163,11 +161,15 @@ EXPORT_SYMBOL(qe_issue_cmd);
  */
 static unsigned int brg_clk = 0;
 
+#define CLK_GRAN	(1000)
+#define CLK_GRAN_LIMIT	(5)
+
 unsigned int qe_get_brg_clk(void)
 {
 	struct device_node *qe;
 	int size;
 	const u32 *prop;
+	unsigned int mod;
 
 	if (brg_clk)
 		return brg_clk;
@@ -185,10 +187,22 @@ unsigned int qe_get_brg_clk(void)
 
 	of_node_put(qe);
 
+	/* round this if near to a multiple of CLK_GRAN */
+	mod = brg_clk % CLK_GRAN;
+	if (mod) {
+		if (mod < CLK_GRAN_LIMIT)
+			brg_clk -= mod;
+		else if (mod > (CLK_GRAN - CLK_GRAN_LIMIT))
+			brg_clk += CLK_GRAN - mod;
+	}
+
 	return brg_clk;
 }
 EXPORT_SYMBOL(qe_get_brg_clk);
 
+#define PVR_VER_836x	0x8083
+#define PVR_VER_832x	0x8084
+
 /* Program the BRG to the given sampling rate and multiplier
  *
  * @brg: the BRG, QE_BRG1 - QE_BRG16
@@ -215,8 +229,9 @@ int qe_setbrg(enum qe_clock brg, unsigned int rate, unsigned int multiplier)
 	/* Errata QE_General4, which affects some MPC832x and MPC836x SOCs, says
 	   that the BRG divisor must be even if you're not using divide-by-16
 	   mode. */
-	if (!div16 && (divisor & 1) && (divisor > 3))
-		divisor++;
+	if (pvr_version_is(PVR_VER_836x) || pvr_version_is(PVR_VER_832x))
+		if (!div16 && (divisor & 1) && (divisor > 3))
+			divisor++;
 
 	tempval = ((divisor - 1) << QE_BRGC_DIVISOR_SHIFT) |
 		QE_BRGC_ENABLE | div16;
diff --git a/drivers/soc/fsl/qe/qe_tdm.c b/drivers/soc/fsl/qe/qe_tdm.c
index a1048b44e6b9..f744c214f680 100644
--- a/drivers/soc/fsl/qe/qe_tdm.c
+++ b/drivers/soc/fsl/qe/qe_tdm.c
@@ -177,6 +177,7 @@ err_miss_siram_property:
 	devm_iounmap(&pdev->dev, utdm->si_regs);
 	return ret;
 }
+EXPORT_SYMBOL(ucc_of_parse_tdm);
 
 void ucc_tdm_init(struct ucc_tdm *utdm, struct ucc_tdm_info *ut_info)
 {
@@ -274,3 +275,4 @@ void ucc_tdm_init(struct ucc_tdm *utdm, struct ucc_tdm_info *ut_info)
 		break;
 	}
 }
+EXPORT_SYMBOL(ucc_tdm_init);
diff --git a/drivers/soc/imx/Kconfig b/drivers/soc/imx/Kconfig
index 357a5d8f8da0..a5b86a28f343 100644
--- a/drivers/soc/imx/Kconfig
+++ b/drivers/soc/imx/Kconfig
@@ -2,8 +2,9 @@ menu "i.MX SoC drivers"
 
 config IMX7_PM_DOMAINS
 	bool "i.MX7 PM domains"
-	select PM_GENERIC_DOMAINS
 	depends on SOC_IMX7D || (COMPILE_TEST && OF)
+	depends on PM
+	select PM_GENERIC_DOMAINS
 	default y if SOC_IMX7D
 
 endmenu
diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c
index ecebe2eecc3a..026182d3b27c 100644
--- a/drivers/soc/ti/knav_dma.c
+++ b/drivers/soc/ti/knav_dma.c
@@ -413,7 +413,7 @@ static int of_channel_match_helper(struct device_node *np, const char *name,
  * @name:	slave channel name
  * @config:	dma configuration parameters
  *
- * Returns pointer to appropriate DMA channel on success or NULL.
+ * Returns pointer to appropriate DMA channel on success or error.
  */
 void *knav_dma_open_channel(struct device *dev, const char *name,
 					struct knav_dma_cfg *config)
diff --git a/drivers/staging/android/ion/devicetree.txt b/drivers/staging/android/ion/devicetree.txt
deleted file mode 100644
index 168715271f06..000000000000
--- a/drivers/staging/android/ion/devicetree.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-Ion Memory Manager
-
-Ion is a memory manager that allows for sharing of buffers via dma-buf.
-Ion allows for different types of allocation via an abstraction called
-a 'heap'. A heap represents a specific type of memory. Each heap has
-a different type. There can be multiple instances of the same heap
-type.
-
-Specific heap instances are tied to heap IDs. Heap IDs are not to be specified
-in the devicetree.
-
-Required properties for Ion
-
-- compatible: "linux,ion" PLUS a compatible property for the device
-
-All child nodes of a linux,ion node are interpreted as heaps
-
-required properties for heaps
-
-- compatible: compatible string for a heap type PLUS a compatible property
-for the specific instance of the heap. Current heap types
--- linux,ion-heap-system
--- linux,ion-heap-system-contig
--- linux,ion-heap-carveout
--- linux,ion-heap-chunk
--- linux,ion-heap-dma
--- linux,ion-heap-custom
-
-Optional properties
-- memory-region: A phandle to a memory region. Required for DMA heap type
-(see reserved-memory.txt for details on the reservation)
-
-Example:
-
-	ion {
-		compatbile = "hisilicon,ion", "linux,ion";
-
-		ion-system-heap {
-			compatbile = "hisilicon,system-heap", "linux,ion-heap-system"
-		};
-
-		ion-camera-region {
-			compatible = "hisilicon,camera-heap", "linux,ion-heap-dma"
-			memory-region = <&camera_region>;
-		};
-
-		ion-fb-region {
-			compatbile = "hisilicon,fb-heap", "linux,ion-heap-dma"
-			memory-region = <&fb_region>;
-		};
-	}
diff --git a/drivers/staging/ccree/Kconfig b/drivers/staging/ccree/Kconfig
index ae627049c499..4be87f503e3b 100644
--- a/drivers/staging/ccree/Kconfig
+++ b/drivers/staging/ccree/Kconfig
@@ -1,6 +1,6 @@
 config CRYPTO_DEV_CCREE
 	tristate "Support for ARM TrustZone CryptoCell C7XX family of Crypto accelerators"
-	depends on CRYPTO_HW && OF && HAS_DMA
+	depends on CRYPTO && CRYPTO_HW && OF && HAS_DMA
 	default n
 	select CRYPTO_HASH
 	select CRYPTO_BLKCIPHER
diff --git a/drivers/staging/ccree/ssi_buffer_mgr.c b/drivers/staging/ccree/ssi_buffer_mgr.c
index 038e2ff5e545..6471d3d2d375 100644
--- a/drivers/staging/ccree/ssi_buffer_mgr.c
+++ b/drivers/staging/ccree/ssi_buffer_mgr.c
@@ -216,7 +216,8 @@ void ssi_buffer_mgr_copy_scatterlist_portion(
 	uint32_t nents, lbytes;
 
 	nents = ssi_buffer_mgr_get_sgl_nents(sg, end, &lbytes, NULL);
-	sg_copy_buffer(sg, nents, (void *)dest, (end - to_skip), 0, (direct == SSI_SG_TO_BUF));
+	sg_copy_buffer(sg, nents, (void *)dest, (end - to_skip + 1), to_skip,
+		       (direct == SSI_SG_TO_BUF));
 }
 
 static inline int ssi_buffer_mgr_render_buff_to_mlli(
diff --git a/drivers/staging/ccree/ssi_request_mgr.c b/drivers/staging/ccree/ssi_request_mgr.c
index 522bd62c102e..8611adf3bb2e 100644
--- a/drivers/staging/ccree/ssi_request_mgr.c
+++ b/drivers/staging/ccree/ssi_request_mgr.c
@@ -376,7 +376,6 @@ int send_request(
 	rc = ssi_power_mgr_runtime_get(&drvdata->plat_dev->dev);
 	if (rc != 0) {
 		SSI_LOG_ERR("ssi_power_mgr_runtime_get returned %x\n",rc);
-		spin_unlock_bh(&req_mgr_h->hw_lock);
 		return rc;
 	}
 #endif
diff --git a/drivers/staging/fsl-dpaa2/Kconfig b/drivers/staging/fsl-dpaa2/Kconfig
index 2e325cb747ae..730fd6d4db33 100644
--- a/drivers/staging/fsl-dpaa2/Kconfig
+++ b/drivers/staging/fsl-dpaa2/Kconfig
@@ -12,6 +12,7 @@ config FSL_DPAA2
 config FSL_DPAA2_ETH
 	tristate "Freescale DPAA2 Ethernet"
 	depends on FSL_DPAA2 && FSL_MC_DPIO
+	depends on NETDEVICES && ETHERNET
 	---help---
 	  Ethernet driver for Freescale DPAA2 SoCs, using the
 	  Freescale MC bus driver
diff --git a/drivers/staging/iio/cdc/ad7152.c b/drivers/staging/iio/cdc/ad7152.c
index dc6ecd824365..ff10d1f0a7e4 100644
--- a/drivers/staging/iio/cdc/ad7152.c
+++ b/drivers/staging/iio/cdc/ad7152.c
@@ -231,16 +231,12 @@ static int ad7152_write_raw_samp_freq(struct device *dev, int val)
 	if (i >= ARRAY_SIZE(ad7152_filter_rate_table))
 		i = ARRAY_SIZE(ad7152_filter_rate_table) - 1;
 
-	mutex_lock(&chip->state_lock);
 	ret = i2c_smbus_write_byte_data(chip->client,
 					AD7152_REG_CFG2, AD7152_CFG2_OSR(i));
-	if (ret < 0) {
-		mutex_unlock(&chip->state_lock);
+	if (ret < 0)
 		return ret;
-	}
 
 	chip->filter_rate_setup = i;
-	mutex_unlock(&chip->state_lock);
 
 	return ret;
 }
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 0db662d6abdd..85b242ec5f9b 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -3267,7 +3267,7 @@ int
 kiblnd_connd(void *arg)
 {
 	spinlock_t *lock = &kiblnd_data.kib_connd_lock;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	unsigned long flags;
 	struct kib_conn *conn;
 	int timeout;
@@ -3521,7 +3521,7 @@ kiblnd_scheduler(void *arg)
 	long id = (long)arg;
 	struct kib_sched_info *sched;
 	struct kib_conn *conn;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	unsigned long flags;
 	struct ib_wc wc;
 	int did_something;
@@ -3656,7 +3656,7 @@ kiblnd_failover_thread(void *arg)
 {
 	rwlock_t *glock = &kiblnd_data.kib_global_lock;
 	struct kib_dev *dev;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	unsigned long flags;
 	int rc;
 
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
index 3ed3b08c122c..6b38d5a8fe92 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
@@ -2166,7 +2166,7 @@ ksocknal_connd(void *arg)
 {
 	spinlock_t *connd_lock = &ksocknal_data.ksnd_connd_lock;
 	struct ksock_connreq *cr;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int nloops = 0;
 	int cons_retry = 0;
 
@@ -2554,7 +2554,7 @@ ksocknal_check_peer_timeouts(int idx)
 int
 ksocknal_reaper(void *arg)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct ksock_conn *conn;
 	struct ksock_sched *sched;
 	struct list_head enomem_conns;
diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c
index c56e9922cd5b..49deb448b044 100644
--- a/drivers/staging/lustre/lnet/libcfs/debug.c
+++ b/drivers/staging/lustre/lnet/libcfs/debug.c
@@ -361,7 +361,7 @@ static int libcfs_debug_dumplog_thread(void *arg)
 
 void libcfs_debug_dumplog(void)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct task_struct *dumper;
 
 	/* we're being careful to ensure that the kernel thread is
diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c
index 9599b7441feb..27082d2f7938 100644
--- a/drivers/staging/lustre/lnet/libcfs/tracefile.c
+++ b/drivers/staging/lustre/lnet/libcfs/tracefile.c
@@ -990,7 +990,7 @@ static int tracefiled(void *arg)
 	complete(&tctl->tctl_start);
 
 	while (1) {
-		wait_queue_t __wait;
+		wait_queue_entry_t __wait;
 
 		pc.pc_want_daemon_pages = 0;
 		collect_pages(&pc);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-eq.c b/drivers/staging/lustre/lnet/lnet/lib-eq.c
index ce4b83584e17..9ebba4ef5f90 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-eq.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-eq.c
@@ -312,7 +312,7 @@ __must_hold(&the_lnet.ln_eq_wait_lock)
 {
 	int tms = *timeout_ms;
 	int wait;
-	wait_queue_t wl;
+	wait_queue_entry_t wl;
 	unsigned long now;
 
 	if (!tms)
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
index 9fca8d225ee0..f075706bba6d 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c
@@ -516,7 +516,7 @@ lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port,
 int
 lnet_sock_accept(struct socket **newsockp, struct socket *sock)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct socket *newsock;
 	int rc;
 
diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c
index 999f250ceed0..bf31bc200d27 100644
--- a/drivers/staging/lustre/lustre/fid/fid_request.c
+++ b/drivers/staging/lustre/lustre/fid/fid_request.c
@@ -192,7 +192,7 @@ static int seq_client_alloc_seq(const struct lu_env *env,
 }
 
 static int seq_fid_alloc_prep(struct lu_client_seq *seq,
-			      wait_queue_t *link)
+			      wait_queue_entry_t *link)
 {
 	if (seq->lcs_update) {
 		add_wait_queue(&seq->lcs_waitq, link);
@@ -223,7 +223,7 @@ static void seq_fid_alloc_fini(struct lu_client_seq *seq)
 int seq_client_alloc_fid(const struct lu_env *env,
 			 struct lu_client_seq *seq, struct lu_fid *fid)
 {
-	wait_queue_t link;
+	wait_queue_entry_t link;
 	int rc;
 
 	LASSERT(seq);
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(seq_client_alloc_fid);
  */
 void seq_client_flush(struct lu_client_seq *seq)
 {
-	wait_queue_t link;
+	wait_queue_entry_t link;
 
 	LASSERT(seq);
 	init_waitqueue_entry(&link, current);
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
index b04d613846ee..f24970da8323 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lib.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lib.h
@@ -201,7 +201,7 @@ struct l_wait_info {
 			   sigmask(SIGALRM))
 
 /**
- * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively
+ * wait_queue_entry_t of Linux (version < 2.6.34) is a FIFO list for exclusively
  * waiting threads, which is not always desirable because all threads will
  * be waken up again and again, even user only needs a few of them to be
  * active most time. This is not good for performance because cache can
@@ -228,7 +228,7 @@ struct l_wait_info {
  */
 #define __l_wait_event(wq, condition, info, ret, l_add_wait)		   \
 do {									   \
-	wait_queue_t __wait;						 \
+	wait_queue_entry_t __wait;						 \
 	long __timeout = info->lwi_timeout;			  \
 	sigset_t   __blocked;					      \
 	int   __allow_intr = info->lwi_allow_intr;			     \
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
index 6f9d540a97ce..fff930fc3cff 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
@@ -1115,7 +1115,7 @@ int ldlm_init(void)
 	ldlm_lock_slab = kmem_cache_create("ldlm_locks",
 					   sizeof(struct ldlm_lock), 0,
 					   SLAB_HWCACHE_ALIGN |
-					   SLAB_DESTROY_BY_RCU, NULL);
+					   SLAB_TYPESAFE_BY_RCU, NULL);
 	if (!ldlm_lock_slab) {
 		kmem_cache_destroy(ldlm_resource_slab);
 		return -ENOMEM;
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
index 8af611033e12..96515b839436 100644
--- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
+++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
@@ -207,7 +207,7 @@ int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
 static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
 {
 	struct lu_object_header *header = obj->co_lu.lo_header;
-	wait_queue_t	   waiter;
+	wait_queue_entry_t	   waiter;
 
 	if (unlikely(atomic_read(&header->loh_ref) != 1)) {
 		struct lu_site *site = obj->co_lu.lo_dev->ld_site;
diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
index 391c632365ae..e889d3a7de9c 100644
--- a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
@@ -370,7 +370,7 @@ struct lov_thread_info {
 	struct ost_lvb	  lti_lvb;
 	struct cl_2queue	lti_cl2q;
 	struct cl_page_list     lti_plist;
-	wait_queue_t	  lti_waiter;
+	wait_queue_entry_t	  lti_waiter;
 	struct cl_attr          lti_attr;
 };
 
diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
index ab3ecfeeadc8..eddabbe31e5c 100644
--- a/drivers/staging/lustre/lustre/lov/lov_object.c
+++ b/drivers/staging/lustre/lustre/lov/lov_object.c
@@ -371,7 +371,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
 	struct lov_layout_raid0 *r0;
 	struct lu_site	  *site;
 	struct lu_site_bkt_data *bkt;
-	wait_queue_t	  *waiter;
+	wait_queue_entry_t	  *waiter;
 
 	r0  = &lov->u.raid0;
 	LASSERT(r0->lo_sub[idx] == los);
diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c
index 2e1bd47337fd..e6727cefde05 100644
--- a/drivers/staging/lustre/lustre/lov/lov_pack.c
+++ b/drivers/staging/lustre/lustre/lov/lov_pack.c
@@ -293,18 +293,10 @@ int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm,
 	size_t lmmk_size;
 	size_t lum_size;
 	int rc;
-	mm_segment_t seg;
 
 	if (!lsm)
 		return -ENODATA;
 
-	/*
-	 * "Switch to kernel segment" to allow copying from kernel space by
-	 * copy_{to,from}_user().
-	 */
-	seg = get_fs();
-	set_fs(KERNEL_DS);
-
 	if (lsm->lsm_magic != LOV_MAGIC_V1 && lsm->lsm_magic != LOV_MAGIC_V3) {
 		CERROR("bad LSM MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
 		       lsm->lsm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
@@ -406,6 +398,5 @@ int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm,
 out_free:
 	kvfree(lmmk);
 out:
-	set_fs(seg);
 	return rc;
 }
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index abcf951208d2..76ae600ae2c8 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -556,7 +556,7 @@ EXPORT_SYMBOL(lu_object_print);
 static struct lu_object *htable_lookup(struct lu_site *s,
 				       struct cfs_hash_bd *bd,
 				       const struct lu_fid *f,
-				       wait_queue_t *waiter,
+				       wait_queue_entry_t *waiter,
 				       __u64 *version)
 {
 	struct lu_site_bkt_data *bkt;
@@ -670,7 +670,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 					    struct lu_device *dev,
 					    const struct lu_fid *f,
 					    const struct lu_object_conf *conf,
-					    wait_queue_t *waiter)
+					    wait_queue_entry_t *waiter)
 {
 	struct lu_object      *o;
 	struct lu_object      *shadow;
@@ -750,7 +750,7 @@ struct lu_object *lu_object_find_at(const struct lu_env *env,
 {
 	struct lu_site_bkt_data *bkt;
 	struct lu_object	*obj;
-	wait_queue_t	   wait;
+	wait_queue_entry_t	   wait;
 
 	while (1) {
 		obj = lu_object_find_try(env, dev, f, conf, &wait);
diff --git a/drivers/staging/media/atomisp/i2c/Makefile b/drivers/staging/media/atomisp/i2c/Makefile
index 8ea01904c0ea..466517c7c8e6 100644
--- a/drivers/staging/media/atomisp/i2c/Makefile
+++ b/drivers/staging/media/atomisp/i2c/Makefile
@@ -19,5 +19,3 @@ obj-$(CONFIG_VIDEO_AP1302)     += ap1302.o
 
 obj-$(CONFIG_VIDEO_LM3554) += lm3554.o
 
-ccflags-y += -Werror
-
diff --git a/drivers/staging/media/atomisp/i2c/imx/Makefile b/drivers/staging/media/atomisp/i2c/imx/Makefile
index 1d7f7ab94cac..6b13a3a66e49 100644
--- a/drivers/staging/media/atomisp/i2c/imx/Makefile
+++ b/drivers/staging/media/atomisp/i2c/imx/Makefile
@@ -4,5 +4,3 @@ imx1x5-objs := imx.o drv201.o ad5816g.o dw9714.o dw9719.o dw9718.o vcm.o otp.o o
 
 ov8858_driver-objs := ../ov8858.o dw9718.o vcm.o
 obj-$(CONFIG_VIDEO_OV8858)     += ov8858_driver.o
-
-ccflags-y += -Werror
diff --git a/drivers/staging/media/atomisp/i2c/ov5693/Makefile b/drivers/staging/media/atomisp/i2c/ov5693/Makefile
index fceb9e9b881b..c9c0e1245858 100644
--- a/drivers/staging/media/atomisp/i2c/ov5693/Makefile
+++ b/drivers/staging/media/atomisp/i2c/ov5693/Makefile
@@ -1,3 +1 @@
 obj-$(CONFIG_VIDEO_OV5693) += ov5693.o
-
-ccflags-y += -Werror
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/Makefile b/drivers/staging/media/atomisp/pci/atomisp2/Makefile
index 3fa7c1c1479f..f126a89a08e9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/Makefile
+++ b/drivers/staging/media/atomisp/pci/atomisp2/Makefile
@@ -351,5 +351,5 @@ DEFINES := -DHRT_HW -DHRT_ISP_CSS_CUSTOM_HOST -DHRT_USE_VIR_ADDRS -D__HOST__
 DEFINES += -DATOMISP_POSTFIX=\"css2400b0_v21\" -DISP2400B0
 DEFINES += -DSYSTEM_hive_isp_css_2400_system -DISP2400
 
-ccflags-y += $(INCLUDES) $(DEFINES) -fno-common -Werror
+ccflags-y += $(INCLUDES) $(DEFINES) -fno-common
 
diff --git a/drivers/staging/rtl8188eu/os_dep/mon.c b/drivers/staging/rtl8188eu/os_dep/mon.c
index cfe37eb026d6..859d0d6051cd 100644
--- a/drivers/staging/rtl8188eu/os_dep/mon.c
+++ b/drivers/staging/rtl8188eu/os_dep/mon.c
@@ -152,7 +152,7 @@ static const struct net_device_ops mon_netdev_ops = {
 static void mon_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &mon_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	ether_setup(dev);
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->type = ARPHRD_IEEE80211;
diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index 4723a0bd5067..1c6ed5b2a6f9 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -97,8 +97,9 @@ void rtl92e_set_reg(struct net_device *dev, u8 variable, u8 *val)
 
 	switch (variable) {
 	case HW_VAR_BSSID:
-		rtl92e_writel(dev, BSSIDR, ((u32 *)(val))[0]);
-		rtl92e_writew(dev, BSSIDR+2, ((u16 *)(val+2))[0]);
+		/* BSSIDR 2 byte alignment */
+		rtl92e_writew(dev, BSSIDR, *(u16 *)val);
+		rtl92e_writel(dev, BSSIDR + 2, *(u32 *)(val + 2));
 		break;
 
 	case HW_VAR_MEDIA_STATUS:
@@ -624,7 +625,7 @@ void rtl92e_get_eeprom_size(struct net_device *dev)
 	struct r8192_priv *priv = rtllib_priv(dev);
 
 	RT_TRACE(COMP_INIT, "===========>%s()\n", __func__);
-	curCR = rtl92e_readl(dev, EPROM_CMD);
+	curCR = rtl92e_readw(dev, EPROM_CMD);
 	RT_TRACE(COMP_INIT, "read from Reg Cmd9346CR(%x):%x\n", EPROM_CMD,
 		 curCR);
 	priv->epromtype = (curCR & EPROM_CMD_9356SEL) ? EEPROM_93C56 :
@@ -961,8 +962,8 @@ static void _rtl92e_net_update(struct net_device *dev)
 	rtl92e_config_rate(dev, &rate_config);
 	priv->dot11CurrentPreambleMode = PREAMBLE_AUTO;
 	 priv->basic_rate = rate_config &= 0x15f;
-	rtl92e_writel(dev, BSSIDR, ((u32 *)net->bssid)[0]);
-	rtl92e_writew(dev, BSSIDR+4, ((u16 *)net->bssid)[2]);
+	rtl92e_writew(dev, BSSIDR, *(u16 *)net->bssid);
+	rtl92e_writel(dev, BSSIDR + 2, *(u32 *)(net->bssid + 2));
 
 	if (priv->rtllib->iw_mode == IW_MODE_ADHOC) {
 		rtl92e_writew(dev, ATIMWND, 2);
@@ -1182,8 +1183,7 @@ void  rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc,
 			  struct cb_desc *cb_desc, struct sk_buff *skb)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	dma_addr_t mapping = pci_map_single(priv->pdev, skb->data, skb->len,
-			 PCI_DMA_TODEVICE);
+	dma_addr_t mapping;
 	struct tx_fwinfo_8190pci *pTxFwInfo;
 
 	pTxFwInfo = (struct tx_fwinfo_8190pci *)skb->data;
@@ -1194,8 +1194,6 @@ void  rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc,
 	pTxFwInfo->Short = _rtl92e_query_is_short(pTxFwInfo->TxHT,
 						  pTxFwInfo->TxRate, cb_desc);
 
-	if (pci_dma_mapping_error(priv->pdev, mapping))
-		netdev_err(dev, "%s(): DMA Mapping error\n", __func__);
 	if (cb_desc->bAMPDUEnable) {
 		pTxFwInfo->AllowAggregation = 1;
 		pTxFwInfo->RxMF = cb_desc->ampdu_factor;
@@ -1230,6 +1228,14 @@ void  rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc,
 	}
 
 	memset((u8 *)pdesc, 0, 12);
+
+	mapping = pci_map_single(priv->pdev, skb->data, skb->len,
+				 PCI_DMA_TODEVICE);
+	if (pci_dma_mapping_error(priv->pdev, mapping)) {
+		netdev_err(dev, "%s(): DMA Mapping error\n", __func__);
+		return;
+	}
+
 	pdesc->LINIP = 0;
 	pdesc->CmdInit = 1;
 	pdesc->Offset = sizeof(struct tx_fwinfo_8190pci) + 8;
diff --git a/drivers/staging/rtl8192e/rtl819x_TSProc.c b/drivers/staging/rtl8192e/rtl819x_TSProc.c
index 48bbd9e8a52f..dcc4eb691889 100644
--- a/drivers/staging/rtl8192e/rtl819x_TSProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_TSProc.c
@@ -306,11 +306,6 @@ static void MakeTSEntry(struct ts_common_info *pTsCommonInfo, u8 *Addr,
 	pTsCommonInfo->TClasNum = TCLAS_Num;
 }
 
-static bool IsACValid(unsigned int tid)
-{
-	return tid < 7;
-}
-
 bool GetTs(struct rtllib_device *ieee, struct ts_common_info **ppTS,
 	   u8 *Addr, u8 TID, enum tr_select TxRxSelect, bool bAddNewTs)
 {
@@ -328,12 +323,6 @@ bool GetTs(struct rtllib_device *ieee, struct ts_common_info **ppTS,
 	if (ieee->current_network.qos_data.supported == 0) {
 		UP = 0;
 	} else {
-		if (!IsACValid(TID)) {
-			netdev_warn(ieee->dev, "%s(): TID(%d) is not valid\n",
-				    __func__, TID);
-			return false;
-		}
-
 		switch (TID) {
 		case 0:
 		case 3:
@@ -351,6 +340,10 @@ bool GetTs(struct rtllib_device *ieee, struct ts_common_info **ppTS,
 		case 7:
 			UP = 7;
 			break;
+		default:
+			netdev_warn(ieee->dev, "%s(): TID(%d) is not valid\n",
+				    __func__, TID);
+			return false;
 		}
 	}
 
diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
index 5e7a61f24f8d..bd4352fe2de3 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
@@ -2667,7 +2667,8 @@ static int rtw_cfg80211_add_monitor_if (struct adapter *padapter, char *name, st
 	mon_ndev->type = ARPHRD_IEEE80211_RADIOTAP;
 	strncpy(mon_ndev->name, name, IFNAMSIZ);
 	mon_ndev->name[IFNAMSIZ - 1] = 0;
-	mon_ndev->destructor = rtw_ndev_destructor;
+	mon_ndev->needs_free_netdev = true;
+	mon_ndev->priv_destructor = rtw_ndev_destructor;
 
 	mon_ndev->netdev_ops = &rtw_cfg80211_monitor_if_ops;
 
@@ -3531,7 +3532,6 @@ int rtw_wdev_alloc(struct adapter *padapter, struct device *dev)
 		pwdev_priv->power_mgmt = true;
 	else
 		pwdev_priv->power_mgmt = false;
-	kfree((u8 *)wdev);
 
 	return ret;
 
diff --git a/drivers/staging/rtl8723bs/os_dep/os_intfs.c b/drivers/staging/rtl8723bs/os_dep/os_intfs.c
index f83cfc76505c..021589913681 100644
--- a/drivers/staging/rtl8723bs/os_dep/os_intfs.c
+++ b/drivers/staging/rtl8723bs/os_dep/os_intfs.c
@@ -1207,8 +1207,6 @@ void rtw_ndev_destructor(struct net_device *ndev)
 
 	if (ndev->ieee80211_ptr)
 		kfree((u8 *)ndev->ieee80211_ptr);
-
-	free_netdev(ndev);
 }
 
 void rtw_dev_unload(struct adapter *padapter)
diff --git a/drivers/staging/rtl8723bs/os_dep/osdep_service.c b/drivers/staging/rtl8723bs/os_dep/osdep_service.c
index 02db59e8b593..aa16d1ab955b 100644
--- a/drivers/staging/rtl8723bs/os_dep/osdep_service.c
+++ b/drivers/staging/rtl8723bs/os_dep/osdep_service.c
@@ -160,7 +160,7 @@ static int isFileReadable(char *path)
 		oldfs = get_fs(); set_fs(get_ds());
 
 		if (1!=readFile(fp, &buf, 1))
-			ret = PTR_ERR(fp);
+			ret = -EINVAL;
 
 		set_fs(oldfs);
 		filp_close(fp, NULL);
diff --git a/drivers/staging/speakup/speakup_acntpc.c b/drivers/staging/speakup/speakup_acntpc.c
index ad72f8e883fc..a041441766aa 100644
--- a/drivers/staging/speakup/speakup_acntpc.c
+++ b/drivers/staging/speakup/speakup_acntpc.c
@@ -310,7 +310,7 @@ static void accent_release(void)
 	speakup_info.port_tts = 0;
 }
 
-module_param_named(port, port_forced, int, 0444);
+module_param_hw_named(port, port_forced, int, ioport, 0444);
 module_param_named(start, synth_acntpc.startup, short, 0444);
 
 MODULE_PARM_DESC(port, "Set the port for the synthesizer (override probing).");
diff --git a/drivers/staging/speakup/speakup_dtlk.c b/drivers/staging/speakup/speakup_dtlk.c
index 5973acc0a006..33180937222d 100644
--- a/drivers/staging/speakup/speakup_dtlk.c
+++ b/drivers/staging/speakup/speakup_dtlk.c
@@ -382,7 +382,7 @@ static void dtlk_release(void)
 	speakup_info.port_tts = 0;
 }
 
-module_param_named(port, port_forced, int, 0444);
+module_param_hw_named(port, port_forced, int, ioport, 0444);
 module_param_named(start, synth_dtlk.startup, short, 0444);
 
 MODULE_PARM_DESC(port, "Set the port for the synthesizer (override probing).");
diff --git a/drivers/staging/speakup/speakup_keypc.c b/drivers/staging/speakup/speakup_keypc.c
index ba7901178e0b..d3203f8fc3d0 100644
--- a/drivers/staging/speakup/speakup_keypc.c
+++ b/drivers/staging/speakup/speakup_keypc.c
@@ -312,7 +312,7 @@ static void keynote_release(void)
 	synth_port = 0;
 }
 
-module_param_named(port, port_forced, int, 0444);
+module_param_hw_named(port, port_forced, int, ioport, 0444);
 module_param_named(start, synth_keypc.startup, short, 0444);
 
 MODULE_PARM_DESC(port, "Set the port for the synthesizer (override probing).");
diff --git a/drivers/staging/typec/fusb302/fusb302.c b/drivers/staging/typec/fusb302/fusb302.c
index 2cee9a952c9b..4a356e509fe4 100644
--- a/drivers/staging/typec/fusb302/fusb302.c
+++ b/drivers/staging/typec/fusb302/fusb302.c
@@ -264,22 +264,36 @@ static void fusb302_debugfs_exit(const struct fusb302_chip *chip) { }
 
 #define FUSB302_RESUME_RETRY 10
 #define FUSB302_RESUME_RETRY_SLEEP 50
-static int fusb302_i2c_write(struct fusb302_chip *chip,
-			     u8 address, u8 data)
+
+static bool fusb302_is_suspended(struct fusb302_chip *chip)
 {
 	int retry_cnt;
-	int ret = 0;
 
-	atomic_set(&chip->i2c_busy, 1);
 	for (retry_cnt = 0; retry_cnt < FUSB302_RESUME_RETRY; retry_cnt++) {
 		if (atomic_read(&chip->pm_suspend)) {
-			pr_err("fusb302_i2c: pm suspend, retry %d/%d\n",
-			       retry_cnt + 1, FUSB302_RESUME_RETRY);
+			dev_err(chip->dev, "i2c: pm suspend, retry %d/%d\n",
+				retry_cnt + 1, FUSB302_RESUME_RETRY);
 			msleep(FUSB302_RESUME_RETRY_SLEEP);
 		} else {
-			break;
+			return false;
 		}
 	}
+
+	return true;
+}
+
+static int fusb302_i2c_write(struct fusb302_chip *chip,
+			     u8 address, u8 data)
+{
+	int ret = 0;
+
+	atomic_set(&chip->i2c_busy, 1);
+
+	if (fusb302_is_suspended(chip)) {
+		atomic_set(&chip->i2c_busy, 0);
+		return -ETIMEDOUT;
+	}
+
 	ret = i2c_smbus_write_byte_data(chip->i2c_client, address, data);
 	if (ret < 0)
 		fusb302_log(chip, "cannot write 0x%02x to 0x%02x, ret=%d",
@@ -292,21 +306,17 @@ static int fusb302_i2c_write(struct fusb302_chip *chip,
 static int fusb302_i2c_block_write(struct fusb302_chip *chip, u8 address,
 				   u8 length, const u8 *data)
 {
-	int retry_cnt;
 	int ret = 0;
 
 	if (length <= 0)
 		return ret;
 	atomic_set(&chip->i2c_busy, 1);
-	for (retry_cnt = 0; retry_cnt < FUSB302_RESUME_RETRY; retry_cnt++) {
-		if (atomic_read(&chip->pm_suspend)) {
-			pr_err("fusb302_i2c: pm suspend, retry %d/%d\n",
-			       retry_cnt + 1, FUSB302_RESUME_RETRY);
-			msleep(FUSB302_RESUME_RETRY_SLEEP);
-		} else {
-			break;
-		}
+
+	if (fusb302_is_suspended(chip)) {
+		atomic_set(&chip->i2c_busy, 0);
+		return -ETIMEDOUT;
 	}
+
 	ret = i2c_smbus_write_i2c_block_data(chip->i2c_client, address,
 					     length, data);
 	if (ret < 0)
@@ -320,19 +330,15 @@ static int fusb302_i2c_block_write(struct fusb302_chip *chip, u8 address,
 static int fusb302_i2c_read(struct fusb302_chip *chip,
 			    u8 address, u8 *data)
 {
-	int retry_cnt;
 	int ret = 0;
 
 	atomic_set(&chip->i2c_busy, 1);
-	for (retry_cnt = 0; retry_cnt < FUSB302_RESUME_RETRY; retry_cnt++) {
-		if (atomic_read(&chip->pm_suspend)) {
-			pr_err("fusb302_i2c: pm suspend, retry %d/%d\n",
-			       retry_cnt + 1, FUSB302_RESUME_RETRY);
-			msleep(FUSB302_RESUME_RETRY_SLEEP);
-		} else {
-			break;
-		}
+
+	if (fusb302_is_suspended(chip)) {
+		atomic_set(&chip->i2c_busy, 0);
+		return -ETIMEDOUT;
 	}
+
 	ret = i2c_smbus_read_byte_data(chip->i2c_client, address);
 	*data = (u8)ret;
 	if (ret < 0)
@@ -345,33 +351,31 @@ static int fusb302_i2c_read(struct fusb302_chip *chip,
 static int fusb302_i2c_block_read(struct fusb302_chip *chip, u8 address,
 				  u8 length, u8 *data)
 {
-	int retry_cnt;
 	int ret = 0;
 
 	if (length <= 0)
 		return ret;
 	atomic_set(&chip->i2c_busy, 1);
-	for (retry_cnt = 0; retry_cnt < FUSB302_RESUME_RETRY; retry_cnt++) {
-		if (atomic_read(&chip->pm_suspend)) {
-			pr_err("fusb302_i2c: pm suspend, retry %d/%d\n",
-			       retry_cnt + 1, FUSB302_RESUME_RETRY);
-			msleep(FUSB302_RESUME_RETRY_SLEEP);
-		} else {
-			break;
-		}
+
+	if (fusb302_is_suspended(chip)) {
+		atomic_set(&chip->i2c_busy, 0);
+		return -ETIMEDOUT;
 	}
+
 	ret = i2c_smbus_read_i2c_block_data(chip->i2c_client, address,
 					    length, data);
 	if (ret < 0) {
 		fusb302_log(chip, "cannot block read 0x%02x, len=%d, ret=%d",
 			    address, length, ret);
-		return ret;
+		goto done;
 	}
 	if (ret != length) {
 		fusb302_log(chip, "only read %d/%d bytes from 0x%02x",
 			    ret, length, address);
-		return -EIO;
+		ret = -EIO;
 	}
+
+done:
 	atomic_set(&chip->i2c_busy, 0);
 
 	return ret;
@@ -489,7 +493,7 @@ static int tcpm_init(struct tcpc_dev *dev)
 	ret = fusb302_i2c_read(chip, FUSB_REG_STATUS0, &data);
 	if (ret < 0)
 		return ret;
-	chip->vbus_present = !!(FUSB_REG_STATUS0 & FUSB_REG_STATUS0_VBUSOK);
+	chip->vbus_present = !!(data & FUSB_REG_STATUS0_VBUSOK);
 	ret = fusb302_i2c_read(chip, FUSB_REG_DEVICE_ID, &data);
 	if (ret < 0)
 		return ret;
@@ -1025,7 +1029,7 @@ static int fusb302_pd_send_message(struct fusb302_chip *chip,
 	buf[pos++] = FUSB302_TKN_SYNC1;
 	buf[pos++] = FUSB302_TKN_SYNC2;
 
-	len = pd_header_cnt(msg->header) * 4;
+	len = pd_header_cnt_le(msg->header) * 4;
 	/* plug 2 for header */
 	len += 2;
 	if (len > 0x1F) {
@@ -1481,7 +1485,7 @@ static int fusb302_pd_read_message(struct fusb302_chip *chip,
 				     (u8 *)&msg->header);
 	if (ret < 0)
 		return ret;
-	len = pd_header_cnt(msg->header) * 4;
+	len = pd_header_cnt_le(msg->header) * 4;
 	/* add 4 to length to include the CRC */
 	if (len > PD_MAX_PAYLOAD * 4) {
 		fusb302_log(chip, "PD message too long %d", len);
@@ -1663,14 +1667,12 @@ static int init_gpio(struct fusb302_chip *chip)
 	if (ret < 0) {
 		fusb302_log(chip,
 			    "cannot set GPIO Int_N to input, ret=%d", ret);
-		gpio_free(chip->gpio_int_n);
 		return ret;
 	}
 	ret = gpio_to_irq(chip->gpio_int_n);
 	if (ret < 0) {
 		fusb302_log(chip,
 			    "cannot request IRQ for GPIO Int_N, ret=%d", ret);
-		gpio_free(chip->gpio_int_n);
 		return ret;
 	}
 	chip->gpio_int_n_irq = ret;
@@ -1787,11 +1789,13 @@ static const struct of_device_id fusb302_dt_match[] = {
 	{.compatible = "fcs,fusb302"},
 	{},
 };
+MODULE_DEVICE_TABLE(of, fusb302_dt_match);
 
 static const struct i2c_device_id fusb302_i2c_device_id[] = {
 	{"typec_fusb302", 0},
 	{},
 };
+MODULE_DEVICE_TABLE(i2c, fusb302_i2c_device_id);
 
 static const struct dev_pm_ops fusb302_pm_ops = {
 	.suspend = fusb302_pm_suspend,
diff --git a/drivers/staging/typec/pd.h b/drivers/staging/typec/pd.h
index 8d97bdb95f23..510ef7279900 100644
--- a/drivers/staging/typec/pd.h
+++ b/drivers/staging/typec/pd.h
@@ -92,6 +92,16 @@ static inline unsigned int pd_header_type_le(__le16 header)
 	return pd_header_type(le16_to_cpu(header));
 }
 
+static inline unsigned int pd_header_msgid(u16 header)
+{
+	return (header >> PD_HEADER_ID_SHIFT) & PD_HEADER_ID_MASK;
+}
+
+static inline unsigned int pd_header_msgid_le(__le16 header)
+{
+	return pd_header_msgid(le16_to_cpu(header));
+}
+
 #define PD_MAX_PAYLOAD		7
 
 struct pd_message {
diff --git a/drivers/staging/typec/pd_vdo.h b/drivers/staging/typec/pd_vdo.h
index dba172e0e0d1..d92259f8de0a 100644
--- a/drivers/staging/typec/pd_vdo.h
+++ b/drivers/staging/typec/pd_vdo.h
@@ -22,6 +22,9 @@
  * VDM object is minimum of VDM header + 6 additional data objects.
  */
 
+#define VDO_MAX_OBJECTS		6
+#define VDO_MAX_SIZE		(VDO_MAX_OBJECTS + 1)
+
 /*
  * VDM header
  * ----------
@@ -34,7 +37,6 @@
  * <5>      :: reserved (SVDM), command type (UVDM)
  * <4:0>    :: command
  */
-#define VDO_MAX_SIZE 7
 #define VDO(vid, type, custom)				\
 	(((vid) << 16) |				\
 	 ((type) << 15) |				\
diff --git a/drivers/staging/typec/tcpci.c b/drivers/staging/typec/tcpci.c
index 5e5be74c7850..df72d8b01e73 100644
--- a/drivers/staging/typec/tcpci.c
+++ b/drivers/staging/typec/tcpci.c
@@ -425,7 +425,7 @@ static const struct regmap_config tcpci_regmap_config = {
 	.max_register = 0x7F, /* 0x80 .. 0xFF are vendor defined */
 };
 
-const struct tcpc_config tcpci_tcpc_config = {
+static const struct tcpc_config tcpci_tcpc_config = {
 	.type = TYPEC_PORT_DFP,
 	.default_role = TYPEC_SINK,
 };
diff --git a/drivers/staging/typec/tcpm.c b/drivers/staging/typec/tcpm.c
index abba655ba00a..20eb4ebcf8c3 100644
--- a/drivers/staging/typec/tcpm.c
+++ b/drivers/staging/typec/tcpm.c
@@ -238,6 +238,7 @@ struct tcpm_port {
 	unsigned int hard_reset_count;
 	bool pd_capable;
 	bool explicit_contract;
+	unsigned int rx_msgid;
 
 	/* Partner capabilities/requests */
 	u32 sink_request;
@@ -251,6 +252,8 @@ struct tcpm_port {
 	unsigned int nr_src_pdo;
 	u32 snk_pdo[PDO_MAX_OBJECTS];
 	unsigned int nr_snk_pdo;
+	u32 snk_vdo[VDO_MAX_OBJECTS];
+	unsigned int nr_snk_vdo;
 
 	unsigned int max_snk_mv;
 	unsigned int max_snk_ma;
@@ -997,6 +1000,7 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt,
 	struct pd_mode_data *modep;
 	int rlen = 0;
 	u16 svid;
+	int i;
 
 	tcpm_log(port, "Rx VDM cmd 0x%x type %d cmd %d len %d",
 		 p0, cmd_type, cmd, cnt);
@@ -1007,6 +1011,14 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt,
 	case CMDT_INIT:
 		switch (cmd) {
 		case CMD_DISCOVER_IDENT:
+			/* 6.4.4.3.1: Only respond as UFP (device) */
+			if (port->data_role == TYPEC_DEVICE &&
+			    port->nr_snk_vdo) {
+				for (i = 0; i <  port->nr_snk_vdo; i++)
+					response[i + 1]
+						= cpu_to_le32(port->snk_vdo[i]);
+				rlen = port->nr_snk_vdo + 1;
+			}
 			break;
 		case CMD_DISCOVER_SVID:
 			break;
@@ -1415,6 +1427,7 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
 			break;
 		case SOFT_RESET_SEND:
 			port->message_id = 0;
+			port->rx_msgid = -1;
 			if (port->pwr_role == TYPEC_SOURCE)
 				next_state = SRC_SEND_CAPABILITIES;
 			else
@@ -1503,6 +1516,22 @@ static void tcpm_pd_rx_handler(struct work_struct *work)
 		 port->attached);
 
 	if (port->attached) {
+		enum pd_ctrl_msg_type type = pd_header_type_le(msg->header);
+		unsigned int msgid = pd_header_msgid_le(msg->header);
+
+		/*
+		 * USB PD standard, 6.6.1.2:
+		 * "... if MessageID value in a received Message is the
+		 * same as the stored value, the receiver shall return a
+		 * GoodCRC Message with that MessageID value and drop
+		 * the Message (this is a retry of an already received
+		 * Message). Note: this shall not apply to the Soft_Reset
+		 * Message which always has a MessageID value of zero."
+		 */
+		if (msgid == port->rx_msgid && type != PD_CTRL_SOFT_RESET)
+			goto done;
+		port->rx_msgid = msgid;
+
 		/*
 		 * If both ends believe to be DFP/host, we have a data role
 		 * mismatch.
@@ -1520,6 +1549,7 @@ static void tcpm_pd_rx_handler(struct work_struct *work)
 		}
 	}
 
+done:
 	mutex_unlock(&port->lock);
 	kfree(event);
 }
@@ -1719,8 +1749,7 @@ static int tcpm_pd_build_request(struct tcpm_port *port, u32 *rdo)
 	}
 	ma = min(ma, port->max_snk_ma);
 
-	/* XXX: Any other flags need to be set? */
-	flags = 0;
+	flags = RDO_USB_COMM | RDO_NO_SUSPEND;
 
 	/* Set mismatch bit if offered power is less than operating power */
 	mw = ma * mv / 1000;
@@ -1957,6 +1986,12 @@ static void tcpm_reset_port(struct tcpm_port *port)
 	port->attached = false;
 	port->pd_capable = false;
 
+	/*
+	 * First Rx ID should be 0; set this to a sentinel of -1 so that
+	 * we can check tcpm_pd_rx_handler() if we had seen it before.
+	 */
+	port->rx_msgid = -1;
+
 	port->tcpc->set_pd_rx(port->tcpc, false);
 	tcpm_init_vbus(port);	/* also disables charging */
 	tcpm_init_vconn(port);
@@ -2170,6 +2205,7 @@ static void run_state_machine(struct tcpm_port *port)
 		port->pwr_opmode = TYPEC_PWR_MODE_USB;
 		port->caps_count = 0;
 		port->message_id = 0;
+		port->rx_msgid = -1;
 		port->explicit_contract = false;
 		tcpm_set_state(port, SRC_SEND_CAPABILITIES, 0);
 		break;
@@ -2329,6 +2365,7 @@ static void run_state_machine(struct tcpm_port *port)
 		typec_set_pwr_opmode(port->typec_port, TYPEC_PWR_MODE_USB);
 		port->pwr_opmode = TYPEC_PWR_MODE_USB;
 		port->message_id = 0;
+		port->rx_msgid = -1;
 		port->explicit_contract = false;
 		tcpm_set_state(port, SNK_DISCOVERY, 0);
 		break;
@@ -2496,6 +2533,7 @@ static void run_state_machine(struct tcpm_port *port)
 	/* Soft_Reset states */
 	case SOFT_RESET:
 		port->message_id = 0;
+		port->rx_msgid = -1;
 		tcpm_pd_send_control(port, PD_CTRL_ACCEPT);
 		if (port->pwr_role == TYPEC_SOURCE)
 			tcpm_set_state(port, SRC_SEND_CAPABILITIES, 0);
@@ -2504,6 +2542,7 @@ static void run_state_machine(struct tcpm_port *port)
 		break;
 	case SOFT_RESET_SEND:
 		port->message_id = 0;
+		port->rx_msgid = -1;
 		if (tcpm_pd_send_control(port, PD_CTRL_SOFT_RESET))
 			tcpm_set_state_cond(port, hard_reset_state(port), 0);
 		else
@@ -2568,6 +2607,14 @@ static void run_state_machine(struct tcpm_port *port)
 		break;
 	case PR_SWAP_SRC_SNK_SOURCE_OFF:
 		tcpm_set_cc(port, TYPEC_CC_RD);
+		/*
+		 * USB-PD standard, 6.2.1.4, Port Power Role:
+		 * "During the Power Role Swap Sequence, for the initial Source
+		 * Port, the Port Power Role field shall be set to Sink in the
+		 * PS_RDY Message indicating that the initial Source’s power
+		 * supply is turned off"
+		 */
+		tcpm_set_pwr_role(port, TYPEC_SINK);
 		if (tcpm_pd_send_control(port, PD_CTRL_PS_RDY)) {
 			tcpm_set_state(port, ERROR_RECOVERY, 0);
 			break;
@@ -2575,7 +2622,6 @@ static void run_state_machine(struct tcpm_port *port)
 		tcpm_set_state_cond(port, SNK_UNATTACHED, PD_T_PS_SOURCE_ON);
 		break;
 	case PR_SWAP_SRC_SNK_SINK_ON:
-		tcpm_set_pwr_role(port, TYPEC_SINK);
 		tcpm_swap_complete(port, 0);
 		tcpm_set_state(port, SNK_STARTUP, 0);
 		break;
@@ -2587,8 +2633,15 @@ static void run_state_machine(struct tcpm_port *port)
 	case PR_SWAP_SNK_SRC_SOURCE_ON:
 		tcpm_set_cc(port, tcpm_rp_cc(port));
 		tcpm_set_vbus(port, true);
-		tcpm_pd_send_control(port, PD_CTRL_PS_RDY);
+		/*
+		 * USB PD standard, 6.2.1.4:
+		 * "Subsequent Messages initiated by the Policy Engine,
+		 * such as the PS_RDY Message sent to indicate that Vbus
+		 * is ready, will have the Port Power Role field set to
+		 * Source."
+		 */
 		tcpm_set_pwr_role(port, TYPEC_SOURCE);
+		tcpm_pd_send_control(port, PD_CTRL_PS_RDY);
 		tcpm_swap_complete(port, 0);
 		tcpm_set_state(port, SRC_STARTUP, 0);
 		break;
@@ -3292,6 +3345,20 @@ static int tcpm_copy_pdos(u32 *dest_pdo, const u32 *src_pdo,
 	return nr_pdo;
 }
 
+static int tcpm_copy_vdos(u32 *dest_vdo, const u32 *src_vdo,
+			  unsigned int nr_vdo)
+{
+	unsigned int i;
+
+	if (nr_vdo > VDO_MAX_OBJECTS)
+		nr_vdo = VDO_MAX_OBJECTS;
+
+	for (i = 0; i < nr_vdo; i++)
+		dest_vdo[i] = src_vdo[i];
+
+	return nr_vdo;
+}
+
 void tcpm_update_source_capabilities(struct tcpm_port *port, const u32 *pdo,
 				     unsigned int nr_pdo)
 {
@@ -3382,6 +3449,8 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
 					  tcpc->config->nr_src_pdo);
 	port->nr_snk_pdo = tcpm_copy_pdos(port->snk_pdo, tcpc->config->snk_pdo,
 					  tcpc->config->nr_snk_pdo);
+	port->nr_snk_vdo = tcpm_copy_vdos(port->snk_vdo, tcpc->config->snk_vdo,
+					  tcpc->config->nr_snk_vdo);
 
 	port->max_snk_mv = tcpc->config->max_snk_mv;
 	port->max_snk_ma = tcpc->config->max_snk_ma;
diff --git a/drivers/staging/typec/tcpm.h b/drivers/staging/typec/tcpm.h
index 969b365e6549..19c307d31a5a 100644
--- a/drivers/staging/typec/tcpm.h
+++ b/drivers/staging/typec/tcpm.h
@@ -60,6 +60,9 @@ struct tcpc_config {
 	const u32 *snk_pdo;
 	unsigned int nr_snk_pdo;
 
+	const u32 *snk_vdo;
+	unsigned int nr_snk_vdo;
+
 	unsigned int max_snk_mv;
 	unsigned int max_snk_ma;
 	unsigned int max_snk_mw;
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
index 988ee61fb4a7..d04db3f55519 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
@@ -502,8 +502,15 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
 	 */
 	sg_init_table(scatterlist, num_pages);
 	/* Now set the pages for each scatterlist */
-	for (i = 0; i < num_pages; i++)
-		sg_set_page(scatterlist + i, pages[i], PAGE_SIZE, 0);
+	for (i = 0; i < num_pages; i++)	{
+		unsigned int len = PAGE_SIZE - offset;
+
+		if (len > count)
+			len = count;
+		sg_set_page(scatterlist + i, pages[i], len, offset);
+		offset = 0;
+		count -= len;
+	}
 
 	dma_buffers = dma_map_sg(g_dev,
 				 scatterlist,
@@ -524,20 +531,20 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
 		u32 addr = sg_dma_address(sg);
 
 		/* Note: addrs is the address + page_count - 1
-		 * The firmware expects the block to be page
+		 * The firmware expects blocks after the first to be page-
 		 * aligned and a multiple of the page size
 		 */
 		WARN_ON(len == 0);
-		WARN_ON(len & ~PAGE_MASK);
-		WARN_ON(addr & ~PAGE_MASK);
+		WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
+		WARN_ON(i && (addr & ~PAGE_MASK));
 		if (k > 0 &&
-		    ((addrs[k - 1] & PAGE_MASK) |
-			((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT)
-		    == addr) {
-			addrs[k - 1] += (len >> PAGE_SHIFT);
-		} else {
-			addrs[k++] = addr | ((len >> PAGE_SHIFT) - 1);
-		}
+		    ((addrs[k - 1] & PAGE_MASK) +
+		     (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT))
+		    == (addr & PAGE_MASK))
+			addrs[k - 1] += ((len + PAGE_SIZE - 1) >> PAGE_SHIFT);
+		else
+			addrs[k++] = (addr & PAGE_MASK) |
+				(((len + PAGE_SIZE - 1) >> PAGE_SHIFT) - 1);
 	}
 
 	/* Partial cache lines (fragments) require special measures */
diff --git a/drivers/staging/vme/devices/vme_pio2_core.c b/drivers/staging/vme/devices/vme_pio2_core.c
index 20a2d835fdaa..367535b4b77f 100644
--- a/drivers/staging/vme/devices/vme_pio2_core.c
+++ b/drivers/staging/vme/devices/vme_pio2_core.c
@@ -466,16 +466,16 @@ static void __exit pio2_exit(void)
 
 /* These are required for each board */
 MODULE_PARM_DESC(bus, "Enumeration of VMEbus to which the board is connected");
-module_param_array(bus, int, &bus_num, 0444);
+module_param_hw_array(bus, int, other, &bus_num, 0444);
 
 MODULE_PARM_DESC(base, "Base VME address for PIO2 Registers");
-module_param_array(base, long, &base_num, 0444);
+module_param_hw_array(base, long, other, &base_num, 0444);
 
 MODULE_PARM_DESC(vector, "VME IRQ Vector (Lower 4 bits masked)");
-module_param_array(vector, int, &vector_num, 0444);
+module_param_hw_array(vector, int, other, &vector_num, 0444);
 
 MODULE_PARM_DESC(level, "VME IRQ Level");
-module_param_array(level, int, &level_num, 0444);
+module_param_hw_array(level, int, other, &level_num, 0444);
 
 MODULE_PARM_DESC(variant, "Last 4 characters of PIO2 board variant");
 module_param_array(variant, charp, &variant_num, 0444);
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index e3f9ed3690b7..3fdca2cdd8da 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -128,11 +128,9 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf)
 		return ERR_PTR(-EINVAL);
 	}
 
-	tiqn = kzalloc(sizeof(struct iscsi_tiqn), GFP_KERNEL);
-	if (!tiqn) {
-		pr_err("Unable to allocate struct iscsi_tiqn\n");
+	tiqn = kzalloc(sizeof(*tiqn), GFP_KERNEL);
+	if (!tiqn)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	sprintf(tiqn->tiqn, "%s", buf);
 	INIT_LIST_HEAD(&tiqn->tiqn_list);
@@ -362,9 +360,8 @@ struct iscsi_np *iscsit_add_np(
 		return np;
 	}
 
-	np = kzalloc(sizeof(struct iscsi_np), GFP_KERNEL);
+	np = kzalloc(sizeof(*np), GFP_KERNEL);
 	if (!np) {
-		pr_err("Unable to allocate memory for struct iscsi_np\n");
 		mutex_unlock(&np_lock);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -696,12 +693,10 @@ static int __init iscsi_target_init_module(void)
 	int ret = 0, size;
 
 	pr_debug("iSCSI-Target "ISCSIT_VERSION"\n");
-
-	iscsit_global = kzalloc(sizeof(struct iscsit_global), GFP_KERNEL);
-	if (!iscsit_global) {
-		pr_err("Unable to allocate memory for iscsit_global\n");
+	iscsit_global = kzalloc(sizeof(*iscsit_global), GFP_KERNEL);
+	if (!iscsit_global)
 		return -1;
-	}
+
 	spin_lock_init(&iscsit_global->ts_bitmap_lock);
 	mutex_init(&auth_id_lock);
 	spin_lock_init(&sess_idr_lock);
@@ -714,10 +709,8 @@ static int __init iscsi_target_init_module(void)
 
 	size = BITS_TO_LONGS(ISCSIT_BITMAP_BITS) * sizeof(long);
 	iscsit_global->ts_bitmap = vzalloc(size);
-	if (!iscsit_global->ts_bitmap) {
-		pr_err("Unable to allocate iscsit_global->ts_bitmap\n");
+	if (!iscsit_global->ts_bitmap)
 		goto configfs_out;
-	}
 
 	lio_qr_cache = kmem_cache_create("lio_qr_cache",
 			sizeof(struct iscsi_queue_req),
@@ -984,12 +977,9 @@ static int iscsit_allocate_iovecs(struct iscsi_cmd *cmd)
 	u32 iov_count = max(1UL, DIV_ROUND_UP(cmd->se_cmd.data_length, PAGE_SIZE));
 
 	iov_count += ISCSI_IOV_DATA_BUFFER;
-
-	cmd->iov_data = kzalloc(iov_count * sizeof(struct kvec), GFP_KERNEL);
-	if (!cmd->iov_data) {
-		pr_err("Unable to allocate cmd->iov_data\n");
+	cmd->iov_data = kcalloc(iov_count, sizeof(*cmd->iov_data), GFP_KERNEL);
+	if (!cmd->iov_data)
 		return -ENOMEM;
-	}
 
 	cmd->orig_iov_data_count = iov_count;
 	return 0;
@@ -1289,6 +1279,18 @@ iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr,
 	 */
 	if (dump_payload)
 		goto after_immediate_data;
+	/*
+	 * Check for underflow case where both EDTL and immediate data payload
+	 * exceeds what is presented by CDB's TRANSFER LENGTH, and what has
+	 * already been set in target_cmd_size_check() as se_cmd->data_length.
+	 *
+	 * For this special case, fail the command and dump the immediate data
+	 * payload.
+	 */
+	if (cmd->first_burst_len > cmd->se_cmd.data_length) {
+		cmd->sense_reason = TCM_INVALID_CDB_FIELD;
+		goto after_immediate_data;
+	}
 
 	immed_ret = iscsit_handle_immediate_data(cmd, hdr,
 					cmd->first_burst_len);
@@ -1850,8 +1852,6 @@ static int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 
 		ping_data = kzalloc(payload_length + 1, GFP_KERNEL);
 		if (!ping_data) {
-			pr_err("Unable to allocate memory for"
-				" NOPOUT ping data.\n");
 			ret = -1;
 			goto out;
 		}
@@ -1997,15 +1997,11 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 		hdr->refcmdsn = cpu_to_be32(ISCSI_RESERVED_TAG);
 
 	cmd->data_direction = DMA_NONE;
-
-	cmd->tmr_req = kzalloc(sizeof(struct iscsi_tmr_req), GFP_KERNEL);
-	if (!cmd->tmr_req) {
-		pr_err("Unable to allocate memory for"
-			" Task Management command!\n");
+	cmd->tmr_req = kzalloc(sizeof(*cmd->tmr_req), GFP_KERNEL);
+	if (!cmd->tmr_req)
 		return iscsit_add_reject_cmd(cmd,
 					     ISCSI_REASON_BOOKMARK_NO_RESOURCES,
 					     buf);
-	}
 
 	/*
 	 * TASK_REASSIGN for ERL=2 / connection stays inside of
@@ -2265,11 +2261,9 @@ iscsit_handle_text_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 		struct kvec iov[3];
 
 		text_in = kzalloc(payload_length, GFP_KERNEL);
-		if (!text_in) {
-			pr_err("Unable to allocate memory for"
-				" incoming text parameters\n");
+		if (!text_in)
 			goto reject;
-		}
+
 		cmd->text_in_ptr = text_in;
 
 		memset(iov, 0, 3 * sizeof(struct kvec));
@@ -3353,11 +3347,9 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd,
 			 SENDTARGETS_BUF_LIMIT);
 
 	payload = kzalloc(buffer_len, GFP_KERNEL);
-	if (!payload) {
-		pr_err("Unable to allocate memory for sendtargets"
-				" response.\n");
+	if (!payload)
 		return -ENOMEM;
-	}
+
 	/*
 	 * Locate pointer to iqn./eui. string for ICF_SENDTARGETS_SINGLE
 	 * explicit case..
@@ -3810,6 +3802,8 @@ int iscsi_target_tx_thread(void *arg)
 {
 	int ret = 0;
 	struct iscsi_conn *conn = arg;
+	bool conn_freed = false;
+
 	/*
 	 * Allow ourselves to be interrupted by SIGINT so that a
 	 * connection recovery / failure event can be triggered externally.
@@ -3835,12 +3829,14 @@ get_immediate:
 			goto transport_err;
 
 		ret = iscsit_handle_response_queue(conn);
-		if (ret == 1)
+		if (ret == 1) {
 			goto get_immediate;
-		else if (ret == -ECONNRESET)
+		} else if (ret == -ECONNRESET) {
+			conn_freed = true;
 			goto out;
-		else if (ret < 0)
+		} else if (ret < 0) {
 			goto transport_err;
+		}
 	}
 
 transport_err:
@@ -3850,8 +3846,13 @@ transport_err:
 	 * responsible for cleaning up the early connection failure.
 	 */
 	if (conn->conn_state != TARG_CONN_STATE_IN_LOGIN)
-		iscsit_take_action_for_connection_exit(conn);
+		iscsit_take_action_for_connection_exit(conn, &conn_freed);
 out:
+	if (!conn_freed) {
+		while (!kthread_should_stop()) {
+			msleep(100);
+		}
+	}
 	return 0;
 }
 
@@ -4024,6 +4025,7 @@ int iscsi_target_rx_thread(void *arg)
 {
 	int rc;
 	struct iscsi_conn *conn = arg;
+	bool conn_freed = false;
 
 	/*
 	 * Allow ourselves to be interrupted by SIGINT so that a
@@ -4036,7 +4038,7 @@ int iscsi_target_rx_thread(void *arg)
 	 */
 	rc = wait_for_completion_interruptible(&conn->rx_login_comp);
 	if (rc < 0 || iscsi_target_check_conn_state(conn))
-		return 0;
+		goto out;
 
 	if (!conn->conn_transport->iscsit_get_rx_pdu)
 		return 0;
@@ -4045,7 +4047,15 @@ int iscsi_target_rx_thread(void *arg)
 
 	if (!signal_pending(current))
 		atomic_set(&conn->transport_failed, 1);
-	iscsit_take_action_for_connection_exit(conn);
+	iscsit_take_action_for_connection_exit(conn, &conn_freed);
+
+out:
+	if (!conn_freed) {
+		while (!kthread_should_stop()) {
+			msleep(100);
+		}
+	}
+
 	return 0;
 }
 
@@ -4425,8 +4435,11 @@ static void iscsit_logout_post_handler_closesession(
 	 * always sleep waiting for RX/TX thread shutdown to complete
 	 * within iscsit_close_connection().
 	 */
-	if (!conn->conn_transport->rdma_shutdown)
+	if (!conn->conn_transport->rdma_shutdown) {
 		sleep = cmpxchg(&conn->tx_thread_active, true, false);
+		if (!sleep)
+			return;
+	}
 
 	atomic_set(&conn->conn_logout_remove, 0);
 	complete(&conn->conn_logout_comp);
@@ -4442,8 +4455,11 @@ static void iscsit_logout_post_handler_samecid(
 {
 	int sleep = 1;
 
-	if (!conn->conn_transport->rdma_shutdown)
+	if (!conn->conn_transport->rdma_shutdown) {
 		sleep = cmpxchg(&conn->tx_thread_active, true, false);
+		if (!sleep)
+			return;
+	}
 
 	atomic_set(&conn->conn_logout_remove, 0);
 	complete(&conn->conn_logout_comp);
@@ -4683,6 +4699,7 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force)
 			continue;
 		}
 		atomic_set(&sess->session_reinstatement, 1);
+		atomic_set(&sess->session_fall_back_to_erl0, 1);
 		spin_unlock(&sess->conn_lock);
 
 		list_move_tail(&se_sess->sess_list, &free_list);
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 5798810197ec..535a8e06a401 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -1506,6 +1506,7 @@ static void lio_tpg_close_session(struct se_session *se_sess)
 		return;
 	}
 	atomic_set(&sess->session_reinstatement, 1);
+	atomic_set(&sess->session_fall_back_to_erl0, 1);
 	spin_unlock(&sess->conn_lock);
 
 	iscsit_stop_time2retain_timer(sess);
diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c
index 9a96e17bf7cd..7fe2aa73cff6 100644
--- a/drivers/target/iscsi/iscsi_target_erl0.c
+++ b/drivers/target/iscsi/iscsi_target_erl0.c
@@ -930,8 +930,10 @@ static void iscsit_handle_connection_cleanup(struct iscsi_conn *conn)
 	}
 }
 
-void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn)
+void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn, bool *conn_freed)
 {
+	*conn_freed = false;
+
 	spin_lock_bh(&conn->state_lock);
 	if (atomic_read(&conn->connection_exit)) {
 		spin_unlock_bh(&conn->state_lock);
@@ -942,6 +944,7 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn)
 	if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) {
 		spin_unlock_bh(&conn->state_lock);
 		iscsit_close_connection(conn);
+		*conn_freed = true;
 		return;
 	}
 
@@ -955,4 +958,5 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn)
 	spin_unlock_bh(&conn->state_lock);
 
 	iscsit_handle_connection_cleanup(conn);
+	*conn_freed = true;
 }
diff --git a/drivers/target/iscsi/iscsi_target_erl0.h b/drivers/target/iscsi/iscsi_target_erl0.h
index 60e69e2af6ed..3822d9cd1230 100644
--- a/drivers/target/iscsi/iscsi_target_erl0.h
+++ b/drivers/target/iscsi/iscsi_target_erl0.h
@@ -15,6 +15,6 @@ extern int iscsit_stop_time2retain_timer(struct iscsi_session *);
 extern void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *);
 extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int);
 extern void iscsit_fall_back_to_erl0(struct iscsi_session *);
-extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *);
+extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *, bool *);
 
 #endif   /*** ISCSI_TARGET_ERL0_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index ad8f3011bdc2..92b96b51d506 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -208,6 +208,7 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn)
 			    initiatorname_param->value) &&
 		   (sess_p->sess_ops->SessionType == sessiontype))) {
 			atomic_set(&sess_p->session_reinstatement, 1);
+			atomic_set(&sess_p->session_fall_back_to_erl0, 1);
 			spin_unlock(&sess_p->conn_lock);
 			iscsit_inc_session_usage_count(sess_p);
 			iscsit_stop_time2retain_timer(sess_p);
@@ -1463,5 +1464,9 @@ int iscsi_target_login_thread(void *arg)
 			break;
 	}
 
+	while (!kthread_should_stop()) {
+		msleep(100);
+	}
+
 	return 0;
 }
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 7ccc9c1cbfd1..6f88b31242b0 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -493,14 +493,60 @@ static void iscsi_target_restore_sock_callbacks(struct iscsi_conn *conn)
 
 static int iscsi_target_do_login(struct iscsi_conn *, struct iscsi_login *);
 
-static bool iscsi_target_sk_state_check(struct sock *sk)
+static bool __iscsi_target_sk_check_close(struct sock *sk)
 {
 	if (sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) {
-		pr_debug("iscsi_target_sk_state_check: TCP_CLOSE_WAIT|TCP_CLOSE,"
+		pr_debug("__iscsi_target_sk_check_close: TCP_CLOSE_WAIT|TCP_CLOSE,"
 			"returning FALSE\n");
-		return false;
+		return true;
 	}
-	return true;
+	return false;
+}
+
+static bool iscsi_target_sk_check_close(struct iscsi_conn *conn)
+{
+	bool state = false;
+
+	if (conn->sock) {
+		struct sock *sk = conn->sock->sk;
+
+		read_lock_bh(&sk->sk_callback_lock);
+		state = (__iscsi_target_sk_check_close(sk) ||
+			 test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags));
+		read_unlock_bh(&sk->sk_callback_lock);
+	}
+	return state;
+}
+
+static bool iscsi_target_sk_check_flag(struct iscsi_conn *conn, unsigned int flag)
+{
+	bool state = false;
+
+	if (conn->sock) {
+		struct sock *sk = conn->sock->sk;
+
+		read_lock_bh(&sk->sk_callback_lock);
+		state = test_bit(flag, &conn->login_flags);
+		read_unlock_bh(&sk->sk_callback_lock);
+	}
+	return state;
+}
+
+static bool iscsi_target_sk_check_and_clear(struct iscsi_conn *conn, unsigned int flag)
+{
+	bool state = false;
+
+	if (conn->sock) {
+		struct sock *sk = conn->sock->sk;
+
+		write_lock_bh(&sk->sk_callback_lock);
+		state = (__iscsi_target_sk_check_close(sk) ||
+			 test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags));
+		if (!state)
+			clear_bit(flag, &conn->login_flags);
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+	return state;
 }
 
 static void iscsi_target_login_drop(struct iscsi_conn *conn, struct iscsi_login *login)
@@ -540,6 +586,20 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
 
 	pr_debug("entering iscsi_target_do_login_rx, conn: %p, %s:%d\n",
 			conn, current->comm, current->pid);
+	/*
+	 * If iscsi_target_do_login_rx() has been invoked by ->sk_data_ready()
+	 * before initial PDU processing in iscsi_target_start_negotiation()
+	 * has completed, go ahead and retry until it's cleared.
+	 *
+	 * Otherwise if the TCP connection drops while this is occuring,
+	 * iscsi_target_start_negotiation() will detect the failure, call
+	 * cancel_delayed_work_sync(&conn->login_work), and cleanup the
+	 * remaining iscsi connection resources from iscsi_np process context.
+	 */
+	if (iscsi_target_sk_check_flag(conn, LOGIN_FLAGS_INITIAL_PDU)) {
+		schedule_delayed_work(&conn->login_work, msecs_to_jiffies(10));
+		return;
+	}
 
 	spin_lock(&tpg->tpg_state_lock);
 	state = (tpg->tpg_state == TPG_STATE_ACTIVE);
@@ -547,26 +607,12 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
 
 	if (!state) {
 		pr_debug("iscsi_target_do_login_rx: tpg_state != TPG_STATE_ACTIVE\n");
-		iscsi_target_restore_sock_callbacks(conn);
-		iscsi_target_login_drop(conn, login);
-		iscsit_deaccess_np(np, tpg, tpg_np);
-		return;
+		goto err;
 	}
 
-	if (conn->sock) {
-		struct sock *sk = conn->sock->sk;
-
-		read_lock_bh(&sk->sk_callback_lock);
-		state = iscsi_target_sk_state_check(sk);
-		read_unlock_bh(&sk->sk_callback_lock);
-
-		if (!state) {
-			pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n");
-			iscsi_target_restore_sock_callbacks(conn);
-			iscsi_target_login_drop(conn, login);
-			iscsit_deaccess_np(np, tpg, tpg_np);
-			return;
-		}
+	if (iscsi_target_sk_check_close(conn)) {
+		pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n");
+		goto err;
 	}
 
 	conn->login_kworker = current;
@@ -584,34 +630,29 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
 	flush_signals(current);
 	conn->login_kworker = NULL;
 
-	if (rc < 0) {
-		iscsi_target_restore_sock_callbacks(conn);
-		iscsi_target_login_drop(conn, login);
-		iscsit_deaccess_np(np, tpg, tpg_np);
-		return;
-	}
+	if (rc < 0)
+		goto err;
 
 	pr_debug("iscsi_target_do_login_rx after rx_login_io, %p, %s:%d\n",
 			conn, current->comm, current->pid);
 
 	rc = iscsi_target_do_login(conn, login);
 	if (rc < 0) {
-		iscsi_target_restore_sock_callbacks(conn);
-		iscsi_target_login_drop(conn, login);
-		iscsit_deaccess_np(np, tpg, tpg_np);
+		goto err;
 	} else if (!rc) {
-		if (conn->sock) {
-			struct sock *sk = conn->sock->sk;
-
-			write_lock_bh(&sk->sk_callback_lock);
-			clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags);
-			write_unlock_bh(&sk->sk_callback_lock);
-		}
+		if (iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_READ_ACTIVE))
+			goto err;
 	} else if (rc == 1) {
 		iscsi_target_nego_release(conn);
 		iscsi_post_login_handler(np, conn, zero_tsih);
 		iscsit_deaccess_np(np, tpg, tpg_np);
 	}
+	return;
+
+err:
+	iscsi_target_restore_sock_callbacks(conn);
+	iscsi_target_login_drop(conn, login);
+	iscsit_deaccess_np(np, tpg, tpg_np);
 }
 
 static void iscsi_target_do_cleanup(struct work_struct *work)
@@ -659,31 +700,54 @@ static void iscsi_target_sk_state_change(struct sock *sk)
 		orig_state_change(sk);
 		return;
 	}
+	state = __iscsi_target_sk_check_close(sk);
+	pr_debug("__iscsi_target_sk_close_change: state: %d\n", state);
+
 	if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) {
 		pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1 sk_state_change"
 			 " conn: %p\n", conn);
+		if (state)
+			set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags);
 		write_unlock_bh(&sk->sk_callback_lock);
 		orig_state_change(sk);
 		return;
 	}
-	if (test_and_set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) {
+	if (test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) {
 		pr_debug("Got LOGIN_FLAGS_CLOSED=1 sk_state_change conn: %p\n",
 			 conn);
 		write_unlock_bh(&sk->sk_callback_lock);
 		orig_state_change(sk);
 		return;
 	}
+	/*
+	 * If the TCP connection has dropped, go ahead and set LOGIN_FLAGS_CLOSED,
+	 * but only queue conn->login_work -> iscsi_target_do_login_rx()
+	 * processing if LOGIN_FLAGS_INITIAL_PDU has already been cleared.
+	 *
+	 * When iscsi_target_do_login_rx() runs, iscsi_target_sk_check_close()
+	 * will detect the dropped TCP connection from delayed workqueue context.
+	 *
+	 * If LOGIN_FLAGS_INITIAL_PDU is still set, which means the initial
+	 * iscsi_target_start_negotiation() is running, iscsi_target_do_login()
+	 * via iscsi_target_sk_check_close() or iscsi_target_start_negotiation()
+	 * via iscsi_target_sk_check_and_clear() is responsible for detecting the
+	 * dropped TCP connection in iscsi_np process context, and cleaning up
+	 * the remaining iscsi connection resources.
+	 */
+	if (state) {
+		pr_debug("iscsi_target_sk_state_change got failed state\n");
+		set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags);
+		state = test_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags);
+		write_unlock_bh(&sk->sk_callback_lock);
 
-	state = iscsi_target_sk_state_check(sk);
-	write_unlock_bh(&sk->sk_callback_lock);
-
-	pr_debug("iscsi_target_sk_state_change: state: %d\n", state);
+		orig_state_change(sk);
 
-	if (!state) {
-		pr_debug("iscsi_target_sk_state_change got failed state\n");
-		schedule_delayed_work(&conn->login_cleanup_work, 0);
+		if (!state)
+			schedule_delayed_work(&conn->login_work, 0);
 		return;
 	}
+	write_unlock_bh(&sk->sk_callback_lock);
+
 	orig_state_change(sk);
 }
 
@@ -946,6 +1010,15 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo
 			if (iscsi_target_handle_csg_one(conn, login) < 0)
 				return -1;
 			if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) {
+				/*
+				 * Check to make sure the TCP connection has not
+				 * dropped asynchronously while session reinstatement
+				 * was occuring in this kthread context, before
+				 * transitioning to full feature phase operation.
+				 */
+				if (iscsi_target_sk_check_close(conn))
+					return -1;
+
 				login->tsih = conn->sess->tsih;
 				login->login_complete = 1;
 				iscsi_target_restore_sock_callbacks(conn);
@@ -972,21 +1045,6 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo
 		break;
 	}
 
-	if (conn->sock) {
-		struct sock *sk = conn->sock->sk;
-		bool state;
-
-		read_lock_bh(&sk->sk_callback_lock);
-		state = iscsi_target_sk_state_check(sk);
-		read_unlock_bh(&sk->sk_callback_lock);
-
-		if (!state) {
-			pr_debug("iscsi_target_do_login() failed state for"
-				 " conn: %p\n", conn);
-			return -1;
-		}
-	}
-
 	return 0;
 }
 
@@ -1255,10 +1313,22 @@ int iscsi_target_start_negotiation(
 
 		write_lock_bh(&sk->sk_callback_lock);
 		set_bit(LOGIN_FLAGS_READY, &conn->login_flags);
+		set_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags);
 		write_unlock_bh(&sk->sk_callback_lock);
 	}
-
+	/*
+	 * If iscsi_target_do_login returns zero to signal more PDU
+	 * exchanges are required to complete the login, go ahead and
+	 * clear LOGIN_FLAGS_INITIAL_PDU but only if the TCP connection
+	 * is still active.
+	 *
+	 * Otherwise if TCP connection dropped asynchronously, go ahead
+	 * and perform connection cleanup now.
+	 */
 	ret = iscsi_target_do_login(conn, login);
+	if (!ret && iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_INITIAL_PDU))
+		ret = -1;
+
 	if (ret < 0) {
 		cancel_delayed_work_sync(&conn->login_work);
 		cancel_delayed_work_sync(&conn->login_cleanup_work);
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 70657fd56440..0326607e5ab8 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -533,6 +533,7 @@ DEF_CONFIGFS_ATTRIB_SHOW(emulate_3pc);
 DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_type);
 DEF_CONFIGFS_ATTRIB_SHOW(hw_pi_prot_type);
 DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_format);
+DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_verify);
 DEF_CONFIGFS_ATTRIB_SHOW(enforce_pr_isids);
 DEF_CONFIGFS_ATTRIB_SHOW(is_nonrot);
 DEF_CONFIGFS_ATTRIB_SHOW(emulate_rest_reord);
@@ -823,6 +824,7 @@ static ssize_t pi_prot_type_store(struct config_item *item,
 		ret = dev->transport->init_prot(dev);
 		if (ret) {
 			da->pi_prot_type = old_prot;
+			da->pi_prot_verify = (bool) da->pi_prot_type;
 			return ret;
 		}
 
@@ -830,6 +832,7 @@ static ssize_t pi_prot_type_store(struct config_item *item,
 		dev->transport->free_prot(dev);
 	}
 
+	da->pi_prot_verify = (bool) da->pi_prot_type;
 	pr_debug("dev[%p]: SE Device Protection Type: %d\n", dev, flag);
 	return count;
 }
@@ -872,6 +875,35 @@ static ssize_t pi_prot_format_store(struct config_item *item,
 	return count;
 }
 
+static ssize_t pi_prot_verify_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct se_dev_attrib *da = to_attrib(item);
+	bool flag;
+	int ret;
+
+	ret = strtobool(page, &flag);
+	if (ret < 0)
+		return ret;
+
+	if (!flag) {
+		da->pi_prot_verify = flag;
+		return count;
+	}
+	if (da->hw_pi_prot_type) {
+		pr_warn("DIF protection enabled on underlying hardware,"
+			" ignoring\n");
+		return count;
+	}
+	if (!da->pi_prot_type) {
+		pr_warn("DIF protection not supported by backend, ignoring\n");
+		return count;
+	}
+	da->pi_prot_verify = flag;
+
+	return count;
+}
+
 static ssize_t force_pr_aptpl_store(struct config_item *item,
 		const char *page, size_t count)
 {
@@ -1067,6 +1099,7 @@ CONFIGFS_ATTR(, emulate_3pc);
 CONFIGFS_ATTR(, pi_prot_type);
 CONFIGFS_ATTR_RO(, hw_pi_prot_type);
 CONFIGFS_ATTR(, pi_prot_format);
+CONFIGFS_ATTR(, pi_prot_verify);
 CONFIGFS_ATTR(, enforce_pr_isids);
 CONFIGFS_ATTR(, is_nonrot);
 CONFIGFS_ATTR(, emulate_rest_reord);
@@ -1104,6 +1137,7 @@ struct configfs_attribute *sbc_attrib_attrs[] = {
 	&attr_pi_prot_type,
 	&attr_hw_pi_prot_type,
 	&attr_pi_prot_format,
+	&attr_pi_prot_verify,
 	&attr_enforce_pr_isids,
 	&attr_is_nonrot,
 	&attr_emulate_rest_reord,
@@ -1366,7 +1400,7 @@ static ssize_t target_pr_res_holder_show(struct config_item *item, char *page)
 	struct se_device *dev = pr_to_dev(item);
 	int ret;
 
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
 		return sprintf(page, "Passthrough\n");
 
 	spin_lock(&dev->dev_reservation_lock);
@@ -1506,7 +1540,7 @@ static ssize_t target_pr_res_type_show(struct config_item *item, char *page)
 {
 	struct se_device *dev = pr_to_dev(item);
 
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
 		return sprintf(page, "SPC_PASSTHROUGH\n");
 	else if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
 		return sprintf(page, "SPC2_RESERVATIONS\n");
@@ -1519,7 +1553,7 @@ static ssize_t target_pr_res_aptpl_active_show(struct config_item *item,
 {
 	struct se_device *dev = pr_to_dev(item);
 
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
 		return 0;
 
 	return sprintf(page, "APTPL Bit Status: %s\n",
@@ -1531,7 +1565,7 @@ static ssize_t target_pr_res_aptpl_metadata_show(struct config_item *item,
 {
 	struct se_device *dev = pr_to_dev(item);
 
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
 		return 0;
 
 	return sprintf(page, "Ready to process PR APTPL metadata..\n");
@@ -1577,7 +1611,7 @@ static ssize_t target_pr_res_aptpl_metadata_store(struct config_item *item,
 	u16 tpgt = 0;
 	u8 type = 0;
 
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
 		return count;
 	if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
 		return count;
@@ -2511,7 +2545,7 @@ static ssize_t target_tg_pt_gp_alua_support_##_name##_store(		\
 	int ret;							\
 									\
 	if (!t->tg_pt_gp_valid_id) {					\
-		pr_err("Unable to do set ##_name ALUA state on non"	\
+		pr_err("Unable to do set " #_name " ALUA state on non"	\
 		       " valid tg_pt_gp ID: %hu\n",			\
 		       t->tg_pt_gp_valid_id);				\
 		return -EINVAL;						\
@@ -2643,13 +2677,13 @@ static ssize_t target_tg_pt_gp_tg_pt_gp_id_store(struct config_item *item,
 
 	ret = kstrtoul(page, 0, &tg_pt_gp_id);
 	if (ret < 0) {
-		pr_err("kstrtoul() returned %d for"
-			" tg_pt_gp_id\n", ret);
+		pr_err("ALUA tg_pt_gp_id: invalid value '%s' for tg_pt_gp_id\n",
+		       page);
 		return ret;
 	}
 	if (tg_pt_gp_id > 0x0000ffff) {
-		pr_err("ALUA tg_pt_gp_id: %lu exceeds maximum:"
-			" 0x0000ffff\n", tg_pt_gp_id);
+		pr_err("ALUA tg_pt_gp_id: %lu exceeds maximum: 0x0000ffff\n",
+		       tg_pt_gp_id);
 		return -EINVAL;
 	}
 
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index d2f089cfa9ae..8add07f387f9 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -1045,6 +1045,8 @@ passthrough_parse_cdb(struct se_cmd *cmd,
 	sense_reason_t (*exec_cmd)(struct se_cmd *cmd))
 {
 	unsigned char *cdb = cmd->t_task_cdb;
+	struct se_device *dev = cmd->se_dev;
+	unsigned int size;
 
 	/*
 	 * Clear a lun set in the cdb if the initiator talking to use spoke
@@ -1076,6 +1078,42 @@ passthrough_parse_cdb(struct se_cmd *cmd,
 		return TCM_NO_SENSE;
 	}
 
+	/*
+	 * For PERSISTENT RESERVE IN/OUT, RELEASE, and RESERVE we need to
+	 * emulate the response, since tcmu does not have the information
+	 * required to process these commands.
+	 */
+	if (!(dev->transport->transport_flags &
+	      TRANSPORT_FLAG_PASSTHROUGH_PGR)) {
+		if (cdb[0] == PERSISTENT_RESERVE_IN) {
+			cmd->execute_cmd = target_scsi3_emulate_pr_in;
+			size = (cdb[7] << 8) + cdb[8];
+			return target_cmd_size_check(cmd, size);
+		}
+		if (cdb[0] == PERSISTENT_RESERVE_OUT) {
+			cmd->execute_cmd = target_scsi3_emulate_pr_out;
+			size = (cdb[7] << 8) + cdb[8];
+			return target_cmd_size_check(cmd, size);
+		}
+
+		if (cdb[0] == RELEASE || cdb[0] == RELEASE_10) {
+			cmd->execute_cmd = target_scsi2_reservation_release;
+			if (cdb[0] == RELEASE_10)
+				size = (cdb[7] << 8) | cdb[8];
+			else
+				size = cmd->data_length;
+			return target_cmd_size_check(cmd, size);
+		}
+		if (cdb[0] == RESERVE || cdb[0] == RESERVE_10) {
+			cmd->execute_cmd = target_scsi2_reservation_reserve;
+			if (cdb[0] == RESERVE_10)
+				size = (cdb[7] << 8) | cdb[8];
+			else
+				size = cmd->data_length;
+			return target_cmd_size_check(cmd, size);
+		}
+	}
+
 	/* Set DATA_CDB flag for ops that should have it */
 	switch (cdb[0]) {
 	case READ_6:
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 87aa376a1a1a..73b8f93a5fef 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -277,12 +277,11 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
 	else
 		ret = vfs_iter_read(fd, &iter, &pos);
 
-	kfree(bvec);
-
 	if (is_write) {
 		if (ret < 0 || ret != data_length) {
 			pr_err("%s() write returned %d\n", __func__, ret);
-			return (ret < 0 ? ret : -EINVAL);
+			if (ret >= 0)
+				ret = -EINVAL;
 		}
 	} else {
 		/*
@@ -295,17 +294,29 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
 				pr_err("%s() returned %d, expecting %u for "
 						"S_ISBLK\n", __func__, ret,
 						data_length);
-				return (ret < 0 ? ret : -EINVAL);
+				if (ret >= 0)
+					ret = -EINVAL;
 			}
 		} else {
 			if (ret < 0) {
 				pr_err("%s() returned %d for non S_ISBLK\n",
 						__func__, ret);
-				return ret;
+			} else if (ret != data_length) {
+				/*
+				 * Short read case:
+				 * Probably some one truncate file under us.
+				 * We must explicitly zero sg-pages to prevent
+				 * expose uninizialized pages to userspace.
+				 */
+				if (ret < data_length)
+					ret += iov_iter_zero(data_length - ret, &iter);
+				else
+					ret = -EINVAL;
 			}
 		}
 	}
-	return 1;
+	kfree(bvec);
+	return ret;
 }
 
 static sense_reason_t
@@ -543,7 +554,8 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 		ret = fd_do_rw(cmd, file, dev->dev_attrib.block_size,
 			       sgl, sgl_nents, cmd->data_length, 0);
 
-		if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type) {
+		if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type &&
+		    dev->dev_attrib.pi_prot_verify) {
 			u32 sectors = cmd->data_length >>
 					ilog2(dev->dev_attrib.block_size);
 
@@ -553,7 +565,8 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 				return rc;
 		}
 	} else {
-		if (cmd->prot_type && dev->dev_attrib.pi_prot_type) {
+		if (cmd->prot_type && dev->dev_attrib.pi_prot_type &&
+		    dev->dev_attrib.pi_prot_verify) {
 			u32 sectors = cmd->data_length >>
 					ilog2(dev->dev_attrib.block_size);
 
@@ -595,8 +608,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 	if (ret < 0)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
-	if (ret)
-		target_complete_cmd(cmd, SAM_STAT_GOOD);
+	target_complete_cmd(cmd, SAM_STAT_GOOD);
 	return 0;
 }
 
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index d316ed537d59..c05d38016556 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -93,7 +93,7 @@ static int iblock_configure_device(struct se_device *dev)
 		return -EINVAL;
 	}
 
-	ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0);
+	ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 	if (!ib_dev->ibd_bio_set) {
 		pr_err("IBLOCK: Unable to create bioset\n");
 		goto out;
@@ -279,7 +279,7 @@ static void iblock_complete_cmd(struct se_cmd *cmd)
 	struct iblock_req *ibr = cmd->priv;
 	u8 status;
 
-	if (!atomic_dec_and_test(&ibr->pending))
+	if (!refcount_dec_and_test(&ibr->pending))
 		return;
 
 	if (atomic_read(&ibr->ib_bio_err_cnt))
@@ -296,8 +296,8 @@ static void iblock_bio_done(struct bio *bio)
 	struct se_cmd *cmd = bio->bi_private;
 	struct iblock_req *ibr = cmd->priv;
 
-	if (bio->bi_error) {
-		pr_err("bio error: %p,  err: %d\n", bio, bio->bi_error);
+	if (bio->bi_status) {
+		pr_err("bio error: %p,  err: %d\n", bio, bio->bi_status);
 		/*
 		 * Bump the ib_bio_err_cnt and release bio.
 		 */
@@ -354,11 +354,11 @@ static void iblock_end_io_flush(struct bio *bio)
 {
 	struct se_cmd *cmd = bio->bi_private;
 
-	if (bio->bi_error)
-		pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_error);
+	if (bio->bi_status)
+		pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_status);
 
 	if (cmd) {
-		if (bio->bi_error)
+		if (bio->bi_status)
 			target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION);
 		else
 			target_complete_cmd(cmd, SAM_STAT_GOOD);
@@ -487,7 +487,7 @@ iblock_execute_write_same(struct se_cmd *cmd)
 	bio_list_init(&list);
 	bio_list_add(&list, bio);
 
-	atomic_set(&ibr->pending, 1);
+	refcount_set(&ibr->pending, 1);
 
 	while (sectors) {
 		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
@@ -498,7 +498,7 @@ iblock_execute_write_same(struct se_cmd *cmd)
 			if (!bio)
 				goto fail_put_bios;
 
-			atomic_inc(&ibr->pending);
+			refcount_inc(&ibr->pending);
 			bio_list_add(&list, bio);
 		}
 
@@ -706,7 +706,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 	cmd->priv = ibr;
 
 	if (!sgl_nents) {
-		atomic_set(&ibr->pending, 1);
+		refcount_set(&ibr->pending, 1);
 		iblock_complete_cmd(cmd);
 		return 0;
 	}
@@ -719,7 +719,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 	bio_list_init(&list);
 	bio_list_add(&list, bio);
 
-	atomic_set(&ibr->pending, 2);
+	refcount_set(&ibr->pending, 2);
 	bio_cnt = 1;
 
 	for_each_sg(sgl, sg, sgl_nents, i) {
@@ -740,7 +740,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 			if (!bio)
 				goto fail_put_bios;
 
-			atomic_inc(&ibr->pending);
+			refcount_inc(&ibr->pending);
 			bio_list_add(&list, bio);
 			bio_cnt++;
 		}
diff --git a/drivers/target/target_core_iblock.h b/drivers/target/target_core_iblock.h
index 718d3fcd3e7c..f2a5797217d4 100644
--- a/drivers/target/target_core_iblock.h
+++ b/drivers/target/target_core_iblock.h
@@ -2,6 +2,7 @@
 #define TARGET_CORE_IBLOCK_H
 
 #include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <target/target_core_base.h>
 
 #define IBLOCK_VERSION		"4.0"
@@ -10,7 +11,7 @@
 #define IBLOCK_LBA_SHIFT	9
 
 struct iblock_req {
-	atomic_t pending;
+	refcount_t pending;
 	atomic_t ib_bio_err_cnt;
 } ____cacheline_aligned;
 
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index 9ab7090f7c83..0912de7c0cf8 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -136,7 +136,7 @@ int	init_se_kmem_caches(void);
 void	release_se_kmem_caches(void);
 u32	scsi_get_new_index(scsi_index_t);
 void	transport_subsystem_check_init(void);
-void	transport_cmd_finish_abort(struct se_cmd *, int);
+int	transport_cmd_finish_abort(struct se_cmd *, int);
 unsigned char *transport_dump_cmd_direction(struct se_cmd *);
 void	transport_dump_dev_state(struct se_device *, char *, int *);
 void	transport_dump_dev_info(struct se_device *, struct se_lun *,
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index e18051185846..129ca572673c 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -4147,7 +4147,7 @@ target_check_reservation(struct se_cmd *cmd)
 		return 0;
 	if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)
 		return 0;
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
 		return 0;
 
 	spin_lock(&dev->dev_reservation_lock);
diff --git a/drivers/target/target_core_pr.h b/drivers/target/target_core_pr.h
index 847bd470339c..772f9148e75c 100644
--- a/drivers/target/target_core_pr.h
+++ b/drivers/target/target_core_pr.h
@@ -7,7 +7,7 @@
 /*
  * PERSISTENT_RESERVE_OUT service action codes
  *
- * spc4r17 section 6.14.2 Table 171
+ * spc5r04b section 6.15.2 Table 174
  */
 #define PRO_REGISTER				0x00
 #define PRO_RESERVE				0x01
@@ -17,10 +17,11 @@
 #define PRO_PREEMPT_AND_ABORT			0x05
 #define PRO_REGISTER_AND_IGNORE_EXISTING_KEY	0x06
 #define PRO_REGISTER_AND_MOVE			0x07
+#define PRO_REPLACE_LOST_RESERVATION	0x08
 /*
  * PERSISTENT_RESERVE_IN service action codes
  *
- * spc4r17 section 6.13.1 Table 159
+ * spc5r04b section 6.14.1 Table 162
  */
 #define PRI_READ_KEYS				0x00
 #define PRI_READ_RESERVATION			0x01
@@ -29,13 +30,13 @@
 /*
  * PERSISTENT_RESERVE_ SCOPE field
  *
- * spc4r17 section 6.13.3.3 Table 163
+ * spc5r04b section 6.14.3.2 Table 166
  */
 #define PR_SCOPE_LU_SCOPE			0x00
 /*
  * PERSISTENT_RESERVE_* TYPE field
  *
- * spc4r17 section 6.13.3.4 Table 164
+ * spc5r04b section 6.14.3.3 Table 167
  */
 #define PR_TYPE_WRITE_EXCLUSIVE			0x01
 #define PR_TYPE_EXCLUSIVE_ACCESS		0x03
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index a93d94e68ab5..ceec0211e84e 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -55,7 +55,7 @@ static inline struct pscsi_dev_virt *PSCSI_DEV(struct se_device *dev)
 }
 
 static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd);
-static void pscsi_req_done(struct request *, int);
+static void pscsi_req_done(struct request *, blk_status_t);
 
 /*	pscsi_attach_hba():
  *
@@ -992,8 +992,6 @@ pscsi_execute_cmd(struct se_cmd *cmd)
 		goto fail;
 	}
 
-	scsi_req_init(req);
-
 	if (sgl) {
 		ret = pscsi_map_sg(cmd, sgl, sgl_nents, req);
 		if (ret)
@@ -1045,7 +1043,7 @@ static sector_t pscsi_get_blocks(struct se_device *dev)
 	return 0;
 }
 
-static void pscsi_req_done(struct request *req, int uptodate)
+static void pscsi_req_done(struct request *req, blk_status_t status)
 {
 	struct se_cmd *cmd = req->end_io_data;
 	struct pscsi_plugin_task *pt = cmd->priv;
@@ -1081,7 +1079,8 @@ static const struct target_backend_ops pscsi_ops = {
 	.name			= "pscsi",
 	.owner			= THIS_MODULE,
 	.transport_flags	= TRANSPORT_FLAG_PASSTHROUGH |
-				  TRANSPORT_FLAG_PASSTHROUGH_ALUA,
+				  TRANSPORT_FLAG_PASSTHROUGH_ALUA |
+				  TRANSPORT_FLAG_PASSTHROUGH_PGR,
 	.attach_hba		= pscsi_attach_hba,
 	.detach_hba		= pscsi_detach_hba,
 	.pmode_enable_hba	= pscsi_pmode_enable_hba,
diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index ddc216c9f1f6..20253d04103f 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c
@@ -47,11 +47,9 @@ static int rd_attach_hba(struct se_hba *hba, u32 host_id)
 {
 	struct rd_host *rd_host;
 
-	rd_host = kzalloc(sizeof(struct rd_host), GFP_KERNEL);
-	if (!rd_host) {
-		pr_err("Unable to allocate memory for struct rd_host\n");
+	rd_host = kzalloc(sizeof(*rd_host), GFP_KERNEL);
+	if (!rd_host)
 		return -ENOMEM;
-	}
 
 	rd_host->rd_host_id = host_id;
 
@@ -148,11 +146,8 @@ static int rd_allocate_sgl_table(struct rd_dev *rd_dev, struct rd_dev_sg_table *
 
 		sg = kcalloc(sg_per_table + chain_entry, sizeof(*sg),
 				GFP_KERNEL);
-		if (!sg) {
-			pr_err("Unable to allocate scatterlist array"
-				" for struct rd_dev\n");
+		if (!sg)
 			return -ENOMEM;
-		}
 
 		sg_init_table(sg, sg_per_table + chain_entry);
 
@@ -210,13 +205,9 @@ static int rd_build_device_space(struct rd_dev *rd_dev)
 	total_sg_needed = rd_dev->rd_page_count;
 
 	sg_tables = (total_sg_needed / max_sg_per_table) + 1;
-
-	sg_table = kzalloc(sg_tables * sizeof(struct rd_dev_sg_table), GFP_KERNEL);
-	if (!sg_table) {
-		pr_err("Unable to allocate memory for Ramdisk"
-		       " scatterlist tables\n");
+	sg_table = kcalloc(sg_tables, sizeof(*sg_table), GFP_KERNEL);
+	if (!sg_table)
 		return -ENOMEM;
-	}
 
 	rd_dev->sg_table_array = sg_table;
 	rd_dev->sg_table_count = sg_tables;
@@ -271,13 +262,9 @@ static int rd_build_prot_space(struct rd_dev *rd_dev, int prot_length, int block
 	total_sg_needed = (rd_dev->rd_page_count * prot_length / block_size) + 1;
 
 	sg_tables = (total_sg_needed / max_sg_per_table) + 1;
-
-	sg_table = kzalloc(sg_tables * sizeof(struct rd_dev_sg_table), GFP_KERNEL);
-	if (!sg_table) {
-		pr_err("Unable to allocate memory for Ramdisk protection"
-		       " scatterlist tables\n");
+	sg_table = kcalloc(sg_tables, sizeof(*sg_table), GFP_KERNEL);
+	if (!sg_table)
 		return -ENOMEM;
-	}
 
 	rd_dev->sg_prot_array = sg_table;
 	rd_dev->sg_prot_count = sg_tables;
@@ -298,11 +285,9 @@ static struct se_device *rd_alloc_device(struct se_hba *hba, const char *name)
 	struct rd_dev *rd_dev;
 	struct rd_host *rd_host = hba->hba_ptr;
 
-	rd_dev = kzalloc(sizeof(struct rd_dev), GFP_KERNEL);
-	if (!rd_dev) {
-		pr_err("Unable to allocate memory for struct rd_dev\n");
+	rd_dev = kzalloc(sizeof(*rd_dev), GFP_KERNEL);
+	if (!rd_dev)
 		return NULL;
-	}
 
 	rd_dev->rd_host = rd_host;
 
@@ -410,7 +395,7 @@ static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, bool is_read)
 	u32 prot_offset, prot_page;
 	u32 prot_npages __maybe_unused;
 	u64 tmp;
-	sense_reason_t rc = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+	sense_reason_t rc = 0;
 
 	tmp = cmd->t_task_lba * se_dev->prot_length;
 	prot_offset = do_div(tmp, PAGE_SIZE);
@@ -423,13 +408,14 @@ static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, bool is_read)
 	prot_sg = &prot_table->sg_table[prot_page -
 					prot_table->page_start_offset];
 
-	if (is_read)
-		rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0,
-				    prot_sg, prot_offset);
-	else
-		rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0,
-				    cmd->t_prot_sg, 0);
-
+	if (se_dev->dev_attrib.pi_prot_verify) {
+		if (is_read)
+			rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0,
+					    prot_sg, prot_offset);
+		else
+			rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0,
+					    cmd->t_prot_sg, 0);
+	}
 	if (!rc)
 		sbc_dif_copy_prot(cmd, sectors, is_read, prot_sg, prot_offset);
 
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index c194063f169b..4316f7b65fb7 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -507,8 +507,11 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes
 	 * been failed with a non-zero SCSI status.
 	 */
 	if (cmd->scsi_status) {
-		pr_err("compare_and_write_callback: non zero scsi_status:"
+		pr_debug("compare_and_write_callback: non zero scsi_status:"
 			" 0x%02x\n", cmd->scsi_status);
+		*post_ret = 1;
+		if (cmd->scsi_status == SAM_STAT_CHECK_CONDITION)
+			ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 		goto out;
 	}
 
@@ -519,8 +522,8 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes
 		goto out;
 	}
 
-	write_sg = kmalloc(sizeof(struct scatterlist) * cmd->t_data_nents,
-			   GFP_KERNEL);
+	write_sg = kmalloc_array(cmd->t_data_nents, sizeof(*write_sg),
+				 GFP_KERNEL);
 	if (!write_sg) {
 		pr_err("Unable to allocate compare_and_write sg\n");
 		ret = TCM_OUT_OF_RESOURCES;
@@ -924,6 +927,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		cmd->execute_cmd = sbc_execute_rw;
 		break;
 	case WRITE_16:
+	case WRITE_VERIFY_16:
 		sectors = transport_get_sectors_16(cdb);
 		cmd->t_task_lba = transport_lba_64(cdb);
 
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index dce1e1b47316..13f47bf4d16b 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -75,7 +75,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr)
 	kfree(tmr);
 }
 
-static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
+static int core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
 {
 	unsigned long flags;
 	bool remove = true, send_tas;
@@ -91,7 +91,7 @@ static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
 		transport_send_task_abort(cmd);
 	}
 
-	transport_cmd_finish_abort(cmd, remove);
+	return transport_cmd_finish_abort(cmd, remove);
 }
 
 static int target_check_cdb_and_preempt(struct list_head *list,
@@ -184,8 +184,8 @@ void core_tmr_abort_task(
 		cancel_work_sync(&se_cmd->work);
 		transport_wait_for_tasks(se_cmd);
 
-		transport_cmd_finish_abort(se_cmd, true);
-		target_put_sess_cmd(se_cmd);
+		if (!transport_cmd_finish_abort(se_cmd, true))
+			target_put_sess_cmd(se_cmd);
 
 		printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for"
 				" ref_tag: %llu\n", ref_tag);
@@ -281,8 +281,8 @@ static void core_tmr_drain_tmr_list(
 		cancel_work_sync(&cmd->work);
 		transport_wait_for_tasks(cmd);
 
-		transport_cmd_finish_abort(cmd, 1);
-		target_put_sess_cmd(cmd);
+		if (!transport_cmd_finish_abort(cmd, 1))
+			target_put_sess_cmd(cmd);
 	}
 }
 
@@ -380,8 +380,8 @@ static void core_tmr_drain_state_list(
 		cancel_work_sync(&cmd->work);
 		transport_wait_for_tasks(cmd);
 
-		core_tmr_handle_tas_abort(cmd, tas);
-		target_put_sess_cmd(cmd);
+		if (!core_tmr_handle_tas_abort(cmd, tas))
+			target_put_sess_cmd(cmd);
 	}
 }
 
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index dfaef4d3b2d2..310d9e55c6eb 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -398,6 +398,13 @@ int core_tpg_set_initiator_node_queue_depth(
 	struct se_portal_group *tpg = acl->se_tpg;
 
 	/*
+	 * Allow the setting of se_node_acl queue_depth to be idempotent,
+	 * and not force a session shutdown event if the value is not
+	 * changing.
+	 */
+	if (acl->queue_depth == queue_depth)
+		return 0;
+	/*
 	 * User has requested to change the queue depth for a Initiator Node.
 	 * Change the value in the Node's struct se_node_acl, and call
 	 * target_set_nacl_queue_depth() to set the new queue depth.
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index a0cd56ee5fe9..f1b3a46bdcaf 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -651,9 +651,10 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd)
 		percpu_ref_put(&lun->lun_ref);
 }
 
-void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
+int transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
 {
 	bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF);
+	int ret = 0;
 
 	if (cmd->se_cmd_flags & SCF_SE_LUN_CMD)
 		transport_lun_remove_cmd(cmd);
@@ -665,9 +666,11 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
 		cmd->se_tfo->aborted_task(cmd);
 
 	if (transport_cmd_check_stop_to_fabric(cmd))
-		return;
+		return 1;
 	if (remove && ack_kref)
-		transport_put_cmd(cmd);
+		ret = transport_put_cmd(cmd);
+
+	return ret;
 }
 
 static void target_complete_failure_work(struct work_struct *work)
@@ -1160,15 +1163,28 @@ target_cmd_size_check(struct se_cmd *cmd, unsigned int size)
 	if (cmd->unknown_data_length) {
 		cmd->data_length = size;
 	} else if (size != cmd->data_length) {
-		pr_warn("TARGET_CORE[%s]: Expected Transfer Length:"
+		pr_warn_ratelimited("TARGET_CORE[%s]: Expected Transfer Length:"
 			" %u does not match SCSI CDB Length: %u for SAM Opcode:"
 			" 0x%02x\n", cmd->se_tfo->get_fabric_name(),
 				cmd->data_length, size, cmd->t_task_cdb[0]);
 
-		if (cmd->data_direction == DMA_TO_DEVICE &&
-		    cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) {
-			pr_err("Rejecting underflow/overflow WRITE data\n");
-			return TCM_INVALID_CDB_FIELD;
+		if (cmd->data_direction == DMA_TO_DEVICE) {
+			if (cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) {
+				pr_err_ratelimited("Rejecting underflow/overflow"
+						   " for WRITE data CDB\n");
+				return TCM_INVALID_CDB_FIELD;
+			}
+			/*
+			 * Some fabric drivers like iscsi-target still expect to
+			 * always reject overflow writes.  Reject this case until
+			 * full fabric driver level support for overflow writes
+			 * is introduced tree-wide.
+			 */
+			if (size > cmd->data_length) {
+				pr_err_ratelimited("Rejecting overflow for"
+						   " WRITE control CDB\n");
+				return TCM_INVALID_CDB_FIELD;
+			}
 		}
 		/*
 		 * Reject READ_* or WRITE_* with overflow/underflow for
@@ -2311,7 +2327,7 @@ void *transport_kmap_data_sg(struct se_cmd *cmd)
 		return kmap(sg_page(sg)) + sg->offset;
 
 	/* >1 page. use vmap */
-	pages = kmalloc(sizeof(*pages) * cmd->t_data_nents, GFP_KERNEL);
+	pages = kmalloc_array(cmd->t_data_nents, sizeof(*pages), GFP_KERNEL);
 	if (!pages)
 		return NULL;
 
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index f615c3bbb73e..beb5f098f32d 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -2,6 +2,7 @@
  * Copyright (C) 2013 Shaohua Li <shli@kernel.org>
  * Copyright (C) 2014 Red Hat, Inc.
  * Copyright (C) 2015 Arrikto, Inc.
+ * Copyright (C) 2017 Chinamobile, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -25,10 +26,13 @@
 #include <linux/parser.h>
 #include <linux/vmalloc.h>
 #include <linux/uio_driver.h>
+#include <linux/radix-tree.h>
 #include <linux/stringify.h>
 #include <linux/bitops.h>
 #include <linux/highmem.h>
 #include <linux/configfs.h>
+#include <linux/mutex.h>
+#include <linux/kthread.h>
 #include <net/genetlink.h>
 #include <scsi/scsi_common.h>
 #include <scsi/scsi_proto.h>
@@ -63,17 +67,26 @@
  * this may have a 'UAM' comment.
  */
 
-
 #define TCMU_TIME_OUT (30 * MSEC_PER_SEC)
 
-#define DATA_BLOCK_BITS 256
-#define DATA_BLOCK_SIZE 4096
+/* For cmd area, the size is fixed 8MB */
+#define CMDR_SIZE (8 * 1024 * 1024)
 
-#define CMDR_SIZE (16 * 4096)
+/*
+ * For data area, the block size is PAGE_SIZE and
+ * the total size is 256K * PAGE_SIZE.
+ */
+#define DATA_BLOCK_SIZE PAGE_SIZE
+#define DATA_BLOCK_BITS (256 * 1024)
 #define DATA_SIZE (DATA_BLOCK_BITS * DATA_BLOCK_SIZE)
+#define DATA_BLOCK_INIT_BITS 128
 
+/* The total size of the ring is 8M + 256K * PAGE_SIZE */
 #define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE)
 
+/* Default maximum of the global data blocks(512K * PAGE_SIZE) */
+#define TCMU_GLOBAL_MAX_BLOCKS (512 * 1024)
+
 static struct device *tcmu_root_device;
 
 struct tcmu_hba {
@@ -83,6 +96,8 @@ struct tcmu_hba {
 #define TCMU_CONFIG_LEN 256
 
 struct tcmu_dev {
+	struct list_head node;
+	struct kref kref;
 	struct se_device se_dev;
 
 	char *name;
@@ -94,6 +109,8 @@ struct tcmu_dev {
 
 	struct uio_info uio_info;
 
+	struct inode *inode;
+
 	struct tcmu_mailbox *mb_addr;
 	size_t dev_size;
 	u32 cmdr_size;
@@ -103,11 +120,14 @@ struct tcmu_dev {
 	size_t data_off;
 	size_t data_size;
 
-	DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS);
-
 	wait_queue_head_t wait_cmdr;
-	/* TODO should this be a mutex? */
-	spinlock_t cmdr_lock;
+	struct mutex cmdr_lock;
+
+	bool waiting_global;
+	uint32_t dbi_max;
+	uint32_t dbi_thresh;
+	DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS);
+	struct radix_tree_root data_blocks;
 
 	struct idr commands;
 	spinlock_t commands_lock;
@@ -130,7 +150,9 @@ struct tcmu_cmd {
 
 	/* Can't use se_cmd when cleaning up expired cmds, because if
 	   cmd has been completed then accessing se_cmd is off limits */
-	DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS);
+	uint32_t dbi_cnt;
+	uint32_t dbi_cur;
+	uint32_t *dbi;
 
 	unsigned long deadline;
 
@@ -138,6 +160,13 @@ struct tcmu_cmd {
 	unsigned long flags;
 };
 
+static struct task_struct *unmap_thread;
+static wait_queue_head_t unmap_wait;
+static DEFINE_MUTEX(root_udev_mutex);
+static LIST_HEAD(root_udev);
+
+static atomic_t global_db_count = ATOMIC_INIT(0);
+
 static struct kmem_cache *tcmu_cmd_cache;
 
 /* multicast group */
@@ -161,6 +190,114 @@ static struct genl_family tcmu_genl_family __ro_after_init = {
 	.netnsok = true,
 };
 
+#define tcmu_cmd_set_dbi_cur(cmd, index) ((cmd)->dbi_cur = (index))
+#define tcmu_cmd_reset_dbi_cur(cmd) tcmu_cmd_set_dbi_cur(cmd, 0)
+#define tcmu_cmd_set_dbi(cmd, index) ((cmd)->dbi[(cmd)->dbi_cur++] = (index))
+#define tcmu_cmd_get_dbi(cmd) ((cmd)->dbi[(cmd)->dbi_cur++])
+
+static void tcmu_cmd_free_data(struct tcmu_cmd *tcmu_cmd, uint32_t len)
+{
+	struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
+	uint32_t i;
+
+	for (i = 0; i < len; i++)
+		clear_bit(tcmu_cmd->dbi[i], udev->data_bitmap);
+}
+
+static inline bool tcmu_get_empty_block(struct tcmu_dev *udev,
+					struct tcmu_cmd *tcmu_cmd)
+{
+	struct page *page;
+	int ret, dbi;
+
+	dbi = find_first_zero_bit(udev->data_bitmap, udev->dbi_thresh);
+	if (dbi == udev->dbi_thresh)
+		return false;
+
+	page = radix_tree_lookup(&udev->data_blocks, dbi);
+	if (!page) {
+
+		if (atomic_add_return(1, &global_db_count) >
+					TCMU_GLOBAL_MAX_BLOCKS) {
+			atomic_dec(&global_db_count);
+			return false;
+		}
+
+		/* try to get new page from the mm */
+		page = alloc_page(GFP_KERNEL);
+		if (!page)
+			return false;
+
+		ret = radix_tree_insert(&udev->data_blocks, dbi, page);
+		if (ret) {
+			__free_page(page);
+			return false;
+		}
+
+	}
+
+	if (dbi > udev->dbi_max)
+		udev->dbi_max = dbi;
+
+	set_bit(dbi, udev->data_bitmap);
+	tcmu_cmd_set_dbi(tcmu_cmd, dbi);
+
+	return true;
+}
+
+static bool tcmu_get_empty_blocks(struct tcmu_dev *udev,
+				  struct tcmu_cmd *tcmu_cmd)
+{
+	int i;
+
+	udev->waiting_global = false;
+
+	for (i = tcmu_cmd->dbi_cur; i < tcmu_cmd->dbi_cnt; i++) {
+		if (!tcmu_get_empty_block(udev, tcmu_cmd))
+			goto err;
+	}
+	return true;
+
+err:
+	udev->waiting_global = true;
+	/* Try to wake up the unmap thread */
+	wake_up(&unmap_wait);
+	return false;
+}
+
+static inline struct page *
+tcmu_get_block_page(struct tcmu_dev *udev, uint32_t dbi)
+{
+	return radix_tree_lookup(&udev->data_blocks, dbi);
+}
+
+static inline void tcmu_free_cmd(struct tcmu_cmd *tcmu_cmd)
+{
+	kfree(tcmu_cmd->dbi);
+	kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
+}
+
+static inline size_t tcmu_cmd_get_data_length(struct tcmu_cmd *tcmu_cmd)
+{
+	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
+	size_t data_length = round_up(se_cmd->data_length, DATA_BLOCK_SIZE);
+
+	if (se_cmd->se_cmd_flags & SCF_BIDI) {
+		BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
+		data_length += round_up(se_cmd->t_bidi_data_sg->length,
+				DATA_BLOCK_SIZE);
+	}
+
+	return data_length;
+}
+
+static inline uint32_t tcmu_cmd_get_block_cnt(struct tcmu_cmd *tcmu_cmd)
+{
+	size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd);
+
+	return data_length / DATA_BLOCK_SIZE;
+}
+
 static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
 {
 	struct se_device *se_dev = se_cmd->se_dev;
@@ -178,6 +315,15 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
 		tcmu_cmd->deadline = jiffies +
 					msecs_to_jiffies(udev->cmd_time_out);
 
+	tcmu_cmd_reset_dbi_cur(tcmu_cmd);
+	tcmu_cmd->dbi_cnt = tcmu_cmd_get_block_cnt(tcmu_cmd);
+	tcmu_cmd->dbi = kcalloc(tcmu_cmd->dbi_cnt, sizeof(uint32_t),
+				GFP_KERNEL);
+	if (!tcmu_cmd->dbi) {
+		kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
+		return NULL;
+	}
+
 	idr_preload(GFP_KERNEL);
 	spin_lock_irq(&udev->commands_lock);
 	cmd_id = idr_alloc(&udev->commands, tcmu_cmd, 0,
@@ -186,7 +332,7 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
 	idr_preload_end();
 
 	if (cmd_id < 0) {
-		kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
+		tcmu_free_cmd(tcmu_cmd);
 		return NULL;
 	}
 	tcmu_cmd->cmd_id = cmd_id;
@@ -248,10 +394,10 @@ static inline void new_iov(struct iovec **iov, int *iov_cnt,
 #define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size)
 
 /* offset is relative to mb_addr */
-static inline size_t get_block_offset(struct tcmu_dev *dev,
-		int block, int remaining)
+static inline size_t get_block_offset_user(struct tcmu_dev *dev,
+		int dbi, int remaining)
 {
-	return dev->data_off + block * DATA_BLOCK_SIZE +
+	return dev->data_off + dbi * DATA_BLOCK_SIZE +
 		DATA_BLOCK_SIZE - remaining;
 }
 
@@ -260,37 +406,45 @@ static inline size_t iov_tail(struct tcmu_dev *udev, struct iovec *iov)
 	return (size_t)iov->iov_base + iov->iov_len;
 }
 
-static void alloc_and_scatter_data_area(struct tcmu_dev *udev,
-	struct scatterlist *data_sg, unsigned int data_nents,
-	struct iovec **iov, int *iov_cnt, bool copy_data)
+static int scatter_data_area(struct tcmu_dev *udev,
+	struct tcmu_cmd *tcmu_cmd, struct scatterlist *data_sg,
+	unsigned int data_nents, struct iovec **iov,
+	int *iov_cnt, bool copy_data)
 {
-	int i, block;
+	int i, dbi;
 	int block_remaining = 0;
-	void *from, *to;
-	size_t copy_bytes, to_offset;
+	void *from, *to = NULL;
+	size_t copy_bytes, to_offset, offset;
 	struct scatterlist *sg;
+	struct page *page;
 
 	for_each_sg(data_sg, sg, data_nents, i) {
 		int sg_remaining = sg->length;
 		from = kmap_atomic(sg_page(sg)) + sg->offset;
 		while (sg_remaining > 0) {
 			if (block_remaining == 0) {
-				block = find_first_zero_bit(udev->data_bitmap,
-						DATA_BLOCK_BITS);
+				if (to)
+					kunmap_atomic(to);
+
 				block_remaining = DATA_BLOCK_SIZE;
-				set_bit(block, udev->data_bitmap);
+				dbi = tcmu_cmd_get_dbi(tcmu_cmd);
+				page = tcmu_get_block_page(udev, dbi);
+				to = kmap_atomic(page);
 			}
+
 			copy_bytes = min_t(size_t, sg_remaining,
 					block_remaining);
-			to_offset = get_block_offset(udev, block,
+			to_offset = get_block_offset_user(udev, dbi,
 					block_remaining);
-			to = (void *)udev->mb_addr + to_offset;
+			offset = DATA_BLOCK_SIZE - block_remaining;
+			to = (void *)(unsigned long)to + offset;
+
 			if (*iov_cnt != 0 &&
 			    to_offset == iov_tail(udev, *iov)) {
 				(*iov)->iov_len += copy_bytes;
 			} else {
 				new_iov(iov, iov_cnt, udev);
-				(*iov)->iov_base = (void __user *) to_offset;
+				(*iov)->iov_base = (void __user *)to_offset;
 				(*iov)->iov_len = copy_bytes;
 			}
 			if (copy_data) {
@@ -303,33 +457,29 @@ static void alloc_and_scatter_data_area(struct tcmu_dev *udev,
 		}
 		kunmap_atomic(from - sg->offset);
 	}
-}
+	if (to)
+		kunmap_atomic(to);
 
-static void free_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd)
-{
-	bitmap_xor(udev->data_bitmap, udev->data_bitmap, cmd->data_bitmap,
-		   DATA_BLOCK_BITS);
+	return 0;
 }
 
 static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
 			     bool bidi)
 {
 	struct se_cmd *se_cmd = cmd->se_cmd;
-	int i, block;
+	int i, dbi;
 	int block_remaining = 0;
-	void *from, *to;
-	size_t copy_bytes, from_offset;
+	void *from = NULL, *to;
+	size_t copy_bytes, offset;
 	struct scatterlist *sg, *data_sg;
+	struct page *page;
 	unsigned int data_nents;
-	DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
-
-	bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
+	uint32_t count = 0;
 
 	if (!bidi) {
 		data_sg = se_cmd->t_data_sg;
 		data_nents = se_cmd->t_data_nents;
 	} else {
-		uint32_t count;
 
 		/*
 		 * For bidi case, the first count blocks are for Data-Out
@@ -337,30 +487,30 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
 		 * the Data-Out buffer blocks should be discarded.
 		 */
 		count = DIV_ROUND_UP(se_cmd->data_length, DATA_BLOCK_SIZE);
-		while (count--) {
-			block = find_first_bit(bitmap, DATA_BLOCK_BITS);
-			clear_bit(block, bitmap);
-		}
 
 		data_sg = se_cmd->t_bidi_data_sg;
 		data_nents = se_cmd->t_bidi_data_nents;
 	}
 
+	tcmu_cmd_set_dbi_cur(cmd, count);
+
 	for_each_sg(data_sg, sg, data_nents, i) {
 		int sg_remaining = sg->length;
 		to = kmap_atomic(sg_page(sg)) + sg->offset;
 		while (sg_remaining > 0) {
 			if (block_remaining == 0) {
-				block = find_first_bit(bitmap,
-						DATA_BLOCK_BITS);
+				if (from)
+					kunmap_atomic(from);
+
 				block_remaining = DATA_BLOCK_SIZE;
-				clear_bit(block, bitmap);
+				dbi = tcmu_cmd_get_dbi(cmd);
+				page = tcmu_get_block_page(udev, dbi);
+				from = kmap_atomic(page);
 			}
 			copy_bytes = min_t(size_t, sg_remaining,
 					block_remaining);
-			from_offset = get_block_offset(udev, block,
-					block_remaining);
-			from = (void *) udev->mb_addr + from_offset;
+			offset = DATA_BLOCK_SIZE - block_remaining;
+			from = (void *)(unsigned long)from + offset;
 			tcmu_flush_dcache_range(from, copy_bytes);
 			memcpy(to + sg->length - sg_remaining, from,
 					copy_bytes);
@@ -370,12 +520,13 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
 		}
 		kunmap_atomic(to - sg->offset);
 	}
+	if (from)
+		kunmap_atomic(from);
 }
 
-static inline size_t spc_bitmap_free(unsigned long *bitmap)
+static inline size_t spc_bitmap_free(unsigned long *bitmap, uint32_t thresh)
 {
-	return DATA_BLOCK_SIZE * (DATA_BLOCK_BITS -
-			bitmap_weight(bitmap, DATA_BLOCK_BITS));
+	return DATA_BLOCK_SIZE * (thresh - bitmap_weight(bitmap, thresh));
 }
 
 /*
@@ -384,9 +535,12 @@ static inline size_t spc_bitmap_free(unsigned long *bitmap)
  *
  * Called with ring lock held.
  */
-static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t data_needed)
+static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
+		size_t cmd_size, size_t data_needed)
 {
 	struct tcmu_mailbox *mb = udev->mb_addr;
+	uint32_t blocks_needed = (data_needed + DATA_BLOCK_SIZE - 1)
+				/ DATA_BLOCK_SIZE;
 	size_t space, cmd_needed;
 	u32 cmd_head;
 
@@ -410,35 +564,63 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t d
 		return false;
 	}
 
-	space = spc_bitmap_free(udev->data_bitmap);
+	/* try to check and get the data blocks as needed */
+	space = spc_bitmap_free(udev->data_bitmap, udev->dbi_thresh);
 	if (space < data_needed) {
-		pr_debug("no data space: only %zu available, but ask for %zu\n",
-				space, data_needed);
-		return false;
+		unsigned long blocks_left = DATA_BLOCK_BITS - udev->dbi_thresh;
+		unsigned long grow;
+
+		if (blocks_left < blocks_needed) {
+			pr_debug("no data space: only %lu available, but ask for %zu\n",
+					blocks_left * DATA_BLOCK_SIZE,
+					data_needed);
+			return false;
+		}
+
+		/* Try to expand the thresh */
+		if (!udev->dbi_thresh) {
+			/* From idle state */
+			uint32_t init_thresh = DATA_BLOCK_INIT_BITS;
+
+			udev->dbi_thresh = max(blocks_needed, init_thresh);
+		} else {
+			/*
+			 * Grow the data area by max(blocks needed,
+			 * dbi_thresh / 2), but limited to the max
+			 * DATA_BLOCK_BITS size.
+			 */
+			grow = max(blocks_needed, udev->dbi_thresh / 2);
+			udev->dbi_thresh += grow;
+			if (udev->dbi_thresh > DATA_BLOCK_BITS)
+				udev->dbi_thresh = DATA_BLOCK_BITS;
+		}
 	}
 
+	if (!tcmu_get_empty_blocks(udev, cmd))
+		return false;
+
 	return true;
 }
 
-static inline size_t tcmu_cmd_get_data_length(struct tcmu_cmd *tcmu_cmd)
+static inline size_t tcmu_cmd_get_base_cmd_size(size_t iov_cnt)
 {
-	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
-	size_t data_length = round_up(se_cmd->data_length, DATA_BLOCK_SIZE);
-
-	if (se_cmd->se_cmd_flags & SCF_BIDI) {
-		BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
-		data_length += round_up(se_cmd->t_bidi_data_sg->length,
-				DATA_BLOCK_SIZE);
-	}
-
-	return data_length;
+	return max(offsetof(struct tcmu_cmd_entry, req.iov[iov_cnt]),
+			sizeof(struct tcmu_cmd_entry));
 }
 
-static inline uint32_t tcmu_cmd_get_block_cnt(struct tcmu_cmd *tcmu_cmd)
+static inline size_t tcmu_cmd_get_cmd_size(struct tcmu_cmd *tcmu_cmd,
+					   size_t base_command_size)
 {
-	size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd);
+	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
+	size_t command_size;
 
-	return data_length / DATA_BLOCK_SIZE;
+	command_size = base_command_size +
+		round_up(scsi_command_size(se_cmd->t_task_cdb),
+				TCMU_OP_ALIGN_SIZE);
+
+	WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1));
+
+	return command_size;
 }
 
 static sense_reason_t
@@ -450,12 +632,11 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	struct tcmu_mailbox *mb;
 	struct tcmu_cmd_entry *entry;
 	struct iovec *iov;
-	int iov_cnt;
+	int iov_cnt, ret;
 	uint32_t cmd_head;
 	uint64_t cdb_off;
 	bool copy_to_data_area;
 	size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd);
-	DECLARE_BITMAP(old_bitmap, DATA_BLOCK_BITS);
 
 	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags))
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -464,18 +645,18 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	 * Must be a certain minimum size for response sense info, but
 	 * also may be larger if the iov array is large.
 	 *
-	 * We prepare way too many iovs for potential uses here, because it's
-	 * expensive to tell how many regions are freed in the bitmap
-	*/
-	base_command_size = max(offsetof(struct tcmu_cmd_entry,
-				req.iov[tcmu_cmd_get_block_cnt(tcmu_cmd)]),
-				sizeof(struct tcmu_cmd_entry));
-	command_size = base_command_size
-		+ round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE);
-
-	WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1));
+	 * We prepare as many iovs as possbile for potential uses here,
+	 * because it's expensive to tell how many regions are freed in
+	 * the bitmap & global data pool, as the size calculated here
+	 * will only be used to do the checks.
+	 *
+	 * The size will be recalculated later as actually needed to save
+	 * cmd area memories.
+	 */
+	base_command_size = tcmu_cmd_get_base_cmd_size(tcmu_cmd->dbi_cnt);
+	command_size = tcmu_cmd_get_cmd_size(tcmu_cmd, base_command_size);
 
-	spin_lock_irq(&udev->cmdr_lock);
+	mutex_lock(&udev->cmdr_lock);
 
 	mb = udev->mb_addr;
 	cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
@@ -484,18 +665,18 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 		pr_warn("TCMU: Request of size %zu/%zu is too big for %u/%zu "
 			"cmd ring/data area\n", command_size, data_length,
 			udev->cmdr_size, udev->data_size);
-		spin_unlock_irq(&udev->cmdr_lock);
+		mutex_unlock(&udev->cmdr_lock);
 		return TCM_INVALID_CDB_FIELD;
 	}
 
-	while (!is_ring_space_avail(udev, command_size, data_length)) {
+	while (!is_ring_space_avail(udev, tcmu_cmd, command_size, data_length)) {
 		int ret;
 		DEFINE_WAIT(__wait);
 
 		prepare_to_wait(&udev->wait_cmdr, &__wait, TASK_INTERRUPTIBLE);
 
 		pr_debug("sleeping for ring space\n");
-		spin_unlock_irq(&udev->cmdr_lock);
+		mutex_unlock(&udev->cmdr_lock);
 		if (udev->cmd_time_out)
 			ret = schedule_timeout(
 					msecs_to_jiffies(udev->cmd_time_out));
@@ -507,7 +688,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 			return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 		}
 
-		spin_lock_irq(&udev->cmdr_lock);
+		mutex_lock(&udev->cmdr_lock);
 
 		/* We dropped cmdr_lock, cmd_head is stale */
 		cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
@@ -534,20 +715,26 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	entry = (void *) mb + CMDR_OFF + cmd_head;
 	tcmu_flush_dcache_range(entry, sizeof(*entry));
 	tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_CMD);
-	tcmu_hdr_set_len(&entry->hdr.len_op, command_size);
 	entry->hdr.cmd_id = tcmu_cmd->cmd_id;
 	entry->hdr.kflags = 0;
 	entry->hdr.uflags = 0;
 
-	bitmap_copy(old_bitmap, udev->data_bitmap, DATA_BLOCK_BITS);
-
 	/* Handle allocating space from the data area */
+	tcmu_cmd_reset_dbi_cur(tcmu_cmd);
 	iov = &entry->req.iov[0];
 	iov_cnt = 0;
 	copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE
 		|| se_cmd->se_cmd_flags & SCF_BIDI);
-	alloc_and_scatter_data_area(udev, se_cmd->t_data_sg,
-		se_cmd->t_data_nents, &iov, &iov_cnt, copy_to_data_area);
+	ret = scatter_data_area(udev, tcmu_cmd, se_cmd->t_data_sg,
+				se_cmd->t_data_nents, &iov, &iov_cnt,
+				copy_to_data_area);
+	if (ret) {
+		tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt);
+		mutex_unlock(&udev->cmdr_lock);
+
+		pr_err("tcmu: alloc and scatter data failed\n");
+		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+	}
 	entry->req.iov_cnt = iov_cnt;
 	entry->req.iov_dif_cnt = 0;
 
@@ -555,14 +742,29 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	if (se_cmd->se_cmd_flags & SCF_BIDI) {
 		iov_cnt = 0;
 		iov++;
-		alloc_and_scatter_data_area(udev, se_cmd->t_bidi_data_sg,
-				se_cmd->t_bidi_data_nents, &iov, &iov_cnt,
-				false);
+		ret = scatter_data_area(udev, tcmu_cmd,
+					se_cmd->t_bidi_data_sg,
+					se_cmd->t_bidi_data_nents,
+					&iov, &iov_cnt, false);
+		if (ret) {
+			tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt);
+			mutex_unlock(&udev->cmdr_lock);
+
+			pr_err("tcmu: alloc and scatter bidi data failed\n");
+			return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+		}
 		entry->req.iov_bidi_cnt = iov_cnt;
 	}
-	/* cmd's data_bitmap is what changed in process */
-	bitmap_xor(tcmu_cmd->data_bitmap, old_bitmap, udev->data_bitmap,
-			DATA_BLOCK_BITS);
+
+	/*
+	 * Recalaulate the command's base size and size according
+	 * to the actual needs
+	 */
+	base_command_size = tcmu_cmd_get_base_cmd_size(entry->req.iov_cnt +
+						       entry->req.iov_bidi_cnt);
+	command_size = tcmu_cmd_get_cmd_size(tcmu_cmd, base_command_size);
+
+	tcmu_hdr_set_len(&entry->hdr.len_op, command_size);
 
 	/* All offsets relative to mb_addr, not start of entry! */
 	cdb_off = CMDR_OFF + cmd_head + base_command_size;
@@ -572,8 +774,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 
 	UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size);
 	tcmu_flush_dcache_range(mb, sizeof(*mb));
-
-	spin_unlock_irq(&udev->cmdr_lock);
+	mutex_unlock(&udev->cmdr_lock);
 
 	/* TODO: only if FLUSH and FUA? */
 	uio_event_notify(&udev->uio_info);
@@ -604,7 +805,7 @@ tcmu_queue_cmd(struct se_cmd *se_cmd)
 		idr_remove(&udev->commands, tcmu_cmd->cmd_id);
 		spin_unlock_irq(&udev->commands_lock);
 
-		kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
+		tcmu_free_cmd(tcmu_cmd);
 	}
 
 	return ret;
@@ -615,50 +816,45 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *
 	struct se_cmd *se_cmd = cmd->se_cmd;
 	struct tcmu_dev *udev = cmd->tcmu_dev;
 
-	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
-		/*
-		 * cmd has been completed already from timeout, just reclaim
-		 * data area space and free cmd
-		 */
-		free_data_area(udev, cmd);
+	/*
+	 * cmd has been completed already from timeout, just reclaim
+	 * data area space and free cmd
+	 */
+	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags))
+		goto out;
 
-		kmem_cache_free(tcmu_cmd_cache, cmd);
-		return;
-	}
+	tcmu_cmd_reset_dbi_cur(cmd);
 
 	if (entry->hdr.uflags & TCMU_UFLAG_UNKNOWN_OP) {
-		free_data_area(udev, cmd);
 		pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n",
 			cmd->se_cmd);
 		entry->rsp.scsi_status = SAM_STAT_CHECK_CONDITION;
 	} else if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
 		memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer,
 			       se_cmd->scsi_sense_length);
-		free_data_area(udev, cmd);
 	} else if (se_cmd->se_cmd_flags & SCF_BIDI) {
 		/* Get Data-In buffer before clean up */
 		gather_data_area(udev, cmd, true);
-		free_data_area(udev, cmd);
 	} else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
 		gather_data_area(udev, cmd, false);
-		free_data_area(udev, cmd);
 	} else if (se_cmd->data_direction == DMA_TO_DEVICE) {
-		free_data_area(udev, cmd);
+		/* TODO: */
 	} else if (se_cmd->data_direction != DMA_NONE) {
 		pr_warn("TCMU: data direction was %d!\n",
 			se_cmd->data_direction);
 	}
 
 	target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status);
-	cmd->se_cmd = NULL;
 
-	kmem_cache_free(tcmu_cmd_cache, cmd);
+out:
+	cmd->se_cmd = NULL;
+	tcmu_cmd_free_data(cmd, cmd->dbi_cnt);
+	tcmu_free_cmd(cmd);
 }
 
 static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
 {
 	struct tcmu_mailbox *mb;
-	unsigned long flags;
 	int handled = 0;
 
 	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) {
@@ -666,8 +862,6 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
 		return 0;
 	}
 
-	spin_lock_irqsave(&udev->cmdr_lock, flags);
-
 	mb = udev->mb_addr;
 	tcmu_flush_dcache_range(mb, sizeof(*mb));
 
@@ -708,8 +902,6 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
 	if (mb->cmd_tail == mb->cmd_head)
 		del_timer(&udev->timeout); /* no more pending cmds */
 
-	spin_unlock_irqrestore(&udev->cmdr_lock, flags);
-
 	wake_up(&udev->wait_cmdr);
 
 	return handled;
@@ -736,16 +928,14 @@ static void tcmu_device_timedout(unsigned long data)
 {
 	struct tcmu_dev *udev = (struct tcmu_dev *)data;
 	unsigned long flags;
-	int handled;
-
-	handled = tcmu_handle_completions(udev);
-
-	pr_warn("%d completions handled from timeout\n", handled);
 
 	spin_lock_irqsave(&udev->commands_lock, flags);
 	idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL);
 	spin_unlock_irqrestore(&udev->commands_lock, flags);
 
+	/* Try to wake up the ummap thread */
+	wake_up(&unmap_wait);
+
 	/*
 	 * We don't need to wakeup threads on wait_cmdr since they have their
 	 * own timeout.
@@ -779,6 +969,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
 	udev = kzalloc(sizeof(struct tcmu_dev), GFP_KERNEL);
 	if (!udev)
 		return NULL;
+	kref_init(&udev->kref);
 
 	udev->name = kstrdup(name, GFP_KERNEL);
 	if (!udev->name) {
@@ -790,7 +981,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
 	udev->cmd_time_out = TCMU_TIME_OUT;
 
 	init_waitqueue_head(&udev->wait_cmdr);
-	spin_lock_init(&udev->cmdr_lock);
+	mutex_init(&udev->cmdr_lock);
 
 	idr_init(&udev->commands);
 	spin_lock_init(&udev->commands_lock);
@@ -805,7 +996,9 @@ static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on)
 {
 	struct tcmu_dev *tcmu_dev = container_of(info, struct tcmu_dev, uio_info);
 
+	mutex_lock(&tcmu_dev->cmdr_lock);
 	tcmu_handle_completions(tcmu_dev);
+	mutex_unlock(&tcmu_dev->cmdr_lock);
 
 	return 0;
 }
@@ -827,6 +1020,60 @@ static int tcmu_find_mem_index(struct vm_area_struct *vma)
 	return -1;
 }
 
+static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi)
+{
+	struct page *page;
+	int ret;
+
+	mutex_lock(&udev->cmdr_lock);
+	page = tcmu_get_block_page(udev, dbi);
+	if (likely(page)) {
+		mutex_unlock(&udev->cmdr_lock);
+		return page;
+	}
+
+	/*
+	 * Normally it shouldn't be here:
+	 * Only when the userspace has touched the blocks which
+	 * are out of the tcmu_cmd's data iov[], and will return
+	 * one zeroed page.
+	 */
+	pr_warn("Block(%u) out of cmd's iov[] has been touched!\n", dbi);
+	pr_warn("Mostly it will be a bug of userspace, please have a check!\n");
+
+	if (dbi >= udev->dbi_thresh) {
+		/* Extern the udev->dbi_thresh to dbi + 1 */
+		udev->dbi_thresh = dbi + 1;
+		udev->dbi_max = dbi;
+	}
+
+	page = radix_tree_lookup(&udev->data_blocks, dbi);
+	if (!page) {
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!page) {
+			mutex_unlock(&udev->cmdr_lock);
+			return NULL;
+		}
+
+		ret = radix_tree_insert(&udev->data_blocks, dbi, page);
+		if (ret) {
+			mutex_unlock(&udev->cmdr_lock);
+			__free_page(page);
+			return NULL;
+		}
+
+		/*
+		 * Since this case is rare in page fault routine, here we
+		 * will allow the global_db_count >= TCMU_GLOBAL_MAX_BLOCKS
+		 * to reduce possible page fault call trace.
+		 */
+		atomic_inc(&global_db_count);
+	}
+	mutex_unlock(&udev->cmdr_lock);
+
+	return page;
+}
+
 static int tcmu_vma_fault(struct vm_fault *vmf)
 {
 	struct tcmu_dev *udev = vmf->vma->vm_private_data;
@@ -845,11 +1092,20 @@ static int tcmu_vma_fault(struct vm_fault *vmf)
 	 */
 	offset = (vmf->pgoff - mi) << PAGE_SHIFT;
 
-	addr = (void *)(unsigned long)info->mem[mi].addr + offset;
-	if (info->mem[mi].memtype == UIO_MEM_LOGICAL)
-		page = virt_to_page(addr);
-	else
+	if (offset < udev->data_off) {
+		/* For the vmalloc()ed cmd area pages */
+		addr = (void *)(unsigned long)info->mem[mi].addr + offset;
 		page = vmalloc_to_page(addr);
+	} else {
+		uint32_t dbi;
+
+		/* For the dynamically growing data area pages */
+		dbi = (offset - udev->data_off) / DATA_BLOCK_SIZE;
+		page = tcmu_try_get_block_page(udev, dbi);
+		if (!page)
+			return VM_FAULT_NOPAGE;
+	}
+
 	get_page(page);
 	vmf->page = page;
 	return 0;
@@ -883,11 +1139,31 @@ static int tcmu_open(struct uio_info *info, struct inode *inode)
 	if (test_and_set_bit(TCMU_DEV_BIT_OPEN, &udev->flags))
 		return -EBUSY;
 
+	udev->inode = inode;
+
 	pr_debug("open\n");
 
 	return 0;
 }
 
+static void tcmu_dev_call_rcu(struct rcu_head *p)
+{
+	struct se_device *dev = container_of(p, struct se_device, rcu_head);
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+
+	kfree(udev->uio_info.name);
+	kfree(udev->name);
+	kfree(udev);
+}
+
+static void tcmu_dev_kref_release(struct kref *kref)
+{
+	struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref);
+	struct se_device *dev = &udev->se_dev;
+
+	call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
+}
+
 static int tcmu_release(struct uio_info *info, struct inode *inode)
 {
 	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
@@ -895,7 +1171,8 @@ static int tcmu_release(struct uio_info *info, struct inode *inode)
 	clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
 
 	pr_debug("close\n");
-
+	/* release ref from configure */
+	kref_put(&udev->kref, tcmu_dev_kref_release);
 	return 0;
 }
 
@@ -963,7 +1240,7 @@ static int tcmu_configure_device(struct se_device *dev)
 
 	info->name = str;
 
-	udev->mb_addr = vzalloc(TCMU_RING_SIZE);
+	udev->mb_addr = vzalloc(CMDR_SIZE);
 	if (!udev->mb_addr) {
 		ret = -ENOMEM;
 		goto err_vzalloc;
@@ -972,8 +1249,11 @@ static int tcmu_configure_device(struct se_device *dev)
 	/* mailbox fits in first part of CMDR space */
 	udev->cmdr_size = CMDR_SIZE - CMDR_OFF;
 	udev->data_off = CMDR_SIZE;
-	udev->data_size = TCMU_RING_SIZE - CMDR_SIZE;
+	udev->data_size = DATA_SIZE;
+	udev->dbi_thresh = 0; /* Default in Idle state */
+	udev->waiting_global = false;
 
+	/* Initialise the mailbox of the ring buffer */
 	mb = udev->mb_addr;
 	mb->version = TCMU_MAILBOX_VERSION;
 	mb->flags = TCMU_MAILBOX_FLAG_CAP_OOOC;
@@ -984,12 +1264,14 @@ static int tcmu_configure_device(struct se_device *dev)
 	WARN_ON(udev->data_size % PAGE_SIZE);
 	WARN_ON(udev->data_size % DATA_BLOCK_SIZE);
 
+	INIT_RADIX_TREE(&udev->data_blocks, GFP_KERNEL);
+
 	info->version = __stringify(TCMU_MAILBOX_VERSION);
 
 	info->mem[0].name = "tcm-user command & data buffer";
 	info->mem[0].addr = (phys_addr_t)(uintptr_t)udev->mb_addr;
 	info->mem[0].size = TCMU_RING_SIZE;
-	info->mem[0].memtype = UIO_MEM_VIRTUAL;
+	info->mem[0].memtype = UIO_MEM_NONE;
 
 	info->irqcontrol = tcmu_irqcontrol;
 	info->irq = UIO_IRQ_CUSTOM;
@@ -1010,19 +1292,31 @@ static int tcmu_configure_device(struct se_device *dev)
 		dev->dev_attrib.hw_max_sectors = 128;
 	dev->dev_attrib.hw_queue_depth = 128;
 
+	/*
+	 * Get a ref incase userspace does a close on the uio device before
+	 * LIO has initiated tcmu_free_device.
+	 */
+	kref_get(&udev->kref);
+
 	ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name,
 				 udev->uio_info.uio_dev->minor);
 	if (ret)
 		goto err_netlink;
 
+	mutex_lock(&root_udev_mutex);
+	list_add(&udev->node, &root_udev);
+	mutex_unlock(&root_udev_mutex);
+
 	return 0;
 
 err_netlink:
+	kref_put(&udev->kref, tcmu_dev_kref_release);
 	uio_unregister_device(&udev->uio_info);
 err_register:
 	vfree(udev->mb_addr);
 err_vzalloc:
 	kfree(info->name);
+	info->name = NULL;
 
 	return ret;
 }
@@ -1036,17 +1330,26 @@ static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
 	return -EINVAL;
 }
 
-static void tcmu_dev_call_rcu(struct rcu_head *p)
+static bool tcmu_dev_configured(struct tcmu_dev *udev)
 {
-	struct se_device *dev = container_of(p, struct se_device, rcu_head);
-	struct tcmu_dev *udev = TCMU_DEV(dev);
-
-	kfree(udev);
+	return udev->uio_info.uio_dev ? true : false;
 }
 
-static bool tcmu_dev_configured(struct tcmu_dev *udev)
+static void tcmu_blocks_release(struct tcmu_dev *udev)
 {
-	return udev->uio_info.uio_dev ? true : false;
+	int i;
+	struct page *page;
+
+	/* Try to release all block pages */
+	mutex_lock(&udev->cmdr_lock);
+	for (i = 0; i <= udev->dbi_max; i++) {
+		page = radix_tree_delete(&udev->data_blocks, i);
+		if (page) {
+			__free_page(page);
+			atomic_dec(&global_db_count);
+		}
+	}
+	mutex_unlock(&udev->cmdr_lock);
 }
 
 static void tcmu_free_device(struct se_device *dev)
@@ -1058,6 +1361,10 @@ static void tcmu_free_device(struct se_device *dev)
 
 	del_timer_sync(&udev->timeout);
 
+	mutex_lock(&root_udev_mutex);
+	list_del(&udev->node);
+	mutex_unlock(&root_udev_mutex);
+
 	vfree(udev->mb_addr);
 
 	/* Upper layer should drain all requests before calling this */
@@ -1070,15 +1377,17 @@ static void tcmu_free_device(struct se_device *dev)
 	spin_unlock_irq(&udev->commands_lock);
 	WARN_ON(!all_expired);
 
+	tcmu_blocks_release(udev);
+
 	if (tcmu_dev_configured(udev)) {
 		tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
 				   udev->uio_info.uio_dev->minor);
 
 		uio_unregister_device(&udev->uio_info);
-		kfree(udev->uio_info.name);
-		kfree(udev->name);
 	}
-	call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
+
+	/* release ref from init */
+	kref_put(&udev->kref, tcmu_dev_kref_release);
 }
 
 enum {
@@ -1256,6 +1565,84 @@ static struct target_backend_ops tcmu_ops = {
 	.tb_dev_attrib_attrs	= NULL,
 };
 
+static int unmap_thread_fn(void *data)
+{
+	struct tcmu_dev *udev;
+	loff_t off;
+	uint32_t start, end, block;
+	struct page *page;
+	int i;
+
+	while (1) {
+		DEFINE_WAIT(__wait);
+
+		prepare_to_wait(&unmap_wait, &__wait, TASK_INTERRUPTIBLE);
+		schedule();
+		finish_wait(&unmap_wait, &__wait);
+
+		if (kthread_should_stop())
+			break;
+
+		mutex_lock(&root_udev_mutex);
+		list_for_each_entry(udev, &root_udev, node) {
+			mutex_lock(&udev->cmdr_lock);
+
+			/* Try to complete the finished commands first */
+			tcmu_handle_completions(udev);
+
+			/* Skip the udevs waiting the global pool or in idle */
+			if (udev->waiting_global || !udev->dbi_thresh) {
+				mutex_unlock(&udev->cmdr_lock);
+				continue;
+			}
+
+			end = udev->dbi_max + 1;
+			block = find_last_bit(udev->data_bitmap, end);
+			if (block == udev->dbi_max) {
+				/*
+				 * The last bit is dbi_max, so there is
+				 * no need to shrink any blocks.
+				 */
+				mutex_unlock(&udev->cmdr_lock);
+				continue;
+			} else if (block == end) {
+				/* The current udev will goto idle state */
+				udev->dbi_thresh = start = 0;
+				udev->dbi_max = 0;
+			} else {
+				udev->dbi_thresh = start = block + 1;
+				udev->dbi_max = block;
+			}
+
+			/* Here will truncate the data area from off */
+			off = udev->data_off + start * DATA_BLOCK_SIZE;
+			unmap_mapping_range(udev->inode->i_mapping, off, 0, 1);
+
+			/* Release the block pages */
+			for (i = start; i < end; i++) {
+				page = radix_tree_delete(&udev->data_blocks, i);
+				if (page) {
+					__free_page(page);
+					atomic_dec(&global_db_count);
+				}
+			}
+			mutex_unlock(&udev->cmdr_lock);
+		}
+
+		/*
+		 * Try to wake up the udevs who are waiting
+		 * for the global data pool.
+		 */
+		list_for_each_entry(udev, &root_udev, node) {
+			if (udev->waiting_global)
+				wake_up(&udev->wait_cmdr);
+		}
+		mutex_unlock(&root_udev_mutex);
+	}
+
+	return 0;
+}
+
 static int __init tcmu_module_init(void)
 {
 	int ret, i, len = 0;
@@ -1301,8 +1688,17 @@ static int __init tcmu_module_init(void)
 	if (ret)
 		goto out_attrs;
 
+	init_waitqueue_head(&unmap_wait);
+	unmap_thread = kthread_run(unmap_thread_fn, NULL, "tcmu_unmap");
+	if (IS_ERR(unmap_thread)) {
+		ret = PTR_ERR(unmap_thread);
+		goto out_unreg_transport;
+	}
+
 	return 0;
 
+out_unreg_transport:
+	target_backend_unregister(&tcmu_ops);
 out_attrs:
 	kfree(tcmu_attrs);
 out_unreg_genl:
@@ -1317,6 +1713,7 @@ out_free_cache:
 
 static void __exit tcmu_module_exit(void)
 {
+	kthread_stop(unmap_thread);
 	target_backend_unregister(&tcmu_ops);
 	kfree(tcmu_attrs);
 	genl_unregister_family(&tcmu_genl_family);
diff --git a/drivers/tee/Kconfig b/drivers/tee/Kconfig
new file mode 100644
index 000000000000..a6df12d88f90
--- /dev/null
+++ b/drivers/tee/Kconfig
@@ -0,0 +1,19 @@
+# Generic Trusted Execution Environment Configuration
+config TEE
+	tristate "Trusted Execution Environment support"
+	depends on HAVE_ARM_SMCCC || COMPILE_TEST
+	select DMA_SHARED_BUFFER
+	select GENERIC_ALLOCATOR
+	help
+	  This implements a generic interface towards a Trusted Execution
+	  Environment (TEE).
+
+if TEE
+
+menu "TEE drivers"
+
+source "drivers/tee/optee/Kconfig"
+
+endmenu
+
+endif
diff --git a/drivers/tee/Makefile b/drivers/tee/Makefile
new file mode 100644
index 000000000000..7a4e4a1ac39c
--- /dev/null
+++ b/drivers/tee/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_TEE) += tee.o
+tee-objs += tee_core.o
+tee-objs += tee_shm.o
+tee-objs += tee_shm_pool.o
+obj-$(CONFIG_OPTEE) += optee/
diff --git a/drivers/tee/optee/Kconfig b/drivers/tee/optee/Kconfig
new file mode 100644
index 000000000000..0126de898036
--- /dev/null
+++ b/drivers/tee/optee/Kconfig
@@ -0,0 +1,7 @@
+# OP-TEE Trusted Execution Environment Configuration
+config OPTEE
+	tristate "OP-TEE"
+	depends on HAVE_ARM_SMCCC
+	help
+	  This implements the OP-TEE Trusted Execution Environment (TEE)
+	  driver.
diff --git a/drivers/tee/optee/Makefile b/drivers/tee/optee/Makefile
new file mode 100644
index 000000000000..92fe5789bcce
--- /dev/null
+++ b/drivers/tee/optee/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_OPTEE) += optee.o
+optee-objs += core.o
+optee-objs += call.o
+optee-objs += rpc.o
+optee-objs += supp.o
diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c
new file mode 100644
index 000000000000..f7b7b404c990
--- /dev/null
+++ b/drivers/tee/optee/call.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/arm-smccc.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include "optee_private.h"
+#include "optee_smc.h"
+
+struct optee_call_waiter {
+	struct list_head list_node;
+	struct completion c;
+};
+
+static void optee_cq_wait_init(struct optee_call_queue *cq,
+			       struct optee_call_waiter *w)
+{
+	/*
+	 * We're preparing to make a call to secure world. In case we can't
+	 * allocate a thread in secure world we'll end up waiting in
+	 * optee_cq_wait_for_completion().
+	 *
+	 * Normally if there's no contention in secure world the call will
+	 * complete and we can cleanup directly with optee_cq_wait_final().
+	 */
+	mutex_lock(&cq->mutex);
+
+	/*
+	 * We add ourselves to the queue, but we don't wait. This
+	 * guarantees that we don't lose a completion if secure world
+	 * returns busy and another thread just exited and try to complete
+	 * someone.
+	 */
+	init_completion(&w->c);
+	list_add_tail(&w->list_node, &cq->waiters);
+
+	mutex_unlock(&cq->mutex);
+}
+
+static void optee_cq_wait_for_completion(struct optee_call_queue *cq,
+					 struct optee_call_waiter *w)
+{
+	wait_for_completion(&w->c);
+
+	mutex_lock(&cq->mutex);
+
+	/* Move to end of list to get out of the way for other waiters */
+	list_del(&w->list_node);
+	reinit_completion(&w->c);
+	list_add_tail(&w->list_node, &cq->waiters);
+
+	mutex_unlock(&cq->mutex);
+}
+
+static void optee_cq_complete_one(struct optee_call_queue *cq)
+{
+	struct optee_call_waiter *w;
+
+	list_for_each_entry(w, &cq->waiters, list_node) {
+		if (!completion_done(&w->c)) {
+			complete(&w->c);
+			break;
+		}
+	}
+}
+
+static void optee_cq_wait_final(struct optee_call_queue *cq,
+				struct optee_call_waiter *w)
+{
+	/*
+	 * We're done with the call to secure world. The thread in secure
+	 * world that was used for this call is now available for some
+	 * other task to use.
+	 */
+	mutex_lock(&cq->mutex);
+
+	/* Get out of the list */
+	list_del(&w->list_node);
+
+	/* Wake up one eventual waiting task */
+	optee_cq_complete_one(cq);
+
+	/*
+	 * If we're completed we've got a completion from another task that
+	 * was just done with its call to secure world. Since yet another
+	 * thread now is available in secure world wake up another eventual
+	 * waiting task.
+	 */
+	if (completion_done(&w->c))
+		optee_cq_complete_one(cq);
+
+	mutex_unlock(&cq->mutex);
+}
+
+/* Requires the filpstate mutex to be held */
+static struct optee_session *find_session(struct optee_context_data *ctxdata,
+					  u32 session_id)
+{
+	struct optee_session *sess;
+
+	list_for_each_entry(sess, &ctxdata->sess_list, list_node)
+		if (sess->session_id == session_id)
+			return sess;
+
+	return NULL;
+}
+
+/**
+ * optee_do_call_with_arg() - Do an SMC to OP-TEE in secure world
+ * @ctx:	calling context
+ * @parg:	physical address of message to pass to secure world
+ *
+ * Does and SMC to OP-TEE in secure world and handles eventual resulting
+ * Remote Procedure Calls (RPC) from OP-TEE.
+ *
+ * Returns return code from secure world, 0 is OK
+ */
+u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg)
+{
+	struct optee *optee = tee_get_drvdata(ctx->teedev);
+	struct optee_call_waiter w;
+	struct optee_rpc_param param = { };
+	u32 ret;
+
+	param.a0 = OPTEE_SMC_CALL_WITH_ARG;
+	reg_pair_from_64(&param.a1, &param.a2, parg);
+	/* Initialize waiter */
+	optee_cq_wait_init(&optee->call_queue, &w);
+	while (true) {
+		struct arm_smccc_res res;
+
+		optee->invoke_fn(param.a0, param.a1, param.a2, param.a3,
+				 param.a4, param.a5, param.a6, param.a7,
+				 &res);
+
+		if (res.a0 == OPTEE_SMC_RETURN_ETHREAD_LIMIT) {
+			/*
+			 * Out of threads in secure world, wait for a thread
+			 * become available.
+			 */
+			optee_cq_wait_for_completion(&optee->call_queue, &w);
+		} else if (OPTEE_SMC_RETURN_IS_RPC(res.a0)) {
+			param.a0 = res.a0;
+			param.a1 = res.a1;
+			param.a2 = res.a2;
+			param.a3 = res.a3;
+			optee_handle_rpc(ctx, &param);
+		} else {
+			ret = res.a0;
+			break;
+		}
+	}
+
+	/*
+	 * We're done with our thread in secure world, if there's any
+	 * thread waiters wake up one.
+	 */
+	optee_cq_wait_final(&optee->call_queue, &w);
+
+	return ret;
+}
+
+static struct tee_shm *get_msg_arg(struct tee_context *ctx, size_t num_params,
+				   struct optee_msg_arg **msg_arg,
+				   phys_addr_t *msg_parg)
+{
+	int rc;
+	struct tee_shm *shm;
+	struct optee_msg_arg *ma;
+
+	shm = tee_shm_alloc(ctx, OPTEE_MSG_GET_ARG_SIZE(num_params),
+			    TEE_SHM_MAPPED);
+	if (IS_ERR(shm))
+		return shm;
+
+	ma = tee_shm_get_va(shm, 0);
+	if (IS_ERR(ma)) {
+		rc = PTR_ERR(ma);
+		goto out;
+	}
+
+	rc = tee_shm_get_pa(shm, 0, msg_parg);
+	if (rc)
+		goto out;
+
+	memset(ma, 0, OPTEE_MSG_GET_ARG_SIZE(num_params));
+	ma->num_params = num_params;
+	*msg_arg = ma;
+out:
+	if (rc) {
+		tee_shm_free(shm);
+		return ERR_PTR(rc);
+	}
+
+	return shm;
+}
+
+int optee_open_session(struct tee_context *ctx,
+		       struct tee_ioctl_open_session_arg *arg,
+		       struct tee_param *param)
+{
+	struct optee_context_data *ctxdata = ctx->data;
+	int rc;
+	struct tee_shm *shm;
+	struct optee_msg_arg *msg_arg;
+	phys_addr_t msg_parg;
+	struct optee_session *sess = NULL;
+
+	/* +2 for the meta parameters added below */
+	shm = get_msg_arg(ctx, arg->num_params + 2, &msg_arg, &msg_parg);
+	if (IS_ERR(shm))
+		return PTR_ERR(shm);
+
+	msg_arg->cmd = OPTEE_MSG_CMD_OPEN_SESSION;
+	msg_arg->cancel_id = arg->cancel_id;
+
+	/*
+	 * Initialize and add the meta parameters needed when opening a
+	 * session.
+	 */
+	msg_arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT |
+				  OPTEE_MSG_ATTR_META;
+	msg_arg->params[1].attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT |
+				  OPTEE_MSG_ATTR_META;
+	memcpy(&msg_arg->params[0].u.value, arg->uuid, sizeof(arg->uuid));
+	memcpy(&msg_arg->params[1].u.value, arg->uuid, sizeof(arg->clnt_uuid));
+	msg_arg->params[1].u.value.c = arg->clnt_login;
+
+	rc = optee_to_msg_param(msg_arg->params + 2, arg->num_params, param);
+	if (rc)
+		goto out;
+
+	sess = kzalloc(sizeof(*sess), GFP_KERNEL);
+	if (!sess) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	if (optee_do_call_with_arg(ctx, msg_parg)) {
+		msg_arg->ret = TEEC_ERROR_COMMUNICATION;
+		msg_arg->ret_origin = TEEC_ORIGIN_COMMS;
+	}
+
+	if (msg_arg->ret == TEEC_SUCCESS) {
+		/* A new session has been created, add it to the list. */
+		sess->session_id = msg_arg->session;
+		mutex_lock(&ctxdata->mutex);
+		list_add(&sess->list_node, &ctxdata->sess_list);
+		mutex_unlock(&ctxdata->mutex);
+	} else {
+		kfree(sess);
+	}
+
+	if (optee_from_msg_param(param, arg->num_params, msg_arg->params + 2)) {
+		arg->ret = TEEC_ERROR_COMMUNICATION;
+		arg->ret_origin = TEEC_ORIGIN_COMMS;
+		/* Close session again to avoid leakage */
+		optee_close_session(ctx, msg_arg->session);
+	} else {
+		arg->session = msg_arg->session;
+		arg->ret = msg_arg->ret;
+		arg->ret_origin = msg_arg->ret_origin;
+	}
+out:
+	tee_shm_free(shm);
+
+	return rc;
+}
+
+int optee_close_session(struct tee_context *ctx, u32 session)
+{
+	struct optee_context_data *ctxdata = ctx->data;
+	struct tee_shm *shm;
+	struct optee_msg_arg *msg_arg;
+	phys_addr_t msg_parg;
+	struct optee_session *sess;
+
+	/* Check that the session is valid and remove it from the list */
+	mutex_lock(&ctxdata->mutex);
+	sess = find_session(ctxdata, session);
+	if (sess)
+		list_del(&sess->list_node);
+	mutex_unlock(&ctxdata->mutex);
+	if (!sess)
+		return -EINVAL;
+	kfree(sess);
+
+	shm = get_msg_arg(ctx, 0, &msg_arg, &msg_parg);
+	if (IS_ERR(shm))
+		return PTR_ERR(shm);
+
+	msg_arg->cmd = OPTEE_MSG_CMD_CLOSE_SESSION;
+	msg_arg->session = session;
+	optee_do_call_with_arg(ctx, msg_parg);
+
+	tee_shm_free(shm);
+	return 0;
+}
+
+int optee_invoke_func(struct tee_context *ctx, struct tee_ioctl_invoke_arg *arg,
+		      struct tee_param *param)
+{
+	struct optee_context_data *ctxdata = ctx->data;
+	struct tee_shm *shm;
+	struct optee_msg_arg *msg_arg;
+	phys_addr_t msg_parg;
+	struct optee_session *sess;
+	int rc;
+
+	/* Check that the session is valid */
+	mutex_lock(&ctxdata->mutex);
+	sess = find_session(ctxdata, arg->session);
+	mutex_unlock(&ctxdata->mutex);
+	if (!sess)
+		return -EINVAL;
+
+	shm = get_msg_arg(ctx, arg->num_params, &msg_arg, &msg_parg);
+	if (IS_ERR(shm))
+		return PTR_ERR(shm);
+	msg_arg->cmd = OPTEE_MSG_CMD_INVOKE_COMMAND;
+	msg_arg->func = arg->func;
+	msg_arg->session = arg->session;
+	msg_arg->cancel_id = arg->cancel_id;
+
+	rc = optee_to_msg_param(msg_arg->params, arg->num_params, param);
+	if (rc)
+		goto out;
+
+	if (optee_do_call_with_arg(ctx, msg_parg)) {
+		msg_arg->ret = TEEC_ERROR_COMMUNICATION;
+		msg_arg->ret_origin = TEEC_ORIGIN_COMMS;
+	}
+
+	if (optee_from_msg_param(param, arg->num_params, msg_arg->params)) {
+		msg_arg->ret = TEEC_ERROR_COMMUNICATION;
+		msg_arg->ret_origin = TEEC_ORIGIN_COMMS;
+	}
+
+	arg->ret = msg_arg->ret;
+	arg->ret_origin = msg_arg->ret_origin;
+out:
+	tee_shm_free(shm);
+	return rc;
+}
+
+int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session)
+{
+	struct optee_context_data *ctxdata = ctx->data;
+	struct tee_shm *shm;
+	struct optee_msg_arg *msg_arg;
+	phys_addr_t msg_parg;
+	struct optee_session *sess;
+
+	/* Check that the session is valid */
+	mutex_lock(&ctxdata->mutex);
+	sess = find_session(ctxdata, session);
+	mutex_unlock(&ctxdata->mutex);
+	if (!sess)
+		return -EINVAL;
+
+	shm = get_msg_arg(ctx, 0, &msg_arg, &msg_parg);
+	if (IS_ERR(shm))
+		return PTR_ERR(shm);
+
+	msg_arg->cmd = OPTEE_MSG_CMD_CANCEL;
+	msg_arg->session = session;
+	msg_arg->cancel_id = cancel_id;
+	optee_do_call_with_arg(ctx, msg_parg);
+
+	tee_shm_free(shm);
+	return 0;
+}
+
+/**
+ * optee_enable_shm_cache() - Enables caching of some shared memory allocation
+ *			      in OP-TEE
+ * @optee:	main service struct
+ */
+void optee_enable_shm_cache(struct optee *optee)
+{
+	struct optee_call_waiter w;
+
+	/* We need to retry until secure world isn't busy. */
+	optee_cq_wait_init(&optee->call_queue, &w);
+	while (true) {
+		struct arm_smccc_res res;
+
+		optee->invoke_fn(OPTEE_SMC_ENABLE_SHM_CACHE, 0, 0, 0, 0, 0, 0,
+				 0, &res);
+		if (res.a0 == OPTEE_SMC_RETURN_OK)
+			break;
+		optee_cq_wait_for_completion(&optee->call_queue, &w);
+	}
+	optee_cq_wait_final(&optee->call_queue, &w);
+}
+
+/**
+ * optee_disable_shm_cache() - Disables caching of some shared memory allocation
+ *			      in OP-TEE
+ * @optee:	main service struct
+ */
+void optee_disable_shm_cache(struct optee *optee)
+{
+	struct optee_call_waiter w;
+
+	/* We need to retry until secure world isn't busy. */
+	optee_cq_wait_init(&optee->call_queue, &w);
+	while (true) {
+		union {
+			struct arm_smccc_res smccc;
+			struct optee_smc_disable_shm_cache_result result;
+		} res;
+
+		optee->invoke_fn(OPTEE_SMC_DISABLE_SHM_CACHE, 0, 0, 0, 0, 0, 0,
+				 0, &res.smccc);
+		if (res.result.status == OPTEE_SMC_RETURN_ENOTAVAIL)
+			break; /* All shm's freed */
+		if (res.result.status == OPTEE_SMC_RETURN_OK) {
+			struct tee_shm *shm;
+
+			shm = reg_pair_to_ptr(res.result.shm_upper32,
+					      res.result.shm_lower32);
+			tee_shm_free(shm);
+		} else {
+			optee_cq_wait_for_completion(&optee->call_queue, &w);
+		}
+	}
+	optee_cq_wait_final(&optee->call_queue, &w);
+}
diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
new file mode 100644
index 000000000000..58169e519422
--- /dev/null
+++ b/drivers/tee/optee/core.c
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/arm-smccc.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/tee_drv.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include "optee_private.h"
+#include "optee_smc.h"
+
+#define DRIVER_NAME "optee"
+
+#define OPTEE_SHM_NUM_PRIV_PAGES	1
+
+/**
+ * optee_from_msg_param() - convert from OPTEE_MSG parameters to
+ *			    struct tee_param
+ * @params:	subsystem internal parameter representation
+ * @num_params:	number of elements in the parameter arrays
+ * @msg_params:	OPTEE_MSG parameters
+ * Returns 0 on success or <0 on failure
+ */
+int optee_from_msg_param(struct tee_param *params, size_t num_params,
+			 const struct optee_msg_param *msg_params)
+{
+	int rc;
+	size_t n;
+	struct tee_shm *shm;
+	phys_addr_t pa;
+
+	for (n = 0; n < num_params; n++) {
+		struct tee_param *p = params + n;
+		const struct optee_msg_param *mp = msg_params + n;
+		u32 attr = mp->attr & OPTEE_MSG_ATTR_TYPE_MASK;
+
+		switch (attr) {
+		case OPTEE_MSG_ATTR_TYPE_NONE:
+			p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_NONE;
+			memset(&p->u, 0, sizeof(p->u));
+			break;
+		case OPTEE_MSG_ATTR_TYPE_VALUE_INPUT:
+		case OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT:
+		case OPTEE_MSG_ATTR_TYPE_VALUE_INOUT:
+			p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT +
+				  attr - OPTEE_MSG_ATTR_TYPE_VALUE_INPUT;
+			p->u.value.a = mp->u.value.a;
+			p->u.value.b = mp->u.value.b;
+			p->u.value.c = mp->u.value.c;
+			break;
+		case OPTEE_MSG_ATTR_TYPE_TMEM_INPUT:
+		case OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT:
+		case OPTEE_MSG_ATTR_TYPE_TMEM_INOUT:
+			p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT +
+				  attr - OPTEE_MSG_ATTR_TYPE_TMEM_INPUT;
+			p->u.memref.size = mp->u.tmem.size;
+			shm = (struct tee_shm *)(unsigned long)
+				mp->u.tmem.shm_ref;
+			if (!shm) {
+				p->u.memref.shm_offs = 0;
+				p->u.memref.shm = NULL;
+				break;
+			}
+			rc = tee_shm_get_pa(shm, 0, &pa);
+			if (rc)
+				return rc;
+			p->u.memref.shm_offs = mp->u.tmem.buf_ptr - pa;
+			p->u.memref.shm = shm;
+
+			/* Check that the memref is covered by the shm object */
+			if (p->u.memref.size) {
+				size_t o = p->u.memref.shm_offs +
+					   p->u.memref.size - 1;
+
+				rc = tee_shm_get_pa(shm, o, NULL);
+				if (rc)
+					return rc;
+			}
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+/**
+ * optee_to_msg_param() - convert from struct tee_params to OPTEE_MSG parameters
+ * @msg_params:	OPTEE_MSG parameters
+ * @num_params:	number of elements in the parameter arrays
+ * @params:	subsystem itnernal parameter representation
+ * Returns 0 on success or <0 on failure
+ */
+int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params,
+		       const struct tee_param *params)
+{
+	int rc;
+	size_t n;
+	phys_addr_t pa;
+
+	for (n = 0; n < num_params; n++) {
+		const struct tee_param *p = params + n;
+		struct optee_msg_param *mp = msg_params + n;
+
+		switch (p->attr) {
+		case TEE_IOCTL_PARAM_ATTR_TYPE_NONE:
+			mp->attr = TEE_IOCTL_PARAM_ATTR_TYPE_NONE;
+			memset(&mp->u, 0, sizeof(mp->u));
+			break;
+		case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+			mp->attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT + p->attr -
+				   TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+			mp->u.value.a = p->u.value.a;
+			mp->u.value.b = p->u.value.b;
+			mp->u.value.c = p->u.value.c;
+			break;
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+			mp->attr = OPTEE_MSG_ATTR_TYPE_TMEM_INPUT +
+				   p->attr -
+				   TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+			mp->u.tmem.shm_ref = (unsigned long)p->u.memref.shm;
+			mp->u.tmem.size = p->u.memref.size;
+			if (!p->u.memref.shm) {
+				mp->u.tmem.buf_ptr = 0;
+				break;
+			}
+			rc = tee_shm_get_pa(p->u.memref.shm,
+					    p->u.memref.shm_offs, &pa);
+			if (rc)
+				return rc;
+			mp->u.tmem.buf_ptr = pa;
+			mp->attr |= OPTEE_MSG_ATTR_CACHE_PREDEFINED <<
+					OPTEE_MSG_ATTR_CACHE_SHIFT;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static void optee_get_version(struct tee_device *teedev,
+			      struct tee_ioctl_version_data *vers)
+{
+	struct tee_ioctl_version_data v = {
+		.impl_id = TEE_IMPL_ID_OPTEE,
+		.impl_caps = TEE_OPTEE_CAP_TZ,
+		.gen_caps = TEE_GEN_CAP_GP,
+	};
+	*vers = v;
+}
+
+static int optee_open(struct tee_context *ctx)
+{
+	struct optee_context_data *ctxdata;
+	struct tee_device *teedev = ctx->teedev;
+	struct optee *optee = tee_get_drvdata(teedev);
+
+	ctxdata = kzalloc(sizeof(*ctxdata), GFP_KERNEL);
+	if (!ctxdata)
+		return -ENOMEM;
+
+	if (teedev == optee->supp_teedev) {
+		bool busy = true;
+
+		mutex_lock(&optee->supp.ctx_mutex);
+		if (!optee->supp.ctx) {
+			busy = false;
+			optee->supp.ctx = ctx;
+		}
+		mutex_unlock(&optee->supp.ctx_mutex);
+		if (busy) {
+			kfree(ctxdata);
+			return -EBUSY;
+		}
+	}
+
+	mutex_init(&ctxdata->mutex);
+	INIT_LIST_HEAD(&ctxdata->sess_list);
+
+	ctx->data = ctxdata;
+	return 0;
+}
+
+static void optee_release(struct tee_context *ctx)
+{
+	struct optee_context_data *ctxdata = ctx->data;
+	struct tee_device *teedev = ctx->teedev;
+	struct optee *optee = tee_get_drvdata(teedev);
+	struct tee_shm *shm;
+	struct optee_msg_arg *arg = NULL;
+	phys_addr_t parg;
+	struct optee_session *sess;
+	struct optee_session *sess_tmp;
+
+	if (!ctxdata)
+		return;
+
+	shm = tee_shm_alloc(ctx, sizeof(struct optee_msg_arg), TEE_SHM_MAPPED);
+	if (!IS_ERR(shm)) {
+		arg = tee_shm_get_va(shm, 0);
+		/*
+		 * If va2pa fails for some reason, we can't call
+		 * optee_close_session(), only free the memory. Secure OS
+		 * will leak sessions and finally refuse more sessions, but
+		 * we will at least let normal world reclaim its memory.
+		 */
+		if (!IS_ERR(arg))
+			tee_shm_va2pa(shm, arg, &parg);
+	}
+
+	list_for_each_entry_safe(sess, sess_tmp, &ctxdata->sess_list,
+				 list_node) {
+		list_del(&sess->list_node);
+		if (!IS_ERR_OR_NULL(arg)) {
+			memset(arg, 0, sizeof(*arg));
+			arg->cmd = OPTEE_MSG_CMD_CLOSE_SESSION;
+			arg->session = sess->session_id;
+			optee_do_call_with_arg(ctx, parg);
+		}
+		kfree(sess);
+	}
+	kfree(ctxdata);
+
+	if (!IS_ERR(shm))
+		tee_shm_free(shm);
+
+	ctx->data = NULL;
+
+	if (teedev == optee->supp_teedev) {
+		mutex_lock(&optee->supp.ctx_mutex);
+		optee->supp.ctx = NULL;
+		mutex_unlock(&optee->supp.ctx_mutex);
+	}
+}
+
+static struct tee_driver_ops optee_ops = {
+	.get_version = optee_get_version,
+	.open = optee_open,
+	.release = optee_release,
+	.open_session = optee_open_session,
+	.close_session = optee_close_session,
+	.invoke_func = optee_invoke_func,
+	.cancel_req = optee_cancel_req,
+};
+
+static struct tee_desc optee_desc = {
+	.name = DRIVER_NAME "-clnt",
+	.ops = &optee_ops,
+	.owner = THIS_MODULE,
+};
+
+static struct tee_driver_ops optee_supp_ops = {
+	.get_version = optee_get_version,
+	.open = optee_open,
+	.release = optee_release,
+	.supp_recv = optee_supp_recv,
+	.supp_send = optee_supp_send,
+};
+
+static struct tee_desc optee_supp_desc = {
+	.name = DRIVER_NAME "-supp",
+	.ops = &optee_supp_ops,
+	.owner = THIS_MODULE,
+	.flags = TEE_DESC_PRIVILEGED,
+};
+
+static bool optee_msg_api_uid_is_optee_api(optee_invoke_fn *invoke_fn)
+{
+	struct arm_smccc_res res;
+
+	invoke_fn(OPTEE_SMC_CALLS_UID, 0, 0, 0, 0, 0, 0, 0, &res);
+
+	if (res.a0 == OPTEE_MSG_UID_0 && res.a1 == OPTEE_MSG_UID_1 &&
+	    res.a2 == OPTEE_MSG_UID_2 && res.a3 == OPTEE_MSG_UID_3)
+		return true;
+	return false;
+}
+
+static bool optee_msg_api_revision_is_compatible(optee_invoke_fn *invoke_fn)
+{
+	union {
+		struct arm_smccc_res smccc;
+		struct optee_smc_calls_revision_result result;
+	} res;
+
+	invoke_fn(OPTEE_SMC_CALLS_REVISION, 0, 0, 0, 0, 0, 0, 0, &res.smccc);
+
+	if (res.result.major == OPTEE_MSG_REVISION_MAJOR &&
+	    (int)res.result.minor >= OPTEE_MSG_REVISION_MINOR)
+		return true;
+	return false;
+}
+
+static bool optee_msg_exchange_capabilities(optee_invoke_fn *invoke_fn,
+					    u32 *sec_caps)
+{
+	union {
+		struct arm_smccc_res smccc;
+		struct optee_smc_exchange_capabilities_result result;
+	} res;
+	u32 a1 = 0;
+
+	/*
+	 * TODO This isn't enough to tell if it's UP system (from kernel
+	 * point of view) or not, is_smp() returns the the information
+	 * needed, but can't be called directly from here.
+	 */
+	if (!IS_ENABLED(CONFIG_SMP) || nr_cpu_ids == 1)
+		a1 |= OPTEE_SMC_NSEC_CAP_UNIPROCESSOR;
+
+	invoke_fn(OPTEE_SMC_EXCHANGE_CAPABILITIES, a1, 0, 0, 0, 0, 0, 0,
+		  &res.smccc);
+
+	if (res.result.status != OPTEE_SMC_RETURN_OK)
+		return false;
+
+	*sec_caps = res.result.capabilities;
+	return true;
+}
+
+static struct tee_shm_pool *
+optee_config_shm_memremap(optee_invoke_fn *invoke_fn, void **memremaped_shm)
+{
+	union {
+		struct arm_smccc_res smccc;
+		struct optee_smc_get_shm_config_result result;
+	} res;
+	struct tee_shm_pool *pool;
+	unsigned long vaddr;
+	phys_addr_t paddr;
+	size_t size;
+	phys_addr_t begin;
+	phys_addr_t end;
+	void *va;
+	struct tee_shm_pool_mem_info priv_info;
+	struct tee_shm_pool_mem_info dmabuf_info;
+
+	invoke_fn(OPTEE_SMC_GET_SHM_CONFIG, 0, 0, 0, 0, 0, 0, 0, &res.smccc);
+	if (res.result.status != OPTEE_SMC_RETURN_OK) {
+		pr_info("shm service not available\n");
+		return ERR_PTR(-ENOENT);
+	}
+
+	if (res.result.settings != OPTEE_SMC_SHM_CACHED) {
+		pr_err("only normal cached shared memory supported\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	begin = roundup(res.result.start, PAGE_SIZE);
+	end = rounddown(res.result.start + res.result.size, PAGE_SIZE);
+	paddr = begin;
+	size = end - begin;
+
+	if (size < 2 * OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE) {
+		pr_err("too small shared memory area\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	va = memremap(paddr, size, MEMREMAP_WB);
+	if (!va) {
+		pr_err("shared memory ioremap failed\n");
+		return ERR_PTR(-EINVAL);
+	}
+	vaddr = (unsigned long)va;
+
+	priv_info.vaddr = vaddr;
+	priv_info.paddr = paddr;
+	priv_info.size = OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE;
+	dmabuf_info.vaddr = vaddr + OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE;
+	dmabuf_info.paddr = paddr + OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE;
+	dmabuf_info.size = size - OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE;
+
+	pool = tee_shm_pool_alloc_res_mem(&priv_info, &dmabuf_info);
+	if (IS_ERR(pool)) {
+		memunmap(va);
+		goto out;
+	}
+
+	*memremaped_shm = va;
+out:
+	return pool;
+}
+
+/* Simple wrapper functions to be able to use a function pointer */
+static void optee_smccc_smc(unsigned long a0, unsigned long a1,
+			    unsigned long a2, unsigned long a3,
+			    unsigned long a4, unsigned long a5,
+			    unsigned long a6, unsigned long a7,
+			    struct arm_smccc_res *res)
+{
+	arm_smccc_smc(a0, a1, a2, a3, a4, a5, a6, a7, res);
+}
+
+static void optee_smccc_hvc(unsigned long a0, unsigned long a1,
+			    unsigned long a2, unsigned long a3,
+			    unsigned long a4, unsigned long a5,
+			    unsigned long a6, unsigned long a7,
+			    struct arm_smccc_res *res)
+{
+	arm_smccc_hvc(a0, a1, a2, a3, a4, a5, a6, a7, res);
+}
+
+static optee_invoke_fn *get_invoke_func(struct device_node *np)
+{
+	const char *method;
+
+	pr_info("probing for conduit method from DT.\n");
+
+	if (of_property_read_string(np, "method", &method)) {
+		pr_warn("missing \"method\" property\n");
+		return ERR_PTR(-ENXIO);
+	}
+
+	if (!strcmp("hvc", method))
+		return optee_smccc_hvc;
+	else if (!strcmp("smc", method))
+		return optee_smccc_smc;
+
+	pr_warn("invalid \"method\" property: %s\n", method);
+	return ERR_PTR(-EINVAL);
+}
+
+static struct optee *optee_probe(struct device_node *np)
+{
+	optee_invoke_fn *invoke_fn;
+	struct tee_shm_pool *pool;
+	struct optee *optee = NULL;
+	void *memremaped_shm = NULL;
+	struct tee_device *teedev;
+	u32 sec_caps;
+	int rc;
+
+	invoke_fn = get_invoke_func(np);
+	if (IS_ERR(invoke_fn))
+		return (void *)invoke_fn;
+
+	if (!optee_msg_api_uid_is_optee_api(invoke_fn)) {
+		pr_warn("api uid mismatch\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!optee_msg_api_revision_is_compatible(invoke_fn)) {
+		pr_warn("api revision mismatch\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!optee_msg_exchange_capabilities(invoke_fn, &sec_caps)) {
+		pr_warn("capabilities mismatch\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/*
+	 * We have no other option for shared memory, if secure world
+	 * doesn't have any reserved memory we can use we can't continue.
+	 */
+	if (!(sec_caps & OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM))
+		return ERR_PTR(-EINVAL);
+
+	pool = optee_config_shm_memremap(invoke_fn, &memremaped_shm);
+	if (IS_ERR(pool))
+		return (void *)pool;
+
+	optee = kzalloc(sizeof(*optee), GFP_KERNEL);
+	if (!optee) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	optee->invoke_fn = invoke_fn;
+
+	teedev = tee_device_alloc(&optee_desc, NULL, pool, optee);
+	if (IS_ERR(teedev)) {
+		rc = PTR_ERR(teedev);
+		goto err;
+	}
+	optee->teedev = teedev;
+
+	teedev = tee_device_alloc(&optee_supp_desc, NULL, pool, optee);
+	if (IS_ERR(teedev)) {
+		rc = PTR_ERR(teedev);
+		goto err;
+	}
+	optee->supp_teedev = teedev;
+
+	rc = tee_device_register(optee->teedev);
+	if (rc)
+		goto err;
+
+	rc = tee_device_register(optee->supp_teedev);
+	if (rc)
+		goto err;
+
+	mutex_init(&optee->call_queue.mutex);
+	INIT_LIST_HEAD(&optee->call_queue.waiters);
+	optee_wait_queue_init(&optee->wait_queue);
+	optee_supp_init(&optee->supp);
+	optee->memremaped_shm = memremaped_shm;
+	optee->pool = pool;
+
+	optee_enable_shm_cache(optee);
+
+	pr_info("initialized driver\n");
+	return optee;
+err:
+	if (optee) {
+		/*
+		 * tee_device_unregister() is safe to call even if the
+		 * devices hasn't been registered with
+		 * tee_device_register() yet.
+		 */
+		tee_device_unregister(optee->supp_teedev);
+		tee_device_unregister(optee->teedev);
+		kfree(optee);
+	}
+	if (pool)
+		tee_shm_pool_free(pool);
+	if (memremaped_shm)
+		memunmap(memremaped_shm);
+	return ERR_PTR(rc);
+}
+
+static void optee_remove(struct optee *optee)
+{
+	/*
+	 * Ask OP-TEE to free all cached shared memory objects to decrease
+	 * reference counters and also avoid wild pointers in secure world
+	 * into the old shared memory range.
+	 */
+	optee_disable_shm_cache(optee);
+
+	/*
+	 * The two devices has to be unregistered before we can free the
+	 * other resources.
+	 */
+	tee_device_unregister(optee->supp_teedev);
+	tee_device_unregister(optee->teedev);
+
+	tee_shm_pool_free(optee->pool);
+	if (optee->memremaped_shm)
+		memunmap(optee->memremaped_shm);
+	optee_wait_queue_exit(&optee->wait_queue);
+	optee_supp_uninit(&optee->supp);
+	mutex_destroy(&optee->call_queue.mutex);
+
+	kfree(optee);
+}
+
+static const struct of_device_id optee_match[] = {
+	{ .compatible = "linaro,optee-tz" },
+	{},
+};
+
+static struct optee *optee_svc;
+
+static int __init optee_driver_init(void)
+{
+	struct device_node *fw_np;
+	struct device_node *np;
+	struct optee *optee;
+
+	/* Node is supposed to be below /firmware */
+	fw_np = of_find_node_by_name(NULL, "firmware");
+	if (!fw_np)
+		return -ENODEV;
+
+	np = of_find_matching_node(fw_np, optee_match);
+	of_node_put(fw_np);
+	if (!np)
+		return -ENODEV;
+
+	optee = optee_probe(np);
+	of_node_put(np);
+
+	if (IS_ERR(optee))
+		return PTR_ERR(optee);
+
+	optee_svc = optee;
+
+	return 0;
+}
+module_init(optee_driver_init);
+
+static void __exit optee_driver_exit(void)
+{
+	struct optee *optee = optee_svc;
+
+	optee_svc = NULL;
+	if (optee)
+		optee_remove(optee);
+}
+module_exit(optee_driver_exit);
+
+MODULE_AUTHOR("Linaro");
+MODULE_DESCRIPTION("OP-TEE driver");
+MODULE_SUPPORTED_DEVICE("");
+MODULE_VERSION("1.0");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/tee/optee/optee_msg.h b/drivers/tee/optee/optee_msg.h
new file mode 100644
index 000000000000..dd7a06ee0462
--- /dev/null
+++ b/drivers/tee/optee/optee_msg.h
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _OPTEE_MSG_H
+#define _OPTEE_MSG_H
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+
+/*
+ * This file defines the OP-TEE message protocol used to communicate
+ * with an instance of OP-TEE running in secure world.
+ *
+ * This file is divided into three sections.
+ * 1. Formatting of messages.
+ * 2. Requests from normal world
+ * 3. Requests from secure world, Remote Procedure Call (RPC), handled by
+ *    tee-supplicant.
+ */
+
+/*****************************************************************************
+ * Part 1 - formatting of messages
+ *****************************************************************************/
+
+#define OPTEE_MSG_ATTR_TYPE_NONE		0x0
+#define OPTEE_MSG_ATTR_TYPE_VALUE_INPUT		0x1
+#define OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT	0x2
+#define OPTEE_MSG_ATTR_TYPE_VALUE_INOUT		0x3
+#define OPTEE_MSG_ATTR_TYPE_RMEM_INPUT		0x5
+#define OPTEE_MSG_ATTR_TYPE_RMEM_OUTPUT		0x6
+#define OPTEE_MSG_ATTR_TYPE_RMEM_INOUT		0x7
+#define OPTEE_MSG_ATTR_TYPE_TMEM_INPUT		0x9
+#define OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT		0xa
+#define OPTEE_MSG_ATTR_TYPE_TMEM_INOUT		0xb
+
+#define OPTEE_MSG_ATTR_TYPE_MASK		GENMASK(7, 0)
+
+/*
+ * Meta parameter to be absorbed by the Secure OS and not passed
+ * to the Trusted Application.
+ *
+ * Currently only used with OPTEE_MSG_CMD_OPEN_SESSION.
+ */
+#define OPTEE_MSG_ATTR_META			BIT(8)
+
+/*
+ * The temporary shared memory object is not physically contigous and this
+ * temp memref is followed by another fragment until the last temp memref
+ * that doesn't have this bit set.
+ */
+#define OPTEE_MSG_ATTR_FRAGMENT			BIT(9)
+
+/*
+ * Memory attributes for caching passed with temp memrefs. The actual value
+ * used is defined outside the message protocol with the exception of
+ * OPTEE_MSG_ATTR_CACHE_PREDEFINED which means the attributes already
+ * defined for the memory range should be used. If optee_smc.h is used as
+ * bearer of this protocol OPTEE_SMC_SHM_* is used for values.
+ */
+#define OPTEE_MSG_ATTR_CACHE_SHIFT		16
+#define OPTEE_MSG_ATTR_CACHE_MASK		GENMASK(2, 0)
+#define OPTEE_MSG_ATTR_CACHE_PREDEFINED		0
+
+/*
+ * Same values as TEE_LOGIN_* from TEE Internal API
+ */
+#define OPTEE_MSG_LOGIN_PUBLIC			0x00000000
+#define OPTEE_MSG_LOGIN_USER			0x00000001
+#define OPTEE_MSG_LOGIN_GROUP			0x00000002
+#define OPTEE_MSG_LOGIN_APPLICATION		0x00000004
+#define OPTEE_MSG_LOGIN_APPLICATION_USER	0x00000005
+#define OPTEE_MSG_LOGIN_APPLICATION_GROUP	0x00000006
+
+/**
+ * struct optee_msg_param_tmem - temporary memory reference parameter
+ * @buf_ptr:	Address of the buffer
+ * @size:	Size of the buffer
+ * @shm_ref:	Temporary shared memory reference, pointer to a struct tee_shm
+ *
+ * Secure and normal world communicates pointers as physical address
+ * instead of the virtual address. This is because secure and normal world
+ * have completely independent memory mapping. Normal world can even have a
+ * hypervisor which need to translate the guest physical address (AKA IPA
+ * in ARM documentation) to a real physical address before passing the
+ * structure to secure world.
+ */
+struct optee_msg_param_tmem {
+	u64 buf_ptr;
+	u64 size;
+	u64 shm_ref;
+};
+
+/**
+ * struct optee_msg_param_rmem - registered memory reference parameter
+ * @offs:	Offset into shared memory reference
+ * @size:	Size of the buffer
+ * @shm_ref:	Shared memory reference, pointer to a struct tee_shm
+ */
+struct optee_msg_param_rmem {
+	u64 offs;
+	u64 size;
+	u64 shm_ref;
+};
+
+/**
+ * struct optee_msg_param_value - opaque value parameter
+ *
+ * Value parameters are passed unchecked between normal and secure world.
+ */
+struct optee_msg_param_value {
+	u64 a;
+	u64 b;
+	u64 c;
+};
+
+/**
+ * struct optee_msg_param - parameter used together with struct optee_msg_arg
+ * @attr:	attributes
+ * @tmem:	parameter by temporary memory reference
+ * @rmem:	parameter by registered memory reference
+ * @value:	parameter by opaque value
+ *
+ * @attr & OPTEE_MSG_ATTR_TYPE_MASK indicates if tmem, rmem or value is used in
+ * the union. OPTEE_MSG_ATTR_TYPE_VALUE_* indicates value,
+ * OPTEE_MSG_ATTR_TYPE_TMEM_* indicates tmem and
+ * OPTEE_MSG_ATTR_TYPE_RMEM_* indicates rmem.
+ * OPTEE_MSG_ATTR_TYPE_NONE indicates that none of the members are used.
+ */
+struct optee_msg_param {
+	u64 attr;
+	union {
+		struct optee_msg_param_tmem tmem;
+		struct optee_msg_param_rmem rmem;
+		struct optee_msg_param_value value;
+	} u;
+};
+
+/**
+ * struct optee_msg_arg - call argument
+ * @cmd: Command, one of OPTEE_MSG_CMD_* or OPTEE_MSG_RPC_CMD_*
+ * @func: Trusted Application function, specific to the Trusted Application,
+ *	     used if cmd == OPTEE_MSG_CMD_INVOKE_COMMAND
+ * @session: In parameter for all OPTEE_MSG_CMD_* except
+ *	     OPTEE_MSG_CMD_OPEN_SESSION where it's an output parameter instead
+ * @cancel_id: Cancellation id, a unique value to identify this request
+ * @ret: return value
+ * @ret_origin: origin of the return value
+ * @num_params: number of parameters supplied to the OS Command
+ * @params: the parameters supplied to the OS Command
+ *
+ * All normal calls to Trusted OS uses this struct. If cmd requires further
+ * information than what these field holds it can be passed as a parameter
+ * tagged as meta (setting the OPTEE_MSG_ATTR_META bit in corresponding
+ * attrs field). All parameters tagged as meta has to come first.
+ *
+ * Temp memref parameters can be fragmented if supported by the Trusted OS
+ * (when optee_smc.h is bearer of this protocol this is indicated with
+ * OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM). If a logical memref parameter is
+ * fragmented then has all but the last fragment the
+ * OPTEE_MSG_ATTR_FRAGMENT bit set in attrs. Even if a memref is fragmented
+ * it will still be presented as a single logical memref to the Trusted
+ * Application.
+ */
+struct optee_msg_arg {
+	u32 cmd;
+	u32 func;
+	u32 session;
+	u32 cancel_id;
+	u32 pad;
+	u32 ret;
+	u32 ret_origin;
+	u32 num_params;
+
+	/* num_params tells the actual number of element in params */
+	struct optee_msg_param params[0];
+};
+
+/**
+ * OPTEE_MSG_GET_ARG_SIZE - return size of struct optee_msg_arg
+ *
+ * @num_params: Number of parameters embedded in the struct optee_msg_arg
+ *
+ * Returns the size of the struct optee_msg_arg together with the number
+ * of embedded parameters.
+ */
+#define OPTEE_MSG_GET_ARG_SIZE(num_params) \
+	(sizeof(struct optee_msg_arg) + \
+	 sizeof(struct optee_msg_param) * (num_params))
+
+/*****************************************************************************
+ * Part 2 - requests from normal world
+ *****************************************************************************/
+
+/*
+ * Return the following UID if using API specified in this file without
+ * further extensions:
+ * 384fb3e0-e7f8-11e3-af63-0002a5d5c51b.
+ * Represented in 4 32-bit words in OPTEE_MSG_UID_0, OPTEE_MSG_UID_1,
+ * OPTEE_MSG_UID_2, OPTEE_MSG_UID_3.
+ */
+#define OPTEE_MSG_UID_0			0x384fb3e0
+#define OPTEE_MSG_UID_1			0xe7f811e3
+#define OPTEE_MSG_UID_2			0xaf630002
+#define OPTEE_MSG_UID_3			0xa5d5c51b
+#define OPTEE_MSG_FUNCID_CALLS_UID	0xFF01
+
+/*
+ * Returns 2.0 if using API specified in this file without further
+ * extensions. Represented in 2 32-bit words in OPTEE_MSG_REVISION_MAJOR
+ * and OPTEE_MSG_REVISION_MINOR
+ */
+#define OPTEE_MSG_REVISION_MAJOR	2
+#define OPTEE_MSG_REVISION_MINOR	0
+#define OPTEE_MSG_FUNCID_CALLS_REVISION	0xFF03
+
+/*
+ * Get UUID of Trusted OS.
+ *
+ * Used by non-secure world to figure out which Trusted OS is installed.
+ * Note that returned UUID is the UUID of the Trusted OS, not of the API.
+ *
+ * Returns UUID in 4 32-bit words in the same way as
+ * OPTEE_MSG_FUNCID_CALLS_UID described above.
+ */
+#define OPTEE_MSG_OS_OPTEE_UUID_0	0x486178e0
+#define OPTEE_MSG_OS_OPTEE_UUID_1	0xe7f811e3
+#define OPTEE_MSG_OS_OPTEE_UUID_2	0xbc5e0002
+#define OPTEE_MSG_OS_OPTEE_UUID_3	0xa5d5c51b
+#define OPTEE_MSG_FUNCID_GET_OS_UUID	0x0000
+
+/*
+ * Get revision of Trusted OS.
+ *
+ * Used by non-secure world to figure out which version of the Trusted OS
+ * is installed. Note that the returned revision is the revision of the
+ * Trusted OS, not of the API.
+ *
+ * Returns revision in 2 32-bit words in the same way as
+ * OPTEE_MSG_CALLS_REVISION described above.
+ */
+#define OPTEE_MSG_FUNCID_GET_OS_REVISION	0x0001
+
+/*
+ * Do a secure call with struct optee_msg_arg as argument
+ * The OPTEE_MSG_CMD_* below defines what goes in struct optee_msg_arg::cmd
+ *
+ * OPTEE_MSG_CMD_OPEN_SESSION opens a session to a Trusted Application.
+ * The first two parameters are tagged as meta, holding two value
+ * parameters to pass the following information:
+ * param[0].u.value.a-b uuid of Trusted Application
+ * param[1].u.value.a-b uuid of Client
+ * param[1].u.value.c Login class of client OPTEE_MSG_LOGIN_*
+ *
+ * OPTEE_MSG_CMD_INVOKE_COMMAND invokes a command a previously opened
+ * session to a Trusted Application.  struct optee_msg_arg::func is Trusted
+ * Application function, specific to the Trusted Application.
+ *
+ * OPTEE_MSG_CMD_CLOSE_SESSION closes a previously opened session to
+ * Trusted Application.
+ *
+ * OPTEE_MSG_CMD_CANCEL cancels a currently invoked command.
+ *
+ * OPTEE_MSG_CMD_REGISTER_SHM registers a shared memory reference. The
+ * information is passed as:
+ * [in] param[0].attr			OPTEE_MSG_ATTR_TYPE_TMEM_INPUT
+ *					[| OPTEE_MSG_ATTR_FRAGMENT]
+ * [in] param[0].u.tmem.buf_ptr		physical address (of first fragment)
+ * [in] param[0].u.tmem.size		size (of first fragment)
+ * [in] param[0].u.tmem.shm_ref		holds shared memory reference
+ * ...
+ * The shared memory can optionally be fragmented, temp memrefs can follow
+ * each other with all but the last with the OPTEE_MSG_ATTR_FRAGMENT bit set.
+ *
+ * OPTEE_MSG_CMD_UNREGISTER_SHM unregisteres a previously registered shared
+ * memory reference. The information is passed as:
+ * [in] param[0].attr			OPTEE_MSG_ATTR_TYPE_RMEM_INPUT
+ * [in] param[0].u.rmem.shm_ref		holds shared memory reference
+ * [in] param[0].u.rmem.offs		0
+ * [in] param[0].u.rmem.size		0
+ */
+#define OPTEE_MSG_CMD_OPEN_SESSION	0
+#define OPTEE_MSG_CMD_INVOKE_COMMAND	1
+#define OPTEE_MSG_CMD_CLOSE_SESSION	2
+#define OPTEE_MSG_CMD_CANCEL		3
+#define OPTEE_MSG_CMD_REGISTER_SHM	4
+#define OPTEE_MSG_CMD_UNREGISTER_SHM	5
+#define OPTEE_MSG_FUNCID_CALL_WITH_ARG	0x0004
+
+/*****************************************************************************
+ * Part 3 - Requests from secure world, RPC
+ *****************************************************************************/
+
+/*
+ * All RPC is done with a struct optee_msg_arg as bearer of information,
+ * struct optee_msg_arg::arg holds values defined by OPTEE_MSG_RPC_CMD_* below
+ *
+ * RPC communication with tee-supplicant is reversed compared to normal
+ * client communication desribed above. The supplicant receives requests
+ * and sends responses.
+ */
+
+/*
+ * Load a TA into memory, defined in tee-supplicant
+ */
+#define OPTEE_MSG_RPC_CMD_LOAD_TA	0
+
+/*
+ * Reserved
+ */
+#define OPTEE_MSG_RPC_CMD_RPMB		1
+
+/*
+ * File system access, defined in tee-supplicant
+ */
+#define OPTEE_MSG_RPC_CMD_FS		2
+
+/*
+ * Get time
+ *
+ * Returns number of seconds and nano seconds since the Epoch,
+ * 1970-01-01 00:00:00 +0000 (UTC).
+ *
+ * [out] param[0].u.value.a	Number of seconds
+ * [out] param[0].u.value.b	Number of nano seconds.
+ */
+#define OPTEE_MSG_RPC_CMD_GET_TIME	3
+
+/*
+ * Wait queue primitive, helper for secure world to implement a wait queue.
+ *
+ * If secure world need to wait for a secure world mutex it issues a sleep
+ * request instead of spinning in secure world. Conversely is a wakeup
+ * request issued when a secure world mutex with a thread waiting thread is
+ * unlocked.
+ *
+ * Waiting on a key
+ * [in] param[0].u.value.a OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP
+ * [in] param[0].u.value.b wait key
+ *
+ * Waking up a key
+ * [in] param[0].u.value.a OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP
+ * [in] param[0].u.value.b wakeup key
+ */
+#define OPTEE_MSG_RPC_CMD_WAIT_QUEUE	4
+#define OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP	0
+#define OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP	1
+
+/*
+ * Suspend execution
+ *
+ * [in] param[0].value	.a number of milliseconds to suspend
+ */
+#define OPTEE_MSG_RPC_CMD_SUSPEND	5
+
+/*
+ * Allocate a piece of shared memory
+ *
+ * Shared memory can optionally be fragmented, to support that additional
+ * spare param entries are allocated to make room for eventual fragments.
+ * The spare param entries has .attr = OPTEE_MSG_ATTR_TYPE_NONE when
+ * unused. All returned temp memrefs except the last should have the
+ * OPTEE_MSG_ATTR_FRAGMENT bit set in the attr field.
+ *
+ * [in]  param[0].u.value.a		type of memory one of
+ *					OPTEE_MSG_RPC_SHM_TYPE_* below
+ * [in]  param[0].u.value.b		requested size
+ * [in]  param[0].u.value.c		required alignment
+ *
+ * [out] param[0].u.tmem.buf_ptr	physical address (of first fragment)
+ * [out] param[0].u.tmem.size		size (of first fragment)
+ * [out] param[0].u.tmem.shm_ref	shared memory reference
+ * ...
+ * [out] param[n].u.tmem.buf_ptr	physical address
+ * [out] param[n].u.tmem.size		size
+ * [out] param[n].u.tmem.shm_ref	shared memory reference (same value
+ *					as in param[n-1].u.tmem.shm_ref)
+ */
+#define OPTEE_MSG_RPC_CMD_SHM_ALLOC	6
+/* Memory that can be shared with a non-secure user space application */
+#define OPTEE_MSG_RPC_SHM_TYPE_APPL	0
+/* Memory only shared with non-secure kernel */
+#define OPTEE_MSG_RPC_SHM_TYPE_KERNEL	1
+
+/*
+ * Free shared memory previously allocated with OPTEE_MSG_RPC_CMD_SHM_ALLOC
+ *
+ * [in]  param[0].u.value.a		type of memory one of
+ *					OPTEE_MSG_RPC_SHM_TYPE_* above
+ * [in]  param[0].u.value.b		value of shared memory reference
+ *					returned in param[0].u.tmem.shm_ref
+ *					above
+ */
+#define OPTEE_MSG_RPC_CMD_SHM_FREE	7
+
+#endif /* _OPTEE_MSG_H */
diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h
new file mode 100644
index 000000000000..c374cd594314
--- /dev/null
+++ b/drivers/tee/optee/optee_private.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef OPTEE_PRIVATE_H
+#define OPTEE_PRIVATE_H
+
+#include <linux/arm-smccc.h>
+#include <linux/semaphore.h>
+#include <linux/tee_drv.h>
+#include <linux/types.h>
+#include "optee_msg.h"
+
+#define OPTEE_MAX_ARG_SIZE	1024
+
+/* Some Global Platform error codes used in this driver */
+#define TEEC_SUCCESS			0x00000000
+#define TEEC_ERROR_BAD_PARAMETERS	0xFFFF0006
+#define TEEC_ERROR_COMMUNICATION	0xFFFF000E
+#define TEEC_ERROR_OUT_OF_MEMORY	0xFFFF000C
+
+#define TEEC_ORIGIN_COMMS		0x00000002
+
+typedef void (optee_invoke_fn)(unsigned long, unsigned long, unsigned long,
+				unsigned long, unsigned long, unsigned long,
+				unsigned long, unsigned long,
+				struct arm_smccc_res *);
+
+struct optee_call_queue {
+	/* Serializes access to this struct */
+	struct mutex mutex;
+	struct list_head waiters;
+};
+
+struct optee_wait_queue {
+	/* Serializes access to this struct */
+	struct mutex mu;
+	struct list_head db;
+};
+
+/**
+ * struct optee_supp - supplicant synchronization struct
+ * @ctx			the context of current connected supplicant.
+ *			if !NULL the supplicant device is available for use,
+ *			else busy
+ * @ctx_mutex:		held while accessing @ctx
+ * @func:		supplicant function id to call
+ * @ret:		call return value
+ * @num_params:		number of elements in @param
+ * @param:		parameters for @func
+ * @req_posted:		if true, a request has been posted to the supplicant
+ * @supp_next_send:	if true, next step is for supplicant to send response
+ * @thrd_mutex:		held by the thread doing a request to supplicant
+ * @supp_mutex:		held by supplicant while operating on this struct
+ * @data_to_supp:	supplicant is waiting on this for next request
+ * @data_from_supp:	requesting thread is waiting on this to get the result
+ */
+struct optee_supp {
+	struct tee_context *ctx;
+	/* Serializes access of ctx */
+	struct mutex ctx_mutex;
+
+	u32 func;
+	u32 ret;
+	size_t num_params;
+	struct tee_param *param;
+
+	bool req_posted;
+	bool supp_next_send;
+	/* Serializes access to this struct for requesting thread */
+	struct mutex thrd_mutex;
+	/* Serializes access to this struct for supplicant threads */
+	struct mutex supp_mutex;
+	struct completion data_to_supp;
+	struct completion data_from_supp;
+};
+
+/**
+ * struct optee - main service struct
+ * @supp_teedev:	supplicant device
+ * @teedev:		client device
+ * @invoke_fn:		function to issue smc or hvc
+ * @call_queue:		queue of threads waiting to call @invoke_fn
+ * @wait_queue:		queue of threads from secure world waiting for a
+ *			secure world sync object
+ * @supp:		supplicant synchronization struct for RPC to supplicant
+ * @pool:		shared memory pool
+ * @memremaped_shm	virtual address of memory in shared memory pool
+ */
+struct optee {
+	struct tee_device *supp_teedev;
+	struct tee_device *teedev;
+	optee_invoke_fn *invoke_fn;
+	struct optee_call_queue call_queue;
+	struct optee_wait_queue wait_queue;
+	struct optee_supp supp;
+	struct tee_shm_pool *pool;
+	void *memremaped_shm;
+};
+
+struct optee_session {
+	struct list_head list_node;
+	u32 session_id;
+};
+
+struct optee_context_data {
+	/* Serializes access to this struct */
+	struct mutex mutex;
+	struct list_head sess_list;
+};
+
+struct optee_rpc_param {
+	u32	a0;
+	u32	a1;
+	u32	a2;
+	u32	a3;
+	u32	a4;
+	u32	a5;
+	u32	a6;
+	u32	a7;
+};
+
+void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param);
+
+void optee_wait_queue_init(struct optee_wait_queue *wq);
+void optee_wait_queue_exit(struct optee_wait_queue *wq);
+
+u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params,
+			struct tee_param *param);
+
+int optee_supp_read(struct tee_context *ctx, void __user *buf, size_t len);
+int optee_supp_write(struct tee_context *ctx, void __user *buf, size_t len);
+void optee_supp_init(struct optee_supp *supp);
+void optee_supp_uninit(struct optee_supp *supp);
+
+int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params,
+		    struct tee_param *param);
+int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params,
+		    struct tee_param *param);
+
+u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg);
+int optee_open_session(struct tee_context *ctx,
+		       struct tee_ioctl_open_session_arg *arg,
+		       struct tee_param *param);
+int optee_close_session(struct tee_context *ctx, u32 session);
+int optee_invoke_func(struct tee_context *ctx, struct tee_ioctl_invoke_arg *arg,
+		      struct tee_param *param);
+int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session);
+
+void optee_enable_shm_cache(struct optee *optee);
+void optee_disable_shm_cache(struct optee *optee);
+
+int optee_from_msg_param(struct tee_param *params, size_t num_params,
+			 const struct optee_msg_param *msg_params);
+int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params,
+		       const struct tee_param *params);
+
+/*
+ * Small helpers
+ */
+
+static inline void *reg_pair_to_ptr(u32 reg0, u32 reg1)
+{
+	return (void *)(unsigned long)(((u64)reg0 << 32) | reg1);
+}
+
+static inline void reg_pair_from_64(u32 *reg0, u32 *reg1, u64 val)
+{
+	*reg0 = val >> 32;
+	*reg1 = val;
+}
+
+#endif /*OPTEE_PRIVATE_H*/
diff --git a/drivers/tee/optee/optee_smc.h b/drivers/tee/optee/optee_smc.h
new file mode 100644
index 000000000000..13b7c98cdf25
--- /dev/null
+++ b/drivers/tee/optee/optee_smc.h
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef OPTEE_SMC_H
+#define OPTEE_SMC_H
+
+#include <linux/arm-smccc.h>
+#include <linux/bitops.h>
+
+#define OPTEE_SMC_STD_CALL_VAL(func_num) \
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL, ARM_SMCCC_SMC_32, \
+			   ARM_SMCCC_OWNER_TRUSTED_OS, (func_num))
+#define OPTEE_SMC_FAST_CALL_VAL(func_num) \
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
+			   ARM_SMCCC_OWNER_TRUSTED_OS, (func_num))
+
+/*
+ * Function specified by SMC Calling convention.
+ */
+#define OPTEE_SMC_FUNCID_CALLS_COUNT	0xFF00
+#define OPTEE_SMC_CALLS_COUNT \
+	ARM_SMCCC_CALL_VAL(OPTEE_SMC_FAST_CALL, SMCCC_SMC_32, \
+			   SMCCC_OWNER_TRUSTED_OS_END, \
+			   OPTEE_SMC_FUNCID_CALLS_COUNT)
+
+/*
+ * Normal cached memory (write-back), shareable for SMP systems and not
+ * shareable for UP systems.
+ */
+#define OPTEE_SMC_SHM_CACHED		1
+
+/*
+ * a0..a7 is used as register names in the descriptions below, on arm32
+ * that translates to r0..r7 and on arm64 to w0..w7. In both cases it's
+ * 32-bit registers.
+ */
+
+/*
+ * Function specified by SMC Calling convention
+ *
+ * Return one of the following UIDs if using API specified in this file
+ * without further extentions:
+ * 65cb6b93-af0c-4617-8ed6-644a8d1140f8
+ * see also OPTEE_SMC_UID_* in optee_msg.h
+ */
+#define OPTEE_SMC_FUNCID_CALLS_UID OPTEE_MSG_FUNCID_CALLS_UID
+#define OPTEE_SMC_CALLS_UID \
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
+			   ARM_SMCCC_OWNER_TRUSTED_OS_END, \
+			   OPTEE_SMC_FUNCID_CALLS_UID)
+
+/*
+ * Function specified by SMC Calling convention
+ *
+ * Returns 2.0 if using API specified in this file without further extentions.
+ * see also OPTEE_MSG_REVISION_* in optee_msg.h
+ */
+#define OPTEE_SMC_FUNCID_CALLS_REVISION OPTEE_MSG_FUNCID_CALLS_REVISION
+#define OPTEE_SMC_CALLS_REVISION \
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
+			   ARM_SMCCC_OWNER_TRUSTED_OS_END, \
+			   OPTEE_SMC_FUNCID_CALLS_REVISION)
+
+struct optee_smc_calls_revision_result {
+	unsigned long major;
+	unsigned long minor;
+	unsigned long reserved0;
+	unsigned long reserved1;
+};
+
+/*
+ * Get UUID of Trusted OS.
+ *
+ * Used by non-secure world to figure out which Trusted OS is installed.
+ * Note that returned UUID is the UUID of the Trusted OS, not of the API.
+ *
+ * Returns UUID in a0-4 in the same way as OPTEE_SMC_CALLS_UID
+ * described above.
+ */
+#define OPTEE_SMC_FUNCID_GET_OS_UUID OPTEE_MSG_FUNCID_GET_OS_UUID
+#define OPTEE_SMC_CALL_GET_OS_UUID \
+	OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_OS_UUID)
+
+/*
+ * Get revision of Trusted OS.
+ *
+ * Used by non-secure world to figure out which version of the Trusted OS
+ * is installed. Note that the returned revision is the revision of the
+ * Trusted OS, not of the API.
+ *
+ * Returns revision in a0-1 in the same way as OPTEE_SMC_CALLS_REVISION
+ * described above.
+ */
+#define OPTEE_SMC_FUNCID_GET_OS_REVISION OPTEE_MSG_FUNCID_GET_OS_REVISION
+#define OPTEE_SMC_CALL_GET_OS_REVISION \
+	OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_OS_REVISION)
+
+/*
+ * Call with struct optee_msg_arg as argument
+ *
+ * Call register usage:
+ * a0	SMC Function ID, OPTEE_SMC*CALL_WITH_ARG
+ * a1	Upper 32bit of a 64bit physical pointer to a struct optee_msg_arg
+ * a2	Lower 32bit of a 64bit physical pointer to a struct optee_msg_arg
+ * a3	Cache settings, not used if physical pointer is in a predefined shared
+ *	memory area else per OPTEE_SMC_SHM_*
+ * a4-6	Not used
+ * a7	Hypervisor Client ID register
+ *
+ * Normal return register usage:
+ * a0	Return value, OPTEE_SMC_RETURN_*
+ * a1-3	Not used
+ * a4-7	Preserved
+ *
+ * OPTEE_SMC_RETURN_ETHREAD_LIMIT return register usage:
+ * a0	Return value, OPTEE_SMC_RETURN_ETHREAD_LIMIT
+ * a1-3	Preserved
+ * a4-7	Preserved
+ *
+ * RPC return register usage:
+ * a0	Return value, OPTEE_SMC_RETURN_IS_RPC(val)
+ * a1-2	RPC parameters
+ * a3-7	Resume information, must be preserved
+ *
+ * Possible return values:
+ * OPTEE_SMC_RETURN_UNKNOWN_FUNCTION	Trusted OS does not recognize this
+ *					function.
+ * OPTEE_SMC_RETURN_OK			Call completed, result updated in
+ *					the previously supplied struct
+ *					optee_msg_arg.
+ * OPTEE_SMC_RETURN_ETHREAD_LIMIT	Number of Trusted OS threads exceeded,
+ *					try again later.
+ * OPTEE_SMC_RETURN_EBADADDR		Bad physcial pointer to struct
+ *					optee_msg_arg.
+ * OPTEE_SMC_RETURN_EBADCMD		Bad/unknown cmd in struct optee_msg_arg
+ * OPTEE_SMC_RETURN_IS_RPC()		Call suspended by RPC call to normal
+ *					world.
+ */
+#define OPTEE_SMC_FUNCID_CALL_WITH_ARG OPTEE_MSG_FUNCID_CALL_WITH_ARG
+#define OPTEE_SMC_CALL_WITH_ARG \
+	OPTEE_SMC_STD_CALL_VAL(OPTEE_SMC_FUNCID_CALL_WITH_ARG)
+
+/*
+ * Get Shared Memory Config
+ *
+ * Returns the Secure/Non-secure shared memory config.
+ *
+ * Call register usage:
+ * a0	SMC Function ID, OPTEE_SMC_GET_SHM_CONFIG
+ * a1-6	Not used
+ * a7	Hypervisor Client ID register
+ *
+ * Have config return register usage:
+ * a0	OPTEE_SMC_RETURN_OK
+ * a1	Physical address of start of SHM
+ * a2	Size of of SHM
+ * a3	Cache settings of memory, as defined by the
+ *	OPTEE_SMC_SHM_* values above
+ * a4-7	Preserved
+ *
+ * Not available register usage:
+ * a0	OPTEE_SMC_RETURN_ENOTAVAIL
+ * a1-3 Not used
+ * a4-7	Preserved
+ */
+#define OPTEE_SMC_FUNCID_GET_SHM_CONFIG	7
+#define OPTEE_SMC_GET_SHM_CONFIG \
+	OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_SHM_CONFIG)
+
+struct optee_smc_get_shm_config_result {
+	unsigned long status;
+	unsigned long start;
+	unsigned long size;
+	unsigned long settings;
+};
+
+/*
+ * Exchanges capabilities between normal world and secure world
+ *
+ * Call register usage:
+ * a0	SMC Function ID, OPTEE_SMC_EXCHANGE_CAPABILITIES
+ * a1	bitfield of normal world capabilities OPTEE_SMC_NSEC_CAP_*
+ * a2-6	Not used
+ * a7	Hypervisor Client ID register
+ *
+ * Normal return register usage:
+ * a0	OPTEE_SMC_RETURN_OK
+ * a1	bitfield of secure world capabilities OPTEE_SMC_SEC_CAP_*
+ * a2-7	Preserved
+ *
+ * Error return register usage:
+ * a0	OPTEE_SMC_RETURN_ENOTAVAIL, can't use the capabilities from normal world
+ * a1	bitfield of secure world capabilities OPTEE_SMC_SEC_CAP_*
+ * a2-7 Preserved
+ */
+/* Normal world works as a uniprocessor system */
+#define OPTEE_SMC_NSEC_CAP_UNIPROCESSOR		BIT(0)
+/* Secure world has reserved shared memory for normal world to use */
+#define OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM	BIT(0)
+/* Secure world can communicate via previously unregistered shared memory */
+#define OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM	BIT(1)
+#define OPTEE_SMC_FUNCID_EXCHANGE_CAPABILITIES	9
+#define OPTEE_SMC_EXCHANGE_CAPABILITIES \
+	OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_EXCHANGE_CAPABILITIES)
+
+struct optee_smc_exchange_capabilities_result {
+	unsigned long status;
+	unsigned long capabilities;
+	unsigned long reserved0;
+	unsigned long reserved1;
+};
+
+/*
+ * Disable and empties cache of shared memory objects
+ *
+ * Secure world can cache frequently used shared memory objects, for
+ * example objects used as RPC arguments. When secure world is idle this
+ * function returns one shared memory reference to free. To disable the
+ * cache and free all cached objects this function has to be called until
+ * it returns OPTEE_SMC_RETURN_ENOTAVAIL.
+ *
+ * Call register usage:
+ * a0	SMC Function ID, OPTEE_SMC_DISABLE_SHM_CACHE
+ * a1-6	Not used
+ * a7	Hypervisor Client ID register
+ *
+ * Normal return register usage:
+ * a0	OPTEE_SMC_RETURN_OK
+ * a1	Upper 32bit of a 64bit Shared memory cookie
+ * a2	Lower 32bit of a 64bit Shared memory cookie
+ * a3-7	Preserved
+ *
+ * Cache empty return register usage:
+ * a0	OPTEE_SMC_RETURN_ENOTAVAIL
+ * a1-7	Preserved
+ *
+ * Not idle return register usage:
+ * a0	OPTEE_SMC_RETURN_EBUSY
+ * a1-7	Preserved
+ */
+#define OPTEE_SMC_FUNCID_DISABLE_SHM_CACHE	10
+#define OPTEE_SMC_DISABLE_SHM_CACHE \
+	OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_DISABLE_SHM_CACHE)
+
+struct optee_smc_disable_shm_cache_result {
+	unsigned long status;
+	unsigned long shm_upper32;
+	unsigned long shm_lower32;
+	unsigned long reserved0;
+};
+
+/*
+ * Enable cache of shared memory objects
+ *
+ * Secure world can cache frequently used shared memory objects, for
+ * example objects used as RPC arguments. When secure world is idle this
+ * function returns OPTEE_SMC_RETURN_OK and the cache is enabled. If
+ * secure world isn't idle OPTEE_SMC_RETURN_EBUSY is returned.
+ *
+ * Call register usage:
+ * a0	SMC Function ID, OPTEE_SMC_ENABLE_SHM_CACHE
+ * a1-6	Not used
+ * a7	Hypervisor Client ID register
+ *
+ * Normal return register usage:
+ * a0	OPTEE_SMC_RETURN_OK
+ * a1-7	Preserved
+ *
+ * Not idle return register usage:
+ * a0	OPTEE_SMC_RETURN_EBUSY
+ * a1-7	Preserved
+ */
+#define OPTEE_SMC_FUNCID_ENABLE_SHM_CACHE	11
+#define OPTEE_SMC_ENABLE_SHM_CACHE \
+	OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_ENABLE_SHM_CACHE)
+
+/*
+ * Resume from RPC (for example after processing an IRQ)
+ *
+ * Call register usage:
+ * a0	SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC
+ * a1-3	Value of a1-3 when OPTEE_SMC_CALL_WITH_ARG returned
+ *	OPTEE_SMC_RETURN_RPC in a0
+ *
+ * Return register usage is the same as for OPTEE_SMC_*CALL_WITH_ARG above.
+ *
+ * Possible return values
+ * OPTEE_SMC_RETURN_UNKNOWN_FUNCTION	Trusted OS does not recognize this
+ *					function.
+ * OPTEE_SMC_RETURN_OK			Original call completed, result
+ *					updated in the previously supplied.
+ *					struct optee_msg_arg
+ * OPTEE_SMC_RETURN_RPC			Call suspended by RPC call to normal
+ *					world.
+ * OPTEE_SMC_RETURN_ERESUME		Resume failed, the opaque resume
+ *					information was corrupt.
+ */
+#define OPTEE_SMC_FUNCID_RETURN_FROM_RPC	3
+#define OPTEE_SMC_CALL_RETURN_FROM_RPC \
+	OPTEE_SMC_STD_CALL_VAL(OPTEE_SMC_FUNCID_RETURN_FROM_RPC)
+
+#define OPTEE_SMC_RETURN_RPC_PREFIX_MASK	0xFFFF0000
+#define OPTEE_SMC_RETURN_RPC_PREFIX		0xFFFF0000
+#define OPTEE_SMC_RETURN_RPC_FUNC_MASK		0x0000FFFF
+
+#define OPTEE_SMC_RETURN_GET_RPC_FUNC(ret) \
+	((ret) & OPTEE_SMC_RETURN_RPC_FUNC_MASK)
+
+#define OPTEE_SMC_RPC_VAL(func)		((func) | OPTEE_SMC_RETURN_RPC_PREFIX)
+
+/*
+ * Allocate memory for RPC parameter passing. The memory is used to hold a
+ * struct optee_msg_arg.
+ *
+ * "Call" register usage:
+ * a0	This value, OPTEE_SMC_RETURN_RPC_ALLOC
+ * a1	Size in bytes of required argument memory
+ * a2	Not used
+ * a3	Resume information, must be preserved
+ * a4-5	Not used
+ * a6-7	Resume information, must be preserved
+ *
+ * "Return" register usage:
+ * a0	SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC.
+ * a1	Upper 32bits of 64bit physical pointer to allocated
+ *	memory, (a1 == 0 && a2 == 0) if size was 0 or if memory can't
+ *	be allocated.
+ * a2	Lower 32bits of 64bit physical pointer to allocated
+ *	memory, (a1 == 0 && a2 == 0) if size was 0 or if memory can't
+ *	be allocated
+ * a3	Preserved
+ * a4	Upper 32bits of 64bit Shared memory cookie used when freeing
+ *	the memory or doing an RPC
+ * a5	Lower 32bits of 64bit Shared memory cookie used when freeing
+ *	the memory or doing an RPC
+ * a6-7	Preserved
+ */
+#define OPTEE_SMC_RPC_FUNC_ALLOC	0
+#define OPTEE_SMC_RETURN_RPC_ALLOC \
+	OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_ALLOC)
+
+/*
+ * Free memory previously allocated by OPTEE_SMC_RETURN_RPC_ALLOC
+ *
+ * "Call" register usage:
+ * a0	This value, OPTEE_SMC_RETURN_RPC_FREE
+ * a1	Upper 32bits of 64bit shared memory cookie belonging to this
+ *	argument memory
+ * a2	Lower 32bits of 64bit shared memory cookie belonging to this
+ *	argument memory
+ * a3-7	Resume information, must be preserved
+ *
+ * "Return" register usage:
+ * a0	SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC.
+ * a1-2	Not used
+ * a3-7	Preserved
+ */
+#define OPTEE_SMC_RPC_FUNC_FREE		2
+#define OPTEE_SMC_RETURN_RPC_FREE \
+	OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_FREE)
+
+/*
+ * Deliver an IRQ in normal world.
+ *
+ * "Call" register usage:
+ * a0	OPTEE_SMC_RETURN_RPC_IRQ
+ * a1-7	Resume information, must be preserved
+ *
+ * "Return" register usage:
+ * a0	SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC.
+ * a1-7	Preserved
+ */
+#define OPTEE_SMC_RPC_FUNC_IRQ		4
+#define OPTEE_SMC_RETURN_RPC_IRQ \
+	OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_IRQ)
+
+/*
+ * Do an RPC request. The supplied struct optee_msg_arg tells which
+ * request to do and the parameters for the request. The following fields
+ * are used (the rest are unused):
+ * - cmd		the Request ID
+ * - ret		return value of the request, filled in by normal world
+ * - num_params		number of parameters for the request
+ * - params		the parameters
+ * - param_attrs	attributes of the parameters
+ *
+ * "Call" register usage:
+ * a0	OPTEE_SMC_RETURN_RPC_CMD
+ * a1	Upper 32bit of a 64bit Shared memory cookie holding a
+ *	struct optee_msg_arg, must be preserved, only the data should
+ *	be updated
+ * a2	Lower 32bit of a 64bit Shared memory cookie holding a
+ *	struct optee_msg_arg, must be preserved, only the data should
+ *	be updated
+ * a3-7	Resume information, must be preserved
+ *
+ * "Return" register usage:
+ * a0	SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC.
+ * a1-2	Not used
+ * a3-7	Preserved
+ */
+#define OPTEE_SMC_RPC_FUNC_CMD		5
+#define OPTEE_SMC_RETURN_RPC_CMD \
+	OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_CMD)
+
+/* Returned in a0 */
+#define OPTEE_SMC_RETURN_UNKNOWN_FUNCTION 0xFFFFFFFF
+
+/* Returned in a0 only from Trusted OS functions */
+#define OPTEE_SMC_RETURN_OK		0x0
+#define OPTEE_SMC_RETURN_ETHREAD_LIMIT	0x1
+#define OPTEE_SMC_RETURN_EBUSY		0x2
+#define OPTEE_SMC_RETURN_ERESUME	0x3
+#define OPTEE_SMC_RETURN_EBADADDR	0x4
+#define OPTEE_SMC_RETURN_EBADCMD	0x5
+#define OPTEE_SMC_RETURN_ENOMEM		0x6
+#define OPTEE_SMC_RETURN_ENOTAVAIL	0x7
+#define OPTEE_SMC_RETURN_IS_RPC(ret)	__optee_smc_return_is_rpc((ret))
+
+static inline bool __optee_smc_return_is_rpc(u32 ret)
+{
+	return ret != OPTEE_SMC_RETURN_UNKNOWN_FUNCTION &&
+	       (ret & OPTEE_SMC_RETURN_RPC_PREFIX_MASK) ==
+			OPTEE_SMC_RETURN_RPC_PREFIX;
+}
+
+#endif /* OPTEE_SMC_H */
diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c
new file mode 100644
index 000000000000..8814eca06021
--- /dev/null
+++ b/drivers/tee/optee/rpc.c
@@ -0,0 +1,396 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include "optee_private.h"
+#include "optee_smc.h"
+
+struct wq_entry {
+	struct list_head link;
+	struct completion c;
+	u32 key;
+};
+
+void optee_wait_queue_init(struct optee_wait_queue *priv)
+{
+	mutex_init(&priv->mu);
+	INIT_LIST_HEAD(&priv->db);
+}
+
+void optee_wait_queue_exit(struct optee_wait_queue *priv)
+{
+	mutex_destroy(&priv->mu);
+}
+
+static void handle_rpc_func_cmd_get_time(struct optee_msg_arg *arg)
+{
+	struct timespec64 ts;
+
+	if (arg->num_params != 1)
+		goto bad;
+	if ((arg->params[0].attr & OPTEE_MSG_ATTR_TYPE_MASK) !=
+			OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT)
+		goto bad;
+
+	getnstimeofday64(&ts);
+	arg->params[0].u.value.a = ts.tv_sec;
+	arg->params[0].u.value.b = ts.tv_nsec;
+
+	arg->ret = TEEC_SUCCESS;
+	return;
+bad:
+	arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+}
+
+static struct wq_entry *wq_entry_get(struct optee_wait_queue *wq, u32 key)
+{
+	struct wq_entry *w;
+
+	mutex_lock(&wq->mu);
+
+	list_for_each_entry(w, &wq->db, link)
+		if (w->key == key)
+			goto out;
+
+	w = kmalloc(sizeof(*w), GFP_KERNEL);
+	if (w) {
+		init_completion(&w->c);
+		w->key = key;
+		list_add_tail(&w->link, &wq->db);
+	}
+out:
+	mutex_unlock(&wq->mu);
+	return w;
+}
+
+static void wq_sleep(struct optee_wait_queue *wq, u32 key)
+{
+	struct wq_entry *w = wq_entry_get(wq, key);
+
+	if (w) {
+		wait_for_completion(&w->c);
+		mutex_lock(&wq->mu);
+		list_del(&w->link);
+		mutex_unlock(&wq->mu);
+		kfree(w);
+	}
+}
+
+static void wq_wakeup(struct optee_wait_queue *wq, u32 key)
+{
+	struct wq_entry *w = wq_entry_get(wq, key);
+
+	if (w)
+		complete(&w->c);
+}
+
+static void handle_rpc_func_cmd_wq(struct optee *optee,
+				   struct optee_msg_arg *arg)
+{
+	if (arg->num_params != 1)
+		goto bad;
+
+	if ((arg->params[0].attr & OPTEE_MSG_ATTR_TYPE_MASK) !=
+			OPTEE_MSG_ATTR_TYPE_VALUE_INPUT)
+		goto bad;
+
+	switch (arg->params[0].u.value.a) {
+	case OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP:
+		wq_sleep(&optee->wait_queue, arg->params[0].u.value.b);
+		break;
+	case OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP:
+		wq_wakeup(&optee->wait_queue, arg->params[0].u.value.b);
+		break;
+	default:
+		goto bad;
+	}
+
+	arg->ret = TEEC_SUCCESS;
+	return;
+bad:
+	arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+}
+
+static void handle_rpc_func_cmd_wait(struct optee_msg_arg *arg)
+{
+	u32 msec_to_wait;
+
+	if (arg->num_params != 1)
+		goto bad;
+
+	if ((arg->params[0].attr & OPTEE_MSG_ATTR_TYPE_MASK) !=
+			OPTEE_MSG_ATTR_TYPE_VALUE_INPUT)
+		goto bad;
+
+	msec_to_wait = arg->params[0].u.value.a;
+
+	/* set task's state to interruptible sleep */
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	/* take a nap */
+	msleep(msec_to_wait);
+
+	arg->ret = TEEC_SUCCESS;
+	return;
+bad:
+	arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+}
+
+static void handle_rpc_supp_cmd(struct tee_context *ctx,
+				struct optee_msg_arg *arg)
+{
+	struct tee_param *params;
+
+	arg->ret_origin = TEEC_ORIGIN_COMMS;
+
+	params = kmalloc_array(arg->num_params, sizeof(struct tee_param),
+			       GFP_KERNEL);
+	if (!params) {
+		arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+		return;
+	}
+
+	if (optee_from_msg_param(params, arg->num_params, arg->params)) {
+		arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+		goto out;
+	}
+
+	arg->ret = optee_supp_thrd_req(ctx, arg->cmd, arg->num_params, params);
+
+	if (optee_to_msg_param(arg->params, arg->num_params, params))
+		arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+out:
+	kfree(params);
+}
+
+static struct tee_shm *cmd_alloc_suppl(struct tee_context *ctx, size_t sz)
+{
+	u32 ret;
+	struct tee_param param;
+	struct optee *optee = tee_get_drvdata(ctx->teedev);
+	struct tee_shm *shm;
+
+	param.attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT;
+	param.u.value.a = OPTEE_MSG_RPC_SHM_TYPE_APPL;
+	param.u.value.b = sz;
+	param.u.value.c = 0;
+
+	ret = optee_supp_thrd_req(ctx, OPTEE_MSG_RPC_CMD_SHM_ALLOC, 1, &param);
+	if (ret)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&optee->supp.ctx_mutex);
+	/* Increases count as secure world doesn't have a reference */
+	shm = tee_shm_get_from_id(optee->supp.ctx, param.u.value.c);
+	mutex_unlock(&optee->supp.ctx_mutex);
+	return shm;
+}
+
+static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx,
+					  struct optee_msg_arg *arg)
+{
+	phys_addr_t pa;
+	struct tee_shm *shm;
+	size_t sz;
+	size_t n;
+
+	arg->ret_origin = TEEC_ORIGIN_COMMS;
+
+	if (!arg->num_params ||
+	    arg->params[0].attr != OPTEE_MSG_ATTR_TYPE_VALUE_INPUT) {
+		arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+		return;
+	}
+
+	for (n = 1; n < arg->num_params; n++) {
+		if (arg->params[n].attr != OPTEE_MSG_ATTR_TYPE_NONE) {
+			arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+			return;
+		}
+	}
+
+	sz = arg->params[0].u.value.b;
+	switch (arg->params[0].u.value.a) {
+	case OPTEE_MSG_RPC_SHM_TYPE_APPL:
+		shm = cmd_alloc_suppl(ctx, sz);
+		break;
+	case OPTEE_MSG_RPC_SHM_TYPE_KERNEL:
+		shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED);
+		break;
+	default:
+		arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+		return;
+	}
+
+	if (IS_ERR(shm)) {
+		arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+		return;
+	}
+
+	if (tee_shm_get_pa(shm, 0, &pa)) {
+		arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+		goto bad;
+	}
+
+	arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT;
+	arg->params[0].u.tmem.buf_ptr = pa;
+	arg->params[0].u.tmem.size = sz;
+	arg->params[0].u.tmem.shm_ref = (unsigned long)shm;
+	arg->ret = TEEC_SUCCESS;
+	return;
+bad:
+	tee_shm_free(shm);
+}
+
+static void cmd_free_suppl(struct tee_context *ctx, struct tee_shm *shm)
+{
+	struct tee_param param;
+
+	param.attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT;
+	param.u.value.a = OPTEE_MSG_RPC_SHM_TYPE_APPL;
+	param.u.value.b = tee_shm_get_id(shm);
+	param.u.value.c = 0;
+
+	/*
+	 * Match the tee_shm_get_from_id() in cmd_alloc_suppl() as secure
+	 * world has released its reference.
+	 *
+	 * It's better to do this before sending the request to supplicant
+	 * as we'd like to let the process doing the initial allocation to
+	 * do release the last reference too in order to avoid stacking
+	 * many pending fput() on the client process. This could otherwise
+	 * happen if secure world does many allocate and free in a single
+	 * invoke.
+	 */
+	tee_shm_put(shm);
+
+	optee_supp_thrd_req(ctx, OPTEE_MSG_RPC_CMD_SHM_FREE, 1, &param);
+}
+
+static void handle_rpc_func_cmd_shm_free(struct tee_context *ctx,
+					 struct optee_msg_arg *arg)
+{
+	struct tee_shm *shm;
+
+	arg->ret_origin = TEEC_ORIGIN_COMMS;
+
+	if (arg->num_params != 1 ||
+	    arg->params[0].attr != OPTEE_MSG_ATTR_TYPE_VALUE_INPUT) {
+		arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+		return;
+	}
+
+	shm = (struct tee_shm *)(unsigned long)arg->params[0].u.value.b;
+	switch (arg->params[0].u.value.a) {
+	case OPTEE_MSG_RPC_SHM_TYPE_APPL:
+		cmd_free_suppl(ctx, shm);
+		break;
+	case OPTEE_MSG_RPC_SHM_TYPE_KERNEL:
+		tee_shm_free(shm);
+		break;
+	default:
+		arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+	}
+	arg->ret = TEEC_SUCCESS;
+}
+
+static void handle_rpc_func_cmd(struct tee_context *ctx, struct optee *optee,
+				struct tee_shm *shm)
+{
+	struct optee_msg_arg *arg;
+
+	arg = tee_shm_get_va(shm, 0);
+	if (IS_ERR(arg)) {
+		pr_err("%s: tee_shm_get_va %p failed\n", __func__, shm);
+		return;
+	}
+
+	switch (arg->cmd) {
+	case OPTEE_MSG_RPC_CMD_GET_TIME:
+		handle_rpc_func_cmd_get_time(arg);
+		break;
+	case OPTEE_MSG_RPC_CMD_WAIT_QUEUE:
+		handle_rpc_func_cmd_wq(optee, arg);
+		break;
+	case OPTEE_MSG_RPC_CMD_SUSPEND:
+		handle_rpc_func_cmd_wait(arg);
+		break;
+	case OPTEE_MSG_RPC_CMD_SHM_ALLOC:
+		handle_rpc_func_cmd_shm_alloc(ctx, arg);
+		break;
+	case OPTEE_MSG_RPC_CMD_SHM_FREE:
+		handle_rpc_func_cmd_shm_free(ctx, arg);
+		break;
+	default:
+		handle_rpc_supp_cmd(ctx, arg);
+	}
+}
+
+/**
+ * optee_handle_rpc() - handle RPC from secure world
+ * @ctx:	context doing the RPC
+ * @param:	value of registers for the RPC
+ *
+ * Result of RPC is written back into @param.
+ */
+void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param)
+{
+	struct tee_device *teedev = ctx->teedev;
+	struct optee *optee = tee_get_drvdata(teedev);
+	struct tee_shm *shm;
+	phys_addr_t pa;
+
+	switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) {
+	case OPTEE_SMC_RPC_FUNC_ALLOC:
+		shm = tee_shm_alloc(ctx, param->a1, TEE_SHM_MAPPED);
+		if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) {
+			reg_pair_from_64(&param->a1, &param->a2, pa);
+			reg_pair_from_64(&param->a4, &param->a5,
+					 (unsigned long)shm);
+		} else {
+			param->a1 = 0;
+			param->a2 = 0;
+			param->a4 = 0;
+			param->a5 = 0;
+		}
+		break;
+	case OPTEE_SMC_RPC_FUNC_FREE:
+		shm = reg_pair_to_ptr(param->a1, param->a2);
+		tee_shm_free(shm);
+		break;
+	case OPTEE_SMC_RPC_FUNC_IRQ:
+		/*
+		 * An IRQ was raised while secure world was executing,
+		 * since all IRQs are handled in Linux a dummy RPC is
+		 * performed to let Linux take the IRQ through the normal
+		 * vector.
+		 */
+		break;
+	case OPTEE_SMC_RPC_FUNC_CMD:
+		shm = reg_pair_to_ptr(param->a1, param->a2);
+		handle_rpc_func_cmd(ctx, optee, shm);
+		break;
+	default:
+		pr_warn("Unknown RPC func 0x%x\n",
+			(u32)OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0));
+		break;
+	}
+
+	param->a0 = OPTEE_SMC_CALL_RETURN_FROM_RPC;
+}
diff --git a/drivers/tee/optee/supp.c b/drivers/tee/optee/supp.c
new file mode 100644
index 000000000000..b4ea0678a436
--- /dev/null
+++ b/drivers/tee/optee/supp.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "optee_private.h"
+
+void optee_supp_init(struct optee_supp *supp)
+{
+	memset(supp, 0, sizeof(*supp));
+	mutex_init(&supp->ctx_mutex);
+	mutex_init(&supp->thrd_mutex);
+	mutex_init(&supp->supp_mutex);
+	init_completion(&supp->data_to_supp);
+	init_completion(&supp->data_from_supp);
+}
+
+void optee_supp_uninit(struct optee_supp *supp)
+{
+	mutex_destroy(&supp->ctx_mutex);
+	mutex_destroy(&supp->thrd_mutex);
+	mutex_destroy(&supp->supp_mutex);
+}
+
+/**
+ * optee_supp_thrd_req() - request service from supplicant
+ * @ctx:	context doing the request
+ * @func:	function requested
+ * @num_params:	number of elements in @param array
+ * @param:	parameters for function
+ *
+ * Returns result of operation to be passed to secure world
+ */
+u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params,
+			struct tee_param *param)
+{
+	bool interruptable;
+	struct optee *optee = tee_get_drvdata(ctx->teedev);
+	struct optee_supp *supp = &optee->supp;
+	u32 ret;
+
+	/*
+	 * Other threads blocks here until we've copied our answer from
+	 * supplicant.
+	 */
+	while (mutex_lock_interruptible(&supp->thrd_mutex)) {
+		/* See comment below on when the RPC can be interrupted. */
+		mutex_lock(&supp->ctx_mutex);
+		interruptable = !supp->ctx;
+		mutex_unlock(&supp->ctx_mutex);
+		if (interruptable)
+			return TEEC_ERROR_COMMUNICATION;
+	}
+
+	/*
+	 * We have exclusive access now since the supplicant at this
+	 * point is either doing a
+	 * wait_for_completion_interruptible(&supp->data_to_supp) or is in
+	 * userspace still about to do the ioctl() to enter
+	 * optee_supp_recv() below.
+	 */
+
+	supp->func = func;
+	supp->num_params = num_params;
+	supp->param = param;
+	supp->req_posted = true;
+
+	/* Let supplicant get the data */
+	complete(&supp->data_to_supp);
+
+	/*
+	 * Wait for supplicant to process and return result, once we've
+	 * returned from wait_for_completion(data_from_supp) we have
+	 * exclusive access again.
+	 */
+	while (wait_for_completion_interruptible(&supp->data_from_supp)) {
+		mutex_lock(&supp->ctx_mutex);
+		interruptable = !supp->ctx;
+		if (interruptable) {
+			/*
+			 * There's no supplicant available and since the
+			 * supp->ctx_mutex currently is held none can
+			 * become available until the mutex released
+			 * again.
+			 *
+			 * Interrupting an RPC to supplicant is only
+			 * allowed as a way of slightly improving the user
+			 * experience in case the supplicant hasn't been
+			 * started yet. During normal operation the supplicant
+			 * will serve all requests in a timely manner and
+			 * interrupting then wouldn't make sense.
+			 */
+			supp->ret = TEEC_ERROR_COMMUNICATION;
+			init_completion(&supp->data_to_supp);
+		}
+		mutex_unlock(&supp->ctx_mutex);
+		if (interruptable)
+			break;
+	}
+
+	ret = supp->ret;
+	supp->param = NULL;
+	supp->req_posted = false;
+
+	/* We're done, let someone else talk to the supplicant now. */
+	mutex_unlock(&supp->thrd_mutex);
+
+	return ret;
+}
+
+/**
+ * optee_supp_recv() - receive request for supplicant
+ * @ctx:	context receiving the request
+ * @func:	requested function in supplicant
+ * @num_params:	number of elements allocated in @param, updated with number
+ *		used elements
+ * @param:	space for parameters for @func
+ *
+ * Returns 0 on success or <0 on failure
+ */
+int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params,
+		    struct tee_param *param)
+{
+	struct tee_device *teedev = ctx->teedev;
+	struct optee *optee = tee_get_drvdata(teedev);
+	struct optee_supp *supp = &optee->supp;
+	int rc;
+
+	/*
+	 * In case two threads in one supplicant is calling this function
+	 * simultaneously we need to protect the data with a mutex which
+	 * we'll release before returning.
+	 */
+	mutex_lock(&supp->supp_mutex);
+
+	if (supp->supp_next_send) {
+		/*
+		 * optee_supp_recv() has been called again without
+		 * a optee_supp_send() in between. Supplicant has
+		 * probably been restarted before it was able to
+		 * write back last result. Abort last request and
+		 * wait for a new.
+		 */
+		if (supp->req_posted) {
+			supp->ret = TEEC_ERROR_COMMUNICATION;
+			supp->supp_next_send = false;
+			complete(&supp->data_from_supp);
+		}
+	}
+
+	/*
+	 * This is where supplicant will be hanging most of the
+	 * time, let's make this interruptable so we can easily
+	 * restart supplicant if needed.
+	 */
+	if (wait_for_completion_interruptible(&supp->data_to_supp)) {
+		rc = -ERESTARTSYS;
+		goto out;
+	}
+
+	/* We have exlusive access to the data */
+
+	if (*num_params < supp->num_params) {
+		/*
+		 * Not enough room for parameters, tell supplicant
+		 * it failed and abort last request.
+		 */
+		supp->ret = TEEC_ERROR_COMMUNICATION;
+		rc = -EINVAL;
+		complete(&supp->data_from_supp);
+		goto out;
+	}
+
+	*func = supp->func;
+	*num_params = supp->num_params;
+	memcpy(param, supp->param,
+	       sizeof(struct tee_param) * supp->num_params);
+
+	/* Allow optee_supp_send() below to do its work */
+	supp->supp_next_send = true;
+
+	rc = 0;
+out:
+	mutex_unlock(&supp->supp_mutex);
+	return rc;
+}
+
+/**
+ * optee_supp_send() - send result of request from supplicant
+ * @ctx:	context sending result
+ * @ret:	return value of request
+ * @num_params:	number of parameters returned
+ * @param:	returned parameters
+ *
+ * Returns 0 on success or <0 on failure.
+ */
+int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params,
+		    struct tee_param *param)
+{
+	struct tee_device *teedev = ctx->teedev;
+	struct optee *optee = tee_get_drvdata(teedev);
+	struct optee_supp *supp = &optee->supp;
+	size_t n;
+	int rc = 0;
+
+	/*
+	 * We still have exclusive access to the data since that's how we
+	 * left it when returning from optee_supp_read().
+	 */
+
+	/* See comment on mutex in optee_supp_read() above */
+	mutex_lock(&supp->supp_mutex);
+
+	if (!supp->supp_next_send) {
+		/*
+		 * Something strange is going on, supplicant shouldn't
+		 * enter optee_supp_send() in this state
+		 */
+		rc = -ENOENT;
+		goto out;
+	}
+
+	if (num_params != supp->num_params) {
+		/*
+		 * Something is wrong, let supplicant restart. Next call to
+		 * optee_supp_recv() will give an error to the requesting
+		 * thread and release it.
+		 */
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* Update out and in/out parameters */
+	for (n = 0; n < num_params; n++) {
+		struct tee_param *p = supp->param + n;
+
+		switch (p->attr) {
+		case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+			p->u.value.a = param[n].u.value.a;
+			p->u.value.b = param[n].u.value.b;
+			p->u.value.c = param[n].u.value.c;
+			break;
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+			p->u.memref.size = param[n].u.memref.size;
+			break;
+		default:
+			break;
+		}
+	}
+	supp->ret = ret;
+
+	/* Allow optee_supp_recv() above to do its work */
+	supp->supp_next_send = false;
+
+	/* Let the requesting thread continue */
+	complete(&supp->data_from_supp);
+out:
+	mutex_unlock(&supp->supp_mutex);
+	return rc;
+}
diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
new file mode 100644
index 000000000000..5c60bf4423e6
--- /dev/null
+++ b/drivers/tee/tee_core.c
@@ -0,0 +1,893 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include <linux/uaccess.h>
+#include "tee_private.h"
+
+#define TEE_NUM_DEVICES	32
+
+#define TEE_IOCTL_PARAM_SIZE(x) (sizeof(struct tee_param) * (x))
+
+/*
+ * Unprivileged devices in the lower half range and privileged devices in
+ * the upper half range.
+ */
+static DECLARE_BITMAP(dev_mask, TEE_NUM_DEVICES);
+static DEFINE_SPINLOCK(driver_lock);
+
+static struct class *tee_class;
+static dev_t tee_devt;
+
+static int tee_open(struct inode *inode, struct file *filp)
+{
+	int rc;
+	struct tee_device *teedev;
+	struct tee_context *ctx;
+
+	teedev = container_of(inode->i_cdev, struct tee_device, cdev);
+	if (!tee_device_get(teedev))
+		return -EINVAL;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	ctx->teedev = teedev;
+	INIT_LIST_HEAD(&ctx->list_shm);
+	filp->private_data = ctx;
+	rc = teedev->desc->ops->open(ctx);
+	if (rc)
+		goto err;
+
+	return 0;
+err:
+	kfree(ctx);
+	tee_device_put(teedev);
+	return rc;
+}
+
+static int tee_release(struct inode *inode, struct file *filp)
+{
+	struct tee_context *ctx = filp->private_data;
+	struct tee_device *teedev = ctx->teedev;
+	struct tee_shm *shm;
+
+	ctx->teedev->desc->ops->release(ctx);
+	mutex_lock(&ctx->teedev->mutex);
+	list_for_each_entry(shm, &ctx->list_shm, link)
+		shm->ctx = NULL;
+	mutex_unlock(&ctx->teedev->mutex);
+	kfree(ctx);
+	tee_device_put(teedev);
+	return 0;
+}
+
+static int tee_ioctl_version(struct tee_context *ctx,
+			     struct tee_ioctl_version_data __user *uvers)
+{
+	struct tee_ioctl_version_data vers;
+
+	ctx->teedev->desc->ops->get_version(ctx->teedev, &vers);
+	if (copy_to_user(uvers, &vers, sizeof(vers)))
+		return -EFAULT;
+	return 0;
+}
+
+static int tee_ioctl_shm_alloc(struct tee_context *ctx,
+			       struct tee_ioctl_shm_alloc_data __user *udata)
+{
+	long ret;
+	struct tee_ioctl_shm_alloc_data data;
+	struct tee_shm *shm;
+
+	if (copy_from_user(&data, udata, sizeof(data)))
+		return -EFAULT;
+
+	/* Currently no input flags are supported */
+	if (data.flags)
+		return -EINVAL;
+
+	data.id = -1;
+
+	shm = tee_shm_alloc(ctx, data.size, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	if (IS_ERR(shm))
+		return PTR_ERR(shm);
+
+	data.id = shm->id;
+	data.flags = shm->flags;
+	data.size = shm->size;
+
+	if (copy_to_user(udata, &data, sizeof(data)))
+		ret = -EFAULT;
+	else
+		ret = tee_shm_get_fd(shm);
+
+	/*
+	 * When user space closes the file descriptor the shared memory
+	 * should be freed or if tee_shm_get_fd() failed then it will
+	 * be freed immediately.
+	 */
+	tee_shm_put(shm);
+	return ret;
+}
+
+static int params_from_user(struct tee_context *ctx, struct tee_param *params,
+			    size_t num_params,
+			    struct tee_ioctl_param __user *uparams)
+{
+	size_t n;
+
+	for (n = 0; n < num_params; n++) {
+		struct tee_shm *shm;
+		struct tee_ioctl_param ip;
+
+		if (copy_from_user(&ip, uparams + n, sizeof(ip)))
+			return -EFAULT;
+
+		/* All unused attribute bits has to be zero */
+		if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_TYPE_MASK)
+			return -EINVAL;
+
+		params[n].attr = ip.attr;
+		switch (ip.attr) {
+		case TEE_IOCTL_PARAM_ATTR_TYPE_NONE:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+			break;
+		case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+			params[n].u.value.a = ip.a;
+			params[n].u.value.b = ip.b;
+			params[n].u.value.c = ip.c;
+			break;
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+			/*
+			 * If we fail to get a pointer to a shared memory
+			 * object (and increase the ref count) from an
+			 * identifier we return an error. All pointers that
+			 * has been added in params have an increased ref
+			 * count. It's the callers responibility to do
+			 * tee_shm_put() on all resolved pointers.
+			 */
+			shm = tee_shm_get_from_id(ctx, ip.c);
+			if (IS_ERR(shm))
+				return PTR_ERR(shm);
+
+			params[n].u.memref.shm_offs = ip.a;
+			params[n].u.memref.size = ip.b;
+			params[n].u.memref.shm = shm;
+			break;
+		default:
+			/* Unknown attribute */
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static int params_to_user(struct tee_ioctl_param __user *uparams,
+			  size_t num_params, struct tee_param *params)
+{
+	size_t n;
+
+	for (n = 0; n < num_params; n++) {
+		struct tee_ioctl_param __user *up = uparams + n;
+		struct tee_param *p = params + n;
+
+		switch (p->attr) {
+		case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+			if (put_user(p->u.value.a, &up->a) ||
+			    put_user(p->u.value.b, &up->b) ||
+			    put_user(p->u.value.c, &up->c))
+				return -EFAULT;
+			break;
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+			if (put_user((u64)p->u.memref.size, &up->b))
+				return -EFAULT;
+		default:
+			break;
+		}
+	}
+	return 0;
+}
+
+static bool param_is_memref(struct tee_param *param)
+{
+	switch (param->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) {
+	case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT:
+	case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+	case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int tee_ioctl_open_session(struct tee_context *ctx,
+				  struct tee_ioctl_buf_data __user *ubuf)
+{
+	int rc;
+	size_t n;
+	struct tee_ioctl_buf_data buf;
+	struct tee_ioctl_open_session_arg __user *uarg;
+	struct tee_ioctl_open_session_arg arg;
+	struct tee_ioctl_param __user *uparams = NULL;
+	struct tee_param *params = NULL;
+	bool have_session = false;
+
+	if (!ctx->teedev->desc->ops->open_session)
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, sizeof(buf)))
+		return -EFAULT;
+
+	if (buf.buf_len > TEE_MAX_ARG_SIZE ||
+	    buf.buf_len < sizeof(struct tee_ioctl_open_session_arg))
+		return -EINVAL;
+
+	uarg = u64_to_user_ptr(buf.buf_ptr);
+	if (copy_from_user(&arg, uarg, sizeof(arg)))
+		return -EFAULT;
+
+	if (sizeof(arg) + TEE_IOCTL_PARAM_SIZE(arg.num_params) != buf.buf_len)
+		return -EINVAL;
+
+	if (arg.num_params) {
+		params = kcalloc(arg.num_params, sizeof(struct tee_param),
+				 GFP_KERNEL);
+		if (!params)
+			return -ENOMEM;
+		uparams = uarg->params;
+		rc = params_from_user(ctx, params, arg.num_params, uparams);
+		if (rc)
+			goto out;
+	}
+
+	rc = ctx->teedev->desc->ops->open_session(ctx, &arg, params);
+	if (rc)
+		goto out;
+	have_session = true;
+
+	if (put_user(arg.session, &uarg->session) ||
+	    put_user(arg.ret, &uarg->ret) ||
+	    put_user(arg.ret_origin, &uarg->ret_origin)) {
+		rc = -EFAULT;
+		goto out;
+	}
+	rc = params_to_user(uparams, arg.num_params, params);
+out:
+	/*
+	 * If we've succeeded to open the session but failed to communicate
+	 * it back to user space, close the session again to avoid leakage.
+	 */
+	if (rc && have_session && ctx->teedev->desc->ops->close_session)
+		ctx->teedev->desc->ops->close_session(ctx, arg.session);
+
+	if (params) {
+		/* Decrease ref count for all valid shared memory pointers */
+		for (n = 0; n < arg.num_params; n++)
+			if (param_is_memref(params + n) &&
+			    params[n].u.memref.shm)
+				tee_shm_put(params[n].u.memref.shm);
+		kfree(params);
+	}
+
+	return rc;
+}
+
+static int tee_ioctl_invoke(struct tee_context *ctx,
+			    struct tee_ioctl_buf_data __user *ubuf)
+{
+	int rc;
+	size_t n;
+	struct tee_ioctl_buf_data buf;
+	struct tee_ioctl_invoke_arg __user *uarg;
+	struct tee_ioctl_invoke_arg arg;
+	struct tee_ioctl_param __user *uparams = NULL;
+	struct tee_param *params = NULL;
+
+	if (!ctx->teedev->desc->ops->invoke_func)
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, sizeof(buf)))
+		return -EFAULT;
+
+	if (buf.buf_len > TEE_MAX_ARG_SIZE ||
+	    buf.buf_len < sizeof(struct tee_ioctl_invoke_arg))
+		return -EINVAL;
+
+	uarg = u64_to_user_ptr(buf.buf_ptr);
+	if (copy_from_user(&arg, uarg, sizeof(arg)))
+		return -EFAULT;
+
+	if (sizeof(arg) + TEE_IOCTL_PARAM_SIZE(arg.num_params) != buf.buf_len)
+		return -EINVAL;
+
+	if (arg.num_params) {
+		params = kcalloc(arg.num_params, sizeof(struct tee_param),
+				 GFP_KERNEL);
+		if (!params)
+			return -ENOMEM;
+		uparams = uarg->params;
+		rc = params_from_user(ctx, params, arg.num_params, uparams);
+		if (rc)
+			goto out;
+	}
+
+	rc = ctx->teedev->desc->ops->invoke_func(ctx, &arg, params);
+	if (rc)
+		goto out;
+
+	if (put_user(arg.ret, &uarg->ret) ||
+	    put_user(arg.ret_origin, &uarg->ret_origin)) {
+		rc = -EFAULT;
+		goto out;
+	}
+	rc = params_to_user(uparams, arg.num_params, params);
+out:
+	if (params) {
+		/* Decrease ref count for all valid shared memory pointers */
+		for (n = 0; n < arg.num_params; n++)
+			if (param_is_memref(params + n) &&
+			    params[n].u.memref.shm)
+				tee_shm_put(params[n].u.memref.shm);
+		kfree(params);
+	}
+	return rc;
+}
+
+static int tee_ioctl_cancel(struct tee_context *ctx,
+			    struct tee_ioctl_cancel_arg __user *uarg)
+{
+	struct tee_ioctl_cancel_arg arg;
+
+	if (!ctx->teedev->desc->ops->cancel_req)
+		return -EINVAL;
+
+	if (copy_from_user(&arg, uarg, sizeof(arg)))
+		return -EFAULT;
+
+	return ctx->teedev->desc->ops->cancel_req(ctx, arg.cancel_id,
+						  arg.session);
+}
+
+static int
+tee_ioctl_close_session(struct tee_context *ctx,
+			struct tee_ioctl_close_session_arg __user *uarg)
+{
+	struct tee_ioctl_close_session_arg arg;
+
+	if (!ctx->teedev->desc->ops->close_session)
+		return -EINVAL;
+
+	if (copy_from_user(&arg, uarg, sizeof(arg)))
+		return -EFAULT;
+
+	return ctx->teedev->desc->ops->close_session(ctx, arg.session);
+}
+
+static int params_to_supp(struct tee_context *ctx,
+			  struct tee_ioctl_param __user *uparams,
+			  size_t num_params, struct tee_param *params)
+{
+	size_t n;
+
+	for (n = 0; n < num_params; n++) {
+		struct tee_ioctl_param ip;
+		struct tee_param *p = params + n;
+
+		ip.attr = p->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK;
+		switch (p->attr) {
+		case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+			ip.a = p->u.value.a;
+			ip.b = p->u.value.b;
+			ip.c = p->u.value.c;
+			break;
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+			ip.b = p->u.memref.size;
+			if (!p->u.memref.shm) {
+				ip.a = 0;
+				ip.c = (u64)-1; /* invalid shm id */
+				break;
+			}
+			ip.a = p->u.memref.shm_offs;
+			ip.c = p->u.memref.shm->id;
+			break;
+		default:
+			ip.a = 0;
+			ip.b = 0;
+			ip.c = 0;
+			break;
+		}
+
+		if (copy_to_user(uparams + n, &ip, sizeof(ip)))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int tee_ioctl_supp_recv(struct tee_context *ctx,
+			       struct tee_ioctl_buf_data __user *ubuf)
+{
+	int rc;
+	struct tee_ioctl_buf_data buf;
+	struct tee_iocl_supp_recv_arg __user *uarg;
+	struct tee_param *params;
+	u32 num_params;
+	u32 func;
+
+	if (!ctx->teedev->desc->ops->supp_recv)
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, sizeof(buf)))
+		return -EFAULT;
+
+	if (buf.buf_len > TEE_MAX_ARG_SIZE ||
+	    buf.buf_len < sizeof(struct tee_iocl_supp_recv_arg))
+		return -EINVAL;
+
+	uarg = u64_to_user_ptr(buf.buf_ptr);
+	if (get_user(num_params, &uarg->num_params))
+		return -EFAULT;
+
+	if (sizeof(*uarg) + TEE_IOCTL_PARAM_SIZE(num_params) != buf.buf_len)
+		return -EINVAL;
+
+	params = kcalloc(num_params, sizeof(struct tee_param), GFP_KERNEL);
+	if (!params)
+		return -ENOMEM;
+
+	rc = ctx->teedev->desc->ops->supp_recv(ctx, &func, &num_params, params);
+	if (rc)
+		goto out;
+
+	if (put_user(func, &uarg->func) ||
+	    put_user(num_params, &uarg->num_params)) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rc = params_to_supp(ctx, uarg->params, num_params, params);
+out:
+	kfree(params);
+	return rc;
+}
+
+static int params_from_supp(struct tee_param *params, size_t num_params,
+			    struct tee_ioctl_param __user *uparams)
+{
+	size_t n;
+
+	for (n = 0; n < num_params; n++) {
+		struct tee_param *p = params + n;
+		struct tee_ioctl_param ip;
+
+		if (copy_from_user(&ip, uparams + n, sizeof(ip)))
+			return -EFAULT;
+
+		/* All unused attribute bits has to be zero */
+		if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_TYPE_MASK)
+			return -EINVAL;
+
+		p->attr = ip.attr;
+		switch (ip.attr) {
+		case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+			/* Only out and in/out values can be updated */
+			p->u.value.a = ip.a;
+			p->u.value.b = ip.b;
+			p->u.value.c = ip.c;
+			break;
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+			/*
+			 * Only the size of the memref can be updated.
+			 * Since we don't have access to the original
+			 * parameters here, only store the supplied size.
+			 * The driver will copy the updated size into the
+			 * original parameters.
+			 */
+			p->u.memref.shm = NULL;
+			p->u.memref.shm_offs = 0;
+			p->u.memref.size = ip.b;
+			break;
+		default:
+			memset(&p->u, 0, sizeof(p->u));
+			break;
+		}
+	}
+	return 0;
+}
+
+static int tee_ioctl_supp_send(struct tee_context *ctx,
+			       struct tee_ioctl_buf_data __user *ubuf)
+{
+	long rc;
+	struct tee_ioctl_buf_data buf;
+	struct tee_iocl_supp_send_arg __user *uarg;
+	struct tee_param *params;
+	u32 num_params;
+	u32 ret;
+
+	/* Not valid for this driver */
+	if (!ctx->teedev->desc->ops->supp_send)
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, sizeof(buf)))
+		return -EFAULT;
+
+	if (buf.buf_len > TEE_MAX_ARG_SIZE ||
+	    buf.buf_len < sizeof(struct tee_iocl_supp_send_arg))
+		return -EINVAL;
+
+	uarg = u64_to_user_ptr(buf.buf_ptr);
+	if (get_user(ret, &uarg->ret) ||
+	    get_user(num_params, &uarg->num_params))
+		return -EFAULT;
+
+	if (sizeof(*uarg) + TEE_IOCTL_PARAM_SIZE(num_params) > buf.buf_len)
+		return -EINVAL;
+
+	params = kcalloc(num_params, sizeof(struct tee_param), GFP_KERNEL);
+	if (!params)
+		return -ENOMEM;
+
+	rc = params_from_supp(params, num_params, uarg->params);
+	if (rc)
+		goto out;
+
+	rc = ctx->teedev->desc->ops->supp_send(ctx, ret, num_params, params);
+out:
+	kfree(params);
+	return rc;
+}
+
+static long tee_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct tee_context *ctx = filp->private_data;
+	void __user *uarg = (void __user *)arg;
+
+	switch (cmd) {
+	case TEE_IOC_VERSION:
+		return tee_ioctl_version(ctx, uarg);
+	case TEE_IOC_SHM_ALLOC:
+		return tee_ioctl_shm_alloc(ctx, uarg);
+	case TEE_IOC_OPEN_SESSION:
+		return tee_ioctl_open_session(ctx, uarg);
+	case TEE_IOC_INVOKE:
+		return tee_ioctl_invoke(ctx, uarg);
+	case TEE_IOC_CANCEL:
+		return tee_ioctl_cancel(ctx, uarg);
+	case TEE_IOC_CLOSE_SESSION:
+		return tee_ioctl_close_session(ctx, uarg);
+	case TEE_IOC_SUPPL_RECV:
+		return tee_ioctl_supp_recv(ctx, uarg);
+	case TEE_IOC_SUPPL_SEND:
+		return tee_ioctl_supp_send(ctx, uarg);
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct file_operations tee_fops = {
+	.owner = THIS_MODULE,
+	.open = tee_open,
+	.release = tee_release,
+	.unlocked_ioctl = tee_ioctl,
+	.compat_ioctl = tee_ioctl,
+};
+
+static void tee_release_device(struct device *dev)
+{
+	struct tee_device *teedev = container_of(dev, struct tee_device, dev);
+
+	spin_lock(&driver_lock);
+	clear_bit(teedev->id, dev_mask);
+	spin_unlock(&driver_lock);
+	mutex_destroy(&teedev->mutex);
+	idr_destroy(&teedev->idr);
+	kfree(teedev);
+}
+
+/**
+ * tee_device_alloc() - Allocate a new struct tee_device instance
+ * @teedesc:	Descriptor for this driver
+ * @dev:	Parent device for this device
+ * @pool:	Shared memory pool, NULL if not used
+ * @driver_data: Private driver data for this device
+ *
+ * Allocates a new struct tee_device instance. The device is
+ * removed by tee_device_unregister().
+ *
+ * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure
+ */
+struct tee_device *tee_device_alloc(const struct tee_desc *teedesc,
+				    struct device *dev,
+				    struct tee_shm_pool *pool,
+				    void *driver_data)
+{
+	struct tee_device *teedev;
+	void *ret;
+	int rc;
+	int offs = 0;
+
+	if (!teedesc || !teedesc->name || !teedesc->ops ||
+	    !teedesc->ops->get_version || !teedesc->ops->open ||
+	    !teedesc->ops->release || !pool)
+		return ERR_PTR(-EINVAL);
+
+	teedev = kzalloc(sizeof(*teedev), GFP_KERNEL);
+	if (!teedev) {
+		ret = ERR_PTR(-ENOMEM);
+		goto err;
+	}
+
+	if (teedesc->flags & TEE_DESC_PRIVILEGED)
+		offs = TEE_NUM_DEVICES / 2;
+
+	spin_lock(&driver_lock);
+	teedev->id = find_next_zero_bit(dev_mask, TEE_NUM_DEVICES, offs);
+	if (teedev->id < TEE_NUM_DEVICES)
+		set_bit(teedev->id, dev_mask);
+	spin_unlock(&driver_lock);
+
+	if (teedev->id >= TEE_NUM_DEVICES) {
+		ret = ERR_PTR(-ENOMEM);
+		goto err;
+	}
+
+	snprintf(teedev->name, sizeof(teedev->name), "tee%s%d",
+		 teedesc->flags & TEE_DESC_PRIVILEGED ? "priv" : "",
+		 teedev->id - offs);
+
+	teedev->dev.class = tee_class;
+	teedev->dev.release = tee_release_device;
+	teedev->dev.parent = dev;
+
+	teedev->dev.devt = MKDEV(MAJOR(tee_devt), teedev->id);
+
+	rc = dev_set_name(&teedev->dev, "%s", teedev->name);
+	if (rc) {
+		ret = ERR_PTR(rc);
+		goto err_devt;
+	}
+
+	cdev_init(&teedev->cdev, &tee_fops);
+	teedev->cdev.owner = teedesc->owner;
+	teedev->cdev.kobj.parent = &teedev->dev.kobj;
+
+	dev_set_drvdata(&teedev->dev, driver_data);
+	device_initialize(&teedev->dev);
+
+	/* 1 as tee_device_unregister() does one final tee_device_put() */
+	teedev->num_users = 1;
+	init_completion(&teedev->c_no_users);
+	mutex_init(&teedev->mutex);
+	idr_init(&teedev->idr);
+
+	teedev->desc = teedesc;
+	teedev->pool = pool;
+
+	return teedev;
+err_devt:
+	unregister_chrdev_region(teedev->dev.devt, 1);
+err:
+	pr_err("could not register %s driver\n",
+	       teedesc->flags & TEE_DESC_PRIVILEGED ? "privileged" : "client");
+	if (teedev && teedev->id < TEE_NUM_DEVICES) {
+		spin_lock(&driver_lock);
+		clear_bit(teedev->id, dev_mask);
+		spin_unlock(&driver_lock);
+	}
+	kfree(teedev);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tee_device_alloc);
+
+static ssize_t implementation_id_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct tee_device *teedev = container_of(dev, struct tee_device, dev);
+	struct tee_ioctl_version_data vers;
+
+	teedev->desc->ops->get_version(teedev, &vers);
+	return scnprintf(buf, PAGE_SIZE, "%d\n", vers.impl_id);
+}
+static DEVICE_ATTR_RO(implementation_id);
+
+static struct attribute *tee_dev_attrs[] = {
+	&dev_attr_implementation_id.attr,
+	NULL
+};
+
+static const struct attribute_group tee_dev_group = {
+	.attrs = tee_dev_attrs,
+};
+
+/**
+ * tee_device_register() - Registers a TEE device
+ * @teedev:	Device to register
+ *
+ * tee_device_unregister() need to be called to remove the @teedev if
+ * this function fails.
+ *
+ * @returns < 0 on failure
+ */
+int tee_device_register(struct tee_device *teedev)
+{
+	int rc;
+
+	if (teedev->flags & TEE_DEVICE_FLAG_REGISTERED) {
+		dev_err(&teedev->dev, "attempt to register twice\n");
+		return -EINVAL;
+	}
+
+	rc = cdev_add(&teedev->cdev, teedev->dev.devt, 1);
+	if (rc) {
+		dev_err(&teedev->dev,
+			"unable to cdev_add() %s, major %d, minor %d, err=%d\n",
+			teedev->name, MAJOR(teedev->dev.devt),
+			MINOR(teedev->dev.devt), rc);
+		return rc;
+	}
+
+	rc = device_add(&teedev->dev);
+	if (rc) {
+		dev_err(&teedev->dev,
+			"unable to device_add() %s, major %d, minor %d, err=%d\n",
+			teedev->name, MAJOR(teedev->dev.devt),
+			MINOR(teedev->dev.devt), rc);
+		goto err_device_add;
+	}
+
+	rc = sysfs_create_group(&teedev->dev.kobj, &tee_dev_group);
+	if (rc) {
+		dev_err(&teedev->dev,
+			"failed to create sysfs attributes, err=%d\n", rc);
+		goto err_sysfs_create_group;
+	}
+
+	teedev->flags |= TEE_DEVICE_FLAG_REGISTERED;
+	return 0;
+
+err_sysfs_create_group:
+	device_del(&teedev->dev);
+err_device_add:
+	cdev_del(&teedev->cdev);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(tee_device_register);
+
+void tee_device_put(struct tee_device *teedev)
+{
+	mutex_lock(&teedev->mutex);
+	/* Shouldn't put in this state */
+	if (!WARN_ON(!teedev->desc)) {
+		teedev->num_users--;
+		if (!teedev->num_users) {
+			teedev->desc = NULL;
+			complete(&teedev->c_no_users);
+		}
+	}
+	mutex_unlock(&teedev->mutex);
+}
+
+bool tee_device_get(struct tee_device *teedev)
+{
+	mutex_lock(&teedev->mutex);
+	if (!teedev->desc) {
+		mutex_unlock(&teedev->mutex);
+		return false;
+	}
+	teedev->num_users++;
+	mutex_unlock(&teedev->mutex);
+	return true;
+}
+
+/**
+ * tee_device_unregister() - Removes a TEE device
+ * @teedev:	Device to unregister
+ *
+ * This function should be called to remove the @teedev even if
+ * tee_device_register() hasn't been called yet. Does nothing if
+ * @teedev is NULL.
+ */
+void tee_device_unregister(struct tee_device *teedev)
+{
+	if (!teedev)
+		return;
+
+	if (teedev->flags & TEE_DEVICE_FLAG_REGISTERED) {
+		sysfs_remove_group(&teedev->dev.kobj, &tee_dev_group);
+		cdev_del(&teedev->cdev);
+		device_del(&teedev->dev);
+	}
+
+	tee_device_put(teedev);
+	wait_for_completion(&teedev->c_no_users);
+
+	/*
+	 * No need to take a mutex any longer now since teedev->desc was
+	 * set to NULL before teedev->c_no_users was completed.
+	 */
+
+	teedev->pool = NULL;
+
+	put_device(&teedev->dev);
+}
+EXPORT_SYMBOL_GPL(tee_device_unregister);
+
+/**
+ * tee_get_drvdata() - Return driver_data pointer
+ * @teedev:	Device containing the driver_data pointer
+ * @returns the driver_data pointer supplied to tee_register().
+ */
+void *tee_get_drvdata(struct tee_device *teedev)
+{
+	return dev_get_drvdata(&teedev->dev);
+}
+EXPORT_SYMBOL_GPL(tee_get_drvdata);
+
+static int __init tee_init(void)
+{
+	int rc;
+
+	tee_class = class_create(THIS_MODULE, "tee");
+	if (IS_ERR(tee_class)) {
+		pr_err("couldn't create class\n");
+		return PTR_ERR(tee_class);
+	}
+
+	rc = alloc_chrdev_region(&tee_devt, 0, TEE_NUM_DEVICES, "tee");
+	if (rc) {
+		pr_err("failed to allocate char dev region\n");
+		class_destroy(tee_class);
+		tee_class = NULL;
+	}
+
+	return rc;
+}
+
+static void __exit tee_exit(void)
+{
+	class_destroy(tee_class);
+	tee_class = NULL;
+	unregister_chrdev_region(tee_devt, TEE_NUM_DEVICES);
+}
+
+subsys_initcall(tee_init);
+module_exit(tee_exit);
+
+MODULE_AUTHOR("Linaro");
+MODULE_DESCRIPTION("TEE Driver");
+MODULE_VERSION("1.0");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/tee/tee_private.h b/drivers/tee/tee_private.h
new file mode 100644
index 000000000000..21cb6be8bce9
--- /dev/null
+++ b/drivers/tee/tee_private.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef TEE_PRIVATE_H
+#define TEE_PRIVATE_H
+
+#include <linux/cdev.h>
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+struct tee_device;
+
+/**
+ * struct tee_shm - shared memory object
+ * @teedev:	device used to allocate the object
+ * @ctx:	context using the object, if NULL the context is gone
+ * @link	link element
+ * @paddr:	physical address of the shared memory
+ * @kaddr:	virtual address of the shared memory
+ * @size:	size of shared memory
+ * @dmabuf:	dmabuf used to for exporting to user space
+ * @flags:	defined by TEE_SHM_* in tee_drv.h
+ * @id:		unique id of a shared memory object on this device
+ */
+struct tee_shm {
+	struct tee_device *teedev;
+	struct tee_context *ctx;
+	struct list_head link;
+	phys_addr_t paddr;
+	void *kaddr;
+	size_t size;
+	struct dma_buf *dmabuf;
+	u32 flags;
+	int id;
+};
+
+struct tee_shm_pool_mgr;
+
+/**
+ * struct tee_shm_pool_mgr_ops - shared memory pool manager operations
+ * @alloc:	called when allocating shared memory
+ * @free:	called when freeing shared memory
+ */
+struct tee_shm_pool_mgr_ops {
+	int (*alloc)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm,
+		     size_t size);
+	void (*free)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm);
+};
+
+/**
+ * struct tee_shm_pool_mgr - shared memory manager
+ * @ops:		operations
+ * @private_data:	private data for the shared memory manager
+ */
+struct tee_shm_pool_mgr {
+	const struct tee_shm_pool_mgr_ops *ops;
+	void *private_data;
+};
+
+/**
+ * struct tee_shm_pool - shared memory pool
+ * @private_mgr:	pool manager for shared memory only between kernel
+ *			and secure world
+ * @dma_buf_mgr:	pool manager for shared memory exported to user space
+ * @destroy:		called when destroying the pool
+ * @private_data:	private data for the pool
+ */
+struct tee_shm_pool {
+	struct tee_shm_pool_mgr private_mgr;
+	struct tee_shm_pool_mgr dma_buf_mgr;
+	void (*destroy)(struct tee_shm_pool *pool);
+	void *private_data;
+};
+
+#define TEE_DEVICE_FLAG_REGISTERED	0x1
+#define TEE_MAX_DEV_NAME_LEN		32
+
+/**
+ * struct tee_device - TEE Device representation
+ * @name:	name of device
+ * @desc:	description of device
+ * @id:		unique id of device
+ * @flags:	represented by TEE_DEVICE_FLAG_REGISTERED above
+ * @dev:	embedded basic device structure
+ * @cdev:	embedded cdev
+ * @num_users:	number of active users of this device
+ * @c_no_user:	completion used when unregistering the device
+ * @mutex:	mutex protecting @num_users and @idr
+ * @idr:	register of shared memory object allocated on this device
+ * @pool:	shared memory pool
+ */
+struct tee_device {
+	char name[TEE_MAX_DEV_NAME_LEN];
+	const struct tee_desc *desc;
+	int id;
+	unsigned int flags;
+
+	struct device dev;
+	struct cdev cdev;
+
+	size_t num_users;
+	struct completion c_no_users;
+	struct mutex mutex;	/* protects num_users and idr */
+
+	struct idr idr;
+	struct tee_shm_pool *pool;
+};
+
+int tee_shm_init(void);
+
+int tee_shm_get_fd(struct tee_shm *shm);
+
+bool tee_device_get(struct tee_device *teedev);
+void tee_device_put(struct tee_device *teedev);
+
+#endif /*TEE_PRIVATE_H*/
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
new file mode 100644
index 000000000000..d356d7f025eb
--- /dev/null
+++ b/drivers/tee/tee_shm.c
@@ -0,0 +1,358 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/fdtable.h>
+#include <linux/idr.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include "tee_private.h"
+
+static void tee_shm_release(struct tee_shm *shm)
+{
+	struct tee_device *teedev = shm->teedev;
+	struct tee_shm_pool_mgr *poolm;
+
+	mutex_lock(&teedev->mutex);
+	idr_remove(&teedev->idr, shm->id);
+	if (shm->ctx)
+		list_del(&shm->link);
+	mutex_unlock(&teedev->mutex);
+
+	if (shm->flags & TEE_SHM_DMA_BUF)
+		poolm = &teedev->pool->dma_buf_mgr;
+	else
+		poolm = &teedev->pool->private_mgr;
+
+	poolm->ops->free(poolm, shm);
+	kfree(shm);
+
+	tee_device_put(teedev);
+}
+
+static struct sg_table *tee_shm_op_map_dma_buf(struct dma_buf_attachment
+			*attach, enum dma_data_direction dir)
+{
+	return NULL;
+}
+
+static void tee_shm_op_unmap_dma_buf(struct dma_buf_attachment *attach,
+				     struct sg_table *table,
+				     enum dma_data_direction dir)
+{
+}
+
+static void tee_shm_op_release(struct dma_buf *dmabuf)
+{
+	struct tee_shm *shm = dmabuf->priv;
+
+	tee_shm_release(shm);
+}
+
+static void *tee_shm_op_map_atomic(struct dma_buf *dmabuf, unsigned long pgnum)
+{
+	return NULL;
+}
+
+static void *tee_shm_op_map(struct dma_buf *dmabuf, unsigned long pgnum)
+{
+	return NULL;
+}
+
+static int tee_shm_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct tee_shm *shm = dmabuf->priv;
+	size_t size = vma->vm_end - vma->vm_start;
+
+	return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT,
+			       size, vma->vm_page_prot);
+}
+
+static struct dma_buf_ops tee_shm_dma_buf_ops = {
+	.map_dma_buf = tee_shm_op_map_dma_buf,
+	.unmap_dma_buf = tee_shm_op_unmap_dma_buf,
+	.release = tee_shm_op_release,
+	.map_atomic = tee_shm_op_map_atomic,
+	.map = tee_shm_op_map,
+	.mmap = tee_shm_op_mmap,
+};
+
+/**
+ * tee_shm_alloc() - Allocate shared memory
+ * @ctx:	Context that allocates the shared memory
+ * @size:	Requested size of shared memory
+ * @flags:	Flags setting properties for the requested shared memory.
+ *
+ * Memory allocated as global shared memory is automatically freed when the
+ * TEE file pointer is closed. The @flags field uses the bits defined by
+ * TEE_SHM_* in <linux/tee_drv.h>. TEE_SHM_MAPPED must currently always be
+ * set. If TEE_SHM_DMA_BUF global shared memory will be allocated and
+ * associated with a dma-buf handle, else driver private memory.
+ */
+struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags)
+{
+	struct tee_device *teedev = ctx->teedev;
+	struct tee_shm_pool_mgr *poolm = NULL;
+	struct tee_shm *shm;
+	void *ret;
+	int rc;
+
+	if (!(flags & TEE_SHM_MAPPED)) {
+		dev_err(teedev->dev.parent,
+			"only mapped allocations supported\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF))) {
+		dev_err(teedev->dev.parent, "invalid shm flags 0x%x", flags);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!tee_device_get(teedev))
+		return ERR_PTR(-EINVAL);
+
+	if (!teedev->pool) {
+		/* teedev has been detached from driver */
+		ret = ERR_PTR(-EINVAL);
+		goto err_dev_put;
+	}
+
+	shm = kzalloc(sizeof(*shm), GFP_KERNEL);
+	if (!shm) {
+		ret = ERR_PTR(-ENOMEM);
+		goto err_dev_put;
+	}
+
+	shm->flags = flags;
+	shm->teedev = teedev;
+	shm->ctx = ctx;
+	if (flags & TEE_SHM_DMA_BUF)
+		poolm = &teedev->pool->dma_buf_mgr;
+	else
+		poolm = &teedev->pool->private_mgr;
+
+	rc = poolm->ops->alloc(poolm, shm, size);
+	if (rc) {
+		ret = ERR_PTR(rc);
+		goto err_kfree;
+	}
+
+	mutex_lock(&teedev->mutex);
+	shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL);
+	mutex_unlock(&teedev->mutex);
+	if (shm->id < 0) {
+		ret = ERR_PTR(shm->id);
+		goto err_pool_free;
+	}
+
+	if (flags & TEE_SHM_DMA_BUF) {
+		DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+		exp_info.ops = &tee_shm_dma_buf_ops;
+		exp_info.size = shm->size;
+		exp_info.flags = O_RDWR;
+		exp_info.priv = shm;
+
+		shm->dmabuf = dma_buf_export(&exp_info);
+		if (IS_ERR(shm->dmabuf)) {
+			ret = ERR_CAST(shm->dmabuf);
+			goto err_rem;
+		}
+	}
+	mutex_lock(&teedev->mutex);
+	list_add_tail(&shm->link, &ctx->list_shm);
+	mutex_unlock(&teedev->mutex);
+
+	return shm;
+err_rem:
+	mutex_lock(&teedev->mutex);
+	idr_remove(&teedev->idr, shm->id);
+	mutex_unlock(&teedev->mutex);
+err_pool_free:
+	poolm->ops->free(poolm, shm);
+err_kfree:
+	kfree(shm);
+err_dev_put:
+	tee_device_put(teedev);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tee_shm_alloc);
+
+/**
+ * tee_shm_get_fd() - Increase reference count and return file descriptor
+ * @shm:	Shared memory handle
+ * @returns user space file descriptor to shared memory
+ */
+int tee_shm_get_fd(struct tee_shm *shm)
+{
+	u32 req_flags = TEE_SHM_MAPPED | TEE_SHM_DMA_BUF;
+	int fd;
+
+	if ((shm->flags & req_flags) != req_flags)
+		return -EINVAL;
+
+	fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC);
+	if (fd >= 0)
+		get_dma_buf(shm->dmabuf);
+	return fd;
+}
+
+/**
+ * tee_shm_free() - Free shared memory
+ * @shm:	Handle to shared memory to free
+ */
+void tee_shm_free(struct tee_shm *shm)
+{
+	/*
+	 * dma_buf_put() decreases the dmabuf reference counter and will
+	 * call tee_shm_release() when the last reference is gone.
+	 *
+	 * In the case of driver private memory we call tee_shm_release
+	 * directly instead as it doesn't have a reference counter.
+	 */
+	if (shm->flags & TEE_SHM_DMA_BUF)
+		dma_buf_put(shm->dmabuf);
+	else
+		tee_shm_release(shm);
+}
+EXPORT_SYMBOL_GPL(tee_shm_free);
+
+/**
+ * tee_shm_va2pa() - Get physical address of a virtual address
+ * @shm:	Shared memory handle
+ * @va:		Virtual address to tranlsate
+ * @pa:		Returned physical address
+ * @returns 0 on success and < 0 on failure
+ */
+int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa)
+{
+	/* Check that we're in the range of the shm */
+	if ((char *)va < (char *)shm->kaddr)
+		return -EINVAL;
+	if ((char *)va >= ((char *)shm->kaddr + shm->size))
+		return -EINVAL;
+
+	return tee_shm_get_pa(
+			shm, (unsigned long)va - (unsigned long)shm->kaddr, pa);
+}
+EXPORT_SYMBOL_GPL(tee_shm_va2pa);
+
+/**
+ * tee_shm_pa2va() - Get virtual address of a physical address
+ * @shm:	Shared memory handle
+ * @pa:		Physical address to tranlsate
+ * @va:		Returned virtual address
+ * @returns 0 on success and < 0 on failure
+ */
+int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va)
+{
+	/* Check that we're in the range of the shm */
+	if (pa < shm->paddr)
+		return -EINVAL;
+	if (pa >= (shm->paddr + shm->size))
+		return -EINVAL;
+
+	if (va) {
+		void *v = tee_shm_get_va(shm, pa - shm->paddr);
+
+		if (IS_ERR(v))
+			return PTR_ERR(v);
+		*va = v;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tee_shm_pa2va);
+
+/**
+ * tee_shm_get_va() - Get virtual address of a shared memory plus an offset
+ * @shm:	Shared memory handle
+ * @offs:	Offset from start of this shared memory
+ * @returns virtual address of the shared memory + offs if offs is within
+ *	the bounds of this shared memory, else an ERR_PTR
+ */
+void *tee_shm_get_va(struct tee_shm *shm, size_t offs)
+{
+	if (offs >= shm->size)
+		return ERR_PTR(-EINVAL);
+	return (char *)shm->kaddr + offs;
+}
+EXPORT_SYMBOL_GPL(tee_shm_get_va);
+
+/**
+ * tee_shm_get_pa() - Get physical address of a shared memory plus an offset
+ * @shm:	Shared memory handle
+ * @offs:	Offset from start of this shared memory
+ * @pa:		Physical address to return
+ * @returns 0 if offs is within the bounds of this shared memory, else an
+ *	error code.
+ */
+int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa)
+{
+	if (offs >= shm->size)
+		return -EINVAL;
+	if (pa)
+		*pa = shm->paddr + offs;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tee_shm_get_pa);
+
+/**
+ * tee_shm_get_from_id() - Find shared memory object and increase reference
+ * count
+ * @ctx:	Context owning the shared memory
+ * @id:		Id of shared memory object
+ * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure
+ */
+struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id)
+{
+	struct tee_device *teedev;
+	struct tee_shm *shm;
+
+	if (!ctx)
+		return ERR_PTR(-EINVAL);
+
+	teedev = ctx->teedev;
+	mutex_lock(&teedev->mutex);
+	shm = idr_find(&teedev->idr, id);
+	if (!shm || shm->ctx != ctx)
+		shm = ERR_PTR(-EINVAL);
+	else if (shm->flags & TEE_SHM_DMA_BUF)
+		get_dma_buf(shm->dmabuf);
+	mutex_unlock(&teedev->mutex);
+	return shm;
+}
+EXPORT_SYMBOL_GPL(tee_shm_get_from_id);
+
+/**
+ * tee_shm_get_id() - Get id of a shared memory object
+ * @shm:	Shared memory handle
+ * @returns id
+ */
+int tee_shm_get_id(struct tee_shm *shm)
+{
+	return shm->id;
+}
+EXPORT_SYMBOL_GPL(tee_shm_get_id);
+
+/**
+ * tee_shm_put() - Decrease reference count on a shared memory handle
+ * @shm:	Shared memory handle
+ */
+void tee_shm_put(struct tee_shm *shm)
+{
+	if (shm->flags & TEE_SHM_DMA_BUF)
+		dma_buf_put(shm->dmabuf);
+}
+EXPORT_SYMBOL_GPL(tee_shm_put);
diff --git a/drivers/tee/tee_shm_pool.c b/drivers/tee/tee_shm_pool.c
new file mode 100644
index 000000000000..fb4f8522a526
--- /dev/null
+++ b/drivers/tee/tee_shm_pool.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/genalloc.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include "tee_private.h"
+
+static int pool_op_gen_alloc(struct tee_shm_pool_mgr *poolm,
+			     struct tee_shm *shm, size_t size)
+{
+	unsigned long va;
+	struct gen_pool *genpool = poolm->private_data;
+	size_t s = roundup(size, 1 << genpool->min_alloc_order);
+
+	va = gen_pool_alloc(genpool, s);
+	if (!va)
+		return -ENOMEM;
+
+	memset((void *)va, 0, s);
+	shm->kaddr = (void *)va;
+	shm->paddr = gen_pool_virt_to_phys(genpool, va);
+	shm->size = s;
+	return 0;
+}
+
+static void pool_op_gen_free(struct tee_shm_pool_mgr *poolm,
+			     struct tee_shm *shm)
+{
+	gen_pool_free(poolm->private_data, (unsigned long)shm->kaddr,
+		      shm->size);
+	shm->kaddr = NULL;
+}
+
+static const struct tee_shm_pool_mgr_ops pool_ops_generic = {
+	.alloc = pool_op_gen_alloc,
+	.free = pool_op_gen_free,
+};
+
+static void pool_res_mem_destroy(struct tee_shm_pool *pool)
+{
+	gen_pool_destroy(pool->private_mgr.private_data);
+	gen_pool_destroy(pool->dma_buf_mgr.private_data);
+}
+
+static int pool_res_mem_mgr_init(struct tee_shm_pool_mgr *mgr,
+				 struct tee_shm_pool_mem_info *info,
+				 int min_alloc_order)
+{
+	size_t page_mask = PAGE_SIZE - 1;
+	struct gen_pool *genpool = NULL;
+	int rc;
+
+	/*
+	 * Start and end must be page aligned
+	 */
+	if ((info->vaddr & page_mask) || (info->paddr & page_mask) ||
+	    (info->size & page_mask))
+		return -EINVAL;
+
+	genpool = gen_pool_create(min_alloc_order, -1);
+	if (!genpool)
+		return -ENOMEM;
+
+	gen_pool_set_algo(genpool, gen_pool_best_fit, NULL);
+	rc = gen_pool_add_virt(genpool, info->vaddr, info->paddr, info->size,
+			       -1);
+	if (rc) {
+		gen_pool_destroy(genpool);
+		return rc;
+	}
+
+	mgr->private_data = genpool;
+	mgr->ops = &pool_ops_generic;
+	return 0;
+}
+
+/**
+ * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved
+ * memory range
+ * @priv_info:	Information for driver private shared memory pool
+ * @dmabuf_info: Information for dma-buf shared memory pool
+ *
+ * Start and end of pools will must be page aligned.
+ *
+ * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied
+ * in @dmabuf, others will use the range provided by @priv.
+ *
+ * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure.
+ */
+struct tee_shm_pool *
+tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info,
+			   struct tee_shm_pool_mem_info *dmabuf_info)
+{
+	struct tee_shm_pool *pool = NULL;
+	int ret;
+
+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/*
+	 * Create the pool for driver private shared memory
+	 */
+	ret = pool_res_mem_mgr_init(&pool->private_mgr, priv_info,
+				    3 /* 8 byte aligned */);
+	if (ret)
+		goto err;
+
+	/*
+	 * Create the pool for dma_buf shared memory
+	 */
+	ret = pool_res_mem_mgr_init(&pool->dma_buf_mgr, dmabuf_info,
+				    PAGE_SHIFT);
+	if (ret)
+		goto err;
+
+	pool->destroy = pool_res_mem_destroy;
+	return pool;
+err:
+	if (ret == -ENOMEM)
+		pr_err("%s: can't allocate memory for res_mem shared memory pool\n", __func__);
+	if (pool && pool->private_mgr.private_data)
+		gen_pool_destroy(pool->private_mgr.private_data);
+	kfree(pool);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(tee_shm_pool_alloc_res_mem);
+
+/**
+ * tee_shm_pool_free() - Free a shared memory pool
+ * @pool:	The shared memory pool to free
+ *
+ * There must be no remaining shared memory allocated from this pool when
+ * this function is called.
+ */
+void tee_shm_pool_free(struct tee_shm_pool *pool)
+{
+	pool->destroy(pool);
+	kfree(pool);
+}
+EXPORT_SYMBOL_GPL(tee_shm_pool_free);
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 6871ecc5b951..b5b5facb8747 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -15,6 +15,23 @@ menuconfig THERMAL
 
 if THERMAL
 
+config THERMAL_EMERGENCY_POWEROFF_DELAY_MS
+	int "Emergency poweroff delay in milli-seconds"
+	depends on THERMAL
+	default 0
+	help
+	  Thermal subsystem will issue a graceful shutdown when
+	  critical temperatures are reached using orderly_poweroff(). In
+	  case of failure of an orderly_poweroff(), the thermal emergency
+	  poweroff kicks in after a delay has elapsed and shuts down the system.
+	  This config is number of milliseconds to delay before emergency
+	  poweroff kicks in. Similarly to the critical trip point,
+	  the delay should be carefully profiled so as to give adequate
+	  time for orderly_poweroff() to finish on regular execution.
+	  If set to 0 emergency poweroff will not be supported.
+
+	  In doubt, leave as 0.
+
 config THERMAL_HWMON
 	bool
 	prompt "Expose thermal sensors as hwmon device"
@@ -291,6 +308,16 @@ config ARMADA_THERMAL
 	  Enable this option if you want to have support for thermal management
 	  controller present in Armada 370 and Armada XP SoC.
 
+config DA9062_THERMAL
+	tristate "DA9062/DA9061 Dialog Semiconductor thermal driver"
+	depends on MFD_DA9062 || COMPILE_TEST
+	depends on OF
+	help
+	  Enable this for the Dialog Semiconductor thermal sensor driver.
+	  This will report PMIC junction over-temperature for one thermal trip
+	  zone.
+	  Compatible with the DA9062 and DA9061 PMICs.
+
 config INTEL_POWERCLAMP
 	tristate "Intel PowerClamp idle injection driver"
 	depends on THERMAL
@@ -380,6 +407,11 @@ config MTK_THERMAL
 	  Enable this option if you want to have support for thermal management
 	  controller present in Mediatek SoCs
 
+menu "Broadcom thermal drivers"
+depends on ARCH_BCM || COMPILE_TEST
+source "drivers/thermal/broadcom/Kconfig"
+endmenu
+
 menu "Texas Instruments thermal drivers"
 depends on ARCH_HAS_BANDGAP || COMPILE_TEST
 depends on HAS_IOMEM
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index c2372f10dae5..094d7039981c 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -27,6 +27,7 @@ thermal_sys-$(CONFIG_CLOCK_THERMAL)	+= clock_cooling.o
 thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
 
 # platform thermal drivers
+obj-y				+= broadcom/
 obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM)	+= qcom-spmi-temp-alarm.o
 obj-$(CONFIG_SPEAR_THERMAL)	+= spear_thermal.o
 obj-$(CONFIG_ROCKCHIP_THERMAL)	+= rockchip_thermal.o
@@ -41,6 +42,7 @@ obj-$(CONFIG_TANGO_THERMAL)	+= tango_thermal.o
 obj-$(CONFIG_IMX_THERMAL)	+= imx_thermal.o
 obj-$(CONFIG_MAX77620_THERMAL)	+= max77620_thermal.o
 obj-$(CONFIG_QORIQ_THERMAL)	+= qoriq_thermal.o
+obj-$(CONFIG_DA9062_THERMAL)	+= da9062-thermal.o
 obj-$(CONFIG_INTEL_POWERCLAMP)	+= intel_powerclamp.o
 obj-$(CONFIG_X86_PKG_TEMP_THERMAL)	+= x86_pkg_temp_thermal.o
 obj-$(CONFIG_INTEL_SOC_DTS_IOSF_CORE)	+= intel_soc_dts_iosf.o
diff --git a/drivers/thermal/broadcom/Kconfig b/drivers/thermal/broadcom/Kconfig
new file mode 100644
index 000000000000..42c098e86f84
--- /dev/null
+++ b/drivers/thermal/broadcom/Kconfig
@@ -0,0 +1,17 @@
+config BCM2835_THERMAL
+	tristate "Thermal sensors on bcm2835 SoC"
+	depends on ARCH_BCM2835 || COMPILE_TEST
+	depends on HAS_IOMEM
+	depends on THERMAL_OF
+	help
+	  Support for thermal sensors on Broadcom bcm2835 SoCs.
+
+config BCM_NS_THERMAL
+	tristate "Northstar thermal driver"
+	depends on ARCH_BCM_IPROC || COMPILE_TEST
+	default y if ARCH_BCM_IPROC
+	help
+	  Support for the Northstar and Northstar Plus family of SoCs (e.g.
+	  BCM4708, BCM4709, BCM5301x, BCM95852X, etc). It contains DMU (Device
+	  Management Unit) block with a thermal sensor that allows checking CPU
+	  temperature.
diff --git a/drivers/thermal/broadcom/Makefile b/drivers/thermal/broadcom/Makefile
new file mode 100644
index 000000000000..c6f62e4fd0ee
--- /dev/null
+++ b/drivers/thermal/broadcom/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_BCM2835_THERMAL)		+= bcm2835_thermal.o
+obj-$(CONFIG_BCM_NS_THERMAL)		+= ns-thermal.o
diff --git a/drivers/thermal/broadcom/bcm2835_thermal.c b/drivers/thermal/broadcom/bcm2835_thermal.c
new file mode 100644
index 000000000000..0ecf80890c84
--- /dev/null
+++ b/drivers/thermal/broadcom/bcm2835_thermal.c
@@ -0,0 +1,314 @@
+/*
+ * Driver for Broadcom BCM2835 SoC temperature sensor
+ *
+ * Copyright (C) 2016 Martin Sperl
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+
+#define BCM2835_TS_TSENSCTL			0x00
+#define BCM2835_TS_TSENSSTAT			0x04
+
+#define BCM2835_TS_TSENSCTL_PRWDW		BIT(0)
+#define BCM2835_TS_TSENSCTL_RSTB		BIT(1)
+
+/*
+ * bandgap reference voltage in 6 mV increments
+ * 000b = 1178 mV, 001b = 1184 mV, ... 111b = 1220 mV
+ */
+#define BCM2835_TS_TSENSCTL_CTRL_BITS		3
+#define BCM2835_TS_TSENSCTL_CTRL_SHIFT		2
+#define BCM2835_TS_TSENSCTL_CTRL_MASK		    \
+	GENMASK(BCM2835_TS_TSENSCTL_CTRL_BITS +     \
+		BCM2835_TS_TSENSCTL_CTRL_SHIFT - 1, \
+		BCM2835_TS_TSENSCTL_CTRL_SHIFT)
+#define BCM2835_TS_TSENSCTL_CTRL_DEFAULT	1
+#define BCM2835_TS_TSENSCTL_EN_INT		BIT(5)
+#define BCM2835_TS_TSENSCTL_DIRECT		BIT(6)
+#define BCM2835_TS_TSENSCTL_CLR_INT		BIT(7)
+#define BCM2835_TS_TSENSCTL_THOLD_SHIFT		8
+#define BCM2835_TS_TSENSCTL_THOLD_BITS		10
+#define BCM2835_TS_TSENSCTL_THOLD_MASK		     \
+	GENMASK(BCM2835_TS_TSENSCTL_THOLD_BITS +     \
+		BCM2835_TS_TSENSCTL_THOLD_SHIFT - 1, \
+		BCM2835_TS_TSENSCTL_THOLD_SHIFT)
+/*
+ * time how long the block to be asserted in reset
+ * which based on a clock counter (TSENS clock assumed)
+ */
+#define BCM2835_TS_TSENSCTL_RSTDELAY_SHIFT	18
+#define BCM2835_TS_TSENSCTL_RSTDELAY_BITS	8
+#define BCM2835_TS_TSENSCTL_REGULEN		BIT(26)
+
+#define BCM2835_TS_TSENSSTAT_DATA_BITS		10
+#define BCM2835_TS_TSENSSTAT_DATA_SHIFT		0
+#define BCM2835_TS_TSENSSTAT_DATA_MASK		     \
+	GENMASK(BCM2835_TS_TSENSSTAT_DATA_BITS +     \
+		BCM2835_TS_TSENSSTAT_DATA_SHIFT - 1, \
+		BCM2835_TS_TSENSSTAT_DATA_SHIFT)
+#define BCM2835_TS_TSENSSTAT_VALID		BIT(10)
+#define BCM2835_TS_TSENSSTAT_INTERRUPT		BIT(11)
+
+struct bcm2835_thermal_data {
+	struct thermal_zone_device *tz;
+	void __iomem *regs;
+	struct clk *clk;
+	struct dentry *debugfsdir;
+};
+
+static int bcm2835_thermal_adc2temp(u32 adc, int offset, int slope)
+{
+	return offset + slope * adc;
+}
+
+static int bcm2835_thermal_temp2adc(int temp, int offset, int slope)
+{
+	temp -= offset;
+	temp /= slope;
+
+	if (temp < 0)
+		temp = 0;
+	if (temp >= BIT(BCM2835_TS_TSENSSTAT_DATA_BITS))
+		temp = BIT(BCM2835_TS_TSENSSTAT_DATA_BITS) - 1;
+
+	return temp;
+}
+
+static int bcm2835_thermal_get_temp(void *d, int *temp)
+{
+	struct bcm2835_thermal_data *data = d;
+	u32 val = readl(data->regs + BCM2835_TS_TSENSSTAT);
+
+	if (!(val & BCM2835_TS_TSENSSTAT_VALID))
+		return -EIO;
+
+	val &= BCM2835_TS_TSENSSTAT_DATA_MASK;
+
+	*temp = bcm2835_thermal_adc2temp(
+		val,
+		thermal_zone_get_offset(data->tz),
+		thermal_zone_get_slope(data->tz));
+
+	return 0;
+}
+
+static const struct debugfs_reg32 bcm2835_thermal_regs[] = {
+	{
+		.name = "ctl",
+		.offset = 0
+	},
+	{
+		.name = "stat",
+		.offset = 4
+	}
+};
+
+static void bcm2835_thermal_debugfs(struct platform_device *pdev)
+{
+	struct thermal_zone_device *tz = platform_get_drvdata(pdev);
+	struct bcm2835_thermal_data *data = tz->devdata;
+	struct debugfs_regset32 *regset;
+
+	data->debugfsdir = debugfs_create_dir("bcm2835_thermal", NULL);
+	if (!data->debugfsdir)
+		return;
+
+	regset = devm_kzalloc(&pdev->dev, sizeof(*regset), GFP_KERNEL);
+	if (!regset)
+		return;
+
+	regset->regs = bcm2835_thermal_regs;
+	regset->nregs = ARRAY_SIZE(bcm2835_thermal_regs);
+	regset->base = data->regs;
+
+	debugfs_create_regset32("regset", 0444, data->debugfsdir, regset);
+}
+
+static struct thermal_zone_of_device_ops bcm2835_thermal_ops = {
+	.get_temp = bcm2835_thermal_get_temp,
+};
+
+/*
+ * Note: as per Raspberry Foundation FAQ
+ * (https://www.raspberrypi.org/help/faqs/#performanceOperatingTemperature)
+ * the recommended temperature range for the SoC -40C to +85C
+ * so the trip limit is set to 80C.
+ * this applies to all the BCM283X SoC
+ */
+
+static const struct of_device_id bcm2835_thermal_of_match_table[] = {
+	{
+		.compatible = "brcm,bcm2835-thermal",
+	},
+	{
+		.compatible = "brcm,bcm2836-thermal",
+	},
+	{
+		.compatible = "brcm,bcm2837-thermal",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, bcm2835_thermal_of_match_table);
+
+static int bcm2835_thermal_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	struct thermal_zone_device *tz;
+	struct bcm2835_thermal_data *data;
+	struct resource *res;
+	int err = 0;
+	u32 val;
+	unsigned long rate;
+
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	match = of_match_device(bcm2835_thermal_of_match_table,
+				&pdev->dev);
+	if (!match)
+		return -EINVAL;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	data->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(data->regs)) {
+		err = PTR_ERR(data->regs);
+		dev_err(&pdev->dev, "Could not get registers: %d\n", err);
+		return err;
+	}
+
+	data->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(data->clk)) {
+		err = PTR_ERR(data->clk);
+		if (err != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "Could not get clk: %d\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(data->clk);
+	if (err)
+		return err;
+
+	rate = clk_get_rate(data->clk);
+	if ((rate < 1920000) || (rate > 5000000))
+		dev_warn(&pdev->dev,
+			 "Clock %pCn running at %pCr Hz is outside of the recommended range: 1.92 to 5MHz\n",
+			 data->clk, data->clk);
+
+	/* register of thermal sensor and get info from DT */
+	tz = thermal_zone_of_sensor_register(&pdev->dev, 0, data,
+					     &bcm2835_thermal_ops);
+	if (IS_ERR(tz)) {
+		err = PTR_ERR(tz);
+		dev_err(&pdev->dev,
+			"Failed to register the thermal device: %d\n",
+			err);
+		goto err_clk;
+	}
+
+	/*
+	 * right now the FW does set up the HW-block, so we are not
+	 * touching the configuration registers.
+	 * But if the HW is not enabled, then set it up
+	 * using "sane" values used by the firmware right now.
+	 */
+	val = readl(data->regs + BCM2835_TS_TSENSCTL);
+	if (!(val & BCM2835_TS_TSENSCTL_RSTB)) {
+		int trip_temp, offset, slope;
+
+		slope = thermal_zone_get_slope(tz);
+		offset = thermal_zone_get_offset(tz);
+		/*
+		 * For now we deal only with critical, otherwise
+		 * would need to iterate
+		 */
+		err = tz->ops->get_trip_temp(tz, 0, &trip_temp);
+		if (err < 0) {
+			err = PTR_ERR(tz);
+			dev_err(&pdev->dev,
+				"Not able to read trip_temp: %d\n",
+				err);
+			goto err_tz;
+		}
+
+		/* set bandgap reference voltage and enable voltage regulator */
+		val = (BCM2835_TS_TSENSCTL_CTRL_DEFAULT <<
+		       BCM2835_TS_TSENSCTL_CTRL_SHIFT) |
+		      BCM2835_TS_TSENSCTL_REGULEN;
+
+		/* use the recommended reset duration */
+		val |= (0xFE << BCM2835_TS_TSENSCTL_RSTDELAY_SHIFT);
+
+		/*  trip_adc value from info */
+		val |= bcm2835_thermal_temp2adc(trip_temp,
+						offset,
+						slope)
+			<< BCM2835_TS_TSENSCTL_THOLD_SHIFT;
+
+		/* write the value back to the register as 2 steps */
+		writel(val, data->regs + BCM2835_TS_TSENSCTL);
+		val |= BCM2835_TS_TSENSCTL_RSTB;
+		writel(val, data->regs + BCM2835_TS_TSENSCTL);
+	}
+
+	data->tz = tz;
+
+	platform_set_drvdata(pdev, tz);
+
+	bcm2835_thermal_debugfs(pdev);
+
+	return 0;
+err_tz:
+	thermal_zone_of_sensor_unregister(&pdev->dev, tz);
+err_clk:
+	clk_disable_unprepare(data->clk);
+
+	return err;
+}
+
+static int bcm2835_thermal_remove(struct platform_device *pdev)
+{
+	struct thermal_zone_device *tz = platform_get_drvdata(pdev);
+	struct bcm2835_thermal_data *data = tz->devdata;
+
+	debugfs_remove_recursive(data->debugfsdir);
+	thermal_zone_of_sensor_unregister(&pdev->dev, tz);
+	clk_disable_unprepare(data->clk);
+
+	return 0;
+}
+
+static struct platform_driver bcm2835_thermal_driver = {
+	.probe = bcm2835_thermal_probe,
+	.remove = bcm2835_thermal_remove,
+	.driver = {
+		.name = "bcm2835_thermal",
+		.of_match_table = bcm2835_thermal_of_match_table,
+	},
+};
+module_platform_driver(bcm2835_thermal_driver);
+
+MODULE_AUTHOR("Martin Sperl");
+MODULE_DESCRIPTION("Thermal driver for bcm2835 chip");
+MODULE_LICENSE("GPL");
diff --git a/drivers/thermal/broadcom/ns-thermal.c b/drivers/thermal/broadcom/ns-thermal.c
new file mode 100644
index 000000000000..322e741a2463
--- /dev/null
+++ b/drivers/thermal/broadcom/ns-thermal.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2017 Rafał Miłecki <rafal@milecki.pl>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+
+#define PVTMON_CONTROL0					0x00
+#define PVTMON_CONTROL0_SEL_MASK			0x0000000e
+#define PVTMON_CONTROL0_SEL_TEMP_MONITOR		0x00000000
+#define PVTMON_CONTROL0_SEL_TEST_MODE			0x0000000e
+#define PVTMON_STATUS					0x08
+
+struct ns_thermal {
+	struct thermal_zone_device *tz;
+	void __iomem *pvtmon;
+};
+
+static int ns_thermal_get_temp(void *data, int *temp)
+{
+	struct ns_thermal *ns_thermal = data;
+	int offset = thermal_zone_get_offset(ns_thermal->tz);
+	int slope = thermal_zone_get_slope(ns_thermal->tz);
+	u32 val;
+
+	val = readl(ns_thermal->pvtmon + PVTMON_CONTROL0);
+	if ((val & PVTMON_CONTROL0_SEL_MASK) != PVTMON_CONTROL0_SEL_TEMP_MONITOR) {
+		/* Clear current mode selection */
+		val &= ~PVTMON_CONTROL0_SEL_MASK;
+
+		/* Set temp monitor mode (it's the default actually) */
+		val |= PVTMON_CONTROL0_SEL_TEMP_MONITOR;
+
+		writel(val, ns_thermal->pvtmon + PVTMON_CONTROL0);
+	}
+
+	val = readl(ns_thermal->pvtmon + PVTMON_STATUS);
+	*temp = slope * val + offset;
+
+	return 0;
+}
+
+static const struct thermal_zone_of_device_ops ns_thermal_ops = {
+	.get_temp = ns_thermal_get_temp,
+};
+
+static int ns_thermal_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ns_thermal *ns_thermal;
+
+	ns_thermal = devm_kzalloc(dev, sizeof(*ns_thermal), GFP_KERNEL);
+	if (!ns_thermal)
+		return -ENOMEM;
+
+	ns_thermal->pvtmon = of_iomap(dev_of_node(dev), 0);
+	if (WARN_ON(!ns_thermal->pvtmon))
+		return -ENOENT;
+
+	ns_thermal->tz = devm_thermal_zone_of_sensor_register(dev, 0,
+							      ns_thermal,
+							      &ns_thermal_ops);
+	if (IS_ERR(ns_thermal->tz)) {
+		iounmap(ns_thermal->pvtmon);
+		return PTR_ERR(ns_thermal->tz);
+	}
+
+	platform_set_drvdata(pdev, ns_thermal);
+
+	return 0;
+}
+
+static int ns_thermal_remove(struct platform_device *pdev)
+{
+	struct ns_thermal *ns_thermal = platform_get_drvdata(pdev);
+
+	iounmap(ns_thermal->pvtmon);
+
+	return 0;
+}
+
+static const struct of_device_id ns_thermal_of_match[] = {
+	{ .compatible = "brcm,ns-thermal", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, ns_thermal_of_match);
+
+static struct platform_driver ns_thermal_driver = {
+	.probe		= ns_thermal_probe,
+	.remove		= ns_thermal_remove,
+	.driver = {
+		.name = "ns-thermal",
+		.of_match_table = ns_thermal_of_match,
+	},
+};
+module_platform_driver(ns_thermal_driver);
+
+MODULE_AUTHOR("Rafał Miłecki <rafal@milecki.pl>");
+MODULE_DESCRIPTION("Northstar thermal driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/da9062-thermal.c b/drivers/thermal/da9062-thermal.c
new file mode 100644
index 000000000000..dd8dd947b7f0
--- /dev/null
+++ b/drivers/thermal/da9062-thermal.c
@@ -0,0 +1,315 @@
+/*
+ * Thermal device driver for DA9062 and DA9061
+ * Copyright (C) 2017  Dialog Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* When over-temperature is reached, an interrupt from the device will be
+ * triggered. Following this event the interrupt will be disabled and
+ * periodic transmission of uevents (HOT trip point) should define the
+ * first level of temperature supervision. It is expected that any final
+ * implementation of the thermal driver will include a .notify() function
+ * to implement these uevents to userspace.
+ *
+ * These uevents are intended to indicate non-invasive temperature control
+ * of the system, where the necessary measures for cooling are the
+ * responsibility of the host software. Once the temperature falls again,
+ * the IRQ is re-enabled so the start of a new over-temperature event can
+ * be detected without constant software monitoring.
+ */
+
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/thermal.h>
+#include <linux/workqueue.h>
+
+#include <linux/mfd/da9062/core.h>
+#include <linux/mfd/da9062/registers.h>
+
+/* Minimum, maximum and default polling millisecond periods are provided
+ * here as an example. It is expected that any final implementation to also
+ * include a modification of these settings to match the required
+ * application.
+ */
+#define DA9062_DEFAULT_POLLING_MS_PERIOD	3000
+#define DA9062_MAX_POLLING_MS_PERIOD		10000
+#define DA9062_MIN_POLLING_MS_PERIOD		1000
+
+#define DA9062_MILLI_CELSIUS(t)			((t) * 1000)
+
+struct da9062_thermal_config {
+	const char *name;
+};
+
+struct da9062_thermal {
+	struct da9062 *hw;
+	struct delayed_work work;
+	struct thermal_zone_device *zone;
+	enum thermal_device_mode mode;
+	struct mutex lock; /* protection for da9062_thermal temperature */
+	int temperature;
+	int irq;
+	const struct da9062_thermal_config *config;
+	struct device *dev;
+};
+
+static void da9062_thermal_poll_on(struct work_struct *work)
+{
+	struct da9062_thermal *thermal = container_of(work,
+						struct da9062_thermal,
+						work.work);
+	unsigned long delay;
+	unsigned int val;
+	int ret;
+
+	/* clear E_TEMP */
+	ret = regmap_write(thermal->hw->regmap,
+			   DA9062AA_EVENT_B,
+			   DA9062AA_E_TEMP_MASK);
+	if (ret < 0) {
+		dev_err(thermal->dev,
+			"Cannot clear the TJUNC temperature status\n");
+		goto err_enable_irq;
+	}
+
+	/* Now read E_TEMP again: it is acting like a status bit.
+	 * If over-temperature, then this status will be true.
+	 * If not over-temperature, this status will be false.
+	 */
+	ret = regmap_read(thermal->hw->regmap,
+			  DA9062AA_EVENT_B,
+			  &val);
+	if (ret < 0) {
+		dev_err(thermal->dev,
+			"Cannot check the TJUNC temperature status\n");
+		goto err_enable_irq;
+	}
+
+	if (val & DA9062AA_E_TEMP_MASK) {
+		mutex_lock(&thermal->lock);
+		thermal->temperature = DA9062_MILLI_CELSIUS(125);
+		mutex_unlock(&thermal->lock);
+		thermal_zone_device_update(thermal->zone,
+					   THERMAL_EVENT_UNSPECIFIED);
+
+		delay = msecs_to_jiffies(thermal->zone->passive_delay);
+		schedule_delayed_work(&thermal->work, delay);
+		return;
+	}
+
+	mutex_lock(&thermal->lock);
+	thermal->temperature = DA9062_MILLI_CELSIUS(0);
+	mutex_unlock(&thermal->lock);
+	thermal_zone_device_update(thermal->zone,
+				   THERMAL_EVENT_UNSPECIFIED);
+
+err_enable_irq:
+	enable_irq(thermal->irq);
+}
+
+static irqreturn_t da9062_thermal_irq_handler(int irq, void *data)
+{
+	struct da9062_thermal *thermal = data;
+
+	disable_irq_nosync(thermal->irq);
+	schedule_delayed_work(&thermal->work, 0);
+
+	return IRQ_HANDLED;
+}
+
+static int da9062_thermal_get_mode(struct thermal_zone_device *z,
+				   enum thermal_device_mode *mode)
+{
+	struct da9062_thermal *thermal = z->devdata;
+	*mode = thermal->mode;
+	return 0;
+}
+
+static int da9062_thermal_get_trip_type(struct thermal_zone_device *z,
+					int trip,
+					enum thermal_trip_type *type)
+{
+	struct da9062_thermal *thermal = z->devdata;
+
+	switch (trip) {
+	case 0:
+		*type = THERMAL_TRIP_HOT;
+		break;
+	default:
+		dev_err(thermal->dev,
+			"Driver does not support more than 1 trip-wire\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int da9062_thermal_get_trip_temp(struct thermal_zone_device *z,
+					int trip,
+					int *temp)
+{
+	struct da9062_thermal *thermal = z->devdata;
+
+	switch (trip) {
+	case 0:
+		*temp = DA9062_MILLI_CELSIUS(125);
+		break;
+	default:
+		dev_err(thermal->dev,
+			"Driver does not support more than 1 trip-wire\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int da9062_thermal_get_temp(struct thermal_zone_device *z,
+				   int *temp)
+{
+	struct da9062_thermal *thermal = z->devdata;
+
+	mutex_lock(&thermal->lock);
+	*temp = thermal->temperature;
+	mutex_unlock(&thermal->lock);
+
+	return 0;
+}
+
+static struct thermal_zone_device_ops da9062_thermal_ops = {
+	.get_temp	= da9062_thermal_get_temp,
+	.get_mode	= da9062_thermal_get_mode,
+	.get_trip_type	= da9062_thermal_get_trip_type,
+	.get_trip_temp	= da9062_thermal_get_trip_temp,
+};
+
+static const struct da9062_thermal_config da9062_config = {
+	.name = "da9062-thermal",
+};
+
+static const struct of_device_id da9062_compatible_reg_id_table[] = {
+	{ .compatible = "dlg,da9062-thermal", .data = &da9062_config },
+	{ },
+};
+
+MODULE_DEVICE_TABLE(of, da9062_compatible_reg_id_table);
+
+static int da9062_thermal_probe(struct platform_device *pdev)
+{
+	struct da9062 *chip = dev_get_drvdata(pdev->dev.parent);
+	struct da9062_thermal *thermal;
+	unsigned int pp_tmp = DA9062_DEFAULT_POLLING_MS_PERIOD;
+	const struct of_device_id *match;
+	int ret = 0;
+
+	match = of_match_node(da9062_compatible_reg_id_table,
+			      pdev->dev.of_node);
+	if (!match)
+		return -ENXIO;
+
+	if (pdev->dev.of_node) {
+		if (!of_property_read_u32(pdev->dev.of_node,
+					  "polling-delay-passive",
+					  &pp_tmp)) {
+			if (pp_tmp < DA9062_MIN_POLLING_MS_PERIOD ||
+			    pp_tmp > DA9062_MAX_POLLING_MS_PERIOD) {
+				dev_warn(&pdev->dev,
+					 "Out-of-range polling period %d ms\n",
+					 pp_tmp);
+				pp_tmp = DA9062_DEFAULT_POLLING_MS_PERIOD;
+			}
+		}
+	}
+
+	thermal = devm_kzalloc(&pdev->dev, sizeof(struct da9062_thermal),
+			       GFP_KERNEL);
+	if (!thermal) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	thermal->config = match->data;
+	thermal->hw = chip;
+	thermal->mode = THERMAL_DEVICE_ENABLED;
+	thermal->dev = &pdev->dev;
+
+	INIT_DELAYED_WORK(&thermal->work, da9062_thermal_poll_on);
+	mutex_init(&thermal->lock);
+
+	thermal->zone = thermal_zone_device_register(thermal->config->name,
+					1, 0, thermal,
+					&da9062_thermal_ops, NULL, pp_tmp,
+					0);
+	if (IS_ERR(thermal->zone)) {
+		dev_err(&pdev->dev, "Cannot register thermal zone device\n");
+		ret = PTR_ERR(thermal->zone);
+		goto err;
+	}
+
+	dev_dbg(&pdev->dev,
+		"TJUNC temperature polling period set at %d ms\n",
+		thermal->zone->passive_delay);
+
+	ret = platform_get_irq_byname(pdev, "THERMAL");
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to get platform IRQ.\n");
+		goto err_zone;
+	}
+	thermal->irq = ret;
+
+	ret = request_threaded_irq(thermal->irq, NULL,
+				   da9062_thermal_irq_handler,
+				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				   "THERMAL", thermal);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"Failed to request thermal device IRQ.\n");
+		goto err_zone;
+	}
+
+	platform_set_drvdata(pdev, thermal);
+	return 0;
+
+err_zone:
+	thermal_zone_device_unregister(thermal->zone);
+err:
+	return ret;
+}
+
+static int da9062_thermal_remove(struct platform_device *pdev)
+{
+	struct	da9062_thermal *thermal = platform_get_drvdata(pdev);
+
+	free_irq(thermal->irq, thermal);
+	cancel_delayed_work_sync(&thermal->work);
+	thermal_zone_device_unregister(thermal->zone);
+	return 0;
+}
+
+static struct platform_driver da9062_thermal_driver = {
+	.probe	= da9062_thermal_probe,
+	.remove	= da9062_thermal_remove,
+	.driver	= {
+		.name	= "da9062-thermal",
+		.of_match_table = da9062_compatible_reg_id_table,
+	},
+};
+
+module_platform_driver(da9062_thermal_driver);
+
+MODULE_AUTHOR("Steve Twiss");
+MODULE_DESCRIPTION("Thermal TJUNC device driver for Dialog DA9062 and DA9061");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:da9062-thermal");
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 4bf4ad58cffd..ef59256887ff 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -28,6 +28,8 @@
 
 #include <trace/events/thermal.h>
 
+#define SCALE_ERROR_MITIGATION 100
+
 static DEFINE_IDA(devfreq_ida);
 
 /**
@@ -45,6 +47,12 @@ static DEFINE_IDA(devfreq_ida);
  * @freq_table_size:	Size of the @freq_table and @power_table
  * @power_ops:	Pointer to devfreq_cooling_power, used to generate the
  *		@power_table.
+ * @res_util:	Resource utilization scaling factor for the power.
+ *		It is multiplied by 100 to minimize the error. It is used
+ *		for estimation of the power budget instead of using
+ *		'utilization' (which is	'busy_time / 'total_time').
+ *		The 'res_util' range is from 100 to (power_table[state] * 100)
+ *		for the corresponding 'state'.
  */
 struct devfreq_cooling_device {
 	int id;
@@ -55,6 +63,8 @@ struct devfreq_cooling_device {
 	u32 *freq_table;
 	size_t freq_table_size;
 	struct devfreq_cooling_power *power_ops;
+	u32 res_util;
+	int capped_state;
 };
 
 /**
@@ -164,27 +174,12 @@ freq_get_state(struct devfreq_cooling_device *dfc, unsigned long freq)
 	return THERMAL_CSTATE_INVALID;
 }
 
-/**
- * get_static_power() - calculate the static power
- * @dfc:	Pointer to devfreq cooling device
- * @freq:	Frequency in Hz
- *
- * Calculate the static power in milliwatts using the supplied
- * get_static_power().  The current voltage is calculated using the
- * OPP library.  If no get_static_power() was supplied, assume the
- * static power is negligible.
- */
-static unsigned long
-get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
+static unsigned long get_voltage(struct devfreq *df, unsigned long freq)
 {
-	struct devfreq *df = dfc->devfreq;
 	struct device *dev = df->dev.parent;
 	unsigned long voltage;
 	struct dev_pm_opp *opp;
 
-	if (!dfc->power_ops->get_static_power)
-		return 0;
-
 	opp = dev_pm_opp_find_freq_exact(dev, freq, true);
 	if (PTR_ERR(opp) == -ERANGE)
 		opp = dev_pm_opp_find_freq_exact(dev, freq, false);
@@ -202,9 +197,35 @@ get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
 		dev_err_ratelimited(dev,
 				    "Failed to get voltage for frequency %lu\n",
 				    freq);
-		return 0;
 	}
 
+	return voltage;
+}
+
+/**
+ * get_static_power() - calculate the static power
+ * @dfc:	Pointer to devfreq cooling device
+ * @freq:	Frequency in Hz
+ *
+ * Calculate the static power in milliwatts using the supplied
+ * get_static_power().  The current voltage is calculated using the
+ * OPP library.  If no get_static_power() was supplied, assume the
+ * static power is negligible.
+ */
+static unsigned long
+get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
+{
+	struct devfreq *df = dfc->devfreq;
+	unsigned long voltage;
+
+	if (!dfc->power_ops->get_static_power)
+		return 0;
+
+	voltage = get_voltage(df, freq);
+
+	if (voltage == 0)
+		return 0;
+
 	return dfc->power_ops->get_static_power(df, voltage);
 }
 
@@ -239,6 +260,16 @@ get_dynamic_power(struct devfreq_cooling_device *dfc, unsigned long freq,
 	return power;
 }
 
+
+static inline unsigned long get_total_power(struct devfreq_cooling_device *dfc,
+					    unsigned long freq,
+					    unsigned long voltage)
+{
+	return get_static_power(dfc, freq) + get_dynamic_power(dfc, freq,
+							       voltage);
+}
+
+
 static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev,
 					       struct thermal_zone_device *tz,
 					       u32 *power)
@@ -248,27 +279,55 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd
 	struct devfreq_dev_status *status = &df->last_status;
 	unsigned long state;
 	unsigned long freq = status->current_frequency;
-	u32 dyn_power, static_power;
+	unsigned long voltage;
+	u32 dyn_power = 0;
+	u32 static_power = 0;
+	int res;
 
-	/* Get dynamic power for state */
 	state = freq_get_state(dfc, freq);
-	if (state == THERMAL_CSTATE_INVALID)
-		return -EAGAIN;
+	if (state == THERMAL_CSTATE_INVALID) {
+		res = -EAGAIN;
+		goto fail;
+	}
 
-	dyn_power = dfc->power_table[state];
+	if (dfc->power_ops->get_real_power) {
+		voltage = get_voltage(df, freq);
+		if (voltage == 0) {
+			res = -EINVAL;
+			goto fail;
+		}
 
-	/* Scale dynamic power for utilization */
-	dyn_power = (dyn_power * status->busy_time) / status->total_time;
+		res = dfc->power_ops->get_real_power(df, power, freq, voltage);
+		if (!res) {
+			state = dfc->capped_state;
+			dfc->res_util = dfc->power_table[state];
+			dfc->res_util *= SCALE_ERROR_MITIGATION;
 
-	/* Get static power */
-	static_power = get_static_power(dfc, freq);
+			if (*power > 1)
+				dfc->res_util /= *power;
+		} else {
+			goto fail;
+		}
+	} else {
+		dyn_power = dfc->power_table[state];
 
-	trace_thermal_power_devfreq_get_power(cdev, status, freq, dyn_power,
-					      static_power);
+		/* Scale dynamic power for utilization */
+		dyn_power *= status->busy_time;
+		dyn_power /= status->total_time;
+		/* Get static power */
+		static_power = get_static_power(dfc, freq);
 
-	*power = dyn_power + static_power;
+		*power = dyn_power + static_power;
+	}
+
+	trace_thermal_power_devfreq_get_power(cdev, status, freq, dyn_power,
+					      static_power, *power);
 
 	return 0;
+fail:
+	/* It is safe to set max in this case */
+	dfc->res_util = SCALE_ERROR_MITIGATION;
+	return res;
 }
 
 static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
@@ -301,26 +360,34 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
 	unsigned long busy_time;
 	s32 dyn_power;
 	u32 static_power;
+	s32 est_power;
 	int i;
 
-	static_power = get_static_power(dfc, freq);
+	if (dfc->power_ops->get_real_power) {
+		/* Scale for resource utilization */
+		est_power = power * dfc->res_util;
+		est_power /= SCALE_ERROR_MITIGATION;
+	} else {
+		static_power = get_static_power(dfc, freq);
 
-	dyn_power = power - static_power;
-	dyn_power = dyn_power > 0 ? dyn_power : 0;
+		dyn_power = power - static_power;
+		dyn_power = dyn_power > 0 ? dyn_power : 0;
 
-	/* Scale dynamic power for utilization */
-	busy_time = status->busy_time ?: 1;
-	dyn_power = (dyn_power * status->total_time) / busy_time;
+		/* Scale dynamic power for utilization */
+		busy_time = status->busy_time ?: 1;
+		est_power = (dyn_power * status->total_time) / busy_time;
+	}
 
 	/*
 	 * Find the first cooling state that is within the power
 	 * budget for dynamic power.
 	 */
 	for (i = 0; i < dfc->freq_table_size - 1; i++)
-		if (dyn_power >= dfc->power_table[i])
+		if (est_power >= dfc->power_table[i])
 			break;
 
 	*state = i;
+	dfc->capped_state = i;
 	trace_thermal_power_devfreq_limit(cdev, freq, *state, power);
 	return 0;
 }
@@ -376,7 +443,7 @@ static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc)
 	}
 
 	for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) {
-		unsigned long power_dyn, voltage;
+		unsigned long power, voltage;
 		struct dev_pm_opp *opp;
 
 		opp = dev_pm_opp_find_freq_floor(dev, &freq);
@@ -389,12 +456,15 @@ static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc)
 		dev_pm_opp_put(opp);
 
 		if (dfc->power_ops) {
-			power_dyn = get_dynamic_power(dfc, freq, voltage);
+			if (dfc->power_ops->get_real_power)
+				power = get_total_power(dfc, freq, voltage);
+			else
+				power = get_dynamic_power(dfc, freq, voltage);
 
-			dev_dbg(dev, "Dynamic power table: %lu MHz @ %lu mV: %lu = %lu mW\n",
-				freq / 1000000, voltage, power_dyn, power_dyn);
+			dev_dbg(dev, "Power table: %lu MHz @ %lu mV: %lu = %lu mW\n",
+				freq / 1000000, voltage, power, power);
 
-			power_table[i] = power_dyn;
+			power_table[i] = power;
 		}
 
 		freq_table[i] = freq;
diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c
index 9413c4abf0b9..a9ec94ed7a42 100644
--- a/drivers/thermal/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/int340x_thermal/int3400_thermal.c
@@ -23,7 +23,7 @@ enum int3400_thermal_uuid {
 	INT3400_THERMAL_MAXIMUM_UUID,
 };
 
-static u8 *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
+static char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
 	"42A441D6-AE6A-462b-A84B-4A8CE79027D3",
 	"3A95C389-E4B8-4629-A526-C52C88626BAE",
 	"97C68AE7-15FA-499c-B8C9-5DA81D606E0A",
@@ -141,10 +141,10 @@ static int int3400_thermal_get_uuids(struct int3400_thermal_priv *priv)
 		}
 
 		for (j = 0; j < INT3400_THERMAL_MAXIMUM_UUID; j++) {
-			u8 uuid[16];
+			guid_t guid;
 
-			acpi_str_to_uuid(int3400_thermal_uuids[j], uuid);
-			if (!strncmp(uuid, objb->buffer.pointer, 16)) {
+			guid_parse(int3400_thermal_uuids[j], &guid);
+			if (guid_equal((guid_t *)objb->buffer.pointer, &guid)) {
 				priv->uuid_bitmap |= (1 << j);
 				break;
 			}
diff --git a/drivers/thermal/intel_soc_dts_thermal.c b/drivers/thermal/intel_soc_dts_thermal.c
index b2bbaa1c60b0..c27868b2c6af 100644
--- a/drivers/thermal/intel_soc_dts_thermal.c
+++ b/drivers/thermal/intel_soc_dts_thermal.c
@@ -73,8 +73,12 @@ static int __init intel_soc_thermal_init(void)
 					   IRQF_TRIGGER_RISING | IRQF_ONESHOT,
 					   "soc_dts", soc_dts);
 		if (err) {
-			pr_err("request_threaded_irq ret %d\n", err);
-			goto error_irq;
+			/*
+			 * Do not just error out because the user space thermal
+			 * daemon such as DPTF may use polling instead of being
+			 * interrupt driven.
+			 */
+			pr_warn("request_threaded_irq ret %d\n", err);
 		}
 	}
 
@@ -88,7 +92,6 @@ static int __init intel_soc_thermal_init(void)
 error_trips:
 	if (soc_dts_thres_irq)
 		free_irq(soc_dts_thres_irq, soc_dts);
-error_irq:
 	intel_soc_dts_iosf_exit(soc_dts);
 
 	return err;
diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c
index 1aff7fde54b1..7737f14846f9 100644
--- a/drivers/thermal/mtk_thermal.c
+++ b/drivers/thermal/mtk_thermal.c
@@ -191,7 +191,7 @@ static const int mt8173_bank_data[MT8173_NUM_ZONES][3] = {
 };
 
 static const int mt8173_msr[MT8173_NUM_SENSORS_PER_ZONE] = {
-	TEMP_MSR0, TEMP_MSR1, TEMP_MSR2, TEMP_MSR2
+	TEMP_MSR0, TEMP_MSR1, TEMP_MSR2, TEMP_MSR3
 };
 
 static const int mt8173_adcpnp[MT8173_NUM_SENSORS_PER_ZONE] = {
diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c
index 644ba526d9ea..4362a69ac88d 100644
--- a/drivers/thermal/qoriq_thermal.c
+++ b/drivers/thermal/qoriq_thermal.c
@@ -195,7 +195,6 @@ static struct thermal_zone_of_device_ops tmu_tz_ops = {
 static int qoriq_tmu_probe(struct platform_device *pdev)
 {
 	int ret;
-	const struct thermal_trip *trip;
 	struct qoriq_tmu_data *data;
 	struct device_node *np = pdev->dev.of_node;
 	u32 site = 0;
@@ -243,8 +242,6 @@ static int qoriq_tmu_probe(struct platform_device *pdev)
 		goto err_tmu;
 	}
 
-	trip = of_thermal_get_trip_points(data->tz);
-
 	/* Enable monitoring */
 	site |= 0x1 << (15 - data->sensor_id);
 	tmu_write(data, site | TMR_ME | TMR_ALPF, &data->regs->tmr);
diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c
index d33c845244b1..37fcefd06d9f 100644
--- a/drivers/thermal/rcar_gen3_thermal.c
+++ b/drivers/thermal/rcar_gen3_thermal.c
@@ -20,12 +20,14 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/spinlock.h>
 #include <linux/thermal.h>
 
+#include "thermal_core.h"
+
 /* Register offsets */
 #define REG_GEN3_IRQSTR		0x04
 #define REG_GEN3_IRQMSK		0x08
@@ -41,6 +43,14 @@
 #define REG_GEN3_THCODE2	0x54
 #define REG_GEN3_THCODE3	0x58
 
+/* IRQ{STR,MSK,EN} bits */
+#define IRQ_TEMP1		BIT(0)
+#define IRQ_TEMP2		BIT(1)
+#define IRQ_TEMP3		BIT(2)
+#define IRQ_TEMPD1		BIT(3)
+#define IRQ_TEMPD2		BIT(4)
+#define IRQ_TEMPD3		BIT(5)
+
 /* CTSR bits */
 #define CTSR_PONM	BIT(8)
 #define CTSR_AOUT	BIT(7)
@@ -72,11 +82,15 @@ struct rcar_gen3_thermal_tsc {
 	void __iomem *base;
 	struct thermal_zone_device *zone;
 	struct equation_coefs coef;
-	struct mutex lock;
+	int low;
+	int high;
 };
 
 struct rcar_gen3_thermal_priv {
 	struct rcar_gen3_thermal_tsc *tscs[TSC_MAX_NUM];
+	unsigned int num_tscs;
+	spinlock_t lock; /* Protect interrupts on and off */
+	const struct rcar_gen3_thermal_data *data;
 };
 
 struct rcar_gen3_thermal_data {
@@ -114,6 +128,7 @@ static inline void rcar_gen3_thermal_write(struct rcar_gen3_thermal_tsc *tsc,
 
 #define FIXPT_SHIFT 7
 #define FIXPT_INT(_x) ((_x) << FIXPT_SHIFT)
+#define INT_FIXPT(_x) ((_x) >> FIXPT_SHIFT)
 #define FIXPT_DIV(_a, _b) DIV_ROUND_CLOSEST(((_a) << FIXPT_SHIFT), (_b))
 #define FIXPT_TO_MCELSIUS(_x) ((_x) * 1000 >> FIXPT_SHIFT)
 
@@ -163,16 +178,12 @@ static int rcar_gen3_thermal_get_temp(void *devdata, int *temp)
 	u32 reg;
 
 	/* Read register and convert to mili Celsius */
-	mutex_lock(&tsc->lock);
-
 	reg = rcar_gen3_thermal_read(tsc, REG_GEN3_TEMP) & CTEMP_MASK;
 
 	val1 = FIXPT_DIV(FIXPT_INT(reg) - tsc->coef.b1, tsc->coef.a1);
 	val2 = FIXPT_DIV(FIXPT_INT(reg) - tsc->coef.b2, tsc->coef.a2);
 	mcelsius = FIXPT_TO_MCELSIUS((val1 + val2) / 2);
 
-	mutex_unlock(&tsc->lock);
-
 	/* Make sure we are inside specifications */
 	if ((mcelsius < MCELSIUS(-40)) || (mcelsius > MCELSIUS(125)))
 		return -EIO;
@@ -183,10 +194,90 @@ static int rcar_gen3_thermal_get_temp(void *devdata, int *temp)
 	return 0;
 }
 
+static int rcar_gen3_thermal_mcelsius_to_temp(struct rcar_gen3_thermal_tsc *tsc,
+					      int mcelsius)
+{
+	int celsius, val1, val2;
+
+	celsius = DIV_ROUND_CLOSEST(mcelsius, 1000);
+	val1 = celsius * tsc->coef.a1 + tsc->coef.b1;
+	val2 = celsius * tsc->coef.a2 + tsc->coef.b2;
+
+	return INT_FIXPT((val1 + val2) / 2);
+}
+
+static int rcar_gen3_thermal_set_trips(void *devdata, int low, int high)
+{
+	struct rcar_gen3_thermal_tsc *tsc = devdata;
+
+	low = clamp_val(low, -40000, 125000);
+	high = clamp_val(high, -40000, 125000);
+
+	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQTEMP1,
+				rcar_gen3_thermal_mcelsius_to_temp(tsc, low));
+
+	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQTEMP2,
+				rcar_gen3_thermal_mcelsius_to_temp(tsc, high));
+
+	tsc->low = low;
+	tsc->high = high;
+
+	return 0;
+}
+
 static struct thermal_zone_of_device_ops rcar_gen3_tz_of_ops = {
 	.get_temp	= rcar_gen3_thermal_get_temp,
+	.set_trips	= rcar_gen3_thermal_set_trips,
 };
 
+static void rcar_thermal_irq_set(struct rcar_gen3_thermal_priv *priv, bool on)
+{
+	unsigned int i;
+	u32 val = on ? IRQ_TEMPD1 | IRQ_TEMP2 : 0;
+
+	for (i = 0; i < priv->num_tscs; i++)
+		rcar_gen3_thermal_write(priv->tscs[i], REG_GEN3_IRQMSK, val);
+}
+
+static irqreturn_t rcar_gen3_thermal_irq(int irq, void *data)
+{
+	struct rcar_gen3_thermal_priv *priv = data;
+	u32 status;
+	int i, ret = IRQ_HANDLED;
+
+	spin_lock(&priv->lock);
+	for (i = 0; i < priv->num_tscs; i++) {
+		status = rcar_gen3_thermal_read(priv->tscs[i], REG_GEN3_IRQSTR);
+		rcar_gen3_thermal_write(priv->tscs[i], REG_GEN3_IRQSTR, 0);
+		if (status)
+			ret = IRQ_WAKE_THREAD;
+	}
+
+	if (ret == IRQ_WAKE_THREAD)
+		rcar_thermal_irq_set(priv, false);
+
+	spin_unlock(&priv->lock);
+
+	return ret;
+}
+
+static irqreturn_t rcar_gen3_thermal_irq_thread(int irq, void *data)
+{
+	struct rcar_gen3_thermal_priv *priv = data;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < priv->num_tscs; i++)
+		thermal_zone_device_update(priv->tscs[i]->zone,
+					   THERMAL_EVENT_UNSPECIFIED);
+
+	spin_lock_irqsave(&priv->lock, flags);
+	rcar_thermal_irq_set(priv, true);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return IRQ_HANDLED;
+}
+
 static void r8a7795_thermal_init(struct rcar_gen3_thermal_tsc *tsc)
 {
 	rcar_gen3_thermal_write(tsc, REG_GEN3_CTSR,  CTSR_THBGR);
@@ -195,7 +286,11 @@ static void r8a7795_thermal_init(struct rcar_gen3_thermal_tsc *tsc)
 	usleep_range(1000, 2000);
 
 	rcar_gen3_thermal_write(tsc, REG_GEN3_CTSR, CTSR_PONM);
+
 	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQCTL, 0x3F);
+	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQMSK, 0);
+	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQEN, IRQ_TEMPD1 | IRQ_TEMP2);
+
 	rcar_gen3_thermal_write(tsc, REG_GEN3_CTSR,
 				CTSR_PONM | CTSR_AOUT | CTSR_THBGR | CTSR_VMEN);
 
@@ -219,9 +314,14 @@ static void r8a7796_thermal_init(struct rcar_gen3_thermal_tsc *tsc)
 	usleep_range(1000, 2000);
 
 	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQCTL, 0x3F);
+	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQMSK, 0);
+	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQEN, IRQ_TEMPD1 | IRQ_TEMP2);
+
 	reg_val = rcar_gen3_thermal_read(tsc, REG_GEN3_THCTR);
 	reg_val |= THCTR_THSST;
 	rcar_gen3_thermal_write(tsc, REG_GEN3_THCTR, reg_val);
+
+	usleep_range(1000, 2000);
 }
 
 static const struct rcar_gen3_thermal_data r8a7795_data = {
@@ -255,9 +355,8 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct resource *res;
 	struct thermal_zone_device *zone;
-	int ret, i;
-	const struct rcar_gen3_thermal_data *match_data =
-		of_device_get_match_data(dev);
+	int ret, irq, i;
+	char *irqname;
 
 	/* default values if FUSEs are missing */
 	/* TODO: Read values from hardware on supported platforms */
@@ -272,24 +371,50 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 	if (!priv)
 		return -ENOMEM;
 
+	priv->data = of_device_get_match_data(dev);
+
+	spin_lock_init(&priv->lock);
+
 	platform_set_drvdata(pdev, priv);
 
+	/*
+	 * Request 2 (of the 3 possible) IRQs, the driver only needs to
+	 * to trigger on the low and high trip points of the current
+	 * temp window at this point.
+	 */
+	for (i = 0; i < 2; i++) {
+		irq = platform_get_irq(pdev, i);
+		if (irq < 0)
+			return irq;
+
+		irqname = devm_kasprintf(dev, GFP_KERNEL, "%s:ch%d",
+					 dev_name(dev), i);
+		if (!irqname)
+			return -ENOMEM;
+
+		ret = devm_request_threaded_irq(dev, irq, rcar_gen3_thermal_irq,
+						rcar_gen3_thermal_irq_thread,
+						IRQF_SHARED, irqname, priv);
+		if (ret)
+			return ret;
+	}
+
 	pm_runtime_enable(dev);
 	pm_runtime_get_sync(dev);
 
 	for (i = 0; i < TSC_MAX_NUM; i++) {
 		struct rcar_gen3_thermal_tsc *tsc;
 
+		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
+		if (!res)
+			break;
+
 		tsc = devm_kzalloc(dev, sizeof(*tsc), GFP_KERNEL);
 		if (!tsc) {
 			ret = -ENOMEM;
 			goto error_unregister;
 		}
 
-		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
-		if (!res)
-			break;
-
 		tsc->base = devm_ioremap_resource(dev, res);
 		if (IS_ERR(tsc->base)) {
 			ret = PTR_ERR(tsc->base);
@@ -297,9 +422,8 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 		}
 
 		priv->tscs[i] = tsc;
-		mutex_init(&tsc->lock);
 
-		match_data->thermal_init(tsc);
+		priv->data->thermal_init(tsc);
 		rcar_gen3_thermal_calc_coefs(&tsc->coef, ptat, thcode[i]);
 
 		zone = devm_thermal_zone_of_sensor_register(dev, i, tsc,
@@ -310,8 +434,23 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 			goto error_unregister;
 		}
 		tsc->zone = zone;
+
+		ret = of_thermal_get_ntrips(tsc->zone);
+		if (ret < 0)
+			goto error_unregister;
+
+		dev_info(dev, "TSC%d: Loaded %d trip points\n", i, ret);
 	}
 
+	priv->num_tscs = i;
+
+	if (!priv->num_tscs) {
+		ret = -ENODEV;
+		goto error_unregister;
+	}
+
+	rcar_thermal_irq_set(priv, true);
+
 	return 0;
 
 error_unregister:
@@ -320,9 +459,39 @@ error_unregister:
 	return ret;
 }
 
+static int __maybe_unused rcar_gen3_thermal_suspend(struct device *dev)
+{
+	struct rcar_gen3_thermal_priv *priv = dev_get_drvdata(dev);
+
+	rcar_thermal_irq_set(priv, false);
+
+	return 0;
+}
+
+static int __maybe_unused rcar_gen3_thermal_resume(struct device *dev)
+{
+	struct rcar_gen3_thermal_priv *priv = dev_get_drvdata(dev);
+	unsigned int i;
+
+	for (i = 0; i < priv->num_tscs; i++) {
+		struct rcar_gen3_thermal_tsc *tsc = priv->tscs[i];
+
+		priv->data->thermal_init(tsc);
+		rcar_gen3_thermal_set_trips(tsc, tsc->low, tsc->high);
+	}
+
+	rcar_thermal_irq_set(priv, true);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(rcar_gen3_thermal_pm_ops, rcar_gen3_thermal_suspend,
+			 rcar_gen3_thermal_resume);
+
 static struct platform_driver rcar_gen3_thermal_driver = {
 	.driver	= {
 		.name	= "rcar_gen3_thermal",
+		.pm = &rcar_gen3_thermal_pm_ops,
 		.of_match_table = rcar_gen3_thermal_dt_ids,
 	},
 	.probe		= rcar_gen3_thermal_probe,
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 11f0675cb7e5..5a51c740e372 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -45,8 +45,10 @@ static LIST_HEAD(thermal_governor_list);
 
 static DEFINE_MUTEX(thermal_list_lock);
 static DEFINE_MUTEX(thermal_governor_lock);
+static DEFINE_MUTEX(poweroff_lock);
 
 static atomic_t in_suspend;
+static bool power_off_triggered;
 
 static struct thermal_governor *def_governor;
 
@@ -322,6 +324,54 @@ static void handle_non_critical_trips(struct thermal_zone_device *tz,
 		       def_governor->throttle(tz, trip);
 }
 
+/**
+ * thermal_emergency_poweroff_func - emergency poweroff work after a known delay
+ * @work: work_struct associated with the emergency poweroff function
+ *
+ * This function is called in very critical situations to force
+ * a kernel poweroff after a configurable timeout value.
+ */
+static void thermal_emergency_poweroff_func(struct work_struct *work)
+{
+	/*
+	 * We have reached here after the emergency thermal shutdown
+	 * Waiting period has expired. This means orderly_poweroff has
+	 * not been able to shut off the system for some reason.
+	 * Try to shut down the system immediately using kernel_power_off
+	 * if populated
+	 */
+	WARN(1, "Attempting kernel_power_off: Temperature too high\n");
+	kernel_power_off();
+
+	/*
+	 * Worst of the worst case trigger emergency restart
+	 */
+	WARN(1, "Attempting emergency_restart: Temperature too high\n");
+	emergency_restart();
+}
+
+static DECLARE_DELAYED_WORK(thermal_emergency_poweroff_work,
+			    thermal_emergency_poweroff_func);
+
+/**
+ * thermal_emergency_poweroff - Trigger an emergency system poweroff
+ *
+ * This may be called from any critical situation to trigger a system shutdown
+ * after a known period of time. By default this is not scheduled.
+ */
+static void thermal_emergency_poweroff(void)
+{
+	int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS;
+	/*
+	 * poweroff_delay_ms must be a carefully profiled positive value.
+	 * Its a must for thermal_emergency_poweroff_work to be scheduled
+	 */
+	if (poweroff_delay_ms <= 0)
+		return;
+	schedule_delayed_work(&thermal_emergency_poweroff_work,
+			      msecs_to_jiffies(poweroff_delay_ms));
+}
+
 static void handle_critical_trips(struct thermal_zone_device *tz,
 				  int trip, enum thermal_trip_type trip_type)
 {
@@ -342,7 +392,17 @@ static void handle_critical_trips(struct thermal_zone_device *tz,
 		dev_emerg(&tz->device,
 			  "critical temperature reached(%d C),shutting down\n",
 			  tz->temperature / 1000);
-		orderly_poweroff(true);
+		mutex_lock(&poweroff_lock);
+		if (!power_off_triggered) {
+			/*
+			 * Queue a backup emergency shutdown in the event of
+			 * orderly_poweroff failure
+			 */
+			thermal_emergency_poweroff();
+			orderly_poweroff(true);
+			power_off_triggered = true;
+		}
+		mutex_unlock(&poweroff_lock);
 	}
 }
 
@@ -1463,6 +1523,7 @@ static int __init thermal_init(void)
 {
 	int result;
 
+	mutex_init(&poweroff_lock);
 	result = thermal_register_governors();
 	if (result)
 		goto error;
@@ -1497,6 +1558,7 @@ error:
 	ida_destroy(&thermal_cdev_ida);
 	mutex_destroy(&thermal_list_lock);
 	mutex_destroy(&thermal_governor_lock);
+	mutex_destroy(&poweroff_lock);
 	return result;
 }
 
diff --git a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c
index 118d7d847715..4167373327d9 100644
--- a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c
+++ b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c
@@ -410,8 +410,6 @@ const struct ti_bandgap_data dra752_data = {
 		.domain = "cpu",
 		.register_cooling = ti_thermal_register_cpu_cooling,
 		.unregister_cooling = ti_thermal_unregister_cpu_cooling,
-		.slope = DRA752_GRADIENT_SLOPE,
-		.constant = DRA752_GRADIENT_CONST,
 		.slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB,
 		.constant_pcb = DRA752_GRADIENT_CONST_W_PCB,
 		},
@@ -419,8 +417,6 @@ const struct ti_bandgap_data dra752_data = {
 		.registers = &dra752_gpu_temp_sensor_registers,
 		.ts_data = &dra752_gpu_temp_sensor_data,
 		.domain = "gpu",
-		.slope = DRA752_GRADIENT_SLOPE,
-		.constant = DRA752_GRADIENT_CONST,
 		.slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB,
 		.constant_pcb = DRA752_GRADIENT_CONST_W_PCB,
 		},
@@ -428,8 +424,6 @@ const struct ti_bandgap_data dra752_data = {
 		.registers = &dra752_core_temp_sensor_registers,
 		.ts_data = &dra752_core_temp_sensor_data,
 		.domain = "core",
-		.slope = DRA752_GRADIENT_SLOPE,
-		.constant = DRA752_GRADIENT_CONST,
 		.slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB,
 		.constant_pcb = DRA752_GRADIENT_CONST_W_PCB,
 		},
@@ -437,8 +431,6 @@ const struct ti_bandgap_data dra752_data = {
 		.registers = &dra752_dspeve_temp_sensor_registers,
 		.ts_data = &dra752_dspeve_temp_sensor_data,
 		.domain = "dspeve",
-		.slope = DRA752_GRADIENT_SLOPE,
-		.constant = DRA752_GRADIENT_CONST,
 		.slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB,
 		.constant_pcb = DRA752_GRADIENT_CONST_W_PCB,
 		},
@@ -446,8 +438,6 @@ const struct ti_bandgap_data dra752_data = {
 		.registers = &dra752_iva_temp_sensor_registers,
 		.ts_data = &dra752_iva_temp_sensor_data,
 		.domain = "iva",
-		.slope = DRA752_GRADIENT_SLOPE,
-		.constant = DRA752_GRADIENT_CONST,
 		.slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB,
 		.constant_pcb = DRA752_GRADIENT_CONST_W_PCB,
 		},
diff --git a/drivers/thermal/ti-soc-thermal/omap3-thermal-data.c b/drivers/thermal/ti-soc-thermal/omap3-thermal-data.c
index 3ee34340edab..c6d217913dd1 100644
--- a/drivers/thermal/ti-soc-thermal/omap3-thermal-data.c
+++ b/drivers/thermal/ti-soc-thermal/omap3-thermal-data.c
@@ -91,8 +91,6 @@ const struct ti_bandgap_data omap34xx_data = {
 		.registers = &omap34xx_mpu_temp_sensor_registers,
 		.ts_data = &omap34xx_mpu_temp_sensor_data,
 		.domain = "cpu",
-		.slope = 0,
-		.constant = 20000,
 		.slope_pcb = 0,
 		.constant_pcb = 20000,
 		.register_cooling = NULL,
@@ -164,8 +162,6 @@ const struct ti_bandgap_data omap36xx_data = {
 		.registers = &omap36xx_mpu_temp_sensor_registers,
 		.ts_data = &omap36xx_mpu_temp_sensor_data,
 		.domain = "cpu",
-		.slope = 0,
-		.constant = 20000,
 		.slope_pcb = 0,
 		.constant_pcb = 20000,
 		.register_cooling = NULL,
diff --git a/drivers/thermal/ti-soc-thermal/omap4-thermal-data.c b/drivers/thermal/ti-soc-thermal/omap4-thermal-data.c
index d255d33da9eb..fd1113360603 100644
--- a/drivers/thermal/ti-soc-thermal/omap4-thermal-data.c
+++ b/drivers/thermal/ti-soc-thermal/omap4-thermal-data.c
@@ -82,8 +82,6 @@ const struct ti_bandgap_data omap4430_data = {
 		.registers = &omap4430_mpu_temp_sensor_registers,
 		.ts_data = &omap4430_mpu_temp_sensor_data,
 		.domain = "cpu",
-		.slope = OMAP_GRADIENT_SLOPE_4430,
-		.constant = OMAP_GRADIENT_CONST_4430,
 		.slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_4430,
 		.constant_pcb = OMAP_GRADIENT_CONST_W_PCB_4430,
 		.register_cooling = ti_thermal_register_cpu_cooling,
@@ -222,8 +220,6 @@ const struct ti_bandgap_data omap4460_data = {
 		.registers = &omap4460_mpu_temp_sensor_registers,
 		.ts_data = &omap4460_mpu_temp_sensor_data,
 		.domain = "cpu",
-		.slope = OMAP_GRADIENT_SLOPE_4460,
-		.constant = OMAP_GRADIENT_CONST_4460,
 		.slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_4460,
 		.constant_pcb = OMAP_GRADIENT_CONST_W_PCB_4460,
 		.register_cooling = ti_thermal_register_cpu_cooling,
@@ -255,8 +251,6 @@ const struct ti_bandgap_data omap4470_data = {
 		.registers = &omap4460_mpu_temp_sensor_registers,
 		.ts_data = &omap4460_mpu_temp_sensor_data,
 		.domain = "cpu",
-		.slope = OMAP_GRADIENT_SLOPE_4470,
-		.constant = OMAP_GRADIENT_CONST_4470,
 		.slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_4470,
 		.constant_pcb = OMAP_GRADIENT_CONST_W_PCB_4470,
 		.register_cooling = ti_thermal_register_cpu_cooling,
diff --git a/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c b/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c
index 79ff70c446ba..cd9a304fb571 100644
--- a/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c
+++ b/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c
@@ -336,8 +336,6 @@ const struct ti_bandgap_data omap5430_data = {
 		.domain = "cpu",
 		.register_cooling = ti_thermal_register_cpu_cooling,
 		.unregister_cooling = ti_thermal_unregister_cpu_cooling,
-		.slope = OMAP_GRADIENT_SLOPE_5430_CPU,
-		.constant = OMAP_GRADIENT_CONST_5430_CPU,
 		.slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_5430_CPU,
 		.constant_pcb = OMAP_GRADIENT_CONST_W_PCB_5430_CPU,
 		},
@@ -345,8 +343,6 @@ const struct ti_bandgap_data omap5430_data = {
 		.registers = &omap5430_gpu_temp_sensor_registers,
 		.ts_data = &omap5430_gpu_temp_sensor_data,
 		.domain = "gpu",
-		.slope = OMAP_GRADIENT_SLOPE_5430_GPU,
-		.constant = OMAP_GRADIENT_CONST_5430_GPU,
 		.slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_5430_GPU,
 		.constant_pcb = OMAP_GRADIENT_CONST_W_PCB_5430_GPU,
 		},
diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c
index ba9c302454fb..696ab3046b87 100644
--- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c
+++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c
@@ -1010,7 +1010,7 @@ ti_bandgap_force_single_read(struct ti_bandgap *bgp, int id)
 }
 
 /**
- * ti_bandgap_set_continous_mode() - One time enabling of continuous mode
+ * ti_bandgap_set_continuous_mode() - One time enabling of continuous mode
  * @bgp: pointer to struct ti_bandgap
  *
  * Call this function only if HAS(MODE_CONFIG) is set. As this driver may
@@ -1214,22 +1214,18 @@ static struct ti_bandgap *ti_bandgap_build(struct platform_device *pdev)
 	}
 
 	bgp = devm_kzalloc(&pdev->dev, sizeof(*bgp), GFP_KERNEL);
-	if (!bgp) {
-		dev_err(&pdev->dev, "Unable to allocate mem for driver ref\n");
+	if (!bgp)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	of_id = of_match_device(of_ti_bandgap_match, &pdev->dev);
 	if (of_id)
 		bgp->conf = of_id->data;
 
 	/* register shadow for context save and restore */
-	bgp->regval = devm_kzalloc(&pdev->dev, sizeof(*bgp->regval) *
-				   bgp->conf->sensor_count, GFP_KERNEL);
-	if (!bgp->regval) {
-		dev_err(&pdev->dev, "Unable to allocate mem for driver ref\n");
+	bgp->regval = devm_kcalloc(&pdev->dev, bgp->conf->sensor_count,
+				   sizeof(*bgp->regval), GFP_KERNEL);
+	if (!bgp->regval)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	i = 0;
 	do {
diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.h b/drivers/thermal/ti-soc-thermal/ti-bandgap.h
index fe0adb898764..209c664c2823 100644
--- a/drivers/thermal/ti-soc-thermal/ti-bandgap.h
+++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.h
@@ -254,8 +254,6 @@ struct ti_bandgap {
  * @ts_data: pointer to struct with thresholds, limits of temperature sensor
  * @registers: pointer to the list of register offsets and bitfields
  * @domain: the name of the domain where the sensor is located
- * @slope: sensor gradient slope info for hotspot extrapolation equation
- * @constant: sensor gradient const info for hotspot extrapolation equation
  * @slope_pcb: sensor gradient slope info for hotspot extrapolation equation
  *             with no external influence
  * @constant_pcb: sensor gradient const info for hotspot extrapolation equation
@@ -274,8 +272,6 @@ struct ti_temp_sensor {
 	struct temp_sensor_registers	*registers;
 	char				*domain;
 	/* for hotspot extrapolation */
-	const int			slope;
-	const int			constant;
 	const int			slope_pcb;
 	const int			constant_pcb;
 	int (*register_cooling)(struct ti_bandgap *bgp, int id);
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index 0586bd0f2bab..02790f69e26c 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -96,8 +96,8 @@ static inline int __ti_thermal_get_temp(void *devdata, int *temp)
 		return ret;
 
 	/* Default constants */
-	slope = s->slope;
-	constant = s->constant;
+	slope = thermal_zone_get_slope(data->ti_thermal);
+	constant = thermal_zone_get_offset(data->ti_thermal);
 
 	pcb_tz = data->pcb_tz;
 	/* In case pcb zone is available, use the extrapolation rule with it */
@@ -126,119 +126,6 @@ static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal,
 	return __ti_thermal_get_temp(data, temp);
 }
 
-/* Bind callback functions for thermal zone */
-static int ti_thermal_bind(struct thermal_zone_device *thermal,
-			   struct thermal_cooling_device *cdev)
-{
-	struct ti_thermal_data *data = thermal->devdata;
-	int id;
-
-	if (!data || IS_ERR(data))
-		return -ENODEV;
-
-	/* check if this is the cooling device we registered */
-	if (data->cool_dev != cdev)
-		return 0;
-
-	id = data->sensor_id;
-
-	/* Simple thing, two trips, one passive another critical */
-	return thermal_zone_bind_cooling_device(thermal, 0, cdev,
-	/* bind with min and max states defined by cpu_cooling */
-						THERMAL_NO_LIMIT,
-						THERMAL_NO_LIMIT,
-						THERMAL_WEIGHT_DEFAULT);
-}
-
-/* Unbind callback functions for thermal zone */
-static int ti_thermal_unbind(struct thermal_zone_device *thermal,
-			     struct thermal_cooling_device *cdev)
-{
-	struct ti_thermal_data *data = thermal->devdata;
-
-	if (!data || IS_ERR(data))
-		return -ENODEV;
-
-	/* check if this is the cooling device we registered */
-	if (data->cool_dev != cdev)
-		return 0;
-
-	/* Simple thing, two trips, one passive another critical */
-	return thermal_zone_unbind_cooling_device(thermal, 0, cdev);
-}
-
-/* Get mode callback functions for thermal zone */
-static int ti_thermal_get_mode(struct thermal_zone_device *thermal,
-			       enum thermal_device_mode *mode)
-{
-	struct ti_thermal_data *data = thermal->devdata;
-
-	if (data)
-		*mode = data->mode;
-
-	return 0;
-}
-
-/* Set mode callback functions for thermal zone */
-static int ti_thermal_set_mode(struct thermal_zone_device *thermal,
-			       enum thermal_device_mode mode)
-{
-	struct ti_thermal_data *data = thermal->devdata;
-	struct ti_bandgap *bgp;
-
-	bgp = data->bgp;
-
-	if (!data->ti_thermal) {
-		dev_notice(&thermal->device, "thermal zone not registered\n");
-		return 0;
-	}
-
-	mutex_lock(&data->ti_thermal->lock);
-
-	if (mode == THERMAL_DEVICE_ENABLED)
-		data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE;
-	else
-		data->ti_thermal->polling_delay = 0;
-
-	mutex_unlock(&data->ti_thermal->lock);
-
-	data->mode = mode;
-	ti_bandgap_write_update_interval(bgp, data->sensor_id,
-					data->ti_thermal->polling_delay);
-	thermal_zone_device_update(data->ti_thermal, THERMAL_EVENT_UNSPECIFIED);
-	dev_dbg(&thermal->device, "thermal polling set for duration=%d msec\n",
-		data->ti_thermal->polling_delay);
-
-	return 0;
-}
-
-/* Get trip type callback functions for thermal zone */
-static int ti_thermal_get_trip_type(struct thermal_zone_device *thermal,
-				    int trip, enum thermal_trip_type *type)
-{
-	if (!ti_thermal_is_valid_trip(trip))
-		return -EINVAL;
-
-	if (trip + 1 == OMAP_TRIP_NUMBER)
-		*type = THERMAL_TRIP_CRITICAL;
-	else
-		*type = THERMAL_TRIP_PASSIVE;
-
-	return 0;
-}
-
-/* Get trip temperature callback functions for thermal zone */
-static int ti_thermal_get_trip_temp(struct thermal_zone_device *thermal,
-				    int trip, int *temp)
-{
-	if (!ti_thermal_is_valid_trip(trip))
-		return -EINVAL;
-
-	*temp = ti_thermal_get_trip_value(trip);
-
-	return 0;
-}
-
 static int __ti_thermal_get_trend(void *p, int trip, enum thermal_trend *trend)
 {
 	struct ti_thermal_data *data = p;
@@ -262,38 +149,11 @@ static int __ti_thermal_get_trend(void *p, int trip, enum thermal_trend *trend)
 	return 0;
 }
 
-/* Get the temperature trend callback functions for thermal zone */
-static int ti_thermal_get_trend(struct thermal_zone_device *thermal,
-				int trip, enum thermal_trend *trend)
-{
-	return __ti_thermal_get_trend(thermal->devdata, trip, trend);
-}
-
-/* Get critical temperature callback functions for thermal zone */
-static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal,
-				    int *temp)
-{
-	/* shutdown zone */
-	return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp);
-}
-
 static const struct thermal_zone_of_device_ops ti_of_thermal_ops = {
 	.get_temp = __ti_thermal_get_temp,
 	.get_trend = __ti_thermal_get_trend,
 };
 
-static struct thermal_zone_device_ops ti_thermal_ops = {
-	.get_temp = ti_thermal_get_temp,
-	.get_trend = ti_thermal_get_trend,
-	.bind = ti_thermal_bind,
-	.unbind = ti_thermal_unbind,
-	.get_mode = ti_thermal_get_mode,
-	.set_mode = ti_thermal_set_mode,
-	.get_trip_type = ti_thermal_get_trip_type,
-	.get_trip_temp = ti_thermal_get_trip_temp,
-	.get_crit_temp = ti_thermal_get_crit_temp,
-};
-
 static struct ti_thermal_data
 *ti_thermal_build_data(struct ti_bandgap *bgp, int id)
 {
@@ -331,18 +191,10 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id,
 	data->ti_thermal = devm_thermal_zone_of_sensor_register(bgp->dev, id,
 					data, &ti_of_thermal_ops);
 	if (IS_ERR(data->ti_thermal)) {
-		/* Create thermal zone */
-		data->ti_thermal = thermal_zone_device_register(domain,
-				OMAP_TRIP_NUMBER, 0, data, &ti_thermal_ops,
-				NULL, FAST_TEMP_MONITORING_RATE,
-				FAST_TEMP_MONITORING_RATE);
-		if (IS_ERR(data->ti_thermal)) {
-			dev_err(bgp->dev, "thermal zone device is NULL\n");
-			return PTR_ERR(data->ti_thermal);
-		}
-		data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE;
-		data->our_zone = true;
+		dev_err(bgp->dev, "thermal zone device is NULL\n");
+		return PTR_ERR(data->ti_thermal);
 	}
+
 	ti_bandgap_set_sensor_data(bgp, id, data);
 	ti_bandgap_write_update_interval(bgp, data->sensor_id,
 					data->ti_thermal->polling_delay);
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal.h b/drivers/thermal/ti-soc-thermal/ti-thermal.h
index f8b7ffea6194..8e85ca973967 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal.h
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal.h
@@ -25,22 +25,6 @@
 
 #include "ti-bandgap.h"
 
-/* sensors gradient and offsets */
-#define OMAP_GRADIENT_SLOPE_4430				0
-#define OMAP_GRADIENT_CONST_4430				20000
-#define OMAP_GRADIENT_SLOPE_4460				348
-#define OMAP_GRADIENT_CONST_4460				-9301
-#define OMAP_GRADIENT_SLOPE_4470				308
-#define OMAP_GRADIENT_CONST_4470				-7896
-
-#define OMAP_GRADIENT_SLOPE_5430_CPU				65
-#define OMAP_GRADIENT_CONST_5430_CPU				-1791
-#define OMAP_GRADIENT_SLOPE_5430_GPU				117
-#define OMAP_GRADIENT_CONST_5430_GPU				-2992
-
-#define DRA752_GRADIENT_SLOPE					0
-#define DRA752_GRADIENT_CONST					2000
-
 /* PCB sensor calculation constants */
 #define OMAP_GRADIENT_SLOPE_W_PCB_4430				0
 #define OMAP_GRADIENT_CONST_W_PCB_4430				20000
diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c
index 5e4fa9206861..104f09c58163 100644
--- a/drivers/tty/cyclades.c
+++ b/drivers/tty/cyclades.c
@@ -156,8 +156,8 @@ static unsigned int cy_isa_addresses[] = {
 static long maddr[NR_CARDS];
 static int irq[NR_CARDS];
 
-module_param_array(maddr, long, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(maddr, long, iomem, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 
 #endif				/* CONFIG_ISA */
 
diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c
index 7ac9bcdf1e61..61fe8d6fd24e 100644
--- a/drivers/tty/ehv_bytechan.c
+++ b/drivers/tty/ehv_bytechan.c
@@ -764,7 +764,7 @@ static int __init ehv_bc_init(void)
 	ehv_bc_driver = alloc_tty_driver(count);
 	if (!ehv_bc_driver) {
 		ret = -ENOMEM;
-		goto error;
+		goto err_free_bcs;
 	}
 
 	ehv_bc_driver->driver_name = "ehv-bc";
@@ -778,24 +778,23 @@ static int __init ehv_bc_init(void)
 	ret = tty_register_driver(ehv_bc_driver);
 	if (ret) {
 		pr_err("ehv-bc: could not register tty driver (ret=%i)\n", ret);
-		goto error;
+		goto err_put_tty_driver;
 	}
 
 	ret = platform_driver_register(&ehv_bc_tty_driver);
 	if (ret) {
 		pr_err("ehv-bc: could not register platform driver (ret=%i)\n",
 		       ret);
-		goto error;
+		goto err_deregister_tty_driver;
 	}
 
 	return 0;
 
-error:
-	if (ehv_bc_driver) {
-		tty_unregister_driver(ehv_bc_driver);
-		put_tty_driver(ehv_bc_driver);
-	}
-
+err_deregister_tty_driver:
+	tty_unregister_driver(ehv_bc_driver);
+err_put_tty_driver:
+	put_tty_driver(ehv_bc_driver);
+err_free_bcs:
 	kfree(bcs);
 
 	return ret;
diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c
index 4caf0c3b1f99..3b251f4e5df0 100644
--- a/drivers/tty/moxa.c
+++ b/drivers/tty/moxa.c
@@ -179,7 +179,7 @@ MODULE_FIRMWARE("c320tunx.cod");
 
 module_param_array(type, uint, NULL, 0);
 MODULE_PARM_DESC(type, "card type: C218=2, C320=4");
-module_param_array(baseaddr, ulong, NULL, 0);
+module_param_hw_array(baseaddr, ulong, ioport, NULL, 0);
 MODULE_PARM_DESC(baseaddr, "base address");
 module_param_array(numports, uint, NULL, 0);
 MODULE_PARM_DESC(numports, "numports (ignored for C218)");
diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c
index 7b8f383fb090..8bd6fb6d9391 100644
--- a/drivers/tty/mxser.c
+++ b/drivers/tty/mxser.c
@@ -183,7 +183,7 @@ static int ttymajor = MXSERMAJOR;
 
 MODULE_AUTHOR("Casper Yang");
 MODULE_DESCRIPTION("MOXA Smartio/Industio Family Multiport Board Device Driver");
-module_param_array(ioaddr, ulong, NULL, 0);
+module_param_hw_array(ioaddr, ulong, ioport, NULL, 0);
 MODULE_PARM_DESC(ioaddr, "ISA io addresses to look for a moxa board");
 module_param(ttymajor, int, 0);
 MODULE_LICENSE("GPL");
diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c
index d66c1edd9892..b51a877da986 100644
--- a/drivers/tty/rocket.c
+++ b/drivers/tty/rocket.c
@@ -250,15 +250,15 @@ static int sReadAiopNumChan(WordIO_t io);
 
 MODULE_AUTHOR("Theodore Ts'o");
 MODULE_DESCRIPTION("Comtrol RocketPort driver");
-module_param(board1, ulong, 0);
+module_param_hw(board1, ulong, ioport, 0);
 MODULE_PARM_DESC(board1, "I/O port for (ISA) board #1");
-module_param(board2, ulong, 0);
+module_param_hw(board2, ulong, ioport, 0);
 MODULE_PARM_DESC(board2, "I/O port for (ISA) board #2");
-module_param(board3, ulong, 0);
+module_param_hw(board3, ulong, ioport, 0);
 MODULE_PARM_DESC(board3, "I/O port for (ISA) board #3");
-module_param(board4, ulong, 0);
+module_param_hw(board4, ulong, ioport, 0);
 MODULE_PARM_DESC(board4, "I/O port for (ISA) board #4");
-module_param(controller, ulong, 0);
+module_param_hw(controller, ulong, ioport, 0);
 MODULE_PARM_DESC(controller, "I/O port for (ISA) rocketport controller");
 module_param(support_low_speed, bool, 0);
 MODULE_PARM_DESC(support_low_speed, "1 means support 50 baud, 0 means support 460400 baud");
diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c
index 433de5ea9b02..f71b47334149 100644
--- a/drivers/tty/serdev/core.c
+++ b/drivers/tty/serdev/core.c
@@ -122,6 +122,18 @@ void serdev_device_write_wakeup(struct serdev_device *serdev)
 }
 EXPORT_SYMBOL_GPL(serdev_device_write_wakeup);
 
+int serdev_device_write_buf(struct serdev_device *serdev,
+			    const unsigned char *buf, size_t count)
+{
+	struct serdev_controller *ctrl = serdev->ctrl;
+
+	if (!ctrl || !ctrl->ops->write_buf)
+		return -EINVAL;
+
+	return ctrl->ops->write_buf(ctrl, buf, count);
+}
+EXPORT_SYMBOL_GPL(serdev_device_write_buf);
+
 int serdev_device_write(struct serdev_device *serdev,
 			const unsigned char *buf, size_t count,
 			unsigned long timeout)
diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c
index 487c88f6aa0e..d0a021c93986 100644
--- a/drivers/tty/serdev/serdev-ttyport.c
+++ b/drivers/tty/serdev/serdev-ttyport.c
@@ -102,9 +102,6 @@ static int ttyport_open(struct serdev_controller *ctrl)
 		return PTR_ERR(tty);
 	serport->tty = tty;
 
-	serport->port->client_ops = &client_ops;
-	serport->port->client_data = ctrl;
-
 	if (tty->ops->open)
 		tty->ops->open(serport->tty, NULL);
 	else
@@ -215,6 +212,7 @@ struct device *serdev_tty_port_register(struct tty_port *port,
 					struct device *parent,
 					struct tty_driver *drv, int idx)
 {
+	const struct tty_port_client_operations *old_ops;
 	struct serdev_controller *ctrl;
 	struct serport *serport;
 	int ret;
@@ -233,28 +231,37 @@ struct device *serdev_tty_port_register(struct tty_port *port,
 
 	ctrl->ops = &ctrl_ops;
 
+	old_ops = port->client_ops;
+	port->client_ops = &client_ops;
+	port->client_data = ctrl;
+
 	ret = serdev_controller_add(ctrl);
 	if (ret)
-		goto err_controller_put;
+		goto err_reset_data;
 
 	dev_info(&ctrl->dev, "tty port %s%d registered\n", drv->name, idx);
 	return &ctrl->dev;
 
-err_controller_put:
+err_reset_data:
+	port->client_data = NULL;
+	port->client_ops = old_ops;
 	serdev_controller_put(ctrl);
+
 	return ERR_PTR(ret);
 }
 
-void serdev_tty_port_unregister(struct tty_port *port)
+int serdev_tty_port_unregister(struct tty_port *port)
 {
 	struct serdev_controller *ctrl = port->client_data;
 	struct serport *serport = serdev_controller_get_drvdata(ctrl);
 
 	if (!serport)
-		return;
+		return -ENODEV;
 
 	serdev_controller_remove(ctrl);
 	port->client_ops = NULL;
 	port->client_data = NULL;
 	serdev_controller_put(ctrl);
+
+	return 0;
 }
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 48a07e2f617f..1aab3010fbfa 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -1191,7 +1191,7 @@ module_exit(serial8250_exit);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Generic 8250/16x50 serial driver");
 
-module_param(share_irqs, uint, 0644);
+module_param_hw(share_irqs, uint, other, 0644);
 MODULE_PARM_DESC(share_irqs, "Share IRQs with other non-8250/16x50 devices (unsafe)");
 
 module_param(nr_uarts, uint, 0644);
@@ -1201,7 +1201,7 @@ module_param(skip_txen_test, uint, 0644);
 MODULE_PARM_DESC(skip_txen_test, "Skip checking for the TXEN bug at init time");
 
 #ifdef CONFIG_SERIAL_8250_RSA
-module_param_array(probe_rsa, ulong, &probe_rsa_count, 0444);
+module_param_hw_array(probe_rsa, ulong, ioport, &probe_rsa_count, 0444);
 MODULE_PARM_DESC(probe_rsa, "Probe I/O ports for RSA");
 #endif
 MODULE_ALIAS_CHARDEV_MAJOR(TTY_MAJOR);
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 09a65a3ec7f7..68fd045a7025 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -47,6 +47,7 @@
 /*
  * These are definitions for the Exar XR17V35X and XR17(C|D)15X
  */
+#define UART_EXAR_INT0		0x80
 #define UART_EXAR_SLEEP		0x8b	/* Sleep mode */
 #define UART_EXAR_DVID		0x8d	/* Device identification */
 
@@ -1337,7 +1338,7 @@ out_lock:
 	/*
 	 * Check if the device is a Fintek F81216A
 	 */
-	if (port->type == PORT_16550A)
+	if (port->type == PORT_16550A && port->iotype == UPIO_PORT)
 		fintek_8250_probe(up);
 
 	if (up->capabilities != old_capabilities) {
@@ -1869,17 +1870,13 @@ static int serial8250_default_handle_irq(struct uart_port *port)
 static int exar_handle_irq(struct uart_port *port)
 {
 	unsigned int iir = serial_port_in(port, UART_IIR);
-	int ret;
+	int ret = 0;
 
-	ret = serial8250_handle_irq(port, iir);
+	if (((port->type == PORT_XR17V35X) || (port->type == PORT_XR17D15X)) &&
+	    serial_port_in(port, UART_EXAR_INT0) != 0)
+		ret = 1;
 
-	if ((port->type == PORT_XR17V35X) ||
-	   (port->type == PORT_XR17D15X)) {
-		serial_port_in(port, 0x80);
-		serial_port_in(port, 0x81);
-		serial_port_in(port, 0x82);
-		serial_port_in(port, 0x83);
-	}
+	ret |= serial8250_handle_irq(port, iir);
 
 	return ret;
 }
@@ -2177,6 +2174,8 @@ int serial8250_do_startup(struct uart_port *port)
 	serial_port_in(port, UART_RX);
 	serial_port_in(port, UART_IIR);
 	serial_port_in(port, UART_MSR);
+	if ((port->type == PORT_XR17V35X) || (port->type == PORT_XR17D15X))
+		serial_port_in(port, UART_EXAR_INT0);
 
 	/*
 	 * At this point, there's no way the LSR could still be 0xff;
@@ -2335,6 +2334,8 @@ dont_test_tx_en:
 	serial_port_in(port, UART_RX);
 	serial_port_in(port, UART_IIR);
 	serial_port_in(port, UART_MSR);
+	if ((port->type == PORT_XR17V35X) || (port->type == PORT_XR17D15X))
+		serial_port_in(port, UART_EXAR_INT0);
 	up->lsr_saved_flags = 0;
 	up->msr_saved_flags = 0;
 
diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c
index 18e3f8342b85..0475f5d261ce 100644
--- a/drivers/tty/serial/altera_jtaguart.c
+++ b/drivers/tty/serial/altera_jtaguart.c
@@ -478,6 +478,7 @@ static int altera_jtaguart_remove(struct platform_device *pdev)
 
 	port = &altera_jtaguart_ports[i].port;
 	uart_remove_one_port(&altera_jtaguart_driver, port);
+	iounmap(port->membase);
 
 	return 0;
 }
diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c
index 46d3438a0d27..3e4b717670d7 100644
--- a/drivers/tty/serial/altera_uart.c
+++ b/drivers/tty/serial/altera_uart.c
@@ -615,6 +615,7 @@ static int altera_uart_remove(struct platform_device *pdev)
 	if (port) {
 		uart_remove_one_port(&altera_uart_driver, port);
 		port->mapbase = 0;
+		iounmap(port->membase);
 	}
 
 	return 0;
diff --git a/drivers/tty/serial/efm32-uart.c b/drivers/tty/serial/efm32-uart.c
index ebd8569f9ad5..9fff25be87f9 100644
--- a/drivers/tty/serial/efm32-uart.c
+++ b/drivers/tty/serial/efm32-uart.c
@@ -27,6 +27,7 @@
 #define UARTn_FRAME		0x04
 #define UARTn_FRAME_DATABITS__MASK	0x000f
 #define UARTn_FRAME_DATABITS(n)		((n) - 3)
+#define UARTn_FRAME_PARITY__MASK	0x0300
 #define UARTn_FRAME_PARITY_NONE		0x0000
 #define UARTn_FRAME_PARITY_EVEN		0x0200
 #define UARTn_FRAME_PARITY_ODD		0x0300
@@ -572,12 +573,16 @@ static void efm32_uart_console_get_options(struct efm32_uart_port *efm_port,
 			16 * (4 + (clkdiv >> 6)));
 
 	frame = efm32_uart_read32(efm_port, UARTn_FRAME);
-	if (frame & UARTn_FRAME_PARITY_ODD)
+	switch (frame & UARTn_FRAME_PARITY__MASK) {
+	case UARTn_FRAME_PARITY_ODD:
 		*parity = 'o';
-	else if (frame & UARTn_FRAME_PARITY_EVEN)
+		break;
+	case UARTn_FRAME_PARITY_EVEN:
 		*parity = 'e';
-	else
+		break;
+	default:
 		*parity = 'n';
+	}
 
 	*bits = (frame & UARTn_FRAME_DATABITS__MASK) -
 			UARTn_FRAME_DATABITS(4) + 4;
diff --git a/drivers/tty/serial/ifx6x60.c b/drivers/tty/serial/ifx6x60.c
index 157883653256..f190a84a0246 100644
--- a/drivers/tty/serial/ifx6x60.c
+++ b/drivers/tty/serial/ifx6x60.c
@@ -1382,9 +1382,9 @@ static struct spi_driver ifx_spi_driver = {
 static void __exit ifx_spi_exit(void)
 {
 	/* unregister */
+	spi_unregister_driver(&ifx_spi_driver);
 	tty_unregister_driver(tty_drv);
 	put_tty_driver(tty_drv);
-	spi_unregister_driver(&ifx_spi_driver);
 	unregister_reboot_notifier(&ifx_modem_reboot_notifier_block);
 }
 
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 33509b4beaec..bbefddd92bfe 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -2184,7 +2184,9 @@ static int serial_imx_probe(struct platform_device *pdev)
 		 * and DCD (when they are outputs) or enables the respective
 		 * irqs. So set this bit early, i.e. before requesting irqs.
 		 */
-		writel(UFCR_DCEDTE, sport->port.membase + UFCR);
+		reg = readl(sport->port.membase + UFCR);
+		if (!(reg & UFCR_DCEDTE))
+			writel(reg | UFCR_DCEDTE, sport->port.membase + UFCR);
 
 		/*
 		 * Disable UCR3_RI and UCR3_DCD irqs. They are also not
@@ -2195,7 +2197,15 @@ static int serial_imx_probe(struct platform_device *pdev)
 		       sport->port.membase + UCR3);
 
 	} else {
-		writel(0, sport->port.membase + UFCR);
+		unsigned long ucr3 = UCR3_DSR;
+
+		reg = readl(sport->port.membase + UFCR);
+		if (reg & UFCR_DCEDTE)
+			writel(reg & ~UFCR_DCEDTE, sport->port.membase + UFCR);
+
+		if (!is_imx1_uart(sport))
+			ucr3 |= IMX21_UCR3_RXDMUXSEL | UCR3_ADNIMP;
+		writel(ucr3, sport->port.membase + UCR3);
 	}
 
 	clk_disable_unprepare(sport->clk_ipg);
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 0f45b7884a2c..13bfd5dcffce 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2083,7 +2083,7 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport)
 	mutex_lock(&port->mutex);
 
 	tty_dev = device_find_child(uport->dev, &match, serial_match_port);
-	if (device_may_wakeup(tty_dev)) {
+	if (tty_dev && device_may_wakeup(tty_dev)) {
 		if (!enable_irq_wake(uport->irq))
 			uport->irq_wake = 1;
 		put_device(tty_dev);
@@ -2782,7 +2782,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport)
 	 * Register the port whether it's detected or not.  This allows
 	 * setserial to be used to alter this port's parameters.
 	 */
-	tty_dev = tty_port_register_device_attr(port, drv->tty_driver,
+	tty_dev = tty_port_register_device_attr_serdev(port, drv->tty_driver,
 			uport->line, uport->dev, port, uport->tty_groups);
 	if (likely(!IS_ERR(tty_dev))) {
 		device_set_wakeup_capable(tty_dev, 1);
@@ -2845,7 +2845,7 @@ int uart_remove_one_port(struct uart_driver *drv, struct uart_port *uport)
 	/*
 	 * Remove the devices from the tty layer
 	 */
-	tty_unregister_device(drv->tty_driver, uport->line);
+	tty_port_unregister_device(port, drv->tty_driver, uport->line);
 
 	tty = tty_port_tty_get(port);
 	if (tty) {
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index 657eed82eeb3..a2c308f7d637 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -869,9 +869,9 @@ static int txholdbufs[MAX_TOTAL_DEVICES];
 	
 module_param(break_on_load, bool, 0);
 module_param(ttymajor, int, 0);
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
-module_param_array(dma, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
+module_param_hw_array(dma, int, dma, NULL, 0);
 module_param(debug_level, int, 0);
 module_param_array(maxframe, int, NULL, 0);
 module_param_array(txdmabufs, int, NULL, 0);
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 31885f20fc15..cc047de72e2a 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -184,7 +184,7 @@ static void hdlcdev_exit(struct slgt_info *info);
 struct cond_wait {
 	struct cond_wait *next;
 	wait_queue_head_t q;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	unsigned int data;
 };
 static void init_cond_wait(struct cond_wait *w, unsigned int data);
diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
index 1d21a9c1d33e..6b137194069f 100644
--- a/drivers/tty/tty_port.c
+++ b/drivers/tty/tty_port.c
@@ -129,19 +129,85 @@ struct device *tty_port_register_device_attr(struct tty_port *port,
 		struct device *device, void *drvdata,
 		const struct attribute_group **attr_grp)
 {
+	tty_port_link_device(port, driver, index);
+	return tty_register_device_attr(driver, index, device, drvdata,
+			attr_grp);
+}
+EXPORT_SYMBOL_GPL(tty_port_register_device_attr);
+
+/**
+ * tty_port_register_device_attr_serdev - register tty or serdev device
+ * @port: tty_port of the device
+ * @driver: tty_driver for this device
+ * @index: index of the tty
+ * @device: parent if exists, otherwise NULL
+ * @drvdata: driver data for the device
+ * @attr_grp: attribute group for the device
+ *
+ * Register a serdev or tty device depending on if the parent device has any
+ * defined serdev clients or not.
+ */
+struct device *tty_port_register_device_attr_serdev(struct tty_port *port,
+		struct tty_driver *driver, unsigned index,
+		struct device *device, void *drvdata,
+		const struct attribute_group **attr_grp)
+{
 	struct device *dev;
 
 	tty_port_link_device(port, driver, index);
 
 	dev = serdev_tty_port_register(port, device, driver, index);
-	if (PTR_ERR(dev) != -ENODEV)
+	if (PTR_ERR(dev) != -ENODEV) {
 		/* Skip creating cdev if we registered a serdev device */
 		return dev;
+	}
 
 	return tty_register_device_attr(driver, index, device, drvdata,
 			attr_grp);
 }
-EXPORT_SYMBOL_GPL(tty_port_register_device_attr);
+EXPORT_SYMBOL_GPL(tty_port_register_device_attr_serdev);
+
+/**
+ * tty_port_register_device_serdev - register tty or serdev device
+ * @port: tty_port of the device
+ * @driver: tty_driver for this device
+ * @index: index of the tty
+ * @device: parent if exists, otherwise NULL
+ *
+ * Register a serdev or tty device depending on if the parent device has any
+ * defined serdev clients or not.
+ */
+struct device *tty_port_register_device_serdev(struct tty_port *port,
+		struct tty_driver *driver, unsigned index,
+		struct device *device)
+{
+	return tty_port_register_device_attr_serdev(port, driver, index,
+			device, NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(tty_port_register_device_serdev);
+
+/**
+ * tty_port_unregister_device - deregister a tty or serdev device
+ * @port: tty_port of the device
+ * @driver: tty_driver for this device
+ * @index: index of the tty
+ *
+ * If a tty or serdev device is registered with a call to
+ * tty_port_register_device_serdev() then this function must be called when
+ * the device is gone.
+ */
+void tty_port_unregister_device(struct tty_port *port,
+		struct tty_driver *driver, unsigned index)
+{
+	int ret;
+
+	ret = serdev_tty_port_unregister(port);
+	if (ret == 0)
+		return;
+
+	tty_unregister_device(driver, index);
+}
+EXPORT_SYMBOL_GPL(tty_port_unregister_device);
 
 int tty_port_alloc_xmit_buf(struct tty_port *port)
 {
@@ -189,9 +255,6 @@ static void tty_port_destructor(struct kref *kref)
 	/* check if last port ref was dropped before tty release */
 	if (WARN_ON(port->itty))
 		return;
-
-	serdev_tty_port_unregister(port);
-
 	if (port->xmit_buf)
 		free_page((unsigned long)port->xmit_buf);
 	tty_port_destroy(port);
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 1c196f87e9d9..ff04b7f8549f 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -279,7 +279,7 @@ static int uio_dev_add_attributes(struct uio_device *idev)
 		map = kzalloc(sizeof(*map), GFP_KERNEL);
 		if (!map) {
 			ret = -ENOMEM;
-			goto err_map_kobj;
+			goto err_map;
 		}
 		kobject_init(&map->kobj, &map_attr_type);
 		map->mem = mem;
@@ -289,7 +289,7 @@ static int uio_dev_add_attributes(struct uio_device *idev)
 			goto err_map_kobj;
 		ret = kobject_uevent(&map->kobj, KOBJ_ADD);
 		if (ret)
-			goto err_map;
+			goto err_map_kobj;
 	}
 
 	for (pi = 0; pi < MAX_UIO_PORT_REGIONS; pi++) {
@@ -308,7 +308,7 @@ static int uio_dev_add_attributes(struct uio_device *idev)
 		portio = kzalloc(sizeof(*portio), GFP_KERNEL);
 		if (!portio) {
 			ret = -ENOMEM;
-			goto err_portio_kobj;
+			goto err_portio;
 		}
 		kobject_init(&portio->kobj, &portio_attr_type);
 		portio->port = port;
@@ -319,7 +319,7 @@ static int uio_dev_add_attributes(struct uio_device *idev)
 			goto err_portio_kobj;
 		ret = kobject_uevent(&portio->kobj, KOBJ_ADD);
 		if (ret)
-			goto err_portio;
+			goto err_portio_kobj;
 	}
 
 	return 0;
diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 9e217b1361ea..fe4fe2440729 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -843,7 +843,10 @@ static ssize_t ci_role_show(struct device *dev, struct device_attribute *attr,
 {
 	struct ci_hdrc *ci = dev_get_drvdata(dev);
 
-	return sprintf(buf, "%s\n", ci_role(ci)->name);
+	if (ci->role != CI_ROLE_END)
+		return sprintf(buf, "%s\n", ci_role(ci)->name);
+
+	return 0;
 }
 
 static ssize_t ci_role_store(struct device *dev,
diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c
index 6d23eede4d8c..1c31e8a08810 100644
--- a/drivers/usb/chipidea/debug.c
+++ b/drivers/usb/chipidea/debug.c
@@ -294,7 +294,8 @@ static int ci_role_show(struct seq_file *s, void *data)
 {
 	struct ci_hdrc *ci = s->private;
 
-	seq_printf(s, "%s\n", ci_role(ci)->name);
+	if (ci->role != CI_ROLE_END)
+		seq_printf(s, "%s\n", ci_role(ci)->name);
 
 	return 0;
 }
diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index 56d2d3213076..d68b125796f9 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -1993,6 +1993,7 @@ static void udc_id_switch_for_host(struct ci_hdrc *ci)
 int ci_hdrc_gadget_init(struct ci_hdrc *ci)
 {
 	struct ci_role_driver *rdrv;
+	int ret;
 
 	if (!hw_read(ci, CAP_DCCPARAMS, DCCPARAMS_DC))
 		return -ENXIO;
@@ -2005,7 +2006,10 @@ int ci_hdrc_gadget_init(struct ci_hdrc *ci)
 	rdrv->stop	= udc_id_switch_for_host;
 	rdrv->irq	= udc_irq;
 	rdrv->name	= "gadget";
-	ci->roles[CI_ROLE_GADGET] = rdrv;
 
-	return udc_start(ci);
+	ret = udc_start(ci);
+	if (!ret)
+		ci->roles[CI_ROLE_GADGET] = rdrv;
+
+	return ret;
 }
diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c
index e77a4ed4f021..9f4a0185dd60 100644
--- a/drivers/usb/chipidea/usbmisc_imx.c
+++ b/drivers/usb/chipidea/usbmisc_imx.c
@@ -108,6 +108,8 @@ struct imx_usbmisc {
 	const struct usbmisc_ops *ops;
 };
 
+static inline bool is_imx53_usbmisc(struct imx_usbmisc_data *data);
+
 static int usbmisc_imx25_init(struct imx_usbmisc_data *data)
 {
 	struct imx_usbmisc *usbmisc = dev_get_drvdata(data->dev);
@@ -242,10 +244,15 @@ static int usbmisc_imx53_init(struct imx_usbmisc_data *data)
 			val = readl(reg) | MX53_USB_UHx_CTRL_WAKE_UP_EN
 				| MX53_USB_UHx_CTRL_ULPI_INT_EN;
 			writel(val, reg);
-			/* Disable internal 60Mhz clock */
-			reg = usbmisc->base + MX53_USB_CLKONOFF_CTRL_OFFSET;
-			val = readl(reg) | MX53_USB_CLKONOFF_CTRL_H2_INT60CKOFF;
-			writel(val, reg);
+			if (is_imx53_usbmisc(data)) {
+				/* Disable internal 60Mhz clock */
+				reg = usbmisc->base +
+					MX53_USB_CLKONOFF_CTRL_OFFSET;
+				val = readl(reg) |
+					MX53_USB_CLKONOFF_CTRL_H2_INT60CKOFF;
+				writel(val, reg);
+			}
+
 		}
 		if (data->disable_oc) {
 			reg = usbmisc->base + MX53_USB_UH2_CTRL_OFFSET;
@@ -267,10 +274,15 @@ static int usbmisc_imx53_init(struct imx_usbmisc_data *data)
 			val = readl(reg) | MX53_USB_UHx_CTRL_WAKE_UP_EN
 				| MX53_USB_UHx_CTRL_ULPI_INT_EN;
 			writel(val, reg);
-			/* Disable internal 60Mhz clock */
-			reg = usbmisc->base + MX53_USB_CLKONOFF_CTRL_OFFSET;
-			val = readl(reg) | MX53_USB_CLKONOFF_CTRL_H3_INT60CKOFF;
-			writel(val, reg);
+
+			if (is_imx53_usbmisc(data)) {
+				/* Disable internal 60Mhz clock */
+				reg = usbmisc->base +
+					MX53_USB_CLKONOFF_CTRL_OFFSET;
+				val = readl(reg) |
+					MX53_USB_CLKONOFF_CTRL_H3_INT60CKOFF;
+				writel(val, reg);
+			}
 		}
 		if (data->disable_oc) {
 			reg = usbmisc->base + MX53_USB_UH3_CTRL_OFFSET;
@@ -456,6 +468,10 @@ static const struct usbmisc_ops imx27_usbmisc_ops = {
 	.init = usbmisc_imx27_init,
 };
 
+static const struct usbmisc_ops imx51_usbmisc_ops = {
+	.init = usbmisc_imx53_init,
+};
+
 static const struct usbmisc_ops imx53_usbmisc_ops = {
 	.init = usbmisc_imx53_init,
 };
@@ -479,6 +495,13 @@ static const struct usbmisc_ops imx7d_usbmisc_ops = {
 	.set_wakeup = usbmisc_imx7d_set_wakeup,
 };
 
+static inline bool is_imx53_usbmisc(struct imx_usbmisc_data *data)
+{
+	struct imx_usbmisc *usbmisc = dev_get_drvdata(data->dev);
+
+	return usbmisc->ops == &imx53_usbmisc_ops;
+}
+
 int imx_usbmisc_init(struct imx_usbmisc_data *data)
 {
 	struct imx_usbmisc *usbmisc;
@@ -536,7 +559,7 @@ static const struct of_device_id usbmisc_imx_dt_ids[] = {
 	},
 	{
 		.compatible = "fsl,imx51-usbmisc",
-		.data = &imx53_usbmisc_ops,
+		.data = &imx51_usbmisc_ops,
 	},
 	{
 		.compatible = "fsl,imx53-usbmisc",
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index cfc3cff6e8d5..8e6ef671be9b 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -475,11 +475,11 @@ static void snoop_urb(struct usb_device *udev,
 
 	if (userurb) {		/* Async */
 		if (when == SUBMIT)
-			dev_info(&udev->dev, "userurb %p, ep%d %s-%s, "
+			dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, "
 					"length %u\n",
 					userurb, ep, t, d, length);
 		else
-			dev_info(&udev->dev, "userurb %p, ep%d %s-%s, "
+			dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, "
 					"actual_length %u status %d\n",
 					userurb, ep, t, d, length,
 					timeout_or_status);
@@ -1895,7 +1895,7 @@ static int proc_reapurb(struct usb_dev_state *ps, void __user *arg)
 	if (as) {
 		int retval;
 
-		snoop(&ps->dev->dev, "reap %p\n", as->userurb);
+		snoop(&ps->dev->dev, "reap %pK\n", as->userurb);
 		retval = processcompl(as, (void __user * __user *)arg);
 		free_async(as);
 		return retval;
@@ -1912,7 +1912,7 @@ static int proc_reapurbnonblock(struct usb_dev_state *ps, void __user *arg)
 
 	as = async_getcompleted(ps);
 	if (as) {
-		snoop(&ps->dev->dev, "reap %p\n", as->userurb);
+		snoop(&ps->dev->dev, "reap %pK\n", as->userurb);
 		retval = processcompl(as, (void __user * __user *)arg);
 		free_async(as);
 	} else {
@@ -2043,7 +2043,7 @@ static int proc_reapurb_compat(struct usb_dev_state *ps, void __user *arg)
 	if (as) {
 		int retval;
 
-		snoop(&ps->dev->dev, "reap %p\n", as->userurb);
+		snoop(&ps->dev->dev, "reap %pK\n", as->userurb);
 		retval = processcompl_compat(as, (void __user * __user *)arg);
 		free_async(as);
 		return retval;
@@ -2060,7 +2060,7 @@ static int proc_reapurbnonblock_compat(struct usb_dev_state *ps, void __user *ar
 
 	as = async_getcompleted(ps);
 	if (as) {
-		snoop(&ps->dev->dev, "reap %p\n", as->userurb);
+		snoop(&ps->dev->dev, "reap %pK\n", as->userurb);
 		retval = processcompl_compat(as, (void __user * __user *)arg);
 		free_async(as);
 	} else {
@@ -2489,7 +2489,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 #endif
 
 	case USBDEVFS_DISCARDURB:
-		snoop(&dev->dev, "%s: DISCARDURB %p\n", __func__, p);
+		snoop(&dev->dev, "%s: DISCARDURB %pK\n", __func__, p);
 		ret = proc_unlinkurb(ps, p);
 		break;
 
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 49550790a3cb..5dea98358c05 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1723,7 +1723,7 @@ int usb_hcd_unlink_urb (struct urb *urb, int status)
 		if (retval == 0)
 			retval = -EINPROGRESS;
 		else if (retval != -EIDRM && retval != -EBUSY)
-			dev_dbg(&udev->dev, "hcd_unlink_urb %p fail %d\n",
+			dev_dbg(&udev->dev, "hcd_unlink_urb %pK fail %d\n",
 					urb, retval);
 		usb_put_dev(udev);
 	}
@@ -1890,7 +1890,7 @@ rescan:
 		/* kick hcd */
 		unlink1(hcd, urb, -ESHUTDOWN);
 		dev_dbg (hcd->self.controller,
-			"shutdown urb %p ep%d%s%s\n",
+			"shutdown urb %pK ep%d%s%s\n",
 			urb, usb_endpoint_num(&ep->desc),
 			is_in ? "in" : "out",
 			({	char *s;
@@ -2520,6 +2520,7 @@ struct usb_hcd *__usb_create_hcd(const struct hc_driver *driver,
 		hcd->bandwidth_mutex = kmalloc(sizeof(*hcd->bandwidth_mutex),
 				GFP_KERNEL);
 		if (!hcd->bandwidth_mutex) {
+			kfree(hcd->address0_mutex);
 			kfree(hcd);
 			dev_dbg(dev, "hcd bandwidth mutex alloc failed\n");
 			return NULL;
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 9dca59ef18b3..b8bb20d7acdb 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -362,7 +362,8 @@ static void usb_set_lpm_parameters(struct usb_device *udev)
 }
 
 /* USB 2.0 spec Section 11.24.4.5 */
-static int get_hub_descriptor(struct usb_device *hdev, void *data)
+static int get_hub_descriptor(struct usb_device *hdev,
+		struct usb_hub_descriptor *desc)
 {
 	int i, ret, size;
 	unsigned dtype;
@@ -378,10 +379,18 @@ static int get_hub_descriptor(struct usb_device *hdev, void *data)
 	for (i = 0; i < 3; i++) {
 		ret = usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0),
 			USB_REQ_GET_DESCRIPTOR, USB_DIR_IN | USB_RT_HUB,
-			dtype << 8, 0, data, size,
+			dtype << 8, 0, desc, size,
 			USB_CTRL_GET_TIMEOUT);
-		if (ret >= (USB_DT_HUB_NONVAR_SIZE + 2))
+		if (hub_is_superspeed(hdev)) {
+			if (ret == size)
+				return ret;
+		} else if (ret >= USB_DT_HUB_NONVAR_SIZE + 2) {
+			/* Make sure we have the DeviceRemovable field. */
+			size = USB_DT_HUB_NONVAR_SIZE + desc->bNbrPorts / 8 + 1;
+			if (ret < size)
+				return -EMSGSIZE;
 			return ret;
+		}
 	}
 	return -EINVAL;
 }
@@ -1313,7 +1322,7 @@ static int hub_configure(struct usb_hub *hub,
 	}
 	mutex_init(&hub->status_mutex);
 
-	hub->descriptor = kmalloc(sizeof(*hub->descriptor), GFP_KERNEL);
+	hub->descriptor = kzalloc(sizeof(*hub->descriptor), GFP_KERNEL);
 	if (!hub->descriptor) {
 		ret = -ENOMEM;
 		goto fail;
@@ -1321,13 +1330,19 @@ static int hub_configure(struct usb_hub *hub,
 
 	/* Request the entire hub descriptor.
 	 * hub->descriptor can handle USB_MAXCHILDREN ports,
-	 * but the hub can/will return fewer bytes here.
+	 * but a (non-SS) hub can/will return fewer bytes here.
 	 */
 	ret = get_hub_descriptor(hdev, hub->descriptor);
 	if (ret < 0) {
 		message = "can't read hub descriptor";
 		goto fail;
-	} else if (hub->descriptor->bNbrPorts > USB_MAXCHILDREN) {
+	}
+
+	maxchild = USB_MAXCHILDREN;
+	if (hub_is_superspeed(hdev))
+		maxchild = min_t(unsigned, maxchild, USB_SS_MAXPORTS);
+
+	if (hub->descriptor->bNbrPorts > maxchild) {
 		message = "hub has too many ports!";
 		ret = -ENODEV;
 		goto fail;
diff --git a/drivers/usb/core/of.c b/drivers/usb/core/of.c
index d787f195a9a6..d563cbcf76cf 100644
--- a/drivers/usb/core/of.c
+++ b/drivers/usb/core/of.c
@@ -53,6 +53,9 @@ EXPORT_SYMBOL_GPL(usb_of_get_child_node);
  *
  * Find the companion device from platform bus.
  *
+ * Takes a reference to the returned struct device which needs to be dropped
+ * after use.
+ *
  * Return: On success, a pointer to the companion device, %NULL on failure.
  */
 struct device *usb_of_get_companion_dev(struct device *dev)
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index d75cb8c0f7df..47903d510955 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -338,7 +338,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 	if (!urb || !urb->complete)
 		return -EINVAL;
 	if (urb->hcpriv) {
-		WARN_ONCE(1, "URB %p submitted while active\n", urb);
+		WARN_ONCE(1, "URB %pK submitted while active\n", urb);
 		return -EBUSY;
 	}
 
diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c
index 9cd8722f24f6..a3ffe97170ff 100644
--- a/drivers/usb/dwc2/params.c
+++ b/drivers/usb/dwc2/params.c
@@ -144,6 +144,8 @@ const struct of_device_id dwc2_of_match_table[] = {
 	{ .compatible = "lantiq,xrx200-usb", .data = dwc2_set_ltq_params },
 	{ .compatible = "snps,dwc2" },
 	{ .compatible = "samsung,s3c6400-hsotg" },
+	{ .compatible = "amlogic,meson8-usb",
+	  .data = dwc2_set_amlogic_params },
 	{ .compatible = "amlogic,meson8b-usb",
 	  .data = dwc2_set_amlogic_params },
 	{ .compatible = "amlogic,meson-gxbb-usb",
diff --git a/drivers/usb/dwc3/dwc3-keystone.c b/drivers/usb/dwc3/dwc3-keystone.c
index 72664700b8a2..12ee23f53cdd 100644
--- a/drivers/usb/dwc3/dwc3-keystone.c
+++ b/drivers/usb/dwc3/dwc3-keystone.c
@@ -107,6 +107,10 @@ static int kdwc3_probe(struct platform_device *pdev)
 		return PTR_ERR(kdwc->usbss);
 
 	kdwc->clk = devm_clk_get(kdwc->dev, "usb");
+	if (IS_ERR(kdwc->clk)) {
+		dev_err(kdwc->dev, "unable to get usb clock\n");
+		return PTR_ERR(kdwc->clk);
+	}
 
 	error = clk_prepare_enable(kdwc->clk);
 	if (error < 0) {
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index a15ec71d0423..fe851544d7fb 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -39,8 +39,10 @@
 #define PCI_DEVICE_ID_INTEL_APL			0x5aaa
 #define PCI_DEVICE_ID_INTEL_KBP			0xa2b0
 #define PCI_DEVICE_ID_INTEL_GLK			0x31aa
+#define PCI_DEVICE_ID_INTEL_CNPLP		0x9dee
+#define PCI_DEVICE_ID_INTEL_CNPH		0xa36e
 
-#define PCI_INTEL_BXT_DSM_UUID		"732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
+#define PCI_INTEL_BXT_DSM_GUID		"732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
 #define PCI_INTEL_BXT_FUNC_PMU_PWR	4
 #define PCI_INTEL_BXT_STATE_D0		0
 #define PCI_INTEL_BXT_STATE_D3		3
@@ -49,14 +51,14 @@
  * struct dwc3_pci - Driver private structure
  * @dwc3: child dwc3 platform_device
  * @pci: our link to PCI bus
- * @uuid: _DSM UUID
+ * @guid: _DSM GUID
  * @has_dsm_for_pm: true for devices which need to run _DSM on runtime PM
  */
 struct dwc3_pci {
 	struct platform_device *dwc3;
 	struct pci_dev *pci;
 
-	u8 uuid[16];
+	guid_t guid;
 
 	unsigned int has_dsm_for_pm:1;
 };
@@ -118,7 +120,7 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc)
 
 		if (pdev->device == PCI_DEVICE_ID_INTEL_BXT ||
 				pdev->device == PCI_DEVICE_ID_INTEL_BXT_M) {
-			acpi_str_to_uuid(PCI_INTEL_BXT_DSM_UUID, dwc->uuid);
+			guid_parse(PCI_INTEL_BXT_DSM_GUID, &dwc->guid);
 			dwc->has_dsm_for_pm = true;
 		}
 
@@ -270,6 +272,8 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBP), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_GLK), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CNPLP), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CNPH), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), },
 	{  }	/* Terminating Entry */
 };
@@ -288,7 +292,7 @@ static int dwc3_pci_dsm(struct dwc3_pci *dwc, int param)
 	tmp.type = ACPI_TYPE_INTEGER;
 	tmp.integer.value = param;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(&dwc->pci->dev), dwc->uuid,
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(&dwc->pci->dev), &dwc->guid,
 			1, PCI_INTEL_BXT_FUNC_PMU_PWR, &argv4);
 	if (!obj) {
 		dev_err(&dwc->pci->dev, "failed to evaluate _DSM\n");
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 6f6f0b3be3ad..aea9a5b948b4 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1261,14 +1261,24 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
 				__dwc3_gadget_start_isoc(dwc, dep, cur_uf);
 				dep->flags &= ~DWC3_EP_PENDING_REQUEST;
 			}
+			return 0;
 		}
-		return 0;
+
+		if ((dep->flags & DWC3_EP_BUSY) &&
+		    !(dep->flags & DWC3_EP_MISSED_ISOC)) {
+			WARN_ON_ONCE(!dep->resource_index);
+			ret = __dwc3_gadget_kick_transfer(dep,
+							  dep->resource_index);
+		}
+
+		goto out;
 	}
 
 	if (!dwc3_calc_trbs_left(dep))
 		return 0;
 
 	ret = __dwc3_gadget_kick_transfer(dep, 0);
+out:
 	if (ret == -EBUSY)
 		ret = 0;
 
@@ -3026,6 +3036,15 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt)
 		return IRQ_HANDLED;
 	}
 
+	/*
+	 * With PCIe legacy interrupt, test shows that top-half irq handler can
+	 * be called again after HW interrupt deassertion. Check if bottom-half
+	 * irq event handler completes before caching new event to prevent
+	 * losing events.
+	 */
+	if (evt->flags & DWC3_EVENT_PENDING)
+		return IRQ_HANDLED;
+
 	count = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0));
 	count &= DWC3_GEVNTCOUNT_MASK;
 	if (!count)
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 49d685ad0da9..45b554032332 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -315,6 +315,9 @@ void usb_remove_function(struct usb_configuration *c, struct usb_function *f)
 	list_del(&f->list);
 	if (f->unbind)
 		f->unbind(c, f);
+
+	if (f->bind_deactivated)
+		usb_function_activate(f);
 }
 EXPORT_SYMBOL_GPL(usb_remove_function);
 
@@ -956,12 +959,8 @@ static void remove_config(struct usb_composite_dev *cdev,
 
 		f = list_first_entry(&config->functions,
 				struct usb_function, list);
-		list_del(&f->list);
-		if (f->unbind) {
-			DBG(cdev, "unbind function '%s'/%p\n", f->name, f);
-			f->unbind(config, f);
-			/* may free memory for "f" */
-		}
+
+		usb_remove_function(config, f);
 	}
 	list_del(&config->list);
 	if (config->unbind) {
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 71dd27c0d7f2..47dda3450abd 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1858,12 +1858,12 @@ static int ffs_func_eps_enable(struct ffs_function *func)
 		ep->ep->driver_data = ep;
 		ep->ep->desc = ds;
 
-		comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds +
-				USB_DT_ENDPOINT_SIZE);
-		ep->ep->maxburst = comp_desc->bMaxBurst + 1;
-
-		if (needs_comp_desc)
+		if (needs_comp_desc) {
+			comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds +
+					USB_DT_ENDPOINT_SIZE);
+			ep->ep->maxburst = comp_desc->bMaxBurst + 1;
 			ep->ep->comp_desc = comp_desc;
+		}
 
 		ret = usb_ep_enable(ep->ep);
 		if (likely(!ret)) {
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index 4c8aacc232c0..74d57d6994da 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -396,7 +396,11 @@ static int fsg_set_halt(struct fsg_dev *fsg, struct usb_ep *ep)
 /* Caller must hold fsg->lock */
 static void wakeup_thread(struct fsg_common *common)
 {
-	smp_wmb();	/* ensure the write of bh->state is complete */
+	/*
+	 * Ensure the reading of thread_wakeup_needed
+	 * and the writing of bh->state are completed
+	 */
+	smp_mb();
 	/* Tell the main thread that something has happened */
 	common->thread_wakeup_needed = 1;
 	if (common->thread_task)
@@ -627,7 +631,12 @@ static int sleep_thread(struct fsg_common *common, bool can_freeze)
 	}
 	__set_current_state(TASK_RUNNING);
 	common->thread_wakeup_needed = 0;
-	smp_rmb();	/* ensure the latest bh->state is visible */
+
+	/*
+	 * Ensure the writing of thread_wakeup_needed
+	 * and the reading of bh->state are completed
+	 */
+	smp_mb();
 	return rc;
 }
 
diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c
index b4058f0000e4..6a1ce6a55158 100644
--- a/drivers/usb/gadget/function/f_phonet.c
+++ b/drivers/usb/gadget/function/f_phonet.c
@@ -281,7 +281,7 @@ static void pn_net_setup(struct net_device *dev)
 	dev->tx_queue_len	= 1;
 
 	dev->netdev_ops		= &pn_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->header_ops		= &phonet_header_ops;
 }
 
diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c
index 000677c991b0..9b0805f55ad7 100644
--- a/drivers/usb/gadget/function/u_serial.c
+++ b/drivers/usb/gadget/function/u_serial.c
@@ -1256,7 +1256,7 @@ static void gserial_console_exit(void)
 	struct gscons_info *info = &gscons_info;
 
 	unregister_console(&gserial_cons);
-	if (info->console_thread != NULL)
+	if (!IS_ERR_OR_NULL(info->console_thread))
 		kthread_stop(info->console_thread);
 	gs_buf_free(&info->con_buf);
 }
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index b9ca0a26cbd9..684900fcfe24 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1183,8 +1183,10 @@ dev_release (struct inode *inode, struct file *fd)
 
 	/* closing ep0 === shutdown all */
 
-	if (dev->gadget_registered)
+	if (dev->gadget_registered) {
 		usb_gadget_unregister_driver (&gadgetfs_driver);
+		dev->gadget_registered = false;
+	}
 
 	/* at this point "good" hardware has disconnected the
 	 * device from USB; the host won't see it any more.
@@ -1677,9 +1679,10 @@ static void
 gadgetfs_suspend (struct usb_gadget *gadget)
 {
 	struct dev_data		*dev = get_gadget_data (gadget);
+	unsigned long		flags;
 
 	INFO (dev, "suspended from state %d\n", dev->state);
-	spin_lock (&dev->lock);
+	spin_lock_irqsave(&dev->lock, flags);
 	switch (dev->state) {
 	case STATE_DEV_SETUP:		// VERY odd... host died??
 	case STATE_DEV_CONNECTED:
@@ -1690,7 +1693,7 @@ gadgetfs_suspend (struct usb_gadget *gadget)
 	default:
 		break;
 	}
-	spin_unlock (&dev->lock);
+	spin_unlock_irqrestore(&dev->lock, flags);
 }
 
 static struct usb_gadget_driver gadgetfs_driver = {
diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c
index c79081952ea0..7635fd7cc328 100644
--- a/drivers/usb/gadget/udc/dummy_hcd.c
+++ b/drivers/usb/gadget/udc/dummy_hcd.c
@@ -442,23 +442,16 @@ static void set_link_state(struct dummy_hcd *dum_hcd)
 		/* Report reset and disconnect events to the driver */
 		if (dum->driver && (disconnect || reset)) {
 			stop_activity(dum);
-			spin_unlock(&dum->lock);
 			if (reset)
 				usb_gadget_udc_reset(&dum->gadget, dum->driver);
 			else
 				dum->driver->disconnect(&dum->gadget);
-			spin_lock(&dum->lock);
 		}
 	} else if (dum_hcd->active != dum_hcd->old_active) {
-		if (dum_hcd->old_active && dum->driver->suspend) {
-			spin_unlock(&dum->lock);
+		if (dum_hcd->old_active && dum->driver->suspend)
 			dum->driver->suspend(&dum->gadget);
-			spin_lock(&dum->lock);
-		} else if (!dum_hcd->old_active &&  dum->driver->resume) {
-			spin_unlock(&dum->lock);
+		else if (!dum_hcd->old_active &&  dum->driver->resume)
 			dum->driver->resume(&dum->gadget);
-			spin_lock(&dum->lock);
-		}
 	}
 
 	dum_hcd->old_status = dum_hcd->port_status;
@@ -983,7 +976,9 @@ static int dummy_udc_stop(struct usb_gadget *g)
 	struct dummy_hcd	*dum_hcd = gadget_to_dummy_hcd(g);
 	struct dummy		*dum = dum_hcd->dum;
 
+	spin_lock_irq(&dum->lock);
 	dum->driver = NULL;
+	spin_unlock_irq(&dum->lock);
 
 	return 0;
 }
@@ -2008,7 +2003,7 @@ ss_hub_descriptor(struct usb_hub_descriptor *desc)
 			HUB_CHAR_COMMON_OCPM);
 	desc->bNbrPorts = 1;
 	desc->u.ss.bHubHdrDecLat = 0x04; /* Worst case: 0.4 micro sec*/
-	desc->u.ss.DeviceRemovable = 0xffff;
+	desc->u.ss.DeviceRemovable = 0;
 }
 
 static inline void hub_descriptor(struct usb_hub_descriptor *desc)
@@ -2020,8 +2015,8 @@ static inline void hub_descriptor(struct usb_hub_descriptor *desc)
 			HUB_CHAR_INDV_PORT_LPSM |
 			HUB_CHAR_COMMON_OCPM);
 	desc->bNbrPorts = 1;
-	desc->u.hs.DeviceRemovable[0] = 0xff;
-	desc->u.hs.DeviceRemovable[1] = 0xff;
+	desc->u.hs.DeviceRemovable[0] = 0;
+	desc->u.hs.DeviceRemovable[1] = 0xff;	/* PortPwrCtrlMask */
 }
 
 static int dummy_hub_control(
diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c
index 6cf07857eaca..f2cbd7f8005e 100644
--- a/drivers/usb/gadget/udc/net2280.c
+++ b/drivers/usb/gadget/udc/net2280.c
@@ -2470,11 +2470,8 @@ static void stop_activity(struct net2280 *dev, struct usb_gadget_driver *driver)
 		nuke(&dev->ep[i]);
 
 	/* report disconnect; the driver is already quiesced */
-	if (driver) {
-		spin_unlock(&dev->lock);
+	if (driver)
 		driver->disconnect(&dev->gadget);
-		spin_lock(&dev->lock);
-	}
 
 	usb_reinit(dev);
 }
@@ -3348,8 +3345,6 @@ next_endpoints:
 		BIT(PCI_RETRY_ABORT_INTERRUPT))
 
 static void handle_stat1_irqs(struct net2280 *dev, u32 stat)
-__releases(dev->lock)
-__acquires(dev->lock)
 {
 	struct net2280_ep	*ep;
 	u32			tmp, num, mask, scratch;
@@ -3390,14 +3385,12 @@ __acquires(dev->lock)
 			if (disconnect || reset) {
 				stop_activity(dev, dev->driver);
 				ep0_start(dev);
-				spin_unlock(&dev->lock);
 				if (reset)
 					usb_gadget_udc_reset
 						(&dev->gadget, dev->driver);
 				else
 					(dev->driver->disconnect)
 						(&dev->gadget);
-				spin_lock(&dev->lock);
 				return;
 			}
 		}
diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index 5a2d845fb1a6..cd4c88529721 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -623,7 +623,6 @@ static void renesas_usb3_stop_controller(struct renesas_usb3 *usb3)
 {
 	usb3_disconnect(usb3);
 	usb3_write(usb3, 0, USB3_P0_INT_ENA);
-	usb3_write(usb3, 0, USB3_PN_INT_ENA);
 	usb3_write(usb3, 0, USB3_USB_OTG_INT_ENA);
 	usb3_write(usb3, 0, USB3_USB_INT_ENA_1);
 	usb3_write(usb3, 0, USB3_USB_INT_ENA_2);
@@ -1475,7 +1474,13 @@ static void usb3_request_done_pipen(struct renesas_usb3 *usb3,
 				    struct renesas_usb3_request *usb3_req,
 				    int status)
 {
-	usb3_pn_stop(usb3);
+	unsigned long flags;
+
+	spin_lock_irqsave(&usb3->lock, flags);
+	if (usb3_pn_change(usb3, usb3_ep->num))
+		usb3_pn_stop(usb3);
+	spin_unlock_irqrestore(&usb3->lock, flags);
+
 	usb3_disable_pipe_irq(usb3, usb3_ep->num);
 	usb3_request_done(usb3_ep, usb3_req, status);
 
@@ -1504,30 +1509,46 @@ static void usb3_irq_epc_pipen_bfrdy(struct renesas_usb3 *usb3, int num)
 {
 	struct renesas_usb3_ep *usb3_ep = usb3_get_ep(usb3, num);
 	struct renesas_usb3_request *usb3_req = usb3_get_request(usb3_ep);
+	bool done = false;
 
 	if (!usb3_req)
 		return;
 
+	spin_lock(&usb3->lock);
+	if (usb3_pn_change(usb3, num))
+		goto out;
+
 	if (usb3_ep->dir_in) {
 		/* Do not stop the IN pipe here to detect LSTTR interrupt */
 		if (!usb3_write_pipe(usb3_ep, usb3_req, USB3_PN_WRITE))
 			usb3_clear_bit(usb3, PN_INT_BFRDY, USB3_PN_INT_ENA);
 	} else {
 		if (!usb3_read_pipe(usb3_ep, usb3_req, USB3_PN_READ))
-			usb3_request_done_pipen(usb3, usb3_ep, usb3_req, 0);
+			done = true;
 	}
+
+out:
+	/* need to unlock because usb3_request_done_pipen() locks it */
+	spin_unlock(&usb3->lock);
+
+	if (done)
+		usb3_request_done_pipen(usb3, usb3_ep, usb3_req, 0);
 }
 
 static void usb3_irq_epc_pipen(struct renesas_usb3 *usb3, int num)
 {
 	u32 pn_int_sta;
 
-	if (usb3_pn_change(usb3, num) < 0)
+	spin_lock(&usb3->lock);
+	if (usb3_pn_change(usb3, num) < 0) {
+		spin_unlock(&usb3->lock);
 		return;
+	}
 
 	pn_int_sta = usb3_read(usb3, USB3_PN_INT_STA);
 	pn_int_sta &= usb3_read(usb3, USB3_PN_INT_ENA);
 	usb3_write(usb3, pn_int_sta, USB3_PN_INT_STA);
+	spin_unlock(&usb3->lock);
 	if (pn_int_sta & PN_INT_LSTTR)
 		usb3_irq_epc_pipen_lsttr(usb3, num);
 	if (pn_int_sta & PN_INT_BFRDY)
@@ -1660,6 +1681,7 @@ static int usb3_disable_pipe_n(struct renesas_usb3_ep *usb3_ep)
 
 	spin_lock_irqsave(&usb3->lock, flags);
 	if (!usb3_pn_change(usb3, usb3_ep->num)) {
+		usb3_write(usb3, 0, USB3_PN_INT_ENA);
 		usb3_write(usb3, 0, USB3_PN_RAMMAP);
 		usb3_clear_bit(usb3, PN_CON_EN, USB3_PN_CON);
 	}
@@ -1799,6 +1821,9 @@ static int renesas_usb3_start(struct usb_gadget *gadget,
 	/* hook up the driver */
 	usb3->driver = driver;
 
+	pm_runtime_enable(usb3_to_dev(usb3));
+	pm_runtime_get_sync(usb3_to_dev(usb3));
+
 	renesas_usb3_init_controller(usb3);
 
 	return 0;
@@ -1807,14 +1832,14 @@ static int renesas_usb3_start(struct usb_gadget *gadget,
 static int renesas_usb3_stop(struct usb_gadget *gadget)
 {
 	struct renesas_usb3 *usb3 = gadget_to_renesas_usb3(gadget);
-	unsigned long flags;
 
-	spin_lock_irqsave(&usb3->lock, flags);
 	usb3->softconnect = false;
 	usb3->gadget.speed = USB_SPEED_UNKNOWN;
 	usb3->driver = NULL;
 	renesas_usb3_stop_controller(usb3);
-	spin_unlock_irqrestore(&usb3->lock, flags);
+
+	pm_runtime_put(usb3_to_dev(usb3));
+	pm_runtime_disable(usb3_to_dev(usb3));
 
 	return 0;
 }
@@ -1891,9 +1916,6 @@ static int renesas_usb3_remove(struct platform_device *pdev)
 
 	device_remove_file(&pdev->dev, &dev_attr_role);
 
-	pm_runtime_put(&pdev->dev);
-	pm_runtime_disable(&pdev->dev);
-
 	usb_del_gadget_udc(&usb3->gadget);
 
 	__renesas_usb3_ep_free_request(usb3->ep0_req);
@@ -2099,9 +2121,6 @@ static int renesas_usb3_probe(struct platform_device *pdev)
 
 	usb3->workaround_for_vbus = priv->workaround_for_vbus;
 
-	pm_runtime_enable(&pdev->dev);
-	pm_runtime_get_sync(&pdev->dev);
-
 	dev_info(&pdev->dev, "probed\n");
 
 	return 0;
diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c
index bc7b9be12f54..f1908ea9fbd8 100644
--- a/drivers/usb/host/ehci-platform.c
+++ b/drivers/usb/host/ehci-platform.c
@@ -384,8 +384,10 @@ static int ehci_platform_resume(struct device *dev)
 	}
 
 	companion_dev = usb_of_get_companion_dev(hcd->self.controller);
-	if (companion_dev)
+	if (companion_dev) {
 		device_pm_wait_for_dev(hcd->self.controller, companion_dev);
+		put_device(companion_dev);
+	}
 
 	ehci_resume(hcd, priv->reset_on_resume);
 	return 0;
diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c
index bfa7fa3d2eea..7bf78be1fd32 100644
--- a/drivers/usb/host/r8a66597-hcd.c
+++ b/drivers/usb/host/r8a66597-hcd.c
@@ -1269,7 +1269,7 @@ static void set_td_timer(struct r8a66597 *r8a66597, struct r8a66597_td *td)
 			time = 30;
 			break;
 		default:
-			time = 300;
+			time = 50;
 			break;
 		}
 
@@ -1785,6 +1785,7 @@ static void r8a66597_td_timer(unsigned long _r8a66597)
 		pipe = td->pipe;
 		pipe_stop(r8a66597, pipe);
 
+		/* Select a different address or endpoint */
 		new_td = td;
 		do {
 			list_move_tail(&new_td->queue,
@@ -1794,7 +1795,8 @@ static void r8a66597_td_timer(unsigned long _r8a66597)
 				new_td = td;
 				break;
 			}
-		} while (td != new_td && td->address == new_td->address);
+		} while (td != new_td && td->address == new_td->address &&
+			td->pipe->info.epnum == new_td->pipe->info.epnum);
 
 		start_transfer(r8a66597, new_td);
 
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 5e3e9d4c6956..0dde49c35dd2 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -419,7 +419,7 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend)
 	wait_for_completion(cmd->completion);
 
 	if (cmd->status == COMP_COMMAND_ABORTED ||
-			cmd->status == COMP_STOPPED) {
+	    cmd->status == COMP_COMMAND_RING_STOPPED) {
 		xhci_warn(xhci, "Timeout while waiting for stop endpoint command\n");
 		ret = -ETIME;
 	}
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index bbe22bcc550a..fddf2731f798 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -56,7 +56,7 @@ static struct xhci_segment *xhci_segment_alloc(struct xhci_hcd *xhci,
 	}
 
 	if (max_packet) {
-		seg->bounce_buf = kzalloc(max_packet, flags | GFP_DMA);
+		seg->bounce_buf = kzalloc(max_packet, flags);
 		if (!seg->bounce_buf) {
 			dma_pool_free(xhci->segment_pool, seg->trbs, dma);
 			kfree(seg);
@@ -1724,7 +1724,7 @@ static int scratchpad_alloc(struct xhci_hcd *xhci, gfp_t flags)
 	xhci->dcbaa->dev_context_ptrs[0] = cpu_to_le64(xhci->scratchpad->sp_dma);
 	for (i = 0; i < num_sp; i++) {
 		dma_addr_t dma;
-		void *buf = dma_alloc_coherent(dev, xhci->page_size, &dma,
+		void *buf = dma_zalloc_coherent(dev, xhci->page_size, &dma,
 				flags);
 		if (!buf)
 			goto fail_sp4;
@@ -2119,11 +2119,12 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
 {
 	u32 temp, port_offset, port_count;
 	int i;
-	u8 major_revision;
+	u8 major_revision, minor_revision;
 	struct xhci_hub *rhub;
 
 	temp = readl(addr);
 	major_revision = XHCI_EXT_PORT_MAJOR(temp);
+	minor_revision = XHCI_EXT_PORT_MINOR(temp);
 
 	if (major_revision == 0x03) {
 		rhub = &xhci->usb3_rhub;
@@ -2137,7 +2138,9 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
 		return;
 	}
 	rhub->maj_rev = XHCI_EXT_PORT_MAJOR(temp);
-	rhub->min_rev = XHCI_EXT_PORT_MINOR(temp);
+
+	if (rhub->min_rev < minor_revision)
+		rhub->min_rev = minor_revision;
 
 	/* Port offset and count in the third dword, see section 7.2 */
 	temp = readl(addr + 2);
@@ -2307,10 +2310,11 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags)
 	/* Place limits on the number of roothub ports so that the hub
 	 * descriptors aren't longer than the USB core will allocate.
 	 */
-	if (xhci->num_usb3_ports > 15) {
+	if (xhci->num_usb3_ports > USB_SS_MAXPORTS) {
 		xhci_dbg_trace(xhci, trace_xhci_dbg_init,
-				"Limiting USB 3.0 roothub ports to 15.");
-		xhci->num_usb3_ports = 15;
+				"Limiting USB 3.0 roothub ports to %u.",
+				USB_SS_MAXPORTS);
+		xhci->num_usb3_ports = USB_SS_MAXPORTS;
 	}
 	if (xhci->num_usb2_ports > USB_MAXCHILDREN) {
 		xhci_dbg_trace(xhci, trace_xhci_dbg_init,
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 7b86508ac8cf..783e6687bf4a 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -52,6 +52,7 @@
 #define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI		0x0aa8
 #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI		0x1aa8
 #define PCI_DEVICE_ID_INTEL_APL_XHCI			0x5aa8
+#define PCI_DEVICE_ID_INTEL_DNV_XHCI			0x19d0
 
 static const char hcd_name[] = "xhci_hcd";
 
@@ -166,7 +167,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 		 pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI ||
 		 pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI ||
 		 pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI ||
-		 pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) {
+		 pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI ||
+		 pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) {
 		xhci->quirks |= XHCI_PME_STUCK_QUIRK;
 	}
 	if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
@@ -175,7 +177,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	}
 	if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
 	    (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI ||
-	     pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI))
+	     pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI ||
+	     pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI))
 		xhci->quirks |= XHCI_MISSING_CAS;
 
 	if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
@@ -198,6 +201,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
 			pdev->device == 0x1042)
 		xhci->quirks |= XHCI_BROKEN_STREAMS;
+	if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
+			pdev->device == 0x1142)
+		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
 
 	if (pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241)
 		xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_7;
@@ -210,13 +216,12 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 #ifdef CONFIG_ACPI
 static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev)
 {
-	static const u8 intel_dsm_uuid[] = {
-		0xb7, 0x0c, 0x34, 0xac,	0x01, 0xe9, 0xbf, 0x45,
-		0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23,
-	};
+	static const guid_t intel_dsm_guid =
+		GUID_INIT(0xac340cb7, 0xe901, 0x45bf,
+			  0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23);
 	union acpi_object *obj;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), intel_dsm_uuid, 3, 1,
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), &intel_dsm_guid, 3, 1,
 				NULL);
 	ACPI_FREE(obj);
 }
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index 7c2a9e7c8e0f..c04144b25a67 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -177,7 +177,7 @@ static int xhci_plat_probe(struct platform_device *pdev)
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
-		return -ENODEV;
+		return irq;
 
 	/*
 	 * sysdev must point to a device that is known to the system firmware
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 74bf5c60a260..03f63f50afb6 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -323,7 +323,7 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
 		if (i_cmd->status != COMP_COMMAND_ABORTED)
 			continue;
 
-		i_cmd->status = COMP_STOPPED;
+		i_cmd->status = COMP_COMMAND_RING_STOPPED;
 
 		xhci_dbg(xhci, "Turn aborted command %p to no-op\n",
 			 i_cmd->command_trb);
@@ -641,8 +641,8 @@ static void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci,
 	xhci_urb_free_priv(urb_priv);
 	usb_hcd_unlink_urb_from_ep(hcd, urb);
 	spin_unlock(&xhci->lock);
-	usb_hcd_giveback_urb(hcd, urb, status);
 	trace_xhci_urb_giveback(urb);
+	usb_hcd_giveback_urb(hcd, urb, status);
 	spin_lock(&xhci->lock);
 }
 
@@ -1380,7 +1380,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 	cmd_comp_code = GET_COMP_CODE(le32_to_cpu(event->status));
 
 	/* If CMD ring stopped we own the trbs between enqueue and dequeue */
-	if (cmd_comp_code == COMP_STOPPED) {
+	if (cmd_comp_code == COMP_COMMAND_RING_STOPPED) {
 		complete_all(&xhci->cmd_ring_stop_completion);
 		return;
 	}
@@ -1436,8 +1436,8 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 		break;
 	case TRB_CMD_NOOP:
 		/* Is this an aborted command turned to NO-OP? */
-		if (cmd->status == COMP_STOPPED)
-			cmd_comp_code = COMP_STOPPED;
+		if (cmd->status == COMP_COMMAND_RING_STOPPED)
+			cmd_comp_code = COMP_COMMAND_RING_STOPPED;
 		break;
 	case TRB_RESET_EP:
 		WARN_ON(slot_id != TRB_TO_SLOT_ID(
@@ -2677,11 +2677,12 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
 	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
 	union xhci_trb *event_ring_deq;
 	irqreturn_t ret = IRQ_NONE;
+	unsigned long flags;
 	dma_addr_t deq;
 	u64 temp_64;
 	u32 status;
 
-	spin_lock(&xhci->lock);
+	spin_lock_irqsave(&xhci->lock, flags);
 	/* Check if the xHC generated the interrupt, or the irq is shared */
 	status = readl(&xhci->op_regs->status);
 	if (status == ~(u32)0) {
@@ -2707,12 +2708,9 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
 	 */
 	status |= STS_EINT;
 	writel(status, &xhci->op_regs->status);
-	/* FIXME when MSI-X is supported and there are multiple vectors */
-	/* Clear the MSI-X event interrupt status */
 
-	if (hcd->irq) {
+	if (!hcd->msi_enabled) {
 		u32 irq_pending;
-		/* Acknowledge the PCI interrupt */
 		irq_pending = readl(&xhci->ir_set->irq_pending);
 		irq_pending |= IMAN_IP;
 		writel(irq_pending, &xhci->ir_set->irq_pending);
@@ -2757,7 +2755,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
 	ret = IRQ_HANDLED;
 
 out:
-	spin_unlock(&xhci->lock);
+	spin_unlock_irqrestore(&xhci->lock, flags);
 
 	return ret;
 }
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 2d1310220832..30f47d92a610 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -359,9 +359,10 @@ static int xhci_try_enable_msi(struct usb_hcd *hcd)
 		/* fall back to msi*/
 		ret = xhci_setup_msi(xhci);
 
-	if (!ret)
-		/* hcd->irq is 0, we have MSI */
+	if (!ret) {
+		hcd->msi_enabled = 1;
 		return 0;
+	}
 
 	if (!pdev->irq) {
 		xhci_err(xhci, "No msi-x/msi found and no IRQ in BIOS\n");
@@ -1763,7 +1764,7 @@ static int xhci_configure_endpoint_result(struct xhci_hcd *xhci,
 
 	switch (*cmd_status) {
 	case COMP_COMMAND_ABORTED:
-	case COMP_STOPPED:
+	case COMP_COMMAND_RING_STOPPED:
 		xhci_warn(xhci, "Timeout while waiting for configure endpoint command\n");
 		ret = -ETIME;
 		break;
@@ -1813,7 +1814,7 @@ static int xhci_evaluate_context_result(struct xhci_hcd *xhci,
 
 	switch (*cmd_status) {
 	case COMP_COMMAND_ABORTED:
-	case COMP_STOPPED:
+	case COMP_COMMAND_RING_STOPPED:
 		xhci_warn(xhci, "Timeout while waiting for evaluate context command\n");
 		ret = -ETIME;
 		break;
@@ -3432,7 +3433,7 @@ static int xhci_discover_or_reset_device(struct usb_hcd *hcd,
 	ret = reset_device_cmd->status;
 	switch (ret) {
 	case COMP_COMMAND_ABORTED:
-	case COMP_STOPPED:
+	case COMP_COMMAND_RING_STOPPED:
 		xhci_warn(xhci, "Timeout waiting for reset device command\n");
 		ret = -ETIME;
 		goto command_cleanup;
@@ -3817,7 +3818,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
 	 */
 	switch (command->status) {
 	case COMP_COMMAND_ABORTED:
-	case COMP_STOPPED:
+	case COMP_COMMAND_RING_STOPPED:
 		xhci_warn(xhci, "Timeout while waiting for setup device command\n");
 		ret = -ETIME;
 		break;
diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c
index e9cae4d82af2..15d4e64d3b65 100644
--- a/drivers/usb/misc/chaoskey.c
+++ b/drivers/usb/misc/chaoskey.c
@@ -192,7 +192,7 @@ static int chaoskey_probe(struct usb_interface *interface,
 
 	dev->in_ep = in_ep;
 
-	if (udev->descriptor.idVendor != ALEA_VENDOR_ID)
+	if (le16_to_cpu(udev->descriptor.idVendor) != ALEA_VENDOR_ID)
 		dev->reads_started = 1;
 
 	dev->size = size;
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index 77569531b78a..83b05a287b0c 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -554,7 +554,7 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd,
 			info.revision = le16_to_cpu(dev->udev->descriptor.bcdDevice);
 
 			/* 0==UNKNOWN, 1==LOW(usb1.1) ,2=FULL(usb1.1), 3=HIGH(usb2.0) */
-			info.speed = le16_to_cpu(dev->udev->speed);
+			info.speed = dev->udev->speed;
 			info.if_num = dev->interface->cur_altsetting->desc.bInterfaceNumber;
 			info.report_size = dev->report_size;
 
diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c
index aa3c280fdf8d..0782ac6f5edf 100644
--- a/drivers/usb/misc/legousbtower.c
+++ b/drivers/usb/misc/legousbtower.c
@@ -926,6 +926,7 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device
 		 USB_MAJOR, dev->minor);
 
 exit:
+	kfree(get_version_reply);
 	return retval;
 
 error:
diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c
index 3c6948af726a..f019d80ca9e4 100644
--- a/drivers/usb/misc/sisusbvga/sisusb_con.c
+++ b/drivers/usb/misc/sisusbvga/sisusb_con.c
@@ -973,7 +973,7 @@ sisusbcon_set_origin(struct vc_data *c)
 
 	mutex_unlock(&sisusb->lock);
 
-	return 1;
+	return true;
 }
 
 /* Interface routine */
diff --git a/drivers/usb/misc/ucsi.c b/drivers/usb/misc/ucsi.c
index 07397bddefa3..81251aaa20f9 100644
--- a/drivers/usb/misc/ucsi.c
+++ b/drivers/usb/misc/ucsi.c
@@ -55,13 +55,13 @@ struct ucsi {
 
 static int ucsi_acpi_cmd(struct ucsi *ucsi, struct ucsi_control *ctrl)
 {
-	uuid_le uuid = UUID_LE(0x6f8398c2, 0x7ca4, 0x11e4,
-			       0xad, 0x36, 0x63, 0x10, 0x42, 0xb5, 0x00, 0x8f);
+	guid_t guid = GUID_INIT(0x6f8398c2, 0x7ca4, 0x11e4,
+				0xad, 0x36, 0x63, 0x10, 0x42, 0xb5, 0x00, 0x8f);
 	union acpi_object *obj;
 
 	ucsi->data->ctrl.raw_cmd = ctrl->raw_cmd;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(ucsi->dev), uuid.b, 1, 1, NULL);
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(ucsi->dev), &guid, 1, 1, NULL);
 	if (!obj) {
 		dev_err(ucsi->dev, "%s: failed to evaluate _DSM\n", __func__);
 		return -EIO;
diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c
index 9c7ee26ef388..bc6a9be2ccc5 100644
--- a/drivers/usb/musb/musb_dsps.c
+++ b/drivers/usb/musb/musb_dsps.c
@@ -245,6 +245,11 @@ static int dsps_check_status(struct musb *musb, void *unused)
 		dsps_mod_timer_optional(glue);
 		break;
 	case OTG_STATE_A_WAIT_BCON:
+		/* keep VBUS on for host-only mode */
+		if (musb->port_mode == MUSB_PORT_MODE_HOST) {
+			dsps_mod_timer_optional(glue);
+			break;
+		}
 		musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
 		skip_session = 1;
 		/* fall */
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index ac3a4952abb4..dbe617a735d8 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -2780,10 +2780,11 @@ int musb_host_setup(struct musb *musb, int power_budget)
 	int ret;
 	struct usb_hcd *hcd = musb->hcd;
 
-	MUSB_HST_MODE(musb);
-	musb->xceiv->otg->default_a = 1;
-	musb->xceiv->otg->state = OTG_STATE_A_IDLE;
-
+	if (musb->port_mode == MUSB_PORT_MODE_HOST) {
+		MUSB_HST_MODE(musb);
+		musb->xceiv->otg->default_a = 1;
+		musb->xceiv->otg->state = OTG_STATE_A_IDLE;
+	}
 	otg_set_host(musb->xceiv->otg, &hcd->self);
 	hcd->self.otg_port = 1;
 	musb->xceiv->otg->host = &hcd->self;
diff --git a/drivers/usb/musb/tusb6010_omap.c b/drivers/usb/musb/tusb6010_omap.c
index 8b43c4b99f04..7870b37e0ea5 100644
--- a/drivers/usb/musb/tusb6010_omap.c
+++ b/drivers/usb/musb/tusb6010_omap.c
@@ -219,6 +219,7 @@ static int tusb_omap_dma_program(struct dma_channel *channel, u16 packet_sz,
 	u32				dma_remaining;
 	int				src_burst, dst_burst;
 	u16				csr;
+	u32				psize;
 	int				ch;
 	s8				dmareq;
 	s8				sync_dev;
@@ -390,15 +391,19 @@ static int tusb_omap_dma_program(struct dma_channel *channel, u16 packet_sz,
 
 	if (chdat->tx) {
 		/* Send transfer_packet_sz packets at a time */
-		musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET,
-			chdat->transfer_packet_sz);
+		psize = musb_readl(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET);
+		psize &= ~0x7ff;
+		psize |= chdat->transfer_packet_sz;
+		musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, psize);
 
 		musb_writel(ep_conf, TUSB_EP_TX_OFFSET,
 			TUSB_EP_CONFIG_XFR_SIZE(chdat->transfer_len));
 	} else {
 		/* Receive transfer_packet_sz packets at a time */
-		musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET,
-			chdat->transfer_packet_sz << 16);
+		psize = musb_readl(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET);
+		psize &= ~(0x7ff << 16);
+		psize |= (chdat->transfer_packet_sz << 16);
+		musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, psize);
 
 		musb_writel(ep_conf, TUSB_EP_RX_OFFSET,
 			TUSB_EP_CONFIG_XFR_SIZE(chdat->transfer_len));
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index d38780fa8788..aba74f817dc6 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -809,10 +809,10 @@ static const struct usb_device_id id_table_combined[] = {
 	{ USB_DEVICE(FTDI_VID, FTDI_PROPOX_ISPCABLEIII_PID) },
 	{ USB_DEVICE(FTDI_VID, CYBER_CORTEX_AV_PID),
 		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
-	{ USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID),
-		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
-	{ USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID),
-		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
+	{ USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID, 1) },
+	{ USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID, 1) },
+	{ USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_PID, 1) },
+	{ USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_H_PID, 1) },
 	{ USB_DEVICE(FIC_VID, FIC_NEO1973_DEBUG_PID),
 		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
 	{ USB_DEVICE(FTDI_VID, FTDI_OOCDLINK_PID),
@@ -1527,9 +1527,9 @@ static int set_serial_info(struct tty_struct *tty,
 					(new_serial.flags & ASYNC_FLAGS));
 	priv->custom_divisor = new_serial.custom_divisor;
 
+check_and_exit:
 	write_latency_timer(port);
 
-check_and_exit:
 	if ((old_priv.flags & ASYNC_SPD_MASK) !=
 	     (priv->flags & ASYNC_SPD_MASK)) {
 		if ((priv->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI)
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 71fb9e59db71..4fcf1cecb6d7 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -882,6 +882,8 @@
 /* Olimex */
 #define OLIMEX_VID			0x15BA
 #define OLIMEX_ARM_USB_OCD_PID		0x0003
+#define OLIMEX_ARM_USB_TINY_PID	0x0004
+#define OLIMEX_ARM_USB_TINY_H_PID	0x002a
 #define OLIMEX_ARM_USB_OCD_H_PID	0x002b
 
 /*
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index 87798e625d6c..6cefb9cb133d 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -2336,8 +2336,11 @@ static void change_port_settings(struct tty_struct *tty,
 	if (!baud) {
 		/* pick a default, any default... */
 		baud = 9600;
-	} else
+	} else {
+		/* Avoid a zero divisor. */
+		baud = min(baud, 461550);
 		tty_encode_baud_rate(tty, baud, baud);
+	}
 
 	edge_port->baud_rate = baud;
 	config->wBaudRate = (__u16)((461550L + baud/2) / baud);
diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index 73956d48a0c5..f9734a96d516 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -197,6 +197,7 @@ static u8 ir_xbof_change(u8 xbof)
 static int ir_startup(struct usb_serial *serial)
 {
 	struct usb_irda_cs_descriptor *irda_desc;
+	int rates;
 
 	irda_desc = irda_usb_find_class_desc(serial, 0);
 	if (!irda_desc) {
@@ -205,18 +206,20 @@ static int ir_startup(struct usb_serial *serial)
 		return -ENODEV;
 	}
 
+	rates = le16_to_cpu(irda_desc->wBaudRate);
+
 	dev_dbg(&serial->dev->dev,
 		"%s - Baud rates supported:%s%s%s%s%s%s%s%s%s\n",
 		__func__,
-		(irda_desc->wBaudRate & USB_IRDA_BR_2400) ? " 2400" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_9600) ? " 9600" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_19200) ? " 19200" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_38400) ? " 38400" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_57600) ? " 57600" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_115200) ? " 115200" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_576000) ? " 576000" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_1152000) ? " 1152000" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_4000000) ? " 4000000" : "");
+		(rates & USB_IRDA_BR_2400) ? " 2400" : "",
+		(rates & USB_IRDA_BR_9600) ? " 9600" : "",
+		(rates & USB_IRDA_BR_19200) ? " 19200" : "",
+		(rates & USB_IRDA_BR_38400) ? " 38400" : "",
+		(rates & USB_IRDA_BR_57600) ? " 57600" : "",
+		(rates & USB_IRDA_BR_115200) ? " 115200" : "",
+		(rates & USB_IRDA_BR_576000) ? " 576000" : "",
+		(rates & USB_IRDA_BR_1152000) ? " 1152000" : "",
+		(rates & USB_IRDA_BR_4000000) ? " 4000000" : "");
 
 	switch (irda_desc->bmAdditionalBOFs) {
 	case USB_IRDA_AB_48:
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index edbc81f205c2..70f346f1aa86 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -189,7 +189,7 @@ static int mct_u232_set_baud_rate(struct tty_struct *tty,
 		return -ENOMEM;
 
 	divisor = mct_u232_calculate_baud_rate(serial, value, &speed);
-	put_unaligned_le32(cpu_to_le32(divisor), buf);
+	put_unaligned_le32(divisor, buf);
 	rc = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0),
 				MCT_U232_SET_BAUD_RATE_REQUEST,
 				MCT_U232_SET_REQUEST_TYPE,
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index af67a0de6b5d..3bf61acfc26b 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -281,6 +281,7 @@ static void option_instat_callback(struct urb *urb);
 #define TELIT_PRODUCT_LE922_USBCFG0		0x1042
 #define TELIT_PRODUCT_LE922_USBCFG3		0x1043
 #define TELIT_PRODUCT_LE922_USBCFG5		0x1045
+#define TELIT_PRODUCT_ME910			0x1100
 #define TELIT_PRODUCT_LE920			0x1200
 #define TELIT_PRODUCT_LE910			0x1201
 #define TELIT_PRODUCT_LE910_USBCFG4		0x1206
@@ -640,6 +641,11 @@ static const struct option_blacklist_info simcom_sim7100e_blacklist = {
 	.reserved = BIT(5) | BIT(6),
 };
 
+static const struct option_blacklist_info telit_me910_blacklist = {
+	.sendsetup = BIT(0),
+	.reserved = BIT(1) | BIT(3),
+};
+
 static const struct option_blacklist_info telit_le910_blacklist = {
 	.sendsetup = BIT(0),
 	.reserved = BIT(1) | BIT(2),
@@ -1235,6 +1241,8 @@ static const struct usb_device_id option_ids[] = {
 		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
 	{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff),
 		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
+		.driver_info = (kernel_ulong_t)&telit_me910_blacklist },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
 		.driver_info = (kernel_ulong_t)&telit_le910_blacklist },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4),
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index 38b3f0d8cd58..fd509ed6cf70 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = {
 	{DEVICE_SWI(0x1199, 0x9071)},	/* Sierra Wireless MC74xx */
 	{DEVICE_SWI(0x1199, 0x9078)},	/* Sierra Wireless EM74xx */
 	{DEVICE_SWI(0x1199, 0x9079)},	/* Sierra Wireless EM74xx */
+	{DEVICE_SWI(0x1199, 0x907a)},	/* Sierra Wireless EM74xx QDL */
+	{DEVICE_SWI(0x1199, 0x907b)},	/* Sierra Wireless EM74xx */
 	{DEVICE_SWI(0x413c, 0x81a2)},	/* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a3)},	/* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a4)},	/* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
diff --git a/drivers/usb/storage/ene_ub6250.c b/drivers/usb/storage/ene_ub6250.c
index 369f3c24815a..44af719194b2 100644
--- a/drivers/usb/storage/ene_ub6250.c
+++ b/drivers/usb/storage/ene_ub6250.c
@@ -446,6 +446,10 @@ struct ms_lib_ctrl {
 #define SD_BLOCK_LEN  9
 
 struct ene_ub6250_info {
+
+	/* I/O bounce buffer */
+	u8		*bbuf;
+
 	/* for 6250 code */
 	struct SD_STATUS	SD_Status;
 	struct MS_STATUS	MS_Status;
@@ -493,8 +497,11 @@ static int ene_load_bincode(struct us_data *us, unsigned char flag);
 
 static void ene_ub6250_info_destructor(void *extra)
 {
+	struct ene_ub6250_info *info = (struct ene_ub6250_info *) extra;
+
 	if (!extra)
 		return;
+	kfree(info->bbuf);
 }
 
 static int ene_send_scsi_cmd(struct us_data *us, u8 fDir, void *buf, int use_sg)
@@ -860,8 +867,9 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr,
 		u8 PageNum, u32 *PageBuf, struct ms_lib_type_extdat *ExtraDat)
 {
 	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
+	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
+	u8 *bbuf = info->bbuf;
 	int result;
-	u8 ExtBuf[4];
 	u32 bn = PhyBlockAddr * 0x20 + PageNum;
 
 	result = ene_load_bincode(us, MS_RW_PATTERN);
@@ -901,7 +909,7 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr,
 	bcb->CDB[2]     = (unsigned char)(PhyBlockAddr>>16);
 	bcb->CDB[6]     = 0x01;
 
-	result = ene_send_scsi_cmd(us, FDIR_READ, &ExtBuf, 0);
+	result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0);
 	if (result != USB_STOR_XFER_GOOD)
 		return USB_STOR_TRANSPORT_ERROR;
 
@@ -910,9 +918,9 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr,
 	ExtraDat->status0  = 0x10;  /* Not yet,fireware support */
 
 	ExtraDat->status1  = 0x00;  /* Not yet,fireware support */
-	ExtraDat->ovrflg   = ExtBuf[0];
-	ExtraDat->mngflg   = ExtBuf[1];
-	ExtraDat->logadr   = memstick_logaddr(ExtBuf[2], ExtBuf[3]);
+	ExtraDat->ovrflg   = bbuf[0];
+	ExtraDat->mngflg   = bbuf[1];
+	ExtraDat->logadr   = memstick_logaddr(bbuf[2], bbuf[3]);
 
 	return USB_STOR_TRANSPORT_GOOD;
 }
@@ -1332,8 +1340,9 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock,
 				u8 PageNum, struct ms_lib_type_extdat *ExtraDat)
 {
 	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
+	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
+	u8 *bbuf = info->bbuf;
 	int result;
-	u8 ExtBuf[4];
 
 	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
 	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
@@ -1347,7 +1356,7 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock,
 	bcb->CDB[2]     = (unsigned char)(PhyBlock>>16);
 	bcb->CDB[6]     = 0x01;
 
-	result = ene_send_scsi_cmd(us, FDIR_READ, &ExtBuf, 0);
+	result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0);
 	if (result != USB_STOR_XFER_GOOD)
 		return USB_STOR_TRANSPORT_ERROR;
 
@@ -1355,9 +1364,9 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock,
 	ExtraDat->intr     = 0x80;  /* Not yet, waiting for fireware support */
 	ExtraDat->status0  = 0x10;  /* Not yet, waiting for fireware support */
 	ExtraDat->status1  = 0x00;  /* Not yet, waiting for fireware support */
-	ExtraDat->ovrflg   = ExtBuf[0];
-	ExtraDat->mngflg   = ExtBuf[1];
-	ExtraDat->logadr   = memstick_logaddr(ExtBuf[2], ExtBuf[3]);
+	ExtraDat->ovrflg   = bbuf[0];
+	ExtraDat->mngflg   = bbuf[1];
+	ExtraDat->logadr   = memstick_logaddr(bbuf[2], bbuf[3]);
 
 	return USB_STOR_TRANSPORT_GOOD;
 }
@@ -1556,9 +1565,9 @@ static int ms_lib_scan_logicalblocknumber(struct us_data *us, u16 btBlk1st)
 	u16 PhyBlock, newblk, i;
 	u16 LogStart, LogEnde;
 	struct ms_lib_type_extdat extdat;
-	u8 buf[0x200];
 	u32 count = 0, index = 0;
 	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
+	u8 *bbuf = info->bbuf;
 
 	for (PhyBlock = 0; PhyBlock < info->MS_Lib.NumberOfPhyBlock;) {
 		ms_lib_phy_to_log_range(PhyBlock, &LogStart, &LogEnde);
@@ -1572,14 +1581,16 @@ static int ms_lib_scan_logicalblocknumber(struct us_data *us, u16 btBlk1st)
 			}
 
 			if (count == PhyBlock) {
-				ms_lib_read_extrablock(us, PhyBlock, 0, 0x80, &buf);
+				ms_lib_read_extrablock(us, PhyBlock, 0, 0x80,
+						bbuf);
 				count += 0x80;
 			}
 			index = (PhyBlock % 0x80) * 4;
 
-			extdat.ovrflg = buf[index];
-			extdat.mngflg = buf[index+1];
-			extdat.logadr = memstick_logaddr(buf[index+2], buf[index+3]);
+			extdat.ovrflg = bbuf[index];
+			extdat.mngflg = bbuf[index+1];
+			extdat.logadr = memstick_logaddr(bbuf[index+2],
+					bbuf[index+3]);
 
 			if ((extdat.ovrflg & MS_REG_OVR_BKST) != MS_REG_OVR_BKST_OK) {
 				ms_lib_setacquired_errorblock(us, PhyBlock);
@@ -2062,9 +2073,9 @@ static int ene_ms_init(struct us_data *us)
 {
 	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
 	int result;
-	u8 buf[0x200];
 	u16 MSP_BlockSize, MSP_UserAreaBlocks;
 	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
+	u8 *bbuf = info->bbuf;
 
 	printk(KERN_INFO "transport --- ENE_MSInit\n");
 
@@ -2083,13 +2094,13 @@ static int ene_ms_init(struct us_data *us)
 	bcb->CDB[0]     = 0xF1;
 	bcb->CDB[1]     = 0x01;
 
-	result = ene_send_scsi_cmd(us, FDIR_READ, &buf, 0);
+	result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0);
 	if (result != USB_STOR_XFER_GOOD) {
 		printk(KERN_ERR "Execution MS Init Code Fail !!\n");
 		return USB_STOR_TRANSPORT_ERROR;
 	}
 	/* the same part to test ENE */
-	info->MS_Status = *(struct MS_STATUS *)&buf[0];
+	info->MS_Status = *(struct MS_STATUS *) bbuf;
 
 	if (info->MS_Status.Insert && info->MS_Status.Ready) {
 		printk(KERN_INFO "Insert     = %x\n", info->MS_Status.Insert);
@@ -2098,15 +2109,15 @@ static int ene_ms_init(struct us_data *us)
 		printk(KERN_INFO "IsMSPHG    = %x\n", info->MS_Status.IsMSPHG);
 		printk(KERN_INFO "WtP= %x\n", info->MS_Status.WtP);
 		if (info->MS_Status.IsMSPro) {
-			MSP_BlockSize      = (buf[6] << 8) | buf[7];
-			MSP_UserAreaBlocks = (buf[10] << 8) | buf[11];
+			MSP_BlockSize      = (bbuf[6] << 8) | bbuf[7];
+			MSP_UserAreaBlocks = (bbuf[10] << 8) | bbuf[11];
 			info->MSP_TotalBlock = MSP_BlockSize * MSP_UserAreaBlocks;
 		} else {
 			ms_card_init(us); /* Card is MS (to ms.c)*/
 		}
 		usb_stor_dbg(us, "MS Init Code OK !!\n");
 	} else {
-		usb_stor_dbg(us, "MS Card Not Ready --- %x\n", buf[0]);
+		usb_stor_dbg(us, "MS Card Not Ready --- %x\n", bbuf[0]);
 		return USB_STOR_TRANSPORT_ERROR;
 	}
 
@@ -2116,9 +2127,9 @@ static int ene_ms_init(struct us_data *us)
 static int ene_sd_init(struct us_data *us)
 {
 	int result;
-	u8  buf[0x200];
 	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
 	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
+	u8 *bbuf = info->bbuf;
 
 	usb_stor_dbg(us, "transport --- ENE_SDInit\n");
 	/* SD Init Part-1 */
@@ -2152,17 +2163,17 @@ static int ene_sd_init(struct us_data *us)
 	bcb->Flags              = US_BULK_FLAG_IN;
 	bcb->CDB[0]             = 0xF1;
 
-	result = ene_send_scsi_cmd(us, FDIR_READ, &buf, 0);
+	result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0);
 	if (result != USB_STOR_XFER_GOOD) {
 		usb_stor_dbg(us, "Execution SD Init Code Fail !!\n");
 		return USB_STOR_TRANSPORT_ERROR;
 	}
 
-	info->SD_Status =  *(struct SD_STATUS *)&buf[0];
+	info->SD_Status =  *(struct SD_STATUS *) bbuf;
 	if (info->SD_Status.Insert && info->SD_Status.Ready) {
 		struct SD_STATUS *s = &info->SD_Status;
 
-		ene_get_card_status(us, (unsigned char *)&buf);
+		ene_get_card_status(us, bbuf);
 		usb_stor_dbg(us, "Insert     = %x\n", s->Insert);
 		usb_stor_dbg(us, "Ready      = %x\n", s->Ready);
 		usb_stor_dbg(us, "IsMMC      = %x\n", s->IsMMC);
@@ -2170,7 +2181,7 @@ static int ene_sd_init(struct us_data *us)
 		usb_stor_dbg(us, "HiSpeed    = %x\n", s->HiSpeed);
 		usb_stor_dbg(us, "WtP        = %x\n", s->WtP);
 	} else {
-		usb_stor_dbg(us, "SD Card Not Ready --- %x\n", buf[0]);
+		usb_stor_dbg(us, "SD Card Not Ready --- %x\n", bbuf[0]);
 		return USB_STOR_TRANSPORT_ERROR;
 	}
 	return USB_STOR_TRANSPORT_GOOD;
@@ -2180,13 +2191,15 @@ static int ene_sd_init(struct us_data *us)
 static int ene_init(struct us_data *us)
 {
 	int result;
-	u8  misc_reg03 = 0;
+	u8  misc_reg03;
 	struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra);
+	u8 *bbuf = info->bbuf;
 
-	result = ene_get_card_type(us, REG_CARD_STATUS, &misc_reg03);
+	result = ene_get_card_type(us, REG_CARD_STATUS, bbuf);
 	if (result != USB_STOR_XFER_GOOD)
 		return USB_STOR_TRANSPORT_ERROR;
 
+	misc_reg03 = bbuf[0];
 	if (misc_reg03 & 0x01) {
 		if (!info->SD_Status.Ready) {
 			result = ene_sd_init(us);
@@ -2303,8 +2316,9 @@ static int ene_ub6250_probe(struct usb_interface *intf,
 			 const struct usb_device_id *id)
 {
 	int result;
-	u8  misc_reg03 = 0;
+	u8  misc_reg03;
 	struct us_data *us;
+	struct ene_ub6250_info *info;
 
 	result = usb_stor_probe1(&us, intf, id,
 		   (id - ene_ub6250_usb_ids) + ene_ub6250_unusual_dev_list,
@@ -2313,11 +2327,16 @@ static int ene_ub6250_probe(struct usb_interface *intf,
 		return result;
 
 	/* FIXME: where should the code alloc extra buf ? */
-	if (!us->extra) {
-		us->extra = kzalloc(sizeof(struct ene_ub6250_info), GFP_KERNEL);
-		if (!us->extra)
-			return -ENOMEM;
-		us->extra_destructor = ene_ub6250_info_destructor;
+	us->extra = kzalloc(sizeof(struct ene_ub6250_info), GFP_KERNEL);
+	if (!us->extra)
+		return -ENOMEM;
+	us->extra_destructor = ene_ub6250_info_destructor;
+
+	info = (struct ene_ub6250_info *)(us->extra);
+	info->bbuf = kmalloc(512, GFP_KERNEL);
+	if (!info->bbuf) {
+		kfree(us->extra);
+		return -ENOMEM;
 	}
 
 	us->transport_name = "ene_ub6250";
@@ -2329,12 +2348,13 @@ static int ene_ub6250_probe(struct usb_interface *intf,
 		return result;
 
 	/* probe card type */
-	result = ene_get_card_type(us, REG_CARD_STATUS, &misc_reg03);
+	result = ene_get_card_type(us, REG_CARD_STATUS, info->bbuf);
 	if (result != USB_STOR_XFER_GOOD) {
 		usb_stor_disconnect(intf);
 		return USB_STOR_TRANSPORT_ERROR;
 	}
 
+	misc_reg03 = info->bbuf[0];
 	if (!(misc_reg03 & 0x01)) {
 		pr_info("ums_eneub6250: This driver only supports SD/MS cards. "
 			"It does not support SM cards.\n");
diff --git a/drivers/usb/typec/typec_wcove.c b/drivers/usb/typec/typec_wcove.c
index d5a7b21fa3f1..c2ce25289027 100644
--- a/drivers/usb/typec/typec_wcove.c
+++ b/drivers/usb/typec/typec_wcove.c
@@ -105,8 +105,8 @@ enum wcove_typec_role {
 	WCOVE_ROLE_DEVICE,
 };
 
-static uuid_le uuid = UUID_LE(0x482383f0, 0x2876, 0x4e49,
-			      0x86, 0x85, 0xdb, 0x66, 0x21, 0x1a, 0xf0, 0x37);
+static guid_t guid = GUID_INIT(0x482383f0, 0x2876, 0x4e49,
+			       0x86, 0x85, 0xdb, 0x66, 0x21, 0x1a, 0xf0, 0x37);
 
 static int wcove_typec_func(struct wcove_typec *wcove,
 			    enum wcove_typec_func func, int param)
@@ -118,7 +118,7 @@ static int wcove_typec_func(struct wcove_typec *wcove,
 	tmp.type = ACPI_TYPE_INTEGER;
 	tmp.integer.value = param;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(wcove->dev), uuid.b, 1, func,
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(wcove->dev), &guid, 1, func,
 				&argv4);
 	if (!obj) {
 		dev_err(wcove->dev, "%s: failed to evaluate _DSM\n", __func__);
@@ -314,7 +314,7 @@ static int wcove_typec_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	if (!acpi_check_dsm(ACPI_HANDLE(&pdev->dev), uuid.b, 0, 0x1f)) {
+	if (!acpi_check_dsm(ACPI_HANDLE(&pdev->dev), &guid, 0, 0x1f)) {
 		dev_err(&pdev->dev, "Missing _DSM functions\n");
 		return -ENODEV;
 	}
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index 5d8b2c261940..0585078638db 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -235,14 +235,19 @@ done:
 
 static inline void hub_descriptor(struct usb_hub_descriptor *desc)
 {
+	int width;
+
 	memset(desc, 0, sizeof(*desc));
 	desc->bDescriptorType = USB_DT_HUB;
-	desc->bDescLength = 9;
 	desc->wHubCharacteristics = cpu_to_le16(
 		HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_COMMON_OCPM);
+
 	desc->bNbrPorts = VHCI_HC_PORTS;
-	desc->u.hs.DeviceRemovable[0] = 0xff;
-	desc->u.hs.DeviceRemovable[1] = 0xff;
+	BUILD_BUG_ON(VHCI_HC_PORTS > USB_MAXCHILDREN);
+	width = desc->bNbrPorts / 8 + 1;
+	desc->bDescLength = USB_DT_HUB_NONVAR_SIZE + 2 * width;
+	memset(&desc->u.hs.DeviceRemovable[0], 0, width);
+	memset(&desc->u.hs.DeviceRemovable[width], 0xff, width);
 }
 
 static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c
index 6345e85822a4..a50cf45e530f 100644
--- a/drivers/uwb/i1480/dfu/usb.c
+++ b/drivers/uwb/i1480/dfu/usb.c
@@ -341,6 +341,7 @@ error_submit_ep1:
 static
 int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id)
 {
+	struct usb_device *udev = interface_to_usbdev(iface);
 	struct i1480_usb *i1480_usb;
 	struct i1480 *i1480;
 	struct device *dev = &iface->dev;
@@ -352,8 +353,8 @@ int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id)
 			iface->cur_altsetting->desc.bInterfaceNumber);
 		goto error;
 	}
-	if (iface->num_altsetting > 1
-	    && interface_to_usbdev(iface)->descriptor.idProduct == 0xbabe) {
+	if (iface->num_altsetting > 1 &&
+			le16_to_cpu(udev->descriptor.idProduct) == 0xbabe) {
 		/* Need altsetting #1 [HW QUIRK] or EP1 won't work */
 		result = usb_set_interface(interface_to_usbdev(iface), 0, 1);
 		if (result < 0)
diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c
index 27c89cd5d70b..4797217e5e72 100644
--- a/drivers/vfio/virqfd.c
+++ b/drivers/vfio/virqfd.c
@@ -43,7 +43,7 @@ static void virqfd_deactivate(struct virqfd *virqfd)
 	queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
 }
 
-static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
 	unsigned long flags = (unsigned long)key;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 042030e5a035..e4613a3c362d 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -165,7 +165,7 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 	add_wait_queue(wqh, &poll->wait);
 }
 
-static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
+static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync,
 			     void *key)
 {
 	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index f55671d53f28..f72095868b93 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -31,7 +31,7 @@ struct vhost_work {
 struct vhost_poll {
 	poll_table                table;
 	wait_queue_head_t        *wqh;
-	wait_queue_t              wait;
+	wait_queue_entry_t              wait;
 	struct vhost_work	  work;
 	unsigned long		  mask;
 	struct vhost_dev	 *dev;
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 9ad3c17d6456..445b2c230b56 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_VGASTATE)            += vgastate.o
 obj-$(CONFIG_HDMI)                += hdmi.o
 
 obj-$(CONFIG_VT)		  += console/
+obj-$(CONFIG_FB_STI)		  += console/
 obj-$(CONFIG_LOGO)		  += logo/
 obj-y				  += backlight/
 
diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
index 5b71bd905a60..2111d06f8c81 100644
--- a/drivers/video/console/Kconfig
+++ b/drivers/video/console/Kconfig
@@ -6,7 +6,7 @@ menu "Console display driver support"
 
 config VGA_CONSOLE
 	bool "VGA text console" if EXPERT || !X86
-	depends on !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && \
+	depends on !4xx && !PPC_8xx && !SPARC && !M68K && !PARISC && !FRV && \
 		!SUPERH && !BLACKFIN && !AVR32 && !MN10300 && !CRIS && \
 		(!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) && \
 		!ARM64 && !ARC && !MICROBLAZE && !OPENRISC
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 922e4eaed9c5..5c6696bb56da 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -689,8 +689,6 @@ config FB_STI
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
-	select STI_CONSOLE
-	select VT
 	default y
 	---help---
 	  STI refers to the HP "Standard Text Interface" which is a set of
diff --git a/drivers/video/fbdev/acornfb.c b/drivers/video/fbdev/acornfb.c
index fb75b7e5a19a..0c325b4da61d 100644
--- a/drivers/video/fbdev/acornfb.c
+++ b/drivers/video/fbdev/acornfb.c
@@ -101,7 +101,7 @@ extern unsigned int vram_size;	/* set by setup.c */
 #ifdef HAS_VIDC20
 #include <mach/acornfb.h>
 
-#define MAX_SIZE	2*1024*1024
+#define MAX_SIZE	(2*1024*1024)
 
 /* VIDC20 has a different set of rules from the VIDC:
  *  hcr  : must be multiple of 4
@@ -162,7 +162,7 @@ static void acornfb_set_timing(struct fb_info *info)
 	if (memcmp(&current_vidc, &vidc, sizeof(vidc))) {
 		current_vidc = vidc;
 
-		vidc_writel(VIDC20_CTRL| vidc.control);
+		vidc_writel(VIDC20_CTRL | vidc.control);
 		vidc_writel(0xd0000000 | vidc.pll_ctl);
 		vidc_writel(0x80000000 | vidc.h_cycle);
 		vidc_writel(0x81000000 | vidc.h_sync_width);
@@ -297,7 +297,7 @@ acornfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 		pal.p = 0;
 		vidc_writel(0x10000000);
 		for (i = 0; i < 256; i += 1) {
-			pal.vidc20.red   = current_par.palette[ i       & 31].vidc20.red;
+			pal.vidc20.red   = current_par.palette[i       & 31].vidc20.red;
 			pal.vidc20.green = current_par.palette[(i >> 1) & 31].vidc20.green;
 			pal.vidc20.blue  = current_par.palette[(i >> 2) & 31].vidc20.blue;
 			vidc_writel(pal.p);
@@ -1043,8 +1043,7 @@ static int acornfb_probe(struct platform_device *dev)
 		base = dma_alloc_wc(current_par.dev, size, &handle,
 				    GFP_KERNEL);
 		if (base == NULL) {
-			printk(KERN_ERR "acornfb: unable to allocate screen "
-			       "memory\n");
+			printk(KERN_ERR "acornfb: unable to allocate screen memory\n");
 			return -ENOMEM;
 		}
 
@@ -1103,8 +1102,7 @@ static int acornfb_probe(struct platform_device *dev)
 	v_sync = h_sync / (fb_info.var.yres + fb_info.var.upper_margin +
 		 fb_info.var.lower_margin + fb_info.var.vsync_len);
 
-	printk(KERN_INFO "Acornfb: %dkB %cRAM, %s, using %dx%d, "
-		"%d.%03dkHz, %dHz\n",
+	printk(KERN_INFO "Acornfb: %dkB %cRAM, %s, using %dx%d, %d.%03dkHz, %dHz\n",
 		fb_info.fix.smem_len / 1024,
 		current_par.using_vram ? 'V' : 'D',
 		VIDC_NAME, fb_info.var.xres, fb_info.var.yres,
diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c
index 0fab92c62828..ffc2c33c6cef 100644
--- a/drivers/video/fbdev/amba-clcd.c
+++ b/drivers/video/fbdev/amba-clcd.c
@@ -881,8 +881,8 @@ static int clcdfb_of_dma_setup(struct clcd_fb *fb)
 	if (err)
 		return err;
 
-	framesize = fb->panel->mode.xres * fb->panel->mode.yres *
-			fb->panel->bpp / 8;
+	framesize = PAGE_ALIGN(fb->panel->mode.xres * fb->panel->mode.yres *
+			fb->panel->bpp / 8);
 	fb->fb.screen_base = dma_alloc_coherent(&fb->dev->dev, framesize,
 			&dma, GFP_KERNEL);
 	if (!fb->fb.screen_base)
diff --git a/drivers/video/fbdev/arcfb.c b/drivers/video/fbdev/arcfb.c
index 1928cb2b5386..7e87d0d61658 100644
--- a/drivers/video/fbdev/arcfb.c
+++ b/drivers/video/fbdev/arcfb.c
@@ -645,17 +645,17 @@ module_param(nosplash, uint, 0);
 MODULE_PARM_DESC(nosplash, "Disable doing the splash screen");
 module_param(arcfb_enable, uint, 0);
 MODULE_PARM_DESC(arcfb_enable, "Enable communication with Arc board");
-module_param(dio_addr, ulong, 0);
+module_param_hw(dio_addr, ulong, ioport, 0);
 MODULE_PARM_DESC(dio_addr, "IO address for data, eg: 0x480");
-module_param(cio_addr, ulong, 0);
+module_param_hw(cio_addr, ulong, ioport, 0);
 MODULE_PARM_DESC(cio_addr, "IO address for control, eg: 0x400");
-module_param(c2io_addr, ulong, 0);
+module_param_hw(c2io_addr, ulong, ioport, 0);
 MODULE_PARM_DESC(c2io_addr, "IO address for secondary control, eg: 0x408");
 module_param(splashval, ulong, 0);
 MODULE_PARM_DESC(splashval, "Splash pattern: 0xFF is black, 0x00 is green");
 module_param(tuhold, ulong, 0);
 MODULE_PARM_DESC(tuhold, "Time to hold between strobing data to Arc board");
-module_param(irq, uint, 0);
+module_param_hw(irq, uint, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ for the Arc board");
 
 module_init(arcfb_init);
diff --git a/drivers/video/fbdev/aty/radeon_base.c b/drivers/video/fbdev/aty/radeon_base.c
index 218339a4edaa..6b4c7872b375 100644
--- a/drivers/video/fbdev/aty/radeon_base.c
+++ b/drivers/video/fbdev/aty/radeon_base.c
@@ -2453,8 +2453,8 @@ static int radeonfb_pci_register(struct pci_dev *pdev,
 		err |= sysfs_create_bin_file(&rinfo->pdev->dev.kobj,
 						&edid2_attr);
 	if (err)
-		pr_warning("%s() Creating sysfs files failed, continuing\n",
-			   __func__);
+		pr_warn("%s() Creating sysfs files failed, continuing\n",
+			__func__);
 
 	/* save current mode regs before we switch into the new one
 	 * so we can restore this upon __exit
diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c
index 62c0cf79674f..41d7979d81c5 100644
--- a/drivers/video/fbdev/core/fbmon.c
+++ b/drivers/video/fbdev/core/fbmon.c
@@ -1048,7 +1048,7 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs)
 
 	for (i = 0; i < (128 - edid[2]) / DETAILED_TIMING_DESCRIPTION_SIZE;
 	     i++, block += DETAILED_TIMING_DESCRIPTION_SIZE)
-		if (PIXEL_CLOCK)
+		if (PIXEL_CLOCK != 0)
 			edt[num++] = block - edid;
 
 	/* Yikes, EDID data is totally useless */
@@ -1073,9 +1073,9 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs)
 	for (i = specs->modedb_len + num; i < specs->modedb_len + num + svd_n; i++) {
 		int idx = svd[i - specs->modedb_len - num];
 		if (!idx || idx >= ARRAY_SIZE(cea_modes)) {
-			pr_warning("Reserved SVD code %d\n", idx);
+			pr_warn("Reserved SVD code %d\n", idx);
 		} else if (!cea_modes[idx].xres) {
-			pr_warning("Unimplemented SVD code %d\n", idx);
+			pr_warn("Unimplemented SVD code %d\n", idx);
 		} else {
 			memcpy(&m[i], cea_modes + idx, sizeof(m[i]));
 			pr_debug("Adding SVD #%d: %ux%u@%u\n", idx,
diff --git a/drivers/video/fbdev/i810/i810_main.c b/drivers/video/fbdev/i810/i810_main.c
index 483ab2592d0c..2488baab7c89 100644
--- a/drivers/video/fbdev/i810/i810_main.c
+++ b/drivers/video/fbdev/i810/i810_main.c
@@ -81,7 +81,7 @@ static u32 voffset;
 static int i810fb_cursor(struct fb_info *info, struct fb_cursor *cursor);
 static int i810fb_init_pci(struct pci_dev *dev,
 			   const struct pci_device_id *entry);
-static void __exit i810fb_remove_pci(struct pci_dev *dev);
+static void i810fb_remove_pci(struct pci_dev *dev);
 static int i810fb_resume(struct pci_dev *dev);
 static int i810fb_suspend(struct pci_dev *dev, pm_message_t state);
 
@@ -128,7 +128,7 @@ static struct pci_driver i810fb_driver = {
 	.name     =	"i810fb",
 	.id_table =	i810fb_pci_tbl,
 	.probe    =	i810fb_init_pci,
-	.remove   =	__exit_p(i810fb_remove_pci),
+	.remove   =	i810fb_remove_pci,
 	.suspend  =     i810fb_suspend,
 	.resume   =     i810fb_resume,
 };
@@ -2123,7 +2123,7 @@ static void i810fb_release_resource(struct fb_info *info,
 
 }
 
-static void __exit i810fb_remove_pci(struct pci_dev *dev)
+static void i810fb_remove_pci(struct pci_dev *dev)
 {
 	struct fb_info *info = pci_get_drvdata(dev);
 	struct i810fb_par *par = info->par;
diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c
index 1b0faadb3080..c166e0725be5 100644
--- a/drivers/video/fbdev/imxfb.c
+++ b/drivers/video/fbdev/imxfb.c
@@ -117,6 +117,9 @@
 
 #define IMXFB_LSCR1_DEFAULT 0x00120300
 
+#define LCDC_LAUSCR	0x80
+#define LAUSCR_AUS_MODE	(1<<31)
+
 /* Used fb-mode. Can be set on kernel command line, therefore file-static. */
 static const char *fb_mode;
 
@@ -158,6 +161,7 @@ struct imxfb_info {
 	dma_addr_t		dbar2;
 
 	u_int			pcr;
+	u_int			lauscr;
 	u_int			pwmr;
 	u_int			lscr1;
 	u_int			dmacr;
@@ -422,6 +426,11 @@ static int imxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	pcr |= imxfb_mode->pcr & ~(0x3f | (7 << 25));
 
 	fbi->pcr = pcr;
+	/*
+	 * The LCDC AUS Mode Control Register does not exist on imx1.
+	 */
+	if (!is_imx1_fb(fbi) && imxfb_mode->aus_mode)
+		fbi->lauscr = LAUSCR_AUS_MODE;
 
 	/*
 	 * Copy the RGB parameters for this display
@@ -638,6 +647,9 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf
 	if (fbi->dmacr)
 		writel(fbi->dmacr, fbi->regs + LCDC_DMACR);
 
+	if (fbi->lauscr)
+		writel(fbi->lauscr, fbi->regs + LCDC_LAUSCR);
+
 	return 0;
 }
 
@@ -734,6 +746,11 @@ static int imxfb_of_read_mode(struct device *dev, struct device_node *np,
 	imxfb_mode->bpp = bpp;
 	imxfb_mode->pcr = pcr;
 
+	/*
+	 * fsl,aus-mode is optional
+	 */
+	imxfb_mode->aus_mode = of_property_read_bool(np, "fsl,aus-mode");
+
 	return 0;
 }
 
diff --git a/drivers/video/fbdev/n411.c b/drivers/video/fbdev/n411.c
index 053deacad7cc..a3677313396e 100644
--- a/drivers/video/fbdev/n411.c
+++ b/drivers/video/fbdev/n411.c
@@ -193,11 +193,11 @@ module_exit(n411_exit);
 
 module_param(nosplash, uint, 0);
 MODULE_PARM_DESC(nosplash, "Disable doing the splash screen");
-module_param(dio_addr, ulong, 0);
+module_param_hw(dio_addr, ulong, ioport, 0);
 MODULE_PARM_DESC(dio_addr, "IO address for data, eg: 0x480");
-module_param(cio_addr, ulong, 0);
+module_param_hw(cio_addr, ulong, ioport, 0);
 MODULE_PARM_DESC(cio_addr, "IO address for control, eg: 0x400");
-module_param(c2io_addr, ulong, 0);
+module_param_hw(c2io_addr, ulong, ioport, 0);
 MODULE_PARM_DESC(c2io_addr, "IO address for secondary control, eg: 0x408");
 module_param(splashval, ulong, 0);
 MODULE_PARM_DESC(splashval, "Splash pattern: 0x00 is black, 0x01 is white");
diff --git a/drivers/video/fbdev/omap/lcd_mipid.c b/drivers/video/fbdev/omap/lcd_mipid.c
index c81f150589e1..df9e6ebcfad5 100644
--- a/drivers/video/fbdev/omap/lcd_mipid.c
+++ b/drivers/video/fbdev/omap/lcd_mipid.c
@@ -174,7 +174,7 @@ static void hw_guard_wait(struct mipid_device *md)
 {
 	unsigned long wait = md->hw_guard_end - jiffies;
 
-	if ((long)wait > 0 && wait <= md->hw_guard_wait) {
+	if ((long)wait > 0 && time_before_eq(wait,  md->hw_guard_wait)) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule_timeout(wait);
 	}
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dss.c b/drivers/video/fbdev/omap2/omapfb/dss/dss.c
index 47d7f69ad9ad..48c6500c24e1 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/dss.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/dss.c
@@ -941,11 +941,13 @@ static int dss_init_features(struct platform_device *pdev)
 	return 0;
 }
 
+static void dss_uninit_ports(struct platform_device *pdev);
+
 static int dss_init_ports(struct platform_device *pdev)
 {
 	struct device_node *parent = pdev->dev.of_node;
 	struct device_node *port;
-	int r;
+	int r, ret = 0;
 
 	if (parent == NULL)
 		return 0;
@@ -972,17 +974,21 @@ static int dss_init_ports(struct platform_device *pdev)
 
 		switch (port_type) {
 		case OMAP_DISPLAY_TYPE_DPI:
-			dpi_init_port(pdev, port);
+			ret = dpi_init_port(pdev, port);
 			break;
 		case OMAP_DISPLAY_TYPE_SDI:
-			sdi_init_port(pdev, port);
+			ret = sdi_init_port(pdev, port);
 			break;
 		default:
 			break;
 		}
-	} while ((port = omapdss_of_get_next_port(parent, port)) != NULL);
+	} while (!ret &&
+		 (port = omapdss_of_get_next_port(parent, port)) != NULL);
 
-	return 0;
+	if (ret)
+		dss_uninit_ports(pdev);
+
+	return ret;
 }
 
 static void dss_uninit_ports(struct platform_device *pdev)
diff --git a/drivers/video/fbdev/pmag-aa-fb.c b/drivers/video/fbdev/pmag-aa-fb.c
index ffe2dd482f84..39922f072db4 100644
--- a/drivers/video/fbdev/pmag-aa-fb.c
+++ b/drivers/video/fbdev/pmag-aa-fb.c
@@ -247,7 +247,7 @@ err_alloc:
 	return err;
 }
 
-static int __exit pmagaafb_remove(struct device *dev)
+static int pmagaafb_remove(struct device *dev)
 {
 	struct tc_dev *tdev = to_tc_dev(dev);
 	struct fb_info *info = dev_get_drvdata(dev);
@@ -280,7 +280,7 @@ static struct tc_driver pmagaafb_driver = {
 		.name	= "pmagaafb",
 		.bus	= &tc_bus_type,
 		.probe	= pmagaafb_probe,
-		.remove	= __exit_p(pmagaafb_remove),
+		.remove	= pmagaafb_remove,
 	},
 };
 
diff --git a/drivers/video/fbdev/pmag-ba-fb.c b/drivers/video/fbdev/pmag-ba-fb.c
index df02fb4b7fd1..1fd02f40708e 100644
--- a/drivers/video/fbdev/pmag-ba-fb.c
+++ b/drivers/video/fbdev/pmag-ba-fb.c
@@ -235,7 +235,7 @@ err_alloc:
 	return err;
 }
 
-static int __exit pmagbafb_remove(struct device *dev)
+static int pmagbafb_remove(struct device *dev)
 {
 	struct tc_dev *tdev = to_tc_dev(dev);
 	struct fb_info *info = dev_get_drvdata(dev);
@@ -270,7 +270,7 @@ static struct tc_driver pmagbafb_driver = {
 		.name	= "pmagbafb",
 		.bus	= &tc_bus_type,
 		.probe	= pmagbafb_probe,
-		.remove	= __exit_p(pmagbafb_remove),
+		.remove	= pmagbafb_remove,
 	},
 };
 
diff --git a/drivers/video/fbdev/pmagb-b-fb.c b/drivers/video/fbdev/pmagb-b-fb.c
index a7a179a0bb33..46e96c451506 100644
--- a/drivers/video/fbdev/pmagb-b-fb.c
+++ b/drivers/video/fbdev/pmagb-b-fb.c
@@ -353,7 +353,7 @@ err_alloc:
 	return err;
 }
 
-static int __exit pmagbbfb_remove(struct device *dev)
+static int pmagbbfb_remove(struct device *dev)
 {
 	struct tc_dev *tdev = to_tc_dev(dev);
 	struct fb_info *info = dev_get_drvdata(dev);
@@ -388,7 +388,7 @@ static struct tc_driver pmagbbfb_driver = {
 		.name	= "pmagbbfb",
 		.bus	= &tc_bus_type,
 		.probe	= pmagbbfb_probe,
-		.remove	= __exit_p(pmagbbfb_remove),
+		.remove	= pmagbbfb_remove,
 	},
 };
 
diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c
index ef73f14d7ba0..b21a89b03fb4 100644
--- a/drivers/video/fbdev/pxafb.c
+++ b/drivers/video/fbdev/pxafb.c
@@ -645,7 +645,7 @@ static void overlay1fb_disable(struct pxafb_layer *ofb)
 	lcd_writel(ofb->fbi, FBR1, ofb->fbi->fdadr[DMA_OV1] | 0x3);
 
 	if (wait_for_completion_timeout(&ofb->branch_done, 1 * HZ) == 0)
-		pr_warning("%s: timeout disabling overlay1\n", __func__);
+		pr_warn("%s: timeout disabling overlay1\n", __func__);
 
 	lcd_writel(ofb->fbi, LCCR5, lccr5);
 }
@@ -710,7 +710,7 @@ static void overlay2fb_disable(struct pxafb_layer *ofb)
 	lcd_writel(ofb->fbi, FBR4, ofb->fbi->fdadr[DMA_OV2_Cr] | 0x3);
 
 	if (wait_for_completion_timeout(&ofb->branch_done, 1 * HZ) == 0)
-		pr_warning("%s: timeout disabling overlay2\n", __func__);
+		pr_warn("%s: timeout disabling overlay2\n", __func__);
 }
 
 static struct pxafb_layer_ops ofb_ops[] = {
@@ -1187,8 +1187,7 @@ int pxafb_smart_flush(struct fb_info *info)
 	lcd_writel(fbi, LCCR0, fbi->reg_lccr0 | LCCR0_ENB);
 
 	if (wait_for_completion_timeout(&fbi->command_done, HZ/2) == 0) {
-		pr_warning("%s: timeout waiting for command done\n",
-				__func__);
+		pr_warn("%s: timeout waiting for command done\n", __func__);
 		ret = -ETIMEDOUT;
 	}
 
diff --git a/drivers/video/fbdev/sm501fb.c b/drivers/video/fbdev/sm501fb.c
index d80bc8a3200f..67e314fdd947 100644
--- a/drivers/video/fbdev/sm501fb.c
+++ b/drivers/video/fbdev/sm501fb.c
@@ -1600,6 +1600,7 @@ static int sm501fb_start(struct sm501fb_info *info,
 	info->fbmem = ioremap(res->start, resource_size(res));
 	if (info->fbmem == NULL) {
 		dev_err(dev, "cannot remap framebuffer\n");
+		ret = -ENXIO;
 		goto err_mem_res;
 	}
 
diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c
index ec2e7e353685..449fceaf79d5 100644
--- a/drivers/video/fbdev/smscufx.c
+++ b/drivers/video/fbdev/smscufx.c
@@ -1646,8 +1646,9 @@ static int ufx_usb_probe(struct usb_interface *interface,
 	dev_dbg(dev->gdev, "%s %s - serial #%s\n",
 		usbdev->manufacturer, usbdev->product, usbdev->serial);
 	dev_dbg(dev->gdev, "vid_%04x&pid_%04x&rev_%04x driver's ufx_data struct at %p\n",
-		usbdev->descriptor.idVendor, usbdev->descriptor.idProduct,
-		usbdev->descriptor.bcdDevice, dev);
+		le16_to_cpu(usbdev->descriptor.idVendor),
+		le16_to_cpu(usbdev->descriptor.idProduct),
+		le16_to_cpu(usbdev->descriptor.bcdDevice), dev);
 	dev_dbg(dev->gdev, "console enable=%d\n", console);
 	dev_dbg(dev->gdev, "fb_defio enable=%d\n", fb_defio);
 
diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index e9c2f7ba3c8e..05ef657235df 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -1105,8 +1105,8 @@ static int dlfb_ops_blank(int blank_mode, struct fb_info *info)
 	char *bufptr;
 	struct urb *urb;
 
-	pr_info("/dev/fb%d FB_BLANK mode %d --> %d\n",
-		info->node, dev->blank_mode, blank_mode);
+	pr_debug("/dev/fb%d FB_BLANK mode %d --> %d\n",
+		 info->node, dev->blank_mode, blank_mode);
 
 	if ((dev->blank_mode == FB_BLANK_POWERDOWN) &&
 	    (blank_mode != FB_BLANK_POWERDOWN)) {
@@ -1487,15 +1487,25 @@ static struct device_attribute fb_device_attrs[] = {
 static int dlfb_select_std_channel(struct dlfb_data *dev)
 {
 	int ret;
-	u8 set_def_chn[] = {	   0x57, 0xCD, 0xDC, 0xA7,
+	void *buf;
+	static const u8 set_def_chn[] = {
+				0x57, 0xCD, 0xDC, 0xA7,
 				0x1C, 0x88, 0x5E, 0x15,
 				0x60, 0xFE, 0xC6, 0x97,
 				0x16, 0x3D, 0x47, 0xF2  };
 
+	buf = kmemdup(set_def_chn, sizeof(set_def_chn), GFP_KERNEL);
+
+	if (!buf)
+		return -ENOMEM;
+
 	ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
 			NR_USB_REQUEST_CHANNEL,
 			(USB_DIR_OUT | USB_TYPE_VENDOR), 0, 0,
-			set_def_chn, sizeof(set_def_chn), USB_CTRL_SET_TIMEOUT);
+			buf, sizeof(set_def_chn), USB_CTRL_SET_TIMEOUT);
+
+	kfree(buf);
+
 	return ret;
 }
 
@@ -1603,8 +1613,9 @@ static int dlfb_usb_probe(struct usb_interface *interface,
 	pr_info("%s %s - serial #%s\n",
 		usbdev->manufacturer, usbdev->product, usbdev->serial);
 	pr_info("vid_%04x&pid_%04x&rev_%04x driver's dlfb_data struct at %p\n",
-		usbdev->descriptor.idVendor, usbdev->descriptor.idProduct,
-		usbdev->descriptor.bcdDevice, dev);
+		le16_to_cpu(usbdev->descriptor.idVendor),
+		le16_to_cpu(usbdev->descriptor.idProduct),
+		le16_to_cpu(usbdev->descriptor.bcdDevice), dev);
 	pr_info("console enable=%d\n", console);
 	pr_info("fb_defio enable=%d\n", fb_defio);
 	pr_info("shadow enable=%d\n", shadow);
diff --git a/drivers/video/fbdev/via/viafbdev.c b/drivers/video/fbdev/via/viafbdev.c
index f9718f012aae..badee04ef496 100644
--- a/drivers/video/fbdev/via/viafbdev.c
+++ b/drivers/video/fbdev/via/viafbdev.c
@@ -1630,16 +1630,14 @@ static void viafb_init_proc(struct viafb_shared *shared)
 }
 static void viafb_remove_proc(struct viafb_shared *shared)
 {
-	struct proc_dir_entry *viafb_entry = shared->proc_entry,
-		*iga1_entry = shared->iga1_proc_entry,
-		*iga2_entry = shared->iga2_proc_entry;
+	struct proc_dir_entry *viafb_entry = shared->proc_entry;
 
 	if (!viafb_entry)
 		return;
 
-	remove_proc_entry("output_devices", iga2_entry);
+	remove_proc_entry("output_devices", shared->iga2_proc_entry);
 	remove_proc_entry("iga2", viafb_entry);
-	remove_proc_entry("output_devices", iga1_entry);
+	remove_proc_entry("output_devices", shared->iga1_proc_entry);
 	remove_proc_entry("iga1", viafb_entry);
 	remove_proc_entry("supported_output_devices", viafb_entry);
 
diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c
index 3ee309c50b2d..46f63960fa9e 100644
--- a/drivers/video/fbdev/xen-fbfront.c
+++ b/drivers/video/fbdev/xen-fbfront.c
@@ -18,6 +18,8 @@
  * frame buffer.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/console.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -380,10 +382,18 @@ static int xenfb_probe(struct xenbus_device *dev,
 			video[KPARAM_MEM] = val;
 	}
 
+	video[KPARAM_WIDTH] = xenbus_read_unsigned(dev->otherend, "width",
+						   video[KPARAM_WIDTH]);
+	video[KPARAM_HEIGHT] = xenbus_read_unsigned(dev->otherend, "height",
+						    video[KPARAM_HEIGHT]);
+
 	/* If requested res does not fit in available memory, use default */
 	fb_size = video[KPARAM_MEM] * 1024 * 1024;
 	if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH / 8
 	    > fb_size) {
+		pr_warn("display parameters %d,%d,%d invalid, use defaults\n",
+			video[KPARAM_MEM], video[KPARAM_WIDTH],
+			video[KPARAM_HEIGHT]);
 		video[KPARAM_WIDTH] = XENFB_WIDTH;
 		video[KPARAM_HEIGHT] = XENFB_HEIGHT;
 		fb_size = XENFB_DEFAULT_FB_LEN;
diff --git a/drivers/video/logo/logo.c b/drivers/video/logo/logo.c
index b6bc4a0bda2a..4d50bfd13e7c 100644
--- a/drivers/video/logo/logo.c
+++ b/drivers/video/logo/logo.c
@@ -34,7 +34,7 @@ static int __init fb_logo_late_init(void)
 	return 0;
 }
 
-late_initcall(fb_logo_late_init);
+late_initcall_sync(fb_logo_late_init);
 
 /* logo's are marked __initdata. Use __ref to tell
  * modpost that it is intended that this function uses data
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 34adf9b9c053..22caf808bfab 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -418,8 +418,7 @@ static int init_vqs(struct virtio_balloon *vb)
 	 * optionally stat.
 	 */
 	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
-	err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names,
-			NULL);
+	err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
 	if (err)
 		return err;
 
@@ -664,6 +663,12 @@ static int virtballoon_restore(struct virtio_device *vdev)
 }
 #endif
 
+static int virtballoon_validate(struct virtio_device *vdev)
+{
+	__virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
+	return 0;
+}
+
 static unsigned int features[] = {
 	VIRTIO_BALLOON_F_MUST_TELL_HOST,
 	VIRTIO_BALLOON_F_STATS_VQ,
@@ -676,6 +681,7 @@ static struct virtio_driver virtio_balloon_driver = {
 	.driver.name =	KBUILD_MODNAME,
 	.driver.owner =	THIS_MODULE,
 	.id_table =	id_table,
+	.validate =	virtballoon_validate,
 	.probe =	virtballoon_probe,
 	.remove =	virtballoon_remove,
 	.config_changed = virtballoon_changed,
diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c
index 79f1293cda93..3a0468f2ceb0 100644
--- a/drivers/virtio/virtio_input.c
+++ b/drivers/virtio/virtio_input.c
@@ -173,8 +173,7 @@ static int virtinput_init_vqs(struct virtio_input *vi)
 	static const char * const names[] = { "events", "status" };
 	int err;
 
-	err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names,
-			NULL);
+	err = virtio_find_vqs(vi->vdev, 2, vqs, cbs, names, NULL);
 	if (err)
 		return err;
 	vi->evt = vqs[0];
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 78343b8f9034..74dc7170fd35 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -351,7 +351,7 @@ static void vm_del_vqs(struct virtio_device *vdev)
 
 static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 				  void (*callback)(struct virtqueue *vq),
-				  const char *name)
+				  const char *name, bool ctx)
 {
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
 	struct virtio_mmio_vq_info *info;
@@ -388,7 +388,7 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 
 	/* Create the vring */
 	vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev,
-				 true, true, vm_notify, callback, name);
+				 true, true, ctx, vm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto error_new_virtqueue;
@@ -447,6 +447,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		       struct virtqueue *vqs[],
 		       vq_callback_t *callbacks[],
 		       const char * const names[],
+		       const bool *ctx,
 		       struct irq_affinity *desc)
 {
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
@@ -459,7 +460,8 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		return err;
 
 	for (i = 0; i < nvqs; ++i) {
-		vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]);
+		vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i],
+				     ctx ? ctx[i] : false);
 		if (IS_ERR(vqs[i])) {
 			vm_del_vqs(vdev);
 			return PTR_ERR(vqs[i]);
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 698d5d06fa03..007a4f366086 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -172,6 +172,7 @@ error:
 static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
 				     void (*callback)(struct virtqueue *vq),
 				     const char *name,
+				     bool ctx,
 				     u16 msix_vec)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
@@ -183,7 +184,7 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
 	if (!info)
 		return ERR_PTR(-ENOMEM);
 
-	vq = vp_dev->setup_vq(vp_dev, info, index, callback, name,
+	vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
 			      msix_vec);
 	if (IS_ERR(vq))
 		goto out_info;
@@ -274,6 +275,7 @@ void vp_del_vqs(struct virtio_device *vdev)
 static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 		struct virtqueue *vqs[], vq_callback_t *callbacks[],
 		const char * const names[], bool per_vq_vectors,
+		const bool *ctx,
 		struct irq_affinity *desc)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
@@ -315,6 +317,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 		else
 			msix_vec = VP_MSIX_VQ_VECTOR;
 		vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+				     ctx ? ctx[i] : false,
 				     msix_vec);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
@@ -345,7 +348,7 @@ error_find:
 
 static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
 		struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		const char * const names[])
+		const char * const names[], const bool *ctx)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	int i, err;
@@ -367,6 +370,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
 			continue;
 		}
 		vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+				     ctx ? ctx[i] : false,
 				     VIRTIO_MSI_NO_VECTOR);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
@@ -383,20 +387,21 @@ out_del_vqs:
 /* the config->find_vqs() implementation */
 int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		const char * const names[], struct irq_affinity *desc)
+		const char * const names[], const bool *ctx,
+		struct irq_affinity *desc)
 {
 	int err;
 
 	/* Try MSI-X with one vector per queue. */
-	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, desc);
+	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc);
 	if (!err)
 		return 0;
 	/* Fallback: MSI-X with one vector for config, one shared for queues. */
-	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, desc);
+	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc);
 	if (!err)
 		return 0;
 	/* Finally fall back to regular interrupts. */
-	return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names);
+	return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
 }
 
 const char *vp_bus_name(struct virtio_device *vdev)
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index e96334aec1e0..135ee3cf7175 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -102,6 +102,7 @@ struct virtio_pci_device {
 				      unsigned idx,
 				      void (*callback)(struct virtqueue *vq),
 				      const char *name,
+				      bool ctx,
 				      u16 msix_vec);
 	void (*del_vq)(struct virtio_pci_vq_info *info);
 
@@ -131,7 +132,8 @@ void vp_del_vqs(struct virtio_device *vdev);
 /* the config->find_vqs() implementation */
 int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		const char * const names[], struct irq_affinity *desc);
+		const char * const names[], const bool *ctx,
+		struct irq_affinity *desc);
 const char *vp_bus_name(struct virtio_device *vdev);
 
 /* Setup the affinity for a virtqueue:
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index 4bfa48fb1324..2780886e8ba3 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -116,6 +116,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 				  unsigned index,
 				  void (*callback)(struct virtqueue *vq),
 				  const char *name,
+				  bool ctx,
 				  u16 msix_vec)
 {
 	struct virtqueue *vq;
@@ -135,7 +136,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	/* create the vring */
 	vq = vring_create_virtqueue(index, num,
 				    VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
-				    true, false, vp_notify, callback, name);
+				    true, false, ctx,
+				    vp_notify, callback, name);
 	if (!vq)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 8978f109d2d7..2555d80f6eec 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -297,6 +297,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 				  unsigned index,
 				  void (*callback)(struct virtqueue *vq),
 				  const char *name,
+				  bool ctx,
 				  u16 msix_vec)
 {
 	struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
@@ -328,7 +329,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	/* create the vring */
 	vq = vring_create_virtqueue(index, num,
 				    SMP_CACHE_BYTES, &vp_dev->vdev,
-				    true, true, vp_notify, callback, name);
+				    true, true, ctx,
+				    vp_notify, callback, name);
 	if (!vq)
 		return ERR_PTR(-ENOMEM);
 
@@ -387,12 +389,14 @@ err_map_notify:
 }
 
 static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
-		struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		const char * const names[], struct irq_affinity *desc)
+			      struct virtqueue *vqs[],
+			      vq_callback_t *callbacks[],
+			      const char * const names[], const bool *ctx,
+			      struct irq_affinity *desc)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtqueue *vq;
-	int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, desc);
+	int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc);
 
 	if (rc)
 		return rc;
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 409aeaa49246..5e1b548828e6 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -263,6 +263,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 				unsigned int out_sgs,
 				unsigned int in_sgs,
 				void *data,
+				void *ctx,
 				gfp_t gfp)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -275,6 +276,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	START_USE(vq);
 
 	BUG_ON(data == NULL);
+	BUG_ON(ctx && vq->indirect);
 
 	if (unlikely(vq->broken)) {
 		END_USE(vq);
@@ -389,6 +391,8 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	vq->desc_state[head].data = data;
 	if (indirect)
 		vq->desc_state[head].indir_desc = desc;
+	if (ctx)
+		vq->desc_state[head].indir_desc = ctx;
 
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
@@ -461,7 +465,8 @@ int virtqueue_add_sgs(struct virtqueue *_vq,
 		for (sg = sgs[i]; sg; sg = sg_next(sg))
 			total_sg++;
 	}
-	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, gfp);
+	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
+			     data, NULL, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
 
@@ -483,7 +488,7 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
 			 void *data,
 			 gfp_t gfp)
 {
-	return virtqueue_add(vq, &sg, num, 1, 0, data, gfp);
+	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
 
@@ -505,11 +510,35 @@ int virtqueue_add_inbuf(struct virtqueue *vq,
 			void *data,
 			gfp_t gfp)
 {
-	return virtqueue_add(vq, &sg, num, 0, 1, data, gfp);
+	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
 
 /**
+ * virtqueue_add_inbuf_ctx - expose input buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg writable by other side
+ * @data: the token identifying the buffer.
+ * @ctx: extra context for the token
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
+			struct scatterlist *sg, unsigned int num,
+			void *data,
+			void *ctx,
+			gfp_t gfp)
+{
+	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
+
+/**
  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  * @vq: the struct virtqueue
  *
@@ -598,7 +627,8 @@ bool virtqueue_kick(struct virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick);
 
-static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
+static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
+		       void **ctx)
 {
 	unsigned int i, j;
 	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
@@ -622,10 +652,15 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 	/* Plus final descriptor */
 	vq->vq.num_free++;
 
-	/* Free the indirect table, if any, now that it's unmapped. */
-	if (vq->desc_state[head].indir_desc) {
+	if (vq->indirect) {
 		struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
-		u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
+		u32 len;
+
+		/* Free the indirect table, if any, now that it's unmapped. */
+		if (!indir_desc)
+			return;
+
+		len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
 
 		BUG_ON(!(vq->vring.desc[head].flags &
 			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
@@ -634,8 +669,10 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 		for (j = 0; j < len / sizeof(struct vring_desc); j++)
 			vring_unmap_one(vq, &indir_desc[j]);
 
-		kfree(vq->desc_state[head].indir_desc);
+		kfree(indir_desc);
 		vq->desc_state[head].indir_desc = NULL;
+	} else if (ctx) {
+		*ctx = vq->desc_state[head].indir_desc;
 	}
 }
 
@@ -660,7 +697,8 @@ static inline bool more_used(const struct vring_virtqueue *vq)
  * Returns NULL if there are no used buffers, or the "data" token
  * handed to virtqueue_add_*().
  */
-void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
+void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
+			    void **ctx)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	void *ret;
@@ -698,7 +736,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 
 	/* detach_buf clears data, so grab it now. */
 	ret = vq->desc_state[i].data;
-	detach_buf(vq, i);
+	detach_buf(vq, i, ctx);
 	vq->last_used_idx++;
 	/* If we expect an interrupt for the next entry, tell host
 	 * by writing event index and flush out the write before
@@ -715,8 +753,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 	END_USE(vq);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(virtqueue_get_buf);
+EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
 
+void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
+{
+	return virtqueue_get_buf_ctx(_vq, len, NULL);
+}
+EXPORT_SYMBOL_GPL(virtqueue_get_buf);
 /**
  * virtqueue_disable_cb - disable callbacks
  * @vq: the struct virtqueue we're talking about.
@@ -878,7 +921,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 			continue;
 		/* detach_buf clears data, so grab it now. */
 		buf = vq->desc_state[i].data;
-		detach_buf(vq, i);
+		detach_buf(vq, i, NULL);
 		vq->avail_idx_shadow--;
 		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
 		END_USE(vq);
@@ -916,6 +959,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 					struct vring vring,
 					struct virtio_device *vdev,
 					bool weak_barriers,
+					bool context,
 					bool (*notify)(struct virtqueue *),
 					void (*callback)(struct virtqueue *),
 					const char *name)
@@ -950,7 +994,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	vq->last_add_time_valid = false;
 #endif
 
-	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
+	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
+		!context;
 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 
 	/* No callback?  Tell other side not to bother us. */
@@ -1019,6 +1064,7 @@ struct virtqueue *vring_create_virtqueue(
 	struct virtio_device *vdev,
 	bool weak_barriers,
 	bool may_reduce_num,
+	bool context,
 	bool (*notify)(struct virtqueue *),
 	void (*callback)(struct virtqueue *),
 	const char *name)
@@ -1058,7 +1104,7 @@ struct virtqueue *vring_create_virtqueue(
 	queue_size_in_bytes = vring_size(num, vring_align);
 	vring_init(&vring, num, queue, vring_align);
 
-	vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers,
+	vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
 				   notify, callback, name);
 	if (!vq) {
 		vring_free_queue(vdev, queue_size_in_bytes, queue,
@@ -1079,6 +1125,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
 				      bool weak_barriers,
+				      bool context,
 				      void *pages,
 				      bool (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
@@ -1086,7 +1133,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 {
 	struct vring vring;
 	vring_init(&vring, num, pages, vring_align);
-	return __vring_new_virtqueue(index, vring, vdev, weak_barriers,
+	return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
 				     notify, callback, name);
 }
 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 52a70ee6014f..8b9049dac094 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -452,7 +452,7 @@ config DAVINCI_WATCHDOG
 
 config ORION_WATCHDOG
 	tristate "Orion watchdog"
-	depends on ARCH_ORION5X || ARCH_DOVE || MACH_DOVE || ARCH_MVEBU || COMPILE_TEST
+	depends on ARCH_ORION5X || ARCH_DOVE || MACH_DOVE || ARCH_MVEBU || (COMPILE_TEST && !ARCH_EBSA110)
 	depends on ARM
 	select WATCHDOG_CORE
 	help
diff --git a/drivers/watchdog/bcm_kona_wdt.c b/drivers/watchdog/bcm_kona_wdt.c
index 6fce17d5b9f1..a5775dfd8d5f 100644
--- a/drivers/watchdog/bcm_kona_wdt.c
+++ b/drivers/watchdog/bcm_kona_wdt.c
@@ -304,6 +304,8 @@ static int bcm_kona_wdt_probe(struct platform_device *pdev)
 	if (!wdt)
 		return -ENOMEM;
 
+	spin_lock_init(&wdt->lock);
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	wdt->base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(wdt->base))
@@ -316,7 +318,6 @@ static int bcm_kona_wdt_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	spin_lock_init(&wdt->lock);
 	platform_set_drvdata(pdev, wdt);
 	watchdog_set_drvdata(&bcm_kona_wdt_wdd, wdt);
 	bcm_kona_wdt_wdd.parent = &pdev->dev;
diff --git a/drivers/watchdog/cadence_wdt.c b/drivers/watchdog/cadence_wdt.c
index 8d61e8bfe60b..86e0b5d2e761 100644
--- a/drivers/watchdog/cadence_wdt.c
+++ b/drivers/watchdog/cadence_wdt.c
@@ -49,7 +49,7 @@
 /* Counter maximum value */
 #define CDNS_WDT_COUNTER_MAX 0xFFF
 
-static int wdt_timeout = CDNS_WDT_DEFAULT_TIMEOUT;
+static int wdt_timeout;
 static int nowayout = WATCHDOG_NOWAYOUT;
 
 module_param(wdt_timeout, int, 0);
diff --git a/drivers/watchdog/cpu5wdt.c b/drivers/watchdog/cpu5wdt.c
index 6d03e8e30f8b..6c3f78e45c26 100644
--- a/drivers/watchdog/cpu5wdt.c
+++ b/drivers/watchdog/cpu5wdt.c
@@ -289,7 +289,7 @@ MODULE_DESCRIPTION("sma cpu5 watchdog driver");
 MODULE_SUPPORTED_DEVICE("sma cpu5 watchdog");
 MODULE_LICENSE("GPL");
 
-module_param(port, int, 0);
+module_param_hw(port, int, ioport, 0);
 MODULE_PARM_DESC(port, "base address of watchdog card, default is 0x91");
 
 module_param(verbose, int, 0);
diff --git a/drivers/watchdog/eurotechwdt.c b/drivers/watchdog/eurotechwdt.c
index 23ee53240c4c..38e96712264f 100644
--- a/drivers/watchdog/eurotechwdt.c
+++ b/drivers/watchdog/eurotechwdt.c
@@ -97,9 +97,9 @@ MODULE_PARM_DESC(nowayout,
 #define WDT_TIMER_CFG		0xf3
 
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, "Eurotech WDT io port (default=0x3f0)");
-module_param(irq, int, 0);
+module_param_hw(irq, int, irq, 0);
 MODULE_PARM_DESC(irq, "Eurotech WDT irq (default=10)");
 module_param(ev, charp, 0);
 MODULE_PARM_DESC(ev, "Eurotech WDT event type (default is `int')");
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index 347f0389b089..c4f65873bfa4 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -306,16 +306,15 @@ static int iTCO_wdt_ping(struct watchdog_device *wd_dev)
 
 	iTCO_vendor_pre_keepalive(p->smi_res, wd_dev->timeout);
 
+	/* Reset the timeout status bit so that the timer
+	 * needs to count down twice again before rebooting */
+	outw(0x0008, TCO1_STS(p));	/* write 1 to clear bit */
+
 	/* Reload the timer by writing to the TCO Timer Counter register */
-	if (p->iTCO_version >= 2) {
+	if (p->iTCO_version >= 2)
 		outw(0x01, TCO_RLD(p));
-	} else if (p->iTCO_version == 1) {
-		/* Reset the timeout status bit so that the timer
-		 * needs to count down twice again before rebooting */
-		outw(0x0008, TCO1_STS(p));	/* write 1 to clear bit */
-
+	else if (p->iTCO_version == 1)
 		outb(0x01, TCO_RLD(p));
-	}
 
 	spin_unlock(&p->io_lock);
 	return 0;
@@ -328,11 +327,8 @@ static int iTCO_wdt_set_timeout(struct watchdog_device *wd_dev, unsigned int t)
 	unsigned char val8;
 	unsigned int tmrval;
 
-	tmrval = seconds_to_ticks(p, t);
-
-	/* For TCO v1 the timer counts down twice before rebooting */
-	if (p->iTCO_version == 1)
-		tmrval /= 2;
+	/* The timer counts down twice before rebooting */
+	tmrval = seconds_to_ticks(p, t) / 2;
 
 	/* from the specs: */
 	/* "Values of 0h-3h are ignored and should not be attempted" */
@@ -385,6 +381,8 @@ static unsigned int iTCO_wdt_get_timeleft(struct watchdog_device *wd_dev)
 		spin_lock(&p->io_lock);
 		val16 = inw(TCO_RLD(p));
 		val16 &= 0x3ff;
+		if (!(inw(TCO1_STS(p)) & 0x0008))
+			val16 += (inw(TCOv2_TMR(p)) & 0x3ff);
 		spin_unlock(&p->io_lock);
 
 		time_left = ticks_to_seconds(p, val16);
diff --git a/drivers/watchdog/pc87413_wdt.c b/drivers/watchdog/pc87413_wdt.c
index 9f15dd9435d1..06a892e36a8d 100644
--- a/drivers/watchdog/pc87413_wdt.c
+++ b/drivers/watchdog/pc87413_wdt.c
@@ -579,7 +579,7 @@ MODULE_AUTHOR("Marcus Junker <junker@anduras.de>");
 MODULE_DESCRIPTION("PC87413 WDT driver");
 MODULE_LICENSE("GPL");
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, MODNAME " I/O port (default: "
 					__MODULE_STRING(IO_DEFAULT) ").");
 
diff --git a/drivers/watchdog/pcwd_usb.c b/drivers/watchdog/pcwd_usb.c
index 99ebf6ea3de6..5615f4013924 100644
--- a/drivers/watchdog/pcwd_usb.c
+++ b/drivers/watchdog/pcwd_usb.c
@@ -630,6 +630,9 @@ static int usb_pcwd_probe(struct usb_interface *interface,
 		return -ENODEV;
 	}
 
+	if (iface_desc->desc.bNumEndpoints < 1)
+		return -ENODEV;
+
 	/* check out the endpoint: it has to be Interrupt & IN */
 	endpoint = &iface_desc->endpoint[0].desc;
 
diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c
index f709962018ac..362fd229786d 100644
--- a/drivers/watchdog/sama5d4_wdt.c
+++ b/drivers/watchdog/sama5d4_wdt.c
@@ -6,6 +6,7 @@
  * Licensed under GPLv2.
  */
 
+#include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
@@ -29,6 +30,7 @@ struct sama5d4_wdt {
 	struct watchdog_device	wdd;
 	void __iomem		*reg_base;
 	u32			mr;
+	unsigned long		last_ping;
 };
 
 static int wdt_timeout = WDT_DEFAULT_TIMEOUT;
@@ -44,11 +46,34 @@ MODULE_PARM_DESC(nowayout,
 	"Watchdog cannot be stopped once started (default="
 	__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
+#define wdt_enabled (!(wdt->mr & AT91_WDT_WDDIS))
+
 #define wdt_read(wdt, field) \
 	readl_relaxed((wdt)->reg_base + (field))
 
-#define wdt_write(wtd, field, val) \
-	writel_relaxed((val), (wdt)->reg_base + (field))
+/* 4 slow clock periods is 4/32768 = 122.07µs*/
+#define WDT_DELAY	usecs_to_jiffies(123)
+
+static void wdt_write(struct sama5d4_wdt *wdt, u32 field, u32 val)
+{
+	/*
+	 * WDT_CR and WDT_MR must not be modified within three slow clock
+	 * periods following a restart of the watchdog performed by a write
+	 * access in WDT_CR.
+	 */
+	while (time_before(jiffies, wdt->last_ping + WDT_DELAY))
+		usleep_range(30, 125);
+	writel_relaxed(val, wdt->reg_base + field);
+	wdt->last_ping = jiffies;
+}
+
+static void wdt_write_nosleep(struct sama5d4_wdt *wdt, u32 field, u32 val)
+{
+	if (time_before(jiffies, wdt->last_ping + WDT_DELAY))
+		udelay(123);
+	writel_relaxed(val, wdt->reg_base + field);
+	wdt->last_ping = jiffies;
+}
 
 static int sama5d4_wdt_start(struct watchdog_device *wdd)
 {
@@ -89,7 +114,16 @@ static int sama5d4_wdt_set_timeout(struct watchdog_device *wdd,
 	wdt->mr &= ~AT91_WDT_WDD;
 	wdt->mr |= AT91_WDT_SET_WDV(value);
 	wdt->mr |= AT91_WDT_SET_WDD(value);
-	wdt_write(wdt, AT91_WDT_MR, wdt->mr);
+
+	/*
+	 * WDDIS has to be 0 when updating WDD/WDV. The datasheet states: When
+	 * setting the WDDIS bit, and while it is set, the fields WDV and WDD
+	 * must not be modified.
+	 * If the watchdog is enabled, then the timeout can be updated. Else,
+	 * wait that the user enables it.
+	 */
+	if (wdt_enabled)
+		wdt_write(wdt, AT91_WDT_MR, wdt->mr & ~AT91_WDT_WDDIS);
 
 	wdd->timeout = timeout;
 
@@ -145,23 +179,21 @@ static int of_sama5d4_wdt_init(struct device_node *np, struct sama5d4_wdt *wdt)
 
 static int sama5d4_wdt_init(struct sama5d4_wdt *wdt)
 {
-	struct watchdog_device *wdd = &wdt->wdd;
-	u32 value = WDT_SEC2TICKS(wdd->timeout);
 	u32 reg;
-
 	/*
-	 * Because the fields WDV and WDD must not be modified when the WDDIS
-	 * bit is set, so clear the WDDIS bit before writing the WDT_MR.
+	 * When booting and resuming, the bootloader may have changed the
+	 * watchdog configuration.
+	 * If the watchdog is already running, we can safely update it.
+	 * Else, we have to disable it properly.
 	 */
-	reg = wdt_read(wdt, AT91_WDT_MR);
-	reg &= ~AT91_WDT_WDDIS;
-	wdt_write(wdt, AT91_WDT_MR, reg);
-
-	wdt->mr |= AT91_WDT_SET_WDD(value);
-	wdt->mr |= AT91_WDT_SET_WDV(value);
-
-	wdt_write(wdt, AT91_WDT_MR, wdt->mr);
-
+	if (wdt_enabled) {
+		wdt_write_nosleep(wdt, AT91_WDT_MR, wdt->mr);
+	} else {
+		reg = wdt_read(wdt, AT91_WDT_MR);
+		if (!(reg & AT91_WDT_WDDIS))
+			wdt_write_nosleep(wdt, AT91_WDT_MR,
+					  reg | AT91_WDT_WDDIS);
+	}
 	return 0;
 }
 
@@ -172,6 +204,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
 	struct resource *res;
 	void __iomem *regs;
 	u32 irq = 0;
+	u32 timeout;
 	int ret;
 
 	wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
@@ -184,6 +217,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
 	wdd->ops = &sama5d4_wdt_ops;
 	wdd->min_timeout = MIN_WDT_TIMEOUT;
 	wdd->max_timeout = MAX_WDT_TIMEOUT;
+	wdt->last_ping = jiffies;
 
 	watchdog_set_drvdata(wdd, wdt);
 
@@ -221,6 +255,11 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	timeout = WDT_SEC2TICKS(wdd->timeout);
+
+	wdt->mr |= AT91_WDT_SET_WDD(timeout);
+	wdt->mr |= AT91_WDT_SET_WDV(timeout);
+
 	ret = sama5d4_wdt_init(wdt);
 	if (ret)
 		return ret;
@@ -263,9 +302,7 @@ static int sama5d4_wdt_resume(struct device *dev)
 {
 	struct sama5d4_wdt *wdt = dev_get_drvdata(dev);
 
-	wdt_write(wdt, AT91_WDT_MR, wdt->mr & ~AT91_WDT_WDDIS);
-	if (wdt->mr & AT91_WDT_WDDIS)
-		wdt_write(wdt, AT91_WDT_MR, wdt->mr);
+	sama5d4_wdt_init(wdt);
 
 	return 0;
 }
diff --git a/drivers/watchdog/sc1200wdt.c b/drivers/watchdog/sc1200wdt.c
index 131193a7acdf..b34d3d5ba632 100644
--- a/drivers/watchdog/sc1200wdt.c
+++ b/drivers/watchdog/sc1200wdt.c
@@ -88,7 +88,7 @@ MODULE_PARM_DESC(isapnp,
 	"When set to 0 driver ISA PnP support will be disabled");
 #endif
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, "io port");
 module_param(timeout, int, 0);
 MODULE_PARM_DESC(timeout, "range is 0-255 minutes, default is 1");
diff --git a/drivers/watchdog/wdt.c b/drivers/watchdog/wdt.c
index e0206b5b7d89..e481fbbc4ae7 100644
--- a/drivers/watchdog/wdt.c
+++ b/drivers/watchdog/wdt.c
@@ -78,9 +78,9 @@ static int irq = 11;
 
 static DEFINE_SPINLOCK(wdt_lock);
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, "WDT io port (default=0x240)");
-module_param(irq, int, 0);
+module_param_hw(irq, int, irq, 0);
 MODULE_PARM_DESC(irq, "WDT irq (default=11)");
 
 /* Support for the Fan Tachometer on the WDT501-P */
diff --git a/drivers/watchdog/wdt_pci.c b/drivers/watchdog/wdt_pci.c
index 48b2c058b009..bc7addc2dc06 100644
--- a/drivers/watchdog/wdt_pci.c
+++ b/drivers/watchdog/wdt_pci.c
@@ -332,7 +332,7 @@ static irqreturn_t wdtpci_interrupt(int irq, void *dev_id)
 		pr_crit("Would Reboot\n");
 #else
 		pr_crit("Initiating system reboot\n");
-		emergency_restart(NULL);
+		emergency_restart();
 #endif
 #else
 		pr_crit("Reset in 5ms\n");
diff --git a/drivers/watchdog/zx2967_wdt.c b/drivers/watchdog/zx2967_wdt.c
index e290d5a13a6d..c98252733c30 100644
--- a/drivers/watchdog/zx2967_wdt.c
+++ b/drivers/watchdog/zx2967_wdt.c
@@ -211,10 +211,8 @@ static int zx2967_wdt_probe(struct platform_device *pdev)
 
 	base = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	wdt->reg_base = devm_ioremap_resource(dev, base);
-	if (IS_ERR(wdt->reg_base)) {
-		dev_err(dev, "ioremap failed\n");
+	if (IS_ERR(wdt->reg_base))
 		return PTR_ERR(wdt->reg_base);
-	}
 
 	zx2967_wdt_reset_sysctrl(dev);
 
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index c1ec8ee80924..9e35032351a0 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -190,6 +190,7 @@ static void do_poweroff(void)
 {
 	switch (system_state) {
 	case SYSTEM_BOOTING:
+	case SYSTEM_SCHEDULING:
 		orderly_poweroff(true);
 		break;
 	case SYSTEM_RUNNING:
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 7a92a5e1d40c..feca75b07fdd 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -362,8 +362,8 @@ static int mmap_batch_fn(void *data, int nr, void *state)
 				st->global_error = 1;
 		}
 	}
-	st->va += PAGE_SIZE * nr;
-	st->index += nr;
+	st->va += XEN_PAGE_SIZE * nr;
+	st->index += nr / XEN_PFN_PER_PAGE;
 
 	return 0;
 }
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 4ac2ca8a7656..bf13d1ec51f3 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -233,12 +233,12 @@ static int tmem_cleancache_init_fs(size_t pagesize)
 	return xen_tmem_new_pool(uuid_private, 0, pagesize);
 }
 
-static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
+static int tmem_cleancache_init_shared_fs(uuid_t *uuid, size_t pagesize)
 {
 	struct tmem_pool_uuid shared_uuid;
 
-	shared_uuid.uuid_lo = *(u64 *)uuid;
-	shared_uuid.uuid_hi = *(u64 *)(&uuid[8]);
+	shared_uuid.uuid_lo = *(u64 *)&uuid->b[0];
+	shared_uuid.uuid_hi = *(u64 *)&uuid->b[8];
 	return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
 }
 
diff --git a/fs/Kconfig b/fs/Kconfig
index 83eab52fb3f6..b0e42b6a96b9 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -39,6 +39,7 @@ config FS_DAX
 	depends on MMU
 	depends on !(ARM || MIPS || SPARC)
 	select FS_IOMAP
+	select DAX
 	help
 	  Direct Access (DAX) can be used on memory-backed block devices.
 	  If the block device supports DAX and the filesystem supports DAX,
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 3062cceb5c2a..782d4d05a53b 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -350,7 +350,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 {
 	struct sockaddr_rxrpc srx;
 	struct afs_server *server;
-	struct uuid_v1 *r;
+	struct afs_uuid *r;
 	unsigned loop;
 	__be32 *b;
 	int ret;
@@ -380,7 +380,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 		}
 
 		_debug("unmarshall UUID");
-		call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL);
+		call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
 		if (!call->request)
 			return -ENOMEM;
 
@@ -453,7 +453,7 @@ static int afs_deliver_cb_probe(struct afs_call *call)
 static void SRXAFSCB_ProbeUuid(struct work_struct *work)
 {
 	struct afs_call *call = container_of(work, struct afs_call, work);
-	struct uuid_v1 *r = call->request;
+	struct afs_uuid *r = call->request;
 
 	struct {
 		__be32	match;
@@ -476,7 +476,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
  */
 static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 {
-	struct uuid_v1 *r;
+	struct afs_uuid *r;
 	unsigned loop;
 	__be32 *b;
 	int ret;
@@ -502,15 +502,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 		}
 
 		_debug("unmarshall UUID");
-		call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL);
+		call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
 		if (!call->request)
 			return -ENOMEM;
 
 		b = call->buffer;
 		r = call->request;
-		r->time_low			= b[0];
-		r->time_mid			= htons(ntohl(b[1]));
-		r->time_hi_and_version		= htons(ntohl(b[2]));
+		r->time_low			= ntohl(b[0]);
+		r->time_mid			= ntohl(b[1]);
+		r->time_hi_and_version		= ntohl(b[2]);
 		r->clock_seq_hi_and_reserved 	= ntohl(b[3]);
 		r->clock_seq_low		= ntohl(b[4]);
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 393672997cc2..4e2556606623 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -410,6 +410,15 @@ struct afs_interface {
 	unsigned	mtu;		/* MTU of interface */
 };
 
+struct afs_uuid {
+	__be32		time_low;			/* low part of timestamp */
+	__be16		time_mid;			/* mid part of timestamp */
+	__be16		time_hi_and_version;		/* high part of timestamp and version  */
+	__u8		clock_seq_hi_and_reserved;	/* clock seq hi and variant */
+	__u8		clock_seq_low;			/* clock seq low */
+	__u8		node[6];			/* spatially unique node ID (MAC addr) */
+};
+
 /*****************************************************************************/
 /*
  * cache.c
@@ -544,7 +553,7 @@ extern int afs_drop_inode(struct inode *);
  * main.c
  */
 extern struct workqueue_struct *afs_wq;
-extern struct uuid_v1 afs_uuid;
+extern struct afs_uuid afs_uuid;
 
 /*
  * misc.c
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 51d7d17bca57..9944770849da 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -31,7 +31,7 @@ static char *rootcell;
 module_param(rootcell, charp, 0);
 MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
 
-struct uuid_v1 afs_uuid;
+struct afs_uuid afs_uuid;
 struct workqueue_struct *afs_wq;
 
 /*
diff --git a/fs/aio.c b/fs/aio.c
index f52d925ee259..dcad3a66748c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1541,7 +1541,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	ssize_t ret;
 
 	/* enforce forwards compatibility on users */
-	if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
+	if (unlikely(iocb->aio_reserved2)) {
 		pr_debug("EINVAL: reserve field set\n");
 		return -EINVAL;
 	}
@@ -1568,6 +1568,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	req->common.ki_pos = iocb->aio_offset;
 	req->common.ki_complete = aio_complete;
 	req->common.ki_flags = iocb_flags(req->common.ki_filp);
+	req->common.ki_hint = file_write_hint(file);
 
 	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
 		/*
@@ -1586,6 +1587,18 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		req->common.ki_flags |= IOCB_EVENTFD;
 	}
 
+	ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags);
+	if (unlikely(ret)) {
+		pr_debug("EINVAL: aio_rw_flags\n");
+		goto out_put_req;
+	}
+
+	if ((req->common.ki_flags & IOCB_NOWAIT) &&
+			!(req->common.ki_flags & IOCB_DIRECT)) {
+		ret = -EOPNOTSUPP;
+		goto out_put_req;
+	}
+
 	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
 	if (unlikely(ret)) {
 		pr_debug("EFAULT: aio_key\n");
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index beef981aa54f..974f5346458a 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -83,7 +83,7 @@ struct autofs_info {
 struct autofs_wait_queue {
 	wait_queue_head_t queue;
 	struct autofs_wait_queue *next;
-	autofs_wqt_t wait_queue_token;
+	autofs_wqt_t wait_queue_entry_token;
 	/* We use the following to see what we are waiting for */
 	struct qstr name;
 	u32 dev;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 734cbf8d9676..dd9f1bebb5a3 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -344,7 +344,7 @@ static int autofs_dev_ioctl_fail(struct file *fp,
 	int status;
 
 	token = (autofs_wqt_t) param->fail.token;
-	status = param->fail.status ? param->fail.status : -ENOENT;
+	status = param->fail.status < 0 ? param->fail.status : -ENOENT;
 	return autofs4_wait_release(sbi, token, status);
 }
 
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 24a58bf9ca72..7071895b0678 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -104,7 +104,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 	size_t pktsz;
 
 	pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n",
-		 (unsigned long) wq->wait_queue_token,
+		 (unsigned long) wq->wait_queue_entry_token,
 		 wq->name.len, wq->name.name, type);
 
 	memset(&pkt, 0, sizeof(pkt)); /* For security reasons */
@@ -120,7 +120,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*mp);
 
-		mp->wait_queue_token = wq->wait_queue_token;
+		mp->wait_queue_entry_token = wq->wait_queue_entry_token;
 		mp->len = wq->name.len;
 		memcpy(mp->name, wq->name.name, wq->name.len);
 		mp->name[wq->name.len] = '\0';
@@ -133,7 +133,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*ep);
 
-		ep->wait_queue_token = wq->wait_queue_token;
+		ep->wait_queue_entry_token = wq->wait_queue_entry_token;
 		ep->len = wq->name.len;
 		memcpy(ep->name, wq->name.name, wq->name.len);
 		ep->name[wq->name.len] = '\0';
@@ -153,7 +153,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*packet);
 
-		packet->wait_queue_token = wq->wait_queue_token;
+		packet->wait_queue_entry_token = wq->wait_queue_entry_token;
 		packet->len = wq->name.len;
 		memcpy(packet->name, wq->name.name, wq->name.len);
 		packet->name[wq->name.len] = '\0';
@@ -428,7 +428,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 			return -ENOMEM;
 		}
 
-		wq->wait_queue_token = autofs4_next_wait_queue;
+		wq->wait_queue_entry_token = autofs4_next_wait_queue;
 		if (++autofs4_next_wait_queue == 0)
 			autofs4_next_wait_queue = 1;
 		wq->next = sbi->queues;
@@ -461,7 +461,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 		}
 
 		pr_debug("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
-			 (unsigned long) wq->wait_queue_token, wq->name.len,
+			 (unsigned long) wq->wait_queue_entry_token, wq->name.len,
 			 wq->name.name, notify);
 
 		/*
@@ -471,7 +471,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 	} else {
 		wq->wait_ctr++;
 		pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n",
-			 (unsigned long) wq->wait_queue_token, wq->name.len,
+			 (unsigned long) wq->wait_queue_entry_token, wq->name.len,
 			 wq->name.name, notify);
 		mutex_unlock(&sbi->wq_mutex);
 		kfree(qstr.name);
@@ -550,13 +550,13 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 }
 
 
-int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status)
+int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_entry_token, int status)
 {
 	struct autofs_wait_queue *wq, **wql;
 
 	mutex_lock(&sbi->wq_mutex);
 	for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
-		if (wq->wait_queue_token == wait_queue_token)
+		if (wq->wait_queue_entry_token == wait_queue_entry_token)
 			break;
 	}
 
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index f2deec0a62f0..25e312cb6071 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -1,7 +1,7 @@
 /*
  *	fs/bfs/inode.c
  *	BFS superblock and inode operations.
- *	Copyright (C) 1999-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ *	Copyright (C) 1999-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
  *	From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
  *
  *      Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
@@ -19,7 +19,7 @@
 #include <linux/uaccess.h>
 #include "bfs.h"
 
-MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
+MODULE_AUTHOR("Tigran Aivazian <aivazian.tigran@gmail.com>");
 MODULE_DESCRIPTION("SCO UnixWare BFS filesystem for Linux");
 MODULE_LICENSE("GPL");
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2a305c1a2d88..a7df151f8aba 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -225,6 +225,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 	bio_init(&bio, vecs, nr_pages);
 	bio.bi_bdev = bdev;
 	bio.bi_iter.bi_sector = pos >> 9;
+	bio.bi_write_hint = iocb->ki_hint;
 	bio.bi_private = current;
 	bio.bi_end_io = blkdev_bio_end_io_simple;
 
@@ -262,8 +263,11 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 	if (vecs != inline_vecs)
 		kfree(vecs);
 
-	if (unlikely(bio.bi_error))
-		return bio.bi_error;
+	if (unlikely(bio.bi_status))
+		ret = blk_status_to_errno(bio.bi_status);
+
+	bio_uninit(&bio);
+
 	return ret;
 }
 
@@ -288,16 +292,18 @@ static void blkdev_bio_end_io(struct bio *bio)
 	bool should_dirty = dio->should_dirty;
 
 	if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
-		if (bio->bi_error && !dio->bio.bi_error)
-			dio->bio.bi_error = bio->bi_error;
+		if (bio->bi_status && !dio->bio.bi_status)
+			dio->bio.bi_status = bio->bi_status;
 	} else {
 		if (!dio->is_sync) {
 			struct kiocb *iocb = dio->iocb;
-			ssize_t ret = dio->bio.bi_error;
+			ssize_t ret;
 
-			if (likely(!ret)) {
+			if (likely(!dio->bio.bi_status)) {
 				ret = dio->size;
 				iocb->ki_pos += ret;
+			} else {
+				ret = blk_status_to_errno(dio->bio.bi_status);
 			}
 
 			dio->iocb->ki_complete(iocb, ret, 0);
@@ -334,7 +340,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
 	loff_t pos = iocb->ki_pos;
 	blk_qc_t qc = BLK_QC_T_NONE;
-	int ret;
+	int ret = 0;
 
 	if ((pos | iov_iter_alignment(iter)) &
 	    (bdev_logical_block_size(bdev) - 1))
@@ -358,12 +364,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	for (;;) {
 		bio->bi_bdev = bdev;
 		bio->bi_iter.bi_sector = pos >> 9;
+		bio->bi_write_hint = iocb->ki_hint;
 		bio->bi_private = dio;
 		bio->bi_end_io = blkdev_bio_end_io;
 
 		ret = bio_iov_iter_get_pages(bio, iter);
 		if (unlikely(ret)) {
-			bio->bi_error = ret;
+			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
 			break;
 		}
@@ -412,7 +419,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	}
 	__set_current_state(TASK_RUNNING);
 
-	ret = dio->bio.bi_error;
+	if (!ret)
+		ret = blk_status_to_errno(dio->bio.bi_status);
 	if (likely(!ret))
 		ret = dio->size;
 
@@ -436,7 +444,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 
 static __init int blkdev_init(void)
 {
-	blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio));
+	blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
 	if (!blkdev_dio_pool)
 		return -ENOMEM;
 	return 0;
@@ -717,72 +725,6 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL_GPL(bdev_write_page);
 
-int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
-		pgoff_t *pgoff)
-{
-	phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;
-
-	if (pgoff)
-		*pgoff = PHYS_PFN(phys_off);
-	if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
-		return -EINVAL;
-	return 0;
-}
-EXPORT_SYMBOL(bdev_dax_pgoff);
-
-/**
- * bdev_dax_supported() - Check if the device supports dax for filesystem
- * @sb: The superblock of the device
- * @blocksize: The block size of the device
- *
- * This is a library function for filesystems to check if the block device
- * can be mounted with dax option.
- *
- * Return: negative errno if unsupported, 0 if supported.
- */
-int bdev_dax_supported(struct super_block *sb, int blocksize)
-{
-	struct block_device *bdev = sb->s_bdev;
-	struct dax_device *dax_dev;
-	pgoff_t pgoff;
-	int err, id;
-	void *kaddr;
-	pfn_t pfn;
-	long len;
-
-	if (blocksize != PAGE_SIZE) {
-		vfs_msg(sb, KERN_ERR, "error: unsupported blocksize for dax");
-		return -EINVAL;
-	}
-
-	err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
-	if (err) {
-		vfs_msg(sb, KERN_ERR, "error: unaligned partition for dax");
-		return err;
-	}
-
-	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
-	if (!dax_dev) {
-		vfs_msg(sb, KERN_ERR, "error: device does not support dax");
-		return -EOPNOTSUPP;
-	}
-
-	id = dax_read_lock();
-	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
-	dax_read_unlock(id);
-
-	put_dax(dax_dev);
-
-	if (len < 1) {
-		vfs_msg(sb, KERN_ERR,
-				"error: dax access failed (%ld)", len);
-		return len < 0 ? len : -EIO;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bdev_dax_supported);
-
 /*
  * pseudo-fs
  */
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 7699e16784d3..24865da63d8f 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -26,6 +26,11 @@
 #include "delayed-ref.h"
 #include "locking.h"
 
+enum merge_mode {
+	MERGE_IDENTICAL_KEYS = 1,
+	MERGE_IDENTICAL_PARENTS,
+};
+
 /* Just an arbitrary number so we can be sure this happened */
 #define BACKREF_FOUND_SHARED 6
 
@@ -533,7 +538,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 	 * slot==nritems. In that case, go to the next leaf before we continue.
 	 */
 	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
-		if (time_seq == (u64)-1)
+		if (time_seq == SEQ_LAST)
 			ret = btrfs_next_leaf(root, path);
 		else
 			ret = btrfs_next_old_leaf(root, path, time_seq);
@@ -577,7 +582,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 			eie = NULL;
 		}
 next:
-		if (time_seq == (u64)-1)
+		if (time_seq == SEQ_LAST)
 			ret = btrfs_next_item(root, path);
 		else
 			ret = btrfs_next_old_item(root, path, time_seq);
@@ -629,7 +634,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 
 	if (path->search_commit_root)
 		root_level = btrfs_header_level(root->commit_root);
-	else if (time_seq == (u64)-1)
+	else if (time_seq == SEQ_LAST)
 		root_level = btrfs_header_level(root->node);
 	else
 		root_level = btrfs_old_root_level(root, time_seq);
@@ -640,7 +645,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	}
 
 	path->lowest_level = level;
-	if (time_seq == (u64)-1)
+	if (time_seq == SEQ_LAST)
 		ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
 					0, 0);
 	else
@@ -809,14 +814,12 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
 /*
  * merge backrefs and adjust counts accordingly
  *
- * mode = 1: merge identical keys, if key is set
- *    FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
- *           additionally, we could even add a key range for the blocks we
- *           looked into to merge even more (-> replace unresolved refs by those
- *           having a parent).
- * mode = 2: merge identical parents
+ *    FIXME: For MERGE_IDENTICAL_KEYS, if we add more keys in __add_prelim_ref
+ *           then we can merge more here. Additionally, we could even add a key
+ *           range for the blocks we looked into to merge even more (-> replace
+ *           unresolved refs by those having a parent).
  */
-static void __merge_refs(struct list_head *head, int mode)
+static void __merge_refs(struct list_head *head, enum merge_mode mode)
 {
 	struct __prelim_ref *pos1;
 
@@ -829,7 +832,7 @@ static void __merge_refs(struct list_head *head, int mode)
 
 			if (!ref_for_same_block(ref1, ref2))
 				continue;
-			if (mode == 1) {
+			if (mode == MERGE_IDENTICAL_KEYS) {
 				if (!ref1->parent && ref2->parent)
 					swap(ref1, ref2);
 			} else {
@@ -1196,7 +1199,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
  *
  * NOTE: This can return values > 0
  *
- * If time_seq is set to (u64)-1, it will not search delayed_refs, and behave
+ * If time_seq is set to SEQ_LAST, it will not search delayed_refs, and behave
  * much like trans == NULL case, the difference only lies in it will not
  * commit root.
  * The special case is for qgroup to search roots in commit_transaction().
@@ -1243,7 +1246,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 		path->skip_locking = 1;
 	}
 
-	if (time_seq == (u64)-1)
+	if (time_seq == SEQ_LAST)
 		path->skip_locking = 1;
 
 	/*
@@ -1273,9 +1276,9 @@ again:
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 	if (trans && likely(trans->type != __TRANS_DUMMY) &&
-	    time_seq != (u64)-1) {
+	    time_seq != SEQ_LAST) {
 #else
-	if (trans && time_seq != (u64)-1) {
+	if (trans && time_seq != SEQ_LAST) {
 #endif
 		/*
 		 * look if there are updates for this ref queued and lock the
@@ -1286,7 +1289,7 @@ again:
 		head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
 		if (head) {
 			if (!mutex_trylock(&head->mutex)) {
-				atomic_inc(&head->node.refs);
+				refcount_inc(&head->node.refs);
 				spin_unlock(&delayed_refs->lock);
 
 				btrfs_release_path(path);
@@ -1374,7 +1377,7 @@ again:
 	if (ret)
 		goto out;
 
-	__merge_refs(&prefs, 1);
+	__merge_refs(&prefs, MERGE_IDENTICAL_KEYS);
 
 	ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
 				      extent_item_pos, total_refs,
@@ -1382,7 +1385,7 @@ again:
 	if (ret)
 		goto out;
 
-	__merge_refs(&prefs, 2);
+	__merge_refs(&prefs, MERGE_IDENTICAL_PARENTS);
 
 	while (!list_empty(&prefs)) {
 		ref = list_first_entry(&prefs, struct __prelim_ref, list);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 0c6baaba0651..d87ac27a5f2b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -125,6 +125,13 @@ struct btrfs_inode {
 	u64 delalloc_bytes;
 
 	/*
+	 * Total number of bytes pending delalloc that fall within a file
+	 * range that is either a hole or beyond EOF (and no prealloc extent
+	 * exists in the range). This is always <= delalloc_bytes.
+	 */
+	u64 new_delalloc_bytes;
+
+	/*
 	 * total number of bytes pending defrag, used by stat to check whether
 	 * it needs COW.
 	 */
@@ -303,7 +310,8 @@ struct btrfs_dio_private {
 	 * The original bio may be split to several sub-bios, this is
 	 * done during endio of sub-bios
 	 */
-	int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int);
+	blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
+			blk_status_t);
 };
 
 /*
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index ab14c2e635ca..4ded1c3f92b8 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2129,7 +2129,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
 	/* mutex is not held! This is not save if IO is not yet completed
 	 * on umount */
 	iodone_w_error = 0;
-	if (bp->bi_error)
+	if (bp->bi_status)
 		iodone_w_error = 1;
 
 	BUG_ON(NULL == block);
@@ -2143,7 +2143,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
 		if ((dev_state->state->print_mask &
 		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
 			pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
-			       bp->bi_error,
+			       bp->bi_status,
 			       btrfsic_get_block_type(dev_state->state, block),
 			       block->logical_bytenr, dev_state->name,
 			       block->dev_bytenr, block->mirror_num);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c7721a6aa3bb..a2fad39f79ba 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -44,7 +44,7 @@
 
 struct compressed_bio {
 	/* number of bios pending for this compressed extent */
-	atomic_t pending_bios;
+	refcount_t pending_bios;
 
 	/* the pages with the compressed data on them */
 	struct page **compressed_pages;
@@ -155,13 +155,13 @@ static void end_compressed_bio_read(struct bio *bio)
 	unsigned long index;
 	int ret;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		cb->errors = 1;
 
 	/* if there are more bios still pending for this compressed
 	 * extent, just exit
 	 */
-	if (!atomic_dec_and_test(&cb->pending_bios))
+	if (!refcount_dec_and_test(&cb->pending_bios))
 		goto out;
 
 	inode = cb->inode;
@@ -268,13 +268,13 @@ static void end_compressed_bio_write(struct bio *bio)
 	struct page *page;
 	unsigned long index;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		cb->errors = 1;
 
 	/* if there are more bios still pending for this compressed
 	 * extent, just exit
 	 */
-	if (!atomic_dec_and_test(&cb->pending_bios))
+	if (!refcount_dec_and_test(&cb->pending_bios))
 		goto out;
 
 	/* ok, we're the last bio for this extent, step one is to
@@ -287,7 +287,7 @@ static void end_compressed_bio_write(struct bio *bio)
 					 cb->start,
 					 cb->start + cb->len - 1,
 					 NULL,
-					 bio->bi_error ? 0 : 1);
+					 bio->bi_status ? 0 : 1);
 	cb->compressed_pages[0]->mapping = NULL;
 
 	end_compressed_writeback(inode, cb);
@@ -320,7 +320,7 @@ out:
  * This also checksums the file bytes and gets things ready for
  * the end io hooks.
  */
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				 unsigned long len, u64 disk_start,
 				 unsigned long compressed_len,
 				 struct page **compressed_pages,
@@ -335,14 +335,14 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	struct page *page;
 	u64 first_byte = disk_start;
 	struct block_device *bdev;
-	int ret;
+	blk_status_t ret;
 	int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
 	WARN_ON(start & ((u64)PAGE_SIZE - 1));
 	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
 	if (!cb)
-		return -ENOMEM;
-	atomic_set(&cb->pending_bios, 0);
+		return BLK_STS_RESOURCE;
+	refcount_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
 	cb->start = start;
@@ -358,27 +358,27 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
 	if (!bio) {
 		kfree(cb);
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 	}
 	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 	bio->bi_private = cb;
 	bio->bi_end_io = end_compressed_bio_write;
-	atomic_inc(&cb->pending_bios);
+	refcount_set(&cb->pending_bios, 1);
 
 	/* create and submit bios for the compressed pages */
 	bytes_left = compressed_len;
 	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
+		int submit = 0;
+
 		page = compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
 		if (bio->bi_iter.bi_size)
-			ret = io_tree->ops->merge_bio_hook(page, 0,
+			submit = io_tree->ops->merge_bio_hook(page, 0,
 							   PAGE_SIZE,
 							   bio, 0);
-		else
-			ret = 0;
 
 		page->mapping = NULL;
-		if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) <
+		if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
 		    PAGE_SIZE) {
 			bio_get(bio);
 
@@ -388,7 +388,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 			 * we inc the count.  Otherwise, the cb might get
 			 * freed before we're done setting it up
 			 */
-			atomic_inc(&cb->pending_bios);
+			refcount_inc(&cb->pending_bios);
 			ret = btrfs_bio_wq_end_io(fs_info, bio,
 						  BTRFS_WQ_ENDIO_DATA);
 			BUG_ON(ret); /* -ENOMEM */
@@ -400,7 +400,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 			ret = btrfs_map_bio(fs_info, bio, 0, 1);
 			if (ret) {
-				bio->bi_error = ret;
+				bio->bi_status = ret;
 				bio_endio(bio);
 			}
 
@@ -434,7 +434,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 	ret = btrfs_map_bio(fs_info, bio, 0, 1);
 	if (ret) {
-		bio->bi_error = ret;
+		bio->bi_status = ret;
 		bio_endio(bio);
 	}
 
@@ -569,7 +569,7 @@ next:
  * After the compressed pages are read, we copy the bytes into the
  * bio we were passed and then call the bio end_io calls
  */
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -586,7 +586,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	u64 em_len;
 	u64 em_start;
 	struct extent_map *em;
-	int ret = -ENOMEM;
+	blk_status_t ret = BLK_STS_RESOURCE;
 	int faili = 0;
 	u32 *sums;
 
@@ -600,14 +600,14 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				   PAGE_SIZE);
 	read_unlock(&em_tree->lock);
 	if (!em)
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	compressed_len = em->block_len;
 	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
 	if (!cb)
 		goto out;
 
-	atomic_set(&cb->pending_bios, 0);
+	refcount_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
 	cb->mirror_num = mirror_num;
@@ -638,7 +638,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 							      __GFP_HIGHMEM);
 		if (!cb->compressed_pages[pg_index]) {
 			faili = pg_index - 1;
-			ret = -ENOMEM;
+			ret = BLK_STS_RESOURCE;
 			goto fail2;
 		}
 	}
@@ -656,22 +656,22 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	bio_set_op_attrs (comp_bio, REQ_OP_READ, 0);
 	comp_bio->bi_private = cb;
 	comp_bio->bi_end_io = end_compressed_bio_read;
-	atomic_inc(&cb->pending_bios);
+	refcount_set(&cb->pending_bios, 1);
 
 	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+		int submit = 0;
+
 		page = cb->compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
 		page->index = em_start >> PAGE_SHIFT;
 
 		if (comp_bio->bi_iter.bi_size)
-			ret = tree->ops->merge_bio_hook(page, 0,
+			submit = tree->ops->merge_bio_hook(page, 0,
 							PAGE_SIZE,
 							comp_bio, 0);
-		else
-			ret = 0;
 
 		page->mapping = NULL;
-		if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
+		if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
 		    PAGE_SIZE) {
 			bio_get(comp_bio);
 
@@ -685,7 +685,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			 * we inc the count.  Otherwise, the cb might get
 			 * freed before we're done setting it up
 			 */
-			atomic_inc(&cb->pending_bios);
+			refcount_inc(&cb->pending_bios);
 
 			if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 				ret = btrfs_lookup_bio_sums(inode, comp_bio,
@@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 			ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
 			if (ret) {
-				comp_bio->bi_error = ret;
+				comp_bio->bi_status = ret;
 				bio_endio(comp_bio);
 			}
 
@@ -726,7 +726,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
 	if (ret) {
-		comp_bio->bi_error = ret;
+		comp_bio->bi_status = ret;
 		bio_endio(comp_bio);
 	}
 
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 39ec43ab8df1..680d4265d601 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -48,12 +48,12 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
 			      unsigned long total_out, u64 disk_start,
 			      struct bio *bio);
 
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				  unsigned long len, u64 disk_start,
 				  unsigned long compressed_len,
 				  struct page **compressed_pages,
 				  unsigned long nr_pages);
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags);
 
 enum btrfs_compression_type {
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 1c3b6c54d5ee..a3a75f1de002 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -567,7 +567,7 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
 static noinline int
 tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *eb, int dst_slot, int src_slot,
-			 int nr_items, gfp_t flags)
+			 int nr_items)
 {
 	struct tree_mod_elem *tm = NULL;
 	struct tree_mod_elem **tm_list = NULL;
@@ -578,11 +578,11 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 	if (!tree_mod_need_log(fs_info, eb))
 		return 0;
 
-	tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), flags);
+	tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS);
 	if (!tm_list)
 		return -ENOMEM;
 
-	tm = kzalloc(sizeof(*tm), flags);
+	tm = kzalloc(sizeof(*tm), GFP_NOFS);
 	if (!tm) {
 		ret = -ENOMEM;
 		goto free_tms;
@@ -596,7 +596,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 
 	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
 		tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
-		    MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
+		    MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
 		if (!tm_list[i]) {
 			ret = -ENOMEM;
 			goto free_tms;
@@ -663,7 +663,7 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
 static noinline int
 tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *old_root,
-			 struct extent_buffer *new_root, gfp_t flags,
+			 struct extent_buffer *new_root,
 			 int log_removal)
 {
 	struct tree_mod_elem *tm = NULL;
@@ -678,14 +678,14 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 	if (log_removal && btrfs_header_level(old_root) > 0) {
 		nritems = btrfs_header_nritems(old_root);
 		tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
-				  flags);
+				  GFP_NOFS);
 		if (!tm_list) {
 			ret = -ENOMEM;
 			goto free_tms;
 		}
 		for (i = 0; i < nritems; i++) {
 			tm_list[i] = alloc_tree_mod_elem(old_root, i,
-			    MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
+			    MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
 			if (!tm_list[i]) {
 				ret = -ENOMEM;
 				goto free_tms;
@@ -693,7 +693,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 		}
 	}
 
-	tm = kzalloc(sizeof(*tm), flags);
+	tm = kzalloc(sizeof(*tm), GFP_NOFS);
 	if (!tm) {
 		ret = -ENOMEM;
 		goto free_tms;
@@ -873,7 +873,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 {
 	int ret;
 	ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset,
-				       nr_items, GFP_NOFS);
+				       nr_items);
 	BUG_ON(ret < 0);
 }
 
@@ -943,7 +943,7 @@ tree_mod_log_set_root_pointer(struct btrfs_root *root,
 {
 	int ret;
 	ret = tree_mod_log_insert_root(root->fs_info, root->node,
-				       new_root_node, GFP_NOFS, log_removal);
+				       new_root_node, log_removal);
 	BUG_ON(ret < 0);
 }
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3e21211e99c3..a0d0c79d95ed 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -39,6 +39,7 @@
 #include <linux/security.h>
 #include <linux/sizes.h>
 #include <linux/dynamic_debug.h>
+#include <linux/refcount.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "async-thread.h"
@@ -518,7 +519,7 @@ struct btrfs_caching_control {
 	struct btrfs_work work;
 	struct btrfs_block_group_cache *block_group;
 	u64 progress;
-	atomic_t count;
+	refcount_t count;
 };
 
 /* Once caching_thread() finds this much free space, it will wake up waiters. */
@@ -538,6 +539,14 @@ struct btrfs_io_ctl {
 	unsigned check_crcs:1;
 };
 
+/*
+ * Tree to record all locked full stripes of a RAID5/6 block group
+ */
+struct btrfs_full_stripe_locks_tree {
+	struct rb_root root;
+	struct mutex lock;
+};
+
 struct btrfs_block_group_cache {
 	struct btrfs_key key;
 	struct btrfs_block_group_item item;
@@ -648,6 +657,9 @@ struct btrfs_block_group_cache {
 	 * Protected by free_space_lock.
 	 */
 	int needs_free_space;
+
+	/* Record locked full stripes for RAID5/6 block group */
+	struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
 };
 
 /* delayed seq elem */
@@ -658,6 +670,8 @@ struct seq_list {
 
 #define SEQ_LIST_INIT(name)	{ .list = LIST_HEAD_INIT((name).list), .seq = 0 }
 
+#define SEQ_LAST	((u64)-1)
+
 enum btrfs_orphan_cleanup_state {
 	ORPHAN_CLEANUP_STARTED	= 1,
 	ORPHAN_CLEANUP_DONE	= 2,
@@ -702,6 +716,11 @@ struct btrfs_delayed_root;
 #define BTRFS_FS_BTREE_ERR			11
 #define BTRFS_FS_LOG1_ERR			12
 #define BTRFS_FS_LOG2_ERR			13
+/*
+ * Indicate that a whole-filesystem exclusive operation is running
+ * (device replace, resize, device add/delete, balance)
+ */
+#define BTRFS_FS_EXCL_OP			14
 
 struct btrfs_fs_info {
 	u8 fsid[BTRFS_FSID_SIZE];
@@ -1066,8 +1085,6 @@ struct btrfs_fs_info {
 	/* device replace state */
 	struct btrfs_dev_replace dev_replace;
 
-	atomic_t mutually_exclusive_operation_running;
-
 	struct percpu_counter bio_counter;
 	wait_queue_head_t replace_wait;
 
@@ -1220,7 +1237,7 @@ struct btrfs_root {
 	dev_t anon_dev;
 
 	spinlock_t root_item_lock;
-	atomic_t refs;
+	refcount_t refs;
 
 	struct mutex delalloc_mutex;
 	spinlock_t delalloc_lock;
@@ -2546,7 +2563,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes);
 static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info,
 						 unsigned num_items)
 {
-	return fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
+	return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
 }
 
 /*
@@ -2556,7 +2573,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info,
 static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_fs_info *fs_info,
 						 unsigned num_items)
 {
-	return fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
+	return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
 }
 
 int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
@@ -3061,8 +3078,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
 struct btrfs_dio_private;
 int btrfs_del_csums(struct btrfs_trans_handle *trans,
 		    struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
 			      u64 logical_offset);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
@@ -3077,7 +3094,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct btrfs_ordered_sum *sums);
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
 		       u64 file_start, int contig);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 			     struct list_head *list, int search_commit);
@@ -3646,6 +3663,12 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
 			   struct btrfs_device *dev);
 int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
 			 struct btrfs_scrub_progress *progress);
+static inline void btrfs_init_full_stripe_locks_tree(
+			struct btrfs_full_stripe_locks_tree *locks_root)
+{
+	locks_root->root = RB_ROOT;
+	mutex_init(&locks_root->lock);
+}
 
 /* dev-replace.c */
 void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);
@@ -3670,8 +3693,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
 			      struct btrfs_key *start, struct btrfs_key *end);
 int btrfs_reada_wait(void *handle);
 void btrfs_reada_detach(void *handle);
-int btree_readahead_hook(struct btrfs_fs_info *fs_info,
-			 struct extent_buffer *eb, int err);
+int btree_readahead_hook(struct extent_buffer *eb, int err);
 
 static inline int is_fstree(u64 rootid)
 {
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 1aff676f0e5b..8ae409b5a61d 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -52,7 +52,7 @@ static inline void btrfs_init_delayed_node(
 {
 	delayed_node->root = root;
 	delayed_node->inode_id = inode_id;
-	atomic_set(&delayed_node->refs, 0);
+	refcount_set(&delayed_node->refs, 0);
 	delayed_node->ins_root = RB_ROOT;
 	delayed_node->del_root = RB_ROOT;
 	mutex_init(&delayed_node->mutex);
@@ -81,7 +81,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 
 	node = READ_ONCE(btrfs_inode->delayed_node);
 	if (node) {
-		atomic_inc(&node->refs);
+		refcount_inc(&node->refs);
 		return node;
 	}
 
@@ -89,14 +89,14 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 	node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
 	if (node) {
 		if (btrfs_inode->delayed_node) {
-			atomic_inc(&node->refs);	/* can be accessed */
+			refcount_inc(&node->refs);	/* can be accessed */
 			BUG_ON(btrfs_inode->delayed_node != node);
 			spin_unlock(&root->inode_lock);
 			return node;
 		}
 		btrfs_inode->delayed_node = node;
 		/* can be accessed and cached in the inode */
-		atomic_add(2, &node->refs);
+		refcount_add(2, &node->refs);
 		spin_unlock(&root->inode_lock);
 		return node;
 	}
@@ -125,7 +125,7 @@ again:
 	btrfs_init_delayed_node(node, root, ino);
 
 	/* cached in the btrfs inode and can be accessed */
-	atomic_add(2, &node->refs);
+	refcount_set(&node->refs, 2);
 
 	ret = radix_tree_preload(GFP_NOFS);
 	if (ret) {
@@ -166,7 +166,7 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
 	} else {
 		list_add_tail(&node->n_list, &root->node_list);
 		list_add_tail(&node->p_list, &root->prepare_list);
-		atomic_inc(&node->refs);	/* inserted into list */
+		refcount_inc(&node->refs);	/* inserted into list */
 		root->nodes++;
 		set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
 	}
@@ -180,7 +180,7 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
 	spin_lock(&root->lock);
 	if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
 		root->nodes--;
-		atomic_dec(&node->refs);	/* not in the list */
+		refcount_dec(&node->refs);	/* not in the list */
 		list_del_init(&node->n_list);
 		if (!list_empty(&node->p_list))
 			list_del_init(&node->p_list);
@@ -201,7 +201,7 @@ static struct btrfs_delayed_node *btrfs_first_delayed_node(
 
 	p = delayed_root->node_list.next;
 	node = list_entry(p, struct btrfs_delayed_node, n_list);
-	atomic_inc(&node->refs);
+	refcount_inc(&node->refs);
 out:
 	spin_unlock(&delayed_root->lock);
 
@@ -228,7 +228,7 @@ static struct btrfs_delayed_node *btrfs_next_delayed_node(
 		p = node->n_list.next;
 
 	next = list_entry(p, struct btrfs_delayed_node, n_list);
-	atomic_inc(&next->refs);
+	refcount_inc(&next->refs);
 out:
 	spin_unlock(&delayed_root->lock);
 
@@ -253,11 +253,11 @@ static void __btrfs_release_delayed_node(
 		btrfs_dequeue_delayed_node(delayed_root, delayed_node);
 	mutex_unlock(&delayed_node->mutex);
 
-	if (atomic_dec_and_test(&delayed_node->refs)) {
+	if (refcount_dec_and_test(&delayed_node->refs)) {
 		bool free = false;
 		struct btrfs_root *root = delayed_node->root;
 		spin_lock(&root->inode_lock);
-		if (atomic_read(&delayed_node->refs) == 0) {
+		if (refcount_read(&delayed_node->refs) == 0) {
 			radix_tree_delete(&root->delayed_nodes_tree,
 					  delayed_node->inode_id);
 			free = true;
@@ -286,7 +286,7 @@ static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
 	p = delayed_root->prepare_list.next;
 	list_del_init(p);
 	node = list_entry(p, struct btrfs_delayed_node, p_list);
-	atomic_inc(&node->refs);
+	refcount_inc(&node->refs);
 out:
 	spin_unlock(&delayed_root->lock);
 
@@ -308,7 +308,7 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
 		item->ins_or_del = 0;
 		item->bytes_reserved = 0;
 		item->delayed_node = NULL;
-		atomic_set(&item->refs, 1);
+		refcount_set(&item->refs, 1);
 	}
 	return item;
 }
@@ -483,7 +483,7 @@ static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
 {
 	if (item) {
 		__btrfs_remove_delayed_item(item);
-		if (atomic_dec_and_test(&item->refs))
+		if (refcount_dec_and_test(&item->refs))
 			kfree(item);
 	}
 }
@@ -1600,14 +1600,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
 	mutex_lock(&delayed_node->mutex);
 	item = __btrfs_first_delayed_insertion_item(delayed_node);
 	while (item) {
-		atomic_inc(&item->refs);
+		refcount_inc(&item->refs);
 		list_add_tail(&item->readdir_list, ins_list);
 		item = __btrfs_next_delayed_item(item);
 	}
 
 	item = __btrfs_first_delayed_deletion_item(delayed_node);
 	while (item) {
-		atomic_inc(&item->refs);
+		refcount_inc(&item->refs);
 		list_add_tail(&item->readdir_list, del_list);
 		item = __btrfs_next_delayed_item(item);
 	}
@@ -1621,7 +1621,7 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
 	 * insert/delete delayed items in this period. So we also needn't
 	 * requeue or dequeue this delayed node.
 	 */
-	atomic_dec(&delayed_node->refs);
+	refcount_dec(&delayed_node->refs);
 
 	return true;
 }
@@ -1634,13 +1634,13 @@ void btrfs_readdir_put_delayed_items(struct inode *inode,
 
 	list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
 		list_del(&curr->readdir_list);
-		if (atomic_dec_and_test(&curr->refs))
+		if (refcount_dec_and_test(&curr->refs))
 			kfree(curr);
 	}
 
 	list_for_each_entry_safe(curr, next, del_list, readdir_list) {
 		list_del(&curr->readdir_list);
-		if (atomic_dec_and_test(&curr->refs))
+		if (refcount_dec_and_test(&curr->refs))
 			kfree(curr);
 	}
 
@@ -1667,7 +1667,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
 		list_del(&curr->readdir_list);
 		ret = (curr->key.offset == index);
 
-		if (atomic_dec_and_test(&curr->refs))
+		if (refcount_dec_and_test(&curr->refs))
 			kfree(curr);
 
 		if (ret)
@@ -1705,7 +1705,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
 		list_del(&curr->readdir_list);
 
 		if (curr->key.offset < ctx->pos) {
-			if (atomic_dec_and_test(&curr->refs))
+			if (refcount_dec_and_test(&curr->refs))
 				kfree(curr);
 			continue;
 		}
@@ -1722,7 +1722,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
 		over = !dir_emit(ctx, name, name_len,
 			       location.objectid, d_type);
 
-		if (atomic_dec_and_test(&curr->refs))
+		if (refcount_dec_and_test(&curr->refs))
 			kfree(curr);
 
 		if (over)
@@ -1963,7 +1963,7 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
 		inode_id = delayed_nodes[n - 1]->inode_id + 1;
 
 		for (i = 0; i < n; i++)
-			atomic_inc(&delayed_nodes[i]->refs);
+			refcount_inc(&delayed_nodes[i]->refs);
 		spin_unlock(&root->inode_lock);
 
 		for (i = 0; i < n; i++) {
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 40327cc3b99a..c4189d495934 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -26,7 +26,7 @@
 #include <linux/list.h>
 #include <linux/wait.h>
 #include <linux/atomic.h>
-
+#include <linux/refcount.h>
 #include "ctree.h"
 
 /* types of the delayed item */
@@ -67,7 +67,7 @@ struct btrfs_delayed_node {
 	struct rb_root del_root;
 	struct mutex mutex;
 	struct btrfs_inode_item inode_item;
-	atomic_t refs;
+	refcount_t refs;
 	u64 index_cnt;
 	unsigned long flags;
 	int count;
@@ -80,7 +80,7 @@ struct btrfs_delayed_item {
 	struct list_head readdir_list;	/* used for readdir items */
 	u64 bytes_reserved;
 	struct btrfs_delayed_node *delayed_node;
-	atomic_t refs;
+	refcount_t refs;
 	int ins_or_del;
 	u32 data_len;
 	char data[0];
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 6eb80952efb3..be70d90dfee5 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -164,7 +164,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
 	if (mutex_trylock(&head->mutex))
 		return 0;
 
-	atomic_inc(&head->node.refs);
+	refcount_inc(&head->node.refs);
 	spin_unlock(&delayed_refs->lock);
 
 	mutex_lock(&head->mutex);
@@ -590,7 +590,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 	delayed_refs = &trans->transaction->delayed_refs;
 
 	/* first set the basic ref node struct up */
-	atomic_set(&ref->refs, 1);
+	refcount_set(&ref->refs, 1);
 	ref->bytenr = bytenr;
 	ref->num_bytes = num_bytes;
 	ref->ref_mod = count_mod;
@@ -682,7 +682,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	delayed_refs = &trans->transaction->delayed_refs;
 
 	/* first set the basic ref node struct up */
-	atomic_set(&ref->refs, 1);
+	refcount_set(&ref->refs, 1);
 	ref->bytenr = bytenr;
 	ref->num_bytes = num_bytes;
 	ref->ref_mod = 1;
@@ -739,7 +739,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 		seq = atomic64_read(&fs_info->tree_mod_seq);
 
 	/* first set the basic ref node struct up */
-	atomic_set(&ref->refs, 1);
+	refcount_set(&ref->refs, 1);
 	ref->bytenr = bytenr;
 	ref->num_bytes = num_bytes;
 	ref->ref_mod = 1;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 0e537f98f1a1..c0264ff01b53 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -18,6 +18,8 @@
 #ifndef __DELAYED_REF__
 #define __DELAYED_REF__
 
+#include <linux/refcount.h>
+
 /* these are the possible values of struct btrfs_delayed_ref_node->action */
 #define BTRFS_ADD_DELAYED_REF    1 /* add one backref to the tree */
 #define BTRFS_DROP_DELAYED_REF   2 /* delete one backref from the tree */
@@ -53,7 +55,7 @@ struct btrfs_delayed_ref_node {
 	u64 seq;
 
 	/* ref count on this data structure */
-	atomic_t refs;
+	refcount_t refs;
 
 	/*
 	 * how many refs is this entry adding or deleting.  For
@@ -220,8 +222,8 @@ btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op)
 
 static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
 {
-	WARN_ON(atomic_read(&ref->refs) == 0);
-	if (atomic_dec_and_test(&ref->refs)) {
+	WARN_ON(refcount_read(&ref->refs) == 0);
+	if (refcount_dec_and_test(&ref->refs)) {
 		WARN_ON(ref->in_tree);
 		switch (ref->type) {
 		case BTRFS_TREE_BLOCK_REF_KEY:
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index e653921f05d9..5fe1ca8abc70 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -546,8 +546,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 		mutex_unlock(&fs_info->chunk_mutex);
 		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 		mutex_unlock(&uuid_mutex);
+		btrfs_rm_dev_replace_blocked(fs_info);
 		if (tgt_device)
 			btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
+		btrfs_rm_dev_replace_unblocked(fs_info);
 		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
 
 		return scrub_ret;
@@ -665,7 +667,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
 		srcdev = dev_replace->srcdev;
-		args->status.progress_1000 = div_u64(dev_replace->cursor_left,
+		args->status.progress_1000 = div64_u64(dev_replace->cursor_left,
 			div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
 		break;
 	}
@@ -784,8 +786,7 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
 	}
 	btrfs_dev_replace_unlock(dev_replace, 1);
 
-	WARN_ON(atomic_xchg(
-		&fs_info->mutually_exclusive_operation_running, 1));
+	WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
 	task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
 	return PTR_ERR_OR_ZERO(task);
 }
@@ -814,7 +815,7 @@ static int btrfs_dev_replace_kthread(void *data)
 			(unsigned int)progress);
 	}
 	btrfs_dev_replace_continue_on_mount(fs_info);
-	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 
 	return 0;
 }
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 60a750678a82..c24d615e3d7f 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -468,7 +468,7 @@ int verify_dir_item(struct btrfs_fs_info *fs_info,
 
 	if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
 		btrfs_crit(fs_info, "invalid dir item name len: %u",
-		       (unsigned)btrfs_dir_data_len(leaf, dir_item));
+		       (unsigned)btrfs_dir_name_len(leaf, dir_item));
 		return 1;
 	}
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 061c1d1f774f..6036d15b47b8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -87,7 +87,7 @@ struct btrfs_end_io_wq {
 	bio_end_io_t *end_io;
 	void *private;
 	struct btrfs_fs_info *info;
-	int error;
+	blk_status_t status;
 	enum btrfs_wq_endio_type metadata;
 	struct list_head list;
 	struct btrfs_work work;
@@ -131,7 +131,7 @@ struct async_submit_bio {
 	 */
 	u64 bio_offset;
 	struct btrfs_work work;
-	int error;
+	blk_status_t status;
 };
 
 /*
@@ -762,7 +762,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 err:
 	if (reads_done &&
 	    test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
-		btree_readahead_hook(fs_info, eb, ret);
+		btree_readahead_hook(eb, ret);
 
 	if (ret) {
 		/*
@@ -787,7 +787,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
 	eb->read_mirror = failed_mirror;
 	atomic_dec(&eb->io_pages);
 	if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
-		btree_readahead_hook(eb->fs_info, eb, -EIO);
+		btree_readahead_hook(eb, -EIO);
 	return -EIO;	/* we fixed nothing */
 }
 
@@ -799,7 +799,7 @@ static void end_workqueue_bio(struct bio *bio)
 	btrfs_work_func_t func;
 
 	fs_info = end_io_wq->info;
-	end_io_wq->error = bio->bi_error;
+	end_io_wq->status = bio->bi_status;
 
 	if (bio_op(bio) == REQ_OP_WRITE) {
 		if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@ -836,19 +836,19 @@ static void end_workqueue_bio(struct bio *bio)
 	btrfs_queue_work(wq, &end_io_wq->work);
 }
 
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 			enum btrfs_wq_endio_type metadata)
 {
 	struct btrfs_end_io_wq *end_io_wq;
 
 	end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
 	if (!end_io_wq)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	end_io_wq->private = bio->bi_private;
 	end_io_wq->end_io = bio->bi_end_io;
 	end_io_wq->info = info;
-	end_io_wq->error = 0;
+	end_io_wq->status = 0;
 	end_io_wq->bio = bio;
 	end_io_wq->metadata = metadata;
 
@@ -868,14 +868,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
 static void run_one_async_start(struct btrfs_work *work)
 {
 	struct async_submit_bio *async;
-	int ret;
+	blk_status_t ret;
 
 	async = container_of(work, struct  async_submit_bio, work);
 	ret = async->submit_bio_start(async->inode, async->bio,
 				      async->mirror_num, async->bio_flags,
 				      async->bio_offset);
 	if (ret)
-		async->error = ret;
+		async->status = ret;
 }
 
 static void run_one_async_done(struct btrfs_work *work)
@@ -898,8 +898,8 @@ static void run_one_async_done(struct btrfs_work *work)
 		wake_up(&fs_info->async_submit_wait);
 
 	/* If an error occurred we just want to clean up the bio and move on */
-	if (async->error) {
-		async->bio->bi_error = async->error;
+	if (async->status) {
+		async->bio->bi_status = async->status;
 		bio_endio(async->bio);
 		return;
 	}
@@ -916,18 +916,17 @@ static void run_one_async_free(struct btrfs_work *work)
 	kfree(async);
 }
 
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
-			struct bio *bio, int mirror_num,
-			unsigned long bio_flags,
-			u64 bio_offset,
-			extent_submit_bio_hook_t *submit_bio_start,
-			extent_submit_bio_hook_t *submit_bio_done)
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
+		struct inode *inode, struct bio *bio, int mirror_num,
+		unsigned long bio_flags, u64 bio_offset,
+		extent_submit_bio_hook_t *submit_bio_start,
+		extent_submit_bio_hook_t *submit_bio_done)
 {
 	struct async_submit_bio *async;
 
 	async = kmalloc(sizeof(*async), GFP_NOFS);
 	if (!async)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	async->inode = inode;
 	async->bio = bio;
@@ -941,7 +940,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 	async->bio_flags = bio_flags;
 	async->bio_offset = bio_offset;
 
-	async->error = 0;
+	async->status = 0;
 
 	atomic_inc(&fs_info->nr_async_submits);
 
@@ -959,7 +958,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 	return 0;
 }
 
-static int btree_csum_one_bio(struct bio *bio)
+static blk_status_t btree_csum_one_bio(struct bio *bio)
 {
 	struct bio_vec *bvec;
 	struct btrfs_root *root;
@@ -972,12 +971,12 @@ static int btree_csum_one_bio(struct bio *bio)
 			break;
 	}
 
-	return ret;
+	return errno_to_blk_status(ret);
 }
 
-static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
-				    int mirror_num, unsigned long bio_flags,
-				    u64 bio_offset)
+static blk_status_t __btree_submit_bio_start(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset)
 {
 	/*
 	 * when we're called for a write, we're already in the async
@@ -986,11 +985,11 @@ static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
 	return btree_csum_one_bio(bio);
 }
 
-static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
-				 int mirror_num, unsigned long bio_flags,
-				 u64 bio_offset)
+static blk_status_t __btree_submit_bio_done(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset)
 {
-	int ret;
+	blk_status_t ret;
 
 	/*
 	 * when we're called for a write, we're already in the async
@@ -998,7 +997,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
 	 */
 	ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1);
 	if (ret) {
-		bio->bi_error = ret;
+		bio->bi_status = ret;
 		bio_endio(bio);
 	}
 	return ret;
@@ -1015,13 +1014,13 @@ static int check_async_write(unsigned long bio_flags)
 	return 1;
 }
 
-static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
+static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags,
 				 u64 bio_offset)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	int async = check_async_write(bio_flags);
-	int ret;
+	blk_status_t ret;
 
 	if (bio_op(bio) != REQ_OP_WRITE) {
 		/*
@@ -1054,7 +1053,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
 	return 0;
 
 out_w_error:
-	bio->bi_error = ret;
+	bio->bi_status = ret;
 	bio_endio(bio);
 	return ret;
 }
@@ -1340,7 +1339,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
 	atomic_set(&root->log_writers, 0);
 	atomic_set(&root->log_batch, 0);
 	atomic_set(&root->orphan_inodes, 0);
-	atomic_set(&root->refs, 1);
+	refcount_set(&root->refs, 1);
 	atomic_set(&root->will_be_snapshoted, 0);
 	atomic64_set(&root->qgroup_meta_rsv, 0);
 	root->log_transid = 0;
@@ -1820,7 +1819,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
 	end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
 	bio = end_io_wq->bio;
 
-	bio->bi_error = end_io_wq->error;
+	bio->bi_status = end_io_wq->status;
 	bio->bi_private = end_io_wq->private;
 	bio->bi_end_io = end_io_wq->end_io;
 	kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
@@ -3467,10 +3466,12 @@ static int write_dev_supers(struct btrfs_device *device,
 		 * we fua the first super.  The others we allow
 		 * to go down lazy.
 		 */
-		if (i == 0)
-			ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_FUA, bh);
-		else
+		if (i == 0) {
+			ret = btrfsic_submit_bh(REQ_OP_WRITE,
+						REQ_SYNC | REQ_FUA, bh);
+		} else {
 			ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
+		}
 		if (ret)
 			errors++;
 	}
@@ -3495,12 +3496,13 @@ static void btrfs_end_empty_barrier(struct bio *bio)
  * any device where the flush fails with eopnotsupp are flagged as not-barrier
  * capable
  */
-static int write_dev_flush(struct btrfs_device *device, int wait)
+static blk_status_t write_dev_flush(struct btrfs_device *device, int wait)
 {
+	struct request_queue *q = bdev_get_queue(device->bdev);
 	struct bio *bio;
-	int ret = 0;
+	blk_status_t ret = 0;
 
-	if (device->nobarriers)
+	if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
 		return 0;
 
 	if (wait) {
@@ -3510,8 +3512,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
 
 		wait_for_completion(&device->flush_wait);
 
-		if (bio->bi_error) {
-			ret = bio->bi_error;
+		if (bio->bi_status) {
+			ret = bio->bi_status;
 			btrfs_dev_stat_inc_and_print(device,
 				BTRFS_DEV_STAT_FLUSH_ERRS);
 		}
@@ -3530,11 +3532,11 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
 	device->flush_bio = NULL;
 	bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
 	if (!bio)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	bio->bi_end_io = btrfs_end_empty_barrier;
 	bio->bi_bdev = device->bdev;
-	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
 	init_completion(&device->flush_wait);
 	bio->bi_private = &device->flush_wait;
 	device->flush_bio = bio;
@@ -3555,7 +3557,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
 	struct btrfs_device *dev;
 	int errors_send = 0;
 	int errors_wait = 0;
-	int ret;
+	blk_status_t ret;
 
 	/* send down all the barriers */
 	head = &info->fs_devices->devices;
@@ -4321,7 +4323,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 		head = rb_entry(node, struct btrfs_delayed_ref_head,
 				href_node);
 		if (!mutex_trylock(&head->mutex)) {
-			atomic_inc(&head->node.refs);
+			refcount_inc(&head->node.refs);
 			spin_unlock(&delayed_refs->lock);
 
 			mutex_lock(&head->mutex);
@@ -4593,7 +4595,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
 		t = list_first_entry(&fs_info->trans_list,
 				     struct btrfs_transaction, list);
 		if (t->state >= TRANS_STATE_COMMIT_START) {
-			atomic_inc(&t->use_count);
+			refcount_inc(&t->use_count);
 			spin_unlock(&fs_info->trans_lock);
 			btrfs_wait_for_commit(fs_info, t->transid);
 			btrfs_put_transaction(t);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 2e0ec29bfd69..c581927555f3 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -101,14 +101,14 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
  */
 static inline struct btrfs_root *btrfs_grab_fs_root(struct btrfs_root *root)
 {
-	if (atomic_inc_not_zero(&root->refs))
+	if (refcount_inc_not_zero(&root->refs))
 		return root;
 	return NULL;
 }
 
 static inline void btrfs_put_fs_root(struct btrfs_root *root)
 {
-	if (atomic_dec_and_test(&root->refs))
+	if (refcount_dec_and_test(&root->refs))
 		kfree(root);
 }
 
@@ -118,13 +118,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
 u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
 void btrfs_csum_final(u32 crc, u8 *result);
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 			enum btrfs_wq_endio_type metadata);
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
-			struct bio *bio, int mirror_num,
-			unsigned long bio_flags, u64 bio_offset,
-			extent_submit_bio_hook_t *submit_bio_start,
-			extent_submit_bio_hook_t *submit_bio_done);
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
+		struct inode *inode, struct bio *bio, int mirror_num,
+		unsigned long bio_flags, u64 bio_offset,
+		extent_submit_bio_hook_t *submit_bio_start,
+		extent_submit_bio_hook_t *submit_bio_done);
 unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
 int btrfs_write_tree_block(struct extent_buffer *buf);
 int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index be5477676cc8..33d979e9ea2a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -131,6 +131,16 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
 	if (atomic_dec_and_test(&cache->count)) {
 		WARN_ON(cache->pinned > 0);
 		WARN_ON(cache->reserved > 0);
+
+		/*
+		 * If not empty, someone is still holding mutex of
+		 * full_stripe_lock, which can only be released by caller.
+		 * And it will definitely cause use-after-free when caller
+		 * tries to release full stripe lock.
+		 *
+		 * No better way to resolve, but only to warn.
+		 */
+		WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
 		kfree(cache->free_space_ctl);
 		kfree(cache);
 	}
@@ -316,14 +326,14 @@ get_caching_control(struct btrfs_block_group_cache *cache)
 	}
 
 	ctl = cache->caching_ctl;
-	atomic_inc(&ctl->count);
+	refcount_inc(&ctl->count);
 	spin_unlock(&cache->lock);
 	return ctl;
 }
 
 static void put_caching_control(struct btrfs_caching_control *ctl)
 {
-	if (atomic_dec_and_test(&ctl->count))
+	if (refcount_dec_and_test(&ctl->count))
 		kfree(ctl);
 }
 
@@ -599,7 +609,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 	init_waitqueue_head(&caching_ctl->wait);
 	caching_ctl->block_group = cache;
 	caching_ctl->progress = cache->key.objectid;
-	atomic_set(&caching_ctl->count, 1);
+	refcount_set(&caching_ctl->count, 1);
 	btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
 			caching_thread, NULL, NULL);
 
@@ -620,7 +630,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 		struct btrfs_caching_control *ctl;
 
 		ctl = cache->caching_ctl;
-		atomic_inc(&ctl->count);
+		refcount_inc(&ctl->count);
 		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
 		spin_unlock(&cache->lock);
 
@@ -707,7 +717,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 	}
 
 	down_write(&fs_info->commit_root_sem);
-	atomic_inc(&caching_ctl->count);
+	refcount_inc(&caching_ctl->count);
 	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
 	up_write(&fs_info->commit_root_sem);
 
@@ -892,7 +902,7 @@ search_again:
 	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
 	if (head) {
 		if (!mutex_trylock(&head->mutex)) {
-			atomic_inc(&head->node.refs);
+			refcount_inc(&head->node.refs);
 			spin_unlock(&delayed_refs->lock);
 
 			btrfs_release_path(path);
@@ -2980,7 +2990,7 @@ again:
 				struct btrfs_delayed_ref_node *ref;
 
 				ref = &head->node;
-				atomic_inc(&ref->refs);
+				refcount_inc(&ref->refs);
 
 				spin_unlock(&delayed_refs->lock);
 				/*
@@ -3003,7 +3013,6 @@ again:
 		goto again;
 	}
 out:
-	assert_qgroups_uptodate(trans);
 	trans->can_flush_pending_bgs = can_flush_pending_bgs;
 	return 0;
 }
@@ -3057,7 +3066,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
 	}
 
 	if (!mutex_trylock(&head->mutex)) {
-		atomic_inc(&head->node.refs);
+		refcount_inc(&head->node.refs);
 		spin_unlock(&delayed_refs->lock);
 
 		btrfs_release_path(path);
@@ -3443,7 +3452,8 @@ again:
 		/*
 		 * don't bother trying to write stuff out _if_
 		 * a) we're not cached,
-		 * b) we're with nospace_cache mount option.
+		 * b) we're with nospace_cache mount option,
+		 * c) we're with v2 space_cache (FREE_SPACE_TREE).
 		 */
 		dcs = BTRFS_DC_WRITTEN;
 		spin_unlock(&block_group->lock);
@@ -3983,6 +3993,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 				    info->space_info_kobj, "%s",
 				    alloc_name(found->flags));
 	if (ret) {
+		percpu_counter_destroy(&found->total_bytes_pinned);
 		kfree(found);
 		return ret;
 	}
@@ -4834,7 +4845,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
 	spin_unlock(&delayed_rsv->lock);
 
 commit:
-	trans = btrfs_join_transaction(fs_info->fs_root);
+	trans = btrfs_join_transaction(fs_info->extent_root);
 	if (IS_ERR(trans))
 		return -ENOSPC;
 
@@ -4852,7 +4863,7 @@ static int flush_space(struct btrfs_fs_info *fs_info,
 		       struct btrfs_space_info *space_info, u64 num_bytes,
 		       u64 orig_bytes, int state)
 {
-	struct btrfs_root *root = fs_info->fs_root;
+	struct btrfs_root *root = fs_info->extent_root;
 	struct btrfs_trans_handle *trans;
 	int nr;
 	int ret = 0;
@@ -5052,7 +5063,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
 	int flush_state = FLUSH_DELAYED_ITEMS_NR;
 
 	spin_lock(&space_info->lock);
-	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->extent_root,
 						      space_info);
 	if (!to_reclaim) {
 		spin_unlock(&space_info->lock);
@@ -9917,6 +9928,7 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
 	btrfs_init_free_space_ctl(cache);
 	atomic_set(&cache->trimming, 0);
 	mutex_init(&cache->free_space_lock);
+	btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
 
 	return cache;
 }
@@ -10416,7 +10428,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 				    &fs_info->caching_block_groups, list)
 				if (ctl->block_group == block_group) {
 					caching_ctl = ctl;
-					atomic_inc(&caching_ctl->count);
+					refcount_inc(&caching_ctl->count);
 					break;
 				}
 		}
@@ -10850,7 +10862,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
 		spin_lock(&fs_info->trans_lock);
 		trans = fs_info->running_transaction;
 		if (trans)
-			atomic_inc(&trans->use_count);
+			refcount_inc(&trans->use_count);
 		spin_unlock(&fs_info->trans_lock);
 
 		ret = find_free_dev_extent_start(trans, device, minlen, start,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 27fdb250b446..d1cd60140817 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -68,7 +68,7 @@ void btrfs_leak_debug_check(void)
 		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
 		       state->start, state->end, state->state,
 		       extent_state_in_tree(state),
-		       atomic_read(&state->refs));
+		       refcount_read(&state->refs));
 		list_del(&state->leak_list);
 		kmem_cache_free(extent_state_cache, state);
 	}
@@ -174,7 +174,8 @@ int __init extent_io_init(void)
 		goto free_state_cache;
 
 	btrfs_bioset = bioset_create(BIO_POOL_SIZE,
-				     offsetof(struct btrfs_io_bio, bio));
+				     offsetof(struct btrfs_io_bio, bio),
+				     BIOSET_NEED_BVECS);
 	if (!btrfs_bioset)
 		goto free_buffer_cache;
 
@@ -238,7 +239,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
 	state->failrec = NULL;
 	RB_CLEAR_NODE(&state->rb_node);
 	btrfs_leak_debug_add(&state->leak_list, &states);
-	atomic_set(&state->refs, 1);
+	refcount_set(&state->refs, 1);
 	init_waitqueue_head(&state->wq);
 	trace_alloc_extent_state(state, mask, _RET_IP_);
 	return state;
@@ -248,7 +249,7 @@ void free_extent_state(struct extent_state *state)
 {
 	if (!state)
 		return;
-	if (atomic_dec_and_test(&state->refs)) {
+	if (refcount_dec_and_test(&state->refs)) {
 		WARN_ON(extent_state_in_tree(state));
 		btrfs_leak_debug_del(&state->leak_list);
 		trace_free_extent_state(state, _RET_IP_);
@@ -641,7 +642,7 @@ again:
 		if (cached && extent_state_in_tree(cached) &&
 		    cached->start <= start && cached->end > start) {
 			if (clear)
-				atomic_dec(&cached->refs);
+				refcount_dec(&cached->refs);
 			state = cached;
 			goto hit_next;
 		}
@@ -793,7 +794,7 @@ process_node:
 
 		if (state->state & bits) {
 			start = state->start;
-			atomic_inc(&state->refs);
+			refcount_inc(&state->refs);
 			wait_on_state(tree, state);
 			free_extent_state(state);
 			goto again;
@@ -834,7 +835,7 @@ static void cache_state_if_flags(struct extent_state *state,
 	if (cached_ptr && !(*cached_ptr)) {
 		if (!flags || (state->state & flags)) {
 			*cached_ptr = state;
-			atomic_inc(&state->refs);
+			refcount_inc(&state->refs);
 		}
 	}
 }
@@ -1538,7 +1539,7 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
 		if (!found) {
 			*start = state->start;
 			*cached_state = state;
-			atomic_inc(&state->refs);
+			refcount_inc(&state->refs);
 		}
 		found++;
 		*end = state->end;
@@ -2004,16 +2005,11 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
 	u64 map_length = 0;
 	u64 sector;
 	struct btrfs_bio *bbio = NULL;
-	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 	int ret;
 
 	ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
 	BUG_ON(!mirror_num);
 
-	/* we can't repair anything in raid56 yet */
-	if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
-		return 0;
-
 	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
 	if (!bio)
 		return -EIO;
@@ -2026,17 +2022,35 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
 	 * read repair operation.
 	 */
 	btrfs_bio_counter_inc_blocked(fs_info);
-	ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
-			      &map_length, &bbio, mirror_num);
-	if (ret) {
-		btrfs_bio_counter_dec(fs_info);
-		bio_put(bio);
-		return -EIO;
+	if (btrfs_is_parity_mirror(fs_info, logical, length, mirror_num)) {
+		/*
+		 * Note that we don't use BTRFS_MAP_WRITE because it's supposed
+		 * to update all raid stripes, but here we just want to correct
+		 * bad stripe, thus BTRFS_MAP_READ is abused to only get the bad
+		 * stripe's dev and sector.
+		 */
+		ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
+				      &map_length, &bbio, 0);
+		if (ret) {
+			btrfs_bio_counter_dec(fs_info);
+			bio_put(bio);
+			return -EIO;
+		}
+		ASSERT(bbio->mirror_num == 1);
+	} else {
+		ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
+				      &map_length, &bbio, mirror_num);
+		if (ret) {
+			btrfs_bio_counter_dec(fs_info);
+			bio_put(bio);
+			return -EIO;
+		}
+		BUG_ON(mirror_num != bbio->mirror_num);
 	}
-	BUG_ON(mirror_num != bbio->mirror_num);
-	sector = bbio->stripes[mirror_num-1].physical >> 9;
+
+	sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
 	bio->bi_iter.bi_sector = sector;
-	dev = bbio->stripes[mirror_num-1].dev;
+	dev = bbio->stripes[bbio->mirror_num - 1].dev;
 	btrfs_put_bbio(bbio);
 	if (!dev || !dev->bdev || !dev->writeable) {
 		btrfs_bio_counter_dec(fs_info);
@@ -2386,6 +2400,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	struct bio *bio;
 	int read_mode = 0;
+	blk_status_t status;
 	int ret;
 
 	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -2418,11 +2433,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 		"Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
 		read_mode, failrec->this_mirror, failrec->in_validation);
 
-	ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
+	status = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
 					 failrec->bio_flags, 0);
-	if (ret) {
+	if (status) {
 		free_io_failure(BTRFS_I(inode), failrec);
 		bio_put(bio);
+		ret = blk_status_to_errno(status);
 	}
 
 	return ret;
@@ -2445,7 +2461,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
 	if (!uptodate) {
 		ClearPageUptodate(page);
 		SetPageError(page);
-		ret = ret < 0 ? ret : -EIO;
+		ret = err < 0 ? err : -EIO;
 		mapping_set_error(page->mapping, ret);
 	}
 }
@@ -2461,6 +2477,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
  */
 static void end_bio_extent_writepage(struct bio *bio)
 {
+	int error = blk_status_to_errno(bio->bi_status);
 	struct bio_vec *bvec;
 	u64 start;
 	u64 end;
@@ -2490,7 +2507,7 @@ static void end_bio_extent_writepage(struct bio *bio)
 		start = page_offset(page);
 		end = start + bvec->bv_offset + bvec->bv_len - 1;
 
-		end_extent_writepage(page, bio->bi_error, start, end);
+		end_extent_writepage(page, error, start, end);
 		end_page_writeback(page);
 	}
 
@@ -2523,7 +2540,7 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
 static void end_bio_extent_readpage(struct bio *bio)
 {
 	struct bio_vec *bvec;
-	int uptodate = !bio->bi_error;
+	int uptodate = !bio->bi_status;
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 	struct extent_io_tree *tree;
 	u64 offset = 0;
@@ -2543,7 +2560,7 @@ static void end_bio_extent_readpage(struct bio *bio)
 
 		btrfs_debug(fs_info,
 			"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
-			(u64)bio->bi_iter.bi_sector, bio->bi_error,
+			(u64)bio->bi_iter.bi_sector, bio->bi_status,
 			io_bio->mirror_num);
 		tree = &BTRFS_I(inode)->io_tree;
 
@@ -2602,7 +2619,7 @@ static void end_bio_extent_readpage(struct bio *bio)
 				ret = bio_readpage_error(bio, offset, page,
 							 start, end, mirror);
 				if (ret == 0) {
-					uptodate = !bio->bi_error;
+					uptodate = !bio->bi_status;
 					offset += len;
 					continue;
 				}
@@ -2660,7 +2677,7 @@ readpage_ok:
 		endio_readpage_release_extent(tree, extent_start, extent_len,
 					      uptodate);
 	if (io_bio->end_io)
-		io_bio->end_io(io_bio, bio->bi_error);
+		io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
 	bio_put(bio);
 }
 
@@ -2730,7 +2747,7 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
 				       unsigned long bio_flags)
 {
-	int ret = 0;
+	blk_status_t ret = 0;
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 	struct page *page = bvec->bv_page;
 	struct extent_io_tree *tree = bio->bi_private;
@@ -2748,7 +2765,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
 		btrfsic_submit_bio(bio);
 
 	bio_put(bio);
-	return ret;
+	return blk_status_to_errno(ret);
 }
 
 static int merge_bio(struct extent_io_tree *tree, struct page *page,
@@ -2813,6 +2830,7 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
 	bio_add_page(bio, page, page_size, offset);
 	bio->bi_end_io = end_io_func;
 	bio->bi_private = tree;
+	bio->bi_write_hint = page->mapping->host->i_write_hint;
 	bio_set_op_attrs(bio, op, op_flags);
 	if (wbc) {
 		wbc_init_bio(wbc, bio);
@@ -2859,7 +2877,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
 		em = *em_cached;
 		if (extent_map_in_tree(em) && start >= em->start &&
 		    start < extent_map_end(em)) {
-			atomic_inc(&em->refs);
+			refcount_inc(&em->refs);
 			return em;
 		}
 
@@ -2870,7 +2888,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
 	em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
 	if (em_cached && !IS_ERR_OR_NULL(em)) {
 		BUG_ON(*em_cached);
-		atomic_inc(&em->refs);
+		refcount_inc(&em->refs);
 		*em_cached = em;
 	}
 	return em;
@@ -3694,7 +3712,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
 		BUG_ON(!eb);
 		done = atomic_dec_and_test(&eb->io_pages);
 
-		if (bio->bi_error ||
+		if (bio->bi_status ||
 		    test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
 			ClearPageUptodate(page);
 			set_btree_ioerr(page);
@@ -4364,6 +4382,123 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
 	return NULL;
 }
 
+/*
+ * To cache previous fiemap extent
+ *
+ * Will be used for merging fiemap extent
+ */
+struct fiemap_cache {
+	u64 offset;
+	u64 phys;
+	u64 len;
+	u32 flags;
+	bool cached;
+};
+
+/*
+ * Helper to submit fiemap extent.
+ *
+ * Will try to merge current fiemap extent specified by @offset, @phys,
+ * @len and @flags with cached one.
+ * And only when we fails to merge, cached one will be submitted as
+ * fiemap extent.
+ *
+ * Return value is the same as fiemap_fill_next_extent().
+ */
+static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
+				struct fiemap_cache *cache,
+				u64 offset, u64 phys, u64 len, u32 flags)
+{
+	int ret = 0;
+
+	if (!cache->cached)
+		goto assign;
+
+	/*
+	 * Sanity check, extent_fiemap() should have ensured that new
+	 * fiemap extent won't overlap with cahced one.
+	 * Not recoverable.
+	 *
+	 * NOTE: Physical address can overlap, due to compression
+	 */
+	if (cache->offset + cache->len > offset) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	/*
+	 * Only merges fiemap extents if
+	 * 1) Their logical addresses are continuous
+	 *
+	 * 2) Their physical addresses are continuous
+	 *    So truly compressed (physical size smaller than logical size)
+	 *    extents won't get merged with each other
+	 *
+	 * 3) Share same flags except FIEMAP_EXTENT_LAST
+	 *    So regular extent won't get merged with prealloc extent
+	 */
+	if (cache->offset + cache->len  == offset &&
+	    cache->phys + cache->len == phys  &&
+	    (cache->flags & ~FIEMAP_EXTENT_LAST) ==
+			(flags & ~FIEMAP_EXTENT_LAST)) {
+		cache->len += len;
+		cache->flags |= flags;
+		goto try_submit_last;
+	}
+
+	/* Not mergeable, need to submit cached one */
+	ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+				      cache->len, cache->flags);
+	cache->cached = false;
+	if (ret)
+		return ret;
+assign:
+	cache->cached = true;
+	cache->offset = offset;
+	cache->phys = phys;
+	cache->len = len;
+	cache->flags = flags;
+try_submit_last:
+	if (cache->flags & FIEMAP_EXTENT_LAST) {
+		ret = fiemap_fill_next_extent(fieinfo, cache->offset,
+				cache->phys, cache->len, cache->flags);
+		cache->cached = false;
+	}
+	return ret;
+}
+
+/*
+ * Sanity check for fiemap cache
+ *
+ * All fiemap cache should be submitted by emit_fiemap_extent()
+ * Iteration should be terminated either by last fiemap extent or
+ * fieinfo->fi_extents_max.
+ * So no cached fiemap should exist.
+ */
+static int check_fiemap_cache(struct btrfs_fs_info *fs_info,
+			       struct fiemap_extent_info *fieinfo,
+			       struct fiemap_cache *cache)
+{
+	int ret;
+
+	if (!cache->cached)
+		return 0;
+
+	/* Small and recoverbale problem, only to info developer */
+#ifdef CONFIG_BTRFS_DEBUG
+	WARN_ON(1);
+#endif
+	btrfs_warn(fs_info,
+		   "unhandled fiemap cache detected: offset=%llu phys=%llu len=%llu flags=0x%x",
+		   cache->offset, cache->phys, cache->len, cache->flags);
+	ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+				      cache->len, cache->flags);
+	cache->cached = false;
+	if (ret > 0)
+		ret = 0;
+	return ret;
+}
+
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len, get_extent_t *get_extent)
 {
@@ -4381,6 +4516,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	struct extent_state *cached_state = NULL;
 	struct btrfs_path *path;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct fiemap_cache cache = { 0 };
 	int end = 0;
 	u64 em_start = 0;
 	u64 em_len = 0;
@@ -4560,8 +4696,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			flags |= FIEMAP_EXTENT_LAST;
 			end = 1;
 		}
-		ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
-					      em_len, flags);
+		ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
+					   em_len, flags);
 		if (ret) {
 			if (ret == 1)
 				ret = 0;
@@ -4569,6 +4705,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		}
 	}
 out_free:
+	if (!ret)
+		ret = check_fiemap_cache(root->fs_info, fieinfo, &cache);
 	free_extent_map(em);
 out:
 	btrfs_free_path(path);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 3e4fad4a909d..487ca0207cb6 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -2,6 +2,7 @@
 #define __EXTENTIO__
 
 #include <linux/rbtree.h>
+#include <linux/refcount.h>
 #include "ulist.h"
 
 /* bits for the extent state */
@@ -14,14 +15,17 @@
 #define EXTENT_DEFRAG		(1U << 6)
 #define EXTENT_BOUNDARY		(1U << 9)
 #define EXTENT_NODATASUM	(1U << 10)
-#define EXTENT_DO_ACCOUNTING	(1U << 11)
+#define EXTENT_CLEAR_META_RESV	(1U << 11)
 #define EXTENT_FIRST_DELALLOC	(1U << 12)
 #define EXTENT_NEED_WAIT	(1U << 13)
 #define EXTENT_DAMAGED		(1U << 14)
 #define EXTENT_NORESERVE	(1U << 15)
 #define EXTENT_QGROUP_RESERVED	(1U << 16)
 #define EXTENT_CLEAR_DATA_RESV	(1U << 17)
+#define EXTENT_DELALLOC_NEW	(1U << 18)
 #define EXTENT_IOBITS		(EXTENT_LOCKED | EXTENT_WRITEBACK)
+#define EXTENT_DO_ACCOUNTING    (EXTENT_CLEAR_META_RESV | \
+				 EXTENT_CLEAR_DATA_RESV)
 #define EXTENT_CTLBITS		(EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
 
 /*
@@ -88,9 +92,9 @@ struct btrfs_inode;
 struct btrfs_io_bio;
 struct io_failure_record;
 
-typedef	int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio,
-				       int mirror_num, unsigned long bio_flags,
-				       u64 bio_offset);
+typedef	blk_status_t (extent_submit_bio_hook_t)(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset);
 struct extent_io_ops {
 	/*
 	 * The following callbacks must be allways defined, the function
@@ -143,7 +147,7 @@ struct extent_state {
 
 	/* ADD NEW ELEMENTS AFTER THIS */
 	wait_queue_head_t wq;
-	atomic_t refs;
+	refcount_t refs;
 	unsigned state;
 
 	struct io_failure_record *failrec;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 26f9ac719d20..69850155870c 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -55,7 +55,7 @@ struct extent_map *alloc_extent_map(void)
 	em->flags = 0;
 	em->compress_type = BTRFS_COMPRESS_NONE;
 	em->generation = 0;
-	atomic_set(&em->refs, 1);
+	refcount_set(&em->refs, 1);
 	INIT_LIST_HEAD(&em->list);
 	return em;
 }
@@ -71,8 +71,8 @@ void free_extent_map(struct extent_map *em)
 {
 	if (!em)
 		return;
-	WARN_ON(atomic_read(&em->refs) == 0);
-	if (atomic_dec_and_test(&em->refs)) {
+	WARN_ON(refcount_read(&em->refs) == 0);
+	if (refcount_dec_and_test(&em->refs)) {
 		WARN_ON(extent_map_in_tree(em));
 		WARN_ON(!list_empty(&em->list));
 		if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
@@ -322,7 +322,7 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree,
 					struct extent_map *em,
 					int modified)
 {
-	atomic_inc(&em->refs);
+	refcount_inc(&em->refs);
 	em->mod_start = em->start;
 	em->mod_len = em->len;
 
@@ -381,7 +381,7 @@ __lookup_extent_mapping(struct extent_map_tree *tree,
 	if (strict && !(end > em->start && start < extent_map_end(em)))
 		return NULL;
 
-	atomic_inc(&em->refs);
+	refcount_inc(&em->refs);
 	return em;
 }
 
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index eb8b8fae036b..a67b2def5413 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -2,6 +2,7 @@
 #define __EXTENTMAP__
 
 #include <linux/rbtree.h>
+#include <linux/refcount.h>
 
 #define EXTENT_MAP_LAST_BYTE ((u64)-4)
 #define EXTENT_MAP_HOLE ((u64)-3)
@@ -41,7 +42,7 @@ struct extent_map {
 		 */
 		struct map_lookup *map_lookup;
 	};
-	atomic_t refs;
+	refcount_t refs;
 	unsigned int compress_type;
 	struct list_head list;
 };
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 64fcb31d7163..5b1c7090e546 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -160,7 +160,7 @@ static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
 	kfree(bio->csum_allocated);
 }
 
-static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
+static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
 				   u64 logical_offset, u32 *dst, int dio)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -182,7 +182,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
 
 	path = btrfs_alloc_path();
 	if (!path)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
 	if (!dst) {
@@ -191,7 +191,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
 					csum_size, GFP_NOFS);
 			if (!btrfs_bio->csum_allocated) {
 				btrfs_free_path(path);
-				return -ENOMEM;
+				return BLK_STS_RESOURCE;
 			}
 			btrfs_bio->csum = btrfs_bio->csum_allocated;
 			btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
@@ -303,12 +303,12 @@ next:
 	return 0;
 }
 
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
 {
 	return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
 }
 
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
 {
 	return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1);
 }
@@ -433,7 +433,7 @@ fail:
 	return ret;
 }
 
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
 		       u64 file_start, int contig)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -452,7 +452,7 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
 	sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
 		       GFP_NOFS);
 	if (!sums)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	sums->len = bio->bi_iter.bi_size;
 	INIT_LIST_HEAD(&sums->list);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 520cb7230b2d..59e2dccdf75b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1404,6 +1404,47 @@ fail:
 
 }
 
+static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
+					 const u64 start,
+					 const u64 len,
+					 struct extent_state **cached_state)
+{
+	u64 search_start = start;
+	const u64 end = start + len - 1;
+
+	while (search_start < end) {
+		const u64 search_len = end - search_start + 1;
+		struct extent_map *em;
+		u64 em_len;
+		int ret = 0;
+
+		em = btrfs_get_extent(inode, NULL, 0, search_start,
+				      search_len, 0);
+		if (IS_ERR(em))
+			return PTR_ERR(em);
+
+		if (em->block_start != EXTENT_MAP_HOLE)
+			goto next;
+
+		em_len = em->len;
+		if (em->start < search_start)
+			em_len -= search_start - em->start;
+		if (em_len > search_len)
+			em_len = search_len;
+
+		ret = set_extent_bit(&inode->io_tree, search_start,
+				     search_start + em_len - 1,
+				     EXTENT_DELALLOC_NEW,
+				     NULL, cached_state, GFP_NOFS);
+next:
+		search_start = extent_map_end(em);
+		free_extent_map(em);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
 /*
  * This function locks the extent and properly waits for data=ordered extents
  * to finish before allowing the pages to be modified if need.
@@ -1432,8 +1473,11 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
 		+ round_up(pos + write_bytes - start_pos,
 			   fs_info->sectorsize) - 1;
 
-	if (start_pos < inode->vfs_inode.i_size) {
+	if (start_pos < inode->vfs_inode.i_size ||
+	    (inode->flags & BTRFS_INODE_PREALLOC)) {
 		struct btrfs_ordered_extent *ordered;
+		unsigned int clear_bits;
+
 		lock_extent_bits(&inode->io_tree, start_pos, last_pos,
 				cached_state);
 		ordered = btrfs_lookup_ordered_range(inode, start_pos,
@@ -1454,11 +1498,19 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
 		}
 		if (ordered)
 			btrfs_put_ordered_extent(ordered);
-
+		ret = btrfs_find_new_delalloc_bytes(inode, start_pos,
+						    last_pos - start_pos + 1,
+						    cached_state);
+		clear_bits = EXTENT_DIRTY | EXTENT_DELALLOC |
+			EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG;
+		if (ret)
+			clear_bits |= EXTENT_DELALLOC_NEW | EXTENT_LOCKED;
 		clear_extent_bit(&inode->io_tree, start_pos,
-				  last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
-				  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
-				  0, 0, cached_state, GFP_NOFS);
+				 last_pos, clear_bits,
+				 (clear_bits & EXTENT_LOCKED) ? 1 : 0,
+				 0, cached_state, GFP_NOFS);
+		if (ret)
+			return ret;
 		*lockstart = start_pos;
 		*lockend = last_pos;
 		ret = 1;
@@ -1823,12 +1875,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 	ssize_t num_written = 0;
 	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
 	ssize_t err;
-	loff_t pos;
-	size_t count;
+	loff_t pos = iocb->ki_pos;
+	size_t count = iov_iter_count(from);
 	loff_t oldsize;
 	int clean_page = 0;
 
-	inode_lock(inode);
+	if ((iocb->ki_flags & IOCB_NOWAIT) &&
+			(iocb->ki_flags & IOCB_DIRECT)) {
+		/* Don't sleep on inode rwsem */
+		if (!inode_trylock(inode))
+			return -EAGAIN;
+		/*
+		 * We will allocate space in case nodatacow is not set,
+		 * so bail
+		 */
+		if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+					      BTRFS_INODE_PREALLOC)) ||
+		    check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
+			inode_unlock(inode);
+			return -EAGAIN;
+		}
+	} else
+		inode_lock(inode);
+
 	err = generic_write_checks(iocb, from);
 	if (err <= 0) {
 		inode_unlock(inode);
@@ -1862,8 +1931,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 	 */
 	update_time_for_write(inode);
 
-	pos = iocb->ki_pos;
-	count = iov_iter_count(from);
 	start_pos = round_down(pos, fs_info->sectorsize);
 	oldsize = i_size_read(inode);
 	if (start_pos > oldsize) {
@@ -2342,13 +2409,8 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
 	int ret = 0;
 
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, *start, *len, 0);
-	if (IS_ERR_OR_NULL(em)) {
-		if (!em)
-			ret = -ENOMEM;
-		else
-			ret = PTR_ERR(em);
-		return ret;
-	}
+	if (IS_ERR(em))
+		return PTR_ERR(em);
 
 	/* Hole or vacuum extent(only exists in no-hole mode) */
 	if (em->block_start == EXTENT_MAP_HOLE) {
@@ -2835,11 +2897,8 @@ static long btrfs_fallocate(struct file *file, int mode,
 	while (1) {
 		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
 				      alloc_end - cur_offset, 0);
-		if (IS_ERR_OR_NULL(em)) {
-			if (!em)
-				ret = -ENOMEM;
-			else
-				ret = PTR_ERR(em);
+		if (IS_ERR(em)) {
+			ret = PTR_ERR(em);
 			break;
 		}
 		last_byte = min(extent_map_end(em), alloc_end);
@@ -2856,8 +2915,10 @@ static long btrfs_fallocate(struct file *file, int mode,
 			}
 			ret = btrfs_qgroup_reserve_data(inode, cur_offset,
 					last_byte - cur_offset);
-			if (ret < 0)
+			if (ret < 0) {
+				free_extent_map(em);
 				break;
+			}
 		} else {
 			/*
 			 * Do not need to reserve unwritten extent for this
@@ -3025,13 +3086,19 @@ out:
 	return offset;
 }
 
+static int btrfs_file_open(struct inode *inode, struct file *filp)
+{
+	filp->f_mode |= FMODE_AIO_NOWAIT;
+	return generic_file_open(inode, filp);
+}
+
 const struct file_operations btrfs_file_operations = {
 	.llseek		= btrfs_file_llseek,
 	.read_iter      = generic_file_read_iter,
 	.splice_read	= generic_file_splice_read,
 	.write_iter	= btrfs_file_write_iter,
 	.mmap		= btrfs_file_mmap,
-	.open		= generic_file_open,
+	.open		= btrfs_file_open,
 	.release	= btrfs_release_file,
 	.fsync		= btrfs_sync_file,
 	.fallocate	= btrfs_fallocate,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index da6841efac26..c5e6180cdb8c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -355,7 +355,7 @@ static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
 	io_ctl->orig = io_ctl->cur;
 	io_ctl->size = PAGE_SIZE;
 	if (clear)
-		memset(io_ctl->cur, 0, PAGE_SIZE);
+		clear_page(io_ctl->cur);
 }
 
 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
index a97fdc156a03..baacc1866861 100644
--- a/fs/btrfs/hash.c
+++ b/fs/btrfs/hash.c
@@ -38,6 +38,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
 {
 	SHASH_DESC_ON_STACK(shash, tfm);
 	u32 *ctx = (u32 *)shash_desc_ctx(shash);
+	u32 retval;
 	int err;
 
 	shash->tfm = tfm;
@@ -47,5 +48,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
 	err = crypto_shash_update(shash, address, length);
 	BUG_ON(err);
 
-	return *ctx;
+	retval = *ctx;
+	barrier_data(ctx);
+	return retval;
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5e71f1ea3391..556c93060606 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -115,6 +115,31 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
 				       u64 ram_bytes, int compress_type,
 				       int type);
 
+static void __endio_write_update_ordered(struct inode *inode,
+					 const u64 offset, const u64 bytes,
+					 const bool uptodate);
+
+/*
+ * Cleanup all submitted ordered extents in specified range to handle errors
+ * from the fill_dellaloc() callback.
+ *
+ * NOTE: caller must ensure that when an error happens, it can not call
+ * extent_clear_unlock_delalloc() to clear both the bits EXTENT_DO_ACCOUNTING
+ * and EXTENT_DELALLOC simultaneously, because that causes the reserved metadata
+ * to be released, which we want to happen only when finishing the ordered
+ * extent (btrfs_finish_ordered_io()). Also note that the caller of the
+ * fill_delalloc() callback already does proper cleanup for the first page of
+ * the range, that is, it invokes the callback writepage_end_io_hook() for the
+ * range of the first page.
+ */
+static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
+						 const u64 offset,
+						 const u64 bytes)
+{
+	return __endio_write_update_ordered(inode, offset + PAGE_SIZE,
+					    bytes - PAGE_SIZE, false);
+}
+
 static int btrfs_dirty_inode(struct inode *inode);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
@@ -547,7 +572,7 @@ cont:
 		}
 		if (ret <= 0) {
 			unsigned long clear_flags = EXTENT_DELALLOC |
-				EXTENT_DEFRAG;
+				EXTENT_DELALLOC_NEW | EXTENT_DEFRAG;
 			unsigned long page_error_op;
 
 			clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
@@ -565,8 +590,10 @@ cont:
 						     PAGE_SET_WRITEBACK |
 						     page_error_op |
 						     PAGE_END_WRITEBACK);
-			btrfs_free_reserved_data_space_noquota(inode, start,
-						end - start + 1);
+			if (ret == 0)
+				btrfs_free_reserved_data_space_noquota(inode,
+							       start,
+							       end - start + 1);
 			goto free_pages_out;
 		}
 	}
@@ -815,13 +842,12 @@ retry:
 				NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
 				PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
 				PAGE_SET_WRITEBACK);
-		ret = btrfs_submit_compressed_write(inode,
+		if (btrfs_submit_compressed_write(inode,
 				    async_extent->start,
 				    async_extent->ram_size,
 				    ins.objectid,
 				    ins.offset, async_extent->pages,
-				    async_extent->nr_pages);
-		if (ret) {
+				    async_extent->nr_pages)) {
 			struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 			struct page *p = async_extent->pages[0];
 			const u64 start = async_extent->start;
@@ -852,6 +878,7 @@ out_free:
 				     async_extent->start +
 				     async_extent->ram_size - 1,
 				     NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
+				     EXTENT_DELALLOC_NEW |
 				     EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
 				     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
 				     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
@@ -918,10 +945,13 @@ static noinline int cow_file_range(struct inode *inode,
 	u64 num_bytes;
 	unsigned long ram_size;
 	u64 disk_num_bytes;
-	u64 cur_alloc_size;
+	u64 cur_alloc_size = 0;
 	u64 blocksize = fs_info->sectorsize;
 	struct btrfs_key ins;
 	struct extent_map *em;
+	unsigned clear_bits;
+	unsigned long page_ops;
+	bool extent_reserved = false;
 	int ret = 0;
 
 	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
@@ -944,6 +974,7 @@ static noinline int cow_file_range(struct inode *inode,
 			extent_clear_unlock_delalloc(inode, start, end,
 				     delalloc_end, NULL,
 				     EXTENT_LOCKED | EXTENT_DELALLOC |
+				     EXTENT_DELALLOC_NEW |
 				     EXTENT_DEFRAG, PAGE_UNLOCK |
 				     PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
 				     PAGE_END_WRITEBACK);
@@ -966,14 +997,14 @@ static noinline int cow_file_range(struct inode *inode,
 			start + num_bytes - 1, 0);
 
 	while (disk_num_bytes > 0) {
-		unsigned long op;
-
 		cur_alloc_size = disk_num_bytes;
 		ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
 					   fs_info->sectorsize, 0, alloc_hint,
 					   &ins, 1, 1);
 		if (ret < 0)
 			goto out_unlock;
+		cur_alloc_size = ins.offset;
+		extent_reserved = true;
 
 		ram_size = ins.offset;
 		em = create_io_em(inode, start, ins.offset, /* len */
@@ -988,7 +1019,6 @@ static noinline int cow_file_range(struct inode *inode,
 			goto out_reserve;
 		free_extent_map(em);
 
-		cur_alloc_size = ins.offset;
 		ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
 					       ram_size, cur_alloc_size, 0);
 		if (ret)
@@ -998,15 +1028,24 @@ static noinline int cow_file_range(struct inode *inode,
 		    BTRFS_DATA_RELOC_TREE_OBJECTID) {
 			ret = btrfs_reloc_clone_csums(inode, start,
 						      cur_alloc_size);
+			/*
+			 * Only drop cache here, and process as normal.
+			 *
+			 * We must not allow extent_clear_unlock_delalloc()
+			 * at out_unlock label to free meta of this ordered
+			 * extent, as its meta should be freed by
+			 * btrfs_finish_ordered_io().
+			 *
+			 * So we must continue until @start is increased to
+			 * skip current ordered extent.
+			 */
 			if (ret)
-				goto out_drop_extent_cache;
+				btrfs_drop_extent_cache(BTRFS_I(inode), start,
+						start + ram_size - 1, 0);
 		}
 
 		btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 
-		if (disk_num_bytes < cur_alloc_size)
-			break;
-
 		/* we're not doing compressed IO, don't unlock the first
 		 * page (which the caller expects to stay locked), don't
 		 * clear any dirty bits and don't set any writeback bits
@@ -1014,18 +1053,30 @@ static noinline int cow_file_range(struct inode *inode,
 		 * Do set the Private2 bit so we know this page was properly
 		 * setup for writepage
 		 */
-		op = unlock ? PAGE_UNLOCK : 0;
-		op |= PAGE_SET_PRIVATE2;
+		page_ops = unlock ? PAGE_UNLOCK : 0;
+		page_ops |= PAGE_SET_PRIVATE2;
 
 		extent_clear_unlock_delalloc(inode, start,
 					     start + ram_size - 1,
 					     delalloc_end, locked_page,
 					     EXTENT_LOCKED | EXTENT_DELALLOC,
-					     op);
-		disk_num_bytes -= cur_alloc_size;
+					     page_ops);
+		if (disk_num_bytes < cur_alloc_size)
+			disk_num_bytes = 0;
+		else
+			disk_num_bytes -= cur_alloc_size;
 		num_bytes -= cur_alloc_size;
 		alloc_hint = ins.objectid + ins.offset;
 		start += cur_alloc_size;
+		extent_reserved = false;
+
+		/*
+		 * btrfs_reloc_clone_csums() error, since start is increased
+		 * extent_clear_unlock_delalloc() at out_unlock label won't
+		 * free metadata of current ordered extent, we're OK to exit.
+		 */
+		if (ret)
+			goto out_unlock;
 	}
 out:
 	return ret;
@@ -1036,12 +1087,35 @@ out_reserve:
 	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
 out_unlock:
+	clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+		EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
+	page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
+		PAGE_END_WRITEBACK;
+	/*
+	 * If we reserved an extent for our delalloc range (or a subrange) and
+	 * failed to create the respective ordered extent, then it means that
+	 * when we reserved the extent we decremented the extent's size from
+	 * the data space_info's bytes_may_use counter and incremented the
+	 * space_info's bytes_reserved counter by the same amount. We must make
+	 * sure extent_clear_unlock_delalloc() does not try to decrement again
+	 * the data space_info's bytes_may_use counter, therefore we do not pass
+	 * it the flag EXTENT_CLEAR_DATA_RESV.
+	 */
+	if (extent_reserved) {
+		extent_clear_unlock_delalloc(inode, start,
+					     start + cur_alloc_size,
+					     start + cur_alloc_size,
+					     locked_page,
+					     clear_bits,
+					     page_ops);
+		start += cur_alloc_size;
+		if (start >= end)
+			goto out;
+	}
 	extent_clear_unlock_delalloc(inode, start, end, delalloc_end,
 				     locked_page,
-				     EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
-				     EXTENT_DELALLOC | EXTENT_DEFRAG,
-				     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
-				     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
+				     clear_bits | EXTENT_CLEAR_DATA_RESV,
+				     page_ops);
 	goto out;
 }
 
@@ -1414,15 +1488,14 @@ out_check:
 		BUG_ON(ret); /* -ENOMEM */
 
 		if (root->root_key.objectid ==
-		    BTRFS_DATA_RELOC_TREE_OBJECTID) {
+		    BTRFS_DATA_RELOC_TREE_OBJECTID)
+			/*
+			 * Error handled later, as we must prevent
+			 * extent_clear_unlock_delalloc() in error handler
+			 * from freeing metadata of created ordered extent.
+			 */
 			ret = btrfs_reloc_clone_csums(inode, cur_offset,
 						      num_bytes);
-			if (ret) {
-				if (!nolock && nocow)
-					btrfs_end_write_no_snapshoting(root);
-				goto error;
-			}
-		}
 
 		extent_clear_unlock_delalloc(inode, cur_offset,
 					     cur_offset + num_bytes - 1, end,
@@ -1434,6 +1507,14 @@ out_check:
 		if (!nolock && nocow)
 			btrfs_end_write_no_snapshoting(root);
 		cur_offset = extent_end;
+
+		/*
+		 * btrfs_reloc_clone_csums() error, now we're OK to call error
+		 * handler, as metadata for created ordered extent will only
+		 * be freed by btrfs_finish_ordered_io().
+		 */
+		if (ret)
+			goto error;
 		if (cur_offset > end)
 			break;
 	}
@@ -1509,6 +1590,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 		ret = cow_file_range_async(inode, locked_page, start, end,
 					   page_started, nr_written);
 	}
+	if (ret)
+		btrfs_cleanup_ordered_extents(inode, start, end - start + 1);
 	return ret;
 }
 
@@ -1693,6 +1776,14 @@ static void btrfs_set_bit_hook(struct inode *inode,
 			btrfs_add_delalloc_inodes(root, inode);
 		spin_unlock(&BTRFS_I(inode)->lock);
 	}
+
+	if (!(state->state & EXTENT_DELALLOC_NEW) &&
+	    (*bits & EXTENT_DELALLOC_NEW)) {
+		spin_lock(&BTRFS_I(inode)->lock);
+		BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 -
+			state->start;
+		spin_unlock(&BTRFS_I(inode)->lock);
+	}
 }
 
 /*
@@ -1722,7 +1813,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
 
 		if (*bits & EXTENT_FIRST_DELALLOC) {
 			*bits &= ~EXTENT_FIRST_DELALLOC;
-		} else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
+		} else if (!(*bits & EXTENT_CLEAR_META_RESV)) {
 			spin_lock(&inode->lock);
 			inode->outstanding_extents -= num_extents;
 			spin_unlock(&inode->lock);
@@ -1733,7 +1824,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
 		 * don't need to call dellalloc_release_metadata if there is an
 		 * error.
 		 */
-		if (*bits & EXTENT_DO_ACCOUNTING &&
+		if (*bits & EXTENT_CLEAR_META_RESV &&
 		    root != fs_info->tree_root)
 			btrfs_delalloc_release_metadata(inode, len);
 
@@ -1741,10 +1832,9 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
 		if (btrfs_is_testing(fs_info))
 			return;
 
-		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
-		    && do_list && !(state->state & EXTENT_NORESERVE)
-		    && (*bits & (EXTENT_DO_ACCOUNTING |
-		    EXTENT_CLEAR_DATA_RESV)))
+		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
+		    do_list && !(state->state & EXTENT_NORESERVE) &&
+		    (*bits & EXTENT_CLEAR_DATA_RESV))
 			btrfs_free_reserved_data_space_noquota(
 					&inode->vfs_inode,
 					state->start, len);
@@ -1759,6 +1849,14 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
 			btrfs_del_delalloc_inode(root, inode);
 		spin_unlock(&inode->lock);
 	}
+
+	if ((state->state & EXTENT_DELALLOC_NEW) &&
+	    (*bits & EXTENT_DELALLOC_NEW)) {
+		spin_lock(&inode->lock);
+		ASSERT(inode->new_delalloc_bytes >= len);
+		inode->new_delalloc_bytes -= len;
+		spin_unlock(&inode->lock);
+	}
 }
 
 /*
@@ -1802,11 +1900,11 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
  * At IO completion time the cums attached on the ordered extent record
  * are inserted into the btree
  */
-static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
-				    int mirror_num, unsigned long bio_flags,
-				    u64 bio_offset)
+static blk_status_t __btrfs_submit_bio_start(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset)
 {
-	int ret = 0;
+	blk_status_t ret = 0;
 
 	ret = btrfs_csum_one_bio(inode, bio, 0, 0);
 	BUG_ON(ret); /* -ENOMEM */
@@ -1821,16 +1919,16 @@ static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
  * At IO completion time the cums attached on the ordered extent record
  * are inserted into the btree
  */
-static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
-			  int mirror_num, unsigned long bio_flags,
-			  u64 bio_offset)
+static blk_status_t __btrfs_submit_bio_done(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	int ret;
+	blk_status_t ret;
 
 	ret = btrfs_map_bio(fs_info, bio, mirror_num, 1);
 	if (ret) {
-		bio->bi_error = ret;
+		bio->bi_status = ret;
 		bio_endio(bio);
 	}
 	return ret;
@@ -1840,14 +1938,14 @@ static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
  * extent_io.c submission hook. This does the right thing for csum calculation
  * on write, or reading the csums from the tree before a read
  */
-static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
+static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
 			  int mirror_num, unsigned long bio_flags,
 			  u64 bio_offset)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
-	int ret = 0;
+	blk_status_t ret = 0;
 	int skip_sum;
 	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
 
@@ -1892,8 +1990,8 @@ mapit:
 	ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
 
 out:
-	if (ret < 0) {
-		bio->bi_error = ret;
+	if (ret) {
+		bio->bi_status = ret;
 		bio_endio(bio);
 	}
 	return ret;
@@ -2791,6 +2889,13 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 	u64 logical_len = ordered_extent->len;
 	bool nolock;
 	bool truncated = false;
+	bool range_locked = false;
+	bool clear_new_delalloc_bytes = false;
+
+	if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
+	    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
+	    !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
+		clear_new_delalloc_bytes = true;
 
 	nolock = btrfs_is_free_space_inode(BTRFS_I(inode));
 
@@ -2839,13 +2944,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 		goto out;
 	}
 
+	range_locked = true;
 	lock_extent_bits(io_tree, ordered_extent->file_offset,
 			 ordered_extent->file_offset + ordered_extent->len - 1,
 			 &cached_state);
 
 	ret = test_range_bit(io_tree, ordered_extent->file_offset,
 			ordered_extent->file_offset + ordered_extent->len - 1,
-			EXTENT_DEFRAG, 1, cached_state);
+			EXTENT_DEFRAG, 0, cached_state);
 	if (ret) {
 		u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
 		if (0 && last_snapshot >= BTRFS_I(inode)->generation)
@@ -2864,7 +2970,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
 		trans = NULL;
-		goto out_unlock;
+		goto out;
 	}
 
 	trans->block_rsv = &fs_info->delalloc_block_rsv;
@@ -2896,7 +3002,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 			   trans->transid);
 	if (ret < 0) {
 		btrfs_abort_transaction(trans, ret);
-		goto out_unlock;
+		goto out;
 	}
 
 	add_pending_csums(trans, inode, &ordered_extent->list);
@@ -2905,14 +3011,26 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 	ret = btrfs_update_inode_fallback(trans, root, inode);
 	if (ret) { /* -ENOMEM or corruption */
 		btrfs_abort_transaction(trans, ret);
-		goto out_unlock;
+		goto out;
 	}
 	ret = 0;
-out_unlock:
-	unlock_extent_cached(io_tree, ordered_extent->file_offset,
-			     ordered_extent->file_offset +
-			     ordered_extent->len - 1, &cached_state, GFP_NOFS);
 out:
+	if (range_locked || clear_new_delalloc_bytes) {
+		unsigned int clear_bits = 0;
+
+		if (range_locked)
+			clear_bits |= EXTENT_LOCKED;
+		if (clear_new_delalloc_bytes)
+			clear_bits |= EXTENT_DELALLOC_NEW;
+		clear_extent_bit(&BTRFS_I(inode)->io_tree,
+				 ordered_extent->file_offset,
+				 ordered_extent->file_offset +
+				 ordered_extent->len - 1,
+				 clear_bits,
+				 (clear_bits & EXTENT_LOCKED) ? 1 : 0,
+				 0, &cached_state, GFP_NOFS);
+	}
+
 	if (root != fs_info->tree_root)
 		btrfs_delalloc_release_metadata(BTRFS_I(inode),
 				ordered_extent->len);
@@ -4401,9 +4519,17 @@ search_again:
 			if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
 				item_end +=
 				    btrfs_file_extent_num_bytes(leaf, fi);
+
+				trace_btrfs_truncate_show_fi_regular(
+					BTRFS_I(inode), leaf, fi,
+					found_key.offset);
 			} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
 				item_end += btrfs_file_extent_inline_len(leaf,
 							 path->slots[0], fi);
+
+				trace_btrfs_truncate_show_fi_inline(
+					BTRFS_I(inode), leaf, fi, path->slots[0],
+					found_key.offset);
 			}
 			item_end--;
 		}
@@ -4603,13 +4729,6 @@ error:
 
 	btrfs_free_path(path);
 
-	if (err == 0) {
-		/* only inline file may have last_size != new_size */
-		if (new_size >= fs_info->sectorsize ||
-		    new_size > fs_info->max_inline)
-			ASSERT(last_size == new_size);
-	}
-
 	if (be_nice && bytes_deleted > SZ_32M) {
 		unsigned long updates = trans->delayed_ref_updates;
 		if (updates) {
@@ -6735,7 +6854,6 @@ static noinline int uncompress_inline(struct btrfs_path *path,
  *
  * This also copies inline extents directly into the page.
  */
-
 struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
 		struct page *page,
 	    size_t pg_offset, u64 start, u64 len,
@@ -6835,11 +6953,18 @@ again:
 	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
 		extent_end = extent_start +
 		       btrfs_file_extent_num_bytes(leaf, item);
+
+		trace_btrfs_get_extent_show_fi_regular(inode, leaf, item,
+						       extent_start);
 	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
 		size_t size;
 		size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
 		extent_end = ALIGN(extent_start + size,
 				   fs_info->sectorsize);
+
+		trace_btrfs_get_extent_show_fi_inline(inode, leaf, item,
+						      path->slots[0],
+						      extent_start);
 	}
 next:
 	if (start >= extent_end) {
@@ -7037,19 +7162,17 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
 	em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
 	if (IS_ERR(em))
 		return em;
-	if (em) {
-		/*
-		 * if our em maps to
-		 * -  a hole or
-		 * -  a pre-alloc extent,
-		 * there might actually be delalloc bytes behind it.
-		 */
-		if (em->block_start != EXTENT_MAP_HOLE &&
-		    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
-			return em;
-		else
-			hole_em = em;
-	}
+	/*
+	 * If our em maps to:
+	 * - a hole or
+	 * - a pre-alloc extent,
+	 * there might actually be delalloc bytes behind it.
+	 */
+	if (em->block_start != EXTENT_MAP_HOLE &&
+	    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+		return em;
+	else
+		hole_em = em;
 
 	/* check to see if we've wrapped (len == -1 or similar) */
 	end = start + len;
@@ -7359,8 +7482,8 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
 	int found = false;
 	void **pagep = NULL;
 	struct page *page = NULL;
-	int start_idx;
-	int end_idx;
+	unsigned long start_idx;
+	unsigned long end_idx;
 
 	start_idx = start >> PAGE_SHIFT;
 
@@ -7913,7 +8036,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
 	struct bio_vec *bvec;
 	int i;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		goto end;
 
 	ASSERT(bio->bi_vcnt == 1);
@@ -7992,7 +8115,7 @@ static void btrfs_retry_endio(struct bio *bio)
 	int ret;
 	int i;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		goto end;
 
 	uptodate = 1;
@@ -8017,8 +8140,8 @@ end:
 	bio_put(bio);
 }
 
-static int __btrfs_subio_endio_read(struct inode *inode,
-				    struct btrfs_io_bio *io_bio, int err)
+static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
+		struct btrfs_io_bio *io_bio, blk_status_t err)
 {
 	struct btrfs_fs_info *fs_info;
 	struct bio_vec *bvec;
@@ -8060,7 +8183,7 @@ try_again:
 				io_bio->mirror_num,
 				btrfs_retry_endio, &done);
 		if (ret) {
-			err = ret;
+			err = errno_to_blk_status(ret);
 			goto next;
 		}
 
@@ -8087,8 +8210,8 @@ next:
 	return err;
 }
 
-static int btrfs_subio_endio_read(struct inode *inode,
-				  struct btrfs_io_bio *io_bio, int err)
+static blk_status_t btrfs_subio_endio_read(struct inode *inode,
+		struct btrfs_io_bio *io_bio, blk_status_t err)
 {
 	bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
@@ -8108,7 +8231,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
 	struct inode *inode = dip->inode;
 	struct bio *dio_bio;
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
-	int err = bio->bi_error;
+	blk_status_t err = bio->bi_status;
 
 	if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
 		err = btrfs_subio_endio_read(inode, io_bio, err);
@@ -8119,25 +8242,34 @@ static void btrfs_endio_direct_read(struct bio *bio)
 
 	kfree(dip);
 
-	dio_bio->bi_error = bio->bi_error;
-	dio_end_io(dio_bio, bio->bi_error);
+	dio_bio->bi_status = bio->bi_status;
+	dio_end_io(dio_bio);
 
 	if (io_bio->end_io)
-		io_bio->end_io(io_bio, err);
+		io_bio->end_io(io_bio, blk_status_to_errno(err));
 	bio_put(bio);
 }
 
-static void btrfs_endio_direct_write_update_ordered(struct inode *inode,
-						    const u64 offset,
-						    const u64 bytes,
-						    const int uptodate)
+static void __endio_write_update_ordered(struct inode *inode,
+					 const u64 offset, const u64 bytes,
+					 const bool uptodate)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_ordered_extent *ordered = NULL;
+	struct btrfs_workqueue *wq;
+	btrfs_work_func_t func;
 	u64 ordered_offset = offset;
 	u64 ordered_bytes = bytes;
 	int ret;
 
+	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+		wq = fs_info->endio_freespace_worker;
+		func = btrfs_freespace_write_helper;
+	} else {
+		wq = fs_info->endio_write_workers;
+		func = btrfs_endio_write_helper;
+	}
+
 again:
 	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
 						   &ordered_offset,
@@ -8146,9 +8278,8 @@ again:
 	if (!ret)
 		goto out_test;
 
-	btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
-			finish_ordered_fn, NULL, NULL);
-	btrfs_queue_work(fs_info->endio_write_workers, &ordered->work);
+	btrfs_init_work(&ordered->work, func, finish_ordered_fn, NULL, NULL);
+	btrfs_queue_work(wq, &ordered->work);
 out_test:
 	/*
 	 * our bio might span multiple ordered extents.  If we haven't
@@ -8166,23 +8297,21 @@ static void btrfs_endio_direct_write(struct bio *bio)
 	struct btrfs_dio_private *dip = bio->bi_private;
 	struct bio *dio_bio = dip->dio_bio;
 
-	btrfs_endio_direct_write_update_ordered(dip->inode,
-						dip->logical_offset,
-						dip->bytes,
-						!bio->bi_error);
+	__endio_write_update_ordered(dip->inode, dip->logical_offset,
+				     dip->bytes, !bio->bi_status);
 
 	kfree(dip);
 
-	dio_bio->bi_error = bio->bi_error;
-	dio_end_io(dio_bio, bio->bi_error);
+	dio_bio->bi_status = bio->bi_status;
+	dio_end_io(dio_bio);
 	bio_put(bio);
 }
 
-static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
+static blk_status_t __btrfs_submit_bio_start_direct_io(struct inode *inode,
 				    struct bio *bio, int mirror_num,
 				    unsigned long bio_flags, u64 offset)
 {
-	int ret;
+	blk_status_t ret;
 	ret = btrfs_csum_one_bio(inode, bio, offset, 1);
 	BUG_ON(ret); /* -ENOMEM */
 	return 0;
@@ -8191,7 +8320,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
 static void btrfs_end_dio_bio(struct bio *bio)
 {
 	struct btrfs_dio_private *dip = bio->bi_private;
-	int err = bio->bi_error;
+	blk_status_t err = bio->bi_status;
 
 	if (err)
 		btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
@@ -8221,7 +8350,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
 	if (dip->errors) {
 		bio_io_error(dip->orig_bio);
 	} else {
-		dip->dio_bio->bi_error = 0;
+		dip->dio_bio->bi_status = 0;
 		bio_endio(dip->orig_bio);
 	}
 out:
@@ -8238,14 +8367,14 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 	return bio;
 }
 
-static inline int btrfs_lookup_and_bind_dio_csum(struct inode *inode,
+static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
 						 struct btrfs_dio_private *dip,
 						 struct bio *bio,
 						 u64 file_offset)
 {
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 	struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
-	int ret;
+	blk_status_t ret;
 
 	/*
 	 * We load all the csum data we need when we submit
@@ -8276,7 +8405,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_dio_private *dip = bio->bi_private;
 	bool write = bio_op(bio) == REQ_OP_WRITE;
-	int ret;
+	blk_status_t ret;
 
 	if (async_submit)
 		async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
@@ -8519,7 +8648,7 @@ free_ordered:
 	 * callbacks - they require an allocated dip and a clone of dio_bio.
 	 */
 	if (io_bio && dip) {
-		io_bio->bi_error = -EIO;
+		io_bio->bi_status = BLK_STS_IOERR;
 		bio_endio(io_bio);
 		/*
 		 * The end io callbacks free our dip, do the final put on io_bio
@@ -8530,20 +8659,20 @@ free_ordered:
 		io_bio = NULL;
 	} else {
 		if (write)
-			btrfs_endio_direct_write_update_ordered(inode,
+			__endio_write_update_ordered(inode,
 						file_offset,
 						dio_bio->bi_iter.bi_size,
-						0);
+						false);
 		else
 			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
 			      file_offset + dio_bio->bi_iter.bi_size - 1);
 
-		dio_bio->bi_error = -EIO;
+		dio_bio->bi_status = BLK_STS_IOERR;
 		/*
 		 * Releases and cleans up our dio_bio, no need to bio_put()
 		 * nor bio_endio()/bio_io_error() against dio_bio.
 		 */
-		dio_end_io(dio_bio, ret);
+		dio_end_io(dio_bio);
 	}
 	if (io_bio)
 		bio_put(io_bio);
@@ -8625,6 +8754,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 			dio_data.overwrite = 1;
 			inode_unlock(inode);
 			relock = true;
+		} else if (iocb->ki_flags & IOCB_NOWAIT) {
+			ret = -EAGAIN;
+			goto out;
 		}
 		ret = btrfs_delalloc_reserve_space(inode, offset, count);
 		if (ret)
@@ -8668,11 +8800,11 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 			 */
 			if (dio_data.unsubmitted_oe_range_start <
 			    dio_data.unsubmitted_oe_range_end)
-				btrfs_endio_direct_write_update_ordered(inode,
+				__endio_write_update_ordered(inode,
 					dio_data.unsubmitted_oe_range_start,
 					dio_data.unsubmitted_oe_range_end -
 					dio_data.unsubmitted_oe_range_start,
-					0);
+					false);
 		} else if (ret >= 0 && (size_t)ret < count)
 			btrfs_delalloc_release_space(inode, offset,
 						     count - (size_t)ret);
@@ -8819,6 +8951,7 @@ again:
 		if (!inode_evicting)
 			clear_extent_bit(tree, start, end,
 					 EXTENT_DIRTY | EXTENT_DELALLOC |
+					 EXTENT_DELALLOC_NEW |
 					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
 					 EXTENT_DEFRAG, 1, 0, &cached_state,
 					 GFP_NOFS);
@@ -8876,8 +9009,8 @@ again:
 	if (!inode_evicting) {
 		clear_extent_bit(tree, page_start, page_end,
 				 EXTENT_LOCKED | EXTENT_DIRTY |
-				 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
-				 EXTENT_DEFRAG, 1, 1,
+				 EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
 				 &cached_state, GFP_NOFS);
 
 		__btrfs_releasepage(page, GFP_NOFS);
@@ -9248,6 +9381,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	ei->last_sub_trans = 0;
 	ei->logged_trans = 0;
 	ei->delalloc_bytes = 0;
+	ei->new_delalloc_bytes = 0;
 	ei->defrag_bytes = 0;
 	ei->disk_i_size = 0;
 	ei->flags = 0;
@@ -9313,6 +9447,7 @@ void btrfs_destroy_inode(struct inode *inode)
 	WARN_ON(BTRFS_I(inode)->outstanding_extents);
 	WARN_ON(BTRFS_I(inode)->reserved_extents);
 	WARN_ON(BTRFS_I(inode)->delalloc_bytes);
+	WARN_ON(BTRFS_I(inode)->new_delalloc_bytes);
 	WARN_ON(BTRFS_I(inode)->csum_bytes);
 	WARN_ON(BTRFS_I(inode)->defrag_bytes);
 
@@ -9436,7 +9571,7 @@ static int btrfs_getattr(const struct path *path, struct kstat *stat,
 	stat->dev = BTRFS_I(inode)->root->anon_dev;
 
 	spin_lock(&BTRFS_I(inode)->lock);
-	delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
+	delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
 	spin_unlock(&BTRFS_I(inode)->lock);
 	stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
 			ALIGN(delalloc_bytes, blocksize)) >> 9;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 922a66fce401..e176375f374f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1504,7 +1504,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 	if (ret)
 		return ret;
 
-	if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
+	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
 		mnt_drop_write_file(file);
 		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
 	}
@@ -1619,7 +1619,7 @@ out_free:
 	kfree(vol_args);
 out:
 	mutex_unlock(&fs_info->volume_mutex);
-	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 	mnt_drop_write_file(file);
 	return ret;
 }
@@ -2661,7 +2661,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1))
+	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
 		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
 
 	mutex_lock(&fs_info->volume_mutex);
@@ -2680,7 +2680,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
 	kfree(vol_args);
 out:
 	mutex_unlock(&fs_info->volume_mutex);
-	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 	return ret;
 }
 
@@ -2708,7 +2708,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
 	if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED)
 		return -EOPNOTSUPP;
 
-	if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
+	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
 		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
 		goto out;
 	}
@@ -2721,7 +2721,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
 		ret = btrfs_rm_device(fs_info, vol_args->name, 0);
 	}
 	mutex_unlock(&fs_info->volume_mutex);
-	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 
 	if (!ret) {
 		if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
@@ -2752,7 +2752,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
 	if (ret)
 		return ret;
 
-	if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
+	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
 		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
 		goto out_drop_write;
 	}
@@ -2772,7 +2772,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
 		btrfs_info(fs_info, "disk deleted %s", vol_args->name);
 	kfree(vol_args);
 out:
-	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 out_drop_write:
 	mnt_drop_write_file(file);
 
@@ -4439,13 +4439,11 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
 			ret = -EROFS;
 			goto out;
 		}
-		if (atomic_xchg(
-			&fs_info->mutually_exclusive_operation_running, 1)) {
+		if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
 			ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
 		} else {
 			ret = btrfs_dev_replace_by_ioctl(fs_info, p);
-			atomic_set(
-			 &fs_info->mutually_exclusive_operation_running, 0);
+			clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 		}
 		break;
 	case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
@@ -4640,7 +4638,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
 		return ret;
 
 again:
-	if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
+	if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
 		mutex_lock(&fs_info->volume_mutex);
 		mutex_lock(&fs_info->balance_mutex);
 		need_unlock = true;
@@ -4686,7 +4684,7 @@ again:
 	}
 
 locked:
-	BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));
+	BUG_ON(!test_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
 
 	if (arg) {
 		bargs = memdup_user(arg, sizeof(*bargs));
@@ -4742,11 +4740,10 @@ locked:
 
 do_balance:
 	/*
-	 * Ownership of bctl and mutually_exclusive_operation_running
+	 * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP
 	 * goes to to btrfs_balance.  bctl is freed in __cancel_balance,
 	 * or, if restriper was paused all the way until unmount, in
-	 * free_fs_info.  mutually_exclusive_operation_running is
-	 * cleared in __cancel_balance.
+	 * free_fs_info.  The flag is cleared in __cancel_balance.
 	 */
 	need_unlock = false;
 
@@ -4766,7 +4763,7 @@ out_unlock:
 	mutex_unlock(&fs_info->balance_mutex);
 	mutex_unlock(&fs_info->volume_mutex);
 	if (need_unlock)
-		atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+		clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 out:
 	mnt_drop_write_file(file);
 	return ret;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 9a46878ba60f..7b40e2e7292a 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -212,7 +212,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 		set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
 
 	/* one ref for the tree */
-	atomic_set(&entry->refs, 1);
+	refcount_set(&entry->refs, 1);
 	init_waitqueue_head(&entry->wait);
 	INIT_LIST_HEAD(&entry->list);
 	INIT_LIST_HEAD(&entry->root_extent_list);
@@ -358,7 +358,7 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
 out:
 	if (!ret && cached && entry) {
 		*cached = entry;
-		atomic_inc(&entry->refs);
+		refcount_inc(&entry->refs);
 	}
 	spin_unlock_irqrestore(&tree->lock, flags);
 	return ret == 0;
@@ -425,7 +425,7 @@ have_entry:
 out:
 	if (!ret && cached && entry) {
 		*cached = entry;
-		atomic_inc(&entry->refs);
+		refcount_inc(&entry->refs);
 	}
 	spin_unlock_irqrestore(&tree->lock, flags);
 	return ret == 0;
@@ -456,7 +456,7 @@ void btrfs_get_logged_extents(struct btrfs_inode *inode,
 		if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
 			continue;
 		list_add(&ordered->log_list, logged_list);
-		atomic_inc(&ordered->refs);
+		refcount_inc(&ordered->refs);
 	}
 	spin_unlock_irq(&tree->lock);
 }
@@ -565,7 +565,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
 
 	trace_btrfs_ordered_extent_put(entry->inode, entry);
 
-	if (atomic_dec_and_test(&entry->refs)) {
+	if (refcount_dec_and_test(&entry->refs)) {
 		ASSERT(list_empty(&entry->log_list));
 		ASSERT(list_empty(&entry->trans_list));
 		ASSERT(list_empty(&entry->root_extent_list));
@@ -623,7 +623,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
 		spin_lock(&fs_info->trans_lock);
 		trans = fs_info->running_transaction;
 		if (trans)
-			atomic_inc(&trans->use_count);
+			refcount_inc(&trans->use_count);
 		spin_unlock(&fs_info->trans_lock);
 
 		ASSERT(trans);
@@ -690,7 +690,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
 
 		list_move_tail(&ordered->root_extent_list,
 			       &root->ordered_extents);
-		atomic_inc(&ordered->refs);
+		refcount_inc(&ordered->refs);
 		spin_unlock(&root->ordered_extent_lock);
 
 		btrfs_init_work(&ordered->flush_work,
@@ -870,7 +870,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
 	if (!offset_in_entry(entry, file_offset))
 		entry = NULL;
 	if (entry)
-		atomic_inc(&entry->refs);
+		refcount_inc(&entry->refs);
 out:
 	spin_unlock_irq(&tree->lock);
 	return entry;
@@ -911,7 +911,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
 	}
 out:
 	if (entry)
-		atomic_inc(&entry->refs);
+		refcount_inc(&entry->refs);
 	spin_unlock_irq(&tree->lock);
 	return entry;
 }
@@ -948,7 +948,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
 		goto out;
 
 	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
-	atomic_inc(&entry->refs);
+	refcount_inc(&entry->refs);
 out:
 	spin_unlock_irq(&tree->lock);
 	return entry;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 195c93b67fe0..e0c1d5b8d859 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -113,7 +113,7 @@ struct btrfs_ordered_extent {
 	int compress_type;
 
 	/* reference count */
-	atomic_t refs;
+	refcount_t refs;
 
 	/* the inode we belong to */
 	struct inode *inode;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index afbea61d957e..deffbeb74a0b 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -47,50 +47,6 @@
  *  - check all ioctl parameters
  */
 
-/*
- * one struct for each qgroup, organized in fs_info->qgroup_tree.
- */
-struct btrfs_qgroup {
-	u64 qgroupid;
-
-	/*
-	 * state
-	 */
-	u64 rfer;	/* referenced */
-	u64 rfer_cmpr;	/* referenced compressed */
-	u64 excl;	/* exclusive */
-	u64 excl_cmpr;	/* exclusive compressed */
-
-	/*
-	 * limits
-	 */
-	u64 lim_flags;	/* which limits are set */
-	u64 max_rfer;
-	u64 max_excl;
-	u64 rsv_rfer;
-	u64 rsv_excl;
-
-	/*
-	 * reservation tracking
-	 */
-	u64 reserved;
-
-	/*
-	 * lists
-	 */
-	struct list_head groups;  /* groups this group is member of */
-	struct list_head members; /* groups that are members of this group */
-	struct list_head dirty;   /* dirty groups */
-	struct rb_node node;	  /* tree of qgroups */
-
-	/*
-	 * temp variables for accounting operations
-	 * Refer to qgroup_shared_accounting() for details.
-	 */
-	u64 old_refcnt;
-	u64 new_refcnt;
-};
-
 static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
 					   int mod)
 {
@@ -1078,6 +1034,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
 	qgroup->excl += sign * num_bytes;
 	qgroup->excl_cmpr += sign * num_bytes;
 	if (sign > 0) {
+		trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes);
 		if (qgroup->reserved < num_bytes)
 			report_reserved_underflow(fs_info, qgroup, num_bytes);
 		else
@@ -1103,6 +1060,8 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
 		WARN_ON(sign < 0 && qgroup->excl < num_bytes);
 		qgroup->excl += sign * num_bytes;
 		if (sign > 0) {
+			trace_qgroup_update_reserve(fs_info, qgroup,
+						    -(s64)num_bytes);
 			if (qgroup->reserved < num_bytes)
 				report_reserved_underflow(fs_info, qgroup,
 							  num_bytes);
@@ -2058,12 +2017,12 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
 
 		if (!ret) {
 			/*
-			 * Use (u64)-1 as time_seq to do special search, which
+			 * Use SEQ_LAST as time_seq to do special search, which
 			 * doesn't lock tree or delayed_refs and search current
 			 * root. It's safe inside commit_transaction().
 			 */
 			ret = btrfs_find_all_roots(trans, fs_info,
-					record->bytenr, (u64)-1, &new_roots);
+					record->bytenr, SEQ_LAST, &new_roots);
 			if (ret < 0)
 				goto cleanup;
 			if (qgroup_to_skip)
@@ -2370,6 +2329,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	u64 ref_root = root->root_key.objectid;
 	int ret = 0;
+	int retried = 0;
 	struct ulist_node *unode;
 	struct ulist_iterator uiter;
 
@@ -2378,7 +2338,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
 
 	if (num_bytes == 0)
 		return 0;
-
+retry:
 	spin_lock(&fs_info->qgroup_lock);
 	quota_root = fs_info->quota_root;
 	if (!quota_root)
@@ -2405,6 +2365,27 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
 		qg = unode_aux_to_qgroup(unode);
 
 		if (enforce && !qgroup_check_limits(qg, num_bytes)) {
+			/*
+			 * Commit the tree and retry, since we may have
+			 * deletions which would free up space.
+			 */
+			if (!retried && qg->reserved > 0) {
+				struct btrfs_trans_handle *trans;
+
+				spin_unlock(&fs_info->qgroup_lock);
+				ret = btrfs_start_delalloc_inodes(root, 0);
+				if (ret)
+					return ret;
+				btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
+				trans = btrfs_join_transaction(root);
+				if (IS_ERR(trans))
+					return PTR_ERR(trans);
+				ret = btrfs_commit_transaction(trans);
+				if (ret)
+					return ret;
+				retried++;
+				goto retry;
+			}
 			ret = -EDQUOT;
 			goto out;
 		}
@@ -2427,6 +2408,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
 
 		qg = unode_aux_to_qgroup(unode);
 
+		trace_qgroup_update_reserve(fs_info, qg, num_bytes);
 		qg->reserved += num_bytes;
 	}
 
@@ -2472,6 +2454,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
 
 		qg = unode_aux_to_qgroup(unode);
 
+		trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes);
 		if (qg->reserved < num_bytes)
 			report_reserved_underflow(fs_info, qg, num_bytes);
 		else
@@ -2490,18 +2473,6 @@ out:
 	spin_unlock(&fs_info->qgroup_lock);
 }
 
-void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
-{
-	if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
-		return;
-	btrfs_err(trans->fs_info,
-		"qgroups not uptodate in trans handle %p:  list is%s empty, seq is %#x.%x",
-		trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
-		(u32)(trans->delayed_ref_elem.seq >> 32),
-		(u32)trans->delayed_ref_elem.seq);
-	BUG();
-}
-
 /*
  * returns < 0 on error, 0 when more leafs are to be scanned.
  * returns 1 when done.
@@ -2889,14 +2860,14 @@ static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
 	if (ret < 0)
 		goto out;
 
-	if (free) {
-		btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
-				BTRFS_I(inode)->root->objectid,
-				changeset.bytes_changed);
+	if (free)
 		trace_op = QGROUP_FREE;
-	}
 	trace_btrfs_qgroup_release_data(inode, start, len,
 					changeset.bytes_changed, trace_op);
+	if (free)
+		btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
+				BTRFS_I(inode)->root->objectid,
+				changeset.bytes_changed);
 out:
 	ulist_release(&changeset.range_changed);
 	return ret;
@@ -2948,6 +2919,7 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
 		return 0;
 
 	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
+	trace_qgroup_meta_reserve(root, (s64)num_bytes);
 	ret = qgroup_reserve(root, num_bytes, enforce);
 	if (ret < 0)
 		return ret;
@@ -2967,6 +2939,7 @@ void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
 	reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
 	if (reserved == 0)
 		return;
+	trace_qgroup_meta_reserve(root, -(s64)reserved);
 	btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
 }
 
@@ -2981,6 +2954,7 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
 	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
 	WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
 	atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
+	trace_qgroup_meta_reserve(root, -(s64)num_bytes);
 	btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
 }
 
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 26932a8a1993..fe04d3f295c6 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -62,6 +62,50 @@ struct btrfs_qgroup_extent_record {
 };
 
 /*
+ * one struct for each qgroup, organized in fs_info->qgroup_tree.
+ */
+struct btrfs_qgroup {
+	u64 qgroupid;
+
+	/*
+	 * state
+	 */
+	u64 rfer;	/* referenced */
+	u64 rfer_cmpr;	/* referenced compressed */
+	u64 excl;	/* exclusive */
+	u64 excl_cmpr;	/* exclusive compressed */
+
+	/*
+	 * limits
+	 */
+	u64 lim_flags;	/* which limits are set */
+	u64 max_rfer;
+	u64 max_excl;
+	u64 rsv_rfer;
+	u64 rsv_excl;
+
+	/*
+	 * reservation tracking
+	 */
+	u64 reserved;
+
+	/*
+	 * lists
+	 */
+	struct list_head groups;  /* groups this group is member of */
+	struct list_head members; /* groups that are members of this group */
+	struct list_head dirty;   /* dirty groups */
+	struct rb_node node;	  /* tree of qgroups */
+
+	/*
+	 * temp variables for accounting operations
+	 * Refer to qgroup_shared_accounting() for details.
+	 */
+	u64 old_refcnt;
+	u64 new_refcnt;
+};
+
+/*
  * For qgroup event trace points only
  */
 #define QGROUP_RESERVE		(1<<0)
@@ -186,17 +230,12 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 			 struct btrfs_qgroup_inherit *inherit);
 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
 			       u64 ref_root, u64 num_bytes);
-/*
- * TODO: Add proper trace point for it, as btrfs_qgroup_free() is
- * called by everywhere, can't provide good trace for delayed ref case.
- */
 static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
 						 u64 ref_root, u64 num_bytes)
 {
-	btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
 	trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
+	btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
 }
-void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 1571bf26dc07..f3d30d9ea8f9 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -149,7 +149,7 @@ struct btrfs_raid_bio {
 
 	int generic_bio_cnt;
 
-	atomic_t refs;
+	refcount_t refs;
 
 	atomic_t stripes_pending;
 
@@ -389,7 +389,7 @@ static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
 		if (bio_list_empty(&rbio->bio_list)) {
 			if (!list_empty(&rbio->hash_list)) {
 				list_del_init(&rbio->hash_list);
-				atomic_dec(&rbio->refs);
+				refcount_dec(&rbio->refs);
 				BUG_ON(!list_empty(&rbio->plug_list));
 			}
 		}
@@ -480,7 +480,7 @@ static void cache_rbio(struct btrfs_raid_bio *rbio)
 
 	/* bump our ref if we were not in the list before */
 	if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
-		atomic_inc(&rbio->refs);
+		refcount_inc(&rbio->refs);
 
 	if (!list_empty(&rbio->stripe_cache)){
 		list_move(&rbio->stripe_cache, &table->stripe_cache);
@@ -689,7 +689,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
 			    test_bit(RBIO_CACHE_BIT, &cur->flags) &&
 			    !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
 				list_del_init(&cur->hash_list);
-				atomic_dec(&cur->refs);
+				refcount_dec(&cur->refs);
 
 				steal_rbio(cur, rbio);
 				cache_drop = cur;
@@ -738,7 +738,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
 		}
 	}
 lockit:
-	atomic_inc(&rbio->refs);
+	refcount_inc(&rbio->refs);
 	list_add(&rbio->hash_list, &h->hash_list);
 out:
 	spin_unlock_irqrestore(&h->lock, flags);
@@ -784,7 +784,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
 		}
 
 		list_del_init(&rbio->hash_list);
-		atomic_dec(&rbio->refs);
+		refcount_dec(&rbio->refs);
 
 		/*
 		 * we use the plug list to hold all the rbios
@@ -801,7 +801,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
 			list_del_init(&rbio->plug_list);
 
 			list_add(&next->hash_list, &h->hash_list);
-			atomic_inc(&next->refs);
+			refcount_inc(&next->refs);
 			spin_unlock(&rbio->bio_list_lock);
 			spin_unlock_irqrestore(&h->lock, flags);
 
@@ -843,8 +843,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
 {
 	int i;
 
-	WARN_ON(atomic_read(&rbio->refs) < 0);
-	if (!atomic_dec_and_test(&rbio->refs))
+	if (!refcount_dec_and_test(&rbio->refs))
 		return;
 
 	WARN_ON(!list_empty(&rbio->stripe_cache));
@@ -872,7 +871,7 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
  * this frees the rbio and runs through all the bios in the
  * bio_list and calls end_io on them
  */
-static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
+static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
 {
 	struct bio *cur = bio_list_get(&rbio->bio_list);
 	struct bio *next;
@@ -885,7 +884,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
 	while (cur) {
 		next = cur->bi_next;
 		cur->bi_next = NULL;
-		cur->bi_error = err;
+		cur->bi_status = err;
 		bio_endio(cur);
 		cur = next;
 	}
@@ -898,7 +897,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
 static void raid_write_end_io(struct bio *bio)
 {
 	struct btrfs_raid_bio *rbio = bio->bi_private;
-	int err = bio->bi_error;
+	blk_status_t err = bio->bi_status;
 	int max_errors;
 
 	if (err)
@@ -915,7 +914,7 @@ static void raid_write_end_io(struct bio *bio)
 	max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
 		     0 : rbio->bbio->max_errors;
 	if (atomic_read(&rbio->error) > max_errors)
-		err = -EIO;
+		err = BLK_STS_IOERR;
 
 	rbio_orig_end_io(rbio, err);
 }
@@ -997,7 +996,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
 	rbio->stripe_npages = stripe_npages;
 	rbio->faila = -1;
 	rbio->failb = -1;
-	atomic_set(&rbio->refs, 1);
+	refcount_set(&rbio->refs, 1);
 	atomic_set(&rbio->error, 0);
 	atomic_set(&rbio->stripes_pending, 0);
 
@@ -1093,7 +1092,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
 		 * devices or if they are not contiguous
 		 */
 		if (last_end == disk_start && stripe->dev->bdev &&
-		    !last->bi_error &&
+		    !last->bi_status &&
 		    last->bi_bdev == stripe->dev->bdev) {
 			ret = bio_add_page(last, page, PAGE_SIZE, 0);
 			if (ret == PAGE_SIZE)
@@ -1449,7 +1448,7 @@ static void raid_rmw_end_io(struct bio *bio)
 {
 	struct btrfs_raid_bio *rbio = bio->bi_private;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		fail_bio_stripe(rbio, bio);
 	else
 		set_bio_pages_uptodate(bio);
@@ -1992,7 +1991,7 @@ static void raid_recover_end_io(struct bio *bio)
 	 * we only read stripe pages off the disk, set them
 	 * up to date if there were no errors
 	 */
-	if (bio->bi_error)
+	if (bio->bi_status)
 		fail_bio_stripe(rbio, bio);
 	else
 		set_bio_pages_uptodate(bio);
@@ -2118,6 +2117,11 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
 	struct btrfs_raid_bio *rbio;
 	int ret;
 
+	if (generic_io) {
+		ASSERT(bbio->mirror_num == mirror_num);
+		btrfs_io_bio(bio)->mirror_num = mirror_num;
+	}
+
 	rbio = alloc_rbio(fs_info, bbio, stripe_len);
 	if (IS_ERR(rbio)) {
 		if (generic_io)
@@ -2194,6 +2198,8 @@ static void read_rebuild_work(struct btrfs_work *work)
 /*
  * The following code is used to scrub/replace the parity stripe
  *
+ * Caller must have already increased bio_counter for getting @bbio.
+ *
  * Note: We need make sure all the pages that add into the scrub/replace
  * raid bio are correct and not be changed during the scrub/replace. That
  * is those pages just hold metadata or file data with checksum.
@@ -2231,6 +2237,12 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
 	ASSERT(rbio->stripe_npages == stripe_nsectors);
 	bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
 
+	/*
+	 * We have already increased bio_counter when getting bbio, record it
+	 * so we can free it at rbio_orig_end_io().
+	 */
+	rbio->generic_bio_cnt = 1;
+
 	return rbio;
 }
 
@@ -2518,7 +2530,7 @@ static void raid56_parity_scrub_end_io(struct bio *bio)
 {
 	struct btrfs_raid_bio *rbio = bio->bi_private;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		fail_bio_stripe(rbio, bio);
 	else
 		set_bio_pages_uptodate(bio);
@@ -2673,6 +2685,12 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
 		return NULL;
 	}
 
+	/*
+	 * When we get bbio, we have already increased bio_counter, record it
+	 * so we can free it at rbio_orig_end_io()
+	 */
+	rbio->generic_bio_cnt = 1;
+
 	return rbio;
 }
 
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index e88bca87f5d2..a17e775a4a89 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -209,9 +209,9 @@ cleanup:
 	return;
 }
 
-int btree_readahead_hook(struct btrfs_fs_info *fs_info,
-			 struct extent_buffer *eb, int err)
+int btree_readahead_hook(struct extent_buffer *eb, int err)
 {
+	struct btrfs_fs_info *fs_info = eb->fs_info;
 	int ret = 0;
 	struct reada_extent *re;
 
@@ -235,10 +235,10 @@ start_machine:
 	return ret;
 }
 
-static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
-					  struct btrfs_device *dev, u64 logical,
+static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
 					  struct btrfs_bio *bbio)
 {
+	struct btrfs_fs_info *fs_info = dev->fs_info;
 	int ret;
 	struct reada_zone *zone;
 	struct btrfs_block_group_cache *cache = NULL;
@@ -270,6 +270,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
 	if (!zone)
 		return NULL;
 
+	ret = radix_tree_preload(GFP_KERNEL);
+	if (ret) {
+		kfree(zone);
+		return NULL;
+	}
+
 	zone->start = start;
 	zone->end = end;
 	INIT_LIST_HEAD(&zone->list);
@@ -299,6 +305,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
 			zone = NULL;
 	}
 	spin_unlock(&fs_info->reada_lock);
+	radix_tree_preload_end();
 
 	return zone;
 }
@@ -313,7 +320,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 	struct btrfs_bio *bbio = NULL;
 	struct btrfs_device *dev;
 	struct btrfs_device *prev_dev;
-	u32 blocksize;
 	u64 length;
 	int real_stripes;
 	int nzones = 0;
@@ -334,7 +340,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 	if (!re)
 		return NULL;
 
-	blocksize = fs_info->nodesize;
 	re->logical = logical;
 	re->top = *top;
 	INIT_LIST_HEAD(&re->extctl);
@@ -344,10 +349,10 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 	/*
 	 * map block
 	 */
-	length = blocksize;
+	length = fs_info->nodesize;
 	ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
 			&length, &bbio, 0);
-	if (ret || !bbio || length < blocksize)
+	if (ret || !bbio || length < fs_info->nodesize)
 		goto error;
 
 	if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
@@ -367,7 +372,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 		 if (!dev->bdev)
 			continue;
 
-		zone = reada_find_zone(fs_info, dev, logical, bbio);
+		zone = reada_find_zone(dev, logical, bbio);
 		if (!zone)
 			continue;
 
@@ -386,6 +391,10 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 		goto error;
 	}
 
+	ret = radix_tree_preload(GFP_KERNEL);
+	if (ret)
+		goto error;
+
 	/* insert extent in reada_tree + all per-device trees, all or nothing */
 	btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
 	spin_lock(&fs_info->reada_lock);
@@ -395,13 +404,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 		re_exist->refcnt++;
 		spin_unlock(&fs_info->reada_lock);
 		btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+		radix_tree_preload_end();
 		goto error;
 	}
 	if (ret) {
 		spin_unlock(&fs_info->reada_lock);
 		btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+		radix_tree_preload_end();
 		goto error;
 	}
+	radix_tree_preload_end();
 	prev_dev = NULL;
 	dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
 			&fs_info->dev_replace);
@@ -639,9 +651,9 @@ static int reada_pick_zone(struct btrfs_device *dev)
 	return 1;
 }
 
-static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
-				   struct btrfs_device *dev)
+static int reada_start_machine_dev(struct btrfs_device *dev)
 {
+	struct btrfs_fs_info *fs_info = dev->fs_info;
 	struct reada_extent *re = NULL;
 	int mirror_num = 0;
 	struct extent_buffer *eb = NULL;
@@ -754,8 +766,7 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
 		list_for_each_entry(device, &fs_devices->devices, dev_list) {
 			if (atomic_read(&device->reada_in_flight) <
 			    MAX_IN_FLIGHT)
-				enqueued += reada_start_machine_dev(fs_info,
-								    device);
+				enqueued += reada_start_machine_dev(device);
 		}
 		mutex_unlock(&fs_devices->device_list_mutex);
 		total += enqueued;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index a08224eab8b4..7d6bc308bf43 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -501,8 +501,9 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root)
 {
 	struct btrfs_root_item *item = &root->root_item;
-	struct timespec ct = current_fs_time(root->fs_info->sb);
+	struct timespec ct;
 
+	ktime_get_real_ts(&ct);
 	spin_lock(&root->root_item_lock);
 	btrfs_set_root_ctransid(item, trans->transid);
 	btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index b0251eb1239f..ba5595d19de1 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -64,7 +64,7 @@ struct scrub_ctx;
 #define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64k per node/leaf/sector */
 
 struct scrub_recover {
-	atomic_t		refs;
+	refcount_t		refs;
 	struct btrfs_bio	*bbio;
 	u64			map_length;
 };
@@ -95,7 +95,7 @@ struct scrub_bio {
 	struct scrub_ctx	*sctx;
 	struct btrfs_device	*dev;
 	struct bio		*bio;
-	int			err;
+	blk_status_t		status;
 	u64			logical;
 	u64			physical;
 #if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
@@ -112,7 +112,7 @@ struct scrub_block {
 	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
 	int			page_count;
 	atomic_t		outstanding_pages;
-	atomic_t		refs; /* free mem on transition to zero */
+	refcount_t		refs; /* free mem on transition to zero */
 	struct scrub_ctx	*sctx;
 	struct scrub_parity	*sparity;
 	struct {
@@ -140,9 +140,9 @@ struct scrub_parity {
 
 	int			nsectors;
 
-	int			stripe_len;
+	u64			stripe_len;
 
-	atomic_t		refs;
+	refcount_t		refs;
 
 	struct list_head	spages;
 
@@ -202,7 +202,7 @@ struct scrub_ctx {
 	 * doesn't free the scrub context before or while the workers are
 	 * doing the wakeup() call.
 	 */
-	atomic_t                refs;
+	refcount_t              refs;
 };
 
 struct scrub_fixup_nodatasum {
@@ -240,6 +240,13 @@ struct scrub_warning {
 	struct btrfs_device	*dev;
 };
 
+struct full_stripe_lock {
+	struct rb_node node;
+	u64 logical;
+	u64 refs;
+	struct mutex mutex;
+};
+
 static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
 static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
 static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
@@ -305,7 +312,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx);
 
 static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
 {
-	atomic_inc(&sctx->refs);
+	refcount_inc(&sctx->refs);
 	atomic_inc(&sctx->bios_in_flight);
 }
 
@@ -349,6 +356,222 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
 }
 
 /*
+ * Insert new full stripe lock into full stripe locks tree
+ *
+ * Return pointer to existing or newly inserted full_stripe_lock structure if
+ * everything works well.
+ * Return ERR_PTR(-ENOMEM) if we failed to allocate memory
+ *
+ * NOTE: caller must hold full_stripe_locks_root->lock before calling this
+ * function
+ */
+static struct full_stripe_lock *insert_full_stripe_lock(
+		struct btrfs_full_stripe_locks_tree *locks_root,
+		u64 fstripe_logical)
+{
+	struct rb_node **p;
+	struct rb_node *parent = NULL;
+	struct full_stripe_lock *entry;
+	struct full_stripe_lock *ret;
+
+	WARN_ON(!mutex_is_locked(&locks_root->lock));
+
+	p = &locks_root->root.rb_node;
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct full_stripe_lock, node);
+		if (fstripe_logical < entry->logical) {
+			p = &(*p)->rb_left;
+		} else if (fstripe_logical > entry->logical) {
+			p = &(*p)->rb_right;
+		} else {
+			entry->refs++;
+			return entry;
+		}
+	}
+
+	/* Insert new lock */
+	ret = kmalloc(sizeof(*ret), GFP_KERNEL);
+	if (!ret)
+		return ERR_PTR(-ENOMEM);
+	ret->logical = fstripe_logical;
+	ret->refs = 1;
+	mutex_init(&ret->mutex);
+
+	rb_link_node(&ret->node, parent, p);
+	rb_insert_color(&ret->node, &locks_root->root);
+	return ret;
+}
+
+/*
+ * Search for a full stripe lock of a block group
+ *
+ * Return pointer to existing full stripe lock if found
+ * Return NULL if not found
+ */
+static struct full_stripe_lock *search_full_stripe_lock(
+		struct btrfs_full_stripe_locks_tree *locks_root,
+		u64 fstripe_logical)
+{
+	struct rb_node *node;
+	struct full_stripe_lock *entry;
+
+	WARN_ON(!mutex_is_locked(&locks_root->lock));
+
+	node = locks_root->root.rb_node;
+	while (node) {
+		entry = rb_entry(node, struct full_stripe_lock, node);
+		if (fstripe_logical < entry->logical)
+			node = node->rb_left;
+		else if (fstripe_logical > entry->logical)
+			node = node->rb_right;
+		else
+			return entry;
+	}
+	return NULL;
+}
+
+/*
+ * Helper to get full stripe logical from a normal bytenr.
+ *
+ * Caller must ensure @cache is a RAID56 block group.
+ */
+static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache,
+				   u64 bytenr)
+{
+	u64 ret;
+
+	/*
+	 * Due to chunk item size limit, full stripe length should not be
+	 * larger than U32_MAX. Just a sanity check here.
+	 */
+	WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);
+
+	/*
+	 * round_down() can only handle power of 2, while RAID56 full
+	 * stripe length can be 64KiB * n, so we need to manually round down.
+	 */
+	ret = div64_u64(bytenr - cache->key.objectid, cache->full_stripe_len) *
+		cache->full_stripe_len + cache->key.objectid;
+	return ret;
+}
+
+/*
+ * Lock a full stripe to avoid concurrency of recovery and read
+ *
+ * It's only used for profiles with parities (RAID5/6), for other profiles it
+ * does nothing.
+ *
+ * Return 0 if we locked full stripe covering @bytenr, with a mutex held.
+ * So caller must call unlock_full_stripe() at the same context.
+ *
+ * Return <0 if encounters error.
+ */
+static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
+			    bool *locked_ret)
+{
+	struct btrfs_block_group_cache *bg_cache;
+	struct btrfs_full_stripe_locks_tree *locks_root;
+	struct full_stripe_lock *existing;
+	u64 fstripe_start;
+	int ret = 0;
+
+	*locked_ret = false;
+	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
+	if (!bg_cache) {
+		ASSERT(0);
+		return -ENOENT;
+	}
+
+	/* Profiles not based on parity don't need full stripe lock */
+	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
+		goto out;
+	locks_root = &bg_cache->full_stripe_locks_root;
+
+	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
+
+	/* Now insert the full stripe lock */
+	mutex_lock(&locks_root->lock);
+	existing = insert_full_stripe_lock(locks_root, fstripe_start);
+	mutex_unlock(&locks_root->lock);
+	if (IS_ERR(existing)) {
+		ret = PTR_ERR(existing);
+		goto out;
+	}
+	mutex_lock(&existing->mutex);
+	*locked_ret = true;
+out:
+	btrfs_put_block_group(bg_cache);
+	return ret;
+}
+
+/*
+ * Unlock a full stripe.
+ *
+ * NOTE: Caller must ensure it's the same context calling corresponding
+ * lock_full_stripe().
+ *
+ * Return 0 if we unlock full stripe without problem.
+ * Return <0 for error
+ */
+static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
+			      bool locked)
+{
+	struct btrfs_block_group_cache *bg_cache;
+	struct btrfs_full_stripe_locks_tree *locks_root;
+	struct full_stripe_lock *fstripe_lock;
+	u64 fstripe_start;
+	bool freeit = false;
+	int ret = 0;
+
+	/* If we didn't acquire full stripe lock, no need to continue */
+	if (!locked)
+		return 0;
+
+	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
+	if (!bg_cache) {
+		ASSERT(0);
+		return -ENOENT;
+	}
+	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
+		goto out;
+
+	locks_root = &bg_cache->full_stripe_locks_root;
+	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
+
+	mutex_lock(&locks_root->lock);
+	fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
+	/* Unpaired unlock_full_stripe() detected */
+	if (!fstripe_lock) {
+		WARN_ON(1);
+		ret = -ENOENT;
+		mutex_unlock(&locks_root->lock);
+		goto out;
+	}
+
+	if (fstripe_lock->refs == 0) {
+		WARN_ON(1);
+		btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
+			fstripe_lock->logical);
+	} else {
+		fstripe_lock->refs--;
+	}
+
+	if (fstripe_lock->refs == 0) {
+		rb_erase(&fstripe_lock->node, &locks_root->root);
+		freeit = true;
+	}
+	mutex_unlock(&locks_root->lock);
+
+	mutex_unlock(&fstripe_lock->mutex);
+	if (freeit)
+		kfree(fstripe_lock);
+out:
+	btrfs_put_block_group(bg_cache);
+	return ret;
+}
+
+/*
  * used for workers that require transaction commits (i.e., for the
  * NOCOW case)
  */
@@ -356,7 +579,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
 {
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
 
-	atomic_inc(&sctx->refs);
+	refcount_inc(&sctx->refs);
 	/*
 	 * increment scrubs_running to prevent cancel requests from
 	 * completing as long as a worker is running. we must also
@@ -447,7 +670,7 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
 
 static void scrub_put_ctx(struct scrub_ctx *sctx)
 {
-	if (atomic_dec_and_test(&sctx->refs))
+	if (refcount_dec_and_test(&sctx->refs))
 		scrub_free_ctx(sctx);
 }
 
@@ -462,7 +685,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
 	sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
 	if (!sctx)
 		goto nomem;
-	atomic_set(&sctx->refs, 1);
+	refcount_set(&sctx->refs, 1);
 	sctx->is_dev_replace = is_dev_replace;
 	sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
 	sctx->curr = -1;
@@ -857,12 +1080,14 @@ out:
 
 static inline void scrub_get_recover(struct scrub_recover *recover)
 {
-	atomic_inc(&recover->refs);
+	refcount_inc(&recover->refs);
 }
 
-static inline void scrub_put_recover(struct scrub_recover *recover)
+static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
+				     struct scrub_recover *recover)
 {
-	if (atomic_dec_and_test(&recover->refs)) {
+	if (refcount_dec_and_test(&recover->refs)) {
+		btrfs_bio_counter_dec(fs_info);
 		btrfs_put_bbio(recover->bbio);
 		kfree(recover);
 	}
@@ -892,6 +1117,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	int mirror_index;
 	int page_num;
 	int success;
+	bool full_stripe_locked;
 	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
 				      DEFAULT_RATELIMIT_BURST);
 
@@ -917,6 +1143,24 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	have_csum = sblock_to_check->pagev[0]->have_csum;
 	dev = sblock_to_check->pagev[0]->dev;
 
+	/*
+	 * For RAID5/6, race can happen for a different device scrub thread.
+	 * For data corruption, Parity and Data threads will both try
+	 * to recovery the data.
+	 * Race can lead to doubly added csum error, or even unrecoverable
+	 * error.
+	 */
+	ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
+	if (ret < 0) {
+		spin_lock(&sctx->stat_lock);
+		if (ret == -ENOMEM)
+			sctx->stat.malloc_errors++;
+		sctx->stat.read_errors++;
+		sctx->stat.uncorrectable_errors++;
+		spin_unlock(&sctx->stat_lock);
+		return ret;
+	}
+
 	if (sctx->is_dev_replace && !is_metadata && !have_csum) {
 		sblocks_for_recheck = NULL;
 		goto nodatasum_case;
@@ -1241,7 +1485,7 @@ out:
 				sblock->pagev[page_index]->sblock = NULL;
 				recover = sblock->pagev[page_index]->recover;
 				if (recover) {
-					scrub_put_recover(recover);
+					scrub_put_recover(fs_info, recover);
 					sblock->pagev[page_index]->recover =
 									NULL;
 				}
@@ -1251,6 +1495,9 @@ out:
 		kfree(sblocks_for_recheck);
 	}
 
+	ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
+	if (ret < 0)
+		return ret;
 	return 0;
 }
 
@@ -1330,20 +1577,23 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 		 * with a length of PAGE_SIZE, each returned stripe
 		 * represents one mirror
 		 */
+		btrfs_bio_counter_inc_blocked(fs_info);
 		ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
-				logical, &mapped_length, &bbio, 0, 1);
+				logical, &mapped_length, &bbio);
 		if (ret || !bbio || mapped_length < sublen) {
 			btrfs_put_bbio(bbio);
+			btrfs_bio_counter_dec(fs_info);
 			return -EIO;
 		}
 
 		recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
 		if (!recover) {
 			btrfs_put_bbio(bbio);
+			btrfs_bio_counter_dec(fs_info);
 			return -ENOMEM;
 		}
 
-		atomic_set(&recover->refs, 1);
+		refcount_set(&recover->refs, 1);
 		recover->bbio = bbio;
 		recover->map_length = mapped_length;
 
@@ -1365,7 +1615,7 @@ leave_nomem:
 				spin_lock(&sctx->stat_lock);
 				sctx->stat.malloc_errors++;
 				spin_unlock(&sctx->stat_lock);
-				scrub_put_recover(recover);
+				scrub_put_recover(fs_info, recover);
 				return -ENOMEM;
 			}
 			scrub_page_get(page);
@@ -1407,7 +1657,7 @@ leave_nomem:
 			scrub_get_recover(recover);
 			page->recover = recover;
 		}
-		scrub_put_recover(recover);
+		scrub_put_recover(fs_info, recover);
 		length -= sublen;
 		logical += sublen;
 		page_index++;
@@ -1418,14 +1668,14 @@ leave_nomem:
 
 struct scrub_bio_ret {
 	struct completion event;
-	int error;
+	blk_status_t status;
 };
 
 static void scrub_bio_wait_endio(struct bio *bio)
 {
 	struct scrub_bio_ret *ret = bio->bi_private;
 
-	ret->error = bio->bi_error;
+	ret->status = bio->bi_status;
 	complete(&ret->event);
 }
 
@@ -1443,7 +1693,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
 	int ret;
 
 	init_completion(&done.event);
-	done.error = 0;
+	done.status = 0;
 	bio->bi_iter.bi_sector = page->logical >> 9;
 	bio->bi_private = &done;
 	bio->bi_end_io = scrub_bio_wait_endio;
@@ -1455,7 +1705,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
 		return ret;
 
 	wait_for_completion(&done.event);
-	if (done.error)
+	if (done.status)
 		return -EIO;
 
 	return 0;
@@ -1497,14 +1747,18 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
 
 		bio_add_page(bio, page->page, PAGE_SIZE, 0);
 		if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
-			if (scrub_submit_raid56_bio_wait(fs_info, bio, page))
+			if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) {
+				page->io_error = 1;
 				sblock->no_io_error_seen = 0;
+			}
 		} else {
 			bio->bi_iter.bi_sector = page->physical >> 9;
 			bio_set_op_attrs(bio, REQ_OP_READ, 0);
 
-			if (btrfsic_submit_bio_wait(bio))
+			if (btrfsic_submit_bio_wait(bio)) {
+				page->io_error = 1;
 				sblock->no_io_error_seen = 0;
+			}
 		}
 
 		bio_put(bio);
@@ -1634,7 +1888,7 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
 	if (spage->io_error) {
 		void *mapped_buffer = kmap_atomic(spage->page);
 
-		memset(mapped_buffer, 0, PAGE_SIZE);
+		clear_page(mapped_buffer);
 		flush_dcache_page(spage->page);
 		kunmap_atomic(mapped_buffer);
 	}
@@ -1683,7 +1937,7 @@ again:
 		bio->bi_bdev = sbio->dev->bdev;
 		bio->bi_iter.bi_sector = sbio->physical >> 9;
 		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-		sbio->err = 0;
+		sbio->status = 0;
 	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
 		   spage->physical_for_dev_replace ||
 		   sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -1738,7 +1992,7 @@ static void scrub_wr_bio_end_io(struct bio *bio)
 	struct scrub_bio *sbio = bio->bi_private;
 	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
 
-	sbio->err = bio->bi_error;
+	sbio->status = bio->bi_status;
 	sbio->bio = bio;
 
 	btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
@@ -1753,7 +2007,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
 	int i;
 
 	WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
-	if (sbio->err) {
+	if (sbio->status) {
 		struct btrfs_dev_replace *dev_replace =
 			&sbio->sctx->fs_info->dev_replace;
 
@@ -1998,12 +2252,12 @@ static int scrub_checksum_super(struct scrub_block *sblock)
 
 static void scrub_block_get(struct scrub_block *sblock)
 {
-	atomic_inc(&sblock->refs);
+	refcount_inc(&sblock->refs);
 }
 
 static void scrub_block_put(struct scrub_block *sblock)
 {
-	if (atomic_dec_and_test(&sblock->refs)) {
+	if (refcount_dec_and_test(&sblock->refs)) {
 		int i;
 
 		if (sblock->sparity)
@@ -2087,7 +2341,7 @@ again:
 		bio->bi_bdev = sbio->dev->bdev;
 		bio->bi_iter.bi_sector = sbio->physical >> 9;
 		bio_set_op_attrs(bio, REQ_OP_READ, 0);
-		sbio->err = 0;
+		sbio->status = 0;
 	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
 		   spage->physical ||
 		   sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -2123,7 +2377,7 @@ static void scrub_missing_raid56_end_io(struct bio *bio)
 	struct scrub_block *sblock = bio->bi_private;
 	struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		sblock->no_io_error_seen = 0;
 
 	bio_put(bio);
@@ -2187,8 +2441,9 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
 	int ret;
 	int i;
 
+	btrfs_bio_counter_inc_blocked(fs_info);
 	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
-			&length, &bbio, 0, 1);
+			&length, &bbio);
 	if (ret || !bbio || !bbio->raid_map)
 		goto bbio_out;
 
@@ -2231,6 +2486,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
 rbio_out:
 	bio_put(bio);
 bbio_out:
+	btrfs_bio_counter_dec(fs_info);
 	btrfs_put_bbio(bbio);
 	spin_lock(&sctx->stat_lock);
 	sctx->stat.malloc_errors++;
@@ -2255,7 +2511,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 
 	/* one ref inside this function, plus one for each page added to
 	 * a bio later on */
-	atomic_set(&sblock->refs, 1);
+	refcount_set(&sblock->refs, 1);
 	sblock->sctx = sctx;
 	sblock->no_io_error_seen = 1;
 
@@ -2332,7 +2588,7 @@ static void scrub_bio_end_io(struct bio *bio)
 	struct scrub_bio *sbio = bio->bi_private;
 	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
 
-	sbio->err = bio->bi_error;
+	sbio->status = bio->bi_status;
 	sbio->bio = bio;
 
 	btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
@@ -2345,7 +2601,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
 	int i;
 
 	BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
-	if (sbio->err) {
+	if (sbio->status) {
 		for (i = 0; i < sbio->page_count; i++) {
 			struct scrub_page *spage = sbio->pagev[i];
 
@@ -2385,7 +2641,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
 				       unsigned long *bitmap,
 				       u64 start, u64 len)
 {
-	u32 offset;
+	u64 offset;
 	int nsectors;
 	int sectorsize = sparity->sctx->fs_info->sectorsize;
 
@@ -2395,8 +2651,8 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
 	}
 
 	start -= sparity->logic_start;
-	start = div_u64_rem(start, sparity->stripe_len, &offset);
-	offset /= sectorsize;
+	start = div64_u64_rem(start, sparity->stripe_len, &offset);
+	offset = div_u64(offset, sectorsize);
 	nsectors = (int)len / sectorsize;
 
 	if (offset + nsectors <= sparity->nsectors) {
@@ -2555,7 +2811,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
 
 	/* one ref inside this function, plus one for each page added to
 	 * a bio later on */
-	atomic_set(&sblock->refs, 1);
+	refcount_set(&sblock->refs, 1);
 	sblock->sctx = sctx;
 	sblock->no_io_error_seen = 1;
 	sblock->sparity = sparity;
@@ -2694,7 +2950,7 @@ static int get_raid56_logic_offset(u64 physical, int num,
 	for (i = 0; i < nr_data_stripes(map); i++) {
 		*offset = last_offset + i * map->stripe_len;
 
-		stripe_nr = div_u64(*offset, map->stripe_len);
+		stripe_nr = div64_u64(*offset, map->stripe_len);
 		stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
 
 		/* Work out the disk rotation on this stripe-set */
@@ -2748,7 +3004,7 @@ static void scrub_parity_bio_endio(struct bio *bio)
 	struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
 	struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
 			  sparity->nsectors);
 
@@ -2765,7 +3021,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
 	struct bio *bio;
 	struct btrfs_raid_bio *rbio;
-	struct scrub_page *spage;
 	struct btrfs_bio *bbio = NULL;
 	u64 length;
 	int ret;
@@ -2775,8 +3030,10 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 		goto out;
 
 	length = sparity->logic_end - sparity->logic_start;
+
+	btrfs_bio_counter_inc_blocked(fs_info);
 	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
-			       &length, &bbio, 0, 1);
+			       &length, &bbio);
 	if (ret || !bbio || !bbio->raid_map)
 		goto bbio_out;
 
@@ -2795,9 +3052,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 	if (!rbio)
 		goto rbio_out;
 
-	list_for_each_entry(spage, &sparity->spages, list)
-		raid56_add_scrub_pages(rbio, spage->page, spage->logical);
-
 	scrub_pending_bio_inc(sctx);
 	raid56_parity_submit_scrub_rbio(rbio);
 	return;
@@ -2805,6 +3059,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 rbio_out:
 	bio_put(bio);
 bbio_out:
+	btrfs_bio_counter_dec(fs_info);
 	btrfs_put_bbio(bbio);
 	bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
 		  sparity->nsectors);
@@ -2822,12 +3077,12 @@ static inline int scrub_calc_parity_bitmap_len(int nsectors)
 
 static void scrub_parity_get(struct scrub_parity *sparity)
 {
-	atomic_inc(&sparity->refs);
+	refcount_inc(&sparity->refs);
 }
 
 static void scrub_parity_put(struct scrub_parity *sparity)
 {
-	if (!atomic_dec_and_test(&sparity->refs))
+	if (!refcount_dec_and_test(&sparity->refs))
 		return;
 
 	scrub_parity_check_and_repair(sparity);
@@ -2879,7 +3134,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 	sparity->scrub_dev = sdev;
 	sparity->logic_start = logic_start;
 	sparity->logic_end = logic_end;
-	atomic_set(&sparity->refs, 1);
+	refcount_set(&sparity->refs, 1);
 	INIT_LIST_HEAD(&sparity->spages);
 	sparity->dbitmap = sparity->bitmap;
 	sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
@@ -3098,7 +3353,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 
 	physical = map->stripes[num].physical;
 	offset = 0;
-	nstripes = div_u64(length, map->stripe_len);
+	nstripes = div64_u64(length, map->stripe_len);
 	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
 		offset = map->stripe_len * num;
 		increment = map->stripe_len * map->num_stripes;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 3f645cd67b54..fc496a6f842a 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5184,13 +5184,19 @@ static int is_extent_unchanged(struct send_ctx *sctx,
 	while (key.offset < ekey->offset + left_len) {
 		ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
 		right_type = btrfs_file_extent_type(eb, ei);
-		if (right_type != BTRFS_FILE_EXTENT_REG) {
+		if (right_type != BTRFS_FILE_EXTENT_REG &&
+		    right_type != BTRFS_FILE_EXTENT_INLINE) {
 			ret = 0;
 			goto out;
 		}
 
 		right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
-		right_len = btrfs_file_extent_num_bytes(eb, ei);
+		if (right_type == BTRFS_FILE_EXTENT_INLINE) {
+			right_len = btrfs_file_extent_inline_len(eb, slot, ei);
+			right_len = PAGE_ALIGN(right_len);
+		} else {
+			right_len = btrfs_file_extent_num_bytes(eb, ei);
+		}
 		right_offset = btrfs_file_extent_offset(eb, ei);
 		right_gen = btrfs_file_extent_generation(eb, ei);
 
@@ -5204,6 +5210,19 @@ static int is_extent_unchanged(struct send_ctx *sctx,
 			goto out;
 		}
 
+		/*
+		 * We just wanted to see if when we have an inline extent, what
+		 * follows it is a regular extent (wanted to check the above
+		 * condition for inline extents too). This should normally not
+		 * happen but it's possible for example when we have an inline
+		 * compressed extent representing data with a size matching
+		 * the page size (currently the same as sector size).
+		 */
+		if (right_type == BTRFS_FILE_EXTENT_INLINE) {
+			ret = 0;
+			goto out;
+		}
+
 		left_offset_fixed = left_offset;
 		if (key.offset < ekey->offset) {
 			/* Fix the right offset for 2a and 7. */
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 72a053c9a7f0..4f1cdd5058f1 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1795,8 +1795,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 		}
 
 		if (fs_info->fs_devices->missing_devices >
-		     fs_info->num_tolerated_disk_barrier_failures &&
-		    !(*flags & MS_RDONLY)) {
+		     fs_info->num_tolerated_disk_barrier_failures) {
 			btrfs_warn(fs_info,
 				"too many missing devices, writeable remount is not allowed");
 			ret = -EACCES;
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index ea272432c930..b18ab8f327a5 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -237,7 +237,6 @@ void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans)
 {
 	memset(trans, 0, sizeof(*trans));
 	trans->transid = 1;
-	INIT_LIST_HEAD(&trans->qgroup_ref_list);
 	trans->type = __TRANS_DUMMY;
 }
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 61b807de3e16..2168654c90a1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -60,8 +60,8 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
 
 void btrfs_put_transaction(struct btrfs_transaction *transaction)
 {
-	WARN_ON(atomic_read(&transaction->use_count) == 0);
-	if (atomic_dec_and_test(&transaction->use_count)) {
+	WARN_ON(refcount_read(&transaction->use_count) == 0);
+	if (refcount_dec_and_test(&transaction->use_count)) {
 		BUG_ON(!list_empty(&transaction->list));
 		WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
 		if (transaction->delayed_refs.pending_csums)
@@ -207,7 +207,7 @@ loop:
 			spin_unlock(&fs_info->trans_lock);
 			return -EBUSY;
 		}
-		atomic_inc(&cur_trans->use_count);
+		refcount_inc(&cur_trans->use_count);
 		atomic_inc(&cur_trans->num_writers);
 		extwriter_counter_inc(cur_trans, type);
 		spin_unlock(&fs_info->trans_lock);
@@ -257,7 +257,7 @@ loop:
 	 * One for this trans handle, one so it will live on until we
 	 * commit the transaction.
 	 */
-	atomic_set(&cur_trans->use_count, 2);
+	refcount_set(&cur_trans->use_count, 2);
 	atomic_set(&cur_trans->pending_ordered, 0);
 	cur_trans->flags = 0;
 	cur_trans->start_time = get_seconds();
@@ -432,7 +432,7 @@ static void wait_current_trans(struct btrfs_fs_info *fs_info)
 	spin_lock(&fs_info->trans_lock);
 	cur_trans = fs_info->running_transaction;
 	if (cur_trans && is_transaction_blocked(cur_trans)) {
-		atomic_inc(&cur_trans->use_count);
+		refcount_inc(&cur_trans->use_count);
 		spin_unlock(&fs_info->trans_lock);
 
 		wait_event(fs_info->transaction_wait,
@@ -572,7 +572,6 @@ again:
 
 	h->type = type;
 	h->can_flush_pending_bgs = true;
-	INIT_LIST_HEAD(&h->qgroup_ref_list);
 	INIT_LIST_HEAD(&h->new_bgs);
 
 	smp_mb();
@@ -744,7 +743,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
 		list_for_each_entry(t, &fs_info->trans_list, list) {
 			if (t->transid == transid) {
 				cur_trans = t;
-				atomic_inc(&cur_trans->use_count);
+				refcount_inc(&cur_trans->use_count);
 				ret = 0;
 				break;
 			}
@@ -773,7 +772,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
 				if (t->state == TRANS_STATE_COMPLETED)
 					break;
 				cur_trans = t;
-				atomic_inc(&cur_trans->use_count);
+				refcount_inc(&cur_trans->use_count);
 				break;
 			}
 		}
@@ -917,7 +916,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 		wake_up_process(info->transaction_kthread);
 		err = -EIO;
 	}
-	assert_qgroups_uptodate(trans);
 
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 	if (must_run_delayed_refs) {
@@ -1839,7 +1837,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
 
 	/* take transaction reference */
 	cur_trans = trans->transaction;
-	atomic_inc(&cur_trans->use_count);
+	refcount_inc(&cur_trans->use_count);
 
 	btrfs_end_transaction(trans);
 
@@ -2015,7 +2013,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	spin_lock(&fs_info->trans_lock);
 	if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
 		spin_unlock(&fs_info->trans_lock);
-		atomic_inc(&cur_trans->use_count);
+		refcount_inc(&cur_trans->use_count);
 		ret = btrfs_end_transaction(trans);
 
 		wait_for_commit(cur_trans);
@@ -2035,7 +2033,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 		prev_trans = list_entry(cur_trans->list.prev,
 					struct btrfs_transaction, list);
 		if (prev_trans->state != TRANS_STATE_COMPLETED) {
-			atomic_inc(&prev_trans->use_count);
+			refcount_inc(&prev_trans->use_count);
 			spin_unlock(&fs_info->trans_lock);
 
 			wait_for_commit(prev_trans);
@@ -2130,13 +2128,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 		goto scrub_continue;
 	}
 
-	/* Reocrd old roots for later qgroup accounting */
-	ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
-	if (ret) {
-		mutex_unlock(&fs_info->reloc_mutex);
-		goto scrub_continue;
-	}
-
 	/*
 	 * make sure none of the code above managed to slip in a
 	 * delayed item
@@ -2179,6 +2170,24 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	btrfs_free_log_root_tree(trans, fs_info);
 
 	/*
+	 * commit_fs_roots() can call btrfs_save_ino_cache(), which generates
+	 * new delayed refs. Must handle them or qgroup can be wrong.
+	 */
+	ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
+	if (ret) {
+		mutex_unlock(&fs_info->tree_log_mutex);
+		mutex_unlock(&fs_info->reloc_mutex);
+		goto scrub_continue;
+	}
+
+	ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
+	if (ret) {
+		mutex_unlock(&fs_info->tree_log_mutex);
+		mutex_unlock(&fs_info->reloc_mutex);
+		goto scrub_continue;
+	}
+
+	/*
 	 * Since fs roots are all committed, we can get a quite accurate
 	 * new_roots. So let's do quota accounting.
 	 */
@@ -2223,7 +2232,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 
 	switch_commit_roots(cur_trans, fs_info);
 
-	assert_qgroups_uptodate(trans);
 	ASSERT(list_empty(&cur_trans->dirty_bgs));
 	ASSERT(list_empty(&cur_trans->io_bgs));
 	update_super_roots(fs_info);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 5dfb5590fff6..c55e44560103 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -18,6 +18,8 @@
 
 #ifndef __BTRFS_TRANSACTION__
 #define __BTRFS_TRANSACTION__
+
+#include <linux/refcount.h>
 #include "btrfs_inode.h"
 #include "delayed-ref.h"
 #include "ctree.h"
@@ -49,7 +51,7 @@ struct btrfs_transaction {
 	 * transaction can end
 	 */
 	atomic_t num_writers;
-	atomic_t use_count;
+	refcount_t use_count;
 	atomic_t pending_ordered;
 
 	unsigned long flags;
@@ -125,8 +127,6 @@ struct btrfs_trans_handle {
 	unsigned int type;
 	struct btrfs_root *root;
 	struct btrfs_fs_info *fs_info;
-	struct seq_list delayed_ref_elem;
-	struct list_head qgroup_ref_list;
 	struct list_head new_bgs;
 };
 
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a59674c3e69e..ccfe9fe7754a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4196,7 +4196,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 		if (em->generation <= test_gen)
 			continue;
 		/* Need a ref to keep it from getting evicted from cache */
-		atomic_inc(&em->refs);
+		refcount_inc(&em->refs);
 		set_bit(EXTENT_FLAG_LOGGING, &em->flags);
 		list_add_tail(&em->list, &extents);
 		num++;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ab8a66d852f9..84a495967e0a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -139,6 +139,11 @@ static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
 static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
 static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
 static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
+static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
+			     enum btrfs_map_op op,
+			     u64 logical, u64 *length,
+			     struct btrfs_bio **bbio_ret,
+			     int mirror_num, int need_raid_map);
 
 DEFINE_MUTEX(uuid_mutex);
 static LIST_HEAD(fs_uuids);
@@ -1008,14 +1013,13 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		q = bdev_get_queue(bdev);
 		if (blk_queue_discard(q))
 			device->can_discard = 1;
+		if (!blk_queue_nonrot(q))
+			fs_devices->rotating = 1;
 
 		device->bdev = bdev;
 		device->in_fs_metadata = 0;
 		device->mode = flags;
 
-		if (!blk_queue_nonrot(bdev_get_queue(bdev)))
-			fs_devices->rotating = 1;
-
 		fs_devices->open_devices++;
 		if (device->writeable &&
 		    device->devid != BTRFS_DEV_REPLACE_DEVID) {
@@ -2417,7 +2421,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	fs_info->free_chunk_space += device->total_bytes;
 	spin_unlock(&fs_info->free_chunk_lock);
 
-	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+	if (!blk_queue_nonrot(q))
 		fs_info->fs_devices->rotating = 1;
 
 	tmp = btrfs_super_total_bytes(fs_info->super_copy);
@@ -2795,10 +2799,38 @@ static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info,
 	return ret;
 }
 
+static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
+					u64 logical, u64 length)
+{
+	struct extent_map_tree *em_tree;
+	struct extent_map *em;
+
+	em_tree = &fs_info->mapping_tree.map_tree;
+	read_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, logical, length);
+	read_unlock(&em_tree->lock);
+
+	if (!em) {
+		btrfs_crit(fs_info, "unable to find logical %llu length %llu",
+			   logical, length);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (em->start > logical || em->start + em->len < logical) {
+		btrfs_crit(fs_info,
+			   "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
+			   logical, length, em->start, em->start + em->len);
+		free_extent_map(em);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* callers are responsible for dropping em's ref. */
+	return em;
+}
+
 int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
 		       struct btrfs_fs_info *fs_info, u64 chunk_offset)
 {
-	struct extent_map_tree *em_tree;
 	struct extent_map *em;
 	struct map_lookup *map;
 	u64 dev_extent_len = 0;
@@ -2806,23 +2838,15 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
 	int i, ret = 0;
 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
 
-	em_tree = &fs_info->mapping_tree.map_tree;
-
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
-	read_unlock(&em_tree->lock);
-
-	if (!em || em->start > chunk_offset ||
-	    em->start + em->len < chunk_offset) {
+	em = get_chunk_map(fs_info, chunk_offset, 1);
+	if (IS_ERR(em)) {
 		/*
 		 * This is a logic error, but we don't want to just rely on the
 		 * user having built with ASSERT enabled, so if ASSERT doesn't
 		 * do anything we still error out.
 		 */
 		ASSERT(0);
-		if (em)
-			free_extent_map(em);
-		return -EINVAL;
+		return PTR_ERR(em);
 	}
 	map = em->map_lookup;
 	mutex_lock(&fs_info->chunk_mutex);
@@ -3736,7 +3760,7 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
 	if (ret)
 		btrfs_handle_fs_error(fs_info, ret, NULL);
 
-	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 }
 
 /* Non-zero return value signifies invalidity */
@@ -3755,6 +3779,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 		  struct btrfs_ioctl_balance_args *bargs)
 {
 	struct btrfs_fs_info *fs_info = bctl->fs_info;
+	u64 meta_target, data_target;
 	u64 allowed;
 	int mixed = 0;
 	int ret;
@@ -3851,11 +3876,16 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 		}
 	} while (read_seqretry(&fs_info->profiles_lock, seq));
 
-	if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) <
-		btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) {
+	/* if we're not converting, the target field is uninitialized */
+	meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
+		bctl->meta.target : fs_info->avail_metadata_alloc_bits;
+	data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
+		bctl->data.target : fs_info->avail_data_alloc_bits;
+	if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
+		btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
 		btrfs_warn(fs_info,
 			   "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
-			   bctl->meta.target, bctl->data.target);
+			   meta_target, data_target);
 	}
 
 	if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
@@ -3910,7 +3940,7 @@ out:
 		__cancel_balance(fs_info);
 	else {
 		kfree(bctl);
-		atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+		clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 	}
 	return ret;
 }
@@ -4000,7 +4030,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
 	btrfs_balance_sys(leaf, item, &disk_bargs);
 	btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
 
-	WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
+	WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
 
 	mutex_lock(&fs_info->volume_mutex);
 	mutex_lock(&fs_info->balance_mutex);
@@ -4785,7 +4815,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	stripe_size = div_u64(stripe_size, dev_stripes);
 
 	/* align to BTRFS_STRIPE_LEN */
-	stripe_size = div_u64(stripe_size, raid_stripe_len);
+	stripe_size = div64_u64(stripe_size, raid_stripe_len);
 	stripe_size *= raid_stripe_len;
 
 	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
@@ -4833,7 +4863,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	ret = add_extent_mapping(em_tree, em, 0);
 	if (!ret) {
 		list_add_tail(&em->list, &trans->transaction->pending_chunks);
-		atomic_inc(&em->refs);
+		refcount_inc(&em->refs);
 	}
 	write_unlock(&em_tree->lock);
 	if (ret) {
@@ -4888,7 +4918,6 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
 	struct btrfs_device *device;
 	struct btrfs_chunk *chunk;
 	struct btrfs_stripe *stripe;
-	struct extent_map_tree *em_tree;
 	struct extent_map *em;
 	struct map_lookup *map;
 	size_t item_size;
@@ -4897,24 +4926,9 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
 	int i = 0;
 	int ret = 0;
 
-	em_tree = &fs_info->mapping_tree.map_tree;
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, chunk_offset, chunk_size);
-	read_unlock(&em_tree->lock);
-
-	if (!em) {
-		btrfs_crit(fs_info, "unable to find logical %Lu len %Lu",
-			   chunk_offset, chunk_size);
-		return -EINVAL;
-	}
-
-	if (em->start != chunk_offset || em->len != chunk_size) {
-		btrfs_crit(fs_info,
-			   "found a bad mapping, wanted %Lu-%Lu, found %Lu-%Lu",
-			    chunk_offset, chunk_size, em->start, em->len);
-		free_extent_map(em);
-		return -EINVAL;
-	}
+	em = get_chunk_map(fs_info, chunk_offset, chunk_size);
+	if (IS_ERR(em))
+		return PTR_ERR(em);
 
 	map = em->map_lookup;
 	item_size = btrfs_chunk_item_size(map->num_stripes);
@@ -5055,15 +5069,12 @@ int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 {
 	struct extent_map *em;
 	struct map_lookup *map;
-	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 	int readonly = 0;
 	int miss_ndevs = 0;
 	int i;
 
-	read_lock(&map_tree->map_tree.lock);
-	em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
-	read_unlock(&map_tree->map_tree.lock);
-	if (!em)
+	em = get_chunk_map(fs_info, chunk_offset, 1);
+	if (IS_ERR(em))
 		return 1;
 
 	map = em->map_lookup;
@@ -5117,34 +5128,19 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
 
 int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 {
-	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 	struct extent_map *em;
 	struct map_lookup *map;
-	struct extent_map_tree *em_tree = &map_tree->map_tree;
 	int ret;
 
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, logical, len);
-	read_unlock(&em_tree->lock);
-
-	/*
-	 * We could return errors for these cases, but that could get ugly and
-	 * we'd probably do the same thing which is just not do anything else
-	 * and exit, so return 1 so the callers don't try to use other copies.
-	 */
-	if (!em) {
-		btrfs_crit(fs_info, "No mapping for %Lu-%Lu", logical,
-			    logical+len);
-		return 1;
-	}
-
-	if (em->start > logical || em->start + em->len < logical) {
-		btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got %Lu-%Lu",
-			   logical, logical+len, em->start,
-			   em->start + em->len);
-		free_extent_map(em);
+	em = get_chunk_map(fs_info, logical, len);
+	if (IS_ERR(em))
+		/*
+		 * We could return errors for these cases, but that could get
+		 * ugly and we'd probably do the same thing which is just not do
+		 * anything else and exit, so return 1 so the callers don't try
+		 * to use other copies.
+		 */
 		return 1;
-	}
 
 	map = em->map_lookup;
 	if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
@@ -5160,7 +5156,8 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 	free_extent_map(em);
 
 	btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
-	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
+	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
+	    fs_info->dev_replace.tgtdev)
 		ret++;
 	btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
 
@@ -5173,15 +5170,11 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
 {
 	struct extent_map *em;
 	struct map_lookup *map;
-	struct extent_map_tree *em_tree = &map_tree->map_tree;
 	unsigned long len = fs_info->sectorsize;
 
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, logical, len);
-	read_unlock(&em_tree->lock);
-	BUG_ON(!em);
+	em = get_chunk_map(fs_info, logical, len);
+	WARN_ON(IS_ERR(em));
 
-	BUG_ON(em->start > logical || em->start + em->len < logical);
 	map = em->map_lookup;
 	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
 		len = map->stripe_len * nr_data_stripes(map);
@@ -5189,20 +5182,16 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
 	return len;
 }
 
-int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
+int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
 			   u64 logical, u64 len, int mirror_num)
 {
 	struct extent_map *em;
 	struct map_lookup *map;
-	struct extent_map_tree *em_tree = &map_tree->map_tree;
 	int ret = 0;
 
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, logical, len);
-	read_unlock(&em_tree->lock);
-	BUG_ON(!em);
+	em = get_chunk_map(fs_info, logical, len);
+	WARN_ON(IS_ERR(em));
 
-	BUG_ON(em->start > logical || em->start + em->len < logical);
 	map = em->map_lookup;
 	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
 		ret = 1;
@@ -5295,25 +5284,353 @@ static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
 		GFP_NOFS|__GFP_NOFAIL);
 
 	atomic_set(&bbio->error, 0);
-	atomic_set(&bbio->refs, 1);
+	refcount_set(&bbio->refs, 1);
 
 	return bbio;
 }
 
 void btrfs_get_bbio(struct btrfs_bio *bbio)
 {
-	WARN_ON(!atomic_read(&bbio->refs));
-	atomic_inc(&bbio->refs);
+	WARN_ON(!refcount_read(&bbio->refs));
+	refcount_inc(&bbio->refs);
 }
 
 void btrfs_put_bbio(struct btrfs_bio *bbio)
 {
 	if (!bbio)
 		return;
-	if (atomic_dec_and_test(&bbio->refs))
+	if (refcount_dec_and_test(&bbio->refs))
 		kfree(bbio);
 }
 
+/* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */
+/*
+ * Please note that, discard won't be sent to target device of device
+ * replace.
+ */
+static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
+					 u64 logical, u64 length,
+					 struct btrfs_bio **bbio_ret)
+{
+	struct extent_map *em;
+	struct map_lookup *map;
+	struct btrfs_bio *bbio;
+	u64 offset;
+	u64 stripe_nr;
+	u64 stripe_nr_end;
+	u64 stripe_end_offset;
+	u64 stripe_cnt;
+	u64 stripe_len;
+	u64 stripe_offset;
+	u64 num_stripes;
+	u32 stripe_index;
+	u32 factor = 0;
+	u32 sub_stripes = 0;
+	u64 stripes_per_dev = 0;
+	u32 remaining_stripes = 0;
+	u32 last_stripe = 0;
+	int ret = 0;
+	int i;
+
+	/* discard always return a bbio */
+	ASSERT(bbio_ret);
+
+	em = get_chunk_map(fs_info, logical, length);
+	if (IS_ERR(em))
+		return PTR_ERR(em);
+
+	map = em->map_lookup;
+	/* we don't discard raid56 yet */
+	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	offset = logical - em->start;
+	length = min_t(u64, em->len - offset, length);
+
+	stripe_len = map->stripe_len;
+	/*
+	 * stripe_nr counts the total number of stripes we have to stride
+	 * to get to this block
+	 */
+	stripe_nr = div64_u64(offset, stripe_len);
+
+	/* stripe_offset is the offset of this block in its stripe */
+	stripe_offset = offset - stripe_nr * stripe_len;
+
+	stripe_nr_end = round_up(offset + length, map->stripe_len);
+	stripe_nr_end = div64_u64(stripe_nr_end, map->stripe_len);
+	stripe_cnt = stripe_nr_end - stripe_nr;
+	stripe_end_offset = stripe_nr_end * map->stripe_len -
+			    (offset + length);
+	/*
+	 * after this, stripe_nr is the number of stripes on this
+	 * device we have to walk to find the data, and stripe_index is
+	 * the number of our device in the stripe array
+	 */
+	num_stripes = 1;
+	stripe_index = 0;
+	if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+			 BTRFS_BLOCK_GROUP_RAID10)) {
+		if (map->type & BTRFS_BLOCK_GROUP_RAID0)
+			sub_stripes = 1;
+		else
+			sub_stripes = map->sub_stripes;
+
+		factor = map->num_stripes / sub_stripes;
+		num_stripes = min_t(u64, map->num_stripes,
+				    sub_stripes * stripe_cnt);
+		stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
+		stripe_index *= sub_stripes;
+		stripes_per_dev = div_u64_rem(stripe_cnt, factor,
+					      &remaining_stripes);
+		div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
+		last_stripe *= sub_stripes;
+	} else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+				BTRFS_BLOCK_GROUP_DUP)) {
+		num_stripes = map->num_stripes;
+	} else {
+		stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
+					&stripe_index);
+	}
+
+	bbio = alloc_btrfs_bio(num_stripes, 0);
+	if (!bbio) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < num_stripes; i++) {
+		bbio->stripes[i].physical =
+			map->stripes[stripe_index].physical +
+			stripe_offset + stripe_nr * map->stripe_len;
+		bbio->stripes[i].dev = map->stripes[stripe_index].dev;
+
+		if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+				 BTRFS_BLOCK_GROUP_RAID10)) {
+			bbio->stripes[i].length = stripes_per_dev *
+				map->stripe_len;
+
+			if (i / sub_stripes < remaining_stripes)
+				bbio->stripes[i].length +=
+					map->stripe_len;
+
+			/*
+			 * Special for the first stripe and
+			 * the last stripe:
+			 *
+			 * |-------|...|-------|
+			 *     |----------|
+			 *    off     end_off
+			 */
+			if (i < sub_stripes)
+				bbio->stripes[i].length -=
+					stripe_offset;
+
+			if (stripe_index >= last_stripe &&
+			    stripe_index <= (last_stripe +
+					     sub_stripes - 1))
+				bbio->stripes[i].length -=
+					stripe_end_offset;
+
+			if (i == sub_stripes - 1)
+				stripe_offset = 0;
+		} else {
+			bbio->stripes[i].length = length;
+		}
+
+		stripe_index++;
+		if (stripe_index == map->num_stripes) {
+			stripe_index = 0;
+			stripe_nr++;
+		}
+	}
+
+	*bbio_ret = bbio;
+	bbio->map_type = map->type;
+	bbio->num_stripes = num_stripes;
+out:
+	free_extent_map(em);
+	return ret;
+}
+
+/*
+ * In dev-replace case, for repair case (that's the only case where the mirror
+ * is selected explicitly when calling btrfs_map_block), blocks left of the
+ * left cursor can also be read from the target drive.
+ *
+ * For REQ_GET_READ_MIRRORS, the target drive is added as the last one to the
+ * array of stripes.
+ * For READ, it also needs to be supported using the same mirror number.
+ *
+ * If the requested block is not left of the left cursor, EIO is returned. This
+ * can happen because btrfs_num_copies() returns one more in the dev-replace
+ * case.
+ */
+static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
+					 u64 logical, u64 length,
+					 u64 srcdev_devid, int *mirror_num,
+					 u64 *physical)
+{
+	struct btrfs_bio *bbio = NULL;
+	int num_stripes;
+	int index_srcdev = 0;
+	int found = 0;
+	u64 physical_of_found = 0;
+	int i;
+	int ret = 0;
+
+	ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
+				logical, &length, &bbio, 0, 0);
+	if (ret) {
+		ASSERT(bbio == NULL);
+		return ret;
+	}
+
+	num_stripes = bbio->num_stripes;
+	if (*mirror_num > num_stripes) {
+		/*
+		 * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror,
+		 * that means that the requested area is not left of the left
+		 * cursor
+		 */
+		btrfs_put_bbio(bbio);
+		return -EIO;
+	}
+
+	/*
+	 * process the rest of the function using the mirror_num of the source
+	 * drive. Therefore look it up first.  At the end, patch the device
+	 * pointer to the one of the target drive.
+	 */
+	for (i = 0; i < num_stripes; i++) {
+		if (bbio->stripes[i].dev->devid != srcdev_devid)
+			continue;
+
+		/*
+		 * In case of DUP, in order to keep it simple, only add the
+		 * mirror with the lowest physical address
+		 */
+		if (found &&
+		    physical_of_found <= bbio->stripes[i].physical)
+			continue;
+
+		index_srcdev = i;
+		found = 1;
+		physical_of_found = bbio->stripes[i].physical;
+	}
+
+	btrfs_put_bbio(bbio);
+
+	ASSERT(found);
+	if (!found)
+		return -EIO;
+
+	*mirror_num = index_srcdev + 1;
+	*physical = physical_of_found;
+	return ret;
+}
+
+static void handle_ops_on_dev_replace(enum btrfs_map_op op,
+				      struct btrfs_bio **bbio_ret,
+				      struct btrfs_dev_replace *dev_replace,
+				      int *num_stripes_ret, int *max_errors_ret)
+{
+	struct btrfs_bio *bbio = *bbio_ret;
+	u64 srcdev_devid = dev_replace->srcdev->devid;
+	int tgtdev_indexes = 0;
+	int num_stripes = *num_stripes_ret;
+	int max_errors = *max_errors_ret;
+	int i;
+
+	if (op == BTRFS_MAP_WRITE) {
+		int index_where_to_add;
+
+		/*
+		 * duplicate the write operations while the dev replace
+		 * procedure is running. Since the copying of the old disk to
+		 * the new disk takes place at run time while the filesystem is
+		 * mounted writable, the regular write operations to the old
+		 * disk have to be duplicated to go to the new disk as well.
+		 *
+		 * Note that device->missing is handled by the caller, and that
+		 * the write to the old disk is already set up in the stripes
+		 * array.
+		 */
+		index_where_to_add = num_stripes;
+		for (i = 0; i < num_stripes; i++) {
+			if (bbio->stripes[i].dev->devid == srcdev_devid) {
+				/* write to new disk, too */
+				struct btrfs_bio_stripe *new =
+					bbio->stripes + index_where_to_add;
+				struct btrfs_bio_stripe *old =
+					bbio->stripes + i;
+
+				new->physical = old->physical;
+				new->length = old->length;
+				new->dev = dev_replace->tgtdev;
+				bbio->tgtdev_map[i] = index_where_to_add;
+				index_where_to_add++;
+				max_errors++;
+				tgtdev_indexes++;
+			}
+		}
+		num_stripes = index_where_to_add;
+	} else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
+		int index_srcdev = 0;
+		int found = 0;
+		u64 physical_of_found = 0;
+
+		/*
+		 * During the dev-replace procedure, the target drive can also
+		 * be used to read data in case it is needed to repair a corrupt
+		 * block elsewhere. This is possible if the requested area is
+		 * left of the left cursor. In this area, the target drive is a
+		 * full copy of the source drive.
+		 */
+		for (i = 0; i < num_stripes; i++) {
+			if (bbio->stripes[i].dev->devid == srcdev_devid) {
+				/*
+				 * In case of DUP, in order to keep it simple,
+				 * only add the mirror with the lowest physical
+				 * address
+				 */
+				if (found &&
+				    physical_of_found <=
+				     bbio->stripes[i].physical)
+					continue;
+				index_srcdev = i;
+				found = 1;
+				physical_of_found = bbio->stripes[i].physical;
+			}
+		}
+		if (found) {
+			struct btrfs_bio_stripe *tgtdev_stripe =
+				bbio->stripes + num_stripes;
+
+			tgtdev_stripe->physical = physical_of_found;
+			tgtdev_stripe->length =
+				bbio->stripes[index_srcdev].length;
+			tgtdev_stripe->dev = dev_replace->tgtdev;
+			bbio->tgtdev_map[index_srcdev] = num_stripes;
+
+			tgtdev_indexes++;
+			num_stripes++;
+		}
+	}
+
+	*num_stripes_ret = num_stripes;
+	*max_errors_ret = max_errors;
+	bbio->num_tgtdevs = tgtdev_indexes;
+	*bbio_ret = bbio;
+}
+
+static bool need_full_stripe(enum btrfs_map_op op)
+{
+	return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
+}
+
 static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 			     enum btrfs_map_op op,
 			     u64 logical, u64 *length,
@@ -5322,14 +5639,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 {
 	struct extent_map *em;
 	struct map_lookup *map;
-	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
-	struct extent_map_tree *em_tree = &map_tree->map_tree;
 	u64 offset;
 	u64 stripe_offset;
-	u64 stripe_end_offset;
 	u64 stripe_nr;
-	u64 stripe_nr_orig;
-	u64 stripe_nr_end;
 	u64 stripe_len;
 	u32 stripe_index;
 	int i;
@@ -5345,23 +5657,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 	u64 physical_to_patch_in_first_stripe = 0;
 	u64 raid56_full_stripe_start = (u64)-1;
 
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, logical, *length);
-	read_unlock(&em_tree->lock);
-
-	if (!em) {
-		btrfs_crit(fs_info, "unable to find logical %llu len %llu",
-			logical, *length);
-		return -EINVAL;
-	}
+	if (op == BTRFS_MAP_DISCARD)
+		return __btrfs_map_block_for_discard(fs_info, logical,
+						     *length, bbio_ret);
 
-	if (em->start > logical || em->start + em->len < logical) {
-		btrfs_crit(fs_info,
-			   "found a bad mapping, wanted %Lu, found %Lu-%Lu",
-			   logical, em->start, em->start + em->len);
-		free_extent_map(em);
-		return -EINVAL;
-	}
+	em = get_chunk_map(fs_info, logical, *length);
+	if (IS_ERR(em))
+		return PTR_ERR(em);
 
 	map = em->map_lookup;
 	offset = logical - em->start;
@@ -5400,14 +5702,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		raid56_full_stripe_start *= full_stripe_len;
 	}
 
-	if (op == BTRFS_MAP_DISCARD) {
-		/* we don't discard raid56 yet */
-		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-			ret = -EOPNOTSUPP;
-			goto out;
-		}
-		*length = min_t(u64, em->len - offset, *length);
-	} else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+	if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
 		u64 max_len;
 		/* For writes to RAID[56], allow a full stripeset across all disks.
 		   For other RAID types and for RAID[56] reads, just allow a single
@@ -5438,105 +5733,28 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		btrfs_dev_replace_set_lock_blocking(dev_replace);
 
 	if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
-	    op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
-	    op != BTRFS_MAP_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) {
-		/*
-		 * in dev-replace case, for repair case (that's the only
-		 * case where the mirror is selected explicitly when
-		 * calling btrfs_map_block), blocks left of the left cursor
-		 * can also be read from the target drive.
-		 * For REQ_GET_READ_MIRRORS, the target drive is added as
-		 * the last one to the array of stripes. For READ, it also
-		 * needs to be supported using the same mirror number.
-		 * If the requested block is not left of the left cursor,
-		 * EIO is returned. This can happen because btrfs_num_copies()
-		 * returns one more in the dev-replace case.
-		 */
-		u64 tmp_length = *length;
-		struct btrfs_bio *tmp_bbio = NULL;
-		int tmp_num_stripes;
-		u64 srcdev_devid = dev_replace->srcdev->devid;
-		int index_srcdev = 0;
-		int found = 0;
-		u64 physical_of_found = 0;
-
-		ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
-			     logical, &tmp_length, &tmp_bbio, 0, 0);
-		if (ret) {
-			WARN_ON(tmp_bbio != NULL);
-			goto out;
-		}
-
-		tmp_num_stripes = tmp_bbio->num_stripes;
-		if (mirror_num > tmp_num_stripes) {
-			/*
-			 * BTRFS_MAP_GET_READ_MIRRORS does not contain this
-			 * mirror, that means that the requested area
-			 * is not left of the left cursor
-			 */
-			ret = -EIO;
-			btrfs_put_bbio(tmp_bbio);
-			goto out;
-		}
-
-		/*
-		 * process the rest of the function using the mirror_num
-		 * of the source drive. Therefore look it up first.
-		 * At the end, patch the device pointer to the one of the
-		 * target drive.
-		 */
-		for (i = 0; i < tmp_num_stripes; i++) {
-			if (tmp_bbio->stripes[i].dev->devid != srcdev_devid)
-				continue;
-
-			/*
-			 * In case of DUP, in order to keep it simple, only add
-			 * the mirror with the lowest physical address
-			 */
-			if (found &&
-			    physical_of_found <= tmp_bbio->stripes[i].physical)
-				continue;
-
-			index_srcdev = i;
-			found = 1;
-			physical_of_found = tmp_bbio->stripes[i].physical;
-		}
-
-		btrfs_put_bbio(tmp_bbio);
-
-		if (!found) {
-			WARN_ON(1);
-			ret = -EIO;
+	    !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
+		ret = get_extra_mirror_from_replace(fs_info, logical, *length,
+						    dev_replace->srcdev->devid,
+						    &mirror_num,
+					    &physical_to_patch_in_first_stripe);
+		if (ret)
 			goto out;
-		}
-
-		mirror_num = index_srcdev + 1;
-		patch_the_first_stripe_for_dev_replace = 1;
-		physical_to_patch_in_first_stripe = physical_of_found;
+		else
+			patch_the_first_stripe_for_dev_replace = 1;
 	} else if (mirror_num > map->num_stripes) {
 		mirror_num = 0;
 	}
 
 	num_stripes = 1;
 	stripe_index = 0;
-	stripe_nr_orig = stripe_nr;
-	stripe_nr_end = ALIGN(offset + *length, map->stripe_len);
-	stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len);
-	stripe_end_offset = stripe_nr_end * map->stripe_len -
-			    (offset + *length);
-
 	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
-		if (op == BTRFS_MAP_DISCARD)
-			num_stripes = min_t(u64, map->num_stripes,
-					    stripe_nr_end - stripe_nr_orig);
 		stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
 				&stripe_index);
-		if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
-		    op != BTRFS_MAP_GET_READ_MIRRORS)
+		if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS)
 			mirror_num = 1;
 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD ||
-		    op == BTRFS_MAP_GET_READ_MIRRORS)
+		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;
@@ -5549,8 +5767,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		}
 
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD ||
-		    op == BTRFS_MAP_GET_READ_MIRRORS) {
+		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) {
 			num_stripes = map->num_stripes;
 		} else if (mirror_num) {
 			stripe_index = mirror_num - 1;
@@ -5566,10 +5783,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 
 		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
 			num_stripes = map->sub_stripes;
-		else if (op == BTRFS_MAP_DISCARD)
-			num_stripes = min_t(u64, map->sub_stripes *
-					    (stripe_nr_end - stripe_nr_orig),
-					    map->num_stripes);
 		else if (mirror_num)
 			stripe_index += mirror_num - 1;
 		else {
@@ -5587,7 +5800,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		    (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS ||
 		     mirror_num > 1)) {
 			/* push stripe_nr back to the start of the full stripe */
-			stripe_nr = div_u64(raid56_full_stripe_start,
+			stripe_nr = div64_u64(raid56_full_stripe_start,
 					stripe_len * nr_data_stripes(map));
 
 			/* RAID[56] write or recovery. Return all stripes */
@@ -5612,8 +5825,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 			/* We distribute the parity blocks across stripes */
 			div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
 					&stripe_index);
-			if ((op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
-			    op != BTRFS_MAP_GET_READ_MIRRORS) && mirror_num <= 1)
+			if ((op != BTRFS_MAP_WRITE &&
+			     op != BTRFS_MAP_GET_READ_MIRRORS) &&
+			    mirror_num <= 1)
 				mirror_num = 1;
 		}
 	} else {
@@ -5635,8 +5849,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 	}
 
 	num_alloc_stripes = num_stripes;
-	if (dev_replace_is_ongoing) {
-		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD)
+	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
+		if (op == BTRFS_MAP_WRITE)
 			num_alloc_stripes <<= 1;
 		if (op == BTRFS_MAP_GET_READ_MIRRORS)
 			num_alloc_stripes++;
@@ -5648,14 +5862,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		ret = -ENOMEM;
 		goto out;
 	}
-	if (dev_replace_is_ongoing)
+	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
 		bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
 
 	/* build raid_map */
-	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK &&
-	    need_raid_map &&
-	    ((op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) ||
-	    mirror_num > 1)) {
+	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
+	    (need_full_stripe(op) || mirror_num > 1)) {
 		u64 tmp;
 		unsigned rot;
 
@@ -5679,173 +5891,27 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 				RAID6_Q_STRIPE;
 	}
 
-	if (op == BTRFS_MAP_DISCARD) {
-		u32 factor = 0;
-		u32 sub_stripes = 0;
-		u64 stripes_per_dev = 0;
-		u32 remaining_stripes = 0;
-		u32 last_stripe = 0;
 
-		if (map->type &
-		    (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) {
-			if (map->type & BTRFS_BLOCK_GROUP_RAID0)
-				sub_stripes = 1;
-			else
-				sub_stripes = map->sub_stripes;
-
-			factor = map->num_stripes / sub_stripes;
-			stripes_per_dev = div_u64_rem(stripe_nr_end -
-						      stripe_nr_orig,
-						      factor,
-						      &remaining_stripes);
-			div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
-			last_stripe *= sub_stripes;
-		}
-
-		for (i = 0; i < num_stripes; i++) {
-			bbio->stripes[i].physical =
-				map->stripes[stripe_index].physical +
-				stripe_offset + stripe_nr * map->stripe_len;
-			bbio->stripes[i].dev = map->stripes[stripe_index].dev;
-
-			if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
-					 BTRFS_BLOCK_GROUP_RAID10)) {
-				bbio->stripes[i].length = stripes_per_dev *
-							  map->stripe_len;
-
-				if (i / sub_stripes < remaining_stripes)
-					bbio->stripes[i].length +=
-						map->stripe_len;
-
-				/*
-				 * Special for the first stripe and
-				 * the last stripe:
-				 *
-				 * |-------|...|-------|
-				 *     |----------|
-				 *    off     end_off
-				 */
-				if (i < sub_stripes)
-					bbio->stripes[i].length -=
-						stripe_offset;
-
-				if (stripe_index >= last_stripe &&
-				    stripe_index <= (last_stripe +
-						     sub_stripes - 1))
-					bbio->stripes[i].length -=
-						stripe_end_offset;
-
-				if (i == sub_stripes - 1)
-					stripe_offset = 0;
-			} else
-				bbio->stripes[i].length = *length;
-
-			stripe_index++;
-			if (stripe_index == map->num_stripes) {
-				/* This could only happen for RAID0/10 */
-				stripe_index = 0;
-				stripe_nr++;
-			}
-		}
-	} else {
-		for (i = 0; i < num_stripes; i++) {
-			bbio->stripes[i].physical =
-				map->stripes[stripe_index].physical +
-				stripe_offset +
-				stripe_nr * map->stripe_len;
-			bbio->stripes[i].dev =
-				map->stripes[stripe_index].dev;
-			stripe_index++;
-		}
+	for (i = 0; i < num_stripes; i++) {
+		bbio->stripes[i].physical =
+			map->stripes[stripe_index].physical +
+			stripe_offset +
+			stripe_nr * map->stripe_len;
+		bbio->stripes[i].dev =
+			map->stripes[stripe_index].dev;
+		stripe_index++;
 	}
 
-	if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
+	if (need_full_stripe(op))
 		max_errors = btrfs_chunk_max_errors(map);
 
 	if (bbio->raid_map)
 		sort_parity_stripes(bbio, num_stripes);
 
-	tgtdev_indexes = 0;
-	if (dev_replace_is_ongoing &&
-	   (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD) &&
-	    dev_replace->tgtdev != NULL) {
-		int index_where_to_add;
-		u64 srcdev_devid = dev_replace->srcdev->devid;
-
-		/*
-		 * duplicate the write operations while the dev replace
-		 * procedure is running. Since the copying of the old disk
-		 * to the new disk takes place at run time while the
-		 * filesystem is mounted writable, the regular write
-		 * operations to the old disk have to be duplicated to go
-		 * to the new disk as well.
-		 * Note that device->missing is handled by the caller, and
-		 * that the write to the old disk is already set up in the
-		 * stripes array.
-		 */
-		index_where_to_add = num_stripes;
-		for (i = 0; i < num_stripes; i++) {
-			if (bbio->stripes[i].dev->devid == srcdev_devid) {
-				/* write to new disk, too */
-				struct btrfs_bio_stripe *new =
-					bbio->stripes + index_where_to_add;
-				struct btrfs_bio_stripe *old =
-					bbio->stripes + i;
-
-				new->physical = old->physical;
-				new->length = old->length;
-				new->dev = dev_replace->tgtdev;
-				bbio->tgtdev_map[i] = index_where_to_add;
-				index_where_to_add++;
-				max_errors++;
-				tgtdev_indexes++;
-			}
-		}
-		num_stripes = index_where_to_add;
-	} else if (dev_replace_is_ongoing &&
-		   op == BTRFS_MAP_GET_READ_MIRRORS &&
-		   dev_replace->tgtdev != NULL) {
-		u64 srcdev_devid = dev_replace->srcdev->devid;
-		int index_srcdev = 0;
-		int found = 0;
-		u64 physical_of_found = 0;
-
-		/*
-		 * During the dev-replace procedure, the target drive can
-		 * also be used to read data in case it is needed to repair
-		 * a corrupt block elsewhere. This is possible if the
-		 * requested area is left of the left cursor. In this area,
-		 * the target drive is a full copy of the source drive.
-		 */
-		for (i = 0; i < num_stripes; i++) {
-			if (bbio->stripes[i].dev->devid == srcdev_devid) {
-				/*
-				 * In case of DUP, in order to keep it
-				 * simple, only add the mirror with the
-				 * lowest physical address
-				 */
-				if (found &&
-				    physical_of_found <=
-				     bbio->stripes[i].physical)
-					continue;
-				index_srcdev = i;
-				found = 1;
-				physical_of_found = bbio->stripes[i].physical;
-			}
-		}
-		if (found) {
-			struct btrfs_bio_stripe *tgtdev_stripe =
-				bbio->stripes + num_stripes;
-
-			tgtdev_stripe->physical = physical_of_found;
-			tgtdev_stripe->length =
-				bbio->stripes[index_srcdev].length;
-			tgtdev_stripe->dev = dev_replace->tgtdev;
-			bbio->tgtdev_map[index_srcdev] = num_stripes;
-
-			tgtdev_indexes++;
-			num_stripes++;
-		}
+	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
+	    need_full_stripe(op)) {
+		handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
+					  &max_errors);
 	}
 
 	*bbio_ret = bbio;
@@ -5853,7 +5919,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 	bbio->num_stripes = num_stripes;
 	bbio->max_errors = max_errors;
 	bbio->mirror_num = mirror_num;
-	bbio->num_tgtdevs = tgtdev_indexes;
 
 	/*
 	 * this is the case that REQ_READ && dev_replace_is_ongoing &&
@@ -5886,19 +5951,15 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 /* For Scrub/replace */
 int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 		     u64 logical, u64 *length,
-		     struct btrfs_bio **bbio_ret, int mirror_num,
-		     int need_raid_map)
+		     struct btrfs_bio **bbio_ret)
 {
-	return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
-				 mirror_num, need_raid_map);
+	return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
 }
 
 int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
 		     u64 chunk_start, u64 physical, u64 devid,
 		     u64 **logical, int *naddrs, int *stripe_len)
 {
-	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
-	struct extent_map_tree *em_tree = &map_tree->map_tree;
 	struct extent_map *em;
 	struct map_lookup *map;
 	u64 *buf;
@@ -5908,24 +5969,11 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
 	u64 rmap_len;
 	int i, j, nr = 0;
 
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, chunk_start, 1);
-	read_unlock(&em_tree->lock);
-
-	if (!em) {
-		btrfs_err(fs_info, "couldn't find em for chunk %Lu",
-			chunk_start);
+	em = get_chunk_map(fs_info, chunk_start, 1);
+	if (IS_ERR(em))
 		return -EIO;
-	}
 
-	if (em->start != chunk_start) {
-		btrfs_err(fs_info, "bad chunk start, em=%Lu, wanted=%Lu",
-		       em->start, chunk_start);
-		free_extent_map(em);
-		return -EIO;
-	}
 	map = em->map_lookup;
-
 	length = em->len;
 	rmap_len = map->stripe_len;
 
@@ -5949,7 +5997,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
 			continue;
 
 		stripe_nr = physical - map->stripes[i].physical;
-		stripe_nr = div_u64(stripe_nr, map->stripe_len);
+		stripe_nr = div64_u64(stripe_nr, map->stripe_len);
 
 		if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
 			stripe_nr = stripe_nr * map->num_stripes + i;
@@ -5994,9 +6042,10 @@ static void btrfs_end_bio(struct bio *bio)
 	struct btrfs_bio *bbio = bio->bi_private;
 	int is_orig_bio = 0;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		atomic_inc(&bbio->error);
-		if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) {
+		if (bio->bi_status == BLK_STS_IOERR ||
+		    bio->bi_status == BLK_STS_TARGET) {
 			unsigned int stripe_index =
 				btrfs_io_bio(bio)->stripe_index;
 			struct btrfs_device *dev;
@@ -6034,13 +6083,13 @@ static void btrfs_end_bio(struct bio *bio)
 		 * beyond the tolerance of the btrfs bio
 		 */
 		if (atomic_read(&bbio->error) > bbio->max_errors) {
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 		} else {
 			/*
 			 * this bio is actually up to date, we didn't
 			 * go over the max number of errors
 			 */
-			bio->bi_error = 0;
+			bio->bi_status = 0;
 		}
 
 		btrfs_end_bbio(bbio, bio);
@@ -6151,7 +6200,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
 
 		btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
 		bio->bi_iter.bi_sector = logical >> 9;
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		btrfs_end_bbio(bbio, bio);
 	}
 }
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 59be81206dd7..c7d0fbc915ca 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -123,7 +123,6 @@ struct btrfs_device {
 	struct list_head resized_list;
 
 	/* for sending down flush barriers */
-	int nobarriers;
 	struct bio *flush_bio;
 	struct completion flush_wait;
 
@@ -298,7 +297,7 @@ struct btrfs_bio;
 typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
 
 struct btrfs_bio {
-	atomic_t refs;
+	refcount_t refs;
 	atomic_t stripes_pending;
 	struct btrfs_fs_info *fs_info;
 	u64 map_type; /* get from map_lookup->type */
@@ -400,8 +399,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 		    struct btrfs_bio **bbio_ret, int mirror_num);
 int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 		     u64 logical, u64 *length,
-		     struct btrfs_bio **bbio_ret, int mirror_num,
-		     int need_raid_map);
+		     struct btrfs_bio **bbio_ret);
 int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
 		     u64 chunk_start, u64 physical, u64 devid,
 		     u64 **logical, int *naddrs, int *stripe_len);
@@ -475,7 +473,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
 					      struct btrfs_device *tgtdev);
 void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
-int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
+int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
 			   u64 logical, u64 len, int mirror_num);
 unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
 				    struct btrfs_mapping_tree *map_tree,
diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58c5cb0..5c2cba8d2387 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -49,7 +49,7 @@
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
-			 struct writeback_control *wbc);
+			 enum rw_hint hint, struct writeback_control *wbc);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
@@ -1829,7 +1829,8 @@ int __block_write_full_page(struct inode *inode, struct page *page,
 	do {
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
-			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
+			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+					inode->i_write_hint, wbc);
 			nr_underway++;
 		}
 		bh = next;
@@ -1883,7 +1884,8 @@ recover:
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
 			clear_buffer_dirty(bh);
-			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
+			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+					inode->i_write_hint, wbc);
 			nr_underway++;
 		}
 		bh = next;
@@ -3038,7 +3040,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
 	if (unlikely(bio_flagged(bio, BIO_QUIET)))
 		set_bit(BH_Quiet, &bh->b_state);
 
-	bh->b_end_io(bh, !bio->bi_error);
+	bh->b_end_io(bh, !bio->bi_status);
 	bio_put(bio);
 }
 
@@ -3091,7 +3093,7 @@ void guard_bio_eod(int op, struct bio *bio)
 }
 
 static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
-			 struct writeback_control *wbc)
+			 enum rw_hint write_hint, struct writeback_control *wbc)
 {
 	struct bio *bio;
 
@@ -3120,6 +3122,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_bdev = bh->b_bdev;
+	bio->bi_write_hint = write_hint;
 
 	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
 	BUG_ON(bio->bi_iter.bi_size != bh->b_size);
@@ -3142,7 +3145,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 
 int submit_bh(int op, int op_flags, struct buffer_head *bh)
 {
-	return submit_bh_wbc(op, op_flags, bh, NULL);
+	return submit_bh_wbc(op, op_flags, bh, 0, NULL);
 }
 EXPORT_SYMBOL(submit_bh);
 
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 9bf90bcc56ac..bb3a02ca9da4 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -18,7 +18,7 @@
 
 #include <linux/fscache-cache.h>
 #include <linux/timer.h>
-#include <linux/wait.h>
+#include <linux/wait_bit.h>
 #include <linux/cred.h>
 #include <linux/workqueue.h>
 #include <linux/security.h>
@@ -97,7 +97,7 @@ struct cachefiles_cache {
  * backing file read tracking
  */
 struct cachefiles_one_read {
-	wait_queue_t			monitor;	/* link into monitored waitqueue */
+	wait_queue_entry_t			monitor;	/* link into monitored waitqueue */
 	struct page			*back_page;	/* backing file page we're waiting for */
 	struct page			*netfs_page;	/* netfs page we're going to fill */
 	struct fscache_retrieval	*op;		/* retrieval op covering this */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 41df8a27d7eb..3978b324cbca 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -204,7 +204,7 @@ wait_for_old_object:
 		wait_queue_head_t *wq;
 
 		signed long timeout = 60 * HZ;
-		wait_queue_t wait;
+		wait_queue_entry_t wait;
 		bool requeue;
 
 		/* if the object we're waiting for is queued for processing,
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index afbdc418966d..18d7aa61ef0f 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -21,7 +21,7 @@
  * - we use this to detect read completion of backing pages
  * - the caller holds the waitqueue lock
  */
-static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
+static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode,
 				  int sync, void *_key)
 {
 	struct cachefiles_one_read *monitor =
@@ -48,7 +48,7 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
 	}
 
 	/* remove from the waitqueue */
-	list_del(&wait->task_list);
+	list_del(&wait->entry);
 
 	/* move onto the action list and queue for FS-Cache thread pool */
 	ASSERT(monitor->op);
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 987044bca1c2..59cb307b15fb 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -131,6 +131,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	}
 
 	if (new_mode != old_mode) {
+		newattrs.ia_ctime = current_time(inode);
 		newattrs.ia_mode = new_mode;
 		newattrs.ia_valid = ATTR_MODE;
 		ret = __ceph_setattr(inode, &newattrs);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 9ecb2fd348cb..1e71e6ca5ddf 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -670,8 +670,12 @@ static void writepages_finish(struct ceph_osd_request *req)
 	bool remove_page;
 
 	dout("writepages_finish %p rc %d\n", inode, rc);
-	if (rc < 0)
+	if (rc < 0) {
 		mapping_set_error(mapping, rc);
+		ceph_set_error_write(ci);
+	} else {
+		ceph_clear_error_write(ci);
+	}
 
 	/*
 	 * We lost the cache cap, need to truncate the page before
@@ -703,9 +707,6 @@ static void writepages_finish(struct ceph_osd_request *req)
 				clear_bdi_congested(inode_to_bdi(inode),
 						    BLK_RW_ASYNC);
 
-			if (rc < 0)
-				SetPageError(page);
-
 			ceph_put_snap_context(page_snap_context(page));
 			page->private = 0;
 			ClearPagePrivate(page);
@@ -1892,6 +1893,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
 	err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);
 
 	wr_req->r_mtime = ci->vfs_inode.i_mtime;
+	wr_req->r_abort_on_full = true;
 	err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);
 
 	if (!err)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 68c78be19d5b..a3ebb632294e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1015,6 +1015,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
 	void *p;
 	size_t extra_len;
 	struct timespec zerotime = {0};
+	struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc;
 
 	dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
 	     " seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu"
@@ -1076,8 +1077,12 @@ static int send_cap_msg(struct cap_msg_args *arg)
 	ceph_encode_64(&p, arg->inline_data ? 0 : CEPH_INLINE_NONE);
 	/* inline data size */
 	ceph_encode_32(&p, 0);
-	/* osd_epoch_barrier (version 5) */
-	ceph_encode_32(&p, 0);
+	/*
+	 * osd_epoch_barrier (version 5)
+	 * The epoch_barrier is protected osdc->lock, so READ_ONCE here in
+	 * case it was recently changed
+	 */
+	ceph_encode_32(&p, READ_ONCE(osdc->epoch_barrier));
 	/* oldest_flush_tid (version 6) */
 	ceph_encode_64(&p, arg->oldest_flush_tid);
 
@@ -1389,7 +1394,7 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci,
 		first_tid = cf->tid + 1;
 
 		capsnap = container_of(cf, struct ceph_cap_snap, cap_flush);
-		atomic_inc(&capsnap->nref);
+		refcount_inc(&capsnap->nref);
 		spin_unlock(&ci->i_ceph_lock);
 
 		dout("__flush_snaps %p capsnap %p tid %llu %s\n",
@@ -2202,7 +2207,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
 			     inode, capsnap, cf->tid,
 			     ceph_cap_string(capsnap->dirty));
 
-			atomic_inc(&capsnap->nref);
+			refcount_inc(&capsnap->nref);
 			spin_unlock(&ci->i_ceph_lock);
 
 			ret = __send_flush_snap(inode, session, capsnap, cap->mseq,
@@ -3633,13 +3638,19 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 		p += inline_len;
 	}
 
+	if (le16_to_cpu(msg->hdr.version) >= 5) {
+		struct ceph_osd_client	*osdc = &mdsc->fsc->client->osdc;
+		u32			epoch_barrier;
+
+		ceph_decode_32_safe(&p, end, epoch_barrier, bad);
+		ceph_osdc_update_epoch_barrier(osdc, epoch_barrier);
+	}
+
 	if (le16_to_cpu(msg->hdr.version) >= 8) {
 		u64 flush_tid;
 		u32 caller_uid, caller_gid;
-		u32 osd_epoch_barrier;
 		u32 pool_ns_len;
-		/* version >= 5 */
-		ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad);
+
 		/* version >= 6 */
 		ceph_decode_64_safe(&p, end, flush_tid, bad);
 		/* version >= 7 */
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 3ef11bc8d728..4e2d112c982f 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -22,20 +22,19 @@ static int mdsmap_show(struct seq_file *s, void *p)
 {
 	int i;
 	struct ceph_fs_client *fsc = s->private;
+	struct ceph_mdsmap *mdsmap;
 
 	if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL)
 		return 0;
-	seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch);
-	seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root);
-	seq_printf(s, "session_timeout %d\n",
-		       fsc->mdsc->mdsmap->m_session_timeout);
-	seq_printf(s, "session_autoclose %d\n",
-		       fsc->mdsc->mdsmap->m_session_autoclose);
-	for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) {
-		struct ceph_entity_addr *addr =
-			&fsc->mdsc->mdsmap->m_info[i].addr;
-		int state = fsc->mdsc->mdsmap->m_info[i].state;
-
+	mdsmap = fsc->mdsc->mdsmap;
+	seq_printf(s, "epoch %d\n", mdsmap->m_epoch);
+	seq_printf(s, "root %d\n", mdsmap->m_root);
+	seq_printf(s, "max_mds %d\n", mdsmap->m_max_mds);
+	seq_printf(s, "session_timeout %d\n", mdsmap->m_session_timeout);
+	seq_printf(s, "session_autoclose %d\n", mdsmap->m_session_autoclose);
+	for (i = 0; i < mdsmap->m_num_mds; i++) {
+		struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr;
+		int state = mdsmap->m_info[i].state;
 		seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
 			       ceph_pr_addr(&addr->in_addr),
 			       ceph_mds_state_name(state));
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 3e9ad501addf..e071d23f6148 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -294,7 +294,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 	int i;
 	int err;
-	u32 ftype;
+	unsigned frag = -1;
 	struct ceph_mds_reply_info_parsed *rinfo;
 
 	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
@@ -341,7 +341,6 @@ more:
 	/* do we have the correct frag content buffered? */
 	if (need_send_readdir(fi, ctx->pos)) {
 		struct ceph_mds_request *req;
-		unsigned frag;
 		int op = ceph_snap(inode) == CEPH_SNAPDIR ?
 			CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
 
@@ -352,8 +351,11 @@ more:
 		}
 
 		if (is_hash_order(ctx->pos)) {
-			frag = ceph_choose_frag(ci, fpos_hash(ctx->pos),
-						NULL, NULL);
+			/* fragtree isn't always accurate. choose frag
+			 * based on previous reply when possible. */
+			if (frag == (unsigned)-1)
+				frag = ceph_choose_frag(ci, fpos_hash(ctx->pos),
+							NULL, NULL);
 		} else {
 			frag = fpos_frag(ctx->pos);
 		}
@@ -378,7 +380,11 @@ more:
 				ceph_mdsc_put_request(req);
 				return -ENOMEM;
 			}
+		} else if (is_hash_order(ctx->pos)) {
+			req->r_args.readdir.offset_hash =
+				cpu_to_le32(fpos_hash(ctx->pos));
 		}
+
 		req->r_dir_release_cnt = fi->dir_release_count;
 		req->r_dir_ordered_cnt = fi->dir_ordered_count;
 		req->r_readdir_cache_idx = fi->readdir_cache_idx;
@@ -476,6 +482,7 @@ more:
 		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
 		struct ceph_vino vino;
 		ino_t ino;
+		u32 ftype;
 
 		BUG_ON(rde->offset < ctx->pos);
 
@@ -498,15 +505,17 @@ more:
 		ctx->pos++;
 	}
 
+	ceph_mdsc_put_request(fi->last_readdir);
+	fi->last_readdir = NULL;
+
 	if (fi->next_offset > 2) {
-		ceph_mdsc_put_request(fi->last_readdir);
-		fi->last_readdir = NULL;
+		frag = fi->frag;
 		goto more;
 	}
 
 	/* more frags? */
 	if (!ceph_frag_is_rightmost(fi->frag)) {
-		unsigned frag = ceph_frag_next(fi->frag);
+		frag = ceph_frag_next(fi->frag);
 		if (is_hash_order(ctx->pos)) {
 			loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
 							fi->next_offset, true);
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e8f11fa565c5..7df550c13d7f 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -91,6 +91,10 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
 		ceph_mdsc_put_request(req);
 		if (!inode)
 			return ERR_PTR(-ESTALE);
+		if (inode->i_nlink == 0) {
+			iput(inode);
+			return ERR_PTR(-ESTALE);
+		}
 	}
 
 	return d_obtain_alias(inode);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 18c045e2ead6..29308a80d66f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -13,6 +13,38 @@
 #include "mds_client.h"
 #include "cache.h"
 
+static __le32 ceph_flags_sys2wire(u32 flags)
+{
+	u32 wire_flags = 0;
+
+	switch (flags & O_ACCMODE) {
+	case O_RDONLY:
+		wire_flags |= CEPH_O_RDONLY;
+		break;
+	case O_WRONLY:
+		wire_flags |= CEPH_O_WRONLY;
+		break;
+	case O_RDWR:
+		wire_flags |= CEPH_O_RDWR;
+		break;
+	}
+
+#define ceph_sys2wire(a) if (flags & a) { wire_flags |= CEPH_##a; flags &= ~a; }
+
+	ceph_sys2wire(O_CREAT);
+	ceph_sys2wire(O_EXCL);
+	ceph_sys2wire(O_TRUNC);
+	ceph_sys2wire(O_DIRECTORY);
+	ceph_sys2wire(O_NOFOLLOW);
+
+#undef ceph_sys2wire
+
+	if (flags)
+		dout("unused open flags: %x", flags);
+
+	return cpu_to_le32(wire_flags);
+}
+
 /*
  * Ceph file operations
  *
@@ -120,7 +152,7 @@ prepare_open_request(struct super_block *sb, int flags, int create_mode)
 	if (IS_ERR(req))
 		goto out;
 	req->r_fmode = ceph_flags_to_mode(flags);
-	req->r_args.open.flags = cpu_to_le32(flags);
+	req->r_args.open.flags = ceph_flags_sys2wire(flags);
 	req->r_args.open.mode = cpu_to_le32(create_mode);
 out:
 	return req;
@@ -189,7 +221,7 @@ int ceph_renew_caps(struct inode *inode)
 	spin_lock(&ci->i_ceph_lock);
 	wanted = __ceph_caps_file_wanted(ci);
 	if (__ceph_is_any_real_caps(ci) &&
-	    (!(wanted & CEPH_CAP_ANY_WR) == 0 || ci->i_auth_cap)) {
+	    (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) {
 		int issued = __ceph_caps_issued(ci, NULL);
 		spin_unlock(&ci->i_ceph_lock);
 		dout("renew caps %p want %s issued %s updating mds_wanted\n",
@@ -778,6 +810,7 @@ static void ceph_aio_retry_work(struct work_struct *work)
 	req->r_callback = ceph_aio_complete_req;
 	req->r_inode = inode;
 	req->r_priv = aio_req;
+	req->r_abort_on_full = true;
 
 	ret = ceph_osdc_start_request(req->r_osdc, req, false);
 out:
@@ -1085,19 +1118,22 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
 
 out:
 		ceph_osdc_put_request(req);
-		if (ret == 0) {
-			pos += len;
-			written += len;
-
-			if (pos > i_size_read(inode)) {
-				check_caps = ceph_inode_set_size(inode, pos);
-				if (check_caps)
-					ceph_check_caps(ceph_inode(inode),
-							CHECK_CAPS_AUTHONLY,
-							NULL);
-			}
-		} else
+		if (ret != 0) {
+			ceph_set_error_write(ci);
 			break;
+		}
+
+		ceph_clear_error_write(ci);
+		pos += len;
+		written += len;
+		if (pos > i_size_read(inode)) {
+			check_caps = ceph_inode_set_size(inode, pos);
+			if (check_caps)
+				ceph_check_caps(ceph_inode(inode),
+						CHECK_CAPS_AUTHONLY,
+						NULL);
+		}
+
 	}
 
 	if (ret != -EOLDSNAPC && written > 0) {
@@ -1303,6 +1339,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	}
 
 retry_snap:
+	/* FIXME: not complete since it doesn't account for being at quota */
 	if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) {
 		err = -ENOSPC;
 		goto out;
@@ -1324,7 +1361,8 @@ retry_snap:
 	     inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
 
 	if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
-	    (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
+	    (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC) ||
+	    (ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) {
 		struct ceph_snap_context *snapc;
 		struct iov_iter data;
 		inode_unlock(inode);
@@ -1633,8 +1671,12 @@ static long ceph_fallocate(struct file *file, int mode,
 	}
 
 	size = i_size_read(inode);
-	if (!(mode & FALLOC_FL_KEEP_SIZE))
+	if (!(mode & FALLOC_FL_KEEP_SIZE)) {
 		endoff = offset + length;
+		ret = inode_newsize_ok(inode, endoff);
+		if (ret)
+			goto unlock;
+	}
 
 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
 		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index d3119fe3ab45..4de6cdddf059 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1482,10 +1482,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 	if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
 		return readdir_prepopulate_inodes_only(req, session);
 
-	if (rinfo->hash_order && req->r_path2) {
-		last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
-					  req->r_path2, strlen(req->r_path2));
-		last_hash = ceph_frag_value(last_hash);
+	if (rinfo->hash_order) {
+		if (req->r_path2) {
+			last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
+						  req->r_path2,
+						  strlen(req->r_path2));
+			last_hash = ceph_frag_value(last_hash);
+		} else if (rinfo->offset_hash) {
+			/* mds understands offset_hash */
+			WARN_ON_ONCE(req->r_readdir_offset != 2);
+			last_hash = le32_to_cpu(rhead->args.readdir.offset_hash);
+		}
 	}
 
 	if (rinfo->dir_dir &&
@@ -1510,7 +1517,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 	}
 
 	if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2 &&
-	    !(rinfo->hash_order && req->r_path2)) {
+	    !(rinfo->hash_order && last_hash)) {
 		/* note dir version at start of readdir so we can tell
 		 * if any dentries get dropped */
 		req->r_dir_release_cnt = atomic64_read(&ci->i_release_count);
@@ -2015,7 +2022,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 		    attr->ia_size > inode->i_size) {
 			i_size_write(inode, attr->ia_size);
 			inode->i_blocks = calc_inode_blocks(attr->ia_size);
-			inode->i_ctime = attr->ia_ctime;
 			ci->i_reported_size = attr->ia_size;
 			dirtied |= CEPH_CAP_FILE_EXCL;
 		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
@@ -2037,7 +2043,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 		     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
 		     attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
 		     only ? "ctime only" : "ignored");
-		inode->i_ctime = attr->ia_ctime;
 		if (only) {
 			/*
 			 * if kernel wants to dirty ctime but nothing else,
@@ -2060,7 +2065,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 	if (dirtied) {
 		inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
 							   &prealloc_cf);
-		inode->i_ctime = current_time(inode);
+		inode->i_ctime = attr->ia_ctime;
 	}
 
 	release &= issued;
@@ -2078,6 +2083,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 		req->r_inode_drop = release;
 		req->r_args.setattr.mask = cpu_to_le32(mask);
 		req->r_num_caps = 1;
+		req->r_stamp = attr->ia_ctime;
 		err = ceph_mdsc_do_request(mdsc, NULL, req);
 	}
 	dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1d3fa90d40b9..0c05df44cc6c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -189,6 +189,7 @@ static int parse_reply_info_dir(void **p, void *end,
 		info->dir_end = !!(flags & CEPH_READDIR_FRAG_END);
 		info->dir_complete = !!(flags & CEPH_READDIR_FRAG_COMPLETE);
 		info->hash_order = !!(flags & CEPH_READDIR_HASH_ORDER);
+		info->offset_hash = !!(flags & CEPH_READDIR_OFFSET_HASH);
 	}
 	if (num == 0)
 		goto done;
@@ -378,9 +379,9 @@ const char *ceph_session_state_name(int s)
 
 static struct ceph_mds_session *get_session(struct ceph_mds_session *s)
 {
-	if (atomic_inc_not_zero(&s->s_ref)) {
+	if (refcount_inc_not_zero(&s->s_ref)) {
 		dout("mdsc get_session %p %d -> %d\n", s,
-		     atomic_read(&s->s_ref)-1, atomic_read(&s->s_ref));
+		     refcount_read(&s->s_ref)-1, refcount_read(&s->s_ref));
 		return s;
 	} else {
 		dout("mdsc get_session %p 0 -- FAIL", s);
@@ -391,8 +392,8 @@ static struct ceph_mds_session *get_session(struct ceph_mds_session *s)
 void ceph_put_mds_session(struct ceph_mds_session *s)
 {
 	dout("mdsc put_session %p %d -> %d\n", s,
-	     atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
-	if (atomic_dec_and_test(&s->s_ref)) {
+	     refcount_read(&s->s_ref), refcount_read(&s->s_ref)-1);
+	if (refcount_dec_and_test(&s->s_ref)) {
 		if (s->s_auth.authorizer)
 			ceph_auth_destroy_authorizer(s->s_auth.authorizer);
 		kfree(s);
@@ -411,7 +412,7 @@ struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
 		return NULL;
 	session = mdsc->sessions[mds];
 	dout("lookup_mds_session %p %d\n", session,
-	     atomic_read(&session->s_ref));
+	     refcount_read(&session->s_ref));
 	get_session(session);
 	return session;
 }
@@ -441,7 +442,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 {
 	struct ceph_mds_session *s;
 
-	if (mds >= mdsc->mdsmap->m_max_mds)
+	if (mds >= mdsc->mdsmap->m_num_mds)
 		return ERR_PTR(-EINVAL);
 
 	s = kzalloc(sizeof(*s), GFP_NOFS);
@@ -466,7 +467,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	INIT_LIST_HEAD(&s->s_caps);
 	s->s_nr_caps = 0;
 	s->s_trim_caps = 0;
-	atomic_set(&s->s_ref, 1);
+	refcount_set(&s->s_ref, 1);
 	INIT_LIST_HEAD(&s->s_waiting);
 	INIT_LIST_HEAD(&s->s_unsafe);
 	s->s_num_cap_releases = 0;
@@ -494,7 +495,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	}
 	mdsc->sessions[mds] = s;
 	atomic_inc(&mdsc->num_sessions);
-	atomic_inc(&s->s_ref);  /* one ref to sessions[], one to caller */
+	refcount_inc(&s->s_ref);  /* one ref to sessions[], one to caller */
 
 	ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
 		      ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
@@ -1004,7 +1005,7 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
 	struct ceph_mds_session *ts;
 	int i, mds = session->s_mds;
 
-	if (mds >= mdsc->mdsmap->m_max_mds)
+	if (mds >= mdsc->mdsmap->m_num_mds)
 		return;
 
 	mi = &mdsc->mdsmap->m_info[mds];
@@ -1551,9 +1552,15 @@ void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
 	struct ceph_msg *msg = NULL;
 	struct ceph_mds_cap_release *head;
 	struct ceph_mds_cap_item *item;
+	struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc;
 	struct ceph_cap *cap;
 	LIST_HEAD(tmp_list);
 	int num_cap_releases;
+	__le32	barrier, *cap_barrier;
+
+	down_read(&osdc->lock);
+	barrier = cpu_to_le32(osdc->epoch_barrier);
+	up_read(&osdc->lock);
 
 	spin_lock(&session->s_cap_lock);
 again:
@@ -1571,7 +1578,11 @@ again:
 			head = msg->front.iov_base;
 			head->num = cpu_to_le32(0);
 			msg->front.iov_len = sizeof(*head);
+
+			msg->hdr.version = cpu_to_le16(2);
+			msg->hdr.compat_version = cpu_to_le16(1);
 		}
+
 		cap = list_first_entry(&tmp_list, struct ceph_cap,
 					session_caps);
 		list_del(&cap->session_caps);
@@ -1589,6 +1600,11 @@ again:
 		ceph_put_cap(mdsc, cap);
 
 		if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
+			// Append cap_barrier field
+			cap_barrier = msg->front.iov_base + msg->front.iov_len;
+			*cap_barrier = barrier;
+			msg->front.iov_len += sizeof(*cap_barrier);
+
 			msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
 			dout("send_cap_releases mds%d %p\n", session->s_mds, msg);
 			ceph_con_send(&session->s_con, msg);
@@ -1604,6 +1620,11 @@ again:
 	spin_unlock(&session->s_cap_lock);
 
 	if (msg) {
+		// Append cap_barrier field
+		cap_barrier = msg->front.iov_base + msg->front.iov_len;
+		*cap_barrier = barrier;
+		msg->front.iov_len += sizeof(*cap_barrier);
+
 		msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
 		dout("send_cap_releases mds%d %p\n", session->s_mds, msg);
 		ceph_con_send(&session->s_con, msg);
@@ -1666,7 +1687,6 @@ struct ceph_mds_request *
 ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
 {
 	struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
-	struct timespec ts;
 
 	if (!req)
 		return ERR_PTR(-ENOMEM);
@@ -1685,8 +1705,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
 	init_completion(&req->r_safe_completion);
 	INIT_LIST_HEAD(&req->r_unsafe_item);
 
-	ktime_get_real_ts(&ts);
-	req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
+	req->r_stamp = timespec_trunc(current_kernel_time(), mdsc->fsc->sb->s_time_gran);
 
 	req->r_op = op;
 	req->r_direct_mode = mode;
@@ -1993,7 +2012,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
 
 	if (req->r_pagelist) {
 		struct ceph_pagelist *pagelist = req->r_pagelist;
-		atomic_inc(&pagelist->refcnt);
+		refcount_inc(&pagelist->refcnt);
 		ceph_msg_data_add_pagelist(msg, pagelist);
 		msg->hdr.data_len = cpu_to_le32(pagelist->length);
 	} else {
@@ -2640,8 +2659,10 @@ static void handle_session(struct ceph_mds_session *session,
 	seq = le64_to_cpu(h->seq);
 
 	mutex_lock(&mdsc->mutex);
-	if (op == CEPH_SESSION_CLOSE)
+	if (op == CEPH_SESSION_CLOSE) {
+		get_session(session);
 		__unregister_session(mdsc, session);
+	}
 	/* FIXME: this ttl calculation is generous */
 	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
 	mutex_unlock(&mdsc->mutex);
@@ -2730,6 +2751,8 @@ static void handle_session(struct ceph_mds_session *session,
 			kick_requests(mdsc, mds);
 		mutex_unlock(&mdsc->mutex);
 	}
+	if (op == CEPH_SESSION_CLOSE)
+		ceph_put_mds_session(session);
 	return;
 
 bad:
@@ -3109,7 +3132,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 	dout("check_new_map new %u old %u\n",
 	     newmap->m_epoch, oldmap->m_epoch);
 
-	for (i = 0; i < oldmap->m_max_mds && i < mdsc->max_sessions; i++) {
+	for (i = 0; i < oldmap->m_num_mds && i < mdsc->max_sessions; i++) {
 		if (mdsc->sessions[i] == NULL)
 			continue;
 		s = mdsc->sessions[i];
@@ -3123,15 +3146,33 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 		     ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
 		     ceph_session_state_name(s->s_state));
 
-		if (i >= newmap->m_max_mds ||
+		if (i >= newmap->m_num_mds ||
 		    memcmp(ceph_mdsmap_get_addr(oldmap, i),
 			   ceph_mdsmap_get_addr(newmap, i),
 			   sizeof(struct ceph_entity_addr))) {
 			if (s->s_state == CEPH_MDS_SESSION_OPENING) {
 				/* the session never opened, just close it
 				 * out now */
+				get_session(s);
+				__unregister_session(mdsc, s);
 				__wake_requests(mdsc, &s->s_waiting);
+				ceph_put_mds_session(s);
+			} else if (i >= newmap->m_num_mds) {
+				/* force close session for stopped mds */
+				get_session(s);
 				__unregister_session(mdsc, s);
+				__wake_requests(mdsc, &s->s_waiting);
+				kick_requests(mdsc, i);
+				mutex_unlock(&mdsc->mutex);
+
+				mutex_lock(&s->s_mutex);
+				cleanup_session_requests(mdsc, s);
+				remove_session_caps(s);
+				mutex_unlock(&s->s_mutex);
+
+				ceph_put_mds_session(s);
+
+				mutex_lock(&mdsc->mutex);
 			} else {
 				/* just close it */
 				mutex_unlock(&mdsc->mutex);
@@ -3169,7 +3210,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 		}
 	}
 
-	for (i = 0; i < newmap->m_max_mds && i < mdsc->max_sessions; i++) {
+	for (i = 0; i < newmap->m_num_mds && i < mdsc->max_sessions; i++) {
 		s = mdsc->sessions[i];
 		if (!s)
 			continue;
@@ -3883,7 +3924,7 @@ static struct ceph_connection *con_get(struct ceph_connection *con)
 	struct ceph_mds_session *s = con->private;
 
 	if (get_session(s)) {
-		dout("mdsc con_get %p ok (%d)\n", s, atomic_read(&s->s_ref));
+		dout("mdsc con_get %p ok (%d)\n", s, refcount_read(&s->s_ref));
 		return con;
 	}
 	dout("mdsc con_get %p FAIL\n", s);
@@ -3894,7 +3935,7 @@ static void con_put(struct ceph_connection *con)
 {
 	struct ceph_mds_session *s = con->private;
 
-	dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref) - 1);
+	dout("mdsc con_put %p (%d)\n", s, refcount_read(&s->s_ref) - 1);
 	ceph_put_mds_session(s);
 }
 
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ac0475a2daa7..db57ae98ed34 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -7,6 +7,7 @@
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
+#include <linux/refcount.h>
 
 #include <linux/ceph/types.h>
 #include <linux/ceph/messenger.h>
@@ -82,9 +83,10 @@ struct ceph_mds_reply_info_parsed {
 			struct ceph_mds_reply_dirfrag *dir_dir;
 			size_t			      dir_buf_size;
 			int                           dir_nr;
-			bool			      dir_complete;
 			bool			      dir_end;
+			bool			      dir_complete;
 			bool			      hash_order;
+			bool			      offset_hash;
 			struct ceph_mds_reply_dir_entry  *dir_entries;
 		};
 
@@ -104,10 +106,13 @@ struct ceph_mds_reply_info_parsed {
 
 /*
  * cap releases are batched and sent to the MDS en masse.
+ *
+ * Account for per-message overhead of mds_cap_release header
+ * and __le32 for osd epoch barrier trailing field.
  */
-#define CEPH_CAPS_PER_RELEASE ((PAGE_SIZE -			\
+#define CEPH_CAPS_PER_RELEASE ((PAGE_SIZE - sizeof(u32) -		\
 				sizeof(struct ceph_mds_cap_release)) /	\
-			       sizeof(struct ceph_mds_cap_item))
+			        sizeof(struct ceph_mds_cap_item))
 
 
 /*
@@ -156,7 +161,7 @@ struct ceph_mds_session {
 	unsigned long     s_renew_requested; /* last time we sent a renew req */
 	u64               s_renew_seq;
 
-	atomic_t          s_ref;
+	refcount_t          s_ref;
 	struct list_head  s_waiting;  /* waiting requests */
 	struct list_head  s_unsafe;   /* unsafe requests */
 };
@@ -373,7 +378,7 @@ __ceph_lookup_mds_session(struct ceph_mds_client *, int mds);
 static inline struct ceph_mds_session *
 ceph_get_mds_session(struct ceph_mds_session *s)
 {
-	atomic_inc(&s->s_ref);
+	refcount_inc(&s->s_ref);
 	return s;
 }
 
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 5454e2327a5f..1a748cf88535 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -22,11 +22,11 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
 	int i;
 
 	/* special case for one mds */
-	if (1 == m->m_max_mds && m->m_info[0].state > 0)
+	if (1 == m->m_num_mds && m->m_info[0].state > 0)
 		return 0;
 
 	/* count */
-	for (i = 0; i < m->m_max_mds; i++)
+	for (i = 0; i < m->m_num_mds; i++)
 		if (m->m_info[i].state > 0)
 			n++;
 	if (n == 0)
@@ -135,8 +135,9 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 	m->m_session_autoclose = ceph_decode_32(p);
 	m->m_max_file_size = ceph_decode_64(p);
 	m->m_max_mds = ceph_decode_32(p);
+	m->m_num_mds = m->m_max_mds;
 
-	m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
+	m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS);
 	if (m->m_info == NULL)
 		goto nomem;
 
@@ -207,9 +208,20 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 		     ceph_pr_addr(&addr.in_addr),
 		     ceph_mds_state_name(state));
 
-		if (mds < 0 || mds >= m->m_max_mds || state <= 0)
+		if (mds < 0 || state <= 0)
 			continue;
 
+		if (mds >= m->m_num_mds) {
+			int new_num = max(mds + 1, m->m_num_mds * 2);
+			void *new_m_info = krealloc(m->m_info,
+						new_num * sizeof(*m->m_info),
+						GFP_NOFS | __GFP_ZERO);
+			if (!new_m_info)
+				goto nomem;
+			m->m_info = new_m_info;
+			m->m_num_mds = new_num;
+		}
+
 		info = &m->m_info[mds];
 		info->global_id = global_id;
 		info->state = state;
@@ -229,6 +241,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 			info->export_targets = NULL;
 		}
 	}
+	if (m->m_num_mds > m->m_max_mds) {
+		/* find max up mds */
+		for (i = m->m_num_mds; i >= m->m_max_mds; i--) {
+			if (i == 0 || m->m_info[i-1].state > 0)
+				break;
+		}
+		m->m_num_mds = i;
+	}
 
 	/* pg_pools */
 	ceph_decode_32_safe(p, end, n, bad);
@@ -270,12 +290,22 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 
 		for (i = 0; i < n; i++) {
 			s32 mds = ceph_decode_32(p);
-			if (mds >= 0 && mds < m->m_max_mds) {
+			if (mds >= 0 && mds < m->m_num_mds) {
 				if (m->m_info[mds].laggy)
 					num_laggy++;
 			}
 		}
 		m->m_num_laggy = num_laggy;
+
+		if (n > m->m_num_mds) {
+			void *new_m_info = krealloc(m->m_info,
+						    n * sizeof(*m->m_info),
+						    GFP_NOFS | __GFP_ZERO);
+			if (!new_m_info)
+				goto nomem;
+			m->m_info = new_m_info;
+		}
+		m->m_num_mds = n;
 	}
 
 	/* inc */
@@ -341,7 +371,7 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
 {
 	int i;
 
-	for (i = 0; i < m->m_max_mds; i++)
+	for (i = 0; i < m->m_num_mds; i++)
 		kfree(m->m_info[i].export_targets);
 	kfree(m->m_info);
 	kfree(m->m_data_pg_pools);
@@ -357,7 +387,7 @@ bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m)
 		return false;
 	if (m->m_num_laggy > 0)
 		return false;
-	for (i = 0; i < m->m_max_mds; i++) {
+	for (i = 0; i < m->m_num_mds; i++) {
 		if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE)
 			nr_active++;
 	}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 8f8b41c2ef0f..dab5d6732345 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -519,7 +519,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 	     capsnap->need_flush ? "" : "no_flush");
 	ihold(inode);
 
-	atomic_set(&capsnap->nref, 1);
+	refcount_set(&capsnap->nref, 1);
 	INIT_LIST_HEAD(&capsnap->ci_item);
 
 	capsnap->follows = old_snapc->seq;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a8c81b2052ca..8d7918ce694a 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -544,10 +544,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 					struct ceph_options *opt)
 {
 	struct ceph_fs_client *fsc;
-	const u64 supported_features =
-		CEPH_FEATURE_FLOCK | CEPH_FEATURE_DIRLAYOUTHASH |
-		CEPH_FEATURE_MDSENC | CEPH_FEATURE_MDS_INLINE_DATA;
-	const u64 required_features = 0;
 	int page_count;
 	size_t size;
 	int err = -ENOMEM;
@@ -556,8 +552,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	if (!fsc)
 		return ERR_PTR(-ENOMEM);
 
-	fsc->client = ceph_create_client(opt, fsc, supported_features,
-					 required_features);
+	fsc->client = ceph_create_client(opt, fsc);
 	if (IS_ERR(fsc->client)) {
 		err = PTR_ERR(fsc->client);
 		goto fail;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 176186b12457..a973acd8beaf 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -14,6 +14,7 @@
 #include <linux/writeback.h>
 #include <linux/slab.h>
 #include <linux/posix_acl.h>
+#include <linux/refcount.h>
 
 #include <linux/ceph/libceph.h>
 
@@ -160,7 +161,7 @@ struct ceph_cap_flush {
  * data before flushing the snapped state (tracked here) back to the MDS.
  */
 struct ceph_cap_snap {
-	atomic_t nref;
+	refcount_t nref;
 	struct list_head ci_item;
 
 	struct ceph_cap_flush cap_flush;
@@ -189,7 +190,7 @@ struct ceph_cap_snap {
 
 static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
 {
-	if (atomic_dec_and_test(&capsnap->nref)) {
+	if (refcount_dec_and_test(&capsnap->nref)) {
 		if (capsnap->xattr_blob)
 			ceph_buffer_put(capsnap->xattr_blob);
 		kfree(capsnap);
@@ -471,6 +472,32 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 #define CEPH_I_CAP_DROPPED	(1 << 8)  /* caps were forcibly dropped */
 #define CEPH_I_KICK_FLUSH	(1 << 9)  /* kick flushing caps */
 #define CEPH_I_FLUSH_SNAPS	(1 << 10) /* need flush snapss */
+#define CEPH_I_ERROR_WRITE	(1 << 11) /* have seen write errors */
+
+/*
+ * We set the ERROR_WRITE bit when we start seeing write errors on an inode
+ * and then clear it when they start succeeding. Note that we do a lockless
+ * check first, and only take the lock if it looks like it needs to be changed.
+ * The write submission code just takes this as a hint, so we're not too
+ * worried if a few slip through in either direction.
+ */
+static inline void ceph_set_error_write(struct ceph_inode_info *ci)
+{
+	if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE)) {
+		spin_lock(&ci->i_ceph_lock);
+		ci->i_ceph_flags |= CEPH_I_ERROR_WRITE;
+		spin_unlock(&ci->i_ceph_lock);
+	}
+}
+
+static inline void ceph_clear_error_write(struct ceph_inode_info *ci)
+{
+	if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE) {
+		spin_lock(&ci->i_ceph_lock);
+		ci->i_ceph_flags &= ~CEPH_I_ERROR_WRITE;
+		spin_unlock(&ci->i_ceph_lock);
+	}
+}
 
 static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
 					   long long release_count,
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index febc28f9e2c2..75267cdd5dfd 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -392,6 +392,7 @@ static int __set_xattr(struct ceph_inode_info *ci,
 
 	if (update_xattr) {
 		int err = 0;
+
 		if (xattr && (flags & XATTR_CREATE))
 			err = -EEXIST;
 		else if (!xattr && (flags & XATTR_REPLACE))
@@ -399,12 +400,14 @@ static int __set_xattr(struct ceph_inode_info *ci,
 		if (err) {
 			kfree(name);
 			kfree(val);
+			kfree(*newxattr);
 			return err;
 		}
 		if (update_xattr < 0) {
 			if (xattr)
 				__remove_xattr(ci, xattr);
 			kfree(name);
+			kfree(*newxattr);
 			return 0;
 		}
 	}
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 15bac390dff9..b98436f5c7c7 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1135,20 +1135,19 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
 	u32 acllen = 0;
 	int rc = 0;
 	struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
-	struct cifs_tcon *tcon;
+	struct smb_version_operations *ops;
 
 	cifs_dbg(NOISY, "converting ACL to mode for %s\n", path);
 
 	if (IS_ERR(tlink))
 		return PTR_ERR(tlink);
-	tcon = tlink_tcon(tlink);
 
-	if (pfid && (tcon->ses->server->ops->get_acl_by_fid))
-		pntsd = tcon->ses->server->ops->get_acl_by_fid(cifs_sb, pfid,
-							  &acllen);
-	else if (tcon->ses->server->ops->get_acl)
-		pntsd = tcon->ses->server->ops->get_acl(cifs_sb, inode, path,
-							&acllen);
+	ops = tlink_tcon(tlink)->ses->server->ops;
+
+	if (pfid && (ops->get_acl_by_fid))
+		pntsd = ops->get_acl_by_fid(cifs_sb, pfid, &acllen);
+	else if (ops->get_acl)
+		pntsd = ops->get_acl(cifs_sb, inode, path, &acllen);
 	else {
 		cifs_put_tlink(tlink);
 		return -EOPNOTSUPP;
@@ -1181,23 +1180,23 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
 	struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
-	struct cifs_tcon *tcon;
+	struct smb_version_operations *ops;
 
 	if (IS_ERR(tlink))
 		return PTR_ERR(tlink);
-	tcon = tlink_tcon(tlink);
+
+	ops = tlink_tcon(tlink)->ses->server->ops;
 
 	cifs_dbg(NOISY, "set ACL from mode for %s\n", path);
 
 	/* Get the security descriptor */
 
-	if (tcon->ses->server->ops->get_acl == NULL) {
+	if (ops->get_acl == NULL) {
 		cifs_put_tlink(tlink);
 		return -EOPNOTSUPP;
 	}
 
-	pntsd = tcon->ses->server->ops->get_acl(cifs_sb, inode, path,
-						&secdesclen);
+	pntsd = ops->get_acl(cifs_sb, inode, path, &secdesclen);
 	if (IS_ERR(pntsd)) {
 		rc = PTR_ERR(pntsd);
 		cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc);
@@ -1224,13 +1223,12 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
 
 	cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc);
 
-	if (tcon->ses->server->ops->set_acl == NULL)
+	if (ops->set_acl == NULL)
 		rc = -EOPNOTSUPP;
 
 	if (!rc) {
 		/* Set the security descriptor */
-		rc = tcon->ses->server->ops->set_acl(pnntsd, secdesclen, inode,
-						     path, aclflag);
+		rc = ops->set_acl(pnntsd, secdesclen, inode, path, aclflag);
 		cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc);
 	}
 	cifs_put_tlink(tlink);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8be55be70faf..bcc7d9acad64 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -418,7 +418,7 @@ struct smb_version_operations {
 	int (*validate_negotiate)(const unsigned int, struct cifs_tcon *);
 	ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *,
 			const unsigned char *, const unsigned char *, char *,
-			size_t, const struct nls_table *, int);
+			size_t, struct cifs_sb_info *);
 	int (*set_EA)(const unsigned int, struct cifs_tcon *, const char *,
 			const char *, const void *, const __u16,
 			const struct nls_table *, int);
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e49958c3f8bb..6eb3147132e3 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -480,8 +480,7 @@ extern int CIFSSMBCopy(unsigned int xid,
 extern ssize_t CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon,
 			const unsigned char *searchName,
 			const unsigned char *ea_name, char *EAData,
-			size_t bufsize, const struct nls_table *nls_codepage,
-			int remap_special_chars);
+			size_t bufsize, struct cifs_sb_info *cifs_sb);
 extern int CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon,
 		const char *fileName, const char *ea_name,
 		const void *ea_value, const __u16 ea_value_len,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4c01b3f9abf0..fbb0d4cbda41 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -697,9 +697,7 @@ cifs_echo_callback(struct mid_q_entry *mid)
 {
 	struct TCP_Server_Info *server = mid->callback_data;
 
-	mutex_lock(&server->srv_mutex);
 	DeleteMidQEntry(mid);
-	mutex_unlock(&server->srv_mutex);
 	add_credits(server, 1, CIFS_ECHO_OP);
 }
 
@@ -1599,9 +1597,7 @@ cifs_readv_callback(struct mid_q_entry *mid)
 	}
 
 	queue_work(cifsiod_wq, &rdata->work);
-	mutex_lock(&server->srv_mutex);
 	DeleteMidQEntry(mid);
-	mutex_unlock(&server->srv_mutex);
 	add_credits(server, 1, 0);
 }
 
@@ -2058,7 +2054,6 @@ cifs_writev_callback(struct mid_q_entry *mid)
 {
 	struct cifs_writedata *wdata = mid->callback_data;
 	struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
-	struct TCP_Server_Info *server = tcon->ses->server;
 	unsigned int written;
 	WRITE_RSP *smb = (WRITE_RSP *)mid->resp_buf;
 
@@ -2095,9 +2090,7 @@ cifs_writev_callback(struct mid_q_entry *mid)
 	}
 
 	queue_work(cifsiod_wq, &wdata->work);
-	mutex_lock(&server->srv_mutex);
 	DeleteMidQEntry(mid);
-	mutex_unlock(&server->srv_mutex);
 	add_credits(tcon->ses->server, 1, 0);
 }
 
@@ -6076,11 +6069,13 @@ ssize_t
 CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon,
 		const unsigned char *searchName, const unsigned char *ea_name,
 		char *EAData, size_t buf_size,
-		const struct nls_table *nls_codepage, int remap)
+		struct cifs_sb_info *cifs_sb)
 {
 		/* BB assumes one setup word */
 	TRANSACTION2_QPI_REQ *pSMB = NULL;
 	TRANSACTION2_QPI_RSP *pSMBr = NULL;
+	int remap = cifs_remap(cifs_sb);
+	struct nls_table *nls_codepage = cifs_sb->local_nls;
 	int rc = 0;
 	int bytes_returned;
 	int list_len;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 6ef78ad838e6..fcef70602b27 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -582,7 +582,7 @@ cifs_relock_file(struct cifsFileInfo *cfile)
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 	int rc = 0;
 
-	down_read(&cinode->lock_sem);
+	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
 	if (cinode->can_cache_brlcks) {
 		/* can cache locks - no need to relock */
 		up_read(&cinode->lock_sem);
@@ -3271,7 +3271,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
 	if (!is_sync_kiocb(iocb))
 		ctx->iocb = iocb;
 
-	if (to->type & ITER_IOVEC)
+	if (to->type == ITER_IOVEC)
 		ctx->should_dirty = true;
 
 	rc = setup_aio_ctx_iter(ctx, to, READ);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index c3b2fa0b2ec8..a8693632235f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -24,6 +24,7 @@
 #include <linux/pagemap.h>
 #include <linux/freezer.h>
 #include <linux/sched/signal.h>
+#include <linux/wait_bit.h>
 
 #include <asm/div64.h>
 #include "cifsfs.h"
@@ -563,8 +564,7 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
 
 	rc = tcon->ses->server->ops->query_all_EAs(xid, tcon, path,
 			"SETFILEBITS", ea_value, 4 /* size of buf */,
-			cifs_sb->local_nls,
-			cifs_remap(cifs_sb));
+			cifs_sb);
 	cifs_put_tlink(tlink);
 	if (rc < 0)
 		return (int)rc;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index b08531977daa..3b147dc6af63 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -810,7 +810,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
 
 	if (!pages) {
 		pages = vmalloc(max_pages * sizeof(struct page *));
-		if (!bv) {
+		if (!pages) {
 			kvfree(bv);
 			return -ENOMEM;
 		}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 27bc360c7ffd..a723df3e0197 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -849,8 +849,13 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 		     struct cifs_fid *fid, __u16 search_flags,
 		     struct cifs_search_info *srch_inf)
 {
-	return CIFSFindFirst(xid, tcon, path, cifs_sb,
-			     &fid->netfid, search_flags, srch_inf, true);
+	int rc;
+
+	rc = CIFSFindFirst(xid, tcon, path, cifs_sb,
+			   &fid->netfid, search_flags, srch_inf, true);
+	if (rc)
+		cifs_dbg(FYI, "find first failed=%d\n", rc);
+	return rc;
 }
 
 static int
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c58691834eb2..7e48561abd29 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -982,7 +982,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 	rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
 	kfree(utf16_path);
 	if (rc) {
-		cifs_dbg(VFS, "open dir failed\n");
+		cifs_dbg(FYI, "open dir failed rc=%d\n", rc);
 		return rc;
 	}
 
@@ -992,7 +992,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 	rc = SMB2_query_directory(xid, tcon, fid->persistent_fid,
 				  fid->volatile_fid, 0, srch_inf);
 	if (rc) {
-		cifs_dbg(VFS, "query directory failed\n");
+		cifs_dbg(FYI, "query directory failed rc=%d\n", rc);
 		SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
 	}
 	return rc;
@@ -1809,7 +1809,8 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
 
 	sg = init_sg(rqst, sign);
 	if (!sg) {
-		cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc);
+		cifs_dbg(VFS, "%s: Failed to init sg", __func__);
+		rc = -ENOMEM;
 		goto free_req;
 	}
 
@@ -1817,6 +1818,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
 	iv = kzalloc(iv_len, GFP_KERNEL);
 	if (!iv) {
 		cifs_dbg(VFS, "%s: Failed to alloc IV", __func__);
+		rc = -ENOMEM;
 		goto free_sg;
 	}
 	iv[0] = 3;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 48ff7703b919..e4afdaae743f 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1240,15 +1240,19 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 		goto tcon_exit;
 	}
 
-	if (rsp->ShareType & SMB2_SHARE_TYPE_DISK)
+	switch (rsp->ShareType) {
+	case SMB2_SHARE_TYPE_DISK:
 		cifs_dbg(FYI, "connection to disk share\n");
-	else if (rsp->ShareType & SMB2_SHARE_TYPE_PIPE) {
+		break;
+	case SMB2_SHARE_TYPE_PIPE:
 		tcon->ipc = true;
 		cifs_dbg(FYI, "connection to pipe share\n");
-	} else if (rsp->ShareType & SMB2_SHARE_TYPE_PRINT) {
-		tcon->print = true;
+		break;
+	case SMB2_SHARE_TYPE_PRINT:
+		tcon->ipc = true;
 		cifs_dbg(FYI, "connection to printer\n");
-	} else {
+		break;
+	default:
 		cifs_dbg(VFS, "unknown share type %d\n", rsp->ShareType);
 		rc = -EOPNOTSUPP;
 		goto tcon_error_exit;
@@ -2173,9 +2177,7 @@ smb2_echo_callback(struct mid_q_entry *mid)
 	if (mid->mid_state == MID_RESPONSE_RECEIVED)
 		credits_received = le16_to_cpu(rsp->hdr.sync_hdr.CreditRequest);
 
-	mutex_lock(&server->srv_mutex);
 	DeleteMidQEntry(mid);
-	mutex_unlock(&server->srv_mutex);
 	add_credits(server, credits_received, CIFS_ECHO_OP);
 }
 
@@ -2433,9 +2435,7 @@ smb2_readv_callback(struct mid_q_entry *mid)
 		cifs_stats_fail_inc(tcon, SMB2_READ_HE);
 
 	queue_work(cifsiod_wq, &rdata->work);
-	mutex_lock(&server->srv_mutex);
 	DeleteMidQEntry(mid);
-	mutex_unlock(&server->srv_mutex);
 	add_credits(server, credits_received, 0);
 }
 
@@ -2594,7 +2594,6 @@ smb2_writev_callback(struct mid_q_entry *mid)
 {
 	struct cifs_writedata *wdata = mid->callback_data;
 	struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
-	struct TCP_Server_Info *server = tcon->ses->server;
 	unsigned int written;
 	struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf;
 	unsigned int credits_received = 1;
@@ -2634,9 +2633,7 @@ smb2_writev_callback(struct mid_q_entry *mid)
 		cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
 
 	queue_work(cifsiod_wq, &wdata->work);
-	mutex_lock(&server->srv_mutex);
 	DeleteMidQEntry(mid);
-	mutex_unlock(&server->srv_mutex);
 	add_credits(tcon->ses->server, credits_received, 0);
 }
 
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 4d64b5b8fc9c..47a125ece11e 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -94,7 +94,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
 	now = jiffies;
 	/* commands taking longer than one second are indications that
 	   something is wrong, unless it is quite a slow link or server */
-	if ((now - midEntry->when_alloc) > HZ) {
+	if (time_after(now, midEntry->when_alloc + HZ)) {
 		if ((cifsFYI & CIFS_TIMER) && (midEntry->command != command)) {
 			pr_debug(" CIFS slow rsp: cmd %d mid %llu",
 			       midEntry->command, midEntry->mid);
@@ -613,9 +613,7 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
 	}
 	spin_unlock(&GlobalMid_Lock);
 
-	mutex_lock(&server->srv_mutex);
 	DeleteMidQEntry(mid);
-	mutex_unlock(&server->srv_mutex);
 	return rc;
 }
 
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 20af5187ba63..de50e749ff05 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -188,8 +188,6 @@ static int cifs_creation_time_get(struct dentry *dentry, struct inode *inode,
 	pcreatetime = (__u64 *)value;
 	*pcreatetime = CIFS_I(inode)->createtime;
 	return sizeof(__u64);
-
-	return rc;
 }
 
 
@@ -235,8 +233,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
 
 		if (pTcon->ses->server->ops->query_all_EAs)
 			rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
-				full_path, name, value, size,
-				cifs_sb->local_nls, cifs_remap(cifs_sb));
+				full_path, name, value, size, cifs_sb);
 		break;
 
 	case XATTR_CIFS_ACL: {
@@ -336,8 +333,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
 
 	if (pTcon->ses->server->ops->query_all_EAs)
 		rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
-				full_path, NULL, data, buf_size,
-				cifs_sb->local_nls, cifs_remap(cifs_sb));
+				full_path, NULL, data, buf_size, cifs_sb);
 list_ea_exit:
 	kfree(full_path);
 	free_xid(xid);
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 8b2a994042dd..a66f6624d899 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -138,6 +138,14 @@ struct config_item *config_item_get(struct config_item *item)
 }
 EXPORT_SYMBOL(config_item_get);
 
+struct config_item *config_item_get_unless_zero(struct config_item *item)
+{
+	if (item && kref_get_unless_zero(&item->ci_kref))
+		return item;
+	return NULL;
+}
+EXPORT_SYMBOL(config_item_get_unless_zero);
+
 static void config_item_cleanup(struct config_item *item)
 {
 	struct config_item_type *t = item->ci_type;
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index a6ab012a2c6a..c8aabba502f6 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -83,14 +83,13 @@ static int create_link(struct config_item *parent_item,
 	ret = -ENOMEM;
 	sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
 	if (sl) {
-		sl->sl_target = config_item_get(item);
 		spin_lock(&configfs_dirent_lock);
 		if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
 			spin_unlock(&configfs_dirent_lock);
-			config_item_put(item);
 			kfree(sl);
 			return -ENOENT;
 		}
+		sl->sl_target = config_item_get(item);
 		list_add(&sl->sl_list, &target_sd->s_links);
 		spin_unlock(&configfs_dirent_lock);
 		ret = configfs_create_link(sl, parent_item->ci_dentry,
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index a409a84f1bca..6181e9526860 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -129,7 +129,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
 			goto errout;
 		}
 		err = submit_bio_wait(bio);
-		if ((err == 0) && bio->bi_error)
+		if (err == 0 && bio->bi_status)
 			err = -EIO;
 		bio_put(bio);
 		if (err)
diff --git a/fs/dax.c b/fs/dax.c
index 66d79067eedf..33d05aa02aad 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -84,7 +84,7 @@ struct exceptional_entry_key {
 };
 
 struct wait_exceptional_entry_queue {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct exceptional_entry_key key;
 };
 
@@ -108,7 +108,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
 	return wait_table + hash;
 }
 
-static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode,
+static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mode,
 				       int sync, void *keyp)
 {
 	struct exceptional_entry_key *key = keyp;
@@ -461,35 +461,6 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 }
 
 /*
- * Invalidate exceptional DAX entry if easily possible. This handles DAX
- * entries for invalidate_inode_pages() so we evict the entry only if we can
- * do so without blocking.
- */
-int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index)
-{
-	int ret = 0;
-	void *entry, **slot;
-	struct radix_tree_root *page_tree = &mapping->page_tree;
-
-	spin_lock_irq(&mapping->tree_lock);
-	entry = __radix_tree_lookup(page_tree, index, NULL, &slot);
-	if (!entry || !radix_tree_exceptional_entry(entry) ||
-	    slot_locked(mapping, slot))
-		goto out;
-	if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
-	    radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
-		goto out;
-	radix_tree_delete(page_tree, index);
-	mapping->nrexceptional--;
-	ret = 1;
-out:
-	spin_unlock_irq(&mapping->tree_lock);
-	if (ret)
-		dax_wake_mapping_entry_waiter(mapping, index, entry, true);
-	return ret;
-}
-
-/*
  * Invalidate exceptional DAX entry if it is clean.
  */
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
@@ -888,6 +859,7 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 			if (ret < 0)
 				goto out;
 		}
+		start_index = indices[pvec.nr - 1] + 1;
 	}
 out:
 	put_dax(dax_dev);
@@ -993,12 +965,12 @@ int __dax_zero_page_range(struct block_device *bdev,
 		void *kaddr;
 		pfn_t pfn;
 
-		rc = bdev_dax_pgoff(bdev, sector, size, &pgoff);
+		rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
 		if (rc)
 			return rc;
 
 		id = dax_read_lock();
-		rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr,
+		rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr,
 				&pfn);
 		if (rc < 0) {
 			dax_read_unlock(id);
@@ -1044,7 +1016,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	 * into page tables. We have to tear down these mappings so that data
 	 * written by write(2) is visible in mmap.
 	 */
-	if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) {
+	if (iomap->flags & IOMAP_F_NEW) {
 		invalidate_inode_pages2_range(inode->i_mapping,
 					      pos >> PAGE_SHIFT,
 					      (end - 1) >> PAGE_SHIFT);
@@ -1177,6 +1149,23 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
 		flags |= IOMAP_WRITE;
 
+	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
+	if (IS_ERR(entry)) {
+		vmf_ret = dax_fault_return(PTR_ERR(entry));
+		goto out;
+	}
+
+	/*
+	 * It is possible, particularly with mixed reads & writes to private
+	 * mappings, that we have raced with a PMD fault that overlaps with
+	 * the PTE we need to set up.  If so just return and the fault will be
+	 * retried.
+	 */
+	if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
+		vmf_ret = VM_FAULT_NOPAGE;
+		goto unlock_entry;
+	}
+
 	/*
 	 * Note that we don't bother to use iomap_apply here: DAX required
 	 * the file system block size to be equal the page size, which means
@@ -1185,17 +1174,11 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
 	if (error) {
 		vmf_ret = dax_fault_return(error);
-		goto out;
+		goto unlock_entry;
 	}
 	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
-		vmf_ret = dax_fault_return(-EIO);	/* fs corruption? */
-		goto finish_iomap;
-	}
-
-	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
-	if (IS_ERR(entry)) {
-		vmf_ret = dax_fault_return(PTR_ERR(entry));
-		goto finish_iomap;
+		error = -EIO;	/* fs corruption? */
+		goto error_finish_iomap;
 	}
 
 	sector = dax_iomap_sector(&iomap, pos);
@@ -1217,13 +1200,13 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 		}
 
 		if (error)
-			goto error_unlock_entry;
+			goto error_finish_iomap;
 
 		__SetPageUptodate(vmf->cow_page);
 		vmf_ret = finish_fault(vmf);
 		if (!vmf_ret)
 			vmf_ret = VM_FAULT_DONE_COW;
-		goto unlock_entry;
+		goto finish_iomap;
 	}
 
 	switch (iomap.type) {
@@ -1243,7 +1226,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 	case IOMAP_HOLE:
 		if (!(vmf->flags & FAULT_FLAG_WRITE)) {
 			vmf_ret = dax_load_hole(mapping, &entry, vmf);
-			goto unlock_entry;
+			goto finish_iomap;
 		}
 		/*FALLTHRU*/
 	default:
@@ -1252,10 +1235,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 		break;
 	}
 
- error_unlock_entry:
+ error_finish_iomap:
 	vmf_ret = dax_fault_return(error) | major;
- unlock_entry:
-	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
  finish_iomap:
 	if (ops->iomap_end) {
 		int copied = PAGE_SIZE;
@@ -1270,7 +1251,9 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 		 */
 		ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
 	}
-out:
+ unlock_entry:
+	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+ out:
 	trace_dax_pte_fault_done(inode, vmf, vmf_ret);
 	return vmf_ret;
 }
@@ -1417,6 +1400,28 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
 		goto fallback;
 
 	/*
+	 * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
+	 * PMD or a HZP entry.  If it can't (because a 4k page is already in
+	 * the tree, for instance), it will return -EEXIST and we just fall
+	 * back to 4k entries.
+	 */
+	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+	if (IS_ERR(entry))
+		goto fallback;
+
+	/*
+	 * It is possible, particularly with mixed reads & writes to private
+	 * mappings, that we have raced with a PTE fault that overlaps with
+	 * the PMD we need to set up.  If so just return and the fault will be
+	 * retried.
+	 */
+	if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
+			!pmd_devmap(*vmf->pmd)) {
+		result = 0;
+		goto unlock_entry;
+	}
+
+	/*
 	 * Note that we don't use iomap_apply here.  We aren't doing I/O, only
 	 * setting up a mapping, so really we're using iomap_begin() as a way
 	 * to look up our filesystem block.
@@ -1424,21 +1429,11 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
 	pos = (loff_t)pgoff << PAGE_SHIFT;
 	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
 	if (error)
-		goto fallback;
+		goto unlock_entry;
 
 	if (iomap.offset + iomap.length < pos + PMD_SIZE)
 		goto finish_iomap;
 
-	/*
-	 * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
-	 * PMD or a HZP entry.  If it can't (because a 4k page is already in
-	 * the tree, for instance), it will return -EEXIST and we just fall
-	 * back to 4k entries.
-	 */
-	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
-	if (IS_ERR(entry))
-		goto finish_iomap;
-
 	switch (iomap.type) {
 	case IOMAP_MAPPED:
 		result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry);
@@ -1446,7 +1441,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
 	case IOMAP_UNWRITTEN:
 	case IOMAP_HOLE:
 		if (WARN_ON_ONCE(write))
-			goto unlock_entry;
+			break;
 		result = dax_pmd_load_hole(vmf, &iomap, &entry);
 		break;
 	default:
@@ -1454,8 +1449,6 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
 		break;
 	}
 
- unlock_entry:
-	put_locked_mapping_entry(mapping, pgoff, entry);
  finish_iomap:
 	if (ops->iomap_end) {
 		int copied = PMD_SIZE;
@@ -1471,6 +1464,8 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
 		ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
 				&iomap);
 	}
+ unlock_entry:
+	put_locked_mapping_entry(mapping, pgoff, entry);
  fallback:
 	if (result == VM_FAULT_FALLBACK) {
 		split_huge_pmd(vma, vmf->pmd, vmf->address);
diff --git a/fs/dcache.c b/fs/dcache.c
index cddf39777835..a9f995f6859e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1494,7 +1494,7 @@ static void check_and_drop(void *_data)
 {
 	struct detach_data *data = _data;
 
-	if (!data->mountpoint && !data->select.found)
+	if (!data->mountpoint && list_empty(&data->select.dispose))
 		__d_drop(data->select.start);
 }
 
@@ -1536,17 +1536,15 @@ void d_invalidate(struct dentry *dentry)
 
 		d_walk(dentry, &data, detach_and_collect, check_and_drop);
 
-		if (data.select.found)
+		if (!list_empty(&data.select.dispose))
 			shrink_dentry_list(&data.select.dispose);
+		else if (!data.mountpoint)
+			return;
 
 		if (data.mountpoint) {
 			detach_mounts(data.mountpoint);
 			dput(data.mountpoint);
 		}
-
-		if (!data.mountpoint && !data.select.found)
-			break;
-
 		cond_resched();
 	}
 }
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a04ebea77de8..08cf27811e5a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -294,7 +294,7 @@ static void dio_aio_complete_work(struct work_struct *work)
 	dio_complete(dio, 0, true);
 }
 
-static int dio_bio_complete(struct dio *dio, struct bio *bio);
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
 
 /*
  * Asynchronous IO callback. 
@@ -348,13 +348,12 @@ static void dio_bio_end_io(struct bio *bio)
 /**
  * dio_end_io - handle the end io action for the given bio
  * @bio: The direct io bio thats being completed
- * @error: Error if there was one
  *
  * This is meant to be called by any filesystem that uses their own dio_submit_t
  * so that the DIO specific endio actions are dealt with after the filesystem
  * has done it's completion work.
  */
-void dio_end_io(struct bio *bio, int error)
+void dio_end_io(struct bio *bio)
 {
 	struct dio *dio = bio->bi_private;
 
@@ -386,6 +385,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
 	else
 		bio->bi_end_io = dio_bio_end_io;
 
+	bio->bi_write_hint = dio->iocb->ki_hint;
+
 	sdio->bio = bio;
 	sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
 }
@@ -474,17 +475,20 @@ static struct bio *dio_await_one(struct dio *dio)
 /*
  * Process one completed BIO.  No locks are held.
  */
-static int dio_bio_complete(struct dio *dio, struct bio *bio)
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
 {
 	struct bio_vec *bvec;
 	unsigned i;
-	int err;
+	blk_status_t err = bio->bi_status;
 
-	if (bio->bi_error)
-		dio->io_error = -EIO;
+	if (err) {
+		if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT))
+			dio->io_error = -EAGAIN;
+		else
+			dio->io_error = -EIO;
+	}
 
 	if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
-		err = bio->bi_error;
 		bio_check_pages_dirty(bio);	/* transfers ownership */
 	} else {
 		bio_for_each_segment_all(bvec, bio, i) {
@@ -495,7 +499,6 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
 				set_page_dirty_lock(page);
 			put_page(page);
 		}
-		err = bio->bi_error;
 		bio_put(bio);
 	}
 	return err;
@@ -539,7 +542,7 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
 			bio = dio->bio_list;
 			dio->bio_list = bio->bi_private;
 			spin_unlock_irqrestore(&dio->bio_lock, flags);
-			ret2 = dio_bio_complete(dio, bio);
+			ret2 = blk_status_to_errno(dio_bio_complete(dio, bio));
 			if (ret == 0)
 				ret = ret2;
 		}
@@ -1197,6 +1200,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	if (iov_iter_rw(iter) == WRITE) {
 		dio->op = REQ_OP_WRITE;
 		dio->op_flags = REQ_SYNC | REQ_IDLE;
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			dio->op_flags |= REQ_NOWAIT;
 	} else {
 		dio->op = REQ_OP_READ;
 	}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 68b9fffcb2c8..9736df2ce89d 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -191,7 +191,7 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
  * This is used to atomically remove a wait queue entry from the eventfd wait
  * queue head, and read/reset the counter value.
  */
-int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
 				  __u64 *cnt)
 {
 	unsigned long flags;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5420767c9b68..b1c8e23ddf65 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -244,7 +244,7 @@ struct eppoll_entry {
 	 * Wait queue item that will be linked to the target file wait
 	 * queue head.
 	 */
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	/* The wait queue head that linked the "wait" wait queue item */
 	wait_queue_head_t *whead;
@@ -347,13 +347,13 @@ static inline int ep_is_linked(struct list_head *p)
 	return !list_empty(p);
 }
 
-static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
+static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p)
 {
 	return container_of(p, struct eppoll_entry, wait);
 }
 
 /* Get the "struct epitem" from a wait queue pointer */
-static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
+static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p)
 {
 	return container_of(p, struct eppoll_entry, wait)->base;
 }
@@ -1078,7 +1078,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
  * mechanism. It is called by the stored file descriptors when they
  * have events to report.
  */
-static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	int pwake = 0;
 	unsigned long flags;
@@ -1094,7 +1094,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 		 * can't use __remove_wait_queue(). whead->lock is held by
 		 * the caller.
 		 */
-		list_del_init(&wait->task_list);
+		list_del_init(&wait->entry);
 	}
 
 	spin_lock_irqsave(&ep->lock, flags);
@@ -1699,7 +1699,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 	int res = 0, eavail, timed_out = 0;
 	unsigned long flags;
 	u64 slack = 0;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	ktime_t expires, *to = NULL;
 
 	if (timeout > 0) {
diff --git a/fs/exec.c b/fs/exec.c
index 72934df68471..904199086490 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -220,8 +220,26 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 
 	if (write) {
 		unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+		unsigned long ptr_size;
 		struct rlimit *rlim;
 
+		/*
+		 * Since the stack will hold pointers to the strings, we
+		 * must account for them as well.
+		 *
+		 * The size calculation is the entire vma while each arg page is
+		 * built, so each time we get here it's calculating how far it
+		 * is currently (rather than each call being just the newly
+		 * added size from the arg page).  As a result, we need to
+		 * always add the entire size of the pointers, so that on the
+		 * last call to get_arg_page() we'll actually have the entire
+		 * correct size.
+		 */
+		ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+		if (ptr_size > ULONG_MAX - size)
+			goto fail;
+		size += ptr_size;
+
 		acct_arg_size(bprm, size / PAGE_SIZE);
 
 		/*
@@ -239,13 +257,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		 *    to work from.
 		 */
 		rlim = current->signal->rlim;
-		if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
-			put_page(page);
-			return NULL;
-		}
+		if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4)
+			goto fail;
 	}
 
 	return page;
+
+fail:
+	put_page(page);
+	return NULL;
 }
 
 static void put_arg_page(struct page *page)
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 26d77f9f8c12..2dcbd5698884 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -817,7 +817,7 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	iomap->bdev = bdev;
 	iomap->offset = (u64)first_block << blkbits;
 	if (blk_queue_dax(bdev->bd_queue))
-		iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+		iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
 	else
 		iomap->dax_dev = NULL;
 
@@ -841,7 +841,7 @@ static int
 ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
 		ssize_t written, unsigned flags, struct iomap *iomap)
 {
-	put_dax(iomap->dax_dev);
+	fs_put_dax(iomap->dax_dev);
 	if (iomap->type == IOMAP_MAPPED &&
 	    written < length &&
 	    (flags & IOMAP_WRITE))
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 8ac673c71a36..9c2028b50e5c 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -32,6 +32,7 @@
 #include <linux/log2.h>
 #include <linux/quotaops.h>
 #include <linux/uaccess.h>
+#include <linux/dax.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index fd389935ecd1..3ec0e46de95f 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
  */
 
+#include <linux/quotaops.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
 #include "xattr.h"
@@ -232,6 +233,9 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	handle_t *handle;
 	int error, retries = 0;
 
+	error = dquot_initialize(inode);
+	if (error)
+		return error;
 retry:
 	handle = ext4_journal_start(inode, EXT4_HT_XATTR,
 				    ext4_jbd2_credits_xattr(inode));
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8e8046104f4d..32191548abed 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2523,7 +2523,6 @@ extern int ext4_search_dir(struct buffer_head *bh,
 			   int buf_size,
 			   struct inode *dir,
 			   struct ext4_filename *fname,
-			   const struct qstr *d_name,
 			   unsigned int offset,
 			   struct ext4_dir_entry_2 **res_dir);
 extern int ext4_generic_delete_entry(handle_t *handle,
@@ -3007,7 +3006,6 @@ extern int htree_inlinedir_to_tree(struct file *dir_file,
 				   int *has_inline_data);
 extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
 					struct ext4_filename *fname,
-					const struct qstr *d_name,
 					struct ext4_dir_entry_2 **res_dir,
 					int *has_inline_data);
 extern int ext4_delete_inline_entry(handle_t *handle,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2a97dff87b96..3e36508610b7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3413,13 +3413,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	struct ext4_sb_info *sbi;
 	struct ext4_extent_header *eh;
 	struct ext4_map_blocks split_map;
-	struct ext4_extent zero_ex;
+	struct ext4_extent zero_ex1, zero_ex2;
 	struct ext4_extent *ex, *abut_ex;
 	ext4_lblk_t ee_block, eof_block;
 	unsigned int ee_len, depth, map_len = map->m_len;
 	int allocated = 0, max_zeroout = 0;
 	int err = 0;
-	int split_flag = 0;
+	int split_flag = EXT4_EXT_DATA_VALID2;
 
 	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
 		"block %llu, max_blocks %u\n", inode->i_ino,
@@ -3436,7 +3436,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	ex = path[depth].p_ext;
 	ee_block = le32_to_cpu(ex->ee_block);
 	ee_len = ext4_ext_get_actual_len(ex);
-	zero_ex.ee_len = 0;
+	zero_ex1.ee_len = 0;
+	zero_ex2.ee_len = 0;
 
 	trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
 
@@ -3576,62 +3577,52 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	if (ext4_encrypted_inode(inode))
 		max_zeroout = 0;
 
-	/* If extent is less than s_max_zeroout_kb, zeroout directly */
-	if (max_zeroout && (ee_len <= max_zeroout)) {
-		err = ext4_ext_zeroout(inode, ex);
-		if (err)
-			goto out;
-		zero_ex.ee_block = ex->ee_block;
-		zero_ex.ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex));
-		ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));
-
-		err = ext4_ext_get_access(handle, inode, path + depth);
-		if (err)
-			goto out;
-		ext4_ext_mark_initialized(ex);
-		ext4_ext_try_to_merge(handle, inode, path, ex);
-		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
-		goto out;
-	}
-
 	/*
-	 * four cases:
+	 * five cases:
 	 * 1. split the extent into three extents.
-	 * 2. split the extent into two extents, zeroout the first half.
-	 * 3. split the extent into two extents, zeroout the second half.
+	 * 2. split the extent into two extents, zeroout the head of the first
+	 *    extent.
+	 * 3. split the extent into two extents, zeroout the tail of the second
+	 *    extent.
 	 * 4. split the extent into two extents with out zeroout.
+	 * 5. no splitting needed, just possibly zeroout the head and / or the
+	 *    tail of the extent.
 	 */
 	split_map.m_lblk = map->m_lblk;
 	split_map.m_len = map->m_len;
 
-	if (max_zeroout && (allocated > map->m_len)) {
+	if (max_zeroout && (allocated > split_map.m_len)) {
 		if (allocated <= max_zeroout) {
-			/* case 3 */
-			zero_ex.ee_block =
-					 cpu_to_le32(map->m_lblk);
-			zero_ex.ee_len = cpu_to_le16(allocated);
-			ext4_ext_store_pblock(&zero_ex,
-				ext4_ext_pblock(ex) + map->m_lblk - ee_block);
-			err = ext4_ext_zeroout(inode, &zero_ex);
+			/* case 3 or 5 */
+			zero_ex1.ee_block =
+				 cpu_to_le32(split_map.m_lblk +
+					     split_map.m_len);
+			zero_ex1.ee_len =
+				cpu_to_le16(allocated - split_map.m_len);
+			ext4_ext_store_pblock(&zero_ex1,
+				ext4_ext_pblock(ex) + split_map.m_lblk +
+				split_map.m_len - ee_block);
+			err = ext4_ext_zeroout(inode, &zero_ex1);
 			if (err)
 				goto out;
-			split_map.m_lblk = map->m_lblk;
 			split_map.m_len = allocated;
-		} else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
-			/* case 2 */
-			if (map->m_lblk != ee_block) {
-				zero_ex.ee_block = ex->ee_block;
-				zero_ex.ee_len = cpu_to_le16(map->m_lblk -
+		}
+		if (split_map.m_lblk - ee_block + split_map.m_len <
+								max_zeroout) {
+			/* case 2 or 5 */
+			if (split_map.m_lblk != ee_block) {
+				zero_ex2.ee_block = ex->ee_block;
+				zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk -
 							ee_block);
-				ext4_ext_store_pblock(&zero_ex,
+				ext4_ext_store_pblock(&zero_ex2,
 						      ext4_ext_pblock(ex));
-				err = ext4_ext_zeroout(inode, &zero_ex);
+				err = ext4_ext_zeroout(inode, &zero_ex2);
 				if (err)
 					goto out;
 			}
 
+			split_map.m_len += split_map.m_lblk - ee_block;
 			split_map.m_lblk = ee_block;
-			split_map.m_len = map->m_lblk - ee_block + map->m_len;
 			allocated = map->m_len;
 		}
 	}
@@ -3642,8 +3633,11 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		err = 0;
 out:
 	/* If we have gotten a failure, don't zero out status tree */
-	if (!err)
-		err = ext4_zeroout_es(inode, &zero_ex);
+	if (!err) {
+		err = ext4_zeroout_es(inode, &zero_ex1);
+		if (!err)
+			err = ext4_zeroout_es(inode, &zero_ex2);
+	}
 	return err ? err : allocated;
 }
 
@@ -4883,6 +4877,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 
 	/* Zero out partial block at the edges of the range */
 	ret = ext4_zero_partial_blocks(handle, inode, offset, len);
+	if (ret >= 0)
+		ext4_update_inode_fsync_trans(handle, inode, 1);
 
 	if (file->f_flags & O_SYNC)
 		ext4_handle_sync(handle);
@@ -5569,6 +5565,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 		ext4_handle_sync(handle);
 	inode->i_mtime = inode->i_ctime = current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
+	ext4_update_inode_fsync_trans(handle, inode, 1);
 
 out_stop:
 	ext4_journal_stop(handle);
@@ -5742,6 +5739,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 	up_write(&EXT4_I(inode)->i_data_sem);
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
+	if (ret >= 0)
+		ext4_update_inode_fsync_trans(handle, inode, 1);
 
 out_stop:
 	ext4_journal_stop(handle);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index cefa9835f275..58e2eeaa0bc4 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -37,7 +37,11 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
 
-	inode_lock_shared(inode);
+	if (!inode_trylock_shared(inode)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			return -EAGAIN;
+		inode_lock_shared(inode);
+	}
 	/*
 	 * Recheck under inode lock - at this point we are sure it cannot
 	 * change anymore
@@ -179,7 +183,11 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
 
-	inode_lock(inode);
+	if (!inode_trylock(inode)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			return -EAGAIN;
+		inode_lock(inode);
+	}
 	ret = ext4_write_checks(iocb, from);
 	if (ret <= 0)
 		goto out;
@@ -216,7 +224,12 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		return ext4_dax_write_iter(iocb, from);
 #endif
 
-	inode_lock(inode);
+	if (!inode_trylock(inode)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			return -EAGAIN;
+		inode_lock(inode);
+	}
+
 	ret = ext4_write_checks(iocb, from);
 	if (ret <= 0)
 		goto out;
@@ -235,9 +248,15 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
 	iocb->private = &overwrite;
 	/* Check whether we do a DIO overwrite or not */
-	if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio &&
-	    ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
-		overwrite = 1;
+	if (o_direct && !unaligned_aio) {
+		if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
+			if (ext4_should_dioread_nolock(inode))
+				overwrite = 1;
+		} else if (iocb->ki_flags & IOCB_NOWAIT) {
+			ret = -EAGAIN;
+			goto out;
+		}
+	}
 
 	ret = __generic_file_write_iter(iocb, from);
 	inode_unlock(inode);
@@ -257,6 +276,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
 		enum page_entry_size pe_size)
 {
 	int result;
+	handle_t *handle = NULL;
 	struct inode *inode = file_inode(vmf->vma->vm_file);
 	struct super_block *sb = inode->i_sb;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
@@ -264,12 +284,24 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
 	if (write) {
 		sb_start_pagefault(sb);
 		file_update_time(vmf->vma->vm_file);
+		down_read(&EXT4_I(inode)->i_mmap_sem);
+		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+					       EXT4_DATA_TRANS_BLOCKS(sb));
+	} else {
+		down_read(&EXT4_I(inode)->i_mmap_sem);
 	}
-	down_read(&EXT4_I(inode)->i_mmap_sem);
-	result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
-	up_read(&EXT4_I(inode)->i_mmap_sem);
-	if (write)
+	if (!IS_ERR(handle))
+		result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
+	else
+		result = VM_FAULT_SIGBUS;
+	if (write) {
+		if (!IS_ERR(handle))
+			ext4_journal_stop(handle);
+		up_read(&EXT4_I(inode)->i_mmap_sem);
 		sb_end_pagefault(sb);
+	} else {
+		up_read(&EXT4_I(inode)->i_mmap_sem);
+	}
 
 	return result;
 }
@@ -422,6 +454,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 		if (ret < 0)
 			return ret;
 	}
+
+	/* Set the flags to support nowait AIO */
+	filp->f_mode |= FMODE_AIO_NOWAIT;
+
 	return dquot_file_open(inode, filp);
 }
 
@@ -461,57 +497,37 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 	endoff = (loff_t)end_blk << blkbits;
 
 	index = startoff >> PAGE_SHIFT;
-	end = endoff >> PAGE_SHIFT;
+	end = (endoff - 1) >> PAGE_SHIFT;
 
 	pagevec_init(&pvec, 0);
 	do {
 		int i, num;
 		unsigned long nr_pages;
 
-		num = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
+		num = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
 		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
 					  (pgoff_t)num);
-		if (nr_pages == 0) {
-			if (whence == SEEK_DATA)
-				break;
-
-			BUG_ON(whence != SEEK_HOLE);
-			/*
-			 * If this is the first time to go into the loop and
-			 * offset is not beyond the end offset, it will be a
-			 * hole at this offset
-			 */
-			if (lastoff == startoff || lastoff < endoff)
-				found = 1;
+		if (nr_pages == 0)
 			break;
-		}
-
-		/*
-		 * If this is the first time to go into the loop and
-		 * offset is smaller than the first page offset, it will be a
-		 * hole at this offset.
-		 */
-		if (lastoff == startoff && whence == SEEK_HOLE &&
-		    lastoff < page_offset(pvec.pages[0])) {
-			found = 1;
-			break;
-		}
 
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
 			struct buffer_head *bh, *head;
 
 			/*
-			 * If the current offset is not beyond the end of given
-			 * range, it will be a hole.
+			 * If current offset is smaller than the page offset,
+			 * there is a hole at this offset.
 			 */
-			if (lastoff < endoff && whence == SEEK_HOLE &&
-			    page->index > end) {
+			if (whence == SEEK_HOLE && lastoff < endoff &&
+			    lastoff < page_offset(pvec.pages[i])) {
 				found = 1;
 				*offset = lastoff;
 				goto out;
 			}
 
+			if (page->index > end)
+				goto out;
+
 			lock_page(page);
 
 			if (unlikely(page->mapping != inode->i_mapping)) {
@@ -551,20 +567,18 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 			unlock_page(page);
 		}
 
-		/*
-		 * The no. of pages is less than our desired, that would be a
-		 * hole in there.
-		 */
-		if (nr_pages < num && whence == SEEK_HOLE) {
-			found = 1;
-			*offset = lastoff;
+		/* The no. of pages is less than our desired, we are done. */
+		if (nr_pages < num)
 			break;
-		}
 
 		index = pvec.pages[i - 1]->index + 1;
 		pagevec_release(&pvec);
 	} while (index <= end);
 
+	if (whence == SEEK_HOLE && lastoff < endoff) {
+		found = 1;
+		*offset = lastoff;
+	}
 out:
 	pagevec_release(&pvec);
 	return found;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index d5dea4c293ef..8d141c0c8ff9 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1627,7 +1627,6 @@ out:
 
 struct buffer_head *ext4_find_inline_entry(struct inode *dir,
 					struct ext4_filename *fname,
-					const struct qstr *d_name,
 					struct ext4_dir_entry_2 **res_dir,
 					int *has_inline_data)
 {
@@ -1649,7 +1648,7 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
 						EXT4_INLINE_DOTDOT_SIZE;
 	inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
 	ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
-			      dir, fname, d_name, 0, res_dir);
+			      dir, fname, 0, res_dir);
 	if (ret == 1)
 		goto out_find;
 	if (ret < 0)
@@ -1662,7 +1661,7 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
 	inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;
 
 	ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
-			      dir, fname, d_name, 0, res_dir);
+			      dir, fname, 0, res_dir);
 	if (ret == 1)
 		goto out_find;
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5834c4d76be8..5cf82d03968c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2124,15 +2124,29 @@ static int ext4_writepage(struct page *page,
 static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
 {
 	int len;
-	loff_t size = i_size_read(mpd->inode);
+	loff_t size;
 	int err;
 
 	BUG_ON(page->index != mpd->first_page);
+	clear_page_dirty_for_io(page);
+	/*
+	 * We have to be very careful here!  Nothing protects writeback path
+	 * against i_size changes and the page can be writeably mapped into
+	 * page tables. So an application can be growing i_size and writing
+	 * data through mmap while writeback runs. clear_page_dirty_for_io()
+	 * write-protects our page in page tables and the page cannot get
+	 * written to again until we release page lock. So only after
+	 * clear_page_dirty_for_io() we are safe to sample i_size for
+	 * ext4_bio_write_page() to zero-out tail of the written page. We rely
+	 * on the barrier provided by TestClearPageDirty in
+	 * clear_page_dirty_for_io() to make sure i_size is really sampled only
+	 * after page tables are updated.
+	 */
+	size = i_size_read(mpd->inode);
 	if (page->index == size >> PAGE_SHIFT)
 		len = size & ~PAGE_MASK;
 	else
 		len = PAGE_SIZE;
-	clear_page_dirty_for_io(page);
 	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
 	if (!err)
 		mpd->wbc->nr_to_write--;
@@ -3412,7 +3426,7 @@ retry:
 	bdev = inode->i_sb->s_bdev;
 	iomap->bdev = bdev;
 	if (blk_queue_dax(bdev->bd_queue))
-		iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+		iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
 	else
 		iomap->dax_dev = NULL;
 	iomap->offset = first_block << blkbits;
@@ -3447,7 +3461,7 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
 	int blkbits = inode->i_blkbits;
 	bool truncate = false;
 
-	put_dax(iomap->dax_dev);
+	fs_put_dax(iomap->dax_dev);
 	if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
 		return 0;
 
@@ -3629,9 +3643,6 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
 		get_block_func = ext4_dio_get_block_unwritten_async;
 		dio_flags = DIO_LOCKING;
 	}
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-	BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
-#endif
 	ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
 				   get_block_func, ext4_end_io_dio, NULL,
 				   dio_flags);
@@ -3713,7 +3724,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
 	 */
 	inode_lock_shared(inode);
 	ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
-					   iocb->ki_pos + count);
+					   iocb->ki_pos + count - 1);
 	if (ret)
 		goto out_unlock;
 	ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
@@ -4207,6 +4218,8 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
 	inode->i_mtime = inode->i_ctime = current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
+	if (ret >= 0)
+		ext4_update_inode_fsync_trans(handle, inode, 1);
 out_stop:
 	ext4_journal_stop(handle);
 out_dio:
@@ -5637,8 +5650,9 @@ static int ext4_expand_extra_isize(struct inode *inode,
 	/* No extended attributes present */
 	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
 	    header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
-		memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
-			new_extra_isize);
+		memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
+		       EXT4_I(inode)->i_extra_isize, 0,
+		       new_extra_isize - EXT4_I(inode)->i_extra_isize);
 		EXT4_I(inode)->i_extra_isize = new_extra_isize;
 		return 0;
 	}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5083bce20ac4..b7928cddd539 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3887,7 +3887,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
 
 	err = ext4_mb_load_buddy(sb, group, &e4b);
 	if (err) {
-		ext4_error(sb, "Error loading buddy information for %u", group);
+		ext4_warning(sb, "Error %d loading buddy information for %u",
+			     err, group);
 		put_bh(bitmap_bh);
 		return 0;
 	}
@@ -4044,10 +4045,11 @@ repeat:
 		BUG_ON(pa->pa_type != MB_INODE_PA);
 		group = ext4_get_group_number(sb, pa->pa_pstart);
 
-		err = ext4_mb_load_buddy(sb, group, &e4b);
+		err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
+					     GFP_NOFS|__GFP_NOFAIL);
 		if (err) {
-			ext4_error(sb, "Error loading buddy information for %u",
-					group);
+			ext4_error(sb, "Error %d loading buddy information for %u",
+				   err, group);
 			continue;
 		}
 
@@ -4303,11 +4305,14 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
 	spin_unlock(&lg->lg_prealloc_lock);
 
 	list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
+		int err;
 
 		group = ext4_get_group_number(sb, pa->pa_pstart);
-		if (ext4_mb_load_buddy(sb, group, &e4b)) {
-			ext4_error(sb, "Error loading buddy information for %u",
-					group);
+		err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
+					     GFP_NOFS|__GFP_NOFAIL);
+		if (err) {
+			ext4_error(sb, "Error %d loading buddy information for %u",
+				   err, group);
 			continue;
 		}
 		ext4_lock_group(sb, group);
@@ -5127,8 +5132,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
 
 	ret = ext4_mb_load_buddy(sb, group, &e4b);
 	if (ret) {
-		ext4_error(sb, "Error in loading buddy "
-				"information for %u", group);
+		ext4_warning(sb, "Error %d loading buddy information for %u",
+			     ret, group);
 		return ret;
 	}
 	bitmap = e4b.bd_bitmap;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b81f7d46f344..404256caf9cf 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1155,12 +1155,11 @@ errout:
 static inline int search_dirblock(struct buffer_head *bh,
 				  struct inode *dir,
 				  struct ext4_filename *fname,
-				  const struct qstr *d_name,
 				  unsigned int offset,
 				  struct ext4_dir_entry_2 **res_dir)
 {
 	return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
-			       fname, d_name, offset, res_dir);
+			       fname, offset, res_dir);
 }
 
 /*
@@ -1262,7 +1261,6 @@ static inline bool ext4_match(const struct ext4_filename *fname,
  */
 int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
 		    struct inode *dir, struct ext4_filename *fname,
-		    const struct qstr *d_name,
 		    unsigned int offset, struct ext4_dir_entry_2 **res_dir)
 {
 	struct ext4_dir_entry_2 * de;
@@ -1355,7 +1353,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 
 	if (ext4_has_inline_data(dir)) {
 		int has_inline_data = 1;
-		ret = ext4_find_inline_entry(dir, &fname, d_name, res_dir,
+		ret = ext4_find_inline_entry(dir, &fname, res_dir,
 					     &has_inline_data);
 		if (has_inline_data) {
 			if (inlined)
@@ -1447,7 +1445,7 @@ restart:
 			goto next;
 		}
 		set_buffer_verified(bh);
-		i = search_dirblock(bh, dir, &fname, d_name,
+		i = search_dirblock(bh, dir, &fname,
 			    block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
 		if (i == 1) {
 			EXT4_I(dir)->i_dir_start_lookup = block;
@@ -1488,7 +1486,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 {
 	struct super_block * sb = dir->i_sb;
 	struct dx_frame frames[2], *frame;
-	const struct qstr *d_name = fname->usr_fname;
 	struct buffer_head *bh;
 	ext4_lblk_t block;
 	int retval;
@@ -1505,7 +1502,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 		if (IS_ERR(bh))
 			goto errout;
 
-		retval = search_dirblock(bh, dir, fname, d_name,
+		retval = search_dirblock(bh, dir, fname,
 					 block << EXT4_BLOCK_SIZE_BITS(sb),
 					 res_dir);
 		if (retval == 1)
@@ -1530,7 +1527,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 
 	bh = NULL;
 errout:
-	dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
+	dxtrace(printk(KERN_DEBUG "%s not found\n", fname->usr_fname->name));
 success:
 	dx_release(frames);
 	return bh;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1a82138ba739..c2fce4478cca 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -85,7 +85,7 @@ static void ext4_finish_bio(struct bio *bio)
 		}
 #endif
 
-		if (bio->bi_error) {
+		if (bio->bi_status) {
 			SetPageError(page);
 			mapping_set_error(page->mapping, -EIO);
 		}
@@ -104,7 +104,7 @@ static void ext4_finish_bio(struct bio *bio)
 				continue;
 			}
 			clear_buffer_async_write(bh);
-			if (bio->bi_error)
+			if (bio->bi_status)
 				buffer_io_error(bh);
 		} while ((bh = bh->b_this_page) != head);
 		bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
@@ -303,24 +303,25 @@ static void ext4_end_bio(struct bio *bio)
 		      bdevname(bio->bi_bdev, b),
 		      (long long) bio->bi_iter.bi_sector,
 		      (unsigned) bio_sectors(bio),
-		      bio->bi_error)) {
+		      bio->bi_status)) {
 		ext4_finish_bio(bio);
 		bio_put(bio);
 		return;
 	}
 	bio->bi_end_io = NULL;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		struct inode *inode = io_end->inode;
 
 		ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
 			     "(offset %llu size %ld starting block %llu)",
-			     bio->bi_error, inode->i_ino,
+			     bio->bi_status, inode->i_ino,
 			     (unsigned long long) io_end->offset,
 			     (long) io_end->size,
 			     (unsigned long long)
 			     bi_sector >> (inode->i_blkbits - 9));
-		mapping_set_error(inode->i_mapping, bio->bi_error);
+		mapping_set_error(inode->i_mapping,
+				blk_status_to_errno(bio->bi_status));
 	}
 
 	if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
@@ -349,6 +350,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
 	if (bio) {
 		int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
 				  REQ_SYNC : 0;
+		io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
 		bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
 		submit_bio(io->io_bio);
 	}
@@ -396,6 +398,7 @@ submit_and_retry:
 		ret = io_submit_init_bio(io, bh);
 		if (ret)
 			return ret;
+		io->io_bio->bi_write_hint = inode->i_write_hint;
 	}
 	ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
 	if (ret != bh->b_size)
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index a81b829d56de..40a5497b0f60 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -73,7 +73,7 @@ static void mpage_end_io(struct bio *bio)
 	int i;
 
 	if (ext4_bio_encrypted(bio)) {
-		if (bio->bi_error) {
+		if (bio->bi_status) {
 			fscrypt_release_ctx(bio->bi_private);
 		} else {
 			fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -83,7 +83,7 @@ static void mpage_end_io(struct bio *bio)
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
 
-		if (!bio->bi_error) {
+		if (!bio->bi_status) {
 			SetPageUptodate(page);
 		} else {
 			ClearPageUptodate(page);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c90edf09b0c3..9006cb5857b8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,6 +37,7 @@
 #include <linux/ctype.h>
 #include <linux/log2.h>
 #include <linux/crc16.h>
+#include <linux/dax.h>
 #include <linux/cleancache.h>
 #include <linux/uaccess.h>
 
@@ -847,14 +848,9 @@ static inline void ext4_quota_off_umount(struct super_block *sb)
 {
 	int type;
 
-	if (ext4_has_feature_quota(sb)) {
-		dquot_disable(sb, -1,
-			      DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
-	} else {
-		/* Use our quota_off function to clear inode flags etc. */
-		for (type = 0; type < EXT4_MAXQUOTAS; type++)
-			ext4_quota_off(sb, type);
-	}
+	/* Use our quota_off function to clear inode flags etc. */
+	for (type = 0; type < EXT4_MAXQUOTAS; type++)
+		ext4_quota_off(sb, type);
 }
 #else
 static inline void ext4_quota_off_umount(struct super_block *sb)
@@ -1178,6 +1174,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
 		return res;
 	}
 
+	res = dquot_initialize(inode);
+	if (res)
+		return res;
 retry:
 	handle = ext4_journal_start(inode, EXT4_HT_MISC,
 			ext4_jbd2_credits_xattr(inode));
@@ -3951,7 +3950,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		sb->s_qcop = &ext4_qctl_operations;
 	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
 #endif
-	memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
+	memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
 
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
 	mutex_init(&sbi->s_orphan_lock);
@@ -5484,7 +5483,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
 		goto out;
 
 	err = dquot_quota_off(sb, type);
-	if (err)
+	if (err || ext4_has_feature_quota(sb))
 		goto out_put;
 
 	inode_lock(inode);
@@ -5504,6 +5503,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
 out_unlock:
 	inode_unlock(inode);
 out_put:
+	lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
 	iput(inode);
 	return err;
 out:
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 8fb7ce14e6eb..5d3c2536641c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -888,6 +888,8 @@ inserted:
 			else {
 				u32 ref;
 
+				WARN_ON_ONCE(dquot_initialize_needed(inode));
+
 				/* The old block is released after updating
 				   the inode. */
 				error = dquot_alloc_block(inode,
@@ -954,6 +956,8 @@ inserted:
 			/* We need to allocate a new block */
 			ext4_fsblk_t goal, block;
 
+			WARN_ON_ONCE(dquot_initialize_needed(inode));
+
 			goal = ext4_group_first_block_no(sb,
 						EXT4_I(inode)->i_block_group);
 
@@ -1166,6 +1170,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 		return -EINVAL;
 	if (strlen(name) > 255)
 		return -ERANGE;
+
 	ext4_write_lock_xattr(inode, &no_expand);
 
 	error = ext4_reserve_inode_write(handle, inode, &is.iloc);
@@ -1267,6 +1272,9 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
 	int error, retries = 0;
 	int credits = ext4_jbd2_credits_xattr(inode);
 
+	error = dquot_initialize(inode);
+	if (error)
+		return error;
 retry:
 	handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
 	if (IS_ERR(handle)) {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7c0f6bdf817d..36fe82012a33 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -58,12 +58,12 @@ static void f2fs_read_end_io(struct bio *bio)
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
 		f2fs_show_injection_info(FAULT_IO);
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 	}
 #endif
 
 	if (f2fs_bio_encrypted(bio)) {
-		if (bio->bi_error) {
+		if (bio->bi_status) {
 			fscrypt_release_ctx(bio->bi_private);
 		} else {
 			fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -74,7 +74,7 @@ static void f2fs_read_end_io(struct bio *bio)
 	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 
-		if (!bio->bi_error) {
+		if (!bio->bi_status) {
 			if (!PageUptodate(page))
 				SetPageUptodate(page);
 		} else {
@@ -102,14 +102,14 @@ static void f2fs_write_end_io(struct bio *bio)
 			unlock_page(page);
 			mempool_free(page, sbi->write_io_dummy);
 
-			if (unlikely(bio->bi_error))
+			if (unlikely(bio->bi_status))
 				f2fs_stop_checkpoint(sbi, true);
 			continue;
 		}
 
 		fscrypt_pullback_bio_page(&page, true);
 
-		if (unlikely(bio->bi_error)) {
+		if (unlikely(bio->bi_status)) {
 			mapping_set_error(page->mapping, -EIO);
 			f2fs_stop_checkpoint(sbi, true);
 		}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2185c7a040a1..fd2e651bad6d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1078,6 +1078,7 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
 {
 	SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver);
 	u32 *ctx = (u32 *)shash_desc_ctx(shash);
+	u32 retval;
 	int err;
 
 	shash->tfm = sbi->s_chksum_driver;
@@ -1087,7 +1088,9 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
 	err = crypto_shash_update(shash, address, length);
 	BUG_ON(err);
 
-	return *ctx;
+	retval = *ctx;
+	barrier_data(ctx);
+	return retval;
 }
 
 static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc,
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 96845854e7ee..ea9f455d94ba 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -749,7 +749,7 @@ static void f2fs_submit_discard_endio(struct bio *bio)
 {
 	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
 
-	dc->error = bio->bi_error;
+	dc->error = blk_status_to_errno(bio->bi_status);
 	dc->state = D_DONE;
 	complete(&dc->wait);
 	bio_put(bio);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 83355ec4a92c..0b89b0b7b9f7 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1937,7 +1937,7 @@ try_onemore:
 	sb->s_time_gran = 1;
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
 		(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
-	memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
+	memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
 
 	/* init f2fs-specific super block info */
 	sbi->valid_super_block = valid_super_block;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f4e7267d117f..ed051f825bad 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -243,6 +243,67 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
 }
 #endif
 
+static bool rw_hint_valid(enum rw_hint hint)
+{
+	switch (hint) {
+	case RWF_WRITE_LIFE_NOT_SET:
+	case RWH_WRITE_LIFE_NONE:
+	case RWH_WRITE_LIFE_SHORT:
+	case RWH_WRITE_LIFE_MEDIUM:
+	case RWH_WRITE_LIFE_LONG:
+	case RWH_WRITE_LIFE_EXTREME:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static long fcntl_rw_hint(struct file *file, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct inode *inode = file_inode(file);
+	u64 *argp = (u64 __user *)arg;
+	enum rw_hint hint;
+	u64 h;
+
+	switch (cmd) {
+	case F_GET_FILE_RW_HINT:
+		h = file_write_hint(file);
+		if (copy_to_user(argp, &h, sizeof(*argp)))
+			return -EFAULT;
+		return 0;
+	case F_SET_FILE_RW_HINT:
+		if (copy_from_user(&h, argp, sizeof(h)))
+			return -EFAULT;
+		hint = (enum rw_hint) h;
+		if (!rw_hint_valid(hint))
+			return -EINVAL;
+
+		spin_lock(&file->f_lock);
+		file->f_write_hint = hint;
+		spin_unlock(&file->f_lock);
+		return 0;
+	case F_GET_RW_HINT:
+		h = inode->i_write_hint;
+		if (copy_to_user(argp, &h, sizeof(*argp)))
+			return -EFAULT;
+		return 0;
+	case F_SET_RW_HINT:
+		if (copy_from_user(&h, argp, sizeof(h)))
+			return -EFAULT;
+		hint = (enum rw_hint) h;
+		if (!rw_hint_valid(hint))
+			return -EINVAL;
+
+		inode_lock(inode);
+		inode->i_write_hint = hint;
+		inode_unlock(inode);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		struct file *filp)
 {
@@ -337,6 +398,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_GET_SEALS:
 		err = shmem_fcntl(filp, cmd, arg);
 		break;
+	case F_GET_RW_HINT:
+	case F_SET_RW_HINT:
+	case F_GET_FILE_RW_HINT:
+	case F_SET_FILE_RW_HINT:
+		err = fcntl_rw_hint(filp, cmd, arg);
+		break;
 	default:
 		break;
 	}
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index 611b5408f6ec..e747b3d720ee 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -34,7 +34,7 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m)
 
 void pin_kill(struct fs_pin *p)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (!p) {
 		rcu_read_unlock();
@@ -61,7 +61,7 @@ void pin_kill(struct fs_pin *p)
 		rcu_read_unlock();
 		schedule();
 		rcu_read_lock();
-		if (likely(list_empty(&wait.task_list)))
+		if (likely(list_empty(&wait.entry)))
 			break;
 		/* OK, we know p couldn't have been freed yet */
 		spin_lock_irq(&p->wait.lock);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c2d7f3a92679..c16d00e53264 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -20,6 +20,7 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/swap.h>
 #include <linux/splice.h>
+#include <linux/sched.h>
 
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS("devname:fuse");
@@ -45,7 +46,7 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages,
 	INIT_LIST_HEAD(&req->list);
 	INIT_LIST_HEAD(&req->intr_entry);
 	init_waitqueue_head(&req->waitq);
-	atomic_set(&req->count, 1);
+	refcount_set(&req->count, 1);
 	req->pages = pages;
 	req->page_descs = page_descs;
 	req->max_pages = npages;
@@ -102,21 +103,20 @@ void fuse_request_free(struct fuse_req *req)
 
 void __fuse_get_request(struct fuse_req *req)
 {
-	atomic_inc(&req->count);
+	refcount_inc(&req->count);
 }
 
 /* Must be called with > 1 refcount */
 static void __fuse_put_request(struct fuse_req *req)
 {
-	BUG_ON(atomic_read(&req->count) < 2);
-	atomic_dec(&req->count);
+	refcount_dec(&req->count);
 }
 
-static void fuse_req_init_context(struct fuse_req *req)
+static void fuse_req_init_context(struct fuse_conn *fc, struct fuse_req *req)
 {
 	req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
 	req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
-	req->in.h.pid = current->pid;
+	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
 }
 
 void fuse_set_initialized(struct fuse_conn *fc)
@@ -163,7 +163,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
 		goto out;
 	}
 
-	fuse_req_init_context(req);
+	fuse_req_init_context(fc, req);
 	__set_bit(FR_WAITING, &req->flags);
 	if (for_background)
 		__set_bit(FR_BACKGROUND, &req->flags);
@@ -256,7 +256,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
 	if (!req)
 		req = get_reserved_req(fc, file);
 
-	fuse_req_init_context(req);
+	fuse_req_init_context(fc, req);
 	__set_bit(FR_WAITING, &req->flags);
 	__clear_bit(FR_BACKGROUND, &req->flags);
 	return req;
@@ -264,7 +264,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
 
 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 {
-	if (atomic_dec_and_test(&req->count)) {
+	if (refcount_dec_and_test(&req->count)) {
 		if (test_bit(FR_BACKGROUND, &req->flags)) {
 			/*
 			 * We get here in the unlikely case that a background
@@ -1222,6 +1222,9 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
 	struct fuse_in *in;
 	unsigned reqsize;
 
+	if (task_active_pid_ns(current) != fc->pid_ns)
+		return -EIO;
+
  restart:
 	spin_lock(&fiq->waitq.lock);
 	err = -EAGAIN;
@@ -1820,6 +1823,9 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
 	struct fuse_req *req;
 	struct fuse_out_header oh;
 
+	if (task_active_pid_ns(current) != fc->pid_ns)
+		return -EIO;
+
 	if (nbytes < sizeof(struct fuse_out_header))
 		return -EINVAL;
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ec238fb5a584..3ee4fdc3da9e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -58,7 +58,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
 	}
 
 	INIT_LIST_HEAD(&ff->write_entry);
-	atomic_set(&ff->count, 1);
+	refcount_set(&ff->count, 1);
 	RB_CLEAR_NODE(&ff->polled_node);
 	init_waitqueue_head(&ff->poll_wait);
 
@@ -77,7 +77,7 @@ void fuse_file_free(struct fuse_file *ff)
 
 static struct fuse_file *fuse_file_get(struct fuse_file *ff)
 {
-	atomic_inc(&ff->count);
+	refcount_inc(&ff->count);
 	return ff;
 }
 
@@ -88,7 +88,7 @@ static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
 
 static void fuse_file_put(struct fuse_file *ff, bool sync)
 {
-	if (atomic_dec_and_test(&ff->count)) {
+	if (refcount_dec_and_test(&ff->count)) {
 		struct fuse_req *req = ff->reserved_req;
 
 		if (ff->fc->no_open) {
@@ -293,7 +293,7 @@ static int fuse_release(struct inode *inode, struct file *file)
 
 void fuse_sync_release(struct fuse_file *ff, int flags)
 {
-	WARN_ON(atomic_read(&ff->count) != 1);
+	WARN_ON(refcount_read(&ff->count) > 1);
 	fuse_prepare_release(ff, flags, FUSE_RELEASE);
 	/*
 	 * iput(NULL) is a no-op and since the refcount is 1 and everything's
@@ -2083,7 +2083,8 @@ static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma)
 	return generic_file_mmap(file, vma);
 }
 
-static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
+static int convert_fuse_file_lock(struct fuse_conn *fc,
+				  const struct fuse_file_lock *ffl,
 				  struct file_lock *fl)
 {
 	switch (ffl->type) {
@@ -2098,7 +2099,14 @@ static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
 
 		fl->fl_start = ffl->start;
 		fl->fl_end = ffl->end;
-		fl->fl_pid = ffl->pid;
+
+		/*
+		 * Convert pid into the caller's pid namespace. If the pid
+		 * does not map into the namespace fl_pid will get set to 0.
+		 */
+		rcu_read_lock();
+		fl->fl_pid = pid_vnr(find_pid_ns(ffl->pid, fc->pid_ns));
+		rcu_read_unlock();
 		break;
 
 	default:
@@ -2147,7 +2155,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
 	args.out.args[0].value = &outarg;
 	err = fuse_simple_request(fc, &args);
 	if (!err)
-		err = convert_fuse_file_lock(&outarg.lk, fl);
+		err = convert_fuse_file_lock(fc, &outarg.lk, fl);
 
 	return err;
 }
@@ -2159,7 +2167,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
 	FUSE_ARGS(args);
 	struct fuse_lk_in inarg;
 	int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
-	pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
+	struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL;
+	pid_t pid_nr = pid_nr_ns(pid, fc->pid_ns);
 	int err;
 
 	if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
@@ -2168,10 +2177,13 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
 	}
 
 	/* Unlock on close is handled by the flush method */
-	if (fl->fl_flags & FL_CLOSE)
+	if ((fl->fl_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX)
 		return 0;
 
-	fuse_lk_fill(&args, file, fl, opcode, pid, flock, &inarg);
+	if (pid && pid_nr == 0)
+		return -EOVERFLOW;
+
+	fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg);
 	err = fuse_simple_request(fc, &args);
 
 	/* locking is restartable */
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index f33341d9501a..1bd7ffdad593 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -24,6 +24,8 @@
 #include <linux/workqueue.h>
 #include <linux/kref.h>
 #include <linux/xattr.h>
+#include <linux/pid_namespace.h>
+#include <linux/refcount.h>
 
 /** Max number of pages that can be used in a single read request */
 #define FUSE_MAX_PAGES_PER_REQ 32
@@ -137,7 +139,7 @@ struct fuse_file {
 	u64 nodeid;
 
 	/** Refcount */
-	atomic_t count;
+	refcount_t count;
 
 	/** FOPEN_* flags returned by open */
 	u32 open_flags;
@@ -306,7 +308,7 @@ struct fuse_req {
 	struct list_head intr_entry;
 
 	/** refcount */
-	atomic_t count;
+	refcount_t count;
 
 	/** Unique ID for the interrupt request */
 	u64 intr_unique;
@@ -448,7 +450,7 @@ struct fuse_conn {
 	spinlock_t lock;
 
 	/** Refcount */
-	atomic_t count;
+	refcount_t count;
 
 	/** Number of fuse_dev's */
 	atomic_t dev_count;
@@ -461,6 +463,9 @@ struct fuse_conn {
 	/** The group id for this mount */
 	kgid_t group_id;
 
+	/** The pid namespace for this mount */
+	struct pid_namespace *pid_ns;
+
 	/** Maximum read size */
 	unsigned max_read;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 73cf05135252..65c88379a3a1 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -21,6 +21,7 @@
 #include <linux/sched.h>
 #include <linux/exportfs.h>
 #include <linux/posix_acl.h>
+#include <linux/pid_namespace.h>
 
 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
 MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -601,7 +602,7 @@ void fuse_conn_init(struct fuse_conn *fc)
 	memset(fc, 0, sizeof(*fc));
 	spin_lock_init(&fc->lock);
 	init_rwsem(&fc->killsb);
-	atomic_set(&fc->count, 1);
+	refcount_set(&fc->count, 1);
 	atomic_set(&fc->dev_count, 1);
 	init_waitqueue_head(&fc->blocked_waitq);
 	init_waitqueue_head(&fc->reserved_req_waitq);
@@ -619,14 +620,16 @@ void fuse_conn_init(struct fuse_conn *fc)
 	fc->connected = 1;
 	fc->attr_version = 1;
 	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
+	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
 }
 EXPORT_SYMBOL_GPL(fuse_conn_init);
 
 void fuse_conn_put(struct fuse_conn *fc)
 {
-	if (atomic_dec_and_test(&fc->count)) {
+	if (refcount_dec_and_test(&fc->count)) {
 		if (fc->destroy_req)
 			fuse_request_free(fc->destroy_req);
+		put_pid_ns(fc->pid_ns);
 		fc->release(fc);
 	}
 }
@@ -634,7 +637,7 @@ EXPORT_SYMBOL_GPL(fuse_conn_put);
 
 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
 {
-	atomic_inc(&fc->count);
+	refcount_inc(&fc->count);
 	return fc;
 }
 EXPORT_SYMBOL_GPL(fuse_conn_get);
@@ -972,8 +975,15 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 	int err;
 	char *suffix = "";
 
-	if (sb->s_bdev)
+	if (sb->s_bdev) {
 		suffix = "-fuseblk";
+		/*
+		 * sb->s_bdi points to blkdev's bdi however we want to redirect
+		 * it to our private bdi...
+		 */
+		bdi_put(sb->s_bdi);
+		sb->s_bdi = &noop_backing_dev_info;
+	}
 	err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
 				   MINOR(fc->dev), suffix);
 	if (err)
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b7cf65d13561..aa3d44527fa2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -815,7 +815,6 @@ struct gfs2_sbd {
 	atomic_t sd_log_in_flight;
 	struct bio *sd_log_bio;
 	wait_queue_head_t sd_log_flush_wait;
-	int sd_log_error;
 
 	atomic_t sd_reserving_log;
 	wait_queue_head_t sd_reserving_log_wait;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f865b96374df..d2955daf17a4 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -659,7 +659,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
 	struct gfs2_log_header *lh;
 	unsigned int tail;
 	u32 hash;
-	int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META;
+	int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
 	struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
 	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
 	lh = page_address(page);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index b1f9144b42c7..885d36e7a29f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -170,7 +170,7 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
  */
 
 static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
-				  int error)
+				  blk_status_t error)
 {
 	struct buffer_head *bh, *next;
 	struct page *page = bvec->bv_page;
@@ -209,15 +209,13 @@ static void gfs2_end_log_write(struct bio *bio)
 	struct page *page;
 	int i;
 
-	if (bio->bi_error) {
-		sdp->sd_log_error = bio->bi_error;
-		fs_err(sdp, "Error %d writing to log\n", bio->bi_error);
-	}
+	if (bio->bi_status)
+		fs_err(sdp, "Error %d writing to log\n", bio->bi_status);
 
 	bio_for_each_segment_all(bvec, bio, i) {
 		page = bvec->bv_page;
 		if (page_has_buffers(page))
-			gfs2_end_log_write_bh(sdp, bvec, bio->bi_error);
+			gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
 		else
 			mempool_free(page, gfs2_page_pool);
 	}
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 663ffc135ef3..fabe1614f879 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -201,7 +201,7 @@ static void gfs2_meta_read_endio(struct bio *bio)
 		do {
 			struct buffer_head *next = bh->b_this_page;
 			len -= bh->b_size;
-			bh->b_end_io(bh, !bio->bi_error);
+			bh->b_end_io(bh, !bio->bi_status);
 			bh = next;
 		} while (bh && len);
 	}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ed67548b286c..e76058d34b74 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -176,10 +176,10 @@ static void end_bio_io_page(struct bio *bio)
 {
 	struct page *page = bio->bi_private;
 
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		SetPageUptodate(page);
 	else
-		pr_warn("error %d reading superblock\n", bio->bi_error);
+		pr_warn("error %d reading superblock\n", bio->bi_status);
 	unlock_page(page);
 }
 
@@ -203,7 +203,7 @@ static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
 
 	memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
 	memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
-	memcpy(s->s_uuid, str->sb_uuid, 16);
+	memcpy(&s->s_uuid, str->sb_uuid, 16);
 }
 
 /**
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 7a515345610c..e77bc52b468f 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -71,25 +71,14 @@ static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
 	return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
 }
 
-static int gfs2_uuid_valid(const u8 *uuid)
-{
-	int i;
-
-	for (i = 0; i < 16; i++) {
-		if (uuid[i])
-			return 1;
-	}
-	return 0;
-}
-
 static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
 {
 	struct super_block *s = sdp->sd_vfs;
-	const u8 *uuid = s->s_uuid;
+
 	buf[0] = '\0';
-	if (!gfs2_uuid_valid(uuid))
+	if (uuid_is_null(&s->s_uuid))
 		return 0;
-	return snprintf(buf, PAGE_SIZE, "%pUB\n", uuid);
+	return snprintf(buf, PAGE_SIZE, "%pUB\n", &s->s_uuid);
 }
 
 static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
@@ -712,14 +701,13 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
 {
 	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
 	struct super_block *s = sdp->sd_vfs;
-	const u8 *uuid = s->s_uuid;
 
 	add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
 	add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
 	if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags))
 		add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid);
-	if (gfs2_uuid_valid(uuid))
-		add_uevent_var(env, "UUID=%pUB", uuid);
+	if (!uuid_is_null(&s->s_uuid))
+		add_uevent_var(env, "UUID=%pUB", &s->s_uuid);
 	return 0;
 }
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index dde861387a40..d44f5456eb9b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -200,7 +200,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/fs/inode.c b/fs/inode.c
index db5914783a71..ab3b9a795c0b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -146,6 +146,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	i_gid_write(inode, 0);
 	atomic_set(&inode->i_writecount, 0);
 	inode->i_size = 0;
+	inode->i_write_hint = WRITE_LIFE_NOT_SET;
 	inode->i_blocks = 0;
 	inode->i_bytes = 0;
 	inode->i_generation = 0;
@@ -1891,11 +1892,11 @@ static void __wait_on_freeing_inode(struct inode *inode)
 	wait_queue_head_t *wq;
 	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
 	wq = bit_waitqueue(&inode->i_state, __I_NEW);
-	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_hash_lock);
 	schedule();
-	finish_wait(wq, &wait.wait);
+	finish_wait(wq, &wait.wq_entry);
 	spin_lock(&inode_hash_lock);
 }
 
@@ -2038,11 +2039,11 @@ static void __inode_dio_wait(struct inode *inode)
 	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
 
 	do {
-		prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
 		if (atomic_read(&inode->i_dio_count))
 			schedule();
 	} while (atomic_read(&inode->i_dio_count));
-	finish_wait(wq, &q.wait);
+	finish_wait(wq, &q.wq_entry);
 }
 
 /**
diff --git a/fs/iomap.c b/fs/iomap.c
index 4b10892967a5..fa6cd5b3f578 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -672,8 +672,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 	struct iomap_dio *dio = bio->bi_private;
 	bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
 
-	if (bio->bi_error)
-		iomap_dio_set_error(dio, bio->bi_error);
+	if (bio->bi_status)
+		iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
 
 	if (atomic_dec_and_test(&dio->ref)) {
 		if (is_sync_kiocb(dio->iocb)) {
@@ -793,6 +793,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 		bio->bi_bdev = iomap->bdev;
 		bio->bi_iter.bi_sector =
 			iomap->blkno + ((pos - iomap->offset) >> 9);
+		bio->bi_write_hint = dio->iocb->ki_hint;
 		bio->bi_private = dio;
 		bio->bi_end_io = iomap_dio_bio_end_io;
 
@@ -881,6 +882,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		flags |= IOMAP_WRITE;
 	}
 
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (filemap_range_has_page(mapping, start, end)) {
+			ret = -EAGAIN;
+			goto out_free_dio;
+		}
+		flags |= IOMAP_NOWAIT;
+	}
+
 	ret = filemap_write_and_wait_range(mapping, start, end);
 	if (ret)
 		goto out_free_dio;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 5a0245e36240..7d5ef3bf3f3e 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2363,7 +2363,7 @@ static int jbd2_journal_init_journal_head_cache(void)
 	jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
 				sizeof(struct journal_head),
 				0,		/* offset */
-				SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU,
+				SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU,
 				NULL);		/* ctor */
 	retval = 0;
 	if (!jbd2_journal_head_cache) {
@@ -2579,10 +2579,10 @@ restart:
 		wait_queue_head_t *wq;
 		DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
 		wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
-		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 		spin_unlock(&journal->j_list_lock);
 		schedule();
-		finish_wait(wq, &wait.wait);
+		finish_wait(wq, &wait.wq_entry);
 		goto restart;
 	}
 
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 9ee4832b6f8b..2d30a6da7013 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -680,6 +680,12 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
 
 	rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_);
 	handle->h_buffer_credits = nblocks;
+	/*
+	 * Restore the original nofs context because the journal restart
+	 * is basically the same thing as journal stop and start.
+	 * start_this_handle will start a new nofs context.
+	 */
+	memalloc_nofs_restore(handle->saved_alloc_context);
 	ret = start_this_handle(journal, handle, gfp_mask);
 	return ret;
 }
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 06a71dbd4833..389ea53ea487 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1366,7 +1366,7 @@ int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
 		jffs2_add_ino_cache(c, f->inocache);
 	}
 	if (!f->inocache) {
-		JFFS2_ERROR("requestied to read an nonexistent ino %u\n", ino);
+		JFFS2_ERROR("requested to read a nonexistent ino %u\n", ino);
 		return -ENOENT;
 	}
 
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index bb1da1feafeb..a21f0e9eecd4 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2205,7 +2205,7 @@ static void lbmIODone(struct bio *bio)
 
 	bp->l_flag |= lbmDONE;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		bp->l_flag |= lbmERROR;
 
 		jfs_err("lbmIODone: I/O error in JFS log");
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 489aaa1403e5..ce93db3aef3c 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -280,7 +280,7 @@ static void metapage_read_end_io(struct bio *bio)
 {
 	struct page *page = bio->bi_private;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		printk(KERN_ERR "metapage_read_end_io: I/O error\n");
 		SetPageError(page);
 	}
@@ -337,7 +337,7 @@ static void metapage_write_end_io(struct bio *bio)
 
 	BUG_ON(!PagePrivate(page));
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		printk(KERN_ERR "metapage_write_end_io: I/O error\n");
 		SetPageError(page);
 	}
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 41e491b8e5d7..27d577dbe51a 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -69,6 +69,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
 	if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL)
 		goto out_nobind;
 
+	host->h_nlmclnt_ops = nlm_init->nlmclnt_ops;
 	return host;
 out_nobind:
 	nlmclnt_release_host(host);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 112952037933..066ac313ae5c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -150,17 +150,22 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
  * @host: address of a valid nlm_host context representing the NLM server
  * @cmd: fcntl-style file lock operation to perform
  * @fl: address of arguments for the lock operation
+ * @data: address of data to be sent to callback operations
  *
  */
-int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
+int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, void *data)
 {
 	struct nlm_rqst		*call;
 	int			status;
+	const struct nlmclnt_operations *nlmclnt_ops = host->h_nlmclnt_ops;
 
 	call = nlm_alloc_call(host);
 	if (call == NULL)
 		return -ENOMEM;
 
+	if (nlmclnt_ops && nlmclnt_ops->nlmclnt_alloc_call)
+		nlmclnt_ops->nlmclnt_alloc_call(data);
+
 	nlmclnt_locks_init_private(fl, host);
 	if (!fl->fl_u.nfs_fl.owner) {
 		/* lockowner allocation has failed */
@@ -169,6 +174,7 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
 	}
 	/* Set up the argument struct */
 	nlmclnt_setlockargs(call, fl);
+	call->a_callback_data = data;
 
 	if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
 		if (fl->fl_type != F_UNLCK) {
@@ -214,8 +220,12 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
 
 void nlmclnt_release_call(struct nlm_rqst *call)
 {
+	const struct nlmclnt_operations *nlmclnt_ops = call->a_host->h_nlmclnt_ops;
+
 	if (!atomic_dec_and_test(&call->a_count))
 		return;
+	if (nlmclnt_ops && nlmclnt_ops->nlmclnt_release_call)
+		nlmclnt_ops->nlmclnt_release_call(call->a_callback_data);
 	nlmclnt_release_host(call->a_host);
 	nlmclnt_release_lockargs(call);
 	kfree(call);
@@ -687,6 +697,19 @@ out:
 	return status;
 }
 
+static void nlmclnt_unlock_prepare(struct rpc_task *task, void *data)
+{
+	struct nlm_rqst	*req = data;
+	const struct nlmclnt_operations *nlmclnt_ops = req->a_host->h_nlmclnt_ops;
+	bool defer_call = false;
+
+	if (nlmclnt_ops && nlmclnt_ops->nlmclnt_unlock_prepare)
+		defer_call = nlmclnt_ops->nlmclnt_unlock_prepare(task, req->a_callback_data);
+
+	if (!defer_call)
+		rpc_call_start(task);
+}
+
 static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
 {
 	struct nlm_rqst	*req = data;
@@ -720,6 +743,7 @@ die:
 }
 
 static const struct rpc_call_ops nlmclnt_unlock_ops = {
+	.rpc_call_prepare = nlmclnt_unlock_prepare,
 	.rpc_call_done = nlmclnt_unlock_callback,
 	.rpc_release = nlmclnt_rpc_release,
 };
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index e7c8b9c76e48..5d481e8a1b5d 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -132,6 +132,8 @@ lockd(void *vrqstp)
 {
 	int		err = 0;
 	struct svc_rqst *rqstp = vrqstp;
+	struct net *net = &init_net;
+	struct lockd_net *ln = net_generic(net, lockd_net_id);
 
 	/* try_to_freeze() is called from svc_recv() */
 	set_freezable();
@@ -176,6 +178,8 @@ lockd(void *vrqstp)
 	if (nlmsvc_ops)
 		nlmsvc_invalidate_all();
 	nlm_shutdown_hosts();
+	cancel_delayed_work_sync(&ln->grace_period_end);
+	locks_end_grace(&ln->lockd_manager);
 	return 0;
 }
 
@@ -270,8 +274,6 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
 	if (ln->nlmsvc_users) {
 		if (--ln->nlmsvc_users == 0) {
 			nlm_shutdown_hosts_net(net);
-			cancel_delayed_work_sync(&ln->grace_period_end);
-			locks_end_grace(&ln->lockd_manager);
 			svc_shutdown_net(serv, net);
 			dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net);
 		}
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 5581e020644b..3507c80d1d4b 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -870,15 +870,15 @@ nlmsvc_grant_reply(struct nlm_cookie *cookie, __be32 status)
 	if (!(block = nlmsvc_find_block(cookie)))
 		return;
 
-	if (block) {
-		if (status == nlm_lck_denied_grace_period) {
-			/* Try again in a couple of seconds */
-			nlmsvc_insert_block(block, 10 * HZ);
-		} else {
-			/* Lock is now held by client, or has been rejected.
-			 * In both cases, the block should be removed. */
-			nlmsvc_unlink_block(block);
-		}
+	if (status == nlm_lck_denied_grace_period) {
+		/* Try again in a couple of seconds */
+		nlmsvc_insert_block(block, 10 * HZ);
+	} else {
+		/*
+		 * Lock is now held by client, or has been rejected.
+		 * In both cases, the block should be removed.
+		 */
+		nlmsvc_unlink_block(block);
 	}
 	nlmsvc_release_block(block);
 }
diff --git a/fs/locks.c b/fs/locks.c
index 26811321d39b..af2031a1fcff 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2504,7 +2504,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
 		.fl_owner = filp,
 		.fl_pid = current->tgid,
 		.fl_file = filp,
-		.fl_flags = FL_FLOCK,
+		.fl_flags = FL_FLOCK | FL_CLOSE,
 		.fl_type = F_UNLCK,
 		.fl_end = OFFSET_MAX,
 	};
diff --git a/fs/mpage.c b/fs/mpage.c
index baff8f820c29..d6d1486d6f99 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -50,7 +50,8 @@ static void mpage_end_io(struct bio *bio)
 
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
-		page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
+		page_endio(page, op_is_write(bio_op(bio)),
+				blk_status_to_errno(bio->bi_status));
 	}
 
 	bio_put(bio);
@@ -614,6 +615,7 @@ alloc_new:
 			goto confused;
 
 		wbc_init_bio(wbc, bio);
+		bio->bi_write_hint = inode->i_write_hint;
 	}
 
 	/*
diff --git a/fs/namei.c b/fs/namei.c
index 7286f87ce863..6571a5f5112e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2142,7 +2142,6 @@ OK:
 
 static const char *path_init(struct nameidata *nd, unsigned flags)
 {
-	int retval = 0;
 	const char *s = nd->name->name;
 
 	if (!*s)
@@ -2154,13 +2153,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
 	if (flags & LOOKUP_ROOT) {
 		struct dentry *root = nd->root.dentry;
 		struct inode *inode = root->d_inode;
-		if (*s) {
-			if (!d_can_lookup(root))
-				return ERR_PTR(-ENOTDIR);
-			retval = inode_permission(inode, MAY_EXEC);
-			if (retval)
-				return ERR_PTR(retval);
-		}
+		if (*s && unlikely(!d_can_lookup(root)))
+			return ERR_PTR(-ENOTDIR);
 		nd->path = nd->root;
 		nd->inode = inode;
 		if (flags & LOOKUP_RCU) {
@@ -2258,6 +2252,35 @@ static inline int lookup_last(struct nameidata *nd)
 	return walk_component(nd, 0);
 }
 
+static int handle_lookup_down(struct nameidata *nd)
+{
+	struct path path = nd->path;
+	struct inode *inode = nd->inode;
+	unsigned seq = nd->seq;
+	int err;
+
+	if (nd->flags & LOOKUP_RCU) {
+		/*
+		 * don't bother with unlazy_walk on failure - we are
+		 * at the very beginning of walk, so we lose nothing
+		 * if we simply redo everything in non-RCU mode
+		 */
+		if (unlikely(!__follow_mount_rcu(nd, &path, &inode, &seq)))
+			return -ECHILD;
+	} else {
+		dget(path.dentry);
+		err = follow_managed(&path, nd);
+		if (unlikely(err < 0))
+			return err;
+		inode = d_backing_inode(path.dentry);
+		seq = 0;
+	}
+	path_to_nameidata(&path, nd);
+	nd->inode = inode;
+	nd->seq = seq;
+	return 0;
+}
+
 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
 static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path)
 {
@@ -2266,6 +2289,15 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
 
 	if (IS_ERR(s))
 		return PTR_ERR(s);
+
+	if (unlikely(flags & LOOKUP_DOWN)) {
+		err = handle_lookup_down(nd);
+		if (unlikely(err < 0)) {
+			terminate_walk(nd);
+			return err;
+		}
+	}
+
 	while (!(err = link_path_walk(s, nd))
 		&& ((err = lookup_last(nd)) > 0)) {
 		s = trailing_symlink(nd);
diff --git a/fs/namespace.c b/fs/namespace.c
index b3b115bd4e1e..5a4438445bf7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3462,8 +3462,9 @@ static void mntns_put(struct ns_common *ns)
 static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 {
 	struct fs_struct *fs = current->fs;
-	struct mnt_namespace *mnt_ns = to_mnt_ns(ns);
+	struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
 	struct path root;
+	int err;
 
 	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
 	    !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
@@ -3474,15 +3475,20 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 		return -EINVAL;
 
 	get_mnt_ns(mnt_ns);
-	put_mnt_ns(nsproxy->mnt_ns);
+	old_mnt_ns = nsproxy->mnt_ns;
 	nsproxy->mnt_ns = mnt_ns;
 
 	/* Find the root */
-	root.mnt    = &mnt_ns->root->mnt;
-	root.dentry = mnt_ns->root->mnt.mnt_root;
-	path_get(&root);
-	while(d_mountpoint(root.dentry) && follow_down_one(&root))
-		;
+	err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt,
+				"/", LOOKUP_DOWN, &root);
+	if (err) {
+		/* revert to old namespace */
+		nsproxy->mnt_ns = old_mnt_ns;
+		put_mnt_ns(mnt_ns);
+		return err;
+	}
+
+	put_mnt_ns(old_mnt_ns);
 
 	/* Update the pwd and root */
 	set_fs_pwd(fs, &root);
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index f31fd0dd92c6..69d02cf8cf37 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -123,11 +123,6 @@ config PNFS_BLOCK
 	depends on NFS_V4_1 && BLK_DEV_DM
 	default NFS_V4
 
-config PNFS_OBJLAYOUT
-	tristate
-	depends on NFS_V4_1 && SCSI_OSD_ULD
-	default NFS_V4
-
 config PNFS_FLEXFILE_LAYOUT
 	tristate
 	depends on NFS_V4_1 && NFS_V3
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 6abdda209642..98f4e5728a67 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -31,6 +31,5 @@ nfsv4-$(CONFIG_NFS_V4_1)	+= pnfs.o pnfs_dev.o pnfs_nfs.o
 nfsv4-$(CONFIG_NFS_V4_2)	+= nfs42proc.o
 
 obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
-obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
 obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
 obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += flexfilelayout/
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 0ca370d23ddb..d8863a804b15 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -188,7 +188,7 @@ static void bl_end_io_read(struct bio *bio)
 {
 	struct parallel_io *par = bio->bi_private;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		struct nfs_pgio_header *header = par->data;
 
 		if (!header->pnfs_error)
@@ -319,7 +319,7 @@ static void bl_end_io_write(struct bio *bio)
 	struct parallel_io *par = bio->bi_private;
 	struct nfs_pgio_header *header = par->data;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		if (!header->pnfs_error)
 			header->pnfs_error = -EIO;
 		pnfs_set_lo_fail(header->lseg);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 773774531aff..73a1f928226c 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -76,7 +76,10 @@ nfs4_callback_svc(void *vrqstp)
 
 	set_freezable();
 
-	while (!kthread_should_stop()) {
+	while (!kthread_freezable_should_stop(NULL)) {
+
+		if (signal_pending(current))
+			flush_signals(current);
 		/*
 		 * Listen for a request on the socket
 		 */
@@ -85,6 +88,8 @@ nfs4_callback_svc(void *vrqstp)
 			continue;
 		svc_process(rqstp);
 	}
+	svc_exit_thread(rqstp);
+	module_put_and_exit(0);
 	return 0;
 }
 
@@ -103,9 +108,10 @@ nfs41_callback_svc(void *vrqstp)
 
 	set_freezable();
 
-	while (!kthread_should_stop()) {
-		if (try_to_freeze())
-			continue;
+	while (!kthread_freezable_should_stop(NULL)) {
+
+		if (signal_pending(current))
+			flush_signals(current);
 
 		prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
 		spin_lock_bh(&serv->sv_cb_lock);
@@ -121,11 +127,13 @@ nfs41_callback_svc(void *vrqstp)
 				error);
 		} else {
 			spin_unlock_bh(&serv->sv_cb_lock);
-			schedule();
+			if (!kthread_should_stop())
+				schedule();
 			finish_wait(&serv->sv_cb_waitq, &wq);
 		}
-		flush_signals(current);
 	}
+	svc_exit_thread(rqstp);
+	module_put_and_exit(0);
 	return 0;
 }
 
@@ -221,14 +229,14 @@ err_bind:
 static struct svc_serv_ops nfs40_cb_sv_ops = {
 	.svo_function		= nfs4_callback_svc,
 	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
-	.svo_setup		= svc_set_num_threads,
+	.svo_setup		= svc_set_num_threads_sync,
 	.svo_module		= THIS_MODULE,
 };
 #if defined(CONFIG_NFS_V4_1)
 static struct svc_serv_ops nfs41_cb_sv_ops = {
 	.svo_function		= nfs41_callback_svc,
 	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
-	.svo_setup		= svc_set_num_threads,
+	.svo_setup		= svc_set_num_threads_sync,
 	.svo_module		= THIS_MODULE,
 };
 
@@ -280,7 +288,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
 		printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
 			cb_info->users);
 
-	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
+	serv = svc_create_pooled(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
 	if (!serv) {
 		printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
 		return ERR_PTR(-ENOMEM);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index f073a6d2c6a5..52479f180ea1 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -131,10 +131,11 @@ restart:
 			if (!inode)
 				continue;
 			if (!nfs_sb_active(inode->i_sb)) {
-				rcu_read_lock();
+				rcu_read_unlock();
 				spin_unlock(&clp->cl_lock);
 				iput(inode);
 				spin_lock(&clp->cl_lock);
+				rcu_read_lock();
 				goto restart;
 			}
 			return inode;
@@ -170,10 +171,11 @@ restart:
 			if (!inode)
 				continue;
 			if (!nfs_sb_active(inode->i_sb)) {
-				rcu_read_lock();
+				rcu_read_unlock();
 				spin_unlock(&clp->cl_lock);
 				iput(inode);
 				spin_lock(&clp->cl_lock);
+				rcu_read_lock();
 				goto restart;
 			}
 			return inode;
@@ -317,31 +319,18 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
 static u32 do_callback_layoutrecall(struct nfs_client *clp,
 				    struct cb_layoutrecallargs *args)
 {
-	u32 res;
-
-	dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
 	if (args->cbl_recall_type == RETURN_FILE)
-		res = initiate_file_draining(clp, args);
-	else
-		res = initiate_bulk_draining(clp, args);
-	dprintk("%s returning %i\n", __func__, res);
-	return res;
-
+		return initiate_file_draining(clp, args);
+	return initiate_bulk_draining(clp, args);
 }
 
 __be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
 				  void *dummy, struct cb_process_state *cps)
 {
-	u32 res;
-
-	dprintk("%s: -->\n", __func__);
+	u32 res = NFS4ERR_OP_NOT_IN_SESSION;
 
 	if (cps->clp)
 		res = do_callback_layoutrecall(cps->clp, args);
-	else
-		res = NFS4ERR_OP_NOT_IN_SESSION;
-
-	dprintk("%s: exit with status = %d\n", __func__, res);
 	return cpu_to_be32(res);
 }
 
@@ -364,8 +353,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
 	struct nfs_client *clp = cps->clp;
 	struct nfs_server *server = NULL;
 
-	dprintk("%s: -->\n", __func__);
-
 	if (!clp) {
 		res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
 		goto out;
@@ -384,8 +371,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
 					goto found;
 				}
 			rcu_read_unlock();
-			dprintk("%s: layout type %u not found\n",
-				__func__, dev->cbd_layout_type);
 			continue;
 		}
 
@@ -395,8 +380,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
 
 out:
 	kfree(args->devs);
-	dprintk("%s: exit with status = %u\n",
-		__func__, be32_to_cpu(res));
 	return res;
 }
 
@@ -417,16 +400,11 @@ static __be32
 validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot,
 		const struct cb_sequenceargs * args)
 {
-	dprintk("%s enter. slotid %u seqid %u, slot table seqid: %u\n",
-		__func__, args->csa_slotid, args->csa_sequenceid, slot->seq_nr);
-
 	if (args->csa_slotid > tbl->server_highest_slotid)
 		return htonl(NFS4ERR_BADSLOT);
 
 	/* Replay */
 	if (args->csa_sequenceid == slot->seq_nr) {
-		dprintk("%s seqid %u is a replay\n",
-			__func__, args->csa_sequenceid);
 		if (nfs4_test_locked_slot(tbl, slot->slot_nr))
 			return htonl(NFS4ERR_DELAY);
 		/* Signal process_op to set this error on next op */
@@ -480,15 +458,6 @@ static bool referring_call_exists(struct nfs_client *clp,
 
 		for (j = 0; j < rclist->rcl_nrefcalls; j++) {
 			ref = &rclist->rcl_refcalls[j];
-
-			dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u "
-				"slotid %u\n", __func__,
-				((u32 *)&rclist->rcl_sessionid.data)[0],
-				((u32 *)&rclist->rcl_sessionid.data)[1],
-				((u32 *)&rclist->rcl_sessionid.data)[2],
-				((u32 *)&rclist->rcl_sessionid.data)[3],
-				ref->rc_sequenceid, ref->rc_slotid);
-
 			status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid,
 					ref->rc_sequenceid, HZ >> 1) < 0;
 			if (status)
@@ -593,8 +562,6 @@ out:
 		res->csr_status = status;
 
 	trace_nfs4_cb_sequence(args, res, status);
-	dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
-		ntohl(status), ntohl(res->csr_status));
 	return status;
 }
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index d051fc3583a9..390ac9c39c59 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -171,8 +171,6 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
 		return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
 	}
 	hdr->nops = ntohl(*p);
-	dprintk("%s: minorversion %d nops %d\n", __func__,
-		hdr->minorversion, hdr->nops);
 	return 0;
 }
 
@@ -192,11 +190,8 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr
 
 	status = decode_fh(xdr, &args->fh);
 	if (unlikely(status != 0))
-		goto out;
-	status = decode_bitmap(xdr, args->bitmap);
-out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
-	return status;
+		return status;
+	return decode_bitmap(xdr, args->bitmap);
 }
 
 static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
@@ -206,17 +201,12 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 
 	status = decode_delegation_stateid(xdr, &args->stateid);
 	if (unlikely(status != 0))
-		goto out;
+		return status;
 	p = read_buf(xdr, 4);
-	if (unlikely(p == NULL)) {
-		status = htonl(NFS4ERR_RESOURCE);
-		goto out;
-	}
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
 	args->truncate = ntohl(*p);
-	status = decode_fh(xdr, &args->fh);
-out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
-	return status;
+	return decode_fh(xdr, &args->fh);
 }
 
 #if defined(CONFIG_NFS_V4_1)
@@ -235,10 +225,8 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
 	uint32_t iomode;
 
 	p = read_buf(xdr, 4 * sizeof(uint32_t));
-	if (unlikely(p == NULL)) {
-		status = htonl(NFS4ERR_BADXDR);
-		goto out;
-	}
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_BADXDR);
 
 	args->cbl_layout_type = ntohl(*p++);
 	/* Depite the spec's xdr, iomode really belongs in the FILE switch,
@@ -252,37 +240,23 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
 		args->cbl_range.iomode = iomode;
 		status = decode_fh(xdr, &args->cbl_fh);
 		if (unlikely(status != 0))
-			goto out;
+			return status;
 
 		p = read_buf(xdr, 2 * sizeof(uint64_t));
-		if (unlikely(p == NULL)) {
-			status = htonl(NFS4ERR_BADXDR);
-			goto out;
-		}
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_BADXDR);
 		p = xdr_decode_hyper(p, &args->cbl_range.offset);
 		p = xdr_decode_hyper(p, &args->cbl_range.length);
-		status = decode_layout_stateid(xdr, &args->cbl_stateid);
-		if (unlikely(status != 0))
-			goto out;
+		return decode_layout_stateid(xdr, &args->cbl_stateid);
 	} else if (args->cbl_recall_type == RETURN_FSID) {
 		p = read_buf(xdr, 2 * sizeof(uint64_t));
-		if (unlikely(p == NULL)) {
-			status = htonl(NFS4ERR_BADXDR);
-			goto out;
-		}
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_BADXDR);
 		p = xdr_decode_hyper(p, &args->cbl_fsid.major);
 		p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
-	} else if (args->cbl_recall_type != RETURN_ALL) {
-		status = htonl(NFS4ERR_BADXDR);
-		goto out;
-	}
-	dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d\n",
-		__func__,
-		args->cbl_layout_type, iomode,
-		args->cbl_layoutchanged, args->cbl_recall_type);
-out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
-	return status;
+	} else if (args->cbl_recall_type != RETURN_ALL)
+		return htonl(NFS4ERR_BADXDR);
+	return 0;
 }
 
 static
@@ -437,12 +411,11 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
 
 	status = decode_sessionid(xdr, &args->csa_sessionid);
 	if (status)
-		goto out;
+		return status;
 
-	status = htonl(NFS4ERR_RESOURCE);
 	p = read_buf(xdr, 5 * sizeof(uint32_t));
 	if (unlikely(p == NULL))
-		goto out;
+		return htonl(NFS4ERR_RESOURCE);
 
 	args->csa_addr = svc_addr(rqstp);
 	args->csa_sequenceid = ntohl(*p++);
@@ -456,7 +429,7 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
 						  sizeof(*args->csa_rclists),
 						  GFP_KERNEL);
 		if (unlikely(args->csa_rclists == NULL))
-			goto out;
+			return htonl(NFS4ERR_RESOURCE);
 
 		for (i = 0; i < args->csa_nrclists; i++) {
 			status = decode_rc_list(xdr, &args->csa_rclists[i]);
@@ -466,27 +439,13 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
 			}
 		}
 	}
-	status = 0;
-
-	dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u slotid %u "
-		"highestslotid %u cachethis %d nrclists %u\n",
-		__func__,
-		((u32 *)&args->csa_sessionid)[0],
-		((u32 *)&args->csa_sessionid)[1],
-		((u32 *)&args->csa_sessionid)[2],
-		((u32 *)&args->csa_sessionid)[3],
-		args->csa_sequenceid, args->csa_slotid,
-		args->csa_highestslotid, args->csa_cachethis,
-		args->csa_nrclists);
-out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
-	return status;
+	return 0;
 
 out_free:
 	for (i = 0; i < args->csa_nrclists; i++)
 		kfree(args->csa_rclists[i].rcl_refcalls);
 	kfree(args->csa_rclists);
-	goto out;
+	return status;
 }
 
 static __be32 decode_recallany_args(struct svc_rqst *rqstp,
@@ -557,11 +516,8 @@ static __be32 decode_notify_lock_args(struct svc_rqst *rqstp, struct xdr_stream
 
 	status = decode_fh(xdr, &args->cbnl_fh);
 	if (unlikely(status != 0))
-		goto out;
-	status = decode_lockowner(xdr, args);
-out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
-	return status;
+		return status;
+	return decode_lockowner(xdr, args);
 }
 
 #endif /* CONFIG_NFS_V4_1 */
@@ -707,7 +663,6 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 	status = encode_attr_mtime(xdr, res->bitmap, &res->mtime);
 	*savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1)));
 out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
 	return status;
 }
 
@@ -734,11 +689,11 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
 	__be32 status = res->csr_status;
 
 	if (unlikely(status != 0))
-		goto out;
+		return status;
 
 	status = encode_sessionid(xdr, &res->csr_sessionid);
 	if (status)
-		goto out;
+		return status;
 
 	p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t));
 	if (unlikely(p == NULL))
@@ -748,9 +703,7 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
 	*p++ = htonl(res->csr_slotid);
 	*p++ = htonl(res->csr_highestslotid);
 	*p++ = htonl(res->csr_target_highestslotid);
-out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
-	return status;
+	return 0;
 }
 
 static __be32
@@ -800,7 +753,6 @@ static void nfs4_callback_free_slot(struct nfs4_session *session,
 	 * A single slot, so highest used slotid is either 0 or -1
 	 */
 	nfs4_free_slot(tbl, slot);
-	nfs4_slot_tbl_drain_complete(tbl);
 	spin_unlock(&tbl->slot_tbl_lock);
 }
 
@@ -871,14 +823,10 @@ static __be32 process_op(int nop, struct svc_rqst *rqstp,
 	long maxlen;
 	__be32 res;
 
-	dprintk("%s: start\n", __func__);
 	status = decode_op_hdr(xdr_in, &op_nr);
 	if (unlikely(status))
 		return status;
 
-	dprintk("%s: minorversion=%d nop=%d op_nr=%u\n",
-		__func__, cps->minorversion, nop, op_nr);
-
 	switch (cps->minorversion) {
 	case 0:
 		status = preprocess_nfs4_op(op_nr, &op);
@@ -917,7 +865,6 @@ encode_hdr:
 		return res;
 	if (op->encode_res != NULL && status == 0)
 		status = op->encode_res(rqstp, xdr_out, resp);
-	dprintk("%s: done, status = %d\n", __func__, ntohl(status));
 	return status;
 }
 
@@ -937,8 +884,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 	};
 	unsigned int nops = 0;
 
-	dprintk("%s: start\n", __func__);
-
 	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
 
 	p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
@@ -977,7 +922,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 	*hdr_res.nops = htonl(nops);
 	nfs4_cb_free_slot(&cps);
 	nfs_put_client(cps.clp);
-	dprintk("%s: done, status = %u\n", __func__, ntohl(status));
 	return rpc_success;
 
 out_invalidcred:
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 04d15a0045e3..ee5ddbd36088 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -218,6 +218,7 @@ static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
 static void pnfs_init_server(struct nfs_server *server)
 {
 	rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
+	rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC");
 }
 
 #else
@@ -240,8 +241,6 @@ static void pnfs_init_server(struct nfs_server *server)
  */
 void nfs_free_client(struct nfs_client *clp)
 {
-	dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
-
 	nfs_fscache_release_client_cookie(clp);
 
 	/* -EIO all pending I/O */
@@ -256,8 +255,6 @@ void nfs_free_client(struct nfs_client *clp)
 	kfree(clp->cl_hostname);
 	kfree(clp->cl_acceptor);
 	kfree(clp);
-
-	dprintk("<-- nfs_free_client()\n");
 }
 EXPORT_SYMBOL_GPL(nfs_free_client);
 
@@ -271,7 +268,6 @@ void nfs_put_client(struct nfs_client *clp)
 	if (!clp)
 		return;
 
-	dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
 	nn = net_generic(clp->cl_net, nfs_net_id);
 
 	if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
@@ -382,9 +378,6 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,
 	}
 
 	smp_rmb();
-
-	dprintk("<-- %s found nfs_client %p for %s\n",
-		__func__, clp, cl_init->hostname ?: "");
 	return clp;
 }
 
@@ -403,9 +396,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
 		return NULL;
 	}
 
-	dprintk("--> nfs_get_client(%s,v%u)\n",
-		cl_init->hostname, rpc_ops->version);
-
 	/* see if the client already exists */
 	do {
 		spin_lock(&nn->nfs_client_lock);
@@ -430,8 +420,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
 		new = rpc_ops->alloc_client(cl_init);
 	} while (!IS_ERR(new));
 
-	dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
-		cl_init->hostname, PTR_ERR(new));
 	return new;
 }
 EXPORT_SYMBOL_GPL(nfs_get_client);
@@ -558,6 +546,7 @@ static int nfs_start_lockd(struct nfs_server *server)
 		.noresvport	= server->flags & NFS_MOUNT_NORESVPORT ?
 					1 : 0,
 		.net		= clp->cl_net,
+		.nlmclnt_ops 	= clp->cl_nfs_mod->rpc_ops->nlmclnt_ops,
 	};
 
 	if (nlm_init.nfs_version > 3)
@@ -624,27 +613,21 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp,
 {
 	int error;
 
-	if (clp->cl_cons_state == NFS_CS_READY) {
-		/* the client is already initialised */
-		dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
+	/* the client is already initialised */
+	if (clp->cl_cons_state == NFS_CS_READY)
 		return clp;
-	}
 
 	/*
 	 * Create a client RPC handle for doing FSSTAT with UNIX auth only
 	 * - RFC 2623, sec 2.3.2
 	 */
 	error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX);
-	if (error < 0)
-		goto error;
-	nfs_mark_client_ready(clp, NFS_CS_READY);
+	nfs_mark_client_ready(clp, error == 0 ? NFS_CS_READY : error);
+	if (error < 0) {
+		nfs_put_client(clp);
+		clp = ERR_PTR(error);
+	}
 	return clp;
-
-error:
-	nfs_mark_client_ready(clp, error);
-	nfs_put_client(clp);
-	dprintk("<-- nfs_init_client() = xerror %d\n", error);
-	return ERR_PTR(error);
 }
 EXPORT_SYMBOL_GPL(nfs_init_client);
 
@@ -668,8 +651,6 @@ static int nfs_init_server(struct nfs_server *server,
 	struct nfs_client *clp;
 	int error;
 
-	dprintk("--> nfs_init_server()\n");
-
 	nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
 			data->timeo, data->retrans);
 	if (data->flags & NFS_MOUNT_NORESVPORT)
@@ -677,10 +658,8 @@ static int nfs_init_server(struct nfs_server *server,
 
 	/* Allocate or find a client reference we can use */
 	clp = nfs_get_client(&cl_init);
-	if (IS_ERR(clp)) {
-		dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
+	if (IS_ERR(clp))
 		return PTR_ERR(clp);
-	}
 
 	server->nfs_client = clp;
 
@@ -725,13 +704,11 @@ static int nfs_init_server(struct nfs_server *server,
 	server->mountd_protocol = data->mount_server.protocol;
 
 	server->namelen  = data->namlen;
-	dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
 	return 0;
 
 error:
 	server->nfs_client = NULL;
 	nfs_put_client(clp);
-	dprintk("<-- nfs_init_server() = xerror %d\n", error);
 	return error;
 }
 
@@ -798,12 +775,10 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
 	struct nfs_client *clp = server->nfs_client;
 	int error;
 
-	dprintk("--> nfs_probe_fsinfo()\n");
-
 	if (clp->rpc_ops->set_capabilities != NULL) {
 		error = clp->rpc_ops->set_capabilities(server, mntfh);
 		if (error < 0)
-			goto out_error;
+			return error;
 	}
 
 	fsinfo.fattr = fattr;
@@ -811,7 +786,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
 	memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype));
 	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
 	if (error < 0)
-		goto out_error;
+		return error;
 
 	nfs_server_set_fsinfo(server, &fsinfo);
 
@@ -826,12 +801,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
 			server->namelen = pathinfo.max_namelen;
 	}
 
-	dprintk("<-- nfs_probe_fsinfo() = 0\n");
 	return 0;
-
-out_error:
-	dprintk("nfs_probe_fsinfo: error = %d\n", -error);
-	return error;
 }
 EXPORT_SYMBOL_GPL(nfs_probe_fsinfo);
 
@@ -927,8 +897,6 @@ EXPORT_SYMBOL_GPL(nfs_alloc_server);
  */
 void nfs_free_server(struct nfs_server *server)
 {
-	dprintk("--> nfs_free_server()\n");
-
 	nfs_server_remove_lists(server);
 
 	if (server->destroy != NULL)
@@ -946,7 +914,6 @@ void nfs_free_server(struct nfs_server *server)
 	nfs_free_iostats(server->io_stats);
 	kfree(server);
 	nfs_release_automount_timer();
-	dprintk("<-- nfs_free_server()\n");
 }
 EXPORT_SYMBOL_GPL(nfs_free_server);
 
@@ -1026,10 +993,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 	struct nfs_fattr *fattr_fsinfo;
 	int error;
 
-	dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
-		(unsigned long long) fattr->fsid.major,
-		(unsigned long long) fattr->fsid.minor);
-
 	server = nfs_alloc_server();
 	if (!server)
 		return ERR_PTR(-ENOMEM);
@@ -1061,10 +1024,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 	if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
 		server->namelen = NFS4_MAXNAMLEN;
 
-	dprintk("Cloned FSID: %llx:%llx\n",
-		(unsigned long long) server->fsid.major,
-		(unsigned long long) server->fsid.minor);
-
 	error = nfs_start_lockd(server);
 	if (error < 0)
 		goto out_free_server;
@@ -1073,13 +1032,11 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 	server->mount_time = jiffies;
 
 	nfs_free_fattr(fattr_fsinfo);
-	dprintk("<-- nfs_clone_server() = %p\n", server);
 	return server;
 
 out_free_server:
 	nfs_free_fattr(fattr_fsinfo);
 	nfs_free_server(server);
-	dprintk("<-- nfs_clone_server() = error %d\n", error);
 	return ERR_PTR(error);
 }
 EXPORT_SYMBOL_GPL(nfs_clone_server);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f92ba8d6c556..2ac00bf4ecf1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -57,7 +57,7 @@ static void nfs_readdir_clear_array(struct page*);
 const struct file_operations nfs_dir_operations = {
 	.llseek		= nfs_llseek_dir,
 	.read		= generic_read_dir,
-	.iterate_shared	= nfs_readdir,
+	.iterate	= nfs_readdir,
 	.open		= nfs_opendir,
 	.release	= nfs_closedir,
 	.fsync		= nfs_fsync_dir,
@@ -145,7 +145,6 @@ struct nfs_cache_array_entry {
 };
 
 struct nfs_cache_array {
-	atomic_t refcount;
 	int size;
 	int eof_index;
 	u64 last_cookie;
@@ -171,27 +170,6 @@ typedef struct {
 } nfs_readdir_descriptor_t;
 
 /*
- * The caller is responsible for calling nfs_readdir_release_array(page)
- */
-static
-struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
-{
-	void *ptr;
-	if (page == NULL)
-		return ERR_PTR(-EIO);
-	ptr = kmap(page);
-	if (ptr == NULL)
-		return ERR_PTR(-ENOMEM);
-	return ptr;
-}
-
-static
-void nfs_readdir_release_array(struct page *page)
-{
-	kunmap(page);
-}
-
-/*
  * we are freeing strings created by nfs_add_to_readdir_array()
  */
 static
@@ -201,18 +179,9 @@ void nfs_readdir_clear_array(struct page *page)
 	int i;
 
 	array = kmap_atomic(page);
-	if (atomic_dec_and_test(&array->refcount))
-		for (i = 0; i < array->size; i++)
-			kfree(array->array[i].string.name);
-	kunmap_atomic(array);
-}
-
-static bool grab_page(struct page *page)
-{
-	struct nfs_cache_array *array = kmap_atomic(page);
-	bool res = atomic_inc_not_zero(&array->refcount);
+	for (i = 0; i < array->size; i++)
+		kfree(array->array[i].string.name);
 	kunmap_atomic(array);
-	return res;
 }
 
 /*
@@ -239,13 +208,10 @@ int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int le
 static
 int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 {
-	struct nfs_cache_array *array = nfs_readdir_get_array(page);
+	struct nfs_cache_array *array = kmap(page);
 	struct nfs_cache_array_entry *cache_entry;
 	int ret;
 
-	if (IS_ERR(array))
-		return PTR_ERR(array);
-
 	cache_entry = &array->array[array->size];
 
 	/* Check that this entry lies within the page bounds */
@@ -264,7 +230,7 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 	if (entry->eof != 0)
 		array->eof_index = array->size;
 out:
-	nfs_readdir_release_array(page);
+	kunmap(page);
 	return ret;
 }
 
@@ -353,11 +319,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
 	struct nfs_cache_array *array;
 	int status;
 
-	array = nfs_readdir_get_array(desc->page);
-	if (IS_ERR(array)) {
-		status = PTR_ERR(array);
-		goto out;
-	}
+	array = kmap(desc->page);
 
 	if (*desc->dir_cookie == 0)
 		status = nfs_readdir_search_for_pos(array, desc);
@@ -369,8 +331,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
 		desc->current_index += array->size;
 		desc->page_index++;
 	}
-	nfs_readdir_release_array(desc->page);
-out:
+	kunmap(desc->page);
 	return status;
 }
 
@@ -606,13 +567,10 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 
 out_nopages:
 	if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
-		array = nfs_readdir_get_array(page);
-		if (!IS_ERR(array)) {
-			array->eof_index = array->size;
-			status = 0;
-			nfs_readdir_release_array(page);
-		} else
-			status = PTR_ERR(array);
+		array = kmap(page);
+		array->eof_index = array->size;
+		status = 0;
+		kunmap(page);
 	}
 
 	put_page(scratch);
@@ -674,13 +632,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 		goto out;
 	}
 
-	array = nfs_readdir_get_array(page);
-	if (IS_ERR(array)) {
-		status = PTR_ERR(array);
-		goto out_label_free;
-	}
+	array = kmap(page);
 	memset(array, 0, sizeof(struct nfs_cache_array));
-	atomic_set(&array->refcount, 1);
 	array->eof_index = -1;
 
 	status = nfs_readdir_alloc_pages(pages, array_size);
@@ -703,8 +656,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 
 	nfs_readdir_free_pages(pages, array_size);
 out_release_array:
-	nfs_readdir_release_array(page);
-out_label_free:
+	kunmap(page);
 	nfs4_label_free(entry.label);
 out:
 	nfs_free_fattr(entry.fattr);
@@ -743,7 +695,8 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
 static
 void cache_page_release(nfs_readdir_descriptor_t *desc)
 {
-	nfs_readdir_clear_array(desc->page);
+	if (!desc->page->mapping)
+		nfs_readdir_clear_array(desc->page);
 	put_page(desc->page);
 	desc->page = NULL;
 }
@@ -751,16 +704,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
 static
 struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
 {
-	struct page *page;
-
-	for (;;) {
-		page = read_cache_page(desc->file->f_mapping,
+	return read_cache_page(desc->file->f_mapping,
 			desc->page_index, (filler_t *)nfs_readdir_filler, desc);
-		if (IS_ERR(page) || grab_page(page))
-			break;
-		put_page(page);
-	}
-	return page;
 }
 
 /*
@@ -809,12 +754,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
 	struct nfs_cache_array *array = NULL;
 	struct nfs_open_dir_context *ctx = file->private_data;
 
-	array = nfs_readdir_get_array(desc->page);
-	if (IS_ERR(array)) {
-		res = PTR_ERR(array);
-		goto out;
-	}
-
+	array = kmap(desc->page);
 	for (i = desc->cache_entry_index; i < array->size; i++) {
 		struct nfs_cache_array_entry *ent;
 
@@ -835,8 +775,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
 	if (array->eof_index >= 0)
 		desc->eof = 1;
 
-	nfs_readdir_release_array(desc->page);
-out:
+	kunmap(desc->page);
 	cache_page_release(desc);
 	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
 			(unsigned long long)*desc->dir_cookie, res);
@@ -966,11 +905,13 @@ out:
 
 static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
 {
+	struct inode *inode = file_inode(filp);
 	struct nfs_open_dir_context *dir_ctx = filp->private_data;
 
 	dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
 			filp, offset, whence);
 
+	inode_lock(inode);
 	switch (whence) {
 		case 1:
 			offset += filp->f_pos;
@@ -978,13 +919,16 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
 			if (offset >= 0)
 				break;
 		default:
-			return -EINVAL;
+			offset = -EINVAL;
+			goto out;
 	}
 	if (offset != filp->f_pos) {
 		filp->f_pos = offset;
 		dir_ctx->dir_cookie = 0;
 		dir_ctx->duped = 0;
 	}
+out:
+	inode_unlock(inode);
 	return offset;
 }
 
@@ -2002,29 +1946,6 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 }
 EXPORT_SYMBOL_GPL(nfs_link);
 
-static void
-nfs_complete_rename(struct rpc_task *task, struct nfs_renamedata *data)
-{
-	struct dentry *old_dentry = data->old_dentry;
-	struct dentry *new_dentry = data->new_dentry;
-	struct inode *old_inode = d_inode(old_dentry);
-	struct inode *new_inode = d_inode(new_dentry);
-
-	nfs_mark_for_revalidate(old_inode);
-
-	switch (task->tk_status) {
-	case 0:
-		if (new_inode != NULL)
-			nfs_drop_nlink(new_inode);
-		d_move(old_dentry, new_dentry);
-		nfs_set_verifier(new_dentry,
-					nfs_save_change_attribute(data->new_dir));
-		break;
-	case -ENOENT:
-		nfs_dentry_handle_enoent(old_dentry);
-	}
-}
-
 /*
  * RENAME
  * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -2055,7 +1976,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
 	struct inode *old_inode = d_inode(old_dentry);
 	struct inode *new_inode = d_inode(new_dentry);
-	struct dentry *dentry = NULL;
+	struct dentry *dentry = NULL, *rehash = NULL;
 	struct rpc_task *task;
 	int error = -EBUSY;
 
@@ -2078,8 +1999,10 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		 * To prevent any new references to the target during the
 		 * rename, we unhash the dentry in advance.
 		 */
-		if (!d_unhashed(new_dentry))
+		if (!d_unhashed(new_dentry)) {
 			d_drop(new_dentry);
+			rehash = new_dentry;
+		}
 
 		if (d_count(new_dentry) > 2) {
 			int err;
@@ -2096,6 +2019,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 				goto out;
 
 			new_dentry = dentry;
+			rehash = NULL;
 			new_inode = NULL;
 		}
 	}
@@ -2104,8 +2028,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (new_inode != NULL)
 		NFS_PROTO(new_inode)->return_delegation(new_inode);
 
-	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
-					nfs_complete_rename);
+	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
 	if (IS_ERR(task)) {
 		error = PTR_ERR(task);
 		goto out;
@@ -2115,9 +2038,27 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (error == 0)
 		error = task->tk_status;
 	rpc_put_task(task);
+	nfs_mark_for_revalidate(old_inode);
 out:
+	if (rehash)
+		d_rehash(rehash);
 	trace_nfs_rename_exit(old_dir, old_dentry,
 			new_dir, new_dentry, error);
+	if (!error) {
+		if (new_inode != NULL)
+			nfs_drop_nlink(new_inode);
+		/*
+		 * The d_move() should be here instead of in an async RPC completion
+		 * handler because we need the proper locks to move the dentry.  If
+		 * we're interrupted by a signal, the async RPC completion handler
+		 * should mark the directories for revalidation.
+		 */
+		d_move(old_dentry, new_dentry);
+		nfs_set_verifier(new_dentry,
+					nfs_save_change_attribute(new_dir));
+	} else if (error == -ENOENT)
+		nfs_dentry_handle_enoent(old_dentry);
+
 	/* new dentry created? */
 	if (dentry)
 		dput(dentry);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index c1b5fed7c863..6fb9fad2d1e6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -392,16 +392,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 	nfs_direct_req_release(dreq);
 }
 
-static void nfs_direct_readpage_release(struct nfs_page *req)
-{
-	dprintk("NFS: direct read done (%s/%llu %d@%lld)\n",
-		req->wb_context->dentry->d_sb->s_id,
-		(unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)),
-		req->wb_bytes,
-		(long long)req_offset(req));
-	nfs_release_request(req);
-}
-
 static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 {
 	unsigned long bytes = 0;
@@ -426,7 +416,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 			set_page_dirty(page);
 		bytes += req->wb_bytes;
 		nfs_list_remove_request(req);
-		nfs_direct_readpage_release(req);
+		nfs_release_request(req);
 	}
 out_put:
 	if (put_dreq(dreq))
@@ -700,16 +690,9 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 	int status = data->task.tk_status;
 
 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
-	if (status < 0) {
-		dprintk("NFS: %5u commit failed with error %d.\n",
-			data->task.tk_pid, status);
-		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-	} else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
-		dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
+	if (status < 0 || nfs_direct_cmp_commit_data_verf(dreq, data))
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-	}
 
-	dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
 		nfs_list_remove_request(req);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 668213984d68..5713eb32a45e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -482,7 +482,7 @@ static int nfs_launder_page(struct page *page)
 		inode->i_ino, (long long)page_offset(page));
 
 	nfs_fscache_wait_on_page_write(nfsi, page);
-	return nfs_wb_launder_page(inode, page);
+	return nfs_wb_page(inode, page);
 }
 
 static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
@@ -697,14 +697,14 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 	if (!IS_ERR(l_ctx)) {
 		status = nfs_iocounter_wait(l_ctx);
 		nfs_put_lock_context(l_ctx);
-		if (status < 0)
+		/*  NOTE: special case
+		 * 	If we're signalled while cleaning up locks on process exit, we
+		 * 	still need to complete the unlock.
+		 */
+		if (status < 0 && !(fl->fl_flags & FL_CLOSE))
 			return status;
 	}
 
-	/* NOTE: special case
-	 * 	If we're signalled while cleaning up locks on process exit, we
-	 * 	still need to complete the unlock.
-	 */
 	/*
 	 * Use local locking if mounted with "-onolock" or with appropriate
 	 * "-olocal_lock="
@@ -820,9 +820,23 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
 	if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
 		is_local = 1;
 
-	/* We're simulating flock() locks using posix locks on the server */
-	if (fl->fl_type == F_UNLCK)
+	/*
+	 * VFS doesn't require the open mode to match a flock() lock's type.
+	 * NFS, however, may simulate flock() locking with posix locking which
+	 * requires the open mode to match the lock type.
+	 */
+	switch (fl->fl_type) {
+	case F_UNLCK:
 		return do_unlk(filp, cmd, fl, is_local);
+	case F_RDLCK:
+		if (!(filp->f_mode & FMODE_READ))
+			return -EBADF;
+		break;
+	case F_WRLCK:
+		if (!(filp->f_mode & FMODE_WRITE))
+			return -EBADF;
+	}
+
 	return do_setlk(filp, cmd, fl, is_local);
 }
 EXPORT_SYMBOL_GPL(nfs_flock);
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index acd30baca461..1cf85d65b748 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -921,11 +921,11 @@ fl_pnfs_update_layout(struct inode *ino,
 	fl = FILELAYOUT_LSEG(lseg);
 
 	status = filelayout_check_deviceid(lo, fl, gfp_flags);
-	if (status)
+	if (status) {
+		pnfs_put_lseg(lseg);
 		lseg = ERR_PTR(status);
+	}
 out:
-	if (IS_ERR(lseg))
-		pnfs_put_lseg(lseg);
 	return lseg;
 }
 
@@ -933,6 +933,7 @@ static void
 filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 			struct nfs_page *req)
 {
+	pnfs_generic_pg_check_layout(pgio);
 	if (!pgio->pg_lseg) {
 		pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
 						      req->wb_context,
@@ -959,6 +960,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 	struct nfs_commit_info cinfo;
 	int status;
 
+	pnfs_generic_pg_check_layout(pgio);
 	if (!pgio->pg_lseg) {
 		pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
 						      req->wb_context,
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 42dedf2d625f..23542dc44a25 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -454,6 +454,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
 			goto out_err_free;
 
 		/* fh */
+		rc = -EIO;
 		p = xdr_inline_decode(&stream, 4);
 		if (!p)
 			goto out_err_free;
@@ -846,6 +847,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 	int ds_idx;
 
 retry:
+	pnfs_generic_pg_check_layout(pgio);
 	/* Use full layout for now */
 	if (!pgio->pg_lseg)
 		ff_layout_pg_get_read(pgio, req, false);
@@ -894,6 +896,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 	int status;
 
 retry:
+	pnfs_generic_pg_check_layout(pgio);
 	if (!pgio->pg_lseg) {
 		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 						   req->wb_context,
@@ -1800,16 +1803,16 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 
 	ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
 	if (!ds)
-		return PNFS_NOT_ATTEMPTED;
+		goto out_failed;
 
 	ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
 						   hdr->inode);
 	if (IS_ERR(ds_clnt))
-		return PNFS_NOT_ATTEMPTED;
+		goto out_failed;
 
 	ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
 	if (!ds_cred)
-		return PNFS_NOT_ATTEMPTED;
+		goto out_failed;
 
 	vers = nfs4_ff_layout_ds_version(lseg, idx);
 
@@ -1839,6 +1842,11 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 			  sync, RPC_TASK_SOFTCONN);
 	put_rpccred(ds_cred);
 	return PNFS_ATTEMPTED;
+
+out_failed:
+	if (ff_layout_avoid_mds_available_ds(lseg))
+		return PNFS_TRY_AGAIN;
+	return PNFS_NOT_ATTEMPTED;
 }
 
 static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -2354,10 +2362,21 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
 	return 0;
 }
 
+static int
+ff_layout_set_layoutdriver(struct nfs_server *server,
+		const struct nfs_fh *dummy)
+{
+#if IS_ENABLED(CONFIG_NFS_V4_2)
+	server->caps |= NFS_CAP_LAYOUTSTATS;
+#endif
+	return 0;
+}
+
 static struct pnfs_layoutdriver_type flexfilelayout_type = {
 	.id			= LAYOUT_FLEX_FILES,
 	.name			= "LAYOUT_FLEX_FILES",
 	.owner			= THIS_MODULE,
+	.set_layoutdriver	= ff_layout_set_layoutdriver,
 	.alloc_layout_hdr	= ff_layout_alloc_layout_hdr,
 	.free_layout_hdr	= ff_layout_free_layout_hdr,
 	.alloc_lseg		= ff_layout_alloc_lseg,
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 457cfeb1d5c1..6df7a0cf5660 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -119,7 +119,13 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
 		if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE)
 			ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE;
 
-		if (ds_versions[i].version != 3 || ds_versions[i].minor_version != 0) {
+		/*
+		 * check for valid major/minor combination.
+		 * currently we support dataserver which talk:
+		 *   v3, v4.0, v4.1, v4.2
+		 */
+		if (!((ds_versions[i].version == 3 && ds_versions[i].minor_version == 0) ||
+			(ds_versions[i].version == 4 && ds_versions[i].minor_version < 3))) {
 			dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__,
 				i, ds_versions[i].version,
 				ds_versions[i].minor_version);
@@ -415,7 +421,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 			     mirror->mirror_ds->ds_versions[0].minor_version);
 
 	/* connect success, check rsize/wsize limit */
-	if (ds->ds_clp) {
+	if (!status) {
 		max_payload =
 			nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
 				       NULL);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f489a5a71bd5..1de93ba78dc9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -734,7 +734,10 @@ int nfs_getattr(const struct path *path, struct kstat *stat,
 	if (need_atime || nfs_need_revalidate_inode(inode)) {
 		struct nfs_server *server = NFS_SERVER(inode);
 
-		nfs_readdirplus_parent_cache_miss(path->dentry);
+		if (!(server->flags & NFS_MOUNT_NOAC))
+			nfs_readdirplus_parent_cache_miss(path->dentry);
+		else
+			nfs_readdirplus_parent_cache_hit(path->dentry);
 		err = __nfs_revalidate_inode(server, inode);
 	} else
 		nfs_readdirplus_parent_cache_hit(path->dentry);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7b38fedb7e03..8701d7617964 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -7,6 +7,7 @@
 #include <linux/security.h>
 #include <linux/crc32.h>
 #include <linux/nfs_page.h>
+#include <linux/wait_bit.h>
 
 #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
 
@@ -398,7 +399,6 @@ extern struct file_system_type nfs4_referral_fs_type;
 bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
 struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *,
 			struct nfs_subversion *);
-void nfs_initialise_sb(struct super_block *);
 int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
 int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
 struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *,
@@ -458,7 +458,6 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
 
 /* super.c */
-void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
 void nfs_umount_begin(struct super_block *);
 int  nfs_statfs(struct dentry *, struct kstatfs *);
 int  nfs_show_options(struct seq_file *, struct dentry *);
@@ -495,7 +494,6 @@ void nfs_mark_request_commit(struct nfs_page *req,
 			     u32 ds_commit_idx);
 int nfs_write_need_commit(struct nfs_pgio_header *);
 void nfs_writeback_update_inode(struct nfs_pgio_header *hdr);
-int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf);
 int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
 			    int how, struct nfs_commit_info *cinfo);
 void nfs_retry_commit(struct list_head *page_list,
@@ -756,9 +754,13 @@ static inline bool nfs_error_is_fatal(int err)
 {
 	switch (err) {
 	case -ERESTARTSYS:
+	case -EACCES:
+	case -EDQUOT:
+	case -EFBIG:
 	case -EIO:
 	case -ENOSPC:
 	case -EROFS:
+	case -ESTALE:
 	case -E2BIG:
 		return true;
 	default:
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 786f17580582..e5686be67be8 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -143,11 +143,8 @@ struct vfsmount *nfs_d_automount(struct path *path)
 	struct nfs_fh *fh = NULL;
 	struct nfs_fattr *fattr = NULL;
 
-	dprintk("--> nfs_d_automount()\n");
-
-	mnt = ERR_PTR(-ESTALE);
 	if (IS_ROOT(path->dentry))
-		goto out_nofree;
+		return ERR_PTR(-ESTALE);
 
 	mnt = ERR_PTR(-ENOMEM);
 	fh = nfs_alloc_fhandle();
@@ -155,13 +152,10 @@ struct vfsmount *nfs_d_automount(struct path *path)
 	if (fh == NULL || fattr == NULL)
 		goto out;
 
-	dprintk("%s: enter\n", __func__);
-
 	mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
 	if (IS_ERR(mnt))
 		goto out;
 
-	dprintk("%s: done, success\n", __func__);
 	mntget(mnt); /* prevent immediate expiration */
 	mnt_set_expiry(mnt, &nfs_automount_list);
 	schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
@@ -169,11 +163,6 @@ struct vfsmount *nfs_d_automount(struct path *path)
 out:
 	nfs_free_fattr(fattr);
 	nfs_free_fhandle(fh);
-out_nofree:
-	if (IS_ERR(mnt))
-		dprintk("<-- %s(): error %ld\n", __func__, PTR_ERR(mnt));
-	else
-		dprintk("<-- %s() = %p\n", __func__, mnt);
 	return mnt;
 }
 
@@ -248,27 +237,20 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
 		.fattr = fattr,
 		.authflavor = authflavor,
 	};
-	struct vfsmount *mnt = ERR_PTR(-ENOMEM);
+	struct vfsmount *mnt;
 	char *page = (char *) __get_free_page(GFP_USER);
 	char *devname;
 
-	dprintk("--> nfs_do_submount()\n");
-
-	dprintk("%s: submounting on %pd2\n", __func__,
-			dentry);
 	if (page == NULL)
-		goto out;
+		return ERR_PTR(-ENOMEM);
+
 	devname = nfs_devname(dentry, page, PAGE_SIZE);
-	mnt = (struct vfsmount *)devname;
 	if (IS_ERR(devname))
-		goto free_page;
-	mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
-free_page:
-	free_page((unsigned long)page);
-out:
-	dprintk("%s: done\n", __func__);
+		mnt = ERR_CAST(devname);
+	else
+		mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
 
-	dprintk("<-- nfs_do_submount() = %p\n", mnt);
+	free_page((unsigned long)page);
 	return mnt;
 }
 EXPORT_SYMBOL_GPL(nfs_do_submount);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index dc925b531f32..0c07b567118d 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -865,12 +865,63 @@ static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
 }
 
+static void nfs3_nlm_alloc_call(void *data)
+{
+	struct nfs_lock_context *l_ctx = data;
+	if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags)) {
+		get_nfs_open_context(l_ctx->open_context);
+		nfs_get_lock_context(l_ctx->open_context);
+	}
+}
+
+static bool nfs3_nlm_unlock_prepare(struct rpc_task *task, void *data)
+{
+	struct nfs_lock_context *l_ctx = data;
+	if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags))
+		return nfs_async_iocounter_wait(task, l_ctx);
+	return false;
+
+}
+
+static void nfs3_nlm_release_call(void *data)
+{
+	struct nfs_lock_context *l_ctx = data;
+	struct nfs_open_context *ctx;
+	if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags)) {
+		ctx = l_ctx->open_context;
+		nfs_put_lock_context(l_ctx);
+		put_nfs_open_context(ctx);
+	}
+}
+
+const struct nlmclnt_operations nlmclnt_fl_close_lock_ops = {
+	.nlmclnt_alloc_call = nfs3_nlm_alloc_call,
+	.nlmclnt_unlock_prepare = nfs3_nlm_unlock_prepare,
+	.nlmclnt_release_call = nfs3_nlm_release_call,
+};
+
 static int
 nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = file_inode(filp);
+	struct nfs_lock_context *l_ctx = NULL;
+	struct nfs_open_context *ctx = nfs_file_open_context(filp);
+	int status;
 
-	return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
+	if (fl->fl_flags & FL_CLOSE) {
+		l_ctx = nfs_get_lock_context(ctx);
+		if (IS_ERR(l_ctx))
+			l_ctx = NULL;
+		else
+			set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags);
+	}
+
+	status = nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl, l_ctx);
+
+	if (l_ctx)
+		nfs_put_lock_context(l_ctx);
+
+	return status;
 }
 
 static int nfs3_have_delegation(struct inode *inode, fmode_t flags)
@@ -921,6 +972,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.dir_inode_ops	= &nfs3_dir_inode_operations,
 	.file_inode_ops	= &nfs3_file_inode_operations,
 	.file_ops	= &nfs_file_operations,
+	.nlmclnt_ops	= &nlmclnt_fl_close_lock_ops,
 	.getroot	= nfs3_proc_get_root,
 	.submount	= nfs_submount,
 	.try_mount	= nfs_try_mount,
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 1e486c73ec94..319a47db218d 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -167,23 +167,29 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 	if (status)
 		return status;
 
+	res->commit_res.verf = kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS);
+	if (!res->commit_res.verf)
+		return -ENOMEM;
 	status = nfs4_call_sync(server->client, server, &msg,
 				&args->seq_args, &res->seq_res, 0);
 	if (status == -ENOTSUPP)
 		server->caps &= ~NFS_CAP_COPY;
 	if (status)
-		return status;
+		goto out;
 
-	if (res->write_res.verifier.committed != NFS_FILE_SYNC) {
-		status = nfs_commit_file(dst, &res->write_res.verifier.verifier);
-		if (status)
-			return status;
+	if (nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
+				    &res->commit_res.verf->verifier)) {
+		status = -EAGAIN;
+		goto out;
 	}
 
 	truncate_pagecache_range(dst_inode, pos_dst,
 				 pos_dst + res->write_res.count);
 
-	return res->write_res.count;
+	status = res->write_res.count;
+out:
+	kfree(res->commit_res.verf);
+	return status;
 }
 
 ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
@@ -240,6 +246,9 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
 		if (err == -ENOTSUPP) {
 			err = -EOPNOTSUPP;
 			break;
+		} if (err == -EAGAIN) {
+			dst_exception.retry = 1;
+			continue;
 		}
 
 		err2 = nfs4_handle_exception(server, err, &src_exception);
@@ -379,6 +388,7 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
 			pnfs_mark_layout_stateid_invalid(lo, &head);
 			spin_unlock(&inode->i_lock);
 			pnfs_free_lseg_list(&head);
+			nfs_commit_inode(inode, 0);
 		} else
 			spin_unlock(&inode->i_lock);
 		break;
@@ -400,8 +410,6 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
 	case -EOPNOTSUPP:
 		NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
 	}
-
-	dprintk("%s server returns %d\n", __func__, task->tk_status);
 }
 
 static void
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 6c7296454bbc..528362f69cc1 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -66,12 +66,14 @@
 					 encode_putfh_maxsz + \
 					 encode_savefh_maxsz + \
 					 encode_putfh_maxsz + \
-					 encode_copy_maxsz)
+					 encode_copy_maxsz + \
+					 encode_commit_maxsz)
 #define NFS4_dec_copy_sz		(compound_decode_hdr_maxsz + \
 					 decode_putfh_maxsz + \
 					 decode_savefh_maxsz + \
 					 decode_putfh_maxsz + \
-					 decode_copy_maxsz)
+					 decode_copy_maxsz + \
+					 decode_commit_maxsz)
 #define NFS4_enc_deallocate_sz		(compound_encode_hdr_maxsz + \
 					 encode_putfh_maxsz + \
 					 encode_deallocate_maxsz + \
@@ -222,6 +224,18 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
 	encode_nops(&hdr);
 }
 
+static void encode_copy_commit(struct xdr_stream *xdr,
+			  struct nfs42_copy_args *args,
+			  struct compound_hdr *hdr)
+{
+	__be32 *p;
+
+	encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr);
+	p = reserve_space(xdr, 12);
+	p = xdr_encode_hyper(p, args->dst_pos);
+	*p = cpu_to_be32(args->count);
+}
+
 /*
  * Encode COPY request
  */
@@ -239,6 +253,7 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
 	encode_savefh(xdr, &hdr);
 	encode_putfh(xdr, args->dst_fh, &hdr);
 	encode_copy(xdr, args, &hdr);
+	encode_copy_commit(xdr, args, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -481,6 +496,9 @@ static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
 	if (status)
 		goto out;
 	status = decode_copy(xdr, res);
+	if (status)
+		goto out;
+	status = decode_commit(xdr, &res->commit_res);
 out:
 	return status;
 }
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8346ccbf2d52..66776f022111 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -359,11 +359,9 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 	struct nfs_client *old;
 	int error;
 
-	if (clp->cl_cons_state == NFS_CS_READY) {
+	if (clp->cl_cons_state == NFS_CS_READY)
 		/* the client is initialised already */
-		dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
 		return clp;
-	}
 
 	/* Check NFS protocol revision and initialize RPC op vector */
 	clp->rpc_ops = &nfs_v4_clientops;
@@ -421,7 +419,6 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 error:
 	nfs_mark_client_ready(clp, error);
 	nfs_put_client(clp);
-	dprintk("<-- nfs4_init_client() = xerror %d\n", error);
 	return ERR_PTR(error);
 }
 
@@ -469,6 +466,50 @@ static bool nfs4_same_verifier(nfs4_verifier *v1, nfs4_verifier *v2)
 	return memcmp(v1->data, v2->data, sizeof(v1->data)) == 0;
 }
 
+static int nfs4_match_client(struct nfs_client  *pos,  struct nfs_client *new,
+			     struct nfs_client **prev, struct nfs_net *nn)
+{
+	int status;
+
+	if (pos->rpc_ops != new->rpc_ops)
+		return 1;
+
+	if (pos->cl_minorversion != new->cl_minorversion)
+		return 1;
+
+	/* If "pos" isn't marked ready, we can't trust the
+	 * remaining fields in "pos", especially the client
+	 * ID and serverowner fields.  Wait for CREATE_SESSION
+	 * to finish. */
+	if (pos->cl_cons_state > NFS_CS_READY) {
+		atomic_inc(&pos->cl_count);
+		spin_unlock(&nn->nfs_client_lock);
+
+		nfs_put_client(*prev);
+		*prev = pos;
+
+		status = nfs_wait_client_init_complete(pos);
+		spin_lock(&nn->nfs_client_lock);
+
+		if (status < 0)
+			return status;
+	}
+
+	if (pos->cl_cons_state != NFS_CS_READY)
+		return 1;
+
+	if (pos->cl_clientid != new->cl_clientid)
+		return 1;
+
+	/* NFSv4.1 always uses the uniform string, however someone
+	 * might switch the uniquifier string on us.
+	 */
+	if (!nfs4_match_client_owner_id(pos, new))
+		return 1;
+
+	return 0;
+}
+
 /**
  * nfs40_walk_client_list - Find server that recognizes a client ID
  *
@@ -497,34 +538,10 @@ int nfs40_walk_client_list(struct nfs_client *new,
 	spin_lock(&nn->nfs_client_lock);
 	list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
 
-		if (pos->rpc_ops != new->rpc_ops)
-			continue;
-
-		if (pos->cl_minorversion != new->cl_minorversion)
-			continue;
-
-		/* If "pos" isn't marked ready, we can't trust the
-		 * remaining fields in "pos" */
-		if (pos->cl_cons_state > NFS_CS_READY) {
-			atomic_inc(&pos->cl_count);
-			spin_unlock(&nn->nfs_client_lock);
-
-			nfs_put_client(prev);
-			prev = pos;
-
-			status = nfs_wait_client_init_complete(pos);
-			if (status < 0)
-				goto out;
-			status = -NFS4ERR_STALE_CLIENTID;
-			spin_lock(&nn->nfs_client_lock);
-		}
-		if (pos->cl_cons_state != NFS_CS_READY)
-			continue;
-
-		if (pos->cl_clientid != new->cl_clientid)
-			continue;
-
-		if (!nfs4_match_client_owner_id(pos, new))
+		status = nfs4_match_client(pos, new, &prev, nn);
+		if (status < 0)
+			goto out_unlock;
+		if (status != 0)
 			continue;
 		/*
 		 * We just sent a new SETCLIENTID, which should have
@@ -557,8 +574,6 @@ int nfs40_walk_client_list(struct nfs_client *new,
 
 			prev = NULL;
 			*result = pos;
-			dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
-				__func__, pos, atomic_read(&pos->cl_count));
 			goto out;
 		case -ERESTARTSYS:
 		case -ETIMEDOUT:
@@ -572,32 +587,17 @@ int nfs40_walk_client_list(struct nfs_client *new,
 
 		spin_lock(&nn->nfs_client_lock);
 	}
+out_unlock:
 	spin_unlock(&nn->nfs_client_lock);
 
 	/* No match found. The server lost our clientid */
 out:
 	nfs_put_client(prev);
-	dprintk("NFS: <-- %s status = %d\n", __func__, status);
 	return status;
 }
 
 #ifdef CONFIG_NFS_V4_1
 /*
- * Returns true if the client IDs match
- */
-static bool nfs4_match_clientids(u64 a, u64 b)
-{
-	if (a != b) {
-		dprintk("NFS: --> %s client ID %llx does not match %llx\n",
-			__func__, a, b);
-		return false;
-	}
-	dprintk("NFS: --> %s client ID %llx matches %llx\n",
-		__func__, a, b);
-	return true;
-}
-
-/*
  * Returns true if the server major ids match
  */
 static bool
@@ -605,36 +605,8 @@ nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1,
 				struct nfs41_server_owner *o2)
 {
 	if (o1->major_id_sz != o2->major_id_sz)
-		goto out_major_mismatch;
-	if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0)
-		goto out_major_mismatch;
-
-	dprintk("NFS: --> %s server owner major IDs match\n", __func__);
-	return true;
-
-out_major_mismatch:
-	dprintk("NFS: --> %s server owner major IDs do not match\n",
-		__func__);
-	return false;
-}
-
-/*
- * Returns true if server minor ids match
- */
-static bool
-nfs4_check_serverowner_minor_id(struct nfs41_server_owner *o1,
-				struct nfs41_server_owner *o2)
-{
-	/* Check eir_server_owner so_minor_id */
-	if (o1->minor_id != o2->minor_id)
-		goto out_minor_mismatch;
-
-	dprintk("NFS: --> %s server owner minor IDs match\n", __func__);
-	return true;
-
-out_minor_mismatch:
-	dprintk("NFS: --> %s server owner minor IDs do not match\n", __func__);
-	return false;
+		return false;
+	return memcmp(o1->major_id, o2->major_id, o1->major_id_sz) == 0;
 }
 
 /*
@@ -645,18 +617,9 @@ nfs4_check_server_scope(struct nfs41_server_scope *s1,
 			struct nfs41_server_scope *s2)
 {
 	if (s1->server_scope_sz != s2->server_scope_sz)
-		goto out_scope_mismatch;
-	if (memcmp(s1->server_scope, s2->server_scope,
-		   s1->server_scope_sz) != 0)
-		goto out_scope_mismatch;
-
-	dprintk("NFS: --> %s server scopes match\n", __func__);
-	return true;
-
-out_scope_mismatch:
-	dprintk("NFS: --> %s server scopes do not match\n",
-		__func__);
-	return false;
+		return false;
+	return memcmp(s1->server_scope, s2->server_scope,
+					s1->server_scope_sz) == 0;
 }
 
 /**
@@ -680,7 +643,7 @@ int nfs4_detect_session_trunking(struct nfs_client *clp,
 				 struct rpc_xprt *xprt)
 {
 	/* Check eir_clientid */
-	if (!nfs4_match_clientids(clp->cl_clientid, res->clientid))
+	if (clp->cl_clientid != res->clientid)
 		goto out_err;
 
 	/* Check eir_server_owner so_major_id */
@@ -689,8 +652,7 @@ int nfs4_detect_session_trunking(struct nfs_client *clp,
 		goto out_err;
 
 	/* Check eir_server_owner so_minor_id */
-	if (!nfs4_check_serverowner_minor_id(clp->cl_serverowner,
-					     res->server_owner))
+	if (clp->cl_serverowner->minor_id != res->server_owner->minor_id)
 		goto out_err;
 
 	/* Check eir_server_scope */
@@ -739,33 +701,10 @@ int nfs41_walk_client_list(struct nfs_client *new,
 		if (pos == new)
 			goto found;
 
-		if (pos->rpc_ops != new->rpc_ops)
-			continue;
-
-		if (pos->cl_minorversion != new->cl_minorversion)
-			continue;
-
-		/* If "pos" isn't marked ready, we can't trust the
-		 * remaining fields in "pos", especially the client
-		 * ID and serverowner fields.  Wait for CREATE_SESSION
-		 * to finish. */
-		if (pos->cl_cons_state > NFS_CS_READY) {
-			atomic_inc(&pos->cl_count);
-			spin_unlock(&nn->nfs_client_lock);
-
-			nfs_put_client(prev);
-			prev = pos;
-
-			status = nfs_wait_client_init_complete(pos);
-			spin_lock(&nn->nfs_client_lock);
-			if (status < 0)
-				break;
-			status = -NFS4ERR_STALE_CLIENTID;
-		}
-		if (pos->cl_cons_state != NFS_CS_READY)
-			continue;
-
-		if (!nfs4_match_clientids(pos->cl_clientid, new->cl_clientid))
+		status = nfs4_match_client(pos, new, &prev, nn);
+		if (status < 0)
+			goto out;
+		if (status != 0)
 			continue;
 
 		/*
@@ -777,23 +716,15 @@ int nfs41_walk_client_list(struct nfs_client *new,
 						     new->cl_serverowner))
 			continue;
 
-		/* Unlike NFSv4.0, we know that NFSv4.1 always uses the
-		 * uniform string, however someone might switch the
-		 * uniquifier string on us.
-		 */
-		if (!nfs4_match_client_owner_id(pos, new))
-			continue;
 found:
 		atomic_inc(&pos->cl_count);
 		*result = pos;
 		status = 0;
-		dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
-			__func__, pos, atomic_read(&pos->cl_count));
 		break;
 	}
 
+out:
 	spin_unlock(&nn->nfs_client_lock);
-	dprintk("NFS: <-- %s status = %d\n", __func__, status);
 	nfs_put_client(prev);
 	return status;
 }
@@ -916,9 +847,6 @@ static int nfs4_set_client(struct nfs_server *server,
 		.timeparms = timeparms,
 	};
 	struct nfs_client *clp;
-	int error;
-
-	dprintk("--> nfs4_set_client()\n");
 
 	if (server->flags & NFS_MOUNT_NORESVPORT)
 		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
@@ -927,15 +855,11 @@ static int nfs4_set_client(struct nfs_server *server,
 
 	/* Allocate or find a client reference we can use */
 	clp = nfs_get_client(&cl_init);
-	if (IS_ERR(clp)) {
-		error = PTR_ERR(clp);
-		goto error;
-	}
+	if (IS_ERR(clp))
+		return PTR_ERR(clp);
 
-	if (server->nfs_client == clp) {
-		error = -ELOOP;
-		goto error;
-	}
+	if (server->nfs_client == clp)
+		return -ELOOP;
 
 	/*
 	 * Query for the lease time on clientid setup or renewal
@@ -947,11 +871,7 @@ static int nfs4_set_client(struct nfs_server *server,
 	set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
 
 	server->nfs_client = clp;
-	dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
 	return 0;
-error:
-	dprintk("<-- nfs4_set_client() = xerror %d\n", error);
-	return error;
 }
 
 /*
@@ -982,7 +902,6 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
 		.net = mds_clp->cl_net,
 		.timeparms = &ds_timeout,
 	};
-	struct nfs_client *clp;
 	char buf[INET6_ADDRSTRLEN + 1];
 
 	if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
@@ -998,10 +917,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
 	 * (section 13.1 RFC 5661).
 	 */
 	nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
-	clp = nfs_get_client(&cl_init);
-
-	dprintk("<-- %s %p\n", __func__, clp);
-	return clp;
+	return nfs_get_client(&cl_init);
 }
 EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
 
@@ -1098,8 +1014,6 @@ static int nfs4_init_server(struct nfs_server *server,
 	struct rpc_timeout timeparms;
 	int error;
 
-	dprintk("--> nfs4_init_server()\n");
-
 	nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
 			data->timeo, data->retrans);
 
@@ -1127,7 +1041,7 @@ static int nfs4_init_server(struct nfs_server *server,
 			data->minorversion,
 			data->net);
 	if (error < 0)
-		goto error;
+		return error;
 
 	if (data->rsize)
 		server->rsize = nfs_block_size(data->rsize, NULL);
@@ -1138,16 +1052,10 @@ static int nfs4_init_server(struct nfs_server *server,
 	server->acregmax = data->acregmax * HZ;
 	server->acdirmin = data->acdirmin * HZ;
 	server->acdirmax = data->acdirmax * HZ;
+	server->port     = data->nfs_server.port;
 
-	server->port = data->nfs_server.port;
-
-	error = nfs_init_server_rpcclient(server, &timeparms,
-					  data->selected_flavor);
-
-error:
-	/* Done */
-	dprintk("<-- nfs4_init_server() = %d\n", error);
-	return error;
+	return nfs_init_server_rpcclient(server, &timeparms,
+					 data->selected_flavor);
 }
 
 /*
@@ -1163,8 +1071,6 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
 	bool auth_probe;
 	int error;
 
-	dprintk("--> nfs4_create_server()\n");
-
 	server = nfs_alloc_server();
 	if (!server)
 		return ERR_PTR(-ENOMEM);
@@ -1180,12 +1086,10 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
 	if (error < 0)
 		goto error;
 
-	dprintk("<-- nfs4_create_server() = %p\n", server);
 	return server;
 
 error:
 	nfs_free_server(server);
-	dprintk("<-- nfs4_create_server() = error %d\n", error);
 	return ERR_PTR(error);
 }
 
@@ -1200,8 +1104,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 	bool auth_probe;
 	int error;
 
-	dprintk("--> nfs4_create_referral_server()\n");
-
 	server = nfs_alloc_server();
 	if (!server)
 		return ERR_PTR(-ENOMEM);
@@ -1235,12 +1137,10 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 	if (error < 0)
 		goto error;
 
-	dprintk("<-- nfs_create_referral_server() = %p\n", server);
 	return server;
 
 error:
 	nfs_free_server(server);
-	dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
 	return ERR_PTR(error);
 }
 
@@ -1300,31 +1200,16 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
 	struct sockaddr *localaddr = (struct sockaddr *)&address;
 	int error;
 
-	dprintk("--> %s: move FSID %llx:%llx to \"%s\")\n", __func__,
-			(unsigned long long)server->fsid.major,
-			(unsigned long long)server->fsid.minor,
-			hostname);
-
 	error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout);
-	if (error != 0) {
-		dprintk("<-- %s(): rpc_switch_client_transport returned %d\n",
-			__func__, error);
-		goto out;
-	}
+	if (error != 0)
+		return error;
 
 	error = rpc_localaddr(clnt, localaddr, sizeof(address));
-	if (error != 0) {
-		dprintk("<-- %s(): rpc_localaddr returned %d\n",
-			__func__, error);
-		goto out;
-	}
+	if (error != 0)
+		return error;
 
-	error = -EAFNOSUPPORT;
-	if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0) {
-		dprintk("<-- %s(): rpc_ntop returned %d\n",
-			__func__, error);
-		goto out;
-	}
+	if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0)
+		return -EAFNOSUPPORT;
 
 	nfs_server_remove_lists(server);
 	error = nfs4_set_client(server, hostname, sap, salen, buf,
@@ -1333,21 +1218,12 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
 	nfs_put_client(clp);
 	if (error != 0) {
 		nfs_server_insert_lists(server);
-		dprintk("<-- %s(): nfs4_set_client returned %d\n",
-			__func__, error);
-		goto out;
+		return error;
 	}
 
 	if (server->nfs_client->cl_hostname == NULL)
 		server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
 	nfs_server_insert_lists(server);
 
-	error = nfs_probe_destination(server);
-	if (error < 0)
-		goto out;
-
-	dprintk("<-- %s() succeeded\n", __func__);
-
-out:
-	return error;
+	return nfs_probe_destination(server);
 }
diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c
index 039b3eb6d834..ac8406018962 100644
--- a/fs/nfs/nfs4getroot.c
+++ b/fs/nfs/nfs4getroot.c
@@ -14,8 +14,6 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p
 	struct nfs_fsinfo fsinfo;
 	int ret = -ENOMEM;
 
-	dprintk("--> nfs4_get_rootfh()\n");
-
 	fsinfo.fattr = nfs_alloc_fattr();
 	if (fsinfo.fattr == NULL)
 		goto out;
@@ -38,6 +36,5 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p
 	memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
 out:
 	nfs_free_fattr(fsinfo.fattr);
-	dprintk("<-- nfs4_get_rootfh() = %d\n", ret);
 	return ret;
 }
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index d8b040bd9814..7d531da1bae3 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -340,7 +340,6 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
 out:
 	free_page((unsigned long) page);
 	free_page((unsigned long) page2);
-	dprintk("%s: done\n", __func__);
 	return mnt;
 }
 
@@ -358,11 +357,9 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
 	int err;
 
 	/* BUG_ON(IS_ROOT(dentry)); */
-	dprintk("%s: enter\n", __func__);
-
 	page = alloc_page(GFP_KERNEL);
 	if (page == NULL)
-		goto out;
+		return mnt;
 
 	fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
 	if (fs_locations == NULL)
@@ -386,8 +383,6 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
 out_free:
 	__free_page(page);
 	kfree(fs_locations);
-out:
-	dprintk("%s: done\n", __func__);
 	return mnt;
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 201ca3f2c4ba..98b0b662af09 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -698,7 +698,8 @@ static int nfs41_sequence_process(struct rpc_task *task,
 	session = slot->table->session;
 
 	if (slot->interrupted) {
-		slot->interrupted = 0;
+		if (res->sr_status != -NFS4ERR_DELAY)
+			slot->interrupted = 0;
 		interrupted = true;
 	}
 
@@ -2300,8 +2301,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		if (status != 0)
 			return status;
 	}
-	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
+	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) {
+		nfs4_sequence_free_slot(&o_res->seq_res);
 		nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label);
+	}
 	return 0;
 }
 
@@ -2586,7 +2589,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
 
 	/* Except MODE, it seems harmless of setting twice. */
 	if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE &&
-		attrset[1] & FATTR4_WORD1_MODE)
+		(attrset[1] & FATTR4_WORD1_MODE ||
+		 attrset[2] & FATTR4_WORD2_MODE_UMASK))
 		sattr->ia_valid &= ~ATTR_MODE;
 
 	if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)
@@ -3265,6 +3269,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 		.rpc_resp = &res,
 	};
 	int status;
+	int i;
 
 	bitmask[0] = FATTR4_WORD0_SUPPORTED_ATTRS |
 		     FATTR4_WORD0_FH_EXPIRE_TYPE |
@@ -3330,8 +3335,13 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 		server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
 		server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
 		server->cache_consistency_bitmask[2] = 0;
+
+		/* Avoid a regression due to buggy server */
+		for (i = 0; i < ARRAY_SIZE(res.exclcreat_bitmask); i++)
+			res.exclcreat_bitmask[i] &= res.attr_bitmask[i];
 		memcpy(server->exclcreat_bitmask, res.exclcreat_bitmask,
 			sizeof(server->exclcreat_bitmask));
+
 		server->acl_bitmask = res.acl_bitmask;
 		server->fh_expire_type = res.fh_expire_type;
 	}
@@ -4610,7 +4620,7 @@ static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task,
 		return 0;
 	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
 				hdr->args.lock_context,
-				hdr->rw_ops->rw_mode) == -EIO)
+				hdr->rw_mode) == -EIO)
 		return -EIO;
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags)))
 		return -EIO;
@@ -4804,8 +4814,10 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred,
 	if (!atomic_inc_not_zero(&clp->cl_count))
 		return -EIO;
 	data = kmalloc(sizeof(*data), GFP_NOFS);
-	if (data == NULL)
+	if (data == NULL) {
+		nfs_put_client(clp);
 		return -ENOMEM;
+	}
 	data->client = clp;
 	data->timestamp = jiffies;
 	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT,
@@ -5782,6 +5794,7 @@ struct nfs4_unlockdata {
 	struct nfs_locku_res res;
 	struct nfs4_lock_state *lsp;
 	struct nfs_open_context *ctx;
+	struct nfs_lock_context *l_ctx;
 	struct file_lock fl;
 	struct nfs_server *server;
 	unsigned long timestamp;
@@ -5806,6 +5819,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
 	atomic_inc(&lsp->ls_count);
 	/* Ensure we don't close file until we're done freeing locks! */
 	p->ctx = get_nfs_open_context(ctx);
+	p->l_ctx = nfs_get_lock_context(ctx);
 	memcpy(&p->fl, fl, sizeof(p->fl));
 	p->server = NFS_SERVER(inode);
 	return p;
@@ -5816,6 +5830,7 @@ static void nfs4_locku_release_calldata(void *data)
 	struct nfs4_unlockdata *calldata = data;
 	nfs_free_seqid(calldata->arg.seqid);
 	nfs4_put_lock_state(calldata->lsp);
+	nfs_put_lock_context(calldata->l_ctx);
 	put_nfs_open_context(calldata->ctx);
 	kfree(calldata);
 }
@@ -5857,6 +5872,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 {
 	struct nfs4_unlockdata *calldata = data;
 
+	if (test_bit(NFS_CONTEXT_UNLOCK, &calldata->l_ctx->open_context->flags) &&
+		nfs_async_iocounter_wait(task, calldata->l_ctx))
+		return;
+
 	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
 		goto out_wait;
 	nfs4_stateid_copy(&calldata->arg.stateid, &calldata->lsp->ls_stateid);
@@ -5908,6 +5927,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 	 * canceled lock is passed in, and it won't be an unlock.
 	 */
 	fl->fl_type = F_UNLCK;
+	if (fl->fl_flags & FL_CLOSE)
+		set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags);
 
 	data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
 	if (data == NULL) {
@@ -6352,7 +6373,7 @@ struct nfs4_lock_waiter {
 };
 
 static int
-nfs4_wake_lock_waiter(wait_queue_t *wait, unsigned int mode, int flags, void *key)
+nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key)
 {
 	int ret;
 	struct cb_notify_lock_args *cbnl = key;
@@ -6395,7 +6416,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 					   .inode = state->inode,
 					   .owner = &owner,
 					   .notified = false };
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	/* Don't bother with waitqueue if we don't expect a callback */
 	if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags))
@@ -6445,9 +6466,6 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
 	ctx = nfs_file_open_context(filp);
 	state = ctx->state;
 
-	if (request->fl_start < 0 || request->fl_end < 0)
-		return -EINVAL;
-
 	if (IS_GETLK(cmd)) {
 		if (state != NULL)
 			return nfs4_proc_getlk(state, F_GETLK, request);
@@ -6470,20 +6488,6 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
 	    !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags))
 		return -ENOLCK;
 
-	/*
-	 * Don't rely on the VFS having checked the file open mode,
-	 * since it won't do this for flock() locks.
-	 */
-	switch (request->fl_type) {
-	case F_RDLCK:
-		if (!(filp->f_mode & FMODE_READ))
-			return -EBADF;
-		break;
-	case F_WRLCK:
-		if (!(filp->f_mode & FMODE_WRITE))
-			return -EBADF;
-	}
-
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
 		return status;
@@ -7155,8 +7159,6 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt,
 	};
 	struct rpc_task *task;
 
-	dprintk("--> %s\n", __func__);
-
 	nfs4_copy_sessionid(&args.sessionid, &clp->cl_session->sess_id);
 	if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
 		args.dir = NFS4_CDFC4_FORE;
@@ -7176,24 +7178,20 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt,
 		if (memcmp(res.sessionid.data,
 		    clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) {
 			dprintk("NFS: %s: Session ID mismatch\n", __func__);
-			status = -EIO;
-			goto out;
+			return -EIO;
 		}
 		if ((res.dir & args.dir) != res.dir || res.dir == 0) {
 			dprintk("NFS: %s: Unexpected direction from server\n",
 				__func__);
-			status = -EIO;
-			goto out;
+			return -EIO;
 		}
 		if (res.use_conn_in_rdma_mode != args.use_conn_in_rdma_mode) {
 			dprintk("NFS: %s: Server returned RDMA mode = true\n",
 				__func__);
-			status = -EIO;
-			goto out;
+			return -EIO;
 		}
 	}
-out:
-	dprintk("<-- %s status= %d\n", __func__, status);
+
 	return status;
 }
 
@@ -7459,15 +7457,16 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
 	};
 	struct nfs41_exchange_id_data *calldata;
 	struct rpc_task *task;
-	int status = -EIO;
+	int status;
 
 	if (!atomic_inc_not_zero(&clp->cl_count))
-		goto out;
+		return -EIO;
 
-	status = -ENOMEM;
 	calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
-	if (!calldata)
-		goto out;
+	if (!calldata) {
+		nfs_put_client(clp);
+		return -ENOMEM;
+	}
 
 	if (!xprt)
 		nfs4_init_boot_verifier(clp, &verifier);
@@ -7476,10 +7475,6 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
 	if (status)
 		goto out_calldata;
 
-	dprintk("NFS call  exchange_id auth=%s, '%s'\n",
-		clp->cl_rpcclient->cl_auth->au_ops->au_name,
-		clp->cl_owner_id);
-
 	calldata->res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
 						GFP_NOFS);
 	status = -ENOMEM;
@@ -7545,13 +7540,6 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
 
 	rpc_put_task(task);
 out:
-	if (clp->cl_implid != NULL)
-		dprintk("NFS reply exchange_id: Server Implementation ID: "
-			"domain: %s, name: %s, date: %llu,%u\n",
-			clp->cl_implid->domain, clp->cl_implid->name,
-			clp->cl_implid->date.seconds,
-			clp->cl_implid->date.nseconds);
-	dprintk("NFS reply exchange_id: %d\n", status);
 	return status;
 
 out_impl_id:
@@ -7769,17 +7757,13 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 
 	nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
 	nfs4_set_sequence_privileged(&args.la_seq_args);
-	dprintk("--> %s\n", __func__);
 	task = rpc_run_task(&task_setup);
 
 	if (IS_ERR(task))
-		status = PTR_ERR(task);
-	else {
-		status = task->tk_status;
-		rpc_put_task(task);
-	}
-	dprintk("<-- %s return %d\n", __func__, status);
+		return PTR_ERR(task);
 
+	status = task->tk_status;
+	rpc_put_task(task);
 	return status;
 }
 
@@ -8180,6 +8164,12 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
 		/* fall through */
 	case -NFS4ERR_RETRY_UNCACHED_REP:
 		return -EAGAIN;
+	case -NFS4ERR_BADSESSION:
+	case -NFS4ERR_DEADSESSION:
+	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+		nfs4_schedule_session_recovery(clp->cl_session,
+				task->tk_status);
+		break;
 	default:
 		nfs4_schedule_lease_recovery(clp);
 	}
@@ -8258,7 +8248,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
 	if (status == 0)
 		status = task->tk_status;
 	rpc_put_task(task);
-	return 0;
 out:
 	dprintk("<-- %s status=%d\n", __func__, status);
 	return status;
@@ -8357,6 +8346,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 		 */
 		pnfs_mark_layout_stateid_invalid(lo, &head);
 		spin_unlock(&inode->i_lock);
+		nfs_commit_inode(inode, 0);
 		pnfs_free_lseg_list(&head);
 		status = -EAGAIN;
 		goto out;
@@ -8427,6 +8417,7 @@ static void nfs4_layoutget_release(void *calldata)
 	size_t max_pages = max_response_pages(server);
 
 	dprintk("--> %s\n", __func__);
+	nfs4_sequence_free_slot(&lgp->res.seq_res);
 	nfs4_free_pages(lgp->args.layout.pages, max_pages);
 	pnfs_put_layout_hdr(NFS_I(inode)->layout);
 	put_nfs_open_context(lgp->args.ctx);
@@ -8501,7 +8492,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
 	/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
 	if (status == 0 && lgp->res.layoutp->len)
 		lseg = pnfs_layout_process(lgp);
-	nfs4_sequence_free_slot(&lgp->res.seq_res);
 	rpc_put_task(task);
 	dprintk("<-- %s status=%d\n", __func__, status);
 	if (status)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8156bad6b441..cbf82b0d4467 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1649,13 +1649,14 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
 	nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
 }
 
-static void nfs4_reclaim_complete(struct nfs_client *clp,
+static int nfs4_reclaim_complete(struct nfs_client *clp,
 				 const struct nfs4_state_recovery_ops *ops,
 				 struct rpc_cred *cred)
 {
 	/* Notify the server we're done reclaiming our state */
 	if (ops->reclaim_complete)
-		(void)ops->reclaim_complete(clp, cred);
+		return ops->reclaim_complete(clp, cred);
+	return 0;
 }
 
 static void nfs4_clear_reclaim_server(struct nfs_server *server)
@@ -1702,13 +1703,16 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
 {
 	const struct nfs4_state_recovery_ops *ops;
 	struct rpc_cred *cred;
+	int err;
 
 	if (!nfs4_state_clear_reclaim_reboot(clp))
 		return;
 	ops = clp->cl_mvops->reboot_recovery_ops;
 	cred = nfs4_get_clid_cred(clp);
-	nfs4_reclaim_complete(clp, ops, cred);
+	err = nfs4_reclaim_complete(clp, ops, cred);
 	put_rpccred(cred);
+	if (err == -NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
+		set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
 }
 
 static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
@@ -2130,6 +2134,8 @@ again:
 	put_rpccred(cred);
 	switch (status) {
 	case 0:
+	case -EINTR:
+	case -ERESTARTSYS:
 		break;
 	case -ETIMEDOUT:
 		if (clnt->cl_softrtry)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 80ce289eea05..3aebfdc82b30 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1000,8 +1000,9 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve
 
 static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 				const struct nfs4_label *label,
+				const umode_t *umask,
 				const struct nfs_server *server,
-				bool excl_check, const umode_t *umask)
+				const uint32_t attrmask[])
 {
 	char owner_name[IDMAP_NAMESZ];
 	char owner_group[IDMAP_NAMESZ];
@@ -1016,22 +1017,20 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 	/*
 	 * We reserve enough space to write the entire attribute buffer at once.
 	 */
-	if (iap->ia_valid & ATTR_SIZE) {
+	if ((iap->ia_valid & ATTR_SIZE) && (attrmask[0] & FATTR4_WORD0_SIZE)) {
 		bmval[0] |= FATTR4_WORD0_SIZE;
 		len += 8;
 	}
-	if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
-		umask = NULL;
 	if (iap->ia_valid & ATTR_MODE) {
-		if (umask) {
+		if (umask && (attrmask[2] & FATTR4_WORD2_MODE_UMASK)) {
 			bmval[2] |= FATTR4_WORD2_MODE_UMASK;
 			len += 8;
-		} else {
+		} else if (attrmask[1] & FATTR4_WORD1_MODE) {
 			bmval[1] |= FATTR4_WORD1_MODE;
 			len += 4;
 		}
 	}
-	if (iap->ia_valid & ATTR_UID) {
+	if ((iap->ia_valid & ATTR_UID) && (attrmask[1] & FATTR4_WORD1_OWNER)) {
 		owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
 		if (owner_namelen < 0) {
 			dprintk("nfs: couldn't resolve uid %d to string\n",
@@ -1044,7 +1043,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 		bmval[1] |= FATTR4_WORD1_OWNER;
 		len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
 	}
-	if (iap->ia_valid & ATTR_GID) {
+	if ((iap->ia_valid & ATTR_GID) &&
+	   (attrmask[1] & FATTR4_WORD1_OWNER_GROUP)) {
 		owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ);
 		if (owner_grouplen < 0) {
 			dprintk("nfs: couldn't resolve gid %d to string\n",
@@ -1056,32 +1056,26 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 		bmval[1] |= FATTR4_WORD1_OWNER_GROUP;
 		len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);
 	}
-	if (iap->ia_valid & ATTR_ATIME_SET) {
-		bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
-		len += 16;
-	} else if (iap->ia_valid & ATTR_ATIME) {
-		bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
-		len += 4;
-	}
-	if (iap->ia_valid & ATTR_MTIME_SET) {
-		bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
-		len += 16;
-	} else if (iap->ia_valid & ATTR_MTIME) {
-		bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
-		len += 4;
+	if (attrmask[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
+		if (iap->ia_valid & ATTR_ATIME_SET) {
+			bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
+			len += 16;
+		} else if (iap->ia_valid & ATTR_ATIME) {
+			bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
+			len += 4;
+		}
 	}
-
-	if (excl_check) {
-		const u32 *excl_bmval = server->exclcreat_bitmask;
-		bmval[0] &= excl_bmval[0];
-		bmval[1] &= excl_bmval[1];
-		bmval[2] &= excl_bmval[2];
-
-		if (!(excl_bmval[2] & FATTR4_WORD2_SECURITY_LABEL))
-			label = NULL;
+	if (attrmask[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
+		if (iap->ia_valid & ATTR_MTIME_SET) {
+			bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
+			len += 16;
+		} else if (iap->ia_valid & ATTR_MTIME) {
+			bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
+			len += 4;
+		}
 	}
 
-	if (label) {
+	if (label && (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL)) {
 		len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2);
 		bmval[2] |= FATTR4_WORD2_SECURITY_LABEL;
 	}
@@ -1188,8 +1182,8 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
 	}
 
 	encode_string(xdr, create->name->len, create->name->name);
-	encode_attrs(xdr, create->attrs, create->label, create->server, false,
-		     &create->umask);
+	encode_attrs(xdr, create->attrs, create->label, &create->umask,
+			create->server, create->server->attr_bitmask);
 }
 
 static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr)
@@ -1409,13 +1403,13 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
 	switch(arg->createmode) {
 	case NFS4_CREATE_UNCHECKED:
 		*p = cpu_to_be32(NFS4_CREATE_UNCHECKED);
-		encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false,
-			     &arg->umask);
+		encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask,
+				arg->server, arg->server->attr_bitmask);
 		break;
 	case NFS4_CREATE_GUARDED:
 		*p = cpu_to_be32(NFS4_CREATE_GUARDED);
-		encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false,
-			     &arg->umask);
+		encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask,
+				arg->server, arg->server->attr_bitmask);
 		break;
 	case NFS4_CREATE_EXCLUSIVE:
 		*p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
@@ -1424,8 +1418,8 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
 	case NFS4_CREATE_EXCLUSIVE4_1:
 		*p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1);
 		encode_nfs4_verifier(xdr, &arg->u.verifier);
-		encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, true,
-			     &arg->umask);
+		encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask,
+				arg->server, arg->server->exclcreat_bitmask);
 	}
 }
 
@@ -1681,7 +1675,8 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
 {
 	encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr);
 	encode_nfs4_stateid(xdr, &arg->stateid);
-	encode_attrs(xdr, arg->iap, arg->label, server, false, NULL);
+	encode_attrs(xdr, arg->iap, arg->label, NULL, server,
+			server->attr_bitmask);
 }
 
 static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr)
@@ -2005,16 +2000,10 @@ encode_layoutcommit(struct xdr_stream *xdr,
 	*p++ = cpu_to_be32(0); /* Never send time_modify_changed */
 	*p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
 
-	if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) {
-		NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit(
-			NFS_I(inode)->layout, xdr, args);
-	} else {
-		encode_uint32(xdr, args->layoutupdate_len);
-		if (args->layoutupdate_pages) {
-			xdr_write_pages(xdr, args->layoutupdate_pages, 0,
-					args->layoutupdate_len);
-		}
-	}
+	encode_uint32(xdr, args->layoutupdate_len);
+	if (args->layoutupdate_pages)
+		xdr_write_pages(xdr, args->layoutupdate_pages, 0,
+				args->layoutupdate_len);
 
 	return 0;
 }
@@ -2024,7 +2013,6 @@ encode_layoutreturn(struct xdr_stream *xdr,
 		    const struct nfs4_layoutreturn_args *args,
 		    struct compound_hdr *hdr)
 {
-	const struct pnfs_layoutdriver_type *lr_ops = NFS_SERVER(args->inode)->pnfs_curr_ld;
 	__be32 *p;
 
 	encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr);
@@ -2041,8 +2029,6 @@ encode_layoutreturn(struct xdr_stream *xdr,
 	spin_unlock(&args->inode->i_lock);
 	if (args->ld_private->ops && args->ld_private->ops->encode)
 		args->ld_private->ops->encode(xdr, args, args->ld_private);
-	else if (lr_ops->encode_layoutreturn)
-		lr_ops->encode_layoutreturn(xdr, args);
 	else
 		encode_uint32(xdr, 0);
 }
@@ -5579,6 +5565,8 @@ static int decode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map)
 	unsigned int i;
 
 	p = xdr_inline_decode(xdr, 4);
+	if (!p)
+		return -EIO;
 	bitmap_words = be32_to_cpup(p++);
 	if (bitmap_words > NFS4_OP_MAP_NUM_WORDS)
 		return -EIO;
diff --git a/fs/nfs/objlayout/Kbuild b/fs/nfs/objlayout/Kbuild
deleted file mode 100644
index ed30ea072bb8..000000000000
--- a/fs/nfs/objlayout/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the pNFS Objects Layout Driver kernel module
-#
-objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o
-obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
deleted file mode 100644
index 049c1b1f2932..000000000000
--- a/fs/nfs/objlayout/objio_osd.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/*
- *  pNFS Objects layout implementation over open-osd initiator library
- *
- *  Copyright (C) 2009 Panasas Inc. [year of first publication]
- *  All rights reserved.
- *
- *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <ooo@electrozaur.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  See the file COPYING included with this distribution for more details.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the Panasas company nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/module.h>
-#include <scsi/osd_ore.h>
-
-#include "objlayout.h"
-#include "../internal.h"
-
-#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
-
-struct objio_dev_ent {
-	struct nfs4_deviceid_node id_node;
-	struct ore_dev od;
-};
-
-static void
-objio_free_deviceid_node(struct nfs4_deviceid_node *d)
-{
-	struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node);
-
-	dprintk("%s: free od=%p\n", __func__, de->od.od);
-	osduld_put_device(de->od.od);
-	kfree_rcu(d, rcu);
-}
-
-struct objio_segment {
-	struct pnfs_layout_segment lseg;
-
-	struct ore_layout layout;
-	struct ore_components oc;
-};
-
-static inline struct objio_segment *
-OBJIO_LSEG(struct pnfs_layout_segment *lseg)
-{
-	return container_of(lseg, struct objio_segment, lseg);
-}
-
-struct objio_state {
-	/* Generic layer */
-	struct objlayout_io_res oir;
-
-	bool sync;
-	/*FIXME: Support for extra_bytes at ore_get_rw_state() */
-	struct ore_io_state *ios;
-};
-
-/* Send and wait for a get_device_info of devices in the layout,
-   then look them up with the osd_initiator library */
-struct nfs4_deviceid_node *
-objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
-			gfp_t gfp_flags)
-{
-	struct pnfs_osd_deviceaddr *deviceaddr;
-	struct objio_dev_ent *ode = NULL;
-	struct osd_dev *od;
-	struct osd_dev_info odi;
-	bool retry_flag = true;
-	__be32 *p;
-	int err;
-
-	deviceaddr = kzalloc(sizeof(*deviceaddr), gfp_flags);
-	if (!deviceaddr)
-		return NULL;
-
-	p = page_address(pdev->pages[0]);
-	pnfs_osd_xdr_decode_deviceaddr(deviceaddr, p);
-
-	odi.systemid_len = deviceaddr->oda_systemid.len;
-	if (odi.systemid_len > sizeof(odi.systemid)) {
-		dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n",
-			__func__, sizeof(odi.systemid));
-		err = -EINVAL;
-		goto out;
-	} else if (odi.systemid_len)
-		memcpy(odi.systemid, deviceaddr->oda_systemid.data,
-		       odi.systemid_len);
-	odi.osdname_len	 = deviceaddr->oda_osdname.len;
-	odi.osdname	 = (u8 *)deviceaddr->oda_osdname.data;
-
-	if (!odi.osdname_len && !odi.systemid_len) {
-		dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
-			__func__);
-		err = -ENODEV;
-		goto out;
-	}
-
-retry_lookup:
-	od = osduld_info_lookup(&odi);
-	if (IS_ERR(od)) {
-		err = PTR_ERR(od);
-		dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
-		if (err == -ENODEV && retry_flag) {
-			err = objlayout_autologin(deviceaddr);
-			if (likely(!err)) {
-				retry_flag = false;
-				goto retry_lookup;
-			}
-		}
-		goto out;
-	}
-
-	dprintk("Adding new dev_id(%llx:%llx)\n",
-		_DEVID_LO(&pdev->dev_id), _DEVID_HI(&pdev->dev_id));
-
-	ode = kzalloc(sizeof(*ode), gfp_flags);
-	if (!ode) {
-		dprintk("%s: -ENOMEM od=%p\n", __func__, od);
-		goto out;
-	}
-
-	nfs4_init_deviceid_node(&ode->id_node, server, &pdev->dev_id);
-	kfree(deviceaddr);
-
-	ode->od.od = od;
-	return &ode->id_node;
-
-out:
-	kfree(deviceaddr);
-	return NULL;
-}
-
-static void copy_single_comp(struct ore_components *oc, unsigned c,
-			     struct pnfs_osd_object_cred *src_comp)
-{
-	struct ore_comp *ocomp = &oc->comps[c];
-
-	WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */
-	WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred));
-
-	ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id;
-	ocomp->obj.id = src_comp->oc_object_id.oid_object_id;
-
-	memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
-}
-
-static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
-		       struct objio_segment **pseg)
-{
-/*	This is the in memory structure of the objio_segment
- *
- *	struct __alloc_objio_segment {
- *		struct objio_segment olseg;
- *		struct ore_dev *ods[numdevs];
- *		struct ore_comp	comps[numdevs];
- *	} *aolseg;
- *	NOTE: The code as above compiles and runs perfectly. It is elegant,
- *	type safe and compact. At some Past time Linus has decided he does not
- *	like variable length arrays, For the sake of this principal we uglify
- *	the code as below.
- */
-	struct objio_segment *lseg;
-	size_t lseg_size = sizeof(*lseg) +
-			numdevs * sizeof(lseg->oc.ods[0]) +
-			numdevs * sizeof(*lseg->oc.comps);
-
-	lseg = kzalloc(lseg_size, gfp_flags);
-	if (unlikely(!lseg)) {
-		dprintk("%s: Failed allocation numdevs=%d size=%zd\n", __func__,
-			numdevs, lseg_size);
-		return -ENOMEM;
-	}
-
-	lseg->oc.numdevs = numdevs;
-	lseg->oc.single_comp = EC_MULTPLE_COMPS;
-	lseg->oc.ods = (void *)(lseg + 1);
-	lseg->oc.comps = (void *)(lseg->oc.ods + numdevs);
-
-	*pseg = lseg;
-	return 0;
-}
-
-int objio_alloc_lseg(struct pnfs_layout_segment **outp,
-	struct pnfs_layout_hdr *pnfslay,
-	struct pnfs_layout_range *range,
-	struct xdr_stream *xdr,
-	gfp_t gfp_flags)
-{
-	struct nfs_server *server = NFS_SERVER(pnfslay->plh_inode);
-	struct objio_segment *objio_seg;
-	struct pnfs_osd_xdr_decode_layout_iter iter;
-	struct pnfs_osd_layout layout;
-	struct pnfs_osd_object_cred src_comp;
-	unsigned cur_comp;
-	int err;
-
-	err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
-	if (unlikely(err))
-		return err;
-
-	err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg);
-	if (unlikely(err))
-		return err;
-
-	objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit;
-	objio_seg->layout.group_width = layout.olo_map.odm_group_width;
-	objio_seg->layout.group_depth = layout.olo_map.odm_group_depth;
-	objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
-	objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm;
-
-	err = ore_verify_layout(layout.olo_map.odm_num_comps,
-					  &objio_seg->layout);
-	if (unlikely(err))
-		goto err;
-
-	objio_seg->oc.first_dev = layout.olo_comps_index;
-	cur_comp = 0;
-	while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) {
-		struct nfs4_deviceid_node *d;
-		struct objio_dev_ent *ode;
-
-		copy_single_comp(&objio_seg->oc, cur_comp, &src_comp);
-
-		d = nfs4_find_get_deviceid(server,
-				&src_comp.oc_object_id.oid_device_id,
-				pnfslay->plh_lc_cred, gfp_flags);
-		if (!d) {
-			err = -ENXIO;
-			goto err;
-		}
-
-		ode = container_of(d, struct objio_dev_ent, id_node);
-		objio_seg->oc.ods[cur_comp++] = &ode->od;
-	}
-	/* pnfs_osd_xdr_decode_layout_comp returns false on error */
-	if (unlikely(err))
-		goto err;
-
-	*outp = &objio_seg->lseg;
-	return 0;
-
-err:
-	kfree(objio_seg);
-	dprintk("%s: Error: return %d\n", __func__, err);
-	*outp = NULL;
-	return err;
-}
-
-void objio_free_lseg(struct pnfs_layout_segment *lseg)
-{
-	int i;
-	struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
-
-	for (i = 0; i < objio_seg->oc.numdevs; i++) {
-		struct ore_dev *od = objio_seg->oc.ods[i];
-		struct objio_dev_ent *ode;
-
-		if (!od)
-			break;
-		ode = container_of(od, typeof(*ode), od);
-		nfs4_put_deviceid_node(&ode->id_node);
-	}
-	kfree(objio_seg);
-}
-
-static int
-objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading,
-	struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase,
-	loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags,
-	struct objio_state **outp)
-{
-	struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
-	struct ore_io_state *ios;
-	int ret;
-	struct __alloc_objio_state {
-		struct objio_state objios;
-		struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs];
-	} *aos;
-
-	aos = kzalloc(sizeof(*aos), gfp_flags);
-	if (unlikely(!aos))
-		return -ENOMEM;
-
-	objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs,
-			aos->ioerrs, rpcdata, pnfs_layout_type);
-
-	ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading,
-			       offset, count, &ios);
-	if (unlikely(ret)) {
-		kfree(aos);
-		return ret;
-	}
-
-	ios->pages = pages;
-	ios->pgbase = pgbase;
-	ios->private = aos;
-	BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT);
-
-	aos->objios.sync = 0;
-	aos->objios.ios = ios;
-	*outp = &aos->objios;
-	return 0;
-}
-
-void objio_free_result(struct objlayout_io_res *oir)
-{
-	struct objio_state *objios = container_of(oir, struct objio_state, oir);
-
-	ore_put_io_state(objios->ios);
-	kfree(objios);
-}
-
-static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
-{
-	switch (oep) {
-	case OSD_ERR_PRI_NO_ERROR:
-		return (enum pnfs_osd_errno)0;
-
-	case OSD_ERR_PRI_CLEAR_PAGES:
-		BUG_ON(1);
-		return 0;
-
-	case OSD_ERR_PRI_RESOURCE:
-		return PNFS_OSD_ERR_RESOURCE;
-	case OSD_ERR_PRI_BAD_CRED:
-		return PNFS_OSD_ERR_BAD_CRED;
-	case OSD_ERR_PRI_NO_ACCESS:
-		return PNFS_OSD_ERR_NO_ACCESS;
-	case OSD_ERR_PRI_UNREACHABLE:
-		return PNFS_OSD_ERR_UNREACHABLE;
-	case OSD_ERR_PRI_NOT_FOUND:
-		return PNFS_OSD_ERR_NOT_FOUND;
-	case OSD_ERR_PRI_NO_SPACE:
-		return PNFS_OSD_ERR_NO_SPACE;
-	default:
-		WARN_ON(1);
-		/* fallthrough */
-	case OSD_ERR_PRI_EIO:
-		return PNFS_OSD_ERR_EIO;
-	}
-}
-
-static void __on_dev_error(struct ore_io_state *ios,
-	struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep,
-	u64 dev_offset, u64  dev_len)
-{
-	struct objio_state *objios = ios->private;
-	struct pnfs_osd_objid pooid;
-	struct objio_dev_ent *ode = container_of(od, typeof(*ode), od);
-	/* FIXME: what to do with more-then-one-group layouts. We need to
-	 * translate from ore_io_state index to oc->comps index
-	 */
-	unsigned comp = dev_index;
-
-	pooid.oid_device_id = ode->id_node.deviceid;
-	pooid.oid_partition_id = ios->oc->comps[comp].obj.partition;
-	pooid.oid_object_id = ios->oc->comps[comp].obj.id;
-
-	objlayout_io_set_result(&objios->oir, comp,
-				&pooid, osd_pri_2_pnfs_err(oep),
-				dev_offset, dev_len, !ios->reading);
-}
-
-/*
- * read
- */
-static void _read_done(struct ore_io_state *ios, void *private)
-{
-	struct objio_state *objios = private;
-	ssize_t status;
-	int ret = ore_check_io(ios, &__on_dev_error);
-
-	/* FIXME: _io_free(ios) can we dealocate the libosd resources; */
-
-	if (likely(!ret))
-		status = ios->length;
-	else
-		status = ret;
-
-	objlayout_read_done(&objios->oir, status, objios->sync);
-}
-
-int objio_read_pagelist(struct nfs_pgio_header *hdr)
-{
-	struct objio_state *objios;
-	int ret;
-
-	ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
-			hdr->lseg, hdr->args.pages, hdr->args.pgbase,
-			hdr->args.offset, hdr->args.count, hdr,
-			GFP_KERNEL, &objios);
-	if (unlikely(ret))
-		return ret;
-
-	objios->ios->done = _read_done;
-	dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
-		hdr->args.offset, hdr->args.count);
-	ret = ore_read(objios->ios);
-	if (unlikely(ret))
-		objio_free_result(&objios->oir);
-	return ret;
-}
-
-/*
- * write
- */
-static void _write_done(struct ore_io_state *ios, void *private)
-{
-	struct objio_state *objios = private;
-	ssize_t status;
-	int ret = ore_check_io(ios, &__on_dev_error);
-
-	/* FIXME: _io_free(ios) can we dealocate the libosd resources; */
-
-	if (likely(!ret)) {
-		/* FIXME: should be based on the OSD's persistence model
-		 * See OSD2r05 Section 4.13 Data persistence model */
-		objios->oir.committed = NFS_FILE_SYNC;
-		status = ios->length;
-	} else {
-		status = ret;
-	}
-
-	objlayout_write_done(&objios->oir, status, objios->sync);
-}
-
-static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
-{
-	struct objio_state *objios = priv;
-	struct nfs_pgio_header *hdr = objios->oir.rpcdata;
-	struct address_space *mapping = hdr->inode->i_mapping;
-	pgoff_t index = offset / PAGE_SIZE;
-	struct page *page;
-	loff_t i_size = i_size_read(hdr->inode);
-
-	if (offset >= i_size) {
-		*uptodate = true;
-		dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
-		return ZERO_PAGE(0);
-	}
-
-	page = find_get_page(mapping, index);
-	if (!page) {
-		page = find_or_create_page(mapping, index, GFP_NOFS);
-		if (unlikely(!page)) {
-			dprintk("%s: grab_cache_page Failed index=0x%lx\n",
-				__func__, index);
-			return NULL;
-		}
-		unlock_page(page);
-	}
-	*uptodate = PageUptodate(page);
-	dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);
-	return page;
-}
-
-static void __r4w_put_page(void *priv, struct page *page)
-{
-	dprintk("%s: index=0x%lx\n", __func__,
-		(page == ZERO_PAGE(0)) ? -1UL : page->index);
-	if (ZERO_PAGE(0) != page)
-		put_page(page);
-	return;
-}
-
-static const struct _ore_r4w_op _r4w_op = {
-	.get_page = &__r4w_get_page,
-	.put_page = &__r4w_put_page,
-};
-
-int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
-{
-	struct objio_state *objios;
-	int ret;
-
-	ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
-			hdr->lseg, hdr->args.pages, hdr->args.pgbase,
-			hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
-			&objios);
-	if (unlikely(ret))
-		return ret;
-
-	objios->sync = 0 != (how & FLUSH_SYNC);
-	objios->ios->r4w = &_r4w_op;
-
-	if (!objios->sync)
-		objios->ios->done = _write_done;
-
-	dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
-		hdr->args.offset, hdr->args.count);
-	ret = ore_write(objios->ios);
-	if (unlikely(ret)) {
-		objio_free_result(&objios->oir);
-		return ret;
-	}
-
-	if (objios->sync)
-		_write_done(objios->ios, objios);
-
-	return 0;
-}
-
-/*
- * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
- * of bytes (maximum @req->wb_bytes) that can be coalesced.
- */
-static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
-			  struct nfs_page *prev, struct nfs_page *req)
-{
-	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(pgio);
-	unsigned int size;
-
-	size = pnfs_generic_pg_test(pgio, prev, req);
-
-	if (!size || mirror->pg_count + req->wb_bytes >
-	    (unsigned long)pgio->pg_layout_private)
-		return 0;
-
-	return min(size, req->wb_bytes);
-}
-
-static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
-{
-	pnfs_generic_pg_init_read(pgio, req);
-	if (unlikely(pgio->pg_lseg == NULL))
-		return; /* Not pNFS */
-
-	pgio->pg_layout_private = (void *)
-				OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
-}
-
-static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
-				   unsigned long *stripe_end)
-{
-	u32 stripe_off;
-	unsigned stripe_size;
-
-	if (layout->raid_algorithm == PNFS_OSD_RAID_0)
-		return true;
-
-	stripe_size = layout->stripe_unit *
-				(layout->group_width - layout->parity);
-
-	div_u64_rem(offset, stripe_size, &stripe_off);
-	if (!stripe_off)
-		return true;
-
-	*stripe_end = stripe_size - stripe_off;
-	return false;
-}
-
-static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
-{
-	unsigned long stripe_end = 0;
-	u64 wb_size;
-
-	if (pgio->pg_dreq == NULL)
-		wb_size = i_size_read(pgio->pg_inode) - req_offset(req);
-	else
-		wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
-
-	pnfs_generic_pg_init_write(pgio, req, wb_size);
-	if (unlikely(pgio->pg_lseg == NULL))
-		return; /* Not pNFS */
-
-	if (req->wb_offset ||
-	    !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE,
-			       &OBJIO_LSEG(pgio->pg_lseg)->layout,
-			       &stripe_end)) {
-		pgio->pg_layout_private = (void *)stripe_end;
-	} else {
-		pgio->pg_layout_private = (void *)
-				OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
-	}
-}
-
-static const struct nfs_pageio_ops objio_pg_read_ops = {
-	.pg_init = objio_init_read,
-	.pg_test = objio_pg_test,
-	.pg_doio = pnfs_generic_pg_readpages,
-	.pg_cleanup = pnfs_generic_pg_cleanup,
-};
-
-static const struct nfs_pageio_ops objio_pg_write_ops = {
-	.pg_init = objio_init_write,
-	.pg_test = objio_pg_test,
-	.pg_doio = pnfs_generic_pg_writepages,
-	.pg_cleanup = pnfs_generic_pg_cleanup,
-};
-
-static struct pnfs_layoutdriver_type objlayout_type = {
-	.id = LAYOUT_OSD2_OBJECTS,
-	.name = "LAYOUT_OSD2_OBJECTS",
-	.flags                   = PNFS_LAYOUTRET_ON_SETATTR |
-				   PNFS_LAYOUTRET_ON_ERROR,
-
-	.max_deviceinfo_size	 = PAGE_SIZE,
-	.owner		       	 = THIS_MODULE,
-	.alloc_layout_hdr        = objlayout_alloc_layout_hdr,
-	.free_layout_hdr         = objlayout_free_layout_hdr,
-
-	.alloc_lseg              = objlayout_alloc_lseg,
-	.free_lseg               = objlayout_free_lseg,
-
-	.read_pagelist           = objlayout_read_pagelist,
-	.write_pagelist          = objlayout_write_pagelist,
-	.pg_read_ops             = &objio_pg_read_ops,
-	.pg_write_ops            = &objio_pg_write_ops,
-
-	.sync			 = pnfs_generic_sync,
-
-	.free_deviceid_node	 = objio_free_deviceid_node,
-
-	.encode_layoutcommit	 = objlayout_encode_layoutcommit,
-	.encode_layoutreturn     = objlayout_encode_layoutreturn,
-};
-
-MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
-MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
-MODULE_LICENSE("GPL");
-
-static int __init
-objlayout_init(void)
-{
-	int ret = pnfs_register_layoutdriver(&objlayout_type);
-
-	if (ret)
-		printk(KERN_INFO
-			"NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n",
-			__func__, ret);
-	else
-		printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n",
-			__func__);
-	return ret;
-}
-
-static void __exit
-objlayout_exit(void)
-{
-	pnfs_unregister_layoutdriver(&objlayout_type);
-	printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n",
-	       __func__);
-}
-
-MODULE_ALIAS("nfs-layouttype4-2");
-
-module_init(objlayout_init);
-module_exit(objlayout_exit);
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
deleted file mode 100644
index 8f3d2acb81c3..000000000000
--- a/fs/nfs/objlayout/objlayout.c
+++ /dev/null
@@ -1,706 +0,0 @@
-/*
- *  pNFS Objects layout driver high level definitions
- *
- *  Copyright (C) 2007 Panasas Inc. [year of first publication]
- *  All rights reserved.
- *
- *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <ooo@electrozaur.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  See the file COPYING included with this distribution for more details.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the Panasas company nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/kmod.h>
-#include <linux/moduleparam.h>
-#include <linux/ratelimit.h>
-#include <scsi/osd_initiator.h>
-#include "objlayout.h"
-
-#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
-/*
- * Create a objlayout layout structure for the given inode and return it.
- */
-struct pnfs_layout_hdr *
-objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
-{
-	struct objlayout *objlay;
-
-	objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
-	if (!objlay)
-		return NULL;
-	spin_lock_init(&objlay->lock);
-	INIT_LIST_HEAD(&objlay->err_list);
-	dprintk("%s: Return %p\n", __func__, objlay);
-	return &objlay->pnfs_layout;
-}
-
-/*
- * Free an objlayout layout structure
- */
-void
-objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
-{
-	struct objlayout *objlay = OBJLAYOUT(lo);
-
-	dprintk("%s: objlay %p\n", __func__, objlay);
-
-	WARN_ON(!list_empty(&objlay->err_list));
-	kfree(objlay);
-}
-
-/*
- * Unmarshall layout and store it in pnfslay.
- */
-struct pnfs_layout_segment *
-objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
-		     struct nfs4_layoutget_res *lgr,
-		     gfp_t gfp_flags)
-{
-	int status = -ENOMEM;
-	struct xdr_stream stream;
-	struct xdr_buf buf = {
-		.pages =  lgr->layoutp->pages,
-		.page_len =  lgr->layoutp->len,
-		.buflen =  lgr->layoutp->len,
-		.len = lgr->layoutp->len,
-	};
-	struct page *scratch;
-	struct pnfs_layout_segment *lseg;
-
-	dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);
-
-	scratch = alloc_page(gfp_flags);
-	if (!scratch)
-		goto err_nofree;
-
-	xdr_init_decode(&stream, &buf, NULL);
-	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
-
-	status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
-	if (unlikely(status)) {
-		dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
-			status);
-		goto err;
-	}
-
-	__free_page(scratch);
-
-	dprintk("%s: Return %p\n", __func__, lseg);
-	return lseg;
-
-err:
-	__free_page(scratch);
-err_nofree:
-	dprintk("%s: Err Return=>%d\n", __func__, status);
-	return ERR_PTR(status);
-}
-
-/*
- * Free a layout segement
- */
-void
-objlayout_free_lseg(struct pnfs_layout_segment *lseg)
-{
-	dprintk("%s: freeing layout segment %p\n", __func__, lseg);
-
-	if (unlikely(!lseg))
-		return;
-
-	objio_free_lseg(lseg);
-}
-
-/*
- * I/O Operations
- */
-static inline u64
-end_offset(u64 start, u64 len)
-{
-	u64 end;
-
-	end = start + len;
-	return end >= start ? end : NFS4_MAX_UINT64;
-}
-
-static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
-			   struct page ***p_pages, unsigned *p_pgbase,
-			   u64 offset, unsigned long count)
-{
-	u64 lseg_end_offset;
-
-	BUG_ON(offset < lseg->pls_range.offset);
-	lseg_end_offset = end_offset(lseg->pls_range.offset,
-				     lseg->pls_range.length);
-	BUG_ON(offset >= lseg_end_offset);
-	WARN_ON(offset + count > lseg_end_offset);
-
-	if (*p_pgbase > PAGE_SIZE) {
-		dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase);
-		*p_pages += *p_pgbase >> PAGE_SHIFT;
-		*p_pgbase &= ~PAGE_MASK;
-	}
-}
-
-/*
- * I/O done common code
- */
-static void
-objlayout_iodone(struct objlayout_io_res *oir)
-{
-	if (likely(oir->status >= 0)) {
-		objio_free_result(oir);
-	} else {
-		struct objlayout *objlay = oir->objlay;
-
-		spin_lock(&objlay->lock);
-		objlay->delta_space_valid = OBJ_DSU_INVALID;
-		list_add(&objlay->err_list, &oir->err_list);
-		spin_unlock(&objlay->lock);
-	}
-}
-
-/*
- * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
- *
- * The @index component IO failed (error returned from target). Register
- * the error for later reporting at layout-return.
- */
-void
-objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
-			struct pnfs_osd_objid *pooid, int osd_error,
-			u64 offset, u64 length, bool is_write)
-{
-	struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index];
-
-	BUG_ON(index >= oir->num_comps);
-	if (osd_error) {
-		ioerr->oer_component = *pooid;
-		ioerr->oer_comp_offset = offset;
-		ioerr->oer_comp_length = length;
-		ioerr->oer_iswrite = is_write;
-		ioerr->oer_errno = osd_error;
-
-		dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
-			"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
-			__func__, index, ioerr->oer_errno,
-			ioerr->oer_iswrite,
-			_DEVID_LO(&ioerr->oer_component.oid_device_id),
-			_DEVID_HI(&ioerr->oer_component.oid_device_id),
-			ioerr->oer_component.oid_partition_id,
-			ioerr->oer_component.oid_object_id,
-			ioerr->oer_comp_offset,
-			ioerr->oer_comp_length);
-	} else {
-		/* User need not call if no error is reported */
-		ioerr->oer_errno = 0;
-	}
-}
-
-/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
- * This is because the osd completion is called with ints-off from
- * the block layer
- */
-static void _rpc_read_complete(struct work_struct *work)
-{
-	struct rpc_task *task;
-	struct nfs_pgio_header *hdr;
-
-	dprintk("%s enter\n", __func__);
-	task = container_of(work, struct rpc_task, u.tk_work);
-	hdr = container_of(task, struct nfs_pgio_header, task);
-
-	pnfs_ld_read_done(hdr);
-}
-
-void
-objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
-{
-	struct nfs_pgio_header *hdr = oir->rpcdata;
-
-	oir->status = hdr->task.tk_status = status;
-	if (status >= 0)
-		hdr->res.count = status;
-	else
-		hdr->pnfs_error = status;
-	objlayout_iodone(oir);
-	/* must not use oir after this point */
-
-	dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
-		status, hdr->res.eof, sync);
-
-	if (sync)
-		pnfs_ld_read_done(hdr);
-	else {
-		INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete);
-		schedule_work(&hdr->task.u.tk_work);
-	}
-}
-
-/*
- * Perform sync or async reads.
- */
-enum pnfs_try_status
-objlayout_read_pagelist(struct nfs_pgio_header *hdr)
-{
-	struct inode *inode = hdr->inode;
-	loff_t offset = hdr->args.offset;
-	size_t count = hdr->args.count;
-	int err;
-	loff_t eof;
-
-	eof = i_size_read(inode);
-	if (unlikely(offset + count > eof)) {
-		if (offset >= eof) {
-			err = 0;
-			hdr->res.count = 0;
-			hdr->res.eof = 1;
-			/*FIXME: do we need to call pnfs_ld_read_done() */
-			goto out;
-		}
-		count = eof - offset;
-	}
-
-	hdr->res.eof = (offset + count) >= eof;
-	_fix_verify_io_params(hdr->lseg, &hdr->args.pages,
-			      &hdr->args.pgbase,
-			      hdr->args.offset, hdr->args.count);
-
-	dprintk("%s: inode(%lx) offset 0x%llx count 0x%zx eof=%d\n",
-		__func__, inode->i_ino, offset, count, hdr->res.eof);
-
-	err = objio_read_pagelist(hdr);
- out:
-	if (unlikely(err)) {
-		hdr->pnfs_error = err;
-		dprintk("%s: Returned Error %d\n", __func__, err);
-		return PNFS_NOT_ATTEMPTED;
-	}
-	return PNFS_ATTEMPTED;
-}
-
-/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
- * This is because the osd completion is called with ints-off from
- * the block layer
- */
-static void _rpc_write_complete(struct work_struct *work)
-{
-	struct rpc_task *task;
-	struct nfs_pgio_header *hdr;
-
-	dprintk("%s enter\n", __func__);
-	task = container_of(work, struct rpc_task, u.tk_work);
-	hdr = container_of(task, struct nfs_pgio_header, task);
-
-	pnfs_ld_write_done(hdr);
-}
-
-void
-objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
-{
-	struct nfs_pgio_header *hdr = oir->rpcdata;
-
-	oir->status = hdr->task.tk_status = status;
-	if (status >= 0) {
-		hdr->res.count = status;
-		hdr->verf.committed = oir->committed;
-	} else {
-		hdr->pnfs_error = status;
-	}
-	objlayout_iodone(oir);
-	/* must not use oir after this point */
-
-	dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
-		status, hdr->verf.committed, sync);
-
-	if (sync)
-		pnfs_ld_write_done(hdr);
-	else {
-		INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete);
-		schedule_work(&hdr->task.u.tk_work);
-	}
-}
-
-/*
- * Perform sync or async writes.
- */
-enum pnfs_try_status
-objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how)
-{
-	int err;
-
-	_fix_verify_io_params(hdr->lseg, &hdr->args.pages,
-			      &hdr->args.pgbase,
-			      hdr->args.offset, hdr->args.count);
-
-	err = objio_write_pagelist(hdr, how);
-	if (unlikely(err)) {
-		hdr->pnfs_error = err;
-		dprintk("%s: Returned Error %d\n", __func__, err);
-		return PNFS_NOT_ATTEMPTED;
-	}
-	return PNFS_ATTEMPTED;
-}
-
-void
-objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
-			      struct xdr_stream *xdr,
-			      const struct nfs4_layoutcommit_args *args)
-{
-	struct objlayout *objlay = OBJLAYOUT(pnfslay);
-	struct pnfs_osd_layoutupdate lou;
-	__be32 *start;
-
-	dprintk("%s: Begin\n", __func__);
-
-	spin_lock(&objlay->lock);
-	lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
-	lou.dsu_delta = objlay->delta_space_used;
-	objlay->delta_space_used = 0;
-	objlay->delta_space_valid = OBJ_DSU_INIT;
-	lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
-	spin_unlock(&objlay->lock);
-
-	start = xdr_reserve_space(xdr, 4);
-
-	BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
-
-	*start = cpu_to_be32((xdr->p - start - 1) * 4);
-
-	dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
-		lou.dsu_delta, lou.olu_ioerr_flag);
-}
-
-static int
-err_prio(u32 oer_errno)
-{
-	switch (oer_errno) {
-	case 0:
-		return 0;
-
-	case PNFS_OSD_ERR_RESOURCE:
-		return OSD_ERR_PRI_RESOURCE;
-	case PNFS_OSD_ERR_BAD_CRED:
-		return OSD_ERR_PRI_BAD_CRED;
-	case PNFS_OSD_ERR_NO_ACCESS:
-		return OSD_ERR_PRI_NO_ACCESS;
-	case PNFS_OSD_ERR_UNREACHABLE:
-		return OSD_ERR_PRI_UNREACHABLE;
-	case PNFS_OSD_ERR_NOT_FOUND:
-		return OSD_ERR_PRI_NOT_FOUND;
-	case PNFS_OSD_ERR_NO_SPACE:
-		return OSD_ERR_PRI_NO_SPACE;
-	default:
-		WARN_ON(1);
-		/* fallthrough */
-	case PNFS_OSD_ERR_EIO:
-		return OSD_ERR_PRI_EIO;
-	}
-}
-
-static void
-merge_ioerr(struct pnfs_osd_ioerr *dest_err,
-	    const struct pnfs_osd_ioerr *src_err)
-{
-	u64 dest_end, src_end;
-
-	if (!dest_err->oer_errno) {
-		*dest_err = *src_err;
-		/* accumulated device must be blank */
-		memset(&dest_err->oer_component.oid_device_id, 0,
-			sizeof(dest_err->oer_component.oid_device_id));
-
-		return;
-	}
-
-	if (dest_err->oer_component.oid_partition_id !=
-				src_err->oer_component.oid_partition_id)
-		dest_err->oer_component.oid_partition_id = 0;
-
-	if (dest_err->oer_component.oid_object_id !=
-				src_err->oer_component.oid_object_id)
-		dest_err->oer_component.oid_object_id = 0;
-
-	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
-		dest_err->oer_comp_offset = src_err->oer_comp_offset;
-
-	dest_end = end_offset(dest_err->oer_comp_offset,
-			      dest_err->oer_comp_length);
-	src_end =  end_offset(src_err->oer_comp_offset,
-			      src_err->oer_comp_length);
-	if (dest_end < src_end)
-		dest_end = src_end;
-
-	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
-
-	if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
-	    (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
-			dest_err->oer_errno = src_err->oer_errno;
-	} else if (src_err->oer_iswrite) {
-		dest_err->oer_iswrite = true;
-		dest_err->oer_errno = src_err->oer_errno;
-	}
-}
-
-static void
-encode_accumulated_error(struct objlayout *objlay, __be32 *p)
-{
-	struct objlayout_io_res *oir, *tmp;
-	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
-
-	list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
-		unsigned i;
-
-		for (i = 0; i < oir->num_comps; i++) {
-			struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
-
-			if (!ioerr->oer_errno)
-				continue;
-
-			printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
-				"is_write=%d dev(%llx:%llx) par=0x%llx "
-				"obj=0x%llx offset=0x%llx length=0x%llx\n",
-				__func__, i, ioerr->oer_errno,
-				ioerr->oer_iswrite,
-				_DEVID_LO(&ioerr->oer_component.oid_device_id),
-				_DEVID_HI(&ioerr->oer_component.oid_device_id),
-				ioerr->oer_component.oid_partition_id,
-				ioerr->oer_component.oid_object_id,
-				ioerr->oer_comp_offset,
-				ioerr->oer_comp_length);
-
-			merge_ioerr(&accumulated_err, ioerr);
-		}
-		list_del(&oir->err_list);
-		objio_free_result(oir);
-	}
-
-	pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
-}
-
-void
-objlayout_encode_layoutreturn(struct xdr_stream *xdr,
-			      const struct nfs4_layoutreturn_args *args)
-{
-	struct pnfs_layout_hdr *pnfslay = args->layout;
-	struct objlayout *objlay = OBJLAYOUT(pnfslay);
-	struct objlayout_io_res *oir, *tmp;
-	__be32 *start;
-
-	dprintk("%s: Begin\n", __func__);
-	start = xdr_reserve_space(xdr, 4);
-	BUG_ON(!start);
-
-	spin_lock(&objlay->lock);
-
-	list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
-		__be32 *last_xdr = NULL, *p;
-		unsigned i;
-		int res = 0;
-
-		for (i = 0; i < oir->num_comps; i++) {
-			struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
-
-			if (!ioerr->oer_errno)
-				continue;
-
-			dprintk("%s: err[%d]: errno=%d is_write=%d "
-				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
-				"offset=0x%llx length=0x%llx\n",
-				__func__, i, ioerr->oer_errno,
-				ioerr->oer_iswrite,
-				_DEVID_LO(&ioerr->oer_component.oid_device_id),
-				_DEVID_HI(&ioerr->oer_component.oid_device_id),
-				ioerr->oer_component.oid_partition_id,
-				ioerr->oer_component.oid_object_id,
-				ioerr->oer_comp_offset,
-				ioerr->oer_comp_length);
-
-			p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
-			if (unlikely(!p)) {
-				res = -E2BIG;
-				break; /* accumulated_error */
-			}
-
-			last_xdr = p;
-			pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]);
-		}
-
-		/* TODO: use xdr_write_pages */
-		if (unlikely(res)) {
-			/* no space for even one error descriptor */
-			BUG_ON(!last_xdr);
-
-			/* we've encountered a situation with lots and lots of
-			 * errors and no space to encode them all. Use the last
-			 * available slot to report the union of all the
-			 * remaining errors.
-			 */
-			encode_accumulated_error(objlay, last_xdr);
-			goto loop_done;
-		}
-		list_del(&oir->err_list);
-		objio_free_result(oir);
-	}
-loop_done:
-	spin_unlock(&objlay->lock);
-
-	*start = cpu_to_be32((xdr->p - start - 1) * 4);
-	dprintk("%s: Return\n", __func__);
-}
-
-enum {
-	OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
-	OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
-	OSD_LOGIN_UPCALL_PATHLEN  = 256
-};
-
-static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";
-
-module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
-		    0600);
-MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");
-
-struct __auto_login {
-	char uri[OBJLAYOUT_MAX_URI_LEN];
-	char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];
-	char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN];
-};
-
-static int __objlayout_upcall(struct __auto_login *login)
-{
-	static char *envp[] = { "HOME=/",
-		"TERM=linux",
-		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
-		NULL
-	};
-	char *argv[8];
-	int ret;
-
-	if (unlikely(!osd_login_prog[0])) {
-		dprintk("%s: osd_login_prog is disabled\n", __func__);
-		return -EACCES;
-	}
-
-	dprintk("%s uri: %s\n", __func__, login->uri);
-	dprintk("%s osdname %s\n", __func__, login->osdname);
-	dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);
-
-	argv[0] = (char *)osd_login_prog;
-	argv[1] = "-u";
-	argv[2] = login->uri;
-	argv[3] = "-o";
-	argv[4] = login->osdname;
-	argv[5] = "-s";
-	argv[6] = login->systemid_hex;
-	argv[7] = NULL;
-
-	ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
-	/*
-	 * Disable the upcall mechanism if we're getting an ENOENT or
-	 * EACCES error. The admin can re-enable it on the fly by using
-	 * sysfs to set the objlayoutdriver.osd_login_prog module parameter once
-	 * the problem has been fixed.
-	 */
-	if (ret == -ENOENT || ret == -EACCES) {
-		printk(KERN_ERR "PNFS-OBJ: %s was not found please set "
-			"objlayoutdriver.osd_login_prog kernel parameter!\n",
-			osd_login_prog);
-		osd_login_prog[0] = '\0';
-	}
-	dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);
-
-	return ret;
-}
-
-/* Assume dest is all zeros */
-static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
-					   char *dest, int max_len,
-					   const char *var_name)
-{
-	if (!s.len)
-		return;
-
-	if (s.len >= max_len) {
-		pr_warn_ratelimited(
-			"objlayout_autologin: %s: s.len(%d) >= max_len(%d)",
-			var_name, s.len, max_len);
-		s.len = max_len - 1; /* space for null terminator */
-	}
-
-	memcpy(dest, s.data, s.len);
-}
-
-/* Assume sysid is all zeros */
-static void _sysid_2_hex(struct nfs4_string s,
-		  char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
-{
-	int i;
-	char *cur;
-
-	if (!s.len)
-		return;
-
-	if (s.len != OSD_SYSTEMID_LEN) {
-		pr_warn_ratelimited(
-		    "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN",
-		    s.len);
-		if (s.len > OSD_SYSTEMID_LEN)
-			s.len = OSD_SYSTEMID_LEN;
-	}
-
-	cur = sysid;
-	for (i = 0; i < s.len; i++)
-		cur = hex_byte_pack(cur, s.data[i]);
-}
-
-int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
-{
-	int rc;
-	struct __auto_login login;
-
-	if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len)
-		return -ENODEV;
-
-	memset(&login, 0, sizeof(login));
-	__copy_nfsS_and_zero_terminate(
-		deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
-		login.uri, sizeof(login.uri), "URI");
-
-	__copy_nfsS_and_zero_terminate(
-		deviceaddr->oda_osdname,
-		login.osdname, sizeof(login.osdname), "OSDNAME");
-
-	_sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex);
-
-	rc = __objlayout_upcall(&login);
-	if (rc > 0) /* script returns positive values */
-		rc = -ENODEV;
-
-	return rc;
-}
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
deleted file mode 100644
index fc94a5872ed4..000000000000
--- a/fs/nfs/objlayout/objlayout.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- *  Data types and function declerations for interfacing with the
- *  pNFS standard object layout driver.
- *
- *  Copyright (C) 2007 Panasas Inc. [year of first publication]
- *  All rights reserved.
- *
- *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <ooo@electrozaur.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  See the file COPYING included with this distribution for more details.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the Panasas company nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _OBJLAYOUT_H
-#define _OBJLAYOUT_H
-
-#include <linux/nfs_fs.h>
-#include <linux/pnfs_osd_xdr.h>
-#include "../pnfs.h"
-
-/*
- * per-inode layout
- */
-struct objlayout {
-	struct pnfs_layout_hdr pnfs_layout;
-
-	 /* for layout_commit */
-	enum osd_delta_space_valid_enum {
-		OBJ_DSU_INIT = 0,
-		OBJ_DSU_VALID,
-		OBJ_DSU_INVALID,
-	} delta_space_valid;
-	s64 delta_space_used;  /* consumed by write ops */
-
-	 /* for layout_return */
-	spinlock_t lock;
-	struct list_head err_list;
-};
-
-static inline struct objlayout *
-OBJLAYOUT(struct pnfs_layout_hdr *lo)
-{
-	return container_of(lo, struct objlayout, pnfs_layout);
-}
-
-/*
- * per-I/O operation state
- * embedded in objects provider io_state data structure
- */
-struct objlayout_io_res {
-	struct objlayout *objlay;
-
-	void *rpcdata;
-	int status;             /* res */
-	int committed;          /* res */
-
-	/* Error reporting (layout_return) */
-	struct list_head err_list;
-	unsigned num_comps;
-	/* Pointer to array of error descriptors of size num_comps.
-	 * It should contain as many entries as devices in the osd_layout
-	 * that participate in the I/O. It is up to the io_engine to allocate
-	 * needed space and set num_comps.
-	 */
-	struct pnfs_osd_ioerr *ioerrs;
-};
-
-static inline
-void objlayout_init_ioerrs(struct objlayout_io_res *oir, unsigned num_comps,
-			struct pnfs_osd_ioerr *ioerrs, void *rpcdata,
-			struct pnfs_layout_hdr *pnfs_layout_type)
-{
-	oir->objlay = OBJLAYOUT(pnfs_layout_type);
-	oir->rpcdata = rpcdata;
-	INIT_LIST_HEAD(&oir->err_list);
-	oir->num_comps = num_comps;
-	oir->ioerrs = ioerrs;
-}
-
-/*
- * Raid engine I/O API
- */
-extern int objio_alloc_lseg(struct pnfs_layout_segment **outp,
-	struct pnfs_layout_hdr *pnfslay,
-	struct pnfs_layout_range *range,
-	struct xdr_stream *xdr,
-	gfp_t gfp_flags);
-extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
-
-/* objio_free_result will free these @oir structs received from
- * objlayout_{read,write}_done
- */
-extern void objio_free_result(struct objlayout_io_res *oir);
-
-extern int objio_read_pagelist(struct nfs_pgio_header *rdata);
-extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how);
-
-/*
- * callback API
- */
-extern void objlayout_io_set_result(struct objlayout_io_res *oir,
-			unsigned index, struct pnfs_osd_objid *pooid,
-			int osd_error, u64 offset, u64 length, bool is_write);
-
-static inline void
-objlayout_add_delta_space_used(struct objlayout *objlay, s64 space_used)
-{
-	/* If one of the I/Os errored out and the delta_space_used was
-	 * invalid we render the complete report as invalid. Protocol mandate
-	 * the DSU be accurate or not reported.
-	 */
-	spin_lock(&objlay->lock);
-	if (objlay->delta_space_valid != OBJ_DSU_INVALID) {
-		objlay->delta_space_valid = OBJ_DSU_VALID;
-		objlay->delta_space_used += space_used;
-	}
-	spin_unlock(&objlay->lock);
-}
-
-extern void objlayout_read_done(struct objlayout_io_res *oir,
-				ssize_t status, bool sync);
-extern void objlayout_write_done(struct objlayout_io_res *oir,
-				 ssize_t status, bool sync);
-
-/*
- * exported generic objects function vectors
- */
-
-extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags);
-extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *);
-
-extern struct pnfs_layout_segment *objlayout_alloc_lseg(
-	struct pnfs_layout_hdr *,
-	struct nfs4_layoutget_res *,
-	gfp_t gfp_flags);
-extern void objlayout_free_lseg(struct pnfs_layout_segment *);
-
-extern enum pnfs_try_status objlayout_read_pagelist(
-	struct nfs_pgio_header *);
-
-extern enum pnfs_try_status objlayout_write_pagelist(
-	struct nfs_pgio_header *,
-	int how);
-
-extern void objlayout_encode_layoutcommit(
-	struct pnfs_layout_hdr *,
-	struct xdr_stream *,
-	const struct nfs4_layoutcommit_args *);
-
-extern void objlayout_encode_layoutreturn(
-	struct xdr_stream *,
-	const struct nfs4_layoutreturn_args *);
-
-extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr);
-
-#endif /* _OBJLAYOUT_H */
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
deleted file mode 100644
index f093c7ec983b..000000000000
--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- *  Object-Based pNFS Layout XDR layer
- *
- *  Copyright (C) 2007 Panasas Inc. [year of first publication]
- *  All rights reserved.
- *
- *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <ooo@electrozaur.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  See the file COPYING included with this distribution for more details.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the Panasas company nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/pnfs_osd_xdr.h>
-
-#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
-
-/*
- * The following implementation is based on RFC5664
- */
-
-/*
- * struct pnfs_osd_objid {
- *	struct nfs4_deviceid	oid_device_id;
- *	u64			oid_partition_id;
- *	u64			oid_object_id;
- * }; // xdr size 32 bytes
- */
-static __be32 *
-_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
-{
-	p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data,
-				    sizeof(objid->oid_device_id.data));
-
-	p = xdr_decode_hyper(p, &objid->oid_partition_id);
-	p = xdr_decode_hyper(p, &objid->oid_object_id);
-	return p;
-}
-/*
- * struct pnfs_osd_opaque_cred {
- *	u32 cred_len;
- *	void *cred;
- * }; // xdr size [variable]
- * The return pointers are from the xdr buffer
- */
-static int
-_osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred,
-			    struct xdr_stream *xdr)
-{
-	__be32 *p = xdr_inline_decode(xdr, 1);
-
-	if (!p)
-		return -EINVAL;
-
-	opaque_cred->cred_len = be32_to_cpu(*p++);
-
-	p = xdr_inline_decode(xdr, opaque_cred->cred_len);
-	if (!p)
-		return -EINVAL;
-
-	opaque_cred->cred = p;
-	return 0;
-}
-
-/*
- * struct pnfs_osd_object_cred {
- *	struct pnfs_osd_objid		oc_object_id;
- *	u32				oc_osd_version;
- *	u32				oc_cap_key_sec;
- *	struct pnfs_osd_opaque_cred	oc_cap_key
- *	struct pnfs_osd_opaque_cred	oc_cap;
- * }; // xdr size 32 + 4 + 4 + [variable] + [variable]
- */
-static int
-_osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp,
-			    struct xdr_stream *xdr)
-{
-	__be32 *p = xdr_inline_decode(xdr, 32 + 4 + 4);
-	int ret;
-
-	if (!p)
-		return -EIO;
-
-	p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
-	comp->oc_osd_version = be32_to_cpup(p++);
-	comp->oc_cap_key_sec = be32_to_cpup(p);
-
-	ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr);
-	if (unlikely(ret))
-		return ret;
-
-	ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr);
-	return ret;
-}
-
-/*
- * struct pnfs_osd_data_map {
- *	u32	odm_num_comps;
- *	u64	odm_stripe_unit;
- *	u32	odm_group_width;
- *	u32	odm_group_depth;
- *	u32	odm_mirror_cnt;
- *	u32	odm_raid_algorithm;
- * }; // xdr size 4 + 8 + 4 + 4 + 4 + 4
- */
-static inline int
-_osd_data_map_xdr_sz(void)
-{
-	return 4 + 8 + 4 + 4 + 4 + 4;
-}
-
-static __be32 *
-_osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map)
-{
-	data_map->odm_num_comps = be32_to_cpup(p++);
-	p = xdr_decode_hyper(p, &data_map->odm_stripe_unit);
-	data_map->odm_group_width = be32_to_cpup(p++);
-	data_map->odm_group_depth = be32_to_cpup(p++);
-	data_map->odm_mirror_cnt = be32_to_cpup(p++);
-	data_map->odm_raid_algorithm = be32_to_cpup(p++);
-	dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
-		"odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
-		__func__,
-		data_map->odm_num_comps,
-		(unsigned long long)data_map->odm_stripe_unit,
-		data_map->odm_group_width,
-		data_map->odm_group_depth,
-		data_map->odm_mirror_cnt,
-		data_map->odm_raid_algorithm);
-	return p;
-}
-
-int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
-	struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr)
-{
-	__be32 *p;
-
-	memset(iter, 0, sizeof(*iter));
-
-	p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4);
-	if (unlikely(!p))
-		return -EINVAL;
-
-	p = _osd_xdr_decode_data_map(p, &layout->olo_map);
-	layout->olo_comps_index = be32_to_cpup(p++);
-	layout->olo_num_comps = be32_to_cpup(p++);
-	dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__,
-		layout->olo_comps_index, layout->olo_num_comps);
-
-	iter->total_comps = layout->olo_num_comps;
-	return 0;
-}
-
-bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp,
-	struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr,
-	int *err)
-{
-	BUG_ON(iter->decoded_comps > iter->total_comps);
-	if (iter->decoded_comps == iter->total_comps)
-		return false;
-
-	*err = _osd_xdr_decode_object_cred(comp, xdr);
-	if (unlikely(*err)) {
-		dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d "
-			"total_comps=%d\n", __func__, *err,
-			iter->decoded_comps, iter->total_comps);
-		return false; /* stop the loop */
-	}
-	dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx "
-		"key_len=%u cap_len=%u\n",
-		__func__,
-		_DEVID_LO(&comp->oc_object_id.oid_device_id),
-		_DEVID_HI(&comp->oc_object_id.oid_device_id),
-		comp->oc_object_id.oid_partition_id,
-		comp->oc_object_id.oid_object_id,
-		comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);
-
-	iter->decoded_comps++;
-	return true;
-}
-
-/*
- * Get Device Information Decoding
- *
- * Note: since Device Information is currently done synchronously, all
- *       variable strings fields are left inside the rpc buffer and are only
- *       pointed to by the pnfs_osd_deviceaddr members. So the read buffer
- *       should not be freed while the returned information is in use.
- */
-/*
- *struct nfs4_string {
- *	unsigned int len;
- *	char *data;
- *}; // size [variable]
- * NOTE: Returned string points to inside the XDR buffer
- */
-static __be32 *
-__read_u8_opaque(__be32 *p, struct nfs4_string *str)
-{
-	str->len = be32_to_cpup(p++);
-	str->data = (char *)p;
-
-	p += XDR_QUADLEN(str->len);
-	return p;
-}
-
-/*
- * struct pnfs_osd_targetid {
- *	u32			oti_type;
- *	struct nfs4_string	oti_scsi_device_id;
- * };// size 4 + [variable]
- */
-static __be32 *
-__read_targetid(__be32 *p, struct pnfs_osd_targetid* targetid)
-{
-	u32 oti_type;
-
-	oti_type = be32_to_cpup(p++);
-	targetid->oti_type = oti_type;
-
-	switch (oti_type) {
-	case OBJ_TARGET_SCSI_NAME:
-	case OBJ_TARGET_SCSI_DEVICE_ID:
-		p = __read_u8_opaque(p, &targetid->oti_scsi_device_id);
-	}
-
-	return p;
-}
-
-/*
- * struct pnfs_osd_net_addr {
- *	struct nfs4_string	r_netid;
- *	struct nfs4_string	r_addr;
- * };
- */
-static __be32 *
-__read_net_addr(__be32 *p, struct pnfs_osd_net_addr* netaddr)
-{
-	p = __read_u8_opaque(p, &netaddr->r_netid);
-	p = __read_u8_opaque(p, &netaddr->r_addr);
-
-	return p;
-}
-
-/*
- * struct pnfs_osd_targetaddr {
- *	u32				ota_available;
- *	struct pnfs_osd_net_addr	ota_netaddr;
- * };
- */
-static __be32 *
-__read_targetaddr(__be32 *p, struct pnfs_osd_targetaddr *targetaddr)
-{
-	u32 ota_available;
-
-	ota_available = be32_to_cpup(p++);
-	targetaddr->ota_available = ota_available;
-
-	if (ota_available)
-		p = __read_net_addr(p, &targetaddr->ota_netaddr);
-
-
-	return p;
-}
-
-/*
- * struct pnfs_osd_deviceaddr {
- *	struct pnfs_osd_targetid	oda_targetid;
- *	struct pnfs_osd_targetaddr	oda_targetaddr;
- *	u8				oda_lun[8];
- *	struct nfs4_string		oda_systemid;
- *	struct pnfs_osd_object_cred	oda_root_obj_cred;
- *	struct nfs4_string		oda_osdname;
- * };
- */
-
-/* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does
- * not have an xdr_stream
- */
-static __be32 *
-__read_opaque_cred(__be32 *p,
-			      struct pnfs_osd_opaque_cred *opaque_cred)
-{
-	opaque_cred->cred_len = be32_to_cpu(*p++);
-	opaque_cred->cred = p;
-	return p + XDR_QUADLEN(opaque_cred->cred_len);
-}
-
-static __be32 *
-__read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp)
-{
-	p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
-	comp->oc_osd_version = be32_to_cpup(p++);
-	comp->oc_cap_key_sec = be32_to_cpup(p++);
-
-	p = __read_opaque_cred(p, &comp->oc_cap_key);
-	p = __read_opaque_cred(p, &comp->oc_cap);
-	return p;
-}
-
-void pnfs_osd_xdr_decode_deviceaddr(
-	struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p)
-{
-	p = __read_targetid(p, &deviceaddr->oda_targetid);
-
-	p = __read_targetaddr(p, &deviceaddr->oda_targetaddr);
-
-	p = xdr_decode_opaque_fixed(p, deviceaddr->oda_lun,
-				    sizeof(deviceaddr->oda_lun));
-
-	p = __read_u8_opaque(p, &deviceaddr->oda_systemid);
-
-	p = __read_object_cred(p, &deviceaddr->oda_root_obj_cred);
-
-	p = __read_u8_opaque(p, &deviceaddr->oda_osdname);
-
-	/* libosd likes this terminated in dbg. It's last, so no problems */
-	deviceaddr->oda_osdname.data[deviceaddr->oda_osdname.len] = 0;
-}
-
-/*
- * struct pnfs_osd_layoutupdate {
- *	u32	dsu_valid;
- *	s64	dsu_delta;
- *	u32	olu_ioerr_flag;
- * }; xdr size 4 + 8 + 4
- */
-int
-pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
-				 struct pnfs_osd_layoutupdate *lou)
-{
-	__be32 *p = xdr_reserve_space(xdr,  4 + 8 + 4);
-
-	if (!p)
-		return -E2BIG;
-
-	*p++ = cpu_to_be32(lou->dsu_valid);
-	if (lou->dsu_valid)
-		p = xdr_encode_hyper(p, lou->dsu_delta);
-	*p++ = cpu_to_be32(lou->olu_ioerr_flag);
-	return 0;
-}
-
-/*
- * struct pnfs_osd_objid {
- *	struct nfs4_deviceid	oid_device_id;
- *	u64			oid_partition_id;
- *	u64			oid_object_id;
- * }; // xdr size 32 bytes
- */
-static inline __be32 *
-pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id)
-{
-	p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data,
-				    sizeof(object_id->oid_device_id.data));
-	p = xdr_encode_hyper(p, object_id->oid_partition_id);
-	p = xdr_encode_hyper(p, object_id->oid_object_id);
-
-	return p;
-}
-
-/*
- * struct pnfs_osd_ioerr {
- *	struct pnfs_osd_objid	oer_component;
- *	u64			oer_comp_offset;
- *	u64			oer_comp_length;
- *	u32			oer_iswrite;
- *	u32			oer_errno;
- * }; // xdr size 32 + 24 bytes
- */
-void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr)
-{
-	p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component);
-	p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
-	p = xdr_encode_hyper(p, ioerr->oer_comp_length);
-	*p++ = cpu_to_be32(ioerr->oer_iswrite);
-	*p   = cpu_to_be32(ioerr->oer_errno);
-}
-
-__be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr)
-{
-	__be32 *p;
-
-	p = xdr_reserve_space(xdr, 32 + 24);
-	if (unlikely(!p))
-		dprintk("%s: out of xdr space\n", __func__);
-
-	return p;
-}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 6e629b856a00..ad92b401326c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -29,19 +29,6 @@
 static struct kmem_cache *nfs_page_cachep;
 static const struct rpc_call_ops nfs_pgio_common_ops;
 
-static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
-{
-	p->npages = pagecount;
-	if (pagecount <= ARRAY_SIZE(p->page_array))
-		p->pagevec = p->page_array;
-	else {
-		p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
-		if (!p->pagevec)
-			p->npages = 0;
-	}
-	return p->pagevec != NULL;
-}
-
 struct nfs_pgio_mirror *
 nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc)
 {
@@ -115,6 +102,35 @@ nfs_iocounter_wait(struct nfs_lock_context *l_ctx)
 			TASK_KILLABLE);
 }
 
+/**
+ * nfs_async_iocounter_wait - wait on a rpc_waitqueue for I/O
+ * to complete
+ * @task: the rpc_task that should wait
+ * @l_ctx: nfs_lock_context with io_counter to check
+ *
+ * Returns true if there is outstanding I/O to wait on and the
+ * task has been put to sleep.
+ */
+bool
+nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx)
+{
+	struct inode *inode = d_inode(l_ctx->open_context->dentry);
+	bool ret = false;
+
+	if (atomic_read(&l_ctx->io_count) > 0) {
+		rpc_sleep_on(&NFS_SERVER(inode)->uoc_rpcwaitq, task, NULL);
+		ret = true;
+	}
+
+	if (atomic_read(&l_ctx->io_count) == 0) {
+		rpc_wake_up_queued_task(&NFS_SERVER(inode)->uoc_rpcwaitq, task);
+		ret = false;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait);
+
 /*
  * nfs_page_group_lock - lock the head of the page group
  * @req - request in group that is to be locked
@@ -398,8 +414,11 @@ static void nfs_clear_request(struct nfs_page *req)
 		req->wb_page = NULL;
 	}
 	if (l_ctx != NULL) {
-		if (atomic_dec_and_test(&l_ctx->io_count))
+		if (atomic_dec_and_test(&l_ctx->io_count)) {
 			wake_up_atomic_t(&l_ctx->io_count);
+			if (test_bit(NFS_CONTEXT_UNLOCK, &ctx->flags))
+				rpc_wake_up(&NFS_SERVER(d_inode(ctx->dentry))->uoc_rpcwaitq);
+		}
 		nfs_put_lock_context(l_ctx);
 		req->wb_lock_context = NULL;
 	}
@@ -677,7 +696,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 		     const struct nfs_pgio_completion_ops *compl_ops,
 		     const struct nfs_rw_ops *rw_ops,
 		     size_t bsize,
-		     int io_flags)
+		     int io_flags,
+		     gfp_t gfp_flags)
 {
 	struct nfs_pgio_mirror *new;
 	int i;
@@ -701,7 +721,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 		/* until we have a request, we don't have an lseg and no
 		 * idea how many mirrors there will be */
 		new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX,
-			      sizeof(struct nfs_pgio_mirror), GFP_KERNEL);
+			      sizeof(struct nfs_pgio_mirror), gfp_flags);
 		desc->pg_mirrors_dynamic = new;
 		desc->pg_mirrors = new;
 
@@ -754,13 +774,24 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 				*last_page;
 	struct list_head *head = &mirror->pg_list;
 	struct nfs_commit_info cinfo;
+	struct nfs_page_array *pg_array = &hdr->page_array;
 	unsigned int pagecount, pageused;
+	gfp_t gfp_flags = GFP_KERNEL;
 
 	pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
-	if (!nfs_pgarray_set(&hdr->page_array, pagecount)) {
-		nfs_pgio_error(hdr);
-		desc->pg_error = -ENOMEM;
-		return desc->pg_error;
+
+	if (pagecount <= ARRAY_SIZE(pg_array->page_array))
+		pg_array->pagevec = pg_array->page_array;
+	else {
+		if (hdr->rw_mode == FMODE_WRITE)
+			gfp_flags = GFP_NOIO;
+		pg_array->pagevec = kcalloc(pagecount, sizeof(struct page *), gfp_flags);
+		if (!pg_array->pagevec) {
+			pg_array->npages = 0;
+			nfs_pgio_error(hdr);
+			desc->pg_error = -ENOMEM;
+			return desc->pg_error;
+		}
 	}
 
 	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
@@ -1256,8 +1287,10 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
 		mirror = &desc->pg_mirrors[midx];
 		if (!list_empty(&mirror->pg_list)) {
 			prev = nfs_list_entry(mirror->pg_list.prev);
-			if (index != prev->wb_index + 1)
-				nfs_pageio_complete_mirror(desc, midx);
+			if (index != prev->wb_index + 1) {
+				nfs_pageio_complete(desc);
+				break;
+			}
 		}
 	}
 }
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index dd042498ce7c..c383d0913b54 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -322,9 +322,15 @@ pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
 static void
 pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
 {
+	struct pnfs_layout_segment *lseg;
 	lo->plh_return_iomode = 0;
 	lo->plh_return_seq = 0;
 	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
+		if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+			continue;
+		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
+	}
 }
 
 static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
@@ -367,9 +373,9 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
 	struct pnfs_layout_segment *lseg, *next;
 
 	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
-	pnfs_clear_layoutreturn_info(lo);
 	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
 		pnfs_clear_lseg_state(lseg, lseg_list);
+	pnfs_clear_layoutreturn_info(lo);
 	pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
 	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
 	    !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
@@ -563,7 +569,6 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg)
 		}
 	}
 }
-EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked);
 
 /*
  * is l2 fully contained in l1?
@@ -728,6 +733,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
 		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
 		spin_unlock(&nfsi->vfs_inode.i_lock);
 		pnfs_free_lseg_list(&tmp_list);
+		nfs_commit_inode(&nfsi->vfs_inode, 0);
 		pnfs_put_layout_hdr(lo);
 	} else
 		spin_unlock(&nfsi->vfs_inode.i_lock);
@@ -1209,7 +1215,6 @@ out:
 	dprintk("<-- %s status: %d\n", __func__, status);
 	return status;
 }
-EXPORT_SYMBOL_GPL(_pnfs_return_layout);
 
 int
 pnfs_commit_and_return_layout(struct inode *inode)
@@ -1991,6 +1996,8 @@ out_forget:
 	spin_unlock(&ino->i_lock);
 	lseg->pls_layout = lo;
 	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+	if (!pnfs_layout_is_valid(lo))
+		nfs_commit_inode(ino, 0);
 	return ERR_PTR(-EAGAIN);
 }
 
@@ -2051,9 +2058,11 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
 	bool return_now = false;
 
 	spin_lock(&inode->i_lock);
+	if (!pnfs_layout_is_valid(lo)) {
+		spin_unlock(&inode->i_lock);
+		return;
+	}
 	pnfs_set_plh_return_info(lo, range.iomode, 0);
-	/* Block LAYOUTGET */
-	set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
 	/*
 	 * mark all matching lsegs so that we are sure to have no live
 	 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
@@ -2075,10 +2084,36 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
 EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
 
 void
+pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
+{
+	if (pgio->pg_lseg == NULL ||
+	    test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags))
+		return;
+	pnfs_put_lseg(pgio->pg_lseg);
+	pgio->pg_lseg = NULL;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
+
+/*
+ * Check for any intersection between the request and the pgio->pg_lseg,
+ * and if none, put this pgio->pg_lseg away.
+ */
+static void
+pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+	if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) {
+		pnfs_put_lseg(pgio->pg_lseg);
+		pgio->pg_lseg = NULL;
+	}
+}
+
+void
 pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 {
 	u64 rd_size = req->wb_bytes;
 
+	pnfs_generic_pg_check_layout(pgio);
+	pnfs_generic_pg_check_range(pgio, req);
 	if (pgio->pg_lseg == NULL) {
 		if (pgio->pg_dreq == NULL)
 			rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
@@ -2109,6 +2144,8 @@ void
 pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
 			   struct nfs_page *req, u64 wb_size)
 {
+	pnfs_generic_pg_check_layout(pgio);
+	pnfs_generic_pg_check_range(pgio, req);
 	if (pgio->pg_lseg == NULL) {
 		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 						   req->wb_context,
@@ -2169,16 +2206,10 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
 		seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset,
 				     pgio->pg_lseg->pls_range.length);
 		req_start = req_offset(req);
-		WARN_ON_ONCE(req_start >= seg_end);
+
 		/* start of request is past the last byte of this segment */
-		if (req_start >= seg_end) {
-			/* reference the new lseg */
-			if (pgio->pg_ops->pg_cleanup)
-				pgio->pg_ops->pg_cleanup(pgio);
-			if (pgio->pg_ops->pg_init)
-				pgio->pg_ops->pg_init(pgio, req);
+		if (req_start >= seg_end)
 			return 0;
-		}
 
 		/* adjust 'size' iff there are fewer bytes left in the
 		 * segment than what nfs_generic_pg_test returned */
@@ -2277,8 +2308,20 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc,
 	enum pnfs_try_status trypnfs;
 
 	trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
-	if (trypnfs == PNFS_NOT_ATTEMPTED)
+	switch (trypnfs) {
+	case PNFS_NOT_ATTEMPTED:
 		pnfs_write_through_mds(desc, hdr);
+	case PNFS_ATTEMPTED:
+		break;
+	case PNFS_TRY_AGAIN:
+		/* cleanup hdr and prepare to redo pnfs */
+		if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+			struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
+			list_splice_init(&hdr->pages, &mirror->pg_list);
+			mirror->pg_recoalesce = 1;
+		}
+		hdr->mds_ops->rpc_release(hdr);
+	}
 }
 
 static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
@@ -2408,10 +2451,20 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
 	enum pnfs_try_status trypnfs;
 
 	trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
-	if (trypnfs == PNFS_TRY_AGAIN)
-		pnfs_read_resend_pnfs(hdr);
-	if (trypnfs == PNFS_NOT_ATTEMPTED || hdr->task.tk_status)
+	switch (trypnfs) {
+	case PNFS_NOT_ATTEMPTED:
 		pnfs_read_through_mds(desc, hdr);
+	case PNFS_ATTEMPTED:
+		break;
+	case PNFS_TRY_AGAIN:
+		/* cleanup hdr and prepare to redo pnfs */
+		if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+			struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
+			list_splice_init(&hdr->pages, &mirror->pg_list);
+			mirror->pg_recoalesce = 1;
+		}
+		hdr->mds_ops->rpc_release(hdr);
+	}
 }
 
 static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 590e1e35781f..99731e3e332f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -173,14 +173,9 @@ struct pnfs_layoutdriver_type {
 			gfp_t gfp_flags);
 
 	int (*prepare_layoutreturn) (struct nfs4_layoutreturn_args *);
-	void (*encode_layoutreturn) (struct xdr_stream *xdr,
-				     const struct nfs4_layoutreturn_args *args);
 
 	void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data);
 	int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args);
-	void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo,
-				     struct xdr_stream *xdr,
-				     const struct nfs4_layoutcommit_args *args);
 	int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args);
 };
 
@@ -239,6 +234,7 @@ void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg);
 
 void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *);
 void unset_pnfs_layoutdriver(struct nfs_server *);
+void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio);
 void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
 int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
 void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
@@ -597,6 +593,16 @@ pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1,
 	return pnfs_is_range_intersecting(l1->offset, end1, l2->offset, end2);
 }
 
+static inline bool
+pnfs_lseg_request_intersecting(struct pnfs_layout_segment *lseg, struct nfs_page *req)
+{
+	u64 seg_last = pnfs_end_offset(lseg->pls_range.offset, lseg->pls_range.length);
+	u64 req_last = req_offset(req) + req->wb_bytes;
+
+	return pnfs_is_range_intersecting(lseg->pls_range.offset, seg_last,
+				req_offset(req), req_last);
+}
+
 extern unsigned int layoutstats_timer;
 
 #ifdef NFS_DEBUG
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 7250b95549ec..d40755a0984b 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -217,7 +217,14 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo,
 	for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
 		if (list_empty(&bucket->committing))
 			continue;
-		data = nfs_commitdata_alloc();
+		/*
+		 * If the layout segment is invalid, then let
+		 * pnfs_generic_retry_commit() clean up the bucket.
+		 */
+		if (bucket->clseg && !pnfs_is_valid_lseg(bucket->clseg) &&
+		    !test_bit(NFS_LSEG_LAYOUTRETURN, &bucket->clseg->pls_flags))
+			break;
+		data = nfs_commitdata_alloc(false);
 		if (!data)
 			break;
 		data->ds_commit_index = i;
@@ -283,16 +290,10 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 	unsigned int nreq = 0;
 
 	if (!list_empty(mds_pages)) {
-		data = nfs_commitdata_alloc();
-		if (data != NULL) {
-			data->ds_commit_index = -1;
-			list_add(&data->pages, &list);
-			nreq++;
-		} else {
-			nfs_retry_commit(mds_pages, NULL, cinfo, 0);
-			pnfs_generic_retry_commit(cinfo, 0);
-			return -ENOMEM;
-		}
+		data = nfs_commitdata_alloc(true);
+		data->ds_commit_index = -1;
+		list_add(&data->pages, &list);
+		nreq++;
 	}
 
 	nreq += pnfs_generic_alloc_ds_commits(cinfo, &list);
@@ -619,7 +620,6 @@ void nfs4_pnfs_v3_ds_connect_unload(void)
 		get_v3_ds_connect = NULL;
 	}
 }
-EXPORT_SYMBOL_GPL(nfs4_pnfs_v3_ds_connect_unload);
 
 static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
 				 struct nfs4_pnfs_ds *ds,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b7bca8303989..9872cf676a50 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -638,7 +638,7 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = file_inode(filp);
 
-	return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
+	return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl, NULL);
 }
 
 /* Helper functions for NFS lock bounds checking */
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index defc9233e985..a8421d9dab6a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -35,7 +35,11 @@ static struct kmem_cache *nfs_rdata_cachep;
 
 static struct nfs_pgio_header *nfs_readhdr_alloc(void)
 {
-	return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
+	struct nfs_pgio_header *p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
+
+	if (p)
+		p->rw_mode = FMODE_READ;
+	return p;
 }
 
 static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
@@ -64,7 +68,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 		pg_ops = server->pnfs_curr_ld->pg_read_ops;
 #endif
 	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
-			server->rsize, 0);
+			server->rsize, 0, GFP_KERNEL);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
 
@@ -451,7 +455,6 @@ void nfs_destroy_readpagecache(void)
 }
 
 static const struct nfs_rw_ops nfs_rw_read_ops = {
-	.rw_mode		= FMODE_READ,
 	.rw_alloc_header	= nfs_readhdr_alloc,
 	.rw_free_header		= nfs_readhdr_free,
 	.rw_done		= nfs_readpage_done,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2f3822a4a7d5..eceb4eabb064 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2301,7 +2301,7 @@ EXPORT_SYMBOL_GPL(nfs_remount);
 /*
  * Initialise the common bits of the superblock
  */
-inline void nfs_initialise_sb(struct super_block *sb)
+static void nfs_initialise_sb(struct super_block *sb)
 {
 	struct nfs_server *server = NFS_SB(sb);
 
@@ -2348,7 +2348,8 @@ EXPORT_SYMBOL_GPL(nfs_fill_super);
 /*
  * Finish setting up a cloned NFS2/3/4 superblock
  */
-void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
+static void nfs_clone_super(struct super_block *sb,
+			    struct nfs_mount_info *mount_info)
 {
 	const struct super_block *old_sb = mount_info->cloned->sb;
 	struct nfs_server *server = NFS_SB(sb);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index cc341fc7fd44..db7ba542559e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -60,14 +60,28 @@ static mempool_t *nfs_wdata_mempool;
 static struct kmem_cache *nfs_cdata_cachep;
 static mempool_t *nfs_commit_mempool;
 
-struct nfs_commit_data *nfs_commitdata_alloc(void)
+struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail)
 {
-	struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
+	struct nfs_commit_data *p;
 
-	if (p) {
-		memset(p, 0, sizeof(*p));
-		INIT_LIST_HEAD(&p->pages);
+	if (never_fail)
+		p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
+	else {
+		/* It is OK to do some reclaim, not no safe to wait
+		 * for anything to be returned to the pool.
+		 * mempool_alloc() cannot handle that particular combination,
+		 * so we need two separate attempts.
+		 */
+		p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT);
+		if (!p)
+			p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO |
+					     __GFP_NOWARN | __GFP_NORETRY);
+		if (!p)
+			return NULL;
 	}
+
+	memset(p, 0, sizeof(*p));
+	INIT_LIST_HEAD(&p->pages);
 	return p;
 }
 EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
@@ -82,8 +96,10 @@ static struct nfs_pgio_header *nfs_writehdr_alloc(void)
 {
 	struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
 
-	if (p)
+	if (p) {
 		memset(p, 0, sizeof(*p));
+		p->rw_mode = FMODE_WRITE;
+	}
 	return p;
 }
 
@@ -547,9 +563,21 @@ static void nfs_write_error_remove_page(struct nfs_page *req)
 {
 	nfs_unlock_request(req);
 	nfs_end_page_writeback(req);
-	nfs_release_request(req);
 	generic_error_remove_page(page_file_mapping(req->wb_page),
 				  req->wb_page);
+	nfs_release_request(req);
+}
+
+static bool
+nfs_error_is_fatal_on_server(int err)
+{
+	switch (err) {
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+		return false;
+	}
+	return nfs_error_is_fatal(err);
 }
 
 /*
@@ -557,8 +585,7 @@ static void nfs_write_error_remove_page(struct nfs_page *req)
  * May return an error if the user signalled nfs_wait_on_request().
  */
 static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
-				struct page *page, bool nonblock,
-				bool launder)
+				struct page *page, bool nonblock)
 {
 	struct nfs_page *req;
 	int ret = 0;
@@ -574,19 +601,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 	WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
 
 	ret = 0;
+	/* If there is a fatal error that covers this write, just exit */
+	if (nfs_error_is_fatal_on_server(req->wb_context->error))
+		goto out_launder;
+
 	if (!nfs_pageio_add_request(pgio, req)) {
 		ret = pgio->pg_error;
 		/*
-		 * Remove the problematic req upon fatal errors
-		 * in launder case, while other dirty pages can
-		 * still be around until they get flushed.
+		 * Remove the problematic req upon fatal errors on the server
 		 */
 		if (nfs_error_is_fatal(ret)) {
 			nfs_context_set_write_error(req->wb_context, ret);
-			if (launder) {
-				nfs_write_error_remove_page(req);
-				goto out;
-			}
+			if (nfs_error_is_fatal_on_server(ret))
+				goto out_launder;
 		}
 		nfs_redirty_request(req);
 		ret = -EAGAIN;
@@ -595,16 +622,18 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 				NFSIOS_WRITEPAGES, 1);
 out:
 	return ret;
+out_launder:
+	nfs_write_error_remove_page(req);
+	return ret;
 }
 
 static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
-			    struct nfs_pageio_descriptor *pgio, bool launder)
+			    struct nfs_pageio_descriptor *pgio)
 {
 	int ret;
 
 	nfs_pageio_cond_complete(pgio, page_index(page));
-	ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE,
-				   launder);
+	ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
 	if (ret == -EAGAIN) {
 		redirty_page_for_writepage(wbc, page);
 		ret = 0;
@@ -616,8 +645,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
  * Write an mmapped page to the server.
  */
 static int nfs_writepage_locked(struct page *page,
-				struct writeback_control *wbc,
-				bool launder)
+				struct writeback_control *wbc)
 {
 	struct nfs_pageio_descriptor pgio;
 	struct inode *inode = page_file_mapping(page)->host;
@@ -626,7 +654,7 @@ static int nfs_writepage_locked(struct page *page,
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
 	nfs_pageio_init_write(&pgio, inode, 0,
 				false, &nfs_async_write_completion_ops);
-	err = nfs_do_writepage(page, wbc, &pgio, launder);
+	err = nfs_do_writepage(page, wbc, &pgio);
 	nfs_pageio_complete(&pgio);
 	if (err < 0)
 		return err;
@@ -639,7 +667,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 {
 	int ret;
 
-	ret = nfs_writepage_locked(page, wbc, false);
+	ret = nfs_writepage_locked(page, wbc);
 	unlock_page(page);
 	return ret;
 }
@@ -648,7 +676,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
 {
 	int ret;
 
-	ret = nfs_do_writepage(page, wbc, data, false);
+	ret = nfs_do_writepage(page, wbc, data);
 	unlock_page(page);
 	return ret;
 }
@@ -1367,7 +1395,7 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 		pg_ops = server->pnfs_curr_ld->pg_write_ops;
 #endif
 	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
-			server->wsize, ioflags);
+			server->wsize, ioflags, GFP_NOIO);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
 
@@ -1704,50 +1732,14 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
 	if (list_empty(head))
 		return 0;
 
-	data = nfs_commitdata_alloc();
-
-	if (!data)
-		goto out_bad;
+	data = nfs_commitdata_alloc(true);
 
 	/* Set up the argument struct */
 	nfs_init_commit(data, head, NULL, cinfo);
 	atomic_inc(&cinfo->mds->rpcs_out);
 	return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
 				   data->mds_ops, how, 0);
- out_bad:
-	nfs_retry_commit(head, NULL, cinfo, 0);
-	return -ENOMEM;
-}
-
-int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf)
-{
-	struct inode *inode = file_inode(file);
-	struct nfs_open_context *open;
-	struct nfs_commit_info cinfo;
-	struct nfs_page *req;
-	int ret;
-
-	open = get_nfs_open_context(nfs_file_open_context(file));
-	req  = nfs_create_request(open, NULL, NULL, 0, i_size_read(inode));
-	if (IS_ERR(req)) {
-		ret = PTR_ERR(req);
-		goto out_put;
-	}
-
-	nfs_init_cinfo_from_inode(&cinfo, inode);
-
-	memcpy(&req->wb_verf, verf, sizeof(struct nfs_write_verifier));
-	nfs_request_add_commit_list(req, &cinfo);
-	ret = nfs_commit_inode(inode, FLUSH_SYNC);
-	if (ret > 0)
-		ret = 0;
-
-	nfs_free_request(req);
-out_put:
-	put_nfs_open_context(open);
-	return ret;
 }
-EXPORT_SYMBOL_GPL(nfs_commit_file);
 
 /*
  * COMMIT call returned
@@ -1985,7 +1977,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 /*
  * Write back all requests on one page - we do this before reading it.
  */
-int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
+int nfs_wb_page(struct inode *inode, struct page *page)
 {
 	loff_t range_start = page_file_offset(page);
 	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
@@ -2002,7 +1994,7 @@ int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
 	for (;;) {
 		wait_on_page_writeback(page);
 		if (clear_page_dirty_for_io(page)) {
-			ret = nfs_writepage_locked(page, &wbc, launder);
+			ret = nfs_writepage_locked(page, &wbc);
 			if (ret < 0)
 				goto out_error;
 			continue;
@@ -2107,7 +2099,6 @@ void nfs_destroy_writepagecache(void)
 }
 
 static const struct nfs_rw_ops nfs_rw_write_ops = {
-	.rw_mode		= FMODE_WRITE,
 	.rw_alloc_header	= nfs_writehdr_alloc,
 	.rw_free_header		= nfs_writehdr_free,
 	.rw_done		= nfs_writeback_done,
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index fb5213afc854..c862c2489df0 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -219,6 +219,9 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
 	u8 *buf, *d, type, assoc;
 	int error;
 
+	if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q)))
+		return -EINVAL;
+
 	buf = kzalloc(bufflen, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
@@ -229,7 +232,6 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
 		goto out_free_buf;
 	}
 	req = scsi_req(rq);
-	scsi_req_init(rq);
 
 	error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
 	if (error)
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index e71f11b1a180..3bc08c394a3f 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -486,7 +486,7 @@ secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
 #endif
 
 static inline int
-uuid_parse(char **mesg, char *buf, unsigned char **puuid)
+nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid)
 {
 	int len;
 
@@ -586,7 +586,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 			if (strcmp(buf, "fsloc") == 0)
 				err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
 			else if (strcmp(buf, "uuid") == 0)
-				err = uuid_parse(&mesg, buf, &exp.ex_uuid);
+				err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid);
 			else if (strcmp(buf, "secinfo") == 0)
 				err = secinfo_parse(&mesg, buf, &exp);
 			else
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d86031b6ad79..dadb3bf305b2 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1259,7 +1259,8 @@ nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
 		return NULL;
 	}
 
-	if (!(exp->ex_layout_types & (1 << layout_type))) {
+	if (layout_type >= LAYOUT_TYPE_MAX ||
+	    !(exp->ex_layout_types & (1 << layout_type))) {
 		dprintk("%s: layout type %d not supported\n",
 			__func__, layout_type);
 		return NULL;
@@ -1768,6 +1769,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 			opdesc->op_get_currentstateid(cstate, &op->u);
 		op->status = opdesc->op_func(rqstp, cstate, &op->u);
 
+		/* Only from SEQUENCE */
+		if (cstate->status == nfserr_replay_cache) {
+			dprintk("%s NFS4.1 replay from cache\n", __func__);
+			status = op->status;
+			goto out;
+		}
 		if (!op->status) {
 			if (opdesc->op_set_currentstateid)
 				opdesc->op_set_currentstateid(cstate, &op->u);
@@ -1778,14 +1785,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 			if (need_wrongsec_check(rqstp))
 				op->status = check_nfsd_access(current_fh->fh_export, rqstp);
 		}
-
 encode_op:
-		/* Only from SEQUENCE */
-		if (cstate->status == nfserr_replay_cache) {
-			dprintk("%s NFS4.1 replay from cache\n", __func__);
-			status = op->status;
-			goto out;
-		}
 		if (op->status == nfserr_replay_me) {
 			op->replay = &cstate->replay_owner->so_replay;
 			nfsd4_encode_replay(&resp->xdr, op);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e9ef50addddb..22002fb75a18 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1912,28 +1912,15 @@ static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
 	target->cl_clientid.cl_id = source->cl_clientid.cl_id; 
 }
 
-int strdup_if_nonnull(char **target, char *source)
-{
-	if (source) {
-		*target = kstrdup(source, GFP_KERNEL);
-		if (!*target)
-			return -ENOMEM;
-	} else
-		*target = NULL;
-	return 0;
-}
-
 static int copy_cred(struct svc_cred *target, struct svc_cred *source)
 {
-	int ret;
+	target->cr_principal = kstrdup(source->cr_principal, GFP_KERNEL);
+	target->cr_raw_principal = kstrdup(source->cr_raw_principal,
+								GFP_KERNEL);
+	if ((source->cr_principal && ! target->cr_principal) ||
+	    (source->cr_raw_principal && ! target->cr_raw_principal))
+		return -ENOMEM;
 
-	ret = strdup_if_nonnull(&target->cr_principal, source->cr_principal);
-	if (ret)
-		return ret;
-	ret = strdup_if_nonnull(&target->cr_raw_principal,
-					source->cr_raw_principal);
-	if (ret)
-		return ret;
 	target->cr_flavor = source->cr_flavor;
 	target->cr_uid = source->cr_uid;
 	target->cr_gid = source->cr_gid;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 33017d652b1d..26780d53a6f9 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2831,9 +2831,14 @@ out_acl:
 	}
 #endif /* CONFIG_NFSD_PNFS */
 	if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
-		status = nfsd4_encode_bitmap(xdr, NFSD_SUPPATTR_EXCLCREAT_WORD0,
-						  NFSD_SUPPATTR_EXCLCREAT_WORD1,
-						  NFSD_SUPPATTR_EXCLCREAT_WORD2);
+		u32 supp[3];
+
+		memcpy(supp, nfsd_suppattrs[minorversion], sizeof(supp));
+		supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0;
+		supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1;
+		supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2;
+
+		status = nfsd4_encode_bitmap(xdr, supp[0], supp[1], supp[2]);
 		if (status)
 			goto out;
 	}
@@ -4119,8 +4124,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
 		struct nfsd4_getdeviceinfo *gdev)
 {
 	struct xdr_stream *xdr = &resp->xdr;
-	const struct nfsd4_layout_ops *ops =
-		nfsd4_layout_ops[gdev->gd_layout_type];
+	const struct nfsd4_layout_ops *ops;
 	u32 starting_len = xdr->buf->len, needed_len;
 	__be32 *p;
 
@@ -4137,6 +4141,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 	/* If maxcount is 0 then just update notifications */
 	if (gdev->gd_maxcount != 0) {
+		ops = nfsd4_layout_ops[gdev->gd_layout_type];
 		nfserr = ops->encode_getdeviceinfo(xdr, gdev);
 		if (nfserr) {
 			/*
@@ -4189,8 +4194,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
 		struct nfsd4_layoutget *lgp)
 {
 	struct xdr_stream *xdr = &resp->xdr;
-	const struct nfsd4_layout_ops *ops =
-		nfsd4_layout_ops[lgp->lg_layout_type];
+	const struct nfsd4_layout_ops *ops;
 	__be32 *p;
 
 	dprintk("%s: err %d\n", __func__, nfserr);
@@ -4213,6 +4217,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
 	*p++ = cpu_to_be32(lgp->lg_seg.iomode);
 	*p++ = cpu_to_be32(lgp->lg_layout_type);
 
+	ops = nfsd4_layout_ops[lgp->lg_layout_type];
 	nfserr = ops->encode_layoutget(xdr, lgp);
 out:
 	kfree(lgp->lg_content);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9aaf6ca77569..2be32955d7f2 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -94,6 +94,12 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 	err = follow_down(&path);
 	if (err < 0)
 		goto out;
+	if (path.mnt == exp->ex_path.mnt && path.dentry == dentry &&
+	    nfsd_mountpoint(dentry, exp) == 2) {
+		/* This is only a mountpoint in some other namespace */
+		path_put(&path);
+		goto out;
+	}
 
 	exp2 = rqst_exp_get_by_name(rqstp, &path);
 	if (IS_ERR(exp2)) {
@@ -167,16 +173,26 @@ static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, st
 /*
  * For nfsd purposes, we treat V4ROOT exports as though there was an
  * export at *every* directory.
+ * We return:
+ * '1' if this dentry *must* be an export point,
+ * '2' if it might be, if there is really a mount here, and
+ * '0' if there is no chance of an export point here.
  */
 int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
 {
-	if (d_mountpoint(dentry))
+	if (!d_inode(dentry))
+		return 0;
+	if (exp->ex_flags & NFSEXP_V4ROOT)
 		return 1;
 	if (nfsd4_is_junction(dentry))
 		return 1;
-	if (!(exp->ex_flags & NFSEXP_V4ROOT))
-		return 0;
-	return d_inode(dentry) != NULL;
+	if (d_mountpoint(dentry))
+		/*
+		 * Might only be a mountpoint in a different namespace,
+		 * but we need to check.
+		 */
+		return 2;
+	return 0;
 }
 
 __be32
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 6f87b2ac1aeb..e73c86d9855c 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -338,7 +338,7 @@ static void nilfs_end_bio_write(struct bio *bio)
 {
 	struct nilfs_segment_buffer *segbuf = bio->bi_private;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		atomic_inc(&segbuf->sb_err);
 
 	bio_put(bio);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index febed1217b3f..70ded52dc1dd 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2161,7 +2161,7 @@ void nilfs_flush_segment(struct super_block *sb, ino_t ino)
 }
 
 struct nilfs_segctor_wait_request {
-	wait_queue_t	wq;
+	wait_queue_entry_t	wq;
 	__u32		seq;
 	int		err;
 	atomic_t	done;
@@ -2206,8 +2206,7 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
 	unsigned long flags;
 
 	spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
-	list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list,
-				 wq.task_list) {
+	list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) {
 		if (!atomic_read(&wrq->done) &&
 		    nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
 			wrq->err = err;
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 358258364616..4690cd75d8d7 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -159,7 +159,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
 					PTR_ERR(dent_inode));
 		kfree(name);
 		/* Return the error code. */
-		return (struct dentry *)dent_inode;
+		return ERR_CAST(dent_inode);
 	}
 	/* It is guaranteed that @name is no longer allocated at this point. */
 	if (MREF_ERR(mref) == -ENOENT) {
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 0da0332725aa..ffe003982d95 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -516,9 +516,9 @@ static void o2hb_bio_end_io(struct bio *bio)
 {
 	struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
 
-	if (bio->bi_error) {
-		mlog(ML_ERROR, "IO Error %d\n", bio->bi_error);
-		wc->wc_error = bio->bi_error;
+	if (bio->bi_status) {
+		mlog(ML_ERROR, "IO Error %d\n", bio->bi_status);
+		wc->wc_error = blk_status_to_errno(bio->bi_status);
 	}
 
 	o2hb_bio_wait_dec(wc, 1);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 3b7c937a36b5..4689940a953c 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2591,6 +2591,10 @@ void ocfs2_inode_unlock_tracker(struct inode *inode,
 	struct ocfs2_lock_res *lockres;
 
 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
+	/* had_lock means that the currect process already takes the cluster
+	 * lock previously. If had_lock is 1, we have nothing to do here, and
+	 * it will get unlocked where we got the lock.
+	 */
 	if (!had_lock) {
 		ocfs2_remove_holder(lockres, oh);
 		ocfs2_inode_unlock(inode, ex);
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 827fc9809bc2..9f88188060db 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -119,7 +119,7 @@ check_err:
 
 	if (IS_ERR(inode)) {
 		mlog_errno(PTR_ERR(inode));
-		result = (void *)inode;
+		result = ERR_CAST(inode);
 		goto bail;
 	}
 
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index ca1646fbcaef..83005f486451 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2062,7 +2062,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
 	bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
 	sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
-	memcpy(sb->s_uuid, di->id2.i_super.s_uuid,
+	memcpy(&sb->s_uuid, di->id2.i_super.s_uuid,
 	       sizeof(di->id2.i_super.s_uuid));
 
 	osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3c5384d9b3a5..f70c3778d600 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1328,20 +1328,21 @@ static int ocfs2_xattr_get(struct inode *inode,
 			   void *buffer,
 			   size_t buffer_size)
 {
-	int ret;
+	int ret, had_lock;
 	struct buffer_head *di_bh = NULL;
+	struct ocfs2_lock_holder oh;
 
-	ret = ocfs2_inode_lock(inode, &di_bh, 0);
-	if (ret < 0) {
-		mlog_errno(ret);
-		return ret;
+	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
+	if (had_lock < 0) {
+		mlog_errno(had_lock);
+		return had_lock;
 	}
 	down_read(&OCFS2_I(inode)->ip_xattr_sem);
 	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
 				     name, buffer, buffer_size);
 	up_read(&OCFS2_I(inode)->ip_xattr_sem);
 
-	ocfs2_inode_unlock(inode, 0);
+	ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
 
 	brelse(di_bh);
 
@@ -3537,11 +3538,12 @@ int ocfs2_xattr_set(struct inode *inode,
 {
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_dinode *di;
-	int ret, credits, ref_meta = 0, ref_credits = 0;
+	int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct inode *tl_inode = osb->osb_tl_inode;
 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
 	struct ocfs2_refcount_tree *ref_tree = NULL;
+	struct ocfs2_lock_holder oh;
 
 	struct ocfs2_xattr_info xi = {
 		.xi_name_index = name_index,
@@ -3572,8 +3574,9 @@ int ocfs2_xattr_set(struct inode *inode,
 		return -ENOMEM;
 	}
 
-	ret = ocfs2_inode_lock(inode, &di_bh, 1);
-	if (ret < 0) {
+	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
+	if (had_lock < 0) {
+		ret = had_lock;
 		mlog_errno(ret);
 		goto cleanup_nolock;
 	}
@@ -3670,7 +3673,7 @@ cleanup:
 		if (ret)
 			mlog_errno(ret);
 	}
-	ocfs2_inode_unlock(inode, 1);
+	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
 cleanup_nolock:
 	brelse(di_bh);
 	brelse(xbs.xattr_bh);
diff --git a/fs/open.c b/fs/open.c
index 373787afd638..3fe0c4aa7d27 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -193,7 +193,8 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 		goto out_putf;
 
 	error = -EPERM;
-	if (IS_APPEND(inode))
+	/* Check IS_APPEND on real upper inode */
+	if (IS_APPEND(file_inode(f.file)))
 		goto out_putf;
 
 	sb_start_write(inode->i_sb);
@@ -459,20 +460,17 @@ out:
 SYSCALL_DEFINE1(fchdir, unsigned int, fd)
 {
 	struct fd f = fdget_raw(fd);
-	struct inode *inode;
-	int error = -EBADF;
+	int error;
 
 	error = -EBADF;
 	if (!f.file)
 		goto out;
 
-	inode = file_inode(f.file);
-
 	error = -ENOTDIR;
-	if (!S_ISDIR(inode->i_mode))
+	if (!d_can_lookup(f.file->f_path.dentry))
 		goto out_putf;
 
-	error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
+	error = inode_permission(file_inode(f.file), MAY_EXEC | MAY_CHDIR);
 	if (!error)
 		set_fs_pwd(current->fs, &f.file->f_path);
 out_putf:
@@ -761,6 +759,7 @@ static int do_dentry_open(struct file *f,
 	     likely(f->f_op->write || f->f_op->write_iter))
 		f->f_mode |= FMODE_CAN_WRITE;
 
+	f->f_write_hint = WRITE_LIFE_NOT_SET;
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index 83b506020718..038d67545d9f 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -46,8 +46,8 @@ static void run_down(struct slot_map *m)
 	spin_lock(&m->q.lock);
 	if (m->c != -1) {
 		for (;;) {
-			if (likely(list_empty(&wait.task_list)))
-				__add_wait_queue_tail(&m->q, &wait);
+			if (likely(list_empty(&wait.entry)))
+				__add_wait_queue_entry_tail(&m->q, &wait);
 			set_current_state(TASK_UNINTERRUPTIBLE);
 
 			if (m->c == -1)
@@ -84,8 +84,8 @@ static int wait_for_free(struct slot_map *m)
 
 	do {
 		long n = left, t;
-		if (likely(list_empty(&wait.task_list)))
-			__add_wait_queue_tail_exclusive(&m->q, &wait);
+		if (likely(list_empty(&wait.entry)))
+			__add_wait_queue_entry_tail_exclusive(&m->q, &wait);
 		set_current_state(TASK_INTERRUPTIBLE);
 
 		if (m->c > 0)
@@ -108,8 +108,8 @@ static int wait_for_free(struct slot_map *m)
 			left = -EINTR;
 	} while (left > 0);
 
-	if (!list_empty(&wait.task_list))
-		list_del(&wait.task_list);
+	if (!list_empty(&wait.entry))
+		list_del(&wait.entry);
 	else if (left <= 0 && waitqueue_active(&m->q))
 		__wake_up_locked_key(&m->q, TASK_INTERRUPTIBLE, NULL);
 	__set_current_state(TASK_RUNNING);
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 0daac5112f7a..c0c9683934b7 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -1,5 +1,6 @@
 config OVERLAY_FS
 	tristate "Overlay filesystem support"
+	select EXPORTFS
 	help
 	  An overlay filesystem combines two filesystems - an 'upper' filesystem
 	  and a 'lower' filesystem.  When a name exists in both filesystems, the
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 906ea6c93260..e5869f91b3ab 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -20,6 +20,7 @@
 #include <linux/namei.h>
 #include <linux/fdtable.h>
 #include <linux/ratelimit.h>
+#include <linux/exportfs.h>
 #include "overlayfs.h"
 #include "ovl_entry.h"
 
@@ -232,6 +233,82 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
 	return err;
 }
 
+static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
+{
+	struct ovl_fh *fh;
+	int fh_type, fh_len, dwords;
+	void *buf;
+	int buflen = MAX_HANDLE_SZ;
+
+	buf = kmalloc(buflen, GFP_TEMPORARY);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * We encode a non-connectable file handle for non-dir, because we
+	 * only need to find the lower inode number and we don't want to pay
+	 * the price or reconnecting the dentry.
+	 */
+	dwords = buflen >> 2;
+	fh_type = exportfs_encode_fh(lower, buf, &dwords, 0);
+	buflen = (dwords << 2);
+
+	fh = ERR_PTR(-EIO);
+	if (WARN_ON(fh_type < 0) ||
+	    WARN_ON(buflen > MAX_HANDLE_SZ) ||
+	    WARN_ON(fh_type == FILEID_INVALID))
+		goto out;
+
+	BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255);
+	fh_len = offsetof(struct ovl_fh, fid) + buflen;
+	fh = kmalloc(fh_len, GFP_KERNEL);
+	if (!fh) {
+		fh = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	fh->version = OVL_FH_VERSION;
+	fh->magic = OVL_FH_MAGIC;
+	fh->type = fh_type;
+	fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
+	fh->len = fh_len;
+	fh->uuid = *uuid;
+	memcpy(fh->fid, buf, buflen);
+
+out:
+	kfree(buf);
+	return fh;
+}
+
+static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
+			  struct dentry *upper)
+{
+	struct super_block *sb = lower->d_sb;
+	const struct ovl_fh *fh = NULL;
+	int err;
+
+	/*
+	 * When lower layer doesn't support export operations store a 'null' fh,
+	 * so we can use the overlay.origin xattr to distignuish between a copy
+	 * up and a pure upper inode.
+	 */
+	if (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
+	    !uuid_is_null(&sb->s_uuid)) {
+		fh = ovl_encode_fh(lower, &sb->s_uuid);
+		if (IS_ERR(fh))
+			return PTR_ERR(fh);
+	}
+
+	/*
+	 * Do not fail when upper doesn't support xattrs.
+	 */
+	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
+				 fh ? fh->len : 0, 0);
+	kfree(fh);
+
+	return err;
+}
+
 static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 			      struct dentry *dentry, struct path *lowerpath,
 			      struct kstat *stat, const char *link,
@@ -252,15 +329,9 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 		.link = link
 	};
 
-	upper = lookup_one_len(dentry->d_name.name, upperdir,
-			       dentry->d_name.len);
-	err = PTR_ERR(upper);
-	if (IS_ERR(upper))
-		goto out;
-
 	err = security_inode_copy_up(dentry, &new_creds);
 	if (err < 0)
-		goto out1;
+		goto out;
 
 	if (new_creds)
 		old_creds = override_creds(new_creds);
@@ -268,13 +339,14 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 	if (tmpfile)
 		temp = ovl_do_tmpfile(upperdir, stat->mode);
 	else
-		temp = ovl_lookup_temp(workdir, dentry);
-	err = PTR_ERR(temp);
-	if (IS_ERR(temp))
-		goto out1;
-
+		temp = ovl_lookup_temp(workdir);
 	err = 0;
-	if (!tmpfile)
+	if (IS_ERR(temp)) {
+		err = PTR_ERR(temp);
+		temp = NULL;
+	}
+
+	if (!err && !tmpfile)
 		err = ovl_create_real(wdir, temp, &cattr, NULL, true);
 
 	if (new_creds) {
@@ -283,7 +355,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 	}
 
 	if (err)
-		goto out2;
+		goto out;
 
 	if (S_ISREG(stat->mode)) {
 		struct path upperpath;
@@ -316,6 +388,27 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 	if (err)
 		goto out_cleanup;
 
+	/*
+	 * Store identifier of lower inode in upper inode xattr to
+	 * allow lookup of the copy up origin inode.
+	 *
+	 * Don't set origin when we are breaking the association with a lower
+	 * hard link.
+	 */
+	if (S_ISDIR(stat->mode) || stat->nlink == 1) {
+		err = ovl_set_origin(dentry, lowerpath->dentry, temp);
+		if (err)
+			goto out_cleanup;
+	}
+
+	upper = lookup_one_len(dentry->d_name.name, upperdir,
+			       dentry->d_name.len);
+	if (IS_ERR(upper)) {
+		err = PTR_ERR(upper);
+		upper = NULL;
+		goto out_cleanup;
+	}
+
 	if (tmpfile)
 		err = ovl_do_link(temp, udir, upper, true);
 	else
@@ -329,17 +422,15 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 
 	/* Restore timestamps on parent (best effort) */
 	ovl_set_timestamps(upperdir, pstat);
-out2:
+out:
 	dput(temp);
-out1:
 	dput(upper);
-out:
 	return err;
 
 out_cleanup:
 	if (!tmpfile)
 		ovl_cleanup(wdir, temp);
-	goto out2;
+	goto out;
 }
 
 /*
@@ -372,6 +463,11 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 	ovl_path_upper(parent, &parentpath);
 	upperdir = parentpath.dentry;
 
+	/* Mark parent "impure" because it may now contain non-pure upper */
+	err = ovl_set_impure(parent, upperdir);
+	if (err)
+		return err;
+
 	err = vfs_getattr(&parentpath, &pstat,
 			  STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
 	if (err)
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 6515796460df..a63a71656e9b 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -41,7 +41,7 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
 	}
 }
 
-struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
+struct dentry *ovl_lookup_temp(struct dentry *workdir)
 {
 	struct dentry *temp;
 	char name[20];
@@ -68,7 +68,7 @@ static struct dentry *ovl_whiteout(struct dentry *workdir,
 	struct dentry *whiteout;
 	struct inode *wdir = workdir->d_inode;
 
-	whiteout = ovl_lookup_temp(workdir, dentry);
+	whiteout = ovl_lookup_temp(workdir);
 	if (IS_ERR(whiteout))
 		return whiteout;
 
@@ -127,45 +127,26 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry,
 	return err;
 }
 
-static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
+static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
+			       int xerr)
 {
 	int err;
 
-	err = ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0);
+	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
 	if (!err)
 		ovl_dentry_set_opaque(dentry);
 
 	return err;
 }
 
-static int ovl_dir_getattr(const struct path *path, struct kstat *stat,
-			   u32 request_mask, unsigned int flags)
+static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
 {
-	struct dentry *dentry = path->dentry;
-	int err;
-	enum ovl_path_type type;
-	struct path realpath;
-	const struct cred *old_cred;
-
-	type = ovl_path_real(dentry, &realpath);
-	old_cred = ovl_override_creds(dentry->d_sb);
-	err = vfs_getattr(&realpath, stat, request_mask, flags);
-	revert_creds(old_cred);
-	if (err)
-		return err;
-
-	stat->dev = dentry->d_sb->s_dev;
-	stat->ino = dentry->d_inode->i_ino;
-
 	/*
-	 * It's probably not worth it to count subdirs to get the
-	 * correct link count.  nlink=1 seems to pacify 'find' and
-	 * other utilities.
+	 * Fail with -EIO when trying to create opaque dir and upper doesn't
+	 * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to
+	 * return a specific error for noxattr case.
 	 */
-	if (OVL_TYPE_MERGE(type))
-		stat->nlink = 1;
-
-	return 0;
+	return ovl_set_opaque_xerr(dentry, upperdentry, -EIO);
 }
 
 /* Common operations required to be done after creation of file on upper */
@@ -182,6 +163,9 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
 		inc_nlink(inode);
 	}
 	d_instantiate(dentry, inode);
+	/* Force lookup of new upper hardlink to find its lower */
+	if (hardlink)
+		d_drop(dentry);
 }
 
 static bool ovl_type_merge(struct dentry *dentry)
@@ -189,6 +173,11 @@ static bool ovl_type_merge(struct dentry *dentry)
 	return OVL_TYPE_MERGE(ovl_path_type(dentry));
 }
 
+static bool ovl_type_origin(struct dentry *dentry)
+{
+	return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
+}
+
 static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
 			    struct cattr *attr, struct dentry *hardlink)
 {
@@ -210,7 +199,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
 	if (err)
 		goto out_dput;
 
-	if (ovl_type_merge(dentry->d_parent)) {
+	if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
 		/* Setting opaque here is just an optimization, allow to fail */
 		ovl_set_opaque(dentry, newdentry);
 	}
@@ -277,7 +266,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
 	if (upper->d_parent->d_inode != udir)
 		goto out_unlock;
 
-	opaquedir = ovl_lookup_temp(workdir, dentry);
+	opaquedir = ovl_lookup_temp(workdir);
 	err = PTR_ERR(opaquedir);
 	if (IS_ERR(opaquedir))
 		goto out_unlock;
@@ -409,7 +398,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
 	if (err)
 		goto out;
 
-	newdentry = ovl_lookup_temp(workdir, dentry);
+	newdentry = ovl_lookup_temp(workdir);
 	err = PTR_ERR(newdentry);
 	if (IS_ERR(newdentry))
 		goto out_unlock;
@@ -873,18 +862,16 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
 	if (IS_ERR(redirect))
 		return PTR_ERR(redirect);
 
-	err = ovl_do_setxattr(ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT,
-			      redirect, strlen(redirect), 0);
+	err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry),
+				 OVL_XATTR_REDIRECT,
+				 redirect, strlen(redirect), -EXDEV);
 	if (!err) {
 		spin_lock(&dentry->d_lock);
 		ovl_dentry_set_redirect(dentry, redirect);
 		spin_unlock(&dentry->d_lock);
 	} else {
 		kfree(redirect);
-		if (err == -EOPNOTSUPP)
-			ovl_clear_redirect_dir(dentry->d_sb);
-		else
-			pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
+		pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
 		/* Fall back to userspace copy-up */
 		err = -EXDEV;
 	}
@@ -970,6 +957,25 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 	old_upperdir = ovl_dentry_upper(old->d_parent);
 	new_upperdir = ovl_dentry_upper(new->d_parent);
 
+	if (!samedir) {
+		/*
+		 * When moving a merge dir or non-dir with copy up origin into
+		 * a new parent, we are marking the new parent dir "impure".
+		 * When ovl_iterate() iterates an "impure" upper dir, it will
+		 * lookup the origin inodes of the entries to fill d_ino.
+		 */
+		if (ovl_type_origin(old)) {
+			err = ovl_set_impure(new->d_parent, new_upperdir);
+			if (err)
+				goto out_revert_creds;
+		}
+		if (!overwrite && ovl_type_origin(new)) {
+			err = ovl_set_impure(old->d_parent, old_upperdir);
+			if (err)
+				goto out_revert_creds;
+		}
+	}
+
 	trap = lock_rename(new_upperdir, old_upperdir);
 
 	olddentry = lookup_one_len(old->d_name.name, old_upperdir,
@@ -1019,7 +1025,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 		if (ovl_type_merge_or_lower(old))
 			err = ovl_set_redirect(old, samedir);
 		else if (!old_opaque && ovl_type_merge(new->d_parent))
-			err = ovl_set_opaque(old, olddentry);
+			err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
 		if (err)
 			goto out_dput;
 	}
@@ -1027,7 +1033,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 		if (ovl_type_merge_or_lower(new))
 			err = ovl_set_redirect(new, samedir);
 		else if (!new_opaque && ovl_type_merge(old->d_parent))
-			err = ovl_set_opaque(new, newdentry);
+			err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
 		if (err)
 			goto out_dput;
 	}
@@ -1070,7 +1076,7 @@ const struct inode_operations ovl_dir_inode_operations = {
 	.create		= ovl_create,
 	.mknod		= ovl_mknod,
 	.permission	= ovl_permission,
-	.getattr	= ovl_dir_getattr,
+	.getattr	= ovl_getattr,
 	.listxattr	= ovl_listxattr,
 	.get_acl	= ovl_get_acl,
 	.update_time	= ovl_update_time,
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index f8fe6bf2036d..d613e2c41242 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -57,18 +57,78 @@ out:
 	return err;
 }
 
-static int ovl_getattr(const struct path *path, struct kstat *stat,
-		       u32 request_mask, unsigned int flags)
+int ovl_getattr(const struct path *path, struct kstat *stat,
+		u32 request_mask, unsigned int flags)
 {
 	struct dentry *dentry = path->dentry;
+	enum ovl_path_type type;
 	struct path realpath;
 	const struct cred *old_cred;
+	bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
 	int err;
 
-	ovl_path_real(dentry, &realpath);
+	type = ovl_path_real(dentry, &realpath);
 	old_cred = ovl_override_creds(dentry->d_sb);
 	err = vfs_getattr(&realpath, stat, request_mask, flags);
+	if (err)
+		goto out;
+
+	/*
+	 * When all layers are on the same fs, all real inode number are
+	 * unique, so we use the overlay st_dev, which is friendly to du -x.
+	 *
+	 * We also use st_ino of the copy up origin, if we know it.
+	 * This guaranties constant st_dev/st_ino across copy up.
+	 *
+	 * If filesystem supports NFS export ops, this also guaranties
+	 * persistent st_ino across mount cycle.
+	 */
+	if (ovl_same_sb(dentry->d_sb)) {
+		if (OVL_TYPE_ORIGIN(type)) {
+			struct kstat lowerstat;
+			u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
+
+			ovl_path_lower(dentry, &realpath);
+			err = vfs_getattr(&realpath, &lowerstat,
+					  lowermask, flags);
+			if (err)
+				goto out;
+
+			WARN_ON_ONCE(stat->dev != lowerstat.dev);
+			/*
+			 * Lower hardlinks are broken on copy up to different
+			 * upper files, so we cannot use the lower origin st_ino
+			 * for those different files, even for the same fs case.
+			 */
+			if (is_dir || lowerstat.nlink == 1)
+				stat->ino = lowerstat.ino;
+		}
+		stat->dev = dentry->d_sb->s_dev;
+	} else if (is_dir) {
+		/*
+		 * If not all layers are on the same fs the pair {real st_ino;
+		 * overlay st_dev} is not unique, so use the non persistent
+		 * overlay st_ino.
+		 *
+		 * Always use the overlay st_dev for directories, so 'find
+		 * -xdev' will scan the entire overlay mount and won't cross the
+		 * overlay mount boundaries.
+		 */
+		stat->dev = dentry->d_sb->s_dev;
+		stat->ino = dentry->d_inode->i_ino;
+	}
+
+	/*
+	 * It's probably not worth it to count subdirs to get the
+	 * correct link count.  nlink=1 seems to pacify 'find' and
+	 * other utilities.
+	 */
+	if (is_dir && OVL_TYPE_MERGE(type))
+		stat->nlink = 1;
+
+out:
 	revert_creds(old_cred);
+
 	return err;
 }
 
@@ -180,6 +240,16 @@ int ovl_xattr_get(struct dentry *dentry, const char *name,
 	return res;
 }
 
+static bool ovl_can_list(const char *s)
+{
+	/* List all non-trusted xatts */
+	if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
+		return true;
+
+	/* Never list trusted.overlay, list other trusted for superuser only */
+	return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
+}
+
 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
 {
 	struct dentry *realdentry = ovl_dentry_real(dentry);
@@ -203,7 +273,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
 			return -EIO;
 
 		len -= slen;
-		if (ovl_is_private_xattr(s)) {
+		if (!ovl_can_list(s)) {
 			res -= slen;
 			memmove(s, s + slen, len);
 		} else {
@@ -303,6 +373,41 @@ static const struct inode_operations ovl_symlink_inode_operations = {
 	.update_time	= ovl_update_time,
 };
 
+/*
+ * It is possible to stack overlayfs instance on top of another
+ * overlayfs instance as lower layer. We need to annonate the
+ * stackable i_mutex locks according to stack level of the super
+ * block instance. An overlayfs instance can never be in stack
+ * depth 0 (there is always a real fs below it).  An overlayfs
+ * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth].
+ *
+ * For example, here is a snip from /proc/lockdep_chains after
+ * dir_iterate of nested overlayfs:
+ *
+ * [...] &ovl_i_mutex_dir_key[depth]   (stack_depth=2)
+ * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
+ * [...] &type->i_mutex_dir_key        (stack_depth=0)
+ */
+#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH
+
+static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
+{
+#ifdef CONFIG_LOCKDEP
+	static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
+	static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];
+
+	int depth = inode->i_sb->s_stack_depth - 1;
+
+	if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING))
+		depth = 0;
+
+	if (S_ISDIR(inode->i_mode))
+		lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
+	else
+		lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);
+#endif
+}
+
 static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
 {
 	inode->i_ino = get_next_ino();
@@ -312,6 +417,8 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
 	inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
 #endif
 
+	ovl_lockdep_annotate_inode_mutex_key(inode);
+
 	switch (mode & S_IFMT) {
 	case S_IFREG:
 		inode->i_op = &ovl_file_inode_operations;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index b8b077821fb0..de0d4f742f36 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -12,6 +12,8 @@
 #include <linux/namei.h>
 #include <linux/xattr.h>
 #include <linux/ratelimit.h>
+#include <linux/mount.h>
+#include <linux/exportfs.h>
 #include "overlayfs.h"
 #include "ovl_entry.h"
 
@@ -81,19 +83,93 @@ invalid:
 	goto err_free;
 }
 
-static bool ovl_is_opaquedir(struct dentry *dentry)
+static int ovl_acceptable(void *ctx, struct dentry *dentry)
+{
+	return 1;
+}
+
+static struct dentry *ovl_get_origin(struct dentry *dentry,
+				     struct vfsmount *mnt)
 {
 	int res;
-	char val;
+	struct ovl_fh *fh = NULL;
+	struct dentry *origin = NULL;
+	int bytes;
 
-	if (!d_is_dir(dentry))
-		return false;
+	res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+	if (res < 0) {
+		if (res == -ENODATA || res == -EOPNOTSUPP)
+			return NULL;
+		goto fail;
+	}
+	/* Zero size value means "copied up but origin unknown" */
+	if (res == 0)
+		return NULL;
 
-	res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1);
-	if (res == 1 && val == 'y')
-		return true;
+	fh  = kzalloc(res, GFP_TEMPORARY);
+	if (!fh)
+		return ERR_PTR(-ENOMEM);
+
+	res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res);
+	if (res < 0)
+		goto fail;
+
+	if (res < sizeof(struct ovl_fh) || res < fh->len)
+		goto invalid;
+
+	if (fh->magic != OVL_FH_MAGIC)
+		goto invalid;
+
+	/* Treat larger version and unknown flags as "origin unknown" */
+	if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
+		goto out;
+
+	/* Treat endianness mismatch as "origin unknown" */
+	if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
+	    (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
+		goto out;
+
+	bytes = (fh->len - offsetof(struct ovl_fh, fid));
+
+	/*
+	 * Make sure that the stored uuid matches the uuid of the lower
+	 * layer where file handle will be decoded.
+	 */
+	if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
+		goto out;
 
-	return false;
+	origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
+				    bytes >> 2, (int)fh->type,
+				    ovl_acceptable, NULL);
+	if (IS_ERR(origin)) {
+		/* Treat stale file handle as "origin unknown" */
+		if (origin == ERR_PTR(-ESTALE))
+			origin = NULL;
+		goto out;
+	}
+
+	if (ovl_dentry_weird(origin) ||
+	    ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) {
+		dput(origin);
+		origin = NULL;
+		goto invalid;
+	}
+
+out:
+	kfree(fh);
+	return origin;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
+	goto out;
+invalid:
+	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+	goto out;
+}
+
+static bool ovl_is_opaquedir(struct dentry *dentry)
+{
+	return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
 }
 
 static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
@@ -192,6 +268,45 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
 	return 0;
 }
 
+
+static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry,
+			    struct path **stackp, unsigned int *ctrp)
+{
+	struct super_block *same_sb = ovl_same_sb(dentry->d_sb);
+	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
+	struct vfsmount *mnt;
+	struct dentry *origin;
+
+	if (!same_sb || !roe->numlower)
+		return 0;
+
+       /*
+	* Since all layers are on the same fs, we use the first layer for
+	* decoding the file handle.  We may get a disconnected dentry,
+	* which is fine, because we only need to hold the origin inode in
+	* cache and use its inode number.  We may even get a connected dentry,
+	* that is not under the first layer's root.  That is also fine for
+	* using it's inode number - it's the same as if we held a reference
+	* to a dentry in first layer that was moved under us.
+	*/
+	mnt = roe->lowerstack[0].mnt;
+
+	origin = ovl_get_origin(upperdentry, mnt);
+	if (IS_ERR_OR_NULL(origin))
+		return PTR_ERR(origin);
+
+	BUG_ON(*stackp || *ctrp);
+	*stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY);
+	if (!*stackp) {
+		dput(origin);
+		return -ENOMEM;
+	}
+	**stackp = (struct path) { .dentry = origin, .mnt = mnt };
+	*ctrp = 1;
+
+	return 0;
+}
+
 /*
  * Returns next layer in stack starting from top.
  * Returns -1 if this is the last layer.
@@ -220,11 +335,13 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	const struct cred *old_cred;
 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
+	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
 	struct path *stack = NULL;
 	struct dentry *upperdir, *upperdentry = NULL;
 	unsigned int ctr = 0;
 	struct inode *inode = NULL;
 	bool upperopaque = false;
+	bool upperimpure = false;
 	char *upperredirect = NULL;
 	struct dentry *this;
 	unsigned int i;
@@ -253,15 +370,24 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			err = -EREMOTE;
 			goto out;
 		}
+		if (upperdentry && !d.is_dir) {
+			BUG_ON(!d.stop || d.redirect);
+			err = ovl_check_origin(dentry, upperdentry,
+					       &stack, &ctr);
+			if (err)
+				goto out;
+		}
 
 		if (d.redirect) {
 			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
 			if (!upperredirect)
 				goto out_put_upper;
 			if (d.redirect[0] == '/')
-				poe = dentry->d_sb->s_root->d_fsdata;
+				poe = roe;
 		}
 		upperopaque = d.opaque;
+		if (upperdentry && d.is_dir)
+			upperimpure = ovl_is_impuredir(upperdentry);
 	}
 
 	if (!d.stop && poe->numlower) {
@@ -290,10 +416,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		if (d.stop)
 			break;
 
-		if (d.redirect &&
-		    d.redirect[0] == '/' &&
-		    poe != dentry->d_sb->s_root->d_fsdata) {
-			poe = dentry->d_sb->s_root->d_fsdata;
+		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
+			poe = roe;
 
 			/* Find the current layer on the root dentry */
 			for (i = 0; i < poe->numlower; i++)
@@ -332,6 +456,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 
 	revert_creds(old_cred);
 	oe->opaque = upperopaque;
+	oe->impure = upperimpure;
 	oe->redirect = upperredirect;
 	oe->__upperdentry = upperdentry;
 	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 741dc0b6931f..10863b4105fa 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -8,18 +8,57 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/uuid.h>
 
 enum ovl_path_type {
 	__OVL_PATH_UPPER	= (1 << 0),
 	__OVL_PATH_MERGE	= (1 << 1),
+	__OVL_PATH_ORIGIN	= (1 << 2),
 };
 
 #define OVL_TYPE_UPPER(type)	((type) & __OVL_PATH_UPPER)
 #define OVL_TYPE_MERGE(type)	((type) & __OVL_PATH_MERGE)
+#define OVL_TYPE_ORIGIN(type)	((type) & __OVL_PATH_ORIGIN)
 
 #define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
 #define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
 #define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
+#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
+#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
+
+/*
+ * The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
+ * where:
+ * origin.fh	- exported file handle of the lower file
+ * origin.uuid	- uuid of the lower filesystem
+ */
+#define OVL_FH_VERSION	0
+#define OVL_FH_MAGIC	0xfb
+
+/* CPU byte order required for fid decoding:  */
+#define OVL_FH_FLAG_BIG_ENDIAN	(1 << 0)
+#define OVL_FH_FLAG_ANY_ENDIAN	(1 << 1)
+
+#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN)
+
+#if defined(__LITTLE_ENDIAN)
+#define OVL_FH_FLAG_CPU_ENDIAN 0
+#elif defined(__BIG_ENDIAN)
+#define OVL_FH_FLAG_CPU_ENDIAN OVL_FH_FLAG_BIG_ENDIAN
+#else
+#error Endianness not defined
+#endif
+
+/* On-disk and in-memeory format for redirect by file handle */
+struct ovl_fh {
+	u8 version;	/* 0 */
+	u8 magic;	/* 0xfb */
+	u8 len;		/* size of this header + size of fid */
+	u8 flags;	/* OVL_FH_FLAG_* */
+	u8 type;	/* fid_type of fid */
+	uuid_t uuid;	/* uuid of filesystem */
+	u8 fid[0];	/* file identifier */
+} __packed;
 
 #define OVL_ISUPPER_MASK 1UL
 
@@ -151,6 +190,7 @@ int ovl_want_write(struct dentry *dentry);
 void ovl_drop_write(struct dentry *dentry);
 struct dentry *ovl_workdir(struct dentry *dentry);
 const struct cred *ovl_override_creds(struct super_block *sb);
+struct super_block *ovl_same_sb(struct super_block *sb);
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
 bool ovl_dentry_remote(struct dentry *dentry);
 bool ovl_dentry_weird(struct dentry *dentry);
@@ -164,10 +204,10 @@ struct dentry *ovl_dentry_real(struct dentry *dentry);
 struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
 void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
 bool ovl_dentry_is_opaque(struct dentry *dentry);
+bool ovl_dentry_is_impure(struct dentry *dentry);
 bool ovl_dentry_is_whiteout(struct dentry *dentry);
 void ovl_dentry_set_opaque(struct dentry *dentry);
 bool ovl_redirect_dir(struct super_block *sb);
-void ovl_clear_redirect_dir(struct super_block *sb);
 const char *ovl_dentry_get_redirect(struct dentry *dentry);
 void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
 void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
@@ -180,6 +220,17 @@ bool ovl_is_whiteout(struct dentry *dentry);
 struct file *ovl_path_open(struct path *path, int flags);
 int ovl_copy_up_start(struct dentry *dentry);
 void ovl_copy_up_end(struct dentry *dentry);
+bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
+int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+		       const char *name, const void *value, size_t size,
+		       int xerr);
+int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
+
+static inline bool ovl_is_impuredir(struct dentry *dentry)
+{
+	return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
+}
+
 
 /* namei.c */
 int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
@@ -197,6 +248,8 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
 
 /* inode.c */
 int ovl_setattr(struct dentry *dentry, struct iattr *attr);
+int ovl_getattr(const struct path *path, struct kstat *stat,
+		u32 request_mask, unsigned int flags);
 int ovl_permission(struct inode *inode, int mask);
 int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
 		  size_t size, int flags);
@@ -222,7 +275,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
 
 /* dir.c */
 extern const struct inode_operations ovl_dir_inode_operations;
-struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
+struct dentry *ovl_lookup_temp(struct dentry *workdir);
 struct cattr {
 	dev_t rdev;
 	umode_t mode;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 59614faa14c3..34bc4a9f5c61 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -28,7 +28,10 @@ struct ovl_fs {
 	/* creds of process who forced instantiation of super block */
 	const struct cred *creator_cred;
 	bool tmpfile;
+	bool noxattr;
 	wait_queue_head_t copyup_wq;
+	/* sb common to all layers */
+	struct super_block *same_sb;
 };
 
 /* private information held for every overlayfs dentry */
@@ -40,6 +43,7 @@ struct ovl_entry {
 			u64 version;
 			const char *redirect;
 			bool opaque;
+			bool impure;
 			bool copying;
 		};
 		struct rcu_head rcu;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index c9e70d39c1ea..4882ffb37bae 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -49,11 +49,28 @@ static void ovl_dentry_release(struct dentry *dentry)
 	}
 }
 
+static int ovl_check_append_only(struct inode *inode, int flag)
+{
+	/*
+	 * This test was moot in vfs may_open() because overlay inode does
+	 * not have the S_APPEND flag, so re-check on real upper inode
+	 */
+	if (IS_APPEND(inode)) {
+		if  ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
+			return -EPERM;
+		if (flag & O_TRUNC)
+			return -EPERM;
+	}
+
+	return 0;
+}
+
 static struct dentry *ovl_d_real(struct dentry *dentry,
 				 const struct inode *inode,
 				 unsigned int open_flags)
 {
 	struct dentry *real;
+	int err;
 
 	if (!d_is_reg(dentry)) {
 		if (!inode || inode == d_inode(dentry))
@@ -65,15 +82,20 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
 		return dentry;
 
 	if (open_flags) {
-		int err = ovl_open_maybe_copy_up(dentry, open_flags);
-
+		err = ovl_open_maybe_copy_up(dentry, open_flags);
 		if (err)
 			return ERR_PTR(err);
 	}
 
 	real = ovl_dentry_upper(dentry);
-	if (real && (!inode || inode == d_inode(real)))
+	if (real && (!inode || inode == d_inode(real))) {
+		if (!inode) {
+			err = ovl_check_append_only(d_inode(real), open_flags);
+			if (err)
+				return ERR_PTR(err);
+		}
 		return real;
+	}
 
 	real = ovl_dentry_lower(dentry);
 	if (!real)
@@ -709,8 +731,8 @@ static const struct xattr_handler *ovl_xattr_handlers[] = {
 
 static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 {
-	struct path upperpath = { NULL, NULL };
-	struct path workpath = { NULL, NULL };
+	struct path upperpath = { };
+	struct path workpath = { };
 	struct dentry *root_dentry;
 	struct inode *realinode;
 	struct ovl_entry *oe;
@@ -869,6 +891,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 				dput(temp);
 			else
 				pr_warn("overlayfs: upper fs does not support tmpfile.\n");
+
+			/*
+			 * Check if upper/work fs supports trusted.overlay.*
+			 * xattr
+			 */
+			err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE,
+					      "0", 1, 0);
+			if (err) {
+				ufs->noxattr = true;
+				pr_warn("overlayfs: upper fs does not support xattr.\n");
+			} else {
+				vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE);
+			}
 		}
 	}
 
@@ -892,11 +927,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 		ufs->lower_mnt[ufs->numlower] = mnt;
 		ufs->numlower++;
+
+		/* Check if all lower layers are on same sb */
+		if (i == 0)
+			ufs->same_sb = mnt->mnt_sb;
+		else if (ufs->same_sb != mnt->mnt_sb)
+			ufs->same_sb = NULL;
 	}
 
 	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
 	if (!ufs->upper_mnt)
 		sb->s_flags |= MS_RDONLY;
+	else if (ufs->upper_mnt->mnt_sb != ufs->same_sb)
+		ufs->same_sb = NULL;
 
 	if (remote)
 		sb->s_d_op = &ovl_reval_dentry_operations;
@@ -931,7 +974,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	path_put(&workpath);
 	kfree(lowertmp);
 
-	oe->__upperdentry = upperpath.dentry;
+	if (upperpath.dentry) {
+		oe->__upperdentry = upperpath.dentry;
+		oe->impure = ovl_is_impuredir(upperpath.dentry);
+	}
 	for (i = 0; i < numlower; i++) {
 		oe->lowerstack[i].dentry = stack[i].dentry;
 		oe->lowerstack[i].mnt = ufs->lower_mnt[i];
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 6e610a205e15..809048913889 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -40,6 +40,13 @@ const struct cred *ovl_override_creds(struct super_block *sb)
 	return override_creds(ofs->creator_cred);
 }
 
+struct super_block *ovl_same_sb(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->same_sb;
+}
+
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
 {
 	size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
@@ -75,11 +82,13 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
 		type = __OVL_PATH_UPPER;
 
 		/*
-		 * Non-dir dentry can hold lower dentry from previous
-		 * location.
+		 * Non-dir dentry can hold lower dentry of its copy up origin.
 		 */
-		if (oe->numlower && d_is_dir(dentry))
-			type |= __OVL_PATH_MERGE;
+		if (oe->numlower) {
+			type |= __OVL_PATH_ORIGIN;
+			if (d_is_dir(dentry))
+				type |= __OVL_PATH_MERGE;
+		}
 	} else {
 		if (oe->numlower > 1)
 			type |= __OVL_PATH_MERGE;
@@ -100,7 +109,7 @@ void ovl_path_lower(struct dentry *dentry, struct path *path)
 {
 	struct ovl_entry *oe = dentry->d_fsdata;
 
-	*path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL };
+	*path = oe->numlower ? oe->lowerstack[0] : (struct path) { };
 }
 
 enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
@@ -166,6 +175,13 @@ bool ovl_dentry_is_opaque(struct dentry *dentry)
 	return oe->opaque;
 }
 
+bool ovl_dentry_is_impure(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	return oe->impure;
+}
+
 bool ovl_dentry_is_whiteout(struct dentry *dentry)
 {
 	return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
@@ -182,14 +198,7 @@ bool ovl_redirect_dir(struct super_block *sb)
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
 
-	return ofs->config.redirect_dir;
-}
-
-void ovl_clear_redirect_dir(struct super_block *sb)
-{
-	struct ovl_fs *ofs = sb->s_fs_info;
-
-	ofs->config.redirect_dir = false;
+	return ofs->config.redirect_dir && !ofs->noxattr;
 }
 
 const char *ovl_dentry_get_redirect(struct dentry *dentry)
@@ -294,3 +303,59 @@ void ovl_copy_up_end(struct dentry *dentry)
 	wake_up_locked(&ofs->copyup_wq);
 	spin_unlock(&ofs->copyup_wq.lock);
 }
+
+bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
+{
+	int res;
+	char val;
+
+	if (!d_is_dir(dentry))
+		return false;
+
+	res = vfs_getxattr(dentry, name, &val, 1);
+	if (res == 1 && val == 'y')
+		return true;
+
+	return false;
+}
+
+int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+		       const char *name, const void *value, size_t size,
+		       int xerr)
+{
+	int err;
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+	if (ofs->noxattr)
+		return xerr;
+
+	err = ovl_do_setxattr(upperdentry, name, value, size, 0);
+
+	if (err == -EOPNOTSUPP) {
+		pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
+		ofs->noxattr = true;
+		return xerr;
+	}
+
+	return err;
+}
+
+int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
+{
+	int err;
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	if (oe->impure)
+		return 0;
+
+	/*
+	 * Do not fail when upper doesn't support xattrs.
+	 * Upper inodes won't have origin nor redirect xattr anyway.
+	 */
+	err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE,
+				 "y", 1, 0);
+	if (!err)
+		oe->impure = true;
+
+	return err;
+}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 45f6bf68fff3..f1e1927ccd48 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -821,7 +821,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
 	if (!mmget_not_zero(mm))
 		goto free;
 
-	flags = write ? FOLL_WRITE : 0;
+	flags = FOLL_FORCE | (write ? FOLL_WRITE : 0);
 
 	while (count > 0) {
 		int this_len = min_t(int, count, PAGE_SIZE);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0c8b33d99b1..520802da059c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -300,11 +300,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 
 	/* We don't show the stack guard page in /proc/maps */
 	start = vma->vm_start;
-	if (stack_guard_page_start(vma, start))
-		start += PAGE_SIZE;
 	end = vma->vm_end;
-	if (stack_guard_page_end(vma, end))
-		end -= PAGE_SIZE;
 
 	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
 	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 5523df7f17ef..5cb022c8cd33 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -58,7 +58,7 @@ module_param_named(pmsg_size, ramoops_pmsg_size, ulong, 0400);
 MODULE_PARM_DESC(pmsg_size, "size of user space message log");
 
 static unsigned long long mem_address;
-module_param(mem_address, ullong, 0400);
+module_param_hw(mem_address, ullong, other, 0400);
 MODULE_PARM_DESC(mem_address,
 		"start of reserved RAM used to store oops/panic logs");
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ebf80c7739e1..48813aeaab80 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1512,6 +1512,22 @@ int dquot_initialize(struct inode *inode)
 }
 EXPORT_SYMBOL(dquot_initialize);
 
+bool dquot_initialize_needed(struct inode *inode)
+{
+	struct dquot **dquots;
+	int i;
+
+	if (!dquot_active(inode))
+		return false;
+
+	dquots = i_dquot(inode);
+	for (i = 0; i < MAXQUOTAS; i++)
+		if (!dquots[i] && sb_has_quota_active(inode->i_sb, i))
+			return true;
+	return false;
+}
+EXPORT_SYMBOL(dquot_initialize_needed);
+
 /*
  * Release all quotas referenced by inode.
  *
diff --git a/fs/read_write.c b/fs/read_write.c
index 47c1d4484df9..d591eeed061f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -678,16 +678,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
 	struct kiocb kiocb;
 	ssize_t ret;
 
-	if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))
-		return -EOPNOTSUPP;
-
 	init_sync_kiocb(&kiocb, filp);
-	if (flags & RWF_HIPRI)
-		kiocb.ki_flags |= IOCB_HIPRI;
-	if (flags & RWF_DSYNC)
-		kiocb.ki_flags |= IOCB_DSYNC;
-	if (flags & RWF_SYNC)
-		kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+	ret = kiocb_set_rw_flags(&kiocb, flags);
+	if (ret)
+		return ret;
 	kiocb.ki_pos = *ppos;
 
 	if (type == READ)
@@ -1285,7 +1279,7 @@ static size_t compat_writev(struct file *file,
 	if (!(file->f_mode & FMODE_CAN_WRITE))
 		goto out;
 
-	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, 0);
+	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags);
 
 out:
 	if (ret > 0)
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index da01f497180a..a11d773e5ff3 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1112,7 +1112,7 @@ static int flush_commit_list(struct super_block *s,
 		depth = reiserfs_write_unlock_nested(s);
 		if (reiserfs_barrier_flush(s))
 			__sync_dirty_buffer(jl->j_commit_bh,
-					REQ_PREFLUSH | REQ_FUA);
+					REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
 		else
 			sync_dirty_buffer(jl->j_commit_bh);
 		reiserfs_write_lock_nested(s, depth);
@@ -1271,7 +1271,7 @@ static int _update_journal_header_block(struct super_block *sb,
 
 		if (reiserfs_barrier_flush(sb))
 			__sync_dirty_buffer(journal->j_header_bh,
-					REQ_PREFLUSH | REQ_FUA);
+					REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
 		else
 			sync_dirty_buffer(journal->j_header_bh);
 
@@ -2956,7 +2956,7 @@ void reiserfs_wait_on_write_block(struct super_block *s)
 
 static void queue_log_writer(struct super_block *s)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct reiserfs_journal *journal = SB_JOURNAL(s);
 	set_bit(J_WRITERS_QUEUED, &journal->j_state);
 
diff --git a/fs/select.c b/fs/select.c
index d6c652a31e99..5b524a977d91 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -180,7 +180,7 @@ static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
 	return table->entry++;
 }
 
-static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct poll_wqueues *pwq = wait->private;
 	DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
@@ -206,7 +206,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
 	return default_wake_function(&dummy_wait, mode, sync, key);
 }
 
-static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct poll_table_entry *entry;
 
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 270221fcef42..593b022ac11b 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -38,12 +38,12 @@ void signalfd_cleanup(struct sighand_struct *sighand)
 	/*
 	 * The lockless check can race with remove_wait_queue() in progress,
 	 * but in this case its caller should run under rcu_read_lock() and
-	 * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return.
+	 * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return.
 	 */
 	if (likely(!waitqueue_active(wqh)))
 		return;
 
-	/* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
+	/* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */
 	wake_up_poll(wqh, POLLHUP | POLLFREE);
 }
 
diff --git a/fs/stat.c b/fs/stat.c
index f494b182c7c7..c35610845ab1 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -672,6 +672,7 @@ void __inode_add_bytes(struct inode *inode, loff_t bytes)
 		inode->i_bytes -= 512;
 	}
 }
+EXPORT_SYMBOL(__inode_add_bytes);
 
 void inode_add_bytes(struct inode *inode, loff_t bytes)
 {
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index b0d0623c83ed..83a961bf7280 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -61,3 +61,16 @@ config UBIFS_FS_ENCRYPTION
 	  feature is similar to ecryptfs, but it is more memory
 	  efficient since it avoids caching the encrypted and
 	  decrypted pages in the page cache.
+
+config UBIFS_FS_SECURITY
+	bool "UBIFS Security Labels"
+	depends on UBIFS_FS
+	default y
+	help
+	  Security labels provide an access control facility to support Linux
+	  Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO
+	  Linux. This option enables an extended attribute handler for file
+	  security labels in the ubifs filesystem, so that it requires enabling
+	  the extended attribute support in advance.
+
+	  If you are not using a security module, say N.
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 718b749fa11a..7cd8a7b95299 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2391,8 +2391,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
 			ubifs_dump_node(c, sa->node);
 			return -EINVAL;
 		}
-		if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
-		    sa->type != UBIFS_XENT_NODE) {
+		if (sb->type != UBIFS_INO_NODE && sb->type != UBIFS_DENT_NODE &&
+		    sb->type != UBIFS_XENT_NODE) {
 			ubifs_err(c, "bad node type %d", sb->type);
 			ubifs_dump_node(c, sb->node);
 			return -EINVAL;
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 12b9eb5005ff..fdc311246807 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -53,7 +53,7 @@ void ubifs_set_inode_flags(struct inode *inode)
  * ioctl2ubifs - convert ioctl inode flags to UBIFS inode flags.
  * @ioctl_flags: flags to convert
  *
- * This function convert ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags
+ * This function converts ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags
  * (@UBIFS_COMPR_FL, etc).
  */
 static int ioctl2ubifs(int ioctl_flags)
@@ -78,8 +78,8 @@ static int ioctl2ubifs(int ioctl_flags)
  * ubifs2ioctl - convert UBIFS inode flags to ioctl inode flags.
  * @ubifs_flags: flags to convert
  *
- * This function convert UBIFS (@UBIFS_COMPR_FL, etc) to ioctl flags
- * (@FS_COMPR_FL, etc).
+ * This function converts UBIFS inode flags (@UBIFS_COMPR_FL, etc) to ioctl
+ * flags (@FS_COMPR_FL, etc).
  */
 static int ubifs2ioctl(int ubifs_flags)
 {
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 586d59347fff..3af4472061cc 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -442,7 +442,6 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
 {
 	int empty_offs, pad_len;
 
-	lnum = lnum;
 	dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
 
 	ubifs_assert(!(*offs & 7));
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 4da10a6d702a..298b4d89eee9 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1753,13 +1753,23 @@ int ubifs_check_dir_empty(struct inode *dir);
 /* xattr.c */
 extern const struct xattr_handler *ubifs_xattr_handlers[];
 ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size);
-int ubifs_init_security(struct inode *dentry, struct inode *inode,
-			const struct qstr *qstr);
 int ubifs_xattr_set(struct inode *host, const char *name, const void *value,
 		    size_t size, int flags);
 ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf,
 			size_t size);
 
+#ifdef CONFIG_UBIFS_FS_SECURITY
+extern int ubifs_init_security(struct inode *dentry, struct inode *inode,
+			const struct qstr *qstr);
+#else
+static inline int ubifs_init_security(struct inode *dentry,
+			struct inode *inode, const struct qstr *qstr)
+{
+	return 0;
+}
+#endif
+
+
 /* super.c */
 struct inode *ubifs_iget(struct super_block *sb, unsigned long inum);
 
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 3e53fdbf7997..6c9e62c2ef55 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -559,6 +559,7 @@ out_free:
 	return err;
 }
 
+#ifdef CONFIG_UBIFS_FS_SECURITY
 static int init_xattrs(struct inode *inode, const struct xattr *xattr_array,
 		      void *fs_info)
 {
@@ -599,6 +600,7 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode,
 	}
 	return err;
 }
+#endif
 
 static int xattr_get(const struct xattr_handler *handler,
 			   struct dentry *dentry, struct inode *inode,
@@ -639,15 +641,19 @@ static const struct xattr_handler ubifs_trusted_xattr_handler = {
 	.set = xattr_set,
 };
 
+#ifdef CONFIG_UBIFS_FS_SECURITY
 static const struct xattr_handler ubifs_security_xattr_handler = {
 	.prefix = XATTR_SECURITY_PREFIX,
 	.get = xattr_get,
 	.set = xattr_set,
 };
+#endif
 
 const struct xattr_handler *ubifs_xattr_handlers[] = {
 	&ubifs_user_xattr_handler,
 	&ubifs_trusted_xattr_handler,
+#ifdef CONFIG_UBIFS_FS_SECURITY
 	&ubifs_security_xattr_handler,
+#endif
 	NULL
 };
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index a0376a2c1c29..f80be4c5df9d 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -82,7 +82,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
 			ufs_error (sb, "ufs_free_fragments",
 				   "bit already cleared for fragment %u", i);
 	}
-	
+
+	inode_sub_bytes(inode, count << uspi->s_fshift);
 	fs32_add(sb, &ucg->cg_cs.cs_nffree, count);
 	uspi->cs_total.cs_nffree += count;
 	fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
@@ -184,6 +185,7 @@ do_more:
 			ufs_error(sb, "ufs_free_blocks", "freeing free fragment");
 		}
 		ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
+		inode_sub_bytes(inode, uspi->s_fpb << uspi->s_fshift);
 		if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
 			ufs_clusteracct (sb, ucpi, blkno, 1);
 
@@ -398,10 +400,12 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 	/*
 	 * There is not enough space for user on the device
 	 */
-	if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) {
-		mutex_unlock(&UFS_SB(sb)->s_lock);
-		UFSD("EXIT (FAILED)\n");
-		return 0;
+	if (unlikely(ufs_freefrags(uspi) <= uspi->s_root_blocks)) {
+		if (!capable(CAP_SYS_RESOURCE)) {
+			mutex_unlock(&UFS_SB(sb)->s_lock);
+			UFSD("EXIT (FAILED)\n");
+			return 0;
+		}
 	}
 
 	if (goal >= uspi->s_size) 
@@ -419,12 +423,12 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 		if (result) {
 			ufs_clear_frags(inode, result + oldcount,
 					newcount - oldcount, locked_page != NULL);
+			*err = 0;
 			write_seqlock(&UFS_I(inode)->meta_lock);
 			ufs_cpu_to_data_ptr(sb, p, result);
-			write_sequnlock(&UFS_I(inode)->meta_lock);
-			*err = 0;
 			UFS_I(inode)->i_lastfrag =
 				max(UFS_I(inode)->i_lastfrag, fragment + count);
+			write_sequnlock(&UFS_I(inode)->meta_lock);
 		}
 		mutex_unlock(&UFS_SB(sb)->s_lock);
 		UFSD("EXIT, result %llu\n", (unsigned long long)result);
@@ -437,8 +441,10 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 	result = ufs_add_fragments(inode, tmp, oldcount, newcount);
 	if (result) {
 		*err = 0;
+		read_seqlock_excl(&UFS_I(inode)->meta_lock);
 		UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
 						fragment + count);
+		read_sequnlock_excl(&UFS_I(inode)->meta_lock);
 		ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
 				locked_page != NULL);
 		mutex_unlock(&UFS_SB(sb)->s_lock);
@@ -449,39 +455,29 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 	/*
 	 * allocate new block and move data
 	 */
-	switch (fs32_to_cpu(sb, usb1->fs_optim)) {
-	    case UFS_OPTSPACE:
+	if (fs32_to_cpu(sb, usb1->fs_optim) == UFS_OPTSPACE) {
 		request = newcount;
-		if (uspi->s_minfree < 5 || uspi->cs_total.cs_nffree
-		    > uspi->s_dsize * uspi->s_minfree / (2 * 100))
-			break;
-		usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
-		break;
-	    default:
-		usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
-	
-	    case UFS_OPTTIME:
+		if (uspi->cs_total.cs_nffree < uspi->s_space_to_time)
+			usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
+	} else {
 		request = uspi->s_fpb;
-		if (uspi->cs_total.cs_nffree < uspi->s_dsize *
-		    (uspi->s_minfree - 2) / 100)
-			break;
-		usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
-		break;
+		if (uspi->cs_total.cs_nffree > uspi->s_time_to_space)
+			usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE);
 	}
 	result = ufs_alloc_fragments (inode, cgno, goal, request, err);
 	if (result) {
 		ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
 				locked_page != NULL);
+		mutex_unlock(&UFS_SB(sb)->s_lock);
 		ufs_change_blocknr(inode, fragment - oldcount, oldcount,
 				   uspi->s_sbbase + tmp,
 				   uspi->s_sbbase + result, locked_page);
+		*err = 0;
 		write_seqlock(&UFS_I(inode)->meta_lock);
 		ufs_cpu_to_data_ptr(sb, p, result);
-		write_sequnlock(&UFS_I(inode)->meta_lock);
-		*err = 0;
 		UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
 						fragment + count);
-		mutex_unlock(&UFS_SB(sb)->s_lock);
+		write_sequnlock(&UFS_I(inode)->meta_lock);
 		if (newcount < request)
 			ufs_free_fragments (inode, result + newcount, request - newcount);
 		ufs_free_fragments (inode, tmp, oldcount);
@@ -494,6 +490,20 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 	return 0;
 }		
 
+static bool try_add_frags(struct inode *inode, unsigned frags)
+{
+	unsigned size = frags * i_blocksize(inode);
+	spin_lock(&inode->i_lock);
+	__inode_add_bytes(inode, size);
+	if (unlikely((u32)inode->i_blocks != inode->i_blocks)) {
+		__inode_sub_bytes(inode, size);
+		spin_unlock(&inode->i_lock);
+		return false;
+	}
+	spin_unlock(&inode->i_lock);
+	return true;
+}
+
 static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
 			     unsigned oldcount, unsigned newcount)
 {
@@ -530,6 +540,9 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
 	for (i = oldcount; i < newcount; i++)
 		if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i))
 			return 0;
+
+	if (!try_add_frags(inode, count))
+		return 0;
 	/*
 	 * Block can be extended
 	 */
@@ -647,6 +660,7 @@ cg_found:
 			ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
 		i = uspi->s_fpb - count;
 
+		inode_sub_bytes(inode, i << uspi->s_fshift);
 		fs32_add(sb, &ucg->cg_cs.cs_nffree, i);
 		uspi->cs_total.cs_nffree += i;
 		fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, i);
@@ -657,6 +671,8 @@ cg_found:
 	result = ufs_bitmap_search (sb, ucpi, goal, allocsize);
 	if (result == INVBLOCK)
 		return 0;
+	if (!try_add_frags(inode, count))
+		return 0;
 	for (i = 0; i < count; i++)
 		ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i);
 	
@@ -716,6 +732,8 @@ norot:
 		return INVBLOCK;
 	ucpi->c_rotor = result;
 gotit:
+	if (!try_add_frags(inode, uspi->s_fpb))
+		return 0;
 	blkno = ufs_fragstoblks(result);
 	ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
 	if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 7e41aee7b69a..f36d6a53687d 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -235,7 +235,8 @@ ufs_extend_tail(struct inode *inode, u64 writes_to,
 
 	p = ufs_get_direct_data_ptr(uspi, ufsi, block);
 	tmp = ufs_new_fragments(inode, p, lastfrag, ufs_data_ptr_to_cpu(sb, p),
-				new_size, err, locked_page);
+				new_size - (lastfrag & uspi->s_fpbmask), err,
+				locked_page);
 	return tmp != 0;
 }
 
@@ -284,7 +285,7 @@ ufs_inode_getfrag(struct inode *inode, unsigned index,
 			goal += uspi->s_fpb;
 	}
 	tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment),
-				goal, uspi->s_fpb, err, locked_page);
+				goal, nfrags, err, locked_page);
 
 	if (!tmp) {
 		*err = -ENOSPC;
@@ -400,11 +401,20 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff
 	u64 phys64 = 0;
 	unsigned frag = fragment & uspi->s_fpbmask;
 
-	if (!create) {
-		phys64 = ufs_frag_map(inode, offsets, depth);
-		goto out;
-	}
+	phys64 = ufs_frag_map(inode, offsets, depth);
+	if (!create)
+		goto done;
 
+	if (phys64) {
+		if (fragment >= UFS_NDIR_FRAGMENT)
+			goto done;
+		read_seqlock_excl(&UFS_I(inode)->meta_lock);
+		if (fragment < UFS_I(inode)->i_lastfrag) {
+			read_sequnlock_excl(&UFS_I(inode)->meta_lock);
+			goto done;
+		}
+		read_sequnlock_excl(&UFS_I(inode)->meta_lock);
+	}
         /* This code entered only while writing ....? */
 
 	mutex_lock(&UFS_I(inode)->truncate_mutex);
@@ -448,6 +458,11 @@ out:
 	}
 	mutex_unlock(&UFS_I(inode)->truncate_mutex);
 	return err;
+
+done:
+	if (phys64)
+		map_bh(bh_result, sb, phys64 + frag);
+	return 0;
 }
 
 static int ufs_writepage(struct page *page, struct writeback_control *wbc)
@@ -551,10 +566,8 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
 	 */
 	inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode);
 	set_nlink(inode, fs16_to_cpu(sb, ufs_inode->ui_nlink));
-	if (inode->i_nlink == 0) {
-		ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
-		return -1;
-	}
+	if (inode->i_nlink == 0)
+		return -ESTALE;
 
 	/*
 	 * Linux now has 32-bit uid and gid, so we can support EFT.
@@ -563,9 +576,9 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
 	i_gid_write(inode, ufs_get_inode_gid(sb, ufs_inode));
 
 	inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size);
-	inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
-	inode->i_ctime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
-	inode->i_mtime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
+	inode->i_atime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
+	inode->i_ctime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
+	inode->i_mtime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
 	inode->i_mtime.tv_nsec = 0;
 	inode->i_atime.tv_nsec = 0;
 	inode->i_ctime.tv_nsec = 0;
@@ -599,10 +612,8 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
 	 */
 	inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode);
 	set_nlink(inode, fs16_to_cpu(sb, ufs2_inode->ui_nlink));
-	if (inode->i_nlink == 0) {
-		ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
-		return -1;
-	}
+	if (inode->i_nlink == 0)
+		return -ESTALE;
 
         /*
          * Linux now has 32-bit uid and gid, so we can support EFT.
@@ -642,7 +653,7 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
 	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
 	struct buffer_head * bh;
 	struct inode *inode;
-	int err;
+	int err = -EIO;
 
 	UFSD("ENTER, ino %lu\n", ino);
 
@@ -677,9 +688,10 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
 		err = ufs1_read_inode(inode,
 				      ufs_inode + ufs_inotofsbo(inode->i_ino));
 	}
-
+	brelse(bh);
 	if (err)
 		goto bad_inode;
+
 	inode->i_version++;
 	ufsi->i_lastfrag =
 		(inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift;
@@ -688,15 +700,13 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
 
 	ufs_set_inode_ops(inode);
 
-	brelse(bh);
-
 	UFSD("EXIT\n");
 	unlock_new_inode(inode);
 	return inode;
 
 bad_inode:
 	iget_failed(inode);
-	return ERR_PTR(-EIO);
+	return ERR_PTR(err);
 }
 
 static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode)
@@ -841,8 +851,11 @@ void ufs_evict_inode(struct inode * inode)
 	truncate_inode_pages_final(&inode->i_data);
 	if (want_delete) {
 		inode->i_size = 0;
-		if (inode->i_blocks)
+		if (inode->i_blocks &&
+		    (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+		     S_ISLNK(inode->i_mode)))
 			ufs_truncate_blocks(inode);
+		ufs_update_inode(inode, inode_needs_sync(inode));
 	}
 
 	invalidate_inode_buffers(inode);
@@ -868,7 +881,6 @@ static inline void free_data(struct to_free *ctx, u64 from, unsigned count)
 	ctx->to = from + count;
 }
 
-#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift)
 #define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift)
 
 static void ufs_trunc_direct(struct inode *inode)
@@ -1100,25 +1112,30 @@ out:
        return err;
 }
 
-static void __ufs_truncate_blocks(struct inode *inode)
+static void ufs_truncate_blocks(struct inode *inode)
 {
 	struct ufs_inode_info *ufsi = UFS_I(inode);
 	struct super_block *sb = inode->i_sb;
 	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
 	unsigned offsets[4];
-	int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets);
+	int depth;
 	int depth2;
 	unsigned i;
 	struct ufs_buffer_head *ubh[3];
 	void *p;
 	u64 block;
 
-	if (!depth)
-		return;
+	if (inode->i_size) {
+		sector_t last = (inode->i_size - 1) >> uspi->s_bshift;
+		depth = ufs_block_to_path(inode, last, offsets);
+		if (!depth)
+			return;
+	} else {
+		depth = 1;
+	}
 
-	/* find the last non-zero in offsets[] */
 	for (depth2 = depth - 1; depth2; depth2--)
-		if (offsets[depth2])
+		if (offsets[depth2] != uspi->s_apb - 1)
 			break;
 
 	mutex_lock(&ufsi->truncate_mutex);
@@ -1127,9 +1144,8 @@ static void __ufs_truncate_blocks(struct inode *inode)
 		offsets[0] = UFS_IND_BLOCK;
 	} else {
 		/* get the blocks that should be partially emptied */
-		p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]);
+		p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]++);
 		for (i = 0; i < depth2; i++) {
-			offsets[i]++;	/* next branch is fully freed */
 			block = ufs_data_ptr_to_cpu(sb, p);
 			if (!block)
 				break;
@@ -1140,7 +1156,7 @@ static void __ufs_truncate_blocks(struct inode *inode)
 				write_sequnlock(&ufsi->meta_lock);
 				break;
 			}
-			p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]);
+			p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]++);
 		}
 		while (i--)
 			free_branch_tail(inode, offsets[i + 1], ubh[i], depth - i - 1);
@@ -1155,7 +1171,9 @@ static void __ufs_truncate_blocks(struct inode *inode)
 			free_full_branch(inode, block, i - UFS_IND_BLOCK + 1);
 		}
 	}
+	read_seqlock_excl(&ufsi->meta_lock);
 	ufsi->i_lastfrag = DIRECT_FRAGMENT;
+	read_sequnlock_excl(&ufsi->meta_lock);
 	mark_inode_dirty(inode);
 	mutex_unlock(&ufsi->truncate_mutex);
 }
@@ -1183,7 +1201,7 @@ static int ufs_truncate(struct inode *inode, loff_t size)
 
 	truncate_setsize(inode, size);
 
-	__ufs_truncate_blocks(inode);
+	ufs_truncate_blocks(inode);
 	inode->i_mtime = inode->i_ctime = current_time(inode);
 	mark_inode_dirty(inode);
 out:
@@ -1191,16 +1209,6 @@ out:
 	return err;
 }
 
-static void ufs_truncate_blocks(struct inode *inode)
-{
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-	      S_ISLNK(inode->i_mode)))
-		return;
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-		return;
-	__ufs_truncate_blocks(inode);
-}
-
 int ufs_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = d_inode(dentry);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 131b2b77c818..0a4f58a5073c 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -480,7 +480,7 @@ static void ufs_setup_cstotal(struct super_block *sb)
 	usb3 = ubh_get_usb_third(uspi);
 
 	if ((mtype == UFS_MOUNT_UFSTYPE_44BSD &&
-	     (usb1->fs_flags & UFS_FLAGS_UPDATED)) ||
+	     (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) ||
 	    mtype == UFS_MOUNT_UFSTYPE_UFS2) {
 		/*we have statistic in different place, then usual*/
 		uspi->cs_total.cs_ndir = fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir);
@@ -596,9 +596,7 @@ static void ufs_put_cstotal(struct super_block *sb)
 	usb2 = ubh_get_usb_second(uspi);
 	usb3 = ubh_get_usb_third(uspi);
 
-	if ((mtype == UFS_MOUNT_UFSTYPE_44BSD &&
-	     (usb1->fs_flags & UFS_FLAGS_UPDATED)) ||
-	    mtype == UFS_MOUNT_UFSTYPE_UFS2) {
+	if (mtype == UFS_MOUNT_UFSTYPE_UFS2) {
 		/*we have statistic in different place, then usual*/
 		usb2->fs_un.fs_u2.cs_ndir =
 			cpu_to_fs64(sb, uspi->cs_total.cs_ndir);
@@ -608,16 +606,26 @@ static void ufs_put_cstotal(struct super_block *sb)
 			cpu_to_fs64(sb, uspi->cs_total.cs_nifree);
 		usb3->fs_un1.fs_u2.cs_nffree =
 			cpu_to_fs64(sb, uspi->cs_total.cs_nffree);
-	} else {
-		usb1->fs_cstotal.cs_ndir =
-			cpu_to_fs32(sb, uspi->cs_total.cs_ndir);
-		usb1->fs_cstotal.cs_nbfree =
-			cpu_to_fs32(sb, uspi->cs_total.cs_nbfree);
-		usb1->fs_cstotal.cs_nifree =
-			cpu_to_fs32(sb, uspi->cs_total.cs_nifree);
-		usb1->fs_cstotal.cs_nffree =
-			cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
+		goto out;
+	}
+
+	if (mtype == UFS_MOUNT_UFSTYPE_44BSD &&
+	     (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) {
+		/* store stats in both old and new places */
+		usb2->fs_un.fs_u2.cs_ndir =
+			cpu_to_fs64(sb, uspi->cs_total.cs_ndir);
+		usb2->fs_un.fs_u2.cs_nbfree =
+			cpu_to_fs64(sb, uspi->cs_total.cs_nbfree);
+		usb3->fs_un1.fs_u2.cs_nifree =
+			cpu_to_fs64(sb, uspi->cs_total.cs_nifree);
+		usb3->fs_un1.fs_u2.cs_nffree =
+			cpu_to_fs64(sb, uspi->cs_total.cs_nffree);
 	}
+	usb1->fs_cstotal.cs_ndir = cpu_to_fs32(sb, uspi->cs_total.cs_ndir);
+	usb1->fs_cstotal.cs_nbfree = cpu_to_fs32(sb, uspi->cs_total.cs_nbfree);
+	usb1->fs_cstotal.cs_nifree = cpu_to_fs32(sb, uspi->cs_total.cs_nifree);
+	usb1->fs_cstotal.cs_nffree = cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
+out:
 	ubh_mark_buffer_dirty(USPI_UBH(uspi));
 	ufs_print_super_stuff(sb, usb1, usb2, usb3);
 	UFSD("EXIT\n");
@@ -746,6 +754,23 @@ static void ufs_put_super(struct super_block *sb)
 	return;
 }
 
+static u64 ufs_max_bytes(struct super_block *sb)
+{
+	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
+	int bits = uspi->s_apbshift;
+	u64 res;
+
+	if (bits > 21)
+		res = ~0ULL;
+	else
+		res = UFS_NDADDR + (1LL << bits) + (1LL << (2*bits)) +
+			(1LL << (3*bits));
+
+	if (res >= (MAX_LFS_FILESIZE >> uspi->s_bshift))
+		return MAX_LFS_FILESIZE;
+	return res << uspi->s_bshift;
+}
+
 static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct ufs_sb_info * sbi;
@@ -812,9 +837,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 	uspi->s_dirblksize = UFS_SECTOR_SIZE;
 	super_block_offset=UFS_SBLOCK;
 
-	/* Keep 2Gig file limit. Some UFS variants need to override 
-	   this but as I don't know which I'll let those in the know loosen
-	   the rules */
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+
 	switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) {
 	case UFS_MOUNT_UFSTYPE_44BSD:
 		UFSD("ufstype=44bsd\n");
@@ -980,6 +1004,13 @@ again:
 		flags |=  UFS_ST_SUN;
 	}
 
+	if ((flags & UFS_ST_MASK) == UFS_ST_44BSD &&
+	    uspi->s_postblformat == UFS_42POSTBLFMT) {
+		if (!silent)
+			pr_err("this is not a 44bsd filesystem");
+		goto failed;
+	}
+
 	/*
 	 * Check ufs magic number
 	 */
@@ -1127,8 +1158,8 @@ magic_found:
 	uspi->s_cgmask = fs32_to_cpu(sb, usb1->fs_cgmask);
 
 	if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
-		uspi->s_u2_size  = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size);
-		uspi->s_u2_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
+		uspi->s_size  = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size);
+		uspi->s_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
 	} else {
 		uspi->s_size  =  fs32_to_cpu(sb, usb1->fs_size);
 		uspi->s_dsize =  fs32_to_cpu(sb, usb1->fs_dsize);
@@ -1177,6 +1208,18 @@ magic_found:
 	uspi->s_postbloff = fs32_to_cpu(sb, usb3->fs_postbloff);
 	uspi->s_rotbloff = fs32_to_cpu(sb, usb3->fs_rotbloff);
 
+	uspi->s_root_blocks = mul_u64_u32_div(uspi->s_dsize,
+					      uspi->s_minfree, 100);
+	if (uspi->s_minfree <= 5) {
+		uspi->s_time_to_space = ~0ULL;
+		uspi->s_space_to_time = 0;
+		usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE);
+	} else {
+		uspi->s_time_to_space = (uspi->s_root_blocks / 2) + 1;
+		uspi->s_space_to_time = mul_u64_u32_div(uspi->s_dsize,
+					      uspi->s_minfree - 2, 100) - 1;
+	}
+
 	/*
 	 * Compute another frequently used values
 	 */
@@ -1212,6 +1255,7 @@ magic_found:
 			    "fast symlink size (%u)\n", uspi->s_maxsymlinklen);
 		uspi->s_maxsymlinklen = maxsymlen;
 	}
+	sb->s_maxbytes = ufs_max_bytes(sb);
 	sb->s_max_links = UFS_LINK_MAX;
 
 	inode = ufs_iget(sb, UFS_ROOTINO);
@@ -1365,19 +1409,17 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	mutex_lock(&UFS_SB(sb)->s_lock);
 	usb3 = ubh_get_usb_third(uspi);
 	
-	if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
+	if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
 		buf->f_type = UFS2_MAGIC;
-		buf->f_blocks = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
-	} else {
+	else
 		buf->f_type = UFS_MAGIC;
-		buf->f_blocks = uspi->s_dsize;
-	}
-	buf->f_bfree = ufs_blkstofrags(uspi->cs_total.cs_nbfree) +
-		uspi->cs_total.cs_nffree;
+
+	buf->f_blocks = uspi->s_dsize;
+	buf->f_bfree = ufs_freefrags(uspi);
 	buf->f_ffree = uspi->cs_total.cs_nifree;
 	buf->f_bsize = sb->s_blocksize;
-	buf->f_bavail = (buf->f_bfree > (((long)buf->f_blocks / 100) * uspi->s_minfree))
-		? (buf->f_bfree - (((long)buf->f_blocks / 100) * uspi->s_minfree)) : 0;
+	buf->f_bavail = (buf->f_bfree > uspi->s_root_blocks)
+		? (buf->f_bfree - uspi->s_root_blocks) : 0;
 	buf->f_files = uspi->s_ncg * uspi->s_ipg;
 	buf->f_namelen = UFS_MAXNAMLEN;
 	buf->f_fsid.val[0] = (u32)id;
diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h
index 0cbd5d340b67..150eef6f1233 100644
--- a/fs/ufs/ufs_fs.h
+++ b/fs/ufs/ufs_fs.h
@@ -733,10 +733,8 @@ struct ufs_sb_private_info {
 	__u32	s_dblkno;	/* offset of first data after cg */
 	__u32	s_cgoffset;	/* cylinder group offset in cylinder */
 	__u32	s_cgmask;	/* used to calc mod fs_ntrak */
-	__u32	s_size;		/* number of blocks (fragments) in fs */
-	__u32	s_dsize;	/* number of data blocks in fs */
-	__u64	s_u2_size;	/* ufs2: number of blocks (fragments) in fs */
-	__u64	s_u2_dsize;	/*ufs2:  number of data blocks in fs */
+	__u64	s_size;		/* number of blocks (fragments) in fs */
+	__u64	s_dsize;	/* number of data blocks in fs */
 	__u32	s_ncg;		/* number of cylinder groups */
 	__u32	s_bsize;	/* size of basic blocks */
 	__u32	s_fsize;	/* size of fragments */
@@ -793,6 +791,9 @@ struct ufs_sb_private_info {
 	__u32	s_maxsymlinklen;/* upper limit on fast symlinks' size */
 	__s32	fs_magic;       /* filesystem magic */
 	unsigned int s_dirblksize;
+	__u64   s_root_blocks;
+	__u64	s_time_to_space;
+	__u64	s_space_to_time;
 };
 
 /*
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index f41ad0a6106f..02497a492eb2 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -243,9 +243,8 @@ ufs_set_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi, dev_t dev
 struct page *ufs_get_locked_page(struct address_space *mapping,
 				 pgoff_t index)
 {
-	struct page *page;
-
-	page = find_lock_page(mapping, index);
+	struct inode *inode = mapping->host;
+	struct page *page = find_lock_page(mapping, index);
 	if (!page) {
 		page = read_mapping_page(mapping, index, NULL);
 
@@ -253,7 +252,7 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
 			printk(KERN_ERR "ufs_change_blocknr: "
 			       "read_mapping_page error: ino %lu, index: %lu\n",
 			       mapping->host->i_ino, index);
-			goto out;
+			return page;
 		}
 
 		lock_page(page);
@@ -262,8 +261,7 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
 			/* Truncate got there first */
 			unlock_page(page);
 			put_page(page);
-			page = NULL;
-			goto out;
+			return NULL;
 		}
 
 		if (!PageUptodate(page) || PageError(page)) {
@@ -272,11 +270,12 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
 
 			printk(KERN_ERR "ufs_change_blocknr: "
 			       "can not read page: ino %lu, index: %lu\n",
-			       mapping->host->i_ino, index);
+			       inode->i_ino, index);
 
-			page = ERR_PTR(-EIO);
+			return ERR_PTR(-EIO);
 		}
 	}
-out:
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
 	return page;
 }
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index b7fbf53dbc81..9fc7119a1551 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -350,16 +350,11 @@ static inline void *ubh_get_data_ptr(struct ufs_sb_private_info *uspi,
 #define ubh_blkmap(ubh,begin,bit) \
 	((*ubh_get_addr(ubh, (begin) + ((bit) >> 3)) >> ((bit) & 7)) & (0xff >> (UFS_MAXFRAG - uspi->s_fpb)))
 
-/*
- * Determine the number of available frags given a
- * percentage to hold in reserve.
- */
 static inline u64
-ufs_freespace(struct ufs_sb_private_info *uspi, int percentreserved)
+ufs_freefrags(struct ufs_sb_private_info *uspi)
 {
 	return ufs_blkstofrags(uspi->cs_total.cs_nbfree) +
-		uspi->cs_total.cs_nffree -
-		(uspi->s_dsize * (percentreserved) / 100);
+		uspi->cs_total.cs_nffree;
 }
 
 /*
@@ -473,15 +468,19 @@ static inline unsigned _ubh_find_last_zero_bit_(
 static inline int _ubh_isblockset_(struct ufs_sb_private_info * uspi,
 	struct ufs_buffer_head * ubh, unsigned begin, unsigned block)
 {
+	u8 mask;
 	switch (uspi->s_fpb) {
 	case 8:
 	    	return (*ubh_get_addr (ubh, begin + block) == 0xff);
 	case 4:
-		return (*ubh_get_addr (ubh, begin + (block >> 1)) == (0x0f << ((block & 0x01) << 2)));
+		mask = 0x0f << ((block & 0x01) << 2);
+		return (*ubh_get_addr (ubh, begin + (block >> 1)) & mask) == mask;
 	case 2:
-		return (*ubh_get_addr (ubh, begin + (block >> 2)) == (0x03 << ((block & 0x03) << 1)));
+		mask = 0x03 << ((block & 0x03) << 1);
+		return (*ubh_get_addr (ubh, begin + (block >> 2)) & mask) == mask;
 	case 1:
-		return (*ubh_get_addr (ubh, begin + (block >> 3)) == (0x01 << (block & 0x07)));
+		mask = 0x01 << (block & 0x07);
+		return (*ubh_get_addr (ubh, begin + (block >> 3)) & mask) == mask;
 	}
 	return 0;	
 }
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index f7555fc25877..6148ccd6cccf 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -81,7 +81,7 @@ struct userfaultfd_unmap_ctx {
 
 struct userfaultfd_wait_queue {
 	struct uffd_msg msg;
-	wait_queue_t wq;
+	wait_queue_entry_t wq;
 	struct userfaultfd_ctx *ctx;
 	bool waken;
 };
@@ -91,7 +91,7 @@ struct userfaultfd_wake_range {
 	unsigned long len;
 };
 
-static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
+static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
 				     int wake_flags, void *key)
 {
 	struct userfaultfd_wake_range *range = key;
@@ -129,7 +129,7 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
 		 * wouldn't be enough, the smp_mb__before_spinlock is
 		 * enough to avoid an explicit smp_mb() here.
 		 */
-		list_del_init(&wq->task_list);
+		list_del_init(&wq->entry);
 out:
 	return ret;
 }
@@ -340,9 +340,28 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	bool must_wait, return_to_userland;
 	long blocking_state;
 
-	BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
-
 	ret = VM_FAULT_SIGBUS;
+
+	/*
+	 * We don't do userfault handling for the final child pid update.
+	 *
+	 * We also don't do userfault handling during
+	 * coredumping. hugetlbfs has the special
+	 * follow_hugetlb_page() to skip missing pages in the
+	 * FOLL_DUMP case, anon memory also checks for FOLL_DUMP with
+	 * the no_page_table() helper in follow_page_mask(), but the
+	 * shmem_vm_ops->fault method is invoked even during
+	 * coredumping without mmap_sem and it ends up here.
+	 */
+	if (current->flags & (PF_EXITING|PF_DUMPCORE))
+		goto out;
+
+	/*
+	 * Coredumping runs without mmap_sem so we can only check that
+	 * the mmap_sem is held, if PF_DUMPCORE was not set.
+	 */
+	WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem));
+
 	ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
 	if (!ctx)
 		goto out;
@@ -361,12 +380,6 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 		goto out;
 
 	/*
-	 * We don't do userfault handling for the final child pid update.
-	 */
-	if (current->flags & PF_EXITING)
-		goto out;
-
-	/*
 	 * Check that we can return VM_FAULT_RETRY.
 	 *
 	 * NOTE: it should become possible to return VM_FAULT_RETRY
@@ -509,13 +522,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 * and it's fine not to block on the spinlock. The uwq on this
 	 * kernel stack can be released after the list_del_init.
 	 */
-	if (!list_empty_careful(&uwq.wq.task_list)) {
+	if (!list_empty_careful(&uwq.wq.entry)) {
 		spin_lock(&ctx->fault_pending_wqh.lock);
 		/*
 		 * No need of list_del_init(), the uwq on the stack
 		 * will be freed shortly anyway.
 		 */
-		list_del(&uwq.wq.task_list);
+		list_del(&uwq.wq.entry);
 		spin_unlock(&ctx->fault_pending_wqh.lock);
 	}
 
@@ -847,7 +860,7 @@ wakeup:
 static inline struct userfaultfd_wait_queue *find_userfault_in(
 		wait_queue_head_t *wqh)
 {
-	wait_queue_t *wq;
+	wait_queue_entry_t *wq;
 	struct userfaultfd_wait_queue *uwq;
 
 	VM_BUG_ON(!spin_is_locked(&wqh->lock));
@@ -856,7 +869,7 @@ static inline struct userfaultfd_wait_queue *find_userfault_in(
 	if (!waitqueue_active(wqh))
 		goto out;
 	/* walk in reverse to provide FIFO behavior to read userfaults */
-	wq = list_last_entry(&wqh->task_list, typeof(*wq), task_list);
+	wq = list_last_entry(&wqh->head, typeof(*wq), entry);
 	uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
 out:
 	return uwq;
@@ -990,14 +1003,14 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 			 * changes __remove_wait_queue() to use
 			 * list_del_init() in turn breaking the
 			 * !list_empty_careful() check in
-			 * handle_userfault(). The uwq->wq.task_list
+			 * handle_userfault(). The uwq->wq.head list
 			 * must never be empty at any time during the
 			 * refile, or the waitqueue could disappear
 			 * from under us. The "wait_queue_head_t"
 			 * parameter of __remove_wait_queue() is unused
 			 * anyway.
 			 */
-			list_del(&uwq->wq.task_list);
+			list_del(&uwq->wq.entry);
 			__add_wait_queue(&ctx->fault_wqh, &uwq->wq);
 
 			write_seqcount_end(&ctx->refile_seq);
@@ -1019,7 +1032,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 				fork_nctx = (struct userfaultfd_ctx *)
 					(unsigned long)
 					uwq->msg.arg.reserved.reserved1;
-				list_move(&uwq->wq.task_list, &fork_event);
+				list_move(&uwq->wq.entry, &fork_event);
 				spin_unlock(&ctx->event_wqh.lock);
 				ret = 0;
 				break;
@@ -1056,8 +1069,8 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 			if (!list_empty(&fork_event)) {
 				uwq = list_first_entry(&fork_event,
 						       typeof(*uwq),
-						       wq.task_list);
-				list_del(&uwq->wq.task_list);
+						       wq.entry);
+				list_del(&uwq->wq.entry);
 				__add_wait_queue(&ctx->event_wqh, &uwq->wq);
 				userfaultfd_event_complete(ctx, uwq);
 			}
@@ -1734,17 +1747,17 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
 static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
 {
 	struct userfaultfd_ctx *ctx = f->private_data;
-	wait_queue_t *wq;
+	wait_queue_entry_t *wq;
 	struct userfaultfd_wait_queue *uwq;
 	unsigned long pending = 0, total = 0;
 
 	spin_lock(&ctx->fault_pending_wqh.lock);
-	list_for_each_entry(wq, &ctx->fault_pending_wqh.task_list, task_list) {
+	list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {
 		uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
 		pending++;
 		total++;
 	}
-	list_for_each_entry(wq, &ctx->fault_wqh.task_list, task_list) {
+	list_for_each_entry(wq, &ctx->fault_wqh.head, entry) {
 		uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
 		total++;
 	}
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 5c90f82b8f6b..a6e955bfead8 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -98,8 +98,7 @@ xfs-y				+= xfs_aops.o \
 				   xfs_sysfs.o \
 				   xfs_trans.o \
 				   xfs_xattr.o \
-				   kmem.o \
-				   uuid.o
+				   kmem.o
 
 # low-level transaction/log code
 xfs-y				+= xfs_log.o \
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index f02eb7673392..a7048eafa8e6 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1280,7 +1280,6 @@ xfs_bmap_read_extents(
 		xfs_bmbt_rec_t	*frp;
 		xfs_fsblock_t	nextbno;
 		xfs_extnum_t	num_recs;
-		xfs_extnum_t	start;
 
 		num_recs = xfs_btree_get_numrecs(block);
 		if (unlikely(i + num_recs > room)) {
@@ -1303,7 +1302,6 @@ xfs_bmap_read_extents(
 		 * Copy records into the extent records.
 		 */
 		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
-		start = i;
 		for (j = 0; j < num_recs; j++, i++, frp++) {
 			xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
 			trp->l0 = be64_to_cpu(frp->l0);
@@ -2065,8 +2063,10 @@ xfs_bmap_add_extent_delay_real(
 		}
 		temp = xfs_bmap_worst_indlen(bma->ip, temp);
 		temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
-		diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
-			(bma->cur ? bma->cur->bc_private.b.allocated : 0));
+		diff = (int)(temp + temp2 -
+			     (startblockval(PREV.br_startblock) -
+			      (bma->cur ?
+			       bma->cur->bc_private.b.allocated : 0)));
 		if (diff > 0) {
 			error = xfs_mod_fdblocks(bma->ip->i_mount,
 						 -((int64_t)diff), false);
@@ -2123,7 +2123,6 @@ xfs_bmap_add_extent_delay_real(
 		temp = da_new;
 		if (bma->cur)
 			temp += bma->cur->bc_private.b.allocated;
-		ASSERT(temp <= da_old);
 		if (temp < da_old)
 			xfs_mod_fdblocks(bma->ip->i_mount,
 					(int64_t)(da_old - temp), false);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 5392674bf893..3a673ba201aa 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4395,7 +4395,7 @@ xfs_btree_visit_blocks(
 			xfs_btree_readahead_ptr(cur, ptr, 1);
 
 			/* save for the next iteration of the loop */
-			lptr = *ptr;
+			xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
 		}
 
 		/* for each buffer in the level */
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index b177ef33cd4c..82a38d86ebad 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1629,13 +1629,28 @@ xfs_refcount_recover_cow_leftovers(
 	if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
 		return -EOPNOTSUPP;
 
-	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+	INIT_LIST_HEAD(&debris);
+
+	/*
+	 * In this first part, we use an empty transaction to gather up
+	 * all the leftover CoW extents so that we can subsequently
+	 * delete them.  The empty transaction is used to avoid
+	 * a buffer lock deadlock if there happens to be a loop in the
+	 * refcountbt because we're allowed to re-grab a buffer that is
+	 * already attached to our transaction.  When we're done
+	 * recording the CoW debris we cancel the (empty) transaction
+	 * and everything goes away cleanly.
+	 */
+	error = xfs_trans_alloc_empty(mp, &tp);
 	if (error)
 		return error;
-	cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
+
+	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
+	if (error)
+		goto out_trans;
+	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
 
 	/* Find all the leftover CoW staging extents. */
-	INIT_LIST_HEAD(&debris);
 	memset(&low, 0, sizeof(low));
 	memset(&high, 0, sizeof(high));
 	low.rc.rc_startblock = XFS_REFC_COW_START;
@@ -1645,10 +1660,11 @@ xfs_refcount_recover_cow_leftovers(
 	if (error)
 		goto out_cursor;
 	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-	xfs_buf_relse(agbp);
+	xfs_trans_brelse(tp, agbp);
+	xfs_trans_cancel(tp);
 
 	/* Now iterate the list to free the leftovers */
-	list_for_each_entry(rr, &debris, rr_list) {
+	list_for_each_entry_safe(rr, n, &debris, rr_list) {
 		/* Set up transaction. */
 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
 		if (error)
@@ -1676,8 +1692,16 @@ xfs_refcount_recover_cow_leftovers(
 		error = xfs_trans_commit(tp);
 		if (error)
 			goto out_free;
+
+		list_del(&rr->rr_list);
+		kmem_free(rr);
 	}
 
+	return error;
+out_defer:
+	xfs_defer_cancel(&dfops);
+out_trans:
+	xfs_trans_cancel(tp);
 out_free:
 	/* Free the leftover list */
 	list_for_each_entry_safe(rr, n, &debris, rr_list) {
@@ -1688,11 +1712,6 @@ out_free:
 
 out_cursor:
 	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-	xfs_buf_relse(agbp);
-	goto out_free;
-
-out_defer:
-	xfs_defer_cancel(&dfops);
-	xfs_trans_cancel(tp);
-	goto out_free;
+	xfs_trans_brelse(tp, agbp);
+	goto out_trans;
 }
diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c
deleted file mode 100644
index b83f76b6d410..000000000000
--- a/fs/xfs/uuid.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <xfs.h>
-
-/* IRIX interpretation of an uuid_t */
-typedef struct {
-	__be32	uu_timelow;
-	__be16	uu_timemid;
-	__be16	uu_timehi;
-	__be16	uu_clockseq;
-	__be16	uu_node[3];
-} xfs_uu_t;
-
-/*
- * uuid_getnodeuniq - obtain the node unique fields of a UUID.
- *
- * This is not in any way a standard or condoned UUID function;
- * it just something that's needed for user-level file handles.
- */
-void
-uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
-{
-	xfs_uu_t *uup = (xfs_uu_t *)uuid;
-
-	fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
-		   be16_to_cpu(uup->uu_timemid);
-	fsid[1] = be32_to_cpu(uup->uu_timelow);
-}
-
-int
-uuid_is_nil(uuid_t *uuid)
-{
-	int	i;
-	char	*cp = (char *)uuid;
-
-	if (uuid == NULL)
-		return 0;
-	/* implied check of version number here... */
-	for (i = 0; i < sizeof *uuid; i++)
-		if (*cp++) return 0;	/* not nil */
-	return 1;	/* is nil */
-}
-
-int
-uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
-{
-	return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
-}
diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h
deleted file mode 100644
index 104db0f3bed6..000000000000
--- a/fs/xfs/uuid.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_UUID_H__
-#define __XFS_SUPPORT_UUID_H__
-
-typedef struct {
-	unsigned char	__u_bits[16];
-} uuid_t;
-
-extern int uuid_is_nil(uuid_t *uuid);
-extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
-extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
-
-static inline void
-uuid_copy(uuid_t *dst, uuid_t *src)
-{
-	memcpy(dst, src, sizeof(uuid_t));
-}
-
-#endif	/* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 09af0f7cd55e..d20c29b9c95b 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -276,7 +276,7 @@ xfs_end_io(
 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
 	xfs_off_t		offset = ioend->io_offset;
 	size_t			size = ioend->io_size;
-	int			error = ioend->io_bio->bi_error;
+	int			error;
 
 	/*
 	 * Just clean up the in-memory strutures if the fs has been shut down.
@@ -289,6 +289,7 @@ xfs_end_io(
 	/*
 	 * Clean up any COW blocks on an I/O error.
 	 */
+	error = blk_status_to_errno(ioend->io_bio->bi_status);
 	if (unlikely(error)) {
 		switch (ioend->io_type) {
 		case XFS_IO_COW:
@@ -332,7 +333,7 @@ xfs_end_bio(
 	else if (ioend->io_append_trans)
 		queue_work(mp->m_data_workqueue, &ioend->io_work);
 	else
-		xfs_destroy_ioend(ioend, bio->bi_error);
+		xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
 }
 
 STATIC int
@@ -500,11 +501,12 @@ xfs_submit_ioend(
 	 * time.
 	 */
 	if (status) {
-		ioend->io_bio->bi_error = status;
+		ioend->io_bio->bi_status = errno_to_blk_status(status);
 		bio_endio(ioend->io_bio);
 		return status;
 	}
 
+	ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
 	submit_bio(ioend->io_bio);
 	return 0;
 }
@@ -564,6 +566,7 @@ xfs_chain_bio(
 	bio_chain(ioend->io_bio, new);
 	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */
 	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+	ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
 	submit_bio(ioend->io_bio);
 	ioend->io_bio = new;
 }
@@ -1316,9 +1319,12 @@ xfs_vm_bmap(
 	 * The swap code (ab-)uses ->bmap to get a block mapping and then
 	 * bypasseѕ the file system for actual I/O.  We really can't allow
 	 * that on reflinks inodes, so we have to skip out here.  And yes,
-	 * 0 is the magic code for a bmap error..
+	 * 0 is the magic code for a bmap error.
+	 *
+	 * Since we don't pass back blockdev info, we can't return bmap
+	 * information for rt files either.
 	 */
-	if (xfs_is_reflink_inode(ip))
+	if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
 		return 0;
 
 	filemap_write_and_wait(mapping);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 2b954308a1d6..9e3cc2146d5b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -582,9 +582,13 @@ xfs_getbmap(
 		}
 		break;
 	default:
+		/* Local format data forks report no extents. */
+		if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+			bmv->bmv_entries = 0;
+			return 0;
+		}
 		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
-		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
-		    ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
 			return -EINVAL;
 
 		if (xfs_get_extsz_hint(ip) ||
@@ -712,7 +716,7 @@ xfs_getbmap(
 			 * extents.
 			 */
 			if (map[i].br_startblock == DELAYSTARTBLOCK &&
-			    map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
+			    map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
 				ASSERT((iflags & BMV_IF_DELALLOC) != 0);
 
                         if (map[i].br_startblock == HOLESTARTBLOCK &&
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 62fa39276a24..438505f395e7 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -97,12 +97,16 @@ static inline void
 xfs_buf_ioacct_inc(
 	struct xfs_buf	*bp)
 {
-	if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT))
+	if (bp->b_flags & XBF_NO_IOACCT)
 		return;
 
 	ASSERT(bp->b_flags & XBF_ASYNC);
-	bp->b_flags |= _XBF_IN_FLIGHT;
-	percpu_counter_inc(&bp->b_target->bt_io_count);
+	spin_lock(&bp->b_lock);
+	if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
+		bp->b_state |= XFS_BSTATE_IN_FLIGHT;
+		percpu_counter_inc(&bp->b_target->bt_io_count);
+	}
+	spin_unlock(&bp->b_lock);
 }
 
 /*
@@ -110,14 +114,24 @@ xfs_buf_ioacct_inc(
  * freed and unaccount from the buftarg.
  */
 static inline void
-xfs_buf_ioacct_dec(
+__xfs_buf_ioacct_dec(
 	struct xfs_buf	*bp)
 {
-	if (!(bp->b_flags & _XBF_IN_FLIGHT))
-		return;
+	lockdep_assert_held(&bp->b_lock);
 
-	bp->b_flags &= ~_XBF_IN_FLIGHT;
-	percpu_counter_dec(&bp->b_target->bt_io_count);
+	if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
+		bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
+		percpu_counter_dec(&bp->b_target->bt_io_count);
+	}
+}
+
+static inline void
+xfs_buf_ioacct_dec(
+	struct xfs_buf	*bp)
+{
+	spin_lock(&bp->b_lock);
+	__xfs_buf_ioacct_dec(bp);
+	spin_unlock(&bp->b_lock);
 }
 
 /*
@@ -149,9 +163,9 @@ xfs_buf_stale(
 	 * unaccounted (released to LRU) before that occurs. Drop in-flight
 	 * status now to preserve accounting consistency.
 	 */
-	xfs_buf_ioacct_dec(bp);
-
 	spin_lock(&bp->b_lock);
+	__xfs_buf_ioacct_dec(bp);
+
 	atomic_set(&bp->b_lru_ref, 0);
 	if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
 	    (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
@@ -979,12 +993,12 @@ xfs_buf_rele(
 		 * ensures the decrement occurs only once per-buf.
 		 */
 		if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
-			xfs_buf_ioacct_dec(bp);
+			__xfs_buf_ioacct_dec(bp);
 		goto out_unlock;
 	}
 
 	/* the last reference has been dropped ... */
-	xfs_buf_ioacct_dec(bp);
+	__xfs_buf_ioacct_dec(bp);
 	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
 		/*
 		 * If the buffer is added to the LRU take a new reference to the
@@ -1213,8 +1227,11 @@ xfs_buf_bio_end_io(
 	 * don't overwrite existing errors - otherwise we can lose errors on
 	 * buffers that require multiple bios to complete.
 	 */
-	if (bio->bi_error)
-		cmpxchg(&bp->b_io_error, 0, bio->bi_error);
+	if (bio->bi_status) {
+		int error = blk_status_to_errno(bio->bi_status);
+
+		cmpxchg(&bp->b_io_error, 0, error);
+	}
 
 	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
 		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 8d1d44f87ce9..1508121f29f2 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -63,7 +63,6 @@ typedef enum {
 #define _XBF_KMEM	 (1 << 21)/* backed by heap memory */
 #define _XBF_DELWRI_Q	 (1 << 22)/* buffer on a delwri queue */
 #define _XBF_COMPOUND	 (1 << 23)/* compound buffer */
-#define _XBF_IN_FLIGHT	 (1 << 25) /* I/O in flight, for accounting purposes */
 
 typedef unsigned int xfs_buf_flags_t;
 
@@ -84,14 +83,14 @@ typedef unsigned int xfs_buf_flags_t;
 	{ _XBF_PAGES,		"PAGES" }, \
 	{ _XBF_KMEM,		"KMEM" }, \
 	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
-	{ _XBF_COMPOUND,	"COMPOUND" }, \
-	{ _XBF_IN_FLIGHT,	"IN_FLIGHT" }
+	{ _XBF_COMPOUND,	"COMPOUND" }
 
 
 /*
  * Internal state flags.
  */
 #define XFS_BSTATE_DISPOSE	 (1 << 0)	/* buffer being discarded */
+#define XFS_BSTATE_IN_FLIGHT	 (1 << 1)	/* I/O in flight */
 
 /*
  * The xfs_buftarg contains 2 notions of "sector size" -
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 35703a801372..17f27a2fb5e2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -237,7 +237,11 @@ xfs_file_dax_read(
 	if (!count)
 		return 0; /* skip atime */
 
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			return -EAGAIN;
+		xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	}
 	ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
@@ -541,7 +545,11 @@ xfs_file_dio_aio_write(
 		iolock = XFS_IOLOCK_SHARED;
 	}
 
-	xfs_ilock(ip, iolock);
+	if (!xfs_ilock_nowait(ip, iolock)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			return -EAGAIN;
+		xfs_ilock(ip, iolock);
+	}
 
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
@@ -553,9 +561,15 @@ xfs_file_dio_aio_write(
 	 * otherwise demote the lock if we had to take the exclusive lock
 	 * for other reasons in xfs_file_aio_write_checks.
 	 */
-	if (unaligned_io)
-		inode_dio_wait(inode);
-	else if (iolock == XFS_IOLOCK_EXCL) {
+	if (unaligned_io) {
+		/* If we are going to wait for other DIO to finish, bail */
+		if (iocb->ki_flags & IOCB_NOWAIT) {
+			if (atomic_read(&inode->i_dio_count))
+				return -EAGAIN;
+		} else {
+			inode_dio_wait(inode);
+		}
+	} else if (iolock == XFS_IOLOCK_EXCL) {
 		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
 		iolock = XFS_IOLOCK_SHARED;
 	}
@@ -585,7 +599,12 @@ xfs_file_dax_write(
 	size_t			count;
 	loff_t			pos;
 
-	xfs_ilock(ip, iolock);
+	if (!xfs_ilock_nowait(ip, iolock)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			return -EAGAIN;
+		xfs_ilock(ip, iolock);
+	}
+
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
@@ -892,6 +911,7 @@ xfs_file_open(
 		return -EFBIG;
 	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
 		return -EIO;
+	file->f_mode |= FMODE_AIO_NOWAIT;
 	return 0;
 }
 
@@ -1043,49 +1063,17 @@ xfs_find_get_desired_pgoff(
 
 	index = startoff >> PAGE_SHIFT;
 	endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
-	end = endoff >> PAGE_SHIFT;
+	end = (endoff - 1) >> PAGE_SHIFT;
 	do {
 		int		want;
 		unsigned	nr_pages;
 		unsigned int	i;
 
-		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
+		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
 		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
 					  want);
-		/*
-		 * No page mapped into given range.  If we are searching holes
-		 * and if this is the first time we got into the loop, it means
-		 * that the given offset is landed in a hole, return it.
-		 *
-		 * If we have already stepped through some block buffers to find
-		 * holes but they all contains data.  In this case, the last
-		 * offset is already updated and pointed to the end of the last
-		 * mapped page, if it does not reach the endpoint to search,
-		 * that means there should be a hole between them.
-		 */
-		if (nr_pages == 0) {
-			/* Data search found nothing */
-			if (type == DATA_OFF)
-				break;
-
-			ASSERT(type == HOLE_OFF);
-			if (lastoff == startoff || lastoff < endoff) {
-				found = true;
-				*offset = lastoff;
-			}
-			break;
-		}
-
-		/*
-		 * At lease we found one page.  If this is the first time we
-		 * step into the loop, and if the first page index offset is
-		 * greater than the given search offset, a hole was found.
-		 */
-		if (type == HOLE_OFF && lastoff == startoff &&
-		    lastoff < page_offset(pvec.pages[0])) {
-			found = true;
+		if (nr_pages == 0)
 			break;
-		}
 
 		for (i = 0; i < nr_pages; i++) {
 			struct page	*page = pvec.pages[i];
@@ -1098,18 +1086,18 @@ xfs_find_get_desired_pgoff(
 			 * file mapping. However, page->index will not change
 			 * because we have a reference on the page.
 			 *
-			 * Searching done if the page index is out of range.
-			 * If the current offset is not reaches the end of
-			 * the specified search range, there should be a hole
-			 * between them.
+			 * If current page offset is beyond where we've ended,
+			 * we've found a hole.
 			 */
-			if (page->index > end) {
-				if (type == HOLE_OFF && lastoff < endoff) {
-					*offset = lastoff;
-					found = true;
-				}
+			if (type == HOLE_OFF && lastoff < endoff &&
+			    lastoff < page_offset(pvec.pages[i])) {
+				found = true;
+				*offset = lastoff;
 				goto out;
 			}
+			/* Searching done if the page index is out of range. */
+			if (page->index > end)
+				goto out;
 
 			lock_page(page);
 			/*
@@ -1151,21 +1139,20 @@ xfs_find_get_desired_pgoff(
 
 		/*
 		 * The number of returned pages less than our desired, search
-		 * done.  In this case, nothing was found for searching data,
-		 * but we found a hole behind the last offset.
+		 * done.
 		 */
-		if (nr_pages < want) {
-			if (type == HOLE_OFF) {
-				*offset = lastoff;
-				found = true;
-			}
+		if (nr_pages < want)
 			break;
-		}
 
 		index = pvec.pages[i - 1]->index + 1;
 		pagevec_release(&pvec);
 	} while (index <= end);
 
+	/* No page at lastoff and we are not done - we found a hole. */
+	if (type == HOLE_OFF && lastoff < endoff) {
+		*offset = lastoff;
+		found = true;
+	}
 out:
 	pagevec_release(&pvec);
 	return found;
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 3683819887a5..814ed729881d 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -828,6 +828,7 @@ xfs_getfsmap(
 	struct xfs_fsmap		dkeys[2];	/* per-dev keys */
 	struct xfs_getfsmap_dev		handlers[XFS_GETFSMAP_DEVS];
 	struct xfs_getfsmap_info	info = { NULL };
+	bool				use_rmap;
 	int				i;
 	int				error = 0;
 
@@ -837,12 +838,14 @@ xfs_getfsmap(
 	    !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
 		return -EINVAL;
 
+	use_rmap = capable(CAP_SYS_ADMIN) &&
+		   xfs_sb_version_hasrmapbt(&mp->m_sb);
 	head->fmh_entries = 0;
 
 	/* Set up our device handlers. */
 	memset(handlers, 0, sizeof(handlers));
 	handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (use_rmap)
 		handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
 	else
 		handlers[0].fn = xfs_getfsmap_datadev_bnobt;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index f61c84f8e31a..b9c12e1cc23a 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -66,7 +66,6 @@ xfs_inode_alloc(
 
 	XFS_STATS_INC(mp, vn_active);
 	ASSERT(atomic_read(&ip->i_pincount) == 0);
-	ASSERT(!spin_is_locked(&ip->i_flags_lock));
 	ASSERT(!xfs_isiflocked(ip));
 	ASSERT(ip->i_ino == 0);
 
@@ -190,7 +189,7 @@ xfs_perag_set_reclaim_tag(
 {
 	struct xfs_mount	*mp = pag->pag_mount;
 
-	ASSERT(spin_is_locked(&pag->pag_ici_lock));
+	lockdep_assert_held(&pag->pag_ici_lock);
 	if (pag->pag_ici_reclaimable++)
 		return;
 
@@ -212,7 +211,7 @@ xfs_perag_clear_reclaim_tag(
 {
 	struct xfs_mount	*mp = pag->pag_mount;
 
-	ASSERT(spin_is_locked(&pag->pag_ici_lock));
+	lockdep_assert_held(&pag->pag_ici_lock);
 	if (--pag->pag_ici_reclaimable)
 		return;
 
@@ -270,12 +269,12 @@ xfs_inew_wait(
 	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT);
 
 	do {
-		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 		if (!xfs_iflags_test(ip, XFS_INEW))
 			break;
 		schedule();
 	} while (true);
-	finish_wait(wq, &wait.wait);
+	finish_wait(wq, &wait.wq_entry);
 }
 
 /*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ec9826c56500..c0a1e840a588 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -622,12 +622,12 @@ __xfs_iflock(
 	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
 
 	do {
-		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 		if (xfs_isiflocked(ip))
 			io_schedule();
 	} while (!xfs_iflock_nowait(ip));
 
-	finish_wait(wq, &wait.wait);
+	finish_wait(wq, &wait.wq_entry);
 }
 
 STATIC uint
@@ -2486,11 +2486,11 @@ __xfs_iunpin_wait(
 	xfs_iunpin(ip);
 
 	do {
-		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 		if (xfs_ipincount(ip))
 			io_schedule();
 	} while (xfs_ipincount(ip));
-	finish_wait(wq, &wait.wait);
+	finish_wait(wq, &wait.wq_entry);
 }
 
 void
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 08cb7d1a4a3a..013cc78d7daf 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -834,9 +834,7 @@ xfs_inode_item_format_convert(
 		in_f->ilf_dsize = in_f32->ilf_dsize;
 		in_f->ilf_ino = in_f32->ilf_ino;
 		/* copy biggest field of ilf_u */
-		memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
-		       in_f32->ilf_u.ilfu_uuid.__u_bits,
-		       sizeof(uuid_t));
+		uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid);
 		in_f->ilf_blkno = in_f32->ilf_blkno;
 		in_f->ilf_len = in_f32->ilf_len;
 		in_f->ilf_boffset = in_f32->ilf_boffset;
@@ -851,9 +849,7 @@ xfs_inode_item_format_convert(
 		in_f->ilf_dsize = in_f64->ilf_dsize;
 		in_f->ilf_ino = in_f64->ilf_ino;
 		/* copy biggest field of ilf_u */
-		memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
-		       in_f64->ilf_u.ilfu_uuid.__u_bits,
-		       sizeof(uuid_t));
+		uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f64->ilf_u.ilfu_uuid);
 		in_f->ilf_blkno = in_f64->ilf_blkno;
 		in_f->ilf_len = in_f64->ilf_len;
 		in_f->ilf_boffset = in_f64->ilf_boffset;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index a63f61c256bd..05dc87e8c1f5 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -995,6 +995,11 @@ xfs_file_iomap_begin(
 		lockmode = xfs_ilock_data_map_shared(ip);
 	}
 
+	if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) {
+		error = -EAGAIN;
+		goto out_unlock;
+	}
+
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
 		length = mp->m_super->s_maxbytes - offset;
@@ -1016,6 +1021,15 @@ xfs_file_iomap_begin(
 
 	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
 		if (flags & IOMAP_DIRECT) {
+			/*
+			 * A reflinked inode will result in CoW alloc.
+			 * FIXME: It could still overwrite on unshared extents
+			 * and not need allocation.
+			 */
+			if (flags & IOMAP_NOWAIT) {
+				error = -EAGAIN;
+				goto out_unlock;
+			}
 			/* may drop and re-acquire the ilock */
 			error = xfs_reflink_allocate_cow(ip, &imap, &shared,
 					&lockmode);
@@ -1033,6 +1047,14 @@ xfs_file_iomap_begin(
 
 	if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
 		/*
+		 * If nowait is set bail since we are going to make
+		 * allocations.
+		 */
+		if (flags & IOMAP_NOWAIT) {
+			error = -EAGAIN;
+			goto out_unlock;
+		}
+		/*
 		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
 		 * pages to keep the chunks of work done where somewhat symmetric
 		 * with the work writeback does. This is a completely arbitrary
@@ -1068,7 +1090,7 @@ xfs_file_iomap_begin(
 	/* optionally associate a dax device with the iomap bdev */
 	bdev = iomap->bdev;
 	if (blk_queue_dax(bdev->bd_queue))
-		iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+		iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
 	else
 		iomap->dax_dev = NULL;
 
@@ -1149,7 +1171,7 @@ xfs_file_iomap_end(
 	unsigned		flags,
 	struct iomap		*iomap)
 {
-	put_dax(iomap->dax_dev);
+	fs_put_dax(iomap->dax_dev);
 	if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
 		return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
 				length, written, iomap);
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 044fb0e15390..2d167fe643ec 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -19,6 +19,7 @@
 #define __XFS_LINUX__
 
 #include <linux/types.h>
+#include <linux/uuid.h>
 
 /*
  * Kernel specific type declarations for XFS
@@ -42,7 +43,6 @@ typedef __u32			xfs_nlink_t;
 
 #include "kmem.h"
 #include "mrlock.h"
-#include "uuid.h"
 
 #include <linux/semaphore.h>
 #include <linux/mm.h>
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index cd0b077deb35..8cec1e5505a4 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -352,13 +352,13 @@ xlog_header_check_mount(
 {
 	ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
 
-	if (uuid_is_nil(&head->h_fs_uuid)) {
+	if (uuid_is_null(&head->h_fs_uuid)) {
 		/*
 		 * IRIX doesn't write the h_fs_uuid or h_fmt fields. If
-		 * h_fs_uuid is nil, we assume this log was last mounted
+		 * h_fs_uuid is null, we assume this log was last mounted
 		 * by IRIX and continue.
 		 */
-		xfs_warn(mp, "nil uuid in log - IRIX style log");
+		xfs_warn(mp, "null uuid in log - IRIX style log");
 	} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
 		xfs_warn(mp, "log has mismatched uuid - can't recover");
 		xlog_header_check_dump(mp, head);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2eaf81859166..d249546da15e 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -74,20 +74,19 @@ xfs_uuid_mount(
 	int			hole, i;
 
 	/* Publish UUID in struct super_block */
-	BUILD_BUG_ON(sizeof(mp->m_super->s_uuid) != sizeof(uuid_t));
-	memcpy(&mp->m_super->s_uuid, uuid, sizeof(uuid_t));
+	uuid_copy(&mp->m_super->s_uuid, uuid);
 
 	if (mp->m_flags & XFS_MOUNT_NOUUID)
 		return 0;
 
-	if (uuid_is_nil(uuid)) {
-		xfs_warn(mp, "Filesystem has nil UUID - can't mount");
+	if (uuid_is_null(uuid)) {
+		xfs_warn(mp, "Filesystem has null UUID - can't mount");
 		return -EINVAL;
 	}
 
 	mutex_lock(&xfs_uuid_table_mutex);
 	for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
-		if (uuid_is_nil(&xfs_uuid_table[i])) {
+		if (uuid_is_null(&xfs_uuid_table[i])) {
 			hole = i;
 			continue;
 		}
@@ -124,7 +123,7 @@ xfs_uuid_unmount(
 
 	mutex_lock(&xfs_uuid_table_mutex);
 	for (i = 0; i < xfs_uuid_table_size; i++) {
-		if (uuid_is_nil(&xfs_uuid_table[i]))
+		if (uuid_is_null(&xfs_uuid_table[i]))
 			continue;
 		if (!uuid_equal(uuid, &xfs_uuid_table[i]))
 			continue;
@@ -793,7 +792,10 @@ xfs_mountfs(
 	 *  Copies the low order bits of the timestamp and the randomly
 	 *  set "sequence" number out of a UUID.
 	 */
-	uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);
+	mp->m_fixedfsid[0] =
+		(get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) |
+		 get_unaligned_be16(&sbp->sb_uuid.b[4]);
+	mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);
 
 	mp->m_dmevmask = 0;	/* not persistent; set after each mount */
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 47d239dcf3f4..97df4db13b2e 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -52,6 +52,7 @@
 #include "xfs_reflink.h"
 
 #include <linux/namei.h>
+#include <linux/dax.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/mount.h>
@@ -1765,7 +1766,8 @@ STATIC int __init
 xfs_init_zones(void)
 {
 	xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
-			offsetof(struct xfs_ioend, io_inline_bio));
+			offsetof(struct xfs_ioend, io_inline_bio),
+			BIOSET_NEED_BVECS);
 	if (!xfs_ioend_bioset)
 		goto out;
 
diff --git a/include/Kbuild b/include/Kbuild
deleted file mode 100644
index bab1145bc7a7..000000000000
--- a/include/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# Top-level Makefile calls into asm-$(ARCH)
-# List only non-arch directories below
diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h
index 07740072da55..6db3b4668b1a 100644
--- a/include/acpi/acconfig.h
+++ b/include/acpi/acconfig.h
@@ -78,6 +78,7 @@
 #define ACPI_MAX_EXTPARSE_CACHE_DEPTH   96	/* Parse tree objects */
 #define ACPI_MAX_OBJECT_CACHE_DEPTH     96	/* Interpreter operand objects */
 #define ACPI_MAX_NAMESPACE_CACHE_DEPTH  96	/* Namespace objects */
+#define ACPI_MAX_COMMENT_CACHE_DEPTH    96	/* Comments for the -ca option */
 
 /*
  * Should the subsystem abort the loading of an ACPI table if the
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 73b82ac0b56b..c1b163cb68b1 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -61,17 +61,18 @@ bool acpi_ata_match(acpi_handle handle);
 bool acpi_bay_match(acpi_handle handle);
 bool acpi_dock_match(acpi_handle handle);
 
-bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs);
-union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
+bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs);
+union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
 			u64 rev, u64 func, union acpi_object *argv4);
 
 static inline union acpi_object *
-acpi_evaluate_dsm_typed(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
-			union acpi_object *argv4, acpi_object_type type)
+acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev,
+			u64 func, union acpi_object *argv4,
+			acpi_object_type type)
 {
 	union acpi_object *obj;
 
-	obj = acpi_evaluate_dsm(handle, uuid, rev, func, argv4);
+	obj = acpi_evaluate_dsm(handle, guid, rev, func, argv4);
 	if (obj && obj->type != type) {
 		ACPI_FREE(obj);
 		obj = NULL;
@@ -210,7 +211,8 @@ struct acpi_device_flags {
 	u32 of_compatible_ok:1;
 	u32 coherent_dma:1;
 	u32 cca_seen:1;
-	u32 reserved:20;
+	u32 spi_i2c_slave:1;
+	u32 reserved:19;
 };
 
 /* File System */
@@ -588,6 +590,15 @@ struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
 int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state);
 int acpi_disable_wakeup_device_power(struct acpi_device *dev);
 
+#ifdef CONFIG_X86
+bool acpi_device_always_present(struct acpi_device *adev);
+#else
+static inline bool acpi_device_always_present(struct acpi_device *adev)
+{
+	return false;
+}
+#endif
+
 #ifdef CONFIG_PM
 acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev,
 				 void (*work_func)(struct work_struct *work));
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 3795386ea706..15c86ce4df53 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -46,7 +46,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20170119
+#define ACPI_CA_VERSION                 0x20170303
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index d92543f3bbfd..bdc55c0da19c 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -374,6 +374,20 @@ struct acpi_table_desc {
 	u16 validation_count;
 };
 
+/*
+ * Maximum value of the validation_count field in struct acpi_table_desc.
+ * When reached, validation_count cannot be changed any more and the table will
+ * be permanently regarded as validated.
+ *
+ * This is to prevent situations in which unbalanced table get/put operations
+ * may cause premature table unmapping in the OS to happen.
+ *
+ * The maximum validation count can be defined to any value, but should be
+ * greater than the maximum number of OS early stage mapping slots to avoid
+ * leaking early stage table mappings to the late stage.
+ */
+#define ACPI_MAX_TABLE_VALIDATIONS          ACPI_UINT16_MAX
+
 /* Masks for Flags field above */
 
 #define ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL  (0)	/* Virtual address, external maintained */
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 0ff3c64ce924..faa9f2c0d5de 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -87,6 +87,7 @@
 #define ACPI_SIG_WDAT           "WDAT"	/* Watchdog Action Table */
 #define ACPI_SIG_WDDT           "WDDT"	/* Watchdog Timer Description Table */
 #define ACPI_SIG_WDRT           "WDRT"	/* Watchdog Resource Table */
+#define ACPI_SIG_XXXX           "XXXX"	/* Intermediate AML header for ASL/ASL+ converter */
 
 #ifdef ACPI_UNDEFINED_TABLES
 /*
@@ -783,6 +784,15 @@ struct acpi_iort_smmu {
 #define ACPI_IORT_SMMU_DVM_SUPPORTED    (1)
 #define ACPI_IORT_SMMU_COHERENT_WALK    (1<<1)
 
+/* Global interrupt format */
+
+struct acpi_iort_smmu_gsi {
+	u32 nsg_irpt;
+	u32 nsg_irpt_flags;
+	u32 nsg_cfg_irpt;
+	u32 nsg_cfg_irpt_flags;
+};
+
 struct acpi_iort_smmu_v3 {
 	u64 base_address;	/* SMMUv3 base address */
 	u32 flags;
diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm
deleted file mode 100644
index d2ee86b4c091..000000000000
--- a/include/asm-generic/Kbuild.asm
+++ /dev/null
@@ -1 +0,0 @@
-include include/uapi/asm-generic/Kbuild.asm
diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index c0bd0d7651a9..bb837310c07e 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -913,4 +913,55 @@ void drm_dp_aux_unregister(struct drm_dp_aux *aux);
 int drm_dp_start_crc(struct drm_dp_aux *aux, struct drm_crtc *crtc);
 int drm_dp_stop_crc(struct drm_dp_aux *aux);
 
+struct drm_dp_dpcd_ident {
+	u8 oui[3];
+	u8 device_id[6];
+	u8 hw_rev;
+	u8 sw_major_rev;
+	u8 sw_minor_rev;
+} __packed;
+
+/**
+ * struct drm_dp_desc - DP branch/sink device descriptor
+ * @ident: DP device identification from DPCD 0x400 (sink) or 0x500 (branch).
+ * @quirks: Quirks; use drm_dp_has_quirk() to query for the quirks.
+ */
+struct drm_dp_desc {
+	struct drm_dp_dpcd_ident ident;
+	u32 quirks;
+};
+
+int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc,
+		     bool is_branch);
+
+/**
+ * enum drm_dp_quirk - Display Port sink/branch device specific quirks
+ *
+ * Display Port sink and branch devices in the wild have a variety of bugs, try
+ * to collect them here. The quirks are shared, but it's up to the drivers to
+ * implement workarounds for them.
+ */
+enum drm_dp_quirk {
+	/**
+	 * @DP_DPCD_QUIRK_LIMITED_M_N:
+	 *
+	 * The device requires main link attributes Mvid and Nvid to be limited
+	 * to 16 bits.
+	 */
+	DP_DPCD_QUIRK_LIMITED_M_N,
+};
+
+/**
+ * drm_dp_has_quirk() - does the DP device have a specific quirk
+ * @desc: Device decriptor filled by drm_dp_read_desc()
+ * @quirk: Quirk to query for
+ *
+ * Return true if DP device identified by @desc has @quirk.
+ */
+static inline bool
+drm_dp_has_quirk(const struct drm_dp_desc *desc, enum drm_dp_quirk quirk)
+{
+	return desc->quirks & BIT(quirk);
+}
+
 #endif /* _DRM_DP_HELPER_H_ */
diff --git a/include/dt-bindings/clock/hi6220-clock.h b/include/dt-bindings/clock/hi6220-clock.h
index 6b03c84f4278..b8ba665aab7b 100644
--- a/include/dt-bindings/clock/hi6220-clock.h
+++ b/include/dt-bindings/clock/hi6220-clock.h
@@ -124,7 +124,10 @@
 #define HI6220_CS_DAPB		57
 #define HI6220_CS_ATB_DIV	58
 
-#define HI6220_SYS_NR_CLKS	59
+/* gate clock */
+#define HI6220_DAPB_CLK		59
+
+#define HI6220_SYS_NR_CLKS	60
 
 /* clk in Hi6220 media controller */
 /* gate clocks */
diff --git a/include/dt-bindings/clock/mt6797-clk.h b/include/dt-bindings/clock/mt6797-clk.h
new file mode 100644
index 000000000000..2f25a5aca019
--- /dev/null
+++ b/include/dt-bindings/clock/mt6797-clk.h
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2017 MediaTek Inc.
+ * Author: Kevin Chen <kevin-cw.chen@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _DT_BINDINGS_CLK_MT6797_H
+#define _DT_BINDINGS_CLK_MT6797_H
+
+/* TOPCKGEN */
+#define	CLK_TOP_MUX_ULPOSC_AXI_CK_MUX_PRE	1
+#define	CLK_TOP_MUX_ULPOSC_AXI_CK_MUX		2
+#define	CLK_TOP_MUX_AXI				3
+#define	CLK_TOP_MUX_MEM				4
+#define	CLK_TOP_MUX_DDRPHYCFG			5
+#define	CLK_TOP_MUX_MM				6
+#define	CLK_TOP_MUX_PWM				7
+#define	CLK_TOP_MUX_VDEC			8
+#define	CLK_TOP_MUX_VENC			9
+#define	CLK_TOP_MUX_MFG				10
+#define	CLK_TOP_MUX_CAMTG			11
+#define	CLK_TOP_MUX_UART			12
+#define	CLK_TOP_MUX_SPI				13
+#define	CLK_TOP_MUX_ULPOSC_SPI_CK_MUX		14
+#define	CLK_TOP_MUX_USB20			15
+#define	CLK_TOP_MUX_MSDC50_0_HCLK		16
+#define	CLK_TOP_MUX_MSDC50_0			17
+#define	CLK_TOP_MUX_MSDC30_1			18
+#define	CLK_TOP_MUX_MSDC30_2			19
+#define	CLK_TOP_MUX_AUDIO			20
+#define	CLK_TOP_MUX_AUD_INTBUS			21
+#define	CLK_TOP_MUX_PMICSPI			22
+#define	CLK_TOP_MUX_SCP				23
+#define	CLK_TOP_MUX_ATB				24
+#define	CLK_TOP_MUX_MJC				25
+#define	CLK_TOP_MUX_DPI0			26
+#define	CLK_TOP_MUX_AUD_1			27
+#define	CLK_TOP_MUX_AUD_2			28
+#define	CLK_TOP_MUX_SSUSB_TOP_SYS		29
+#define	CLK_TOP_MUX_SPM				30
+#define	CLK_TOP_MUX_BSI_SPI			31
+#define	CLK_TOP_MUX_AUDIO_H			32
+#define	CLK_TOP_MUX_ANC_MD32			33
+#define	CLK_TOP_MUX_MFG_52M			34
+#define	CLK_TOP_SYSPLL_CK			35
+#define	CLK_TOP_SYSPLL_D2			36
+#define	CLK_TOP_SYSPLL1_D2			37
+#define	CLK_TOP_SYSPLL1_D4			38
+#define	CLK_TOP_SYSPLL1_D8			39
+#define	CLK_TOP_SYSPLL1_D16			40
+#define	CLK_TOP_SYSPLL_D3			41
+#define	CLK_TOP_SYSPLL_D3_D3			42
+#define	CLK_TOP_SYSPLL2_D2			43
+#define	CLK_TOP_SYSPLL2_D4			44
+#define	CLK_TOP_SYSPLL2_D8			45
+#define	CLK_TOP_SYSPLL_D5			46
+#define	CLK_TOP_SYSPLL3_D2			47
+#define	CLK_TOP_SYSPLL3_D4			48
+#define	CLK_TOP_SYSPLL_D7			49
+#define	CLK_TOP_SYSPLL4_D2			50
+#define	CLK_TOP_SYSPLL4_D4			51
+#define	CLK_TOP_UNIVPLL_CK			52
+#define	CLK_TOP_UNIVPLL_D7			53
+#define	CLK_TOP_UNIVPLL_D26			54
+#define	CLK_TOP_SSUSB_PHY_48M_CK		55
+#define	CLK_TOP_USB_PHY48M_CK			56
+#define	CLK_TOP_UNIVPLL_D2			57
+#define	CLK_TOP_UNIVPLL1_D2			58
+#define	CLK_TOP_UNIVPLL1_D4			59
+#define	CLK_TOP_UNIVPLL1_D8			60
+#define	CLK_TOP_UNIVPLL_D3			61
+#define	CLK_TOP_UNIVPLL2_D2			62
+#define	CLK_TOP_UNIVPLL2_D4			63
+#define	CLK_TOP_UNIVPLL2_D8			64
+#define	CLK_TOP_UNIVPLL_D5			65
+#define	CLK_TOP_UNIVPLL3_D2			66
+#define	CLK_TOP_UNIVPLL3_D4			67
+#define	CLK_TOP_UNIVPLL3_D8			68
+#define	CLK_TOP_ULPOSC_CK_ORG			69
+#define	CLK_TOP_ULPOSC_CK			70
+#define	CLK_TOP_ULPOSC_D2			71
+#define	CLK_TOP_ULPOSC_D3			72
+#define	CLK_TOP_ULPOSC_D4			73
+#define	CLK_TOP_ULPOSC_D8			74
+#define	CLK_TOP_ULPOSC_D10			75
+#define	CLK_TOP_APLL1_CK			76
+#define	CLK_TOP_APLL2_CK			77
+#define	CLK_TOP_MFGPLL_CK			78
+#define	CLK_TOP_MFGPLL_D2			79
+#define	CLK_TOP_IMGPLL_CK			80
+#define	CLK_TOP_IMGPLL_D2			81
+#define	CLK_TOP_IMGPLL_D4			82
+#define	CLK_TOP_CODECPLL_CK			83
+#define	CLK_TOP_CODECPLL_D2			84
+#define	CLK_TOP_VDECPLL_CK			85
+#define	CLK_TOP_TVDPLL_CK			86
+#define	CLK_TOP_TVDPLL_D2			87
+#define	CLK_TOP_TVDPLL_D4			88
+#define	CLK_TOP_TVDPLL_D8			89
+#define	CLK_TOP_TVDPLL_D16			90
+#define	CLK_TOP_MSDCPLL_CK			91
+#define	CLK_TOP_MSDCPLL_D2			92
+#define	CLK_TOP_MSDCPLL_D4			93
+#define	CLK_TOP_MSDCPLL_D8			94
+#define	CLK_TOP_NR				95
+
+/* APMIXED_SYS */
+#define CLK_APMIXED_MAINPLL			1
+#define CLK_APMIXED_UNIVPLL			2
+#define CLK_APMIXED_MFGPLL			3
+#define CLK_APMIXED_MSDCPLL			4
+#define CLK_APMIXED_IMGPLL			5
+#define CLK_APMIXED_TVDPLL			6
+#define CLK_APMIXED_CODECPLL			7
+#define CLK_APMIXED_VDECPLL			8
+#define CLK_APMIXED_APLL1			9
+#define CLK_APMIXED_APLL2			10
+#define CLK_APMIXED_NR				11
+
+/* INFRA_SYS */
+#define	CLK_INFRA_PMIC_TMR			1
+#define	CLK_INFRA_PMIC_AP			2
+#define	CLK_INFRA_PMIC_MD			3
+#define	CLK_INFRA_PMIC_CONN			4
+#define	CLK_INFRA_SCP				5
+#define	CLK_INFRA_SEJ				6
+#define	CLK_INFRA_APXGPT			7
+#define	CLK_INFRA_SEJ_13M			8
+#define	CLK_INFRA_ICUSB				9
+#define	CLK_INFRA_GCE				10
+#define	CLK_INFRA_THERM				11
+#define	CLK_INFRA_I2C0				12
+#define	CLK_INFRA_I2C1				13
+#define	CLK_INFRA_I2C2				14
+#define	CLK_INFRA_I2C3				15
+#define	CLK_INFRA_PWM_HCLK			16
+#define	CLK_INFRA_PWM1				17
+#define	CLK_INFRA_PWM2				18
+#define	CLK_INFRA_PWM3				19
+#define	CLK_INFRA_PWM4				20
+#define	CLK_INFRA_PWM				21
+#define	CLK_INFRA_UART0				22
+#define	CLK_INFRA_UART1				23
+#define	CLK_INFRA_UART2				24
+#define	CLK_INFRA_UART3				25
+#define	CLK_INFRA_MD2MD_CCIF_0			26
+#define	CLK_INFRA_MD2MD_CCIF_1			27
+#define	CLK_INFRA_MD2MD_CCIF_2			28
+#define	CLK_INFRA_FHCTL				29
+#define	CLK_INFRA_BTIF				30
+#define	CLK_INFRA_MD2MD_CCIF_3			31
+#define	CLK_INFRA_SPI				32
+#define	CLK_INFRA_MSDC0				33
+#define	CLK_INFRA_MD2MD_CCIF_4			34
+#define	CLK_INFRA_MSDC1				35
+#define	CLK_INFRA_MSDC2				36
+#define	CLK_INFRA_MD2MD_CCIF_5			37
+#define	CLK_INFRA_GCPU				38
+#define	CLK_INFRA_TRNG				39
+#define	CLK_INFRA_AUXADC			40
+#define	CLK_INFRA_CPUM				41
+#define	CLK_INFRA_AP_C2K_CCIF_0			42
+#define	CLK_INFRA_AP_C2K_CCIF_1			43
+#define	CLK_INFRA_CLDMA				44
+#define	CLK_INFRA_DISP_PWM			45
+#define	CLK_INFRA_AP_DMA			46
+#define	CLK_INFRA_DEVICE_APC			47
+#define	CLK_INFRA_L2C_SRAM			48
+#define	CLK_INFRA_CCIF_AP			49
+#define	CLK_INFRA_AUDIO				50
+#define	CLK_INFRA_CCIF_MD			51
+#define	CLK_INFRA_DRAMC_F26M			52
+#define	CLK_INFRA_I2C4				53
+#define	CLK_INFRA_I2C_APPM			54
+#define	CLK_INFRA_I2C_GPUPM			55
+#define	CLK_INFRA_I2C2_IMM			56
+#define	CLK_INFRA_I2C2_ARB			57
+#define	CLK_INFRA_I2C3_IMM			58
+#define	CLK_INFRA_I2C3_ARB			59
+#define	CLK_INFRA_I2C5				60
+#define	CLK_INFRA_SYS_CIRQ			61
+#define	CLK_INFRA_SPI1				62
+#define	CLK_INFRA_DRAMC_B_F26M			63
+#define	CLK_INFRA_ANC_MD32			64
+#define	CLK_INFRA_ANC_MD32_32K			65
+#define	CLK_INFRA_DVFS_SPM1			66
+#define	CLK_INFRA_AES_TOP0			67
+#define	CLK_INFRA_AES_TOP1			68
+#define	CLK_INFRA_SSUSB_BUS			69
+#define	CLK_INFRA_SPI2				70
+#define	CLK_INFRA_SPI3				71
+#define	CLK_INFRA_SPI4				72
+#define	CLK_INFRA_SPI5				73
+#define	CLK_INFRA_IRTX				74
+#define	CLK_INFRA_SSUSB_SYS			75
+#define	CLK_INFRA_SSUSB_REF			76
+#define	CLK_INFRA_AUDIO_26M			77
+#define	CLK_INFRA_AUDIO_26M_PAD_TOP		78
+#define	CLK_INFRA_MODEM_TEMP_SHARE		79
+#define	CLK_INFRA_VAD_WRAP_SOC			80
+#define	CLK_INFRA_DRAMC_CONF			81
+#define	CLK_INFRA_DRAMC_B_CONF			82
+#define	CLK_INFRA_MFG_VCG			83
+#define	CLK_INFRA_13M				84
+#define	CLK_INFRA_NR				85
+
+/* IMG_SYS */
+#define	CLK_IMG_FDVT				1
+#define	CLK_IMG_DPE				2
+#define	CLK_IMG_DIP				3
+#define	CLK_IMG_LARB6				4
+#define	CLK_IMG_NR				5
+
+/* MM_SYS */
+#define	CLK_MM_SMI_COMMON			1
+#define	CLK_MM_SMI_LARB0			2
+#define	CLK_MM_SMI_LARB5			3
+#define	CLK_MM_CAM_MDP				4
+#define	CLK_MM_MDP_RDMA0			5
+#define	CLK_MM_MDP_RDMA1			6
+#define	CLK_MM_MDP_RSZ0				7
+#define	CLK_MM_MDP_RSZ1				8
+#define	CLK_MM_MDP_RSZ2				9
+#define	CLK_MM_MDP_TDSHP			10
+#define	CLK_MM_MDP_COLOR			11
+#define	CLK_MM_MDP_WDMA				12
+#define	CLK_MM_MDP_WROT0			13
+#define	CLK_MM_MDP_WROT1			14
+#define	CLK_MM_FAKE_ENG				15
+#define	CLK_MM_DISP_OVL0			16
+#define	CLK_MM_DISP_OVL1			17
+#define	CLK_MM_DISP_OVL0_2L			18
+#define	CLK_MM_DISP_OVL1_2L			19
+#define	CLK_MM_DISP_RDMA0			20
+#define	CLK_MM_DISP_RDMA1			21
+#define	CLK_MM_DISP_WDMA0			22
+#define	CLK_MM_DISP_WDMA1			23
+#define	CLK_MM_DISP_COLOR			24
+#define	CLK_MM_DISP_CCORR			25
+#define	CLK_MM_DISP_AAL				26
+#define	CLK_MM_DISP_GAMMA			27
+#define	CLK_MM_DISP_OD				28
+#define	CLK_MM_DISP_DITHER			29
+#define	CLK_MM_DISP_UFOE			30
+#define	CLK_MM_DISP_DSC				31
+#define	CLK_MM_DISP_SPLIT			32
+#define	CLK_MM_DSI0_MM_CLOCK			33
+#define	CLK_MM_DSI1_MM_CLOCK			34
+#define	CLK_MM_DPI_MM_CLOCK			35
+#define	CLK_MM_DPI_INTERFACE_CLOCK		36
+#define	CLK_MM_LARB4_AXI_ASIF_MM_CLOCK		37
+#define	CLK_MM_LARB4_AXI_ASIF_MJC_CLOCK		38
+#define	CLK_MM_DISP_OVL0_MOUT_CLOCK		39
+#define	CLK_MM_FAKE_ENG2			40
+#define	CLK_MM_DSI0_INTERFACE_CLOCK		41
+#define	CLK_MM_DSI1_INTERFACE_CLOCK		42
+#define	CLK_MM_NR				43
+
+/* VDEC_SYS */
+#define	CLK_VDEC_CKEN_ENG			1
+#define	CLK_VDEC_ACTIVE				2
+#define	CLK_VDEC_CKEN				3
+#define	CLK_VDEC_LARB1_CKEN			4
+#define	CLK_VDEC_NR				5
+
+/* VENC_SYS */
+#define	CLK_VENC_0				1
+#define	CLK_VENC_1				2
+#define	CLK_VENC_2				3
+#define	CLK_VENC_3				4
+#define	CLK_VENC_NR				5
+
+#endif /* _DT_BINDINGS_CLK_MT6797_H */
diff --git a/include/dt-bindings/clock/r8a7795-cpg-mssr.h b/include/dt-bindings/clock/r8a7795-cpg-mssr.h
index e864aae0a256..f047eaf261f3 100644
--- a/include/dt-bindings/clock/r8a7795-cpg-mssr.h
+++ b/include/dt-bindings/clock/r8a7795-cpg-mssr.h
@@ -60,4 +60,11 @@
 #define R8A7795_CLK_R			45
 #define R8A7795_CLK_OSC			46
 
+/* r8a7795 ES2.0 CPG Core Clocks */
+#define R8A7795_CLK_S0D2		47
+#define R8A7795_CLK_S0D3		48
+#define R8A7795_CLK_S0D6		49
+#define R8A7795_CLK_S0D8		50
+#define R8A7795_CLK_S0D12		51
+
 #endif /* __DT_BINDINGS_CLOCK_R8A7795_CPG_MSSR_H__ */
diff --git a/include/dt-bindings/clock/rk3328-cru.h b/include/dt-bindings/clock/rk3328-cru.h
index ee702c8e4c09..d2b26a4b43eb 100644
--- a/include/dt-bindings/clock/rk3328-cru.h
+++ b/include/dt-bindings/clock/rk3328-cru.h
@@ -97,6 +97,7 @@
 #define SCLK_MAC2IO_SRC		99
 #define SCLK_MAC2IO		100
 #define SCLK_MAC2PHY		101
+#define SCLK_MAC2IO_EXT		102
 
 /* dclk gates */
 #define DCLK_LCDC		120
diff --git a/include/dt-bindings/clock/rk3368-cru.h b/include/dt-bindings/clock/rk3368-cru.h
index 9c5dd9ba2f6c..aeb83e581a11 100644
--- a/include/dt-bindings/clock/rk3368-cru.h
+++ b/include/dt-bindings/clock/rk3368-cru.h
@@ -44,13 +44,12 @@
 #define SCLK_I2S_8CH		82
 #define SCLK_SPDIF_8CH		83
 #define SCLK_I2S_2CH		84
-#define SCLK_TIMER0		85
-#define SCLK_TIMER1		86
-#define SCLK_TIMER2		87
-#define SCLK_TIMER3		88
-#define SCLK_TIMER4		89
-#define SCLK_TIMER5		90
-#define SCLK_TIMER6		91
+#define SCLK_TIMER00		85
+#define SCLK_TIMER01		86
+#define SCLK_TIMER02		87
+#define SCLK_TIMER03		88
+#define SCLK_TIMER04		89
+#define SCLK_TIMER05		90
 #define SCLK_OTGPHY0		93
 #define SCLK_OTG_ADP		96
 #define SCLK_HSICPHY480M	97
@@ -82,6 +81,12 @@
 #define SCLK_SFC		126
 #define SCLK_MAC		127
 #define SCLK_MACREF_OUT		128
+#define SCLK_TIMER10		133
+#define SCLK_TIMER11		134
+#define SCLK_TIMER12		135
+#define SCLK_TIMER13		136
+#define SCLK_TIMER14		137
+#define SCLK_TIMER15		138
 
 #define DCLK_VOP		190
 #define MCLK_CRYPTO		191
diff --git a/include/dt-bindings/clock/rk1108-cru.h b/include/dt-bindings/clock/rv1108-cru.h
index 9350a5527a36..ae26f8105914 100644
--- a/include/dt-bindings/clock/rk1108-cru.h
+++ b/include/dt-bindings/clock/rv1108-cru.h
@@ -13,8 +13,8 @@
  * GNU General Public License for more details.
  */
 
-#ifndef _DT_BINDINGS_CLK_ROCKCHIP_RK1108_H
-#define _DT_BINDINGS_CLK_ROCKCHIP_RK1108_H
+#ifndef _DT_BINDINGS_CLK_ROCKCHIP_RV1108_H
+#define _DT_BINDINGS_CLK_ROCKCHIP_RV1108_H
 
 /* pll id */
 #define PLL_APLL			0
@@ -266,4 +266,4 @@
 #define ARST_DSP_EDP_PERF		184
 #define ARST_DSP_EPP_PERF		185
 
-#endif /* _DT_BINDINGS_CLK_ROCKCHIP_RK1108_H */
+#endif /* _DT_BINDINGS_CLK_ROCKCHIP_RV1108_H */
diff --git a/include/dt-bindings/clock/sun50i-a64-ccu.h b/include/dt-bindings/clock/sun50i-a64-ccu.h
index 370c0a0473fc..d66432c6e675 100644
--- a/include/dt-bindings/clock/sun50i-a64-ccu.h
+++ b/include/dt-bindings/clock/sun50i-a64-ccu.h
@@ -43,6 +43,8 @@
 #ifndef _DT_BINDINGS_CLK_SUN50I_A64_H_
 #define _DT_BINDINGS_CLK_SUN50I_A64_H_
 
+#define CLK_PLL_PERIPH0		11
+
 #define CLK_BUS_MIPI_DSI	28
 #define CLK_BUS_CE		29
 #define CLK_BUS_DMA		30
diff --git a/include/dt-bindings/clock/sun8i-h3-ccu.h b/include/dt-bindings/clock/sun8i-h3-ccu.h
index efb7ba2bd515..e139fe5c62ec 100644
--- a/include/dt-bindings/clock/sun8i-h3-ccu.h
+++ b/include/dt-bindings/clock/sun8i-h3-ccu.h
@@ -43,6 +43,8 @@
 #ifndef _DT_BINDINGS_CLK_SUN8I_H3_H_
 #define _DT_BINDINGS_CLK_SUN8I_H3_H_
 
+#define CLK_PLL_PERIPH0		9
+
 #define CLK_CPUX		14
 
 #define CLK_BUS_CE		20
@@ -91,7 +93,7 @@
 #define CLK_BUS_UART1		63
 #define CLK_BUS_UART2		64
 #define CLK_BUS_UART3		65
-#define CLK_BUS_SCR		66
+#define CLK_BUS_SCR0		66
 #define CLK_BUS_EPHY		67
 #define CLK_BUS_DBG		68
 
@@ -142,4 +144,7 @@
 
 #define CLK_GPU			114
 
+/* New clocks imported in H5 */
+#define CLK_BUS_SCR1		115
+
 #endif /* _DT_BINDINGS_CLK_SUN8I_H3_H_ */
diff --git a/include/dt-bindings/clock/sun8i-r-ccu.h b/include/dt-bindings/clock/sun8i-r-ccu.h
new file mode 100644
index 000000000000..779d20aa0d05
--- /dev/null
+++ b/include/dt-bindings/clock/sun8i-r-ccu.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016 Icenowy Zheng <icenowy@aosc.xyz>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_CLK_SUN8I_R_CCU_H_
+#define _DT_BINDINGS_CLK_SUN8I_R_CCU_H_
+
+#define CLK_AR100		0
+
+#define CLK_APB0_PIO		3
+#define CLK_APB0_IR		4
+#define CLK_APB0_TIMER		5
+#define CLK_APB0_RSB		6
+#define CLK_APB0_UART		7
+/* 8 is reserved for CLK_APB0_W1 on A31 */
+#define CLK_APB0_I2C		9
+#define CLK_APB0_TWD		10
+
+#define CLK_IR			11
+
+#endif /* _DT_BINDINGS_CLK_SUN8I_R_CCU_H_ */
diff --git a/include/dt-bindings/clock/tegra114-car.h b/include/dt-bindings/clock/tegra114-car.h
index 534c03f8ad72..ed5ca218c857 100644
--- a/include/dt-bindings/clock/tegra114-car.h
+++ b/include/dt-bindings/clock/tegra114-car.h
@@ -156,7 +156,7 @@
 /* 133 */
 /* 134 */
 /* 135 */
-/* 136 */
+#define TEGRA114_CLK_CEC 136
 /* 137 */
 /* 138 */
 /* 139 */
diff --git a/include/dt-bindings/clock/tegra124-car-common.h b/include/dt-bindings/clock/tegra124-car-common.h
index a2156090563f..9352c7e2ce0b 100644
--- a/include/dt-bindings/clock/tegra124-car-common.h
+++ b/include/dt-bindings/clock/tegra124-car-common.h
@@ -156,7 +156,7 @@
 /* 133 */
 /* 134 */
 /* 135 */
-/* 136 */
+#define TEGRA124_CLK_CEC 136
 /* 137 */
 /* 138 */
 /* 139 */
diff --git a/include/dt-bindings/clock/tegra210-car.h b/include/dt-bindings/clock/tegra210-car.h
index 35288b20f2c9..46689cd3750b 100644
--- a/include/dt-bindings/clock/tegra210-car.h
+++ b/include/dt-bindings/clock/tegra210-car.h
@@ -39,7 +39,7 @@
 /* 20 (register bit affects vi and vi_sensor) */
 /* 21 */
 #define TEGRA210_CLK_USBD 22
-#define TEGRA210_CLK_ISP 23
+#define TEGRA210_CLK_ISPA 23
 /* 24 */
 /* 25 */
 #define TEGRA210_CLK_DISP2 26
@@ -156,7 +156,7 @@
 /* 133 */
 /* 134 */
 /* 135 */
-/* 136 */
+#define TEGRA210_CLK_CEC 136
 /* 137 */
 /* 138 */
 /* 139 */
@@ -173,7 +173,7 @@
 #define TEGRA210_CLK_ENTROPY 149
 /* 150 */
 /* 151 */
-/* 152 */
+#define TEGRA210_CLK_DP2 152
 /* 153 */
 /* 154 */
 /* 155 (bit affects dfll_ref and dfll_soc) */
@@ -210,7 +210,7 @@
 #define TEGRA210_CLK_DBGAPB 185
 /* 186 */
 #define TEGRA210_CLK_PLL_P_OUT_ADSP 187
-/* 188 */
+/* 188 ((bit affects pll_a_out_adsp and pll_a_out0_out_adsp)*/
 #define TEGRA210_CLK_PLL_G_REF 189
 /* 190 */
 /* 191 */
@@ -222,7 +222,7 @@
 /* 196 */
 #define TEGRA210_CLK_DMIC3 197
 #define TEGRA210_CLK_APE 198
-/* 199 */
+#define TEGRA210_CLK_ADSP 199
 /* 200 */
 /* 201 */
 #define TEGRA210_CLK_MAUD 202
@@ -241,10 +241,10 @@
 /* 215 */
 /* 216 */
 /* 217 */
-/* 218 */
+#define TEGRA210_CLK_ADSP_NEON 218
 #define TEGRA210_CLK_NVENC 219
-/* 220 */
-/* 221 */
+#define TEGRA210_CLK_IQC2 220
+#define TEGRA210_CLK_IQC1 221
 #define TEGRA210_CLK_SOR_SAFE 222
 #define TEGRA210_CLK_PLL_P_OUT_CPU 223
 
@@ -349,9 +349,9 @@
 #define TEGRA210_CLK_PLL_RE_OUT1 319
 /* 320 */
 /* 321 */
-/* 322 */
-/* 323 */
-/* 324 */
+#define TEGRA210_CLK_ISP 322
+#define TEGRA210_CLK_PLL_A_OUT_ADSP 323
+#define TEGRA210_CLK_PLL_A_OUT0_OUT_ADSP 324
 /* 325 */
 /* 326 */
 /* 327 */
@@ -396,6 +396,15 @@
 #define TEGRA210_CLK_PLL_C_UD 364
 #define TEGRA210_CLK_SCLK_MUX 365
 
-#define TEGRA210_CLK_CLK_MAX 366
+#define TEGRA210_CLK_ACLK 370
+
+#define TEGRA210_CLK_DMIC1_SYNC_CLK 388
+#define TEGRA210_CLK_DMIC1_SYNC_CLK_MUX 389
+#define TEGRA210_CLK_DMIC2_SYNC_CLK 390
+#define TEGRA210_CLK_DMIC2_SYNC_CLK_MUX 391
+#define TEGRA210_CLK_DMIC3_SYNC_CLK 392
+#define TEGRA210_CLK_DMIC3_SYNC_CLK_MUX 393
+
+#define TEGRA210_CLK_CLK_MAX 394
 
 #endif	/* _DT_BINDINGS_CLOCK_TEGRA210_CAR_H */
diff --git a/include/dt-bindings/clock/tegra30-car.h b/include/dt-bindings/clock/tegra30-car.h
index 889e49ba0aa3..7213354b9652 100644
--- a/include/dt-bindings/clock/tegra30-car.h
+++ b/include/dt-bindings/clock/tegra30-car.h
@@ -156,7 +156,7 @@
 /* 133 */
 /* 134 */
 /* 135 */
-/* 136 */
+#define TEGRA30_CLK_CEC 136
 /* 137 */
 /* 138 */
 /* 139 */
diff --git a/include/dt-bindings/reset/mt2701-resets.h b/include/dt-bindings/reset/mt2701-resets.h
index aaf03057f755..21deb547cfa4 100644
--- a/include/dt-bindings/reset/mt2701-resets.h
+++ b/include/dt-bindings/reset/mt2701-resets.h
@@ -80,4 +80,11 @@
 #define MT2701_HIFSYS_PCIE1_RST			25
 #define MT2701_HIFSYS_PCIE2_RST			26
 
+/* ETHSYS resets */
+#define MT2701_ETHSYS_SYS_RST			0
+#define MT2701_ETHSYS_MCM_RST			2
+#define MT2701_ETHSYS_FE_RST			6
+#define MT2701_ETHSYS_GMAC_RST			23
+#define MT2701_ETHSYS_PPE_RST			31
+
 #endif  /* _DT_BINDINGS_RESET_CONTROLLER_MT2701 */
diff --git a/include/dt-bindings/reset/sun8i-h3-ccu.h b/include/dt-bindings/reset/sun8i-h3-ccu.h
index 6b7af80c26ec..484c2a22919d 100644
--- a/include/dt-bindings/reset/sun8i-h3-ccu.h
+++ b/include/dt-bindings/reset/sun8i-h3-ccu.h
@@ -98,6 +98,9 @@
 #define RST_BUS_UART1		50
 #define RST_BUS_UART2		51
 #define RST_BUS_UART3		52
-#define RST_BUS_SCR		53
+#define RST_BUS_SCR0		53
+
+/* New resets imported in H5 */
+#define RST_BUS_SCR1		54
 
 #endif /* _DT_BINDINGS_RST_SUN8I_H3_H_ */
diff --git a/include/dt-bindings/reset/sun8i-r-ccu.h b/include/dt-bindings/reset/sun8i-r-ccu.h
new file mode 100644
index 000000000000..4ba64f3d6fc9
--- /dev/null
+++ b/include/dt-bindings/reset/sun8i-r-ccu.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2016 Icenowy Zheng <icenowy@aosc.xyz>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_RST_SUN8I_R_CCU_H_
+#define _DT_BINDINGS_RST_SUN8I_R_CCU_H_
+
+#define RST_APB0_IR		0
+#define RST_APB0_TIMER		1
+#define RST_APB0_RSB		2
+#define RST_APB0_UART		3
+/* 4 is reserved for RST_APB0_W1 on A31 */
+#define RST_APB0_I2C		5
+
+#endif /* _DT_BINDINGS_RST_SUN8I_R_CCU_H_ */
diff --git a/include/dt-bindings/reset/tegra210-car.h b/include/dt-bindings/reset/tegra210-car.h
new file mode 100644
index 000000000000..296ec6e3f8c0
--- /dev/null
+++ b/include/dt-bindings/reset/tegra210-car.h
@@ -0,0 +1,13 @@
+/*
+ * This header provides Tegra210-specific constants for binding
+ * nvidia,tegra210-car.
+ */
+
+#ifndef _DT_BINDINGS_RESET_TEGRA210_CAR_H
+#define _DT_BINDINGS_RESET_TEGRA210_CAR_H
+
+#define TEGRA210_RESET(x)		(7 * 32 + (x))
+#define TEGRA210_RST_DFLL_DVCO		TEGRA210_RESET(0)
+#define TEGRA210_RST_ADSP		TEGRA210_RESET(1)
+
+#endif	/* _DT_BINDINGS_RESET_TEGRA210_CAR_H */
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 581a59ea7e34..ef718586321c 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -148,7 +148,6 @@ struct vgic_its {
 	gpa_t			vgic_its_base;
 
 	bool			enabled;
-	bool			initialized;
 	struct vgic_io_device	iodev;
 	struct kvm_device	*dev;
 
@@ -162,6 +161,9 @@ struct vgic_its {
 	u32			creadr;
 	u32			cwriter;
 
+	/* migration ABI revision in use */
+	u32			abi_rev;
+
 	/* Protects the device and collection lists */
 	struct mutex		its_lock;
 	struct list_head	device_list;
@@ -193,7 +195,10 @@ struct vgic_dist {
 		/* either a GICv2 CPU interface */
 		gpa_t			vgic_cpu_base;
 		/* or a number of GICv3 redistributor regions */
-		gpa_t			vgic_redist_base;
+		struct {
+			gpa_t		vgic_redist_base;
+			gpa_t		vgic_redist_free_offset;
+		};
 	};
 
 	/* distributor enabled */
@@ -283,6 +288,7 @@ extern struct static_key_false vgic_v2_cpuif_trap;
 
 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 void kvm_vgic_early_init(struct kvm *kvm);
+int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
 int kvm_vgic_create(struct kvm *kvm, u32 type);
 void kvm_vgic_destroy(struct kvm *kvm);
 void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 137e4a3d89c5..cafdfb84ca28 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -26,6 +26,7 @@
 #include <linux/resource_ext.h>
 #include <linux/device.h>
 #include <linux/property.h>
+#include <linux/uuid.h>
 
 #ifndef _LINUX
 #define _LINUX
@@ -457,7 +458,6 @@ struct acpi_osc_context {
 	struct acpi_buffer ret;		/* free by caller if success */
 };
 
-acpi_status acpi_str_to_uuid(char *str, u8 *uuid);
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 
 /* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */
@@ -741,7 +741,7 @@ static inline bool acpi_driver_match_device(struct device *dev,
 }
 
 static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle,
-						   const u8 *uuid,
+						   const guid_t *guid,
 						   int rev, int func,
 						   union acpi_object *argv4)
 {
diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h
index 2793652fbf66..a414a2b53e41 100644
--- a/include/linux/bcm47xx_nvram.h
+++ b/include/linux/bcm47xx_nvram.h
@@ -8,6 +8,7 @@
 #ifndef __BCM47XX_NVRAM_H
 #define __BCM47XX_NVRAM_H
 
+#include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/vmalloc.h>
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d1b04b0e99cf..664a27da276d 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -118,7 +118,6 @@ static inline void *bio_data(struct bio *bio)
 /*
  * will die
  */
-#define bio_to_phys(bio)	(page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio)))
 #define bvec_to_phys(bv)	(page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
 
 /*
@@ -373,8 +372,11 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
 	return bio_split(bio, sectors, gfp, bs);
 }
 
-extern struct bio_set *bioset_create(unsigned int, unsigned int);
-extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int);
+extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags);
+enum {
+	BIOSET_NEED_BVECS = BIT(0),
+	BIOSET_NEED_RESCUER = BIT(1),
+};
 extern void bioset_free(struct bio_set *);
 extern mempool_t *biovec_create_pool(int pool_entries);
 
@@ -392,11 +394,6 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 	return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
 }
 
-static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
-{
-	return bio_clone_bioset(bio, gfp_mask, fs_bio_set);
-}
-
 static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 {
 	return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
@@ -414,7 +411,13 @@ extern void bio_endio(struct bio *);
 
 static inline void bio_io_error(struct bio *bio)
 {
-	bio->bi_error = -EIO;
+	bio->bi_status = BLK_STS_IOERR;
+	bio_endio(bio);
+}
+
+static inline void bio_wouldblock_error(struct bio *bio)
+{
+	bio->bi_status = BLK_STS_AGAIN;
 	bio_endio(bio);
 }
 
@@ -426,6 +429,7 @@ extern void bio_advance(struct bio *, unsigned);
 
 extern void bio_init(struct bio *bio, struct bio_vec *table,
 		     unsigned short max_vecs);
+extern void bio_uninit(struct bio *);
 extern void bio_reset(struct bio *);
 void bio_chain(struct bio *, struct bio *);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c47aa248c640..14542308d25b 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -33,14 +33,12 @@ struct blk_mq_hw_ctx {
 	struct blk_mq_ctx	**ctxs;
 	unsigned int		nr_ctx;
 
-	wait_queue_t		dispatch_wait;
+	wait_queue_entry_t		dispatch_wait;
 	atomic_t		wait_index;
 
 	struct blk_mq_tags	*tags;
 	struct blk_mq_tags	*sched_tags;
 
-	struct srcu_struct	queue_rq_srcu;
-
 	unsigned long		queued;
 	unsigned long		run;
 #define BLK_MQ_MAX_DISPATCH_ORDER	7
@@ -62,6 +60,9 @@ struct blk_mq_hw_ctx {
 	struct dentry		*debugfs_dir;
 	struct dentry		*sched_debugfs_dir;
 #endif
+
+	/* Must be the last member - see also blk_mq_hw_ctx_size(). */
+	struct srcu_struct	queue_rq_srcu[0];
 };
 
 struct blk_mq_tag_set {
@@ -87,7 +88,8 @@ struct blk_mq_queue_data {
 	bool last;
 };
 
-typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
+typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
+		const struct blk_mq_queue_data *);
 typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
 typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
@@ -142,6 +144,8 @@ struct blk_mq_ops {
 	init_request_fn		*init_request;
 	exit_request_fn		*exit_request;
 	reinit_request_fn	*reinit_request;
+	/* Called from inside blk_get_request() */
+	void (*initialize_rq_fn)(struct request *rq);
 
 	map_queues_fn		*map_queues;
 
@@ -155,10 +159,6 @@ struct blk_mq_ops {
 };
 
 enum {
-	BLK_MQ_RQ_QUEUE_OK	= 0,	/* queued fine */
-	BLK_MQ_RQ_QUEUE_BUSY	= 1,	/* requeue IO for later */
-	BLK_MQ_RQ_QUEUE_ERROR	= 2,	/* end IO with error */
-
 	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
 	BLK_MQ_F_TAG_SHARED	= 1 << 1,
 	BLK_MQ_F_SG_MERGE	= 1 << 2,
@@ -204,10 +204,10 @@ enum {
 	BLK_MQ_REQ_INTERNAL	= (1 << 2), /* allocate internal/sched tag */
 };
 
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
 		unsigned int flags);
-struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op,
-		unsigned int flags, unsigned int hctx_idx);
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+		unsigned int op, unsigned int flags, unsigned int hctx_idx);
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 
 enum {
@@ -230,15 +230,14 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
 
 int blk_mq_request_started(struct request *rq);
 void blk_mq_start_request(struct request *rq);
-void blk_mq_end_request(struct request *rq, int error);
-void __blk_mq_end_request(struct request *rq, int error);
+void blk_mq_end_request(struct request *rq, blk_status_t error);
+void __blk_mq_end_request(struct request *rq, blk_status_t error);
 
 void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
 				bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
-void blk_mq_abort_requeue_list(struct request_queue *q);
 void blk_mq_complete_request(struct request *rq);
 
 bool blk_mq_queue_stopped(struct request_queue *q);
@@ -248,6 +247,8 @@ void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
+void blk_mq_quiesce_queue(struct request_queue *q);
+void blk_mq_unquiesce_queue(struct request_queue *q);
 void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
@@ -265,6 +266,8 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set);
 int blk_mq_map_queues(struct blk_mq_tag_set *set);
 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
 
+void blk_mq_quiesce_queue_nowait(struct request_queue *q);
+
 /*
  * Driver command data is immediately after the request. So subtract request
  * size to get back to the original request, add request size to get the PDU.
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 61339bc44400..d2eb87c84d82 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -17,6 +17,27 @@ struct io_context;
 struct cgroup_subsys_state;
 typedef void (bio_end_io_t) (struct bio *);
 
+/*
+ * Block error status values.  See block/blk-core:blk_errors for the details.
+ */
+typedef u8 __bitwise blk_status_t;
+#define	BLK_STS_OK 0
+#define BLK_STS_NOTSUPP		((__force blk_status_t)1)
+#define BLK_STS_TIMEOUT		((__force blk_status_t)2)
+#define BLK_STS_NOSPC		((__force blk_status_t)3)
+#define BLK_STS_TRANSPORT	((__force blk_status_t)4)
+#define BLK_STS_TARGET		((__force blk_status_t)5)
+#define BLK_STS_NEXUS		((__force blk_status_t)6)
+#define BLK_STS_MEDIUM		((__force blk_status_t)7)
+#define BLK_STS_PROTECTION	((__force blk_status_t)8)
+#define BLK_STS_RESOURCE	((__force blk_status_t)9)
+#define BLK_STS_IOERR		((__force blk_status_t)10)
+
+/* hack for device mapper, don't use elsewhere: */
+#define BLK_STS_DM_REQUEUE    ((__force blk_status_t)11)
+
+#define BLK_STS_AGAIN		((__force blk_status_t)12)
+
 struct blk_issue_stat {
 	u64 stat;
 };
@@ -28,13 +49,14 @@ struct blk_issue_stat {
 struct bio {
 	struct bio		*bi_next;	/* request queue link */
 	struct block_device	*bi_bdev;
-	int			bi_error;
+	blk_status_t		bi_status;
 	unsigned int		bi_opf;		/* bottom bits req flags,
 						 * top bits REQ_OP. Use
 						 * accessors.
 						 */
 	unsigned short		bi_flags;	/* status, etc and bvec pool number */
 	unsigned short		bi_ioprio;
+	unsigned short		bi_write_hint;
 
 	struct bvec_iter	bi_iter;
 
@@ -205,6 +227,7 @@ enum req_flag_bits {
 	/* command specific flags for REQ_OP_WRITE_ZEROES: */
 	__REQ_NOUNMAP,		/* do not free blocks when zeroing */
 
+	__REQ_NOWAIT,           /* Don't wait if request will block */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -223,6 +246,7 @@ enum req_flag_bits {
 #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
 
 #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
+#define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
 
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b5d1e27631ee..25f6a0cb27d3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -55,7 +55,7 @@ struct blk_stat_callback;
  */
 #define BLKCG_MAX_POLS		3
 
-typedef void (rq_end_io_fn)(struct request *, int);
+typedef void (rq_end_io_fn)(struct request *, blk_status_t);
 
 #define BLK_RL_SYNCFULL		(1U << 0)
 #define BLK_RL_ASYNCFULL	(1U << 1)
@@ -225,6 +225,8 @@ struct request {
 
 	unsigned int extra_len;	/* length of alignment and padding */
 
+	unsigned short write_hint;
+
 	unsigned long deadline;
 	struct list_head timeout_list;
 
@@ -391,6 +393,8 @@ struct request_queue {
 	int			nr_rqs[2];	/* # allocated [a]sync rqs */
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
+	atomic_t		shared_hctx_restart;
+
 	struct blk_queue_stats	*stats;
 	struct rq_wb		*rq_wb;
 
@@ -410,8 +414,12 @@ struct request_queue {
 	rq_timed_out_fn		*rq_timed_out_fn;
 	dma_drain_needed_fn	*dma_drain_needed;
 	lld_busy_fn		*lld_busy_fn;
+	/* Called just after a request is allocated */
 	init_rq_fn		*init_rq_fn;
+	/* Called just before a request is freed */
 	exit_rq_fn		*exit_rq_fn;
+	/* Called from inside blk_get_request() */
+	void (*initialize_rq_fn)(struct request *rq);
 
 	const struct blk_mq_ops	*mq_ops;
 
@@ -586,6 +594,11 @@ struct request_queue {
 
 	size_t			cmd_size;
 	void			*rq_alloc_data;
+
+	struct work_struct	release_work;
+
+#define BLK_MAX_WRITE_HINTS	5
+	u64			write_hints[BLK_MAX_WRITE_HINTS];
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
@@ -618,6 +631,8 @@ struct request_queue {
 #define QUEUE_FLAG_STATS       27	/* track rq completion times */
 #define QUEUE_FLAG_POLL_STATS  28	/* collecting stats for hybrid polling */
 #define QUEUE_FLAG_REGISTERED  29	/* queue has been registered to a disk */
+#define QUEUE_FLAG_SCSI_PASSTHROUGH 30	/* queue supports SCSI commands */
+#define QUEUE_FLAG_QUIESCED    31	/* queue has been quiesced */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -629,6 +644,13 @@ struct request_queue {
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
 				 (1 << QUEUE_FLAG_POLL))
 
+/*
+ * @q->queue_lock is set while a queue is being initialized. Since we know
+ * that no other threads access the queue object before @q->queue_lock has
+ * been set, it is safe to manipulate queue flags without holding the
+ * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
+ * blk_init_allocated_queue().
+ */
 static inline void queue_lockdep_assert_held(struct request_queue *q)
 {
 	if (q->queue_lock)
@@ -708,10 +730,13 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 #define blk_queue_secure_erase(q) \
 	(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
 #define blk_queue_dax(q)	test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
+#define blk_queue_scsi_passthrough(q)	\
+	test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
 
 #define blk_noretry_request(rq) \
 	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
 			     REQ_FAILFAST_DRIVER))
+#define blk_queue_quiesced(q)	test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
 
 static inline bool blk_account_rq(struct request *rq)
 {
@@ -810,7 +835,8 @@ static inline bool rq_mergeable(struct request *rq)
 
 static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
 {
-	if (bio_data(a) == bio_data(b))
+	if (bio_page(a) == bio_page(b) &&
+	    bio_offset(a) == bio_offset(b))
 		return true;
 
 	return false;
@@ -858,19 +884,6 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn;
 #define BLK_DEFAULT_SG_TIMEOUT	(60 * HZ)
 #define BLK_MIN_SG_TIMEOUT	(7 * HZ)
 
-#ifdef CONFIG_BOUNCE
-extern int init_emergency_isa_pool(void);
-extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
-#else
-static inline int init_emergency_isa_pool(void)
-{
-	return 0;
-}
-static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
-{
-}
-#endif /* CONFIG_MMU */
-
 struct rq_map_data {
 	struct page **pages;
 	int page_order;
@@ -929,7 +942,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
-extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
+extern struct request *blk_get_request(struct request_queue *, unsigned int op,
+				       gfp_t gfp_mask);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
@@ -937,12 +951,11 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 			     int (*bio_ctr)(struct bio *, struct bio *, void *),
 			     void *data);
 extern void blk_rq_unprep_clone(struct request *rq);
-extern int blk_insert_cloned_request(struct request_queue *q,
+extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
-extern void blk_queue_split(struct request_queue *, struct bio **,
-			    struct bio_set *);
+extern void blk_queue_split(struct request_queue *, struct bio **);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
 extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
 extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
@@ -963,7 +976,6 @@ extern void __blk_run_queue(struct request_queue *q);
 extern void __blk_run_queue_uncond(struct request_queue *q);
 extern void blk_run_queue(struct request_queue *);
 extern void blk_run_queue_async(struct request_queue *q);
-extern void blk_mq_quiesce_queue(struct request_queue *q);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
 			   struct rq_map_data *, void __user *, unsigned long,
 			   gfp_t);
@@ -977,6 +989,9 @@ extern void blk_execute_rq(struct request_queue *, struct gendisk *,
 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
 				  struct request *, int, rq_end_io_fn *);
 
+int blk_status_to_errno(blk_status_t status);
+blk_status_t errno_to_blk_status(int errno);
+
 bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
@@ -1109,16 +1124,16 @@ extern struct request *blk_fetch_request(struct request_queue *q);
  * blk_end_request() for parts of the original function.
  * This prevents code duplication in drivers.
  */
-extern bool blk_update_request(struct request *rq, int error,
+extern bool blk_update_request(struct request *rq, blk_status_t error,
 			       unsigned int nr_bytes);
-extern void blk_finish_request(struct request *rq, int error);
-extern bool blk_end_request(struct request *rq, int error,
+extern void blk_finish_request(struct request *rq, blk_status_t error);
+extern bool blk_end_request(struct request *rq, blk_status_t error,
 			    unsigned int nr_bytes);
-extern void blk_end_request_all(struct request *rq, int error);
-extern bool __blk_end_request(struct request *rq, int error,
+extern void blk_end_request_all(struct request *rq, blk_status_t error);
+extern bool __blk_end_request(struct request *rq, blk_status_t error,
 			      unsigned int nr_bytes);
-extern void __blk_end_request_all(struct request *rq, int error);
-extern bool __blk_end_request_cur(struct request *rq, int error);
+extern void __blk_end_request_all(struct request *rq, blk_status_t error);
+extern bool __blk_end_request_cur(struct request *rq, blk_status_t error);
 
 extern void blk_complete_request(struct request *);
 extern void __blk_complete_request(struct request *);
@@ -1370,11 +1385,6 @@ enum blk_default_limits {
 
 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
 
-static inline unsigned long queue_bounce_pfn(struct request_queue *q)
-{
-	return q->limits.bounce_pfn;
-}
-
 static inline unsigned long queue_segment_boundary(struct request_queue *q)
 {
 	return q->limits.seg_boundary_mask;
@@ -1776,7 +1786,7 @@ struct blk_integrity_iter {
 	const char		*disk_name;
 };
 
-typedef int (integrity_processing_fn) (struct blk_integrity_iter *);
+typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
 
 struct blk_integrity_profile {
 	integrity_processing_fn		*generate_fn;
@@ -1947,8 +1957,6 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
 extern int bdev_read_page(struct block_device *, sector_t, struct page *);
 extern int bdev_write_page(struct block_device *, sector_t, struct page *,
 						struct writeback_control *);
-extern int bdev_dax_supported(struct super_block *, int);
-int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #else /* CONFIG_BLOCK */
 
 struct block_device;
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 5efb4db44e1e..d5093b52b485 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -40,6 +40,9 @@ struct bpf_reg_state {
 	 */
 	s64 min_value;
 	u64 max_value;
+	u32 min_align;
+	u32 aux_off;
+	u32 aux_off_align;
 };
 
 enum bpf_stack_slot_type {
@@ -87,6 +90,7 @@ struct bpf_verifier_env {
 	struct bpf_prog *prog;		/* eBPF program being verified */
 	struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
 	int stack_size;			/* number of states to be processed */
+	bool strict_alignment;		/* perform strict pointer alignment checks */
 	struct bpf_verifier_state cur_state; /* current verifier state */
 	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
 	const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */
diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h
index aa2e19182d99..51c5bd64bd00 100644
--- a/include/linux/ceph/ceph_debug.h
+++ b/include/linux/ceph/ceph_debug.h
@@ -3,6 +3,8 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/string.h>
+
 #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
 
 /*
@@ -12,12 +14,10 @@
  */
 
 # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
-extern const char *ceph_file_part(const char *s, int len);
 #  define dout(fmt, ...)						\
 	pr_debug("%.*s %12.12s:%-4d : " fmt,				\
 		 8 - (int)sizeof(KBUILD_MODNAME), "    ",		\
-		 ceph_file_part(__FILE__, sizeof(__FILE__)),		\
-		 __LINE__, ##__VA_ARGS__)
+		 kbasename(__FILE__), __LINE__, ##__VA_ARGS__)
 # else
 /* faux printk call just to see any compiler warnings. */
 #  define dout(fmt, ...)	do {				\
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index ae2f66833762..fd8b2953c78f 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -105,8 +105,10 @@ static inline u64 ceph_sanitize_features(u64 features)
  */
 #define CEPH_FEATURES_SUPPORTED_DEFAULT		\
 	(CEPH_FEATURE_NOSRCADDR |		\
+	 CEPH_FEATURE_FLOCK |			\
 	 CEPH_FEATURE_SUBSCRIBE2 |		\
 	 CEPH_FEATURE_RECONNECT_SEQ |		\
+	 CEPH_FEATURE_DIRLAYOUTHASH |		\
 	 CEPH_FEATURE_PGID64 |			\
 	 CEPH_FEATURE_PGPOOL3 |			\
 	 CEPH_FEATURE_OSDENC |			\
@@ -114,11 +116,13 @@ static inline u64 ceph_sanitize_features(u64 features)
 	 CEPH_FEATURE_MSG_AUTH |		\
 	 CEPH_FEATURE_CRUSH_TUNABLES2 |		\
 	 CEPH_FEATURE_REPLY_CREATE_INODE |	\
+	 CEPH_FEATURE_MDSENC |			\
 	 CEPH_FEATURE_OSDHASHPSPOOL |		\
 	 CEPH_FEATURE_OSD_CACHEPOOL |		\
 	 CEPH_FEATURE_CRUSH_V2 |		\
 	 CEPH_FEATURE_EXPORT_PEER |		\
 	 CEPH_FEATURE_OSDMAP_ENC |		\
+	 CEPH_FEATURE_MDS_INLINE_DATA |		\
 	 CEPH_FEATURE_CRUSH_TUNABLES3 |		\
 	 CEPH_FEATURE_OSD_PRIMARY_AFFINITY |	\
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index f4b2ee18f38c..ad078ebe25d6 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -365,6 +365,19 @@ extern const char *ceph_mds_op_name(int op);
 #define CEPH_READDIR_FRAG_END		(1<<0)
 #define CEPH_READDIR_FRAG_COMPLETE	(1<<8)
 #define CEPH_READDIR_HASH_ORDER		(1<<9)
+#define CEPH_READDIR_OFFSET_HASH	(1<<10)
+
+/*
+ * open request flags
+ */
+#define CEPH_O_RDONLY		00000000
+#define CEPH_O_WRONLY		00000001
+#define CEPH_O_RDWR		00000002
+#define CEPH_O_CREAT		00000100
+#define CEPH_O_EXCL		00000200
+#define CEPH_O_TRUNC		00001000
+#define CEPH_O_DIRECTORY	00200000
+#define CEPH_O_NOFOLLOW		00400000
 
 union ceph_mds_request_args {
 	struct {
@@ -384,6 +397,7 @@ union ceph_mds_request_args {
 		__le32 max_entries;          /* how many dentries to grab */
 		__le32 max_bytes;
 		__le16 flags;
+		__le32 offset_hash;
 	} __attribute__ ((packed)) readdir;
 	struct {
 		__le32 mode;
diff --git a/include/linux/ceph/cls_lock_client.h b/include/linux/ceph/cls_lock_client.h
index 84884d8d4710..0594d3bba774 100644
--- a/include/linux/ceph/cls_lock_client.h
+++ b/include/linux/ceph/cls_lock_client.h
@@ -37,6 +37,11 @@ int ceph_cls_break_lock(struct ceph_osd_client *osdc,
 			struct ceph_object_locator *oloc,
 			char *lock_name, char *cookie,
 			struct ceph_entity_name *locker);
+int ceph_cls_set_cookie(struct ceph_osd_client *osdc,
+			struct ceph_object_id *oid,
+			struct ceph_object_locator *oloc,
+			char *lock_name, u8 type, char *old_cookie,
+			char *tag, char *new_cookie);
 
 void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers);
 
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 88cd5dc8e238..3229ae6c7846 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -14,6 +14,7 @@
 #include <linux/wait.h>
 #include <linux/writeback.h>
 #include <linux/slab.h>
+#include <linux/refcount.h>
 
 #include <linux/ceph/types.h>
 #include <linux/ceph/messenger.h>
@@ -161,7 +162,7 @@ struct ceph_client {
  * dirtied.
  */
 struct ceph_snap_context {
-	atomic_t nref;
+	refcount_t nref;
 	u64 seq;
 	u32 num_snaps;
 	u64 snaps[];
@@ -262,10 +263,7 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client);
 extern void ceph_destroy_options(struct ceph_options *opt);
 extern int ceph_compare_options(struct ceph_options *new_opt,
 				struct ceph_client *client);
-extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
-					      void *private,
-					      u64 supported_features,
-					      u64 required_features);
+struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private);
 struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client);
 u64 ceph_client_gid(struct ceph_client *client);
 extern void ceph_destroy_client(struct ceph_client *client);
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
index 8ed5dc505fbb..d5f783f3226a 100644
--- a/include/linux/ceph/mdsmap.h
+++ b/include/linux/ceph/mdsmap.h
@@ -25,6 +25,7 @@ struct ceph_mdsmap {
 	u32 m_session_autoclose;        /* seconds */
 	u64 m_max_file_size;
 	u32 m_max_mds;                  /* size of m_addr, m_state arrays */
+	int m_num_mds;
 	struct ceph_mds_info *m_info;
 
 	/* which object pools file data can be stored in */
@@ -40,7 +41,7 @@ struct ceph_mdsmap {
 static inline struct ceph_entity_addr *
 ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
 {
-	if (w >= m->m_max_mds)
+	if (w >= m->m_num_mds)
 		return NULL;
 	return &m->m_info[w].addr;
 }
@@ -48,14 +49,14 @@ ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
 static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w)
 {
 	BUG_ON(w < 0);
-	if (w >= m->m_max_mds)
+	if (w >= m->m_num_mds)
 		return CEPH_MDS_STATE_DNE;
 	return m->m_info[w].state;
 }
 
 static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
 {
-	if (w >= 0 && w < m->m_max_mds)
+	if (w >= 0 && w < m->m_num_mds)
 		return m->m_info[w].laggy;
 	return false;
 }
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index c125b5d9e13c..85650b415e73 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -5,6 +5,7 @@
 #include <linux/kref.h>
 #include <linux/mempool.h>
 #include <linux/rbtree.h>
+#include <linux/refcount.h>
 
 #include <linux/ceph/types.h>
 #include <linux/ceph/osdmap.h>
@@ -27,7 +28,7 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
 
 /* a given osd we're communicating with */
 struct ceph_osd {
-	atomic_t o_ref;
+	refcount_t o_ref;
 	struct ceph_osd_client *o_osdc;
 	int o_osd;
 	int o_incarnation;
@@ -186,12 +187,12 @@ struct ceph_osd_request {
 	struct timespec r_mtime;              /* ditto */
 	u64 r_data_offset;                    /* ditto */
 	bool r_linger;                        /* don't resend on failure */
+	bool r_abort_on_full;		      /* return ENOSPC when full */
 
 	/* internal */
 	unsigned long r_stamp;                /* jiffies, send or check time */
 	unsigned long r_start_stamp;          /* jiffies */
 	int r_attempts;
-	struct ceph_eversion r_replay_version; /* aka reassert_version */
 	u32 r_last_force_resend;
 	u32 r_map_dne_bound;
 
@@ -266,6 +267,7 @@ struct ceph_osd_client {
 	struct rb_root         osds;          /* osds */
 	struct list_head       osd_lru;       /* idle osds */
 	spinlock_t             osd_lru_lock;
+	u32		       epoch_barrier;
 	struct ceph_osd        homeless_osd;
 	atomic64_t             last_tid;      /* tid of last request */
 	u64                    last_linger_id;
@@ -304,6 +306,7 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
 				   struct ceph_msg *msg);
 extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
 				 struct ceph_msg *msg);
+void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
 
 extern void osd_req_op_init(struct ceph_osd_request *osd_req,
 			    unsigned int which, u16 opcode, u32 flags);
diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h
index 13d71fe18b0c..75a7db21457d 100644
--- a/include/linux/ceph/pagelist.h
+++ b/include/linux/ceph/pagelist.h
@@ -2,7 +2,7 @@
 #define __FS_CEPH_PAGELIST_H
 
 #include <asm/byteorder.h>
-#include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <linux/list.h>
 #include <linux/types.h>
 
@@ -13,7 +13,7 @@ struct ceph_pagelist {
 	size_t room;
 	struct list_head free_list;
 	size_t num_pages_free;
-	atomic_t refcnt;
+	refcount_t refcnt;
 };
 
 struct ceph_pagelist_cursor {
@@ -30,7 +30,7 @@ static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
 	pl->room = 0;
 	INIT_LIST_HEAD(&pl->free_list);
 	pl->num_pages_free = 0;
-	atomic_set(&pl->refcnt, 1);
+	refcount_set(&pl->refcnt, 1);
 }
 
 extern void ceph_pagelist_release(struct ceph_pagelist *pl);
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 21745946cae1..ec47101cb1bf 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -48,6 +48,7 @@ enum {
 	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
 	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
 	CSS_VISIBLE	= (1 << 3), /* css is visible to userland */
+	CSS_DYING	= (1 << 4), /* css is dying */
 };
 
 /* bits in struct cgroup flags field */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed2573e149fa..710a005c6b7a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -344,6 +344,26 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css)
 }
 
 /**
+ * css_is_dying - test whether the specified css is dying
+ * @css: target css
+ *
+ * Test whether @css is in the process of offlining or already offline.  In
+ * most cases, ->css_online() and ->css_offline() callbacks should be
+ * enough; however, the actual offline operations are RCU delayed and this
+ * test returns %true also when @css is scheduled to be offlined.
+ *
+ * This is useful, for example, when the use case requires synchronous
+ * behavior with respect to cgroup removal.  cgroup removal schedules css
+ * offlining but the css can seem alive while the operation is being
+ * delayed.  If the delay affects user visible semantics, this test can be
+ * used to resolve the situation.
+ */
+static inline bool css_is_dying(struct cgroup_subsys_state *css)
+{
+	return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt);
+}
+
+/**
  * css_put - put a css reference
  * @css: target css
  *
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
index fccf7f44139d..bbb3712dd892 100644
--- a/include/linux/cleancache.h
+++ b/include/linux/cleancache.h
@@ -27,7 +27,7 @@ struct cleancache_filekey {
 
 struct cleancache_ops {
 	int (*init_fs)(size_t);
-	int (*init_shared_fs)(char *uuid, size_t);
+	int (*init_shared_fs)(uuid_t *uuid, size_t);
 	int (*get_page)(int, struct cleancache_filekey,
 			pgoff_t, struct page *);
 	void (*put_page)(int, struct cleancache_filekey,
diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h
index 7007a5f48080..d23c9cf26993 100644
--- a/include/linux/clk/tegra.h
+++ b/include/linux/clk/tegra.h
@@ -125,5 +125,8 @@ extern void tegra210_xusb_pll_hw_control_enable(void);
 extern void tegra210_xusb_pll_hw_sequence_start(void);
 extern void tegra210_sata_pll_hw_control_enable(void);
 extern void tegra210_sata_pll_hw_sequence_start(void);
+extern void tegra210_set_sata_pll_seq_sw(bool state);
+extern void tegra210_put_utmipll_in_iddq(void);
+extern void tegra210_put_utmipll_out_iddq(void);
 
 #endif /* __LINUX_CLK_TEGRA_H_ */
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 6110fe09ed18..d18da839b810 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -19,6 +19,18 @@
 #include <linux/clkdev.h>
 
 /**
+ * struct clk_omap_reg - OMAP register declaration
+ * @offset: offset from the master IP module base address
+ * @index: index of the master IP module
+ */
+struct clk_omap_reg {
+	void __iomem *ptr;
+	u16 offset;
+	u8 index;
+	u8 flags;
+};
+
+/**
  * struct dpll_data - DPLL registers and integration data
  * @mult_div1_reg: register containing the DPLL M and N bitfields
  * @mult_mask: mask of the DPLL M bitfield in @mult_div1_reg
@@ -67,12 +79,12 @@
  * can be placed into read-only space.
  */
 struct dpll_data {
-	void __iomem		*mult_div1_reg;
+	struct clk_omap_reg	mult_div1_reg;
 	u32			mult_mask;
 	u32			div1_mask;
 	struct clk_hw		*clk_bypass;
 	struct clk_hw		*clk_ref;
-	void __iomem		*control_reg;
+	struct clk_omap_reg	control_reg;
 	u32			enable_mask;
 	unsigned long		last_rounded_rate;
 	u16			last_rounded_m;
@@ -84,8 +96,8 @@ struct dpll_data {
 	u16			max_divider;
 	unsigned long		max_rate;
 	u8			modes;
-	void __iomem		*autoidle_reg;
-	void __iomem		*idlest_reg;
+	struct clk_omap_reg	autoidle_reg;
+	struct clk_omap_reg	idlest_reg;
 	u32			autoidle_mask;
 	u32			freqsel_mask;
 	u32			idlest_mask;
@@ -113,10 +125,10 @@ struct clk_hw_omap;
  */
 struct clk_hw_omap_ops {
 	void	(*find_idlest)(struct clk_hw_omap *oclk,
-			       void __iomem **idlest_reg,
+			       struct clk_omap_reg *idlest_reg,
 			       u8 *idlest_bit, u8 *idlest_val);
 	void	(*find_companion)(struct clk_hw_omap *oclk,
-				  void __iomem **other_reg,
+				  struct clk_omap_reg *other_reg,
 				  u8 *other_bit);
 	void	(*allow_idle)(struct clk_hw_omap *oclk);
 	void	(*deny_idle)(struct clk_hw_omap *oclk);
@@ -129,8 +141,6 @@ struct clk_hw_omap_ops {
  * @enable_bit: bitshift to write to enable/disable the clock (see @enable_reg)
  * @flags: see "struct clk.flags possibilities" above
  * @clksel_reg: for clksel clks, register va containing src/divisor select
- * @clksel_mask: bitmask in @clksel_reg for the src/divisor selector
- * @clksel: for clksel clks, pointer to struct clksel for this clock
  * @dpll_data: for DPLLs, pointer to struct dpll_data for this clock
  * @clkdm_name: clockdomain name that this clock is contained in
  * @clkdm: pointer to struct clockdomain, resolved from @clkdm_name at runtime
@@ -141,12 +151,10 @@ struct clk_hw_omap {
 	struct list_head	node;
 	unsigned long		fixed_rate;
 	u8			fixed_div;
-	void __iomem		*enable_reg;
+	struct clk_omap_reg	enable_reg;
 	u8			enable_bit;
 	u8			flags;
-	void __iomem		*clksel_reg;
-	u32			clksel_mask;
-	const struct clksel	*clksel;
+	struct clk_omap_reg	clksel_reg;
 	struct dpll_data	*dpll_data;
 	const char		*clkdm_name;
 	struct clockdomain	*clkdm;
@@ -172,7 +180,6 @@ struct clk_hw_omap {
  *     should be used.  This is a temporary solution - a better approach
  *     would be to associate clock type-specific data with the clock,
  *     similar to the struct dpll_data approach.
- * MEMMAP_ADDRESSING: Use memmap addressing to access clock registers.
  */
 #define ENABLE_REG_32BIT	(1 << 0)	/* Use 32-bit access */
 #define CLOCK_IDLE_CONTROL	(1 << 1)
@@ -180,7 +187,6 @@ struct clk_hw_omap {
 #define ENABLE_ON_INIT		(1 << 3)	/* Enable upon framework init */
 #define INVERT_ENABLE		(1 << 4)	/* 0 enables, 1 disables */
 #define CLOCK_CLKOUTX2		(1 << 5)
-#define MEMMAP_ADDRESSING	(1 << 6)
 
 /* CM_CLKEN_PLL*.EN* bit values - not all are available for every DPLL */
 #define DPLL_LOW_POWER_STOP	0x1
@@ -202,21 +208,12 @@ enum {
 };
 
 /**
- * struct clk_omap_reg - OMAP register declaration
- * @offset: offset from the master IP module base address
- * @index: index of the master IP module
- */
-struct clk_omap_reg {
-	u16 offset;
-	u16 index;
-};
-
-/**
  * struct ti_clk_ll_ops - low-level ops for clocks
  * @clk_readl: pointer to register read function
  * @clk_writel: pointer to register write function
  * @clkdm_clk_enable: pointer to clockdomain enable function
  * @clkdm_clk_disable: pointer to clockdomain disable function
+ * @clkdm_lookup: pointer to clockdomain lookup function
  * @cm_wait_module_ready: pointer to CM module wait ready function
  * @cm_split_idlest_reg: pointer to CM module function to split idlest reg
  *
@@ -227,20 +224,20 @@ struct clk_omap_reg {
  * operations not provided directly by clock drivers.
  */
 struct ti_clk_ll_ops {
-	u32	(*clk_readl)(void __iomem *reg);
-	void	(*clk_writel)(u32 val, void __iomem *reg);
+	u32	(*clk_readl)(const struct clk_omap_reg *reg);
+	void	(*clk_writel)(u32 val, const struct clk_omap_reg *reg);
 	int	(*clkdm_clk_enable)(struct clockdomain *clkdm, struct clk *clk);
 	int	(*clkdm_clk_disable)(struct clockdomain *clkdm,
 				     struct clk *clk);
+	struct clockdomain * (*clkdm_lookup)(const char *name);
 	int	(*cm_wait_module_ready)(u8 part, s16 prcm_mod, u16 idlest_reg,
 					u8 idlest_shift);
-	int	(*cm_split_idlest_reg)(void __iomem *idlest_reg, s16 *prcm_inst,
-				       u8 *idlest_reg_id);
+	int	(*cm_split_idlest_reg)(struct clk_omap_reg *idlest_reg,
+				       s16 *prcm_inst, u8 *idlest_reg_id);
 };
 
 #define to_clk_hw_omap(_hw) container_of(_hw, struct clk_hw_omap, hw)
 
-void omap2_init_clk_clkdm(struct clk_hw *clk);
 int omap2_clk_disable_autoidle_all(void);
 int omap2_clk_enable_autoidle_all(void);
 int omap2_clk_allow_idle(struct clk *clk);
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index f2b10d9ebd04..81490456c242 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -96,6 +96,7 @@ struct clocksource {
 	void (*suspend)(struct clocksource *cs);
 	void (*resume)(struct clocksource *cs);
 	void (*mark_unstable)(struct clocksource *cs);
+	void (*tick_stable)(struct clocksource *cs);
 
 	/* private: */
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index de179993e039..d614c5ea1b5e 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -15,3 +15,11 @@
  * with any version that can compile the kernel
  */
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+
+/*
+ * GCC does not warn about unused static inline functions for
+ * -Wunused-function.  This turns out to avoid the need for complex #ifdef
+ * directives.  Suppress the warning in clang as well.
+ */
+#undef inline
+#define inline inline __attribute__((unused)) notrace
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index f8110051188f..707242fdbb89 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -17,11 +17,7 @@
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
 # define __percpu	__attribute__((noderef, address_space(3)))
-#ifdef CONFIG_SPARSE_RCU_POINTER
 # define __rcu		__attribute__((noderef, address_space(4)))
-#else /* CONFIG_SPARSE_RCU_POINTER */
-# define __rcu
-#endif /* CONFIG_SPARSE_RCU_POINTER */
 # define __private	__attribute__((noderef))
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 2319b8c108e8..c96709049683 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -74,7 +74,8 @@ extern void config_item_init_type_name(struct config_item *item,
 				       const char *name,
 				       struct config_item_type *type);
 
-extern struct config_item * config_item_get(struct config_item *);
+extern struct config_item *config_item_get(struct config_item *);
+extern struct config_item *config_item_get_unless_zero(struct config_item *);
 extern void config_item_put(struct config_item *);
 
 struct config_item_type {
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 2404ad238c0b..4bf4479a3a80 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -236,6 +236,23 @@ unsigned int cpumask_local_spread(unsigned int i, int node);
 		(cpu) = cpumask_next_zero((cpu), (mask)),	\
 		(cpu) < nr_cpu_ids;)
 
+extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
+
+/**
+ * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask: the cpumask poiter
+ * @start: the start location
+ *
+ * The implementation does not assume any bit in @mask is set (including @start).
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu_wrap(cpu, mask, start)					\
+	for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false);	\
+	     (cpu) < nr_cpumask_bits;						\
+	     (cpu) = cpumask_next_wrap((cpu), (mask), (start), true))
+
 /**
  * for_each_cpu_and - iterate over every cpu in both masks
  * @cpu: the (optionally unsigned) integer iterator
@@ -276,6 +293,12 @@ static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
 	set_bit(cpumask_check(cpu), cpumask_bits(dstp));
 }
 
+static inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
+{
+	__set_bit(cpumask_check(cpu), cpumask_bits(dstp));
+}
+
+
 /**
  * cpumask_clear_cpu - clear a cpu in a cpumask
  * @cpu: cpu number (< nr_cpu_ids)
@@ -286,6 +309,11 @@ static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp)
 	clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
 }
 
+static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+{
+	__clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
+}
+
 /**
  * cpumask_test_cpu - test for a cpu in a cpumask
  * @cpu: cpu number (< nr_cpu_ids)
diff --git a/include/linux/dax.h b/include/linux/dax.h
index d3158e74a59e..5ec1f6c47716 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -18,12 +18,58 @@ struct dax_operations {
 			void **, pfn_t *);
 };
 
+#if IS_ENABLED(CONFIG_DAX)
+struct dax_device *dax_get_by_host(const char *host);
+void put_dax(struct dax_device *dax_dev);
+#else
+static inline struct dax_device *dax_get_by_host(const char *host)
+{
+	return NULL;
+}
+
+static inline void put_dax(struct dax_device *dax_dev)
+{
+}
+#endif
+
+int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
+#if IS_ENABLED(CONFIG_FS_DAX)
+int __bdev_dax_supported(struct super_block *sb, int blocksize);
+static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
+{
+	return __bdev_dax_supported(sb, blocksize);
+}
+
+static inline struct dax_device *fs_dax_get_by_host(const char *host)
+{
+	return dax_get_by_host(host);
+}
+
+static inline void fs_put_dax(struct dax_device *dax_dev)
+{
+	put_dax(dax_dev);
+}
+
+#else
+static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline struct dax_device *fs_dax_get_by_host(const char *host)
+{
+	return NULL;
+}
+
+static inline void fs_put_dax(struct dax_device *dax_dev)
+{
+}
+#endif
+
 int dax_read_lock(void);
 void dax_read_unlock(int id);
-struct dax_device *dax_get_by_host(const char *host);
 struct dax_device *alloc_dax(void *private, const char *host,
 		const struct dax_operations *ops);
-void put_dax(struct dax_device *dax_dev);
 bool dax_alive(struct dax_device *dax_dev);
 void kill_dax(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
@@ -63,7 +109,6 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
 		    const struct iomap_ops *ops);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
-int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 				      pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h
index c35d0c0e0ada..4635f95000a4 100644
--- a/include/linux/devfreq_cooling.h
+++ b/include/linux/devfreq_cooling.h
@@ -34,6 +34,23 @@
  *			If get_dynamic_power() is NULL, then the
  *			dynamic power is calculated as
  *			@dyn_power_coeff * frequency * voltage^2
+ * @get_real_power:	When this is set, the framework uses it to ask the
+ *			device driver for the actual power.
+ *			Some devices have more sophisticated methods
+ *			(like power counters) to approximate the actual power
+ *			that they use.
+ *			This function provides more accurate data to the
+ *			thermal governor. When the driver does not provide
+ *			such function, framework just uses pre-calculated
+ *			table and scale the power by 'utilization'
+ *			(based on 'busy_time' and 'total_time' taken from
+ *			devfreq 'last_status').
+ *			The value returned by this function must be lower
+ *			or equal than the maximum power value
+ *			for the current	state
+ *			(which can be found in power_table[state]).
+ *			When this interface is used, the power_table holds
+ *			max total (static + dynamic) power value for each OPP.
  */
 struct devfreq_cooling_power {
 	unsigned long (*get_static_power)(struct devfreq *devfreq,
@@ -41,6 +58,8 @@ struct devfreq_cooling_power {
 	unsigned long (*get_dynamic_power)(struct devfreq *devfreq,
 					   unsigned long freq,
 					   unsigned long voltage);
+	int (*get_real_power)(struct devfreq *df, u32 *power,
+			      unsigned long freq, unsigned long voltage);
 	unsigned long dyn_power_coeff;
 };
 
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index f4c639c0c362..456da5017b32 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -72,9 +72,9 @@ typedef void (*dm_release_clone_request_fn) (struct request *clone);
  * 2   : The target wants to push back the io
  */
 typedef int (*dm_endio_fn) (struct dm_target *ti,
-			    struct bio *bio, int error);
+			    struct bio *bio, blk_status_t *error);
 typedef int (*dm_request_endio_fn) (struct dm_target *ti,
-				    struct request *clone, int error,
+				    struct request *clone, blk_status_t error,
 				    union map_info *map_context);
 
 typedef void (*dm_presuspend_fn) (struct dm_target *ti);
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 6048fa404e57..a5195a7d6f77 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -229,7 +229,7 @@ static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence)
  *
  * Function returns NULL if no refcount could be obtained, or the fence.
  * This function handles acquiring a reference to a fence that may be
- * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU),
+ * reallocated within the RCU grace period (such as with SLAB_TYPESAFE_BY_RCU),
  * so long as the caller is using RCU on the pointer to the fence.
  *
  * An alternative mechanism is to employ a seqlock to protect a bunch of
@@ -257,7 +257,7 @@ dma_fence_get_rcu_safe(struct dma_fence * __rcu *fencep)
 		 * have successfully acquire a reference to it. If it no
 		 * longer matches, we are holding a reference to some other
 		 * reallocated pointer. This is possible if the allocator
-		 * is using a freelist like SLAB_DESTROY_BY_RCU where the
+		 * is using a freelist like SLAB_TYPESAFE_BY_RCU where the
 		 * fence remains valid for the RCU grace period, but it
 		 * may be reallocated. When using such allocators, we are
 		 * responsible for ensuring the reference we get is to
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 4eac2670bfa1..92f20832fd28 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -78,6 +78,7 @@ void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
 
 struct iommu_domain;
 struct msi_msg;
+struct device;
 
 static inline int iommu_dma_init(void)
 {
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index 5e9c74cf8894..9bbf21a516e4 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -136,7 +136,7 @@ static inline int dmi_name_in_vendors(const char *s) { return 0; }
 static inline int dmi_name_in_serial(const char *s) { return 0; }
 #define dmi_available 0
 static inline int dmi_walk(void (*decode)(const struct dmi_header *, void *),
-	void *private_data) { return -1; }
+	void *private_data) { return -ENXIO; }
 static inline bool dmi_match(enum dmi_field f, const char *str)
 	{ return false; }
 static inline void dmi_memdev_name(u16 handle, const char **bank,
diff --git a/include/linux/efi.h b/include/linux/efi.h
index ec36f42a2add..8269bcb8ccf7 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -137,6 +137,18 @@ struct efi_boot_memmap {
 #define EFI_CAPSULE_POPULATE_SYSTEM_TABLE	0x00020000
 #define EFI_CAPSULE_INITIATE_RESET		0x00040000
 
+struct capsule_info {
+	efi_capsule_header_t	header;
+	int			reset_type;
+	long			index;
+	size_t			count;
+	size_t			total_size;
+	phys_addr_t		*pages;
+	size_t			page_bytes_remain;
+};
+
+int __efi_capsule_setup_info(struct capsule_info *cap_info);
+
 /*
  * Allocation types for calls to boottime->allocate_pages.
  */
@@ -1403,7 +1415,7 @@ extern int efi_capsule_supported(efi_guid_t guid, u32 flags,
 				 size_t size, int *reset);
 
 extern int efi_capsule_update(efi_capsule_header_t *capsule,
-			      struct page **pages);
+			      phys_addr_t *pages);
 
 #ifdef CONFIG_EFI_RUNTIME_MAP
 int efi_runtime_map_init(struct kobject *);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 9ec5e22846e0..5bc8f8682a3e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -104,8 +104,9 @@ struct elevator_mq_ops {
 	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
 	void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
 	void (*requests_merged)(struct request_queue *, struct request *, struct request *);
-	struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
-	void (*put_request)(struct request *);
+	void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
+	void (*prepare_request)(struct request *, struct bio *bio);
+	void (*finish_request)(struct request *);
 	void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
 	struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
 	bool (*has_work)(struct blk_mq_hw_ctx *);
@@ -114,8 +115,6 @@ struct elevator_mq_ops {
 	void (*requeue_request)(struct request *);
 	struct request *(*former_request)(struct request_queue *, struct request *);
 	struct request *(*next_request)(struct request_queue *, struct request *);
-	int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *);
-	void (*put_rq_priv)(struct request_queue *, struct request *);
 	void (*init_icq)(struct io_cq *);
 	void (*exit_icq)(struct io_cq *);
 };
@@ -153,7 +152,7 @@ struct elevator_type
 #endif
 
 	/* managed by elevator core */
-	char icq_cache_name[ELV_NAME_MAX + 5];	/* elvname + "_io_cq" */
+	char icq_cache_name[ELV_NAME_MAX + 6];	/* elvname + "_io_cq" */
 	struct list_head list;
 };
 
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index ff0b981f078e..9e4befd95bc7 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -37,7 +37,7 @@ struct eventfd_ctx *eventfd_ctx_fdget(int fd);
 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
 __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
 ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt);
-int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
 				  __u64 *cnt);
 
 #else /* CONFIG_EVENTFD */
@@ -73,7 +73,7 @@ static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait,
 }
 
 static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
-						wait_queue_t *wait, __u64 *cnt)
+						wait_queue_entry_t *wait, __u64 *cnt)
 {
 	return -ENOSYS;
 }
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 56197f82af45..62d948f80730 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -272,6 +272,16 @@ struct bpf_prog_aux;
 		.off   = OFF,					\
 		.imm   = IMM })
 
+/* Unconditional jumps, goto pc + off16 */
+
+#define BPF_JMP_A(OFF)						\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_JA,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Function call */
 
 #define BPF_EMIT_CALL(FUNC)					\
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 26488b419965..771fe1131467 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2,7 +2,7 @@
 #define _LINUX_FS_H
 
 #include <linux/linkage.h>
-#include <linux/wait.h>
+#include <linux/wait_bit.h>
 #include <linux/kdev_t.h>
 #include <linux/dcache.h>
 #include <linux/path.h>
@@ -20,6 +20,7 @@
 #include <linux/rwsem.h>
 #include <linux/capability.h>
 #include <linux/semaphore.h>
+#include <linux/fcntl.h>
 #include <linux/fiemap.h>
 #include <linux/rculist_bl.h>
 #include <linux/atomic.h>
@@ -30,6 +31,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/workqueue.h>
 #include <linux/delayed_call.h>
+#include <linux/uuid.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -142,6 +144,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
 
+/* File is capable of returning -EAGAIN if AIO will block */
+#define FMODE_AIO_NOWAIT	((__force fmode_t)0x8000000)
+
 /*
  * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
  * that indicates that they should check the contents of the iovec are
@@ -261,6 +266,18 @@ struct page;
 struct address_space;
 struct writeback_control;
 
+/*
+ * Write life time hint values.
+ */
+enum rw_hint {
+	WRITE_LIFE_NOT_SET	= 0,
+	WRITE_LIFE_NONE		= RWH_WRITE_LIFE_NONE,
+	WRITE_LIFE_SHORT	= RWH_WRITE_LIFE_SHORT,
+	WRITE_LIFE_MEDIUM	= RWH_WRITE_LIFE_MEDIUM,
+	WRITE_LIFE_LONG		= RWH_WRITE_LIFE_LONG,
+	WRITE_LIFE_EXTREME	= RWH_WRITE_LIFE_EXTREME,
+};
+
 #define IOCB_EVENTFD		(1 << 0)
 #define IOCB_APPEND		(1 << 1)
 #define IOCB_DIRECT		(1 << 2)
@@ -268,6 +285,7 @@ struct writeback_control;
 #define IOCB_DSYNC		(1 << 4)
 #define IOCB_SYNC		(1 << 5)
 #define IOCB_WRITE		(1 << 6)
+#define IOCB_NOWAIT		(1 << 7)
 
 struct kiocb {
 	struct file		*ki_filp;
@@ -275,6 +293,7 @@ struct kiocb {
 	void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
 	void			*private;
 	int			ki_flags;
+	enum rw_hint		ki_hint;
 };
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -282,16 +301,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb)
 	return kiocb->ki_complete == NULL;
 }
 
-static inline int iocb_flags(struct file *file);
-
-static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
-{
-	*kiocb = (struct kiocb) {
-		.ki_filp = filp,
-		.ki_flags = iocb_flags(filp),
-	};
-}
-
 /*
  * "descriptor" for what we're up to with a read.
  * This allows us to use the same read code yet
@@ -592,6 +601,7 @@ struct inode {
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
 	unsigned short          i_bytes;
 	unsigned int		i_blkbits;
+	enum rw_hint		i_write_hint;
 	blkcnt_t		i_blocks;
 
 #ifdef __NEED_I_SIZE_ORDERED
@@ -846,6 +856,7 @@ struct file {
 	 * Must not be taken from IRQ context.
 	 */
 	spinlock_t		f_lock;
+	enum rw_hint		f_write_hint;
 	atomic_long_t		f_count;
 	unsigned int 		f_flags;
 	fmode_t			f_mode;
@@ -909,6 +920,8 @@ static inline struct file *get_file(struct file *f)
 #define FL_OFDLCK	1024	/* lock is "owned" by struct file */
 #define FL_LAYOUT	2048	/* outstanding pNFS layout */
 
+#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
+
 /*
  * Special return value from posix_lock_file() and vfs_lock_file() for
  * asynchronous locking.
@@ -1019,8 +1032,6 @@ struct file_lock_context {
 #define OFFT_OFFSET_MAX	INT_LIMIT(off_t)
 #endif
 
-#include <linux/fcntl.h>
-
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
 /*
@@ -1326,8 +1337,8 @@ struct super_block {
 
 	struct sb_writers	s_writers;
 
-	char s_id[32];				/* Informational name */
-	u8 s_uuid[16];				/* UUID */
+	char			s_id[32];	/* Informational name */
+	uuid_t			s_uuid;		/* UUID */
 
 	void 			*s_fs_info;	/* Filesystem private info */
 	unsigned int		s_max_links;
@@ -1429,7 +1440,6 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
 	inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
 }
 
-extern struct timespec current_fs_time(struct super_block *sb);
 extern struct timespec current_time(struct inode *inode);
 
 /*
@@ -1872,6 +1882,25 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode)
 	return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
 }
 
+static inline enum rw_hint file_write_hint(struct file *file)
+{
+	if (file->f_write_hint != WRITE_LIFE_NOT_SET)
+		return file->f_write_hint;
+
+	return file_inode(file)->i_write_hint;
+}
+
+static inline int iocb_flags(struct file *file);
+
+static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
+{
+	*kiocb = (struct kiocb) {
+		.ki_filp = filp,
+		.ki_flags = iocb_flags(filp),
+		.ki_hint = file_write_hint(filp),
+	};
+}
+
 /*
  * Inode state bits.  Protected by inode->i_lock
  *
@@ -2516,6 +2545,8 @@ extern int filemap_fdatawait(struct address_space *);
 extern void filemap_fdatawait_keep_errors(struct address_space *);
 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
 				   loff_t lend);
+extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
+				  loff_t lend);
 extern int filemap_write_and_wait(struct address_space *mapping);
 extern int filemap_write_and_wait_range(struct address_space *mapping,
 				        loff_t lstart, loff_t lend);
@@ -2842,7 +2873,7 @@ enum {
 	DIO_SKIP_DIO_COUNT = 0x08,
 };
 
-void dio_end_io(struct bio *bio, int error);
+void dio_end_io(struct bio *bio);
 
 ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 			     struct block_device *bdev, struct iov_iter *iter,
@@ -3055,6 +3086,25 @@ static inline int iocb_flags(struct file *file)
 	return res;
 }
 
+static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags)
+{
+	if (unlikely(flags & ~RWF_SUPPORTED))
+		return -EOPNOTSUPP;
+
+	if (flags & RWF_NOWAIT) {
+		if (!(ki->ki_filp->f_mode & FMODE_AIO_NOWAIT))
+			return -EOPNOTSUPP;
+		ki->ki_flags |= IOCB_NOWAIT;
+	}
+	if (flags & RWF_HIPRI)
+		ki->ki_flags |= IOCB_HIPRI;
+	if (flags & RWF_DSYNC)
+		ki->ki_flags |= IOCB_DSYNC;
+	if (flags & RWF_SYNC)
+		ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+	return 0;
+}
+
 static inline ino_t parent_ino(struct dentry *dentry)
 {
 	ino_t res;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index acff9437e5c3..e619fae2f037 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -219,12 +219,6 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part)
 	return NULL;
 }
 
-static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to)
-{
-	uuid_be_to_bin(uuid_str, (uuid_be *)to);
-	return 0;
-}
-
 static inline int disk_max_parts(struct gendisk *disk)
 {
 	if (disk->flags & GENHD_FL_EXT_DEVT)
@@ -736,11 +730,6 @@ static inline dev_t blk_lookup_devt(const char *name, int partno)
 	dev_t devt = MKDEV(0, 0);
 	return devt;
 }
-
-static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to)
-{
-	return -EINVAL;
-}
 #endif /* CONFIG_BLOCK */
 
 #endif /* _LINUX_GENHD_H */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2b1a44f5bdb6..a89d37e8b387 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -41,7 +41,7 @@ struct vm_area_struct;
 #define ___GFP_WRITE		0x800000u
 #define ___GFP_KSWAPD_RECLAIM	0x1000000u
 #ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP	0x4000000u
+#define ___GFP_NOLOCKDEP	0x2000000u
 #else
 #define ___GFP_NOLOCKDEP	0
 #endif
diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h
index c0d712d22b07..f738d50cc17d 100644
--- a/include/linux/gpio/machine.h
+++ b/include/linux/gpio/machine.h
@@ -56,7 +56,14 @@ struct gpiod_lookup_table {
 	.flags = _flags,                                                  \
 }
 
+#ifdef CONFIG_GPIOLIB
 void gpiod_add_lookup_table(struct gpiod_lookup_table *table);
 void gpiod_remove_lookup_table(struct gpiod_lookup_table *table);
+#else
+static inline
+void gpiod_add_lookup_table(struct gpiod_lookup_table *table) {}
+static inline
+void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) {}
+#endif
 
 #endif /* __LINUX_GPIO_MACHINE_H */
diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h
index 661e5c2a8e2a..082dc1bd0801 100644
--- a/include/linux/hashtable.h
+++ b/include/linux/hashtable.h
@@ -167,7 +167,6 @@ static inline void hash_del_rcu(struct hlist_node *node)
 /**
  * hash_for_each_possible_rcu - iterate over all possible objects hashing to the
  * same bucket in an rcu enabled hashtable
- * in a rcu enabled hashtable
  * @name: hashtable to iterate
  * @obj: the type * to use as a loop cursor for each entry
  * @member: the name of the hlist_node within the struct
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 6980ca322074..dc152e4b7f73 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -671,7 +671,7 @@ struct ide_port_ops {
 	void	(*init_dev)(ide_drive_t *);
 	void	(*set_pio_mode)(struct hwif_s *, ide_drive_t *);
 	void	(*set_dma_mode)(struct hwif_s *, ide_drive_t *);
-	int	(*reset_poll)(ide_drive_t *);
+	blk_status_t (*reset_poll)(ide_drive_t *);
 	void	(*pre_reset)(ide_drive_t *);
 	void	(*resetproc)(ide_drive_t *);
 	void	(*maskproc)(ide_drive_t *, int);
@@ -1092,7 +1092,7 @@ int generic_ide_ioctl(ide_drive_t *, struct block_device *, unsigned, unsigned l
 extern int ide_vlb_clk;
 extern int ide_pci_clk;
 
-int ide_end_rq(ide_drive_t *, struct request *, int, unsigned int);
+int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int);
 void ide_kill_rq(ide_drive_t *, struct request *);
 
 void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int);
@@ -1123,7 +1123,7 @@ extern int ide_devset_execute(ide_drive_t *drive,
 			      const struct ide_devset *setting, int arg);
 
 void ide_complete_cmd(ide_drive_t *, struct ide_cmd *, u8, u8);
-int ide_complete_rq(ide_drive_t *, int, unsigned int);
+int ide_complete_rq(ide_drive_t *, blk_status_t, unsigned int);
 
 void ide_tf_readback(ide_drive_t *drive, struct ide_cmd *cmd);
 void ide_tf_dump(const char *, struct ide_cmd *);
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 8d5fcd6284ce..283dc2f5364d 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -614,14 +614,16 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb)
 static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
 						    netdev_features_t features)
 {
-	if (skb_vlan_tagged_multi(skb))
-		features = netdev_intersect_features(features,
-						     NETIF_F_SG |
-						     NETIF_F_HIGHDMA |
-						     NETIF_F_FRAGLIST |
-						     NETIF_F_HW_CSUM |
-						     NETIF_F_HW_VLAN_CTAG_TX |
-						     NETIF_F_HW_VLAN_STAG_TX);
+	if (skb_vlan_tagged_multi(skb)) {
+		/* In the case of multi-tagged packets, use a direct mask
+		 * instead of using netdev_interesect_features(), to make
+		 * sure that only devices supporting NETIF_F_HW_CSUM will
+		 * have checksum offloading support.
+		 */
+		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
+			    NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX |
+			    NETIF_F_HW_VLAN_STAG_TX;
+	}
 
 	return features;
 }
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index f753e788da31..69f4e9470084 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -52,6 +52,7 @@ struct iomap {
 #define IOMAP_REPORT		(1 << 2) /* report extent status, e.g. FIEMAP */
 #define IOMAP_FAULT		(1 << 3) /* mapping for page fault */
 #define IOMAP_DIRECT		(1 << 4) /* direct I/O */
+#define IOMAP_NOWAIT		(1 << 5) /* Don't wait for writeback */
 
 struct iomap_ops {
 	/*
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 97cbca19430d..1fa293a37f4a 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -132,6 +132,9 @@
 #define GIC_BASER_SHAREABILITY(reg, type)				\
 	(GIC_BASER_##type << reg##_SHAREABILITY_SHIFT)
 
+/* encode a size field of width @w containing @n - 1 units */
+#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0))
+
 #define GICR_PROPBASER_SHAREABILITY_SHIFT		(10)
 #define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT		(7)
 #define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT		(56)
@@ -156,6 +159,8 @@
 #define GICR_PROPBASER_RaWaWb	GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb)
 
 #define GICR_PROPBASER_IDBITS_MASK			(0x1f)
+#define GICR_PROPBASER_ADDRESS(x)	((x) & GENMASK_ULL(51, 12))
+#define GICR_PENDBASER_ADDRESS(x)	((x) & GENMASK_ULL(51, 16))
 
 #define GICR_PENDBASER_SHAREABILITY_SHIFT		(10)
 #define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT		(7)
@@ -232,12 +237,18 @@
 #define GITS_CTLR_QUIESCENT		(1U << 31)
 
 #define GITS_TYPER_PLPIS		(1UL << 0)
+#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT	4
 #define GITS_TYPER_IDBITS_SHIFT		8
 #define GITS_TYPER_DEVBITS_SHIFT	13
 #define GITS_TYPER_DEVBITS(r)		((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
 #define GITS_TYPER_PTA			(1UL << 19)
 #define GITS_TYPER_HWCOLLCNT_SHIFT	24
 
+#define GITS_IIDR_REV_SHIFT		12
+#define GITS_IIDR_REV_MASK		(0xf << GITS_IIDR_REV_SHIFT)
+#define GITS_IIDR_REV(r)		(((r) >> GITS_IIDR_REV_SHIFT) & 0xf)
+#define GITS_IIDR_PRODUCTID_SHIFT	24
+
 #define GITS_CBASER_VALID			(1ULL << 63)
 #define GITS_CBASER_SHAREABILITY_SHIFT		(10)
 #define GITS_CBASER_INNER_CACHEABILITY_SHIFT	(59)
@@ -290,6 +301,7 @@
 #define GITS_BASER_TYPE(r)		(((r) >> GITS_BASER_TYPE_SHIFT) & 7)
 #define GITS_BASER_ENTRY_SIZE_SHIFT		(48)
 #define GITS_BASER_ENTRY_SIZE(r)	((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
+#define GITS_BASER_ENTRY_SIZE_MASK	GENMASK_ULL(52, 48)
 #define GITS_BASER_SHAREABILITY_SHIFT	(10)
 #define GITS_BASER_InnerShareable					\
 	GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
@@ -337,9 +349,11 @@
 #define E_ITS_INT_UNMAPPED_INTERRUPT		0x010307
 #define E_ITS_CLEAR_UNMAPPED_INTERRUPT		0x010507
 #define E_ITS_MAPD_DEVICE_OOR			0x010801
+#define E_ITS_MAPD_ITTSIZE_OOR			0x010802
 #define E_ITS_MAPC_PROCNUM_OOR			0x010902
 #define E_ITS_MAPC_COLLECTION_OOR		0x010903
 #define E_ITS_MAPTI_UNMAPPED_DEVICE		0x010a04
+#define E_ITS_MAPTI_ID_OOR			0x010a05
 #define E_ITS_MAPTI_PHYSICALID_OOR		0x010a06
 #define E_ITS_INV_UNMAPPED_INTERRUPT		0x010c07
 #define E_ITS_INVALL_UNMAPPED_COLLECTION	0x010d09
@@ -403,6 +417,10 @@
 #define ICH_HCR_EN			(1 << 0)
 #define ICH_HCR_UIE			(1 << 1)
 
+#define ICH_VMCR_ACK_CTL_SHIFT		2
+#define ICH_VMCR_ACK_CTL_MASK		(1 << ICH_VMCR_ACK_CTL_SHIFT)
+#define ICH_VMCR_FIQ_EN_SHIFT		3
+#define ICH_VMCR_FIQ_EN_MASK		(1 << ICH_VMCR_FIQ_EN_SHIFT)
 #define ICH_VMCR_CBPR_SHIFT		4
 #define ICH_VMCR_CBPR_MASK		(1 << ICH_VMCR_CBPR_SHIFT)
 #define ICH_VMCR_EOIM_SHIFT		9
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index dc30f3d057eb..d3453ee072fc 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -25,7 +25,18 @@
 #define GICC_ENABLE			0x1
 #define GICC_INT_PRI_THRESHOLD		0xf0
 
-#define GIC_CPU_CTRL_EOImodeNS		(1 << 9)
+#define GIC_CPU_CTRL_EnableGrp0_SHIFT	0
+#define GIC_CPU_CTRL_EnableGrp0		(1 << GIC_CPU_CTRL_EnableGrp0_SHIFT)
+#define GIC_CPU_CTRL_EnableGrp1_SHIFT	1
+#define GIC_CPU_CTRL_EnableGrp1		(1 << GIC_CPU_CTRL_EnableGrp1_SHIFT)
+#define GIC_CPU_CTRL_AckCtl_SHIFT	2
+#define GIC_CPU_CTRL_AckCtl		(1 << GIC_CPU_CTRL_AckCtl_SHIFT)
+#define GIC_CPU_CTRL_FIQEn_SHIFT	3
+#define GIC_CPU_CTRL_FIQEn		(1 << GIC_CPU_CTRL_FIQEn_SHIFT)
+#define GIC_CPU_CTRL_CBPR_SHIFT		4
+#define GIC_CPU_CTRL_CBPR		(1 << GIC_CPU_CTRL_CBPR_SHIFT)
+#define GIC_CPU_CTRL_EOImodeNS_SHIFT	9
+#define GIC_CPU_CTRL_EOImodeNS		(1 << GIC_CPU_CTRL_EOImodeNS_SHIFT)
 
 #define GICC_IAR_INT_ID_MASK		0x3ff
 #define GICC_INT_SPURIOUS		1023
@@ -84,8 +95,19 @@
 #define GICH_LR_EOI			(1 << 19)
 #define GICH_LR_HW			(1 << 31)
 
-#define GICH_VMCR_CTRL_SHIFT		0
-#define GICH_VMCR_CTRL_MASK		(0x21f << GICH_VMCR_CTRL_SHIFT)
+#define GICH_VMCR_ENABLE_GRP0_SHIFT	0
+#define GICH_VMCR_ENABLE_GRP0_MASK	(1 << GICH_VMCR_ENABLE_GRP0_SHIFT)
+#define GICH_VMCR_ENABLE_GRP1_SHIFT	1
+#define GICH_VMCR_ENABLE_GRP1_MASK	(1 << GICH_VMCR_ENABLE_GRP1_SHIFT)
+#define GICH_VMCR_ACK_CTL_SHIFT		2
+#define GICH_VMCR_ACK_CTL_MASK		(1 << GICH_VMCR_ACK_CTL_SHIFT)
+#define GICH_VMCR_FIQ_EN_SHIFT		3
+#define GICH_VMCR_FIQ_EN_MASK		(1 << GICH_VMCR_FIQ_EN_SHIFT)
+#define GICH_VMCR_CBPR_SHIFT		4
+#define GICH_VMCR_CBPR_MASK		(1 << GICH_VMCR_CBPR_SHIFT)
+#define GICH_VMCR_EOI_MODE_SHIFT	9
+#define GICH_VMCR_EOI_MODE_MASK		(1 << GICH_VMCR_EOI_MODE_SHIFT)
+
 #define GICH_VMCR_PRIMASK_SHIFT		27
 #define GICH_VMCR_PRIMASK_MASK		(0x1f << GICH_VMCR_PRIMASK_SHIFT)
 #define GICH_VMCR_BINPOINT_SHIFT	21
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 36872fbb815d..734377ad42e9 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -64,13 +64,17 @@ extern int register_refined_jiffies(long clock_tick_rate);
 /* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
 #define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
 
+#ifndef __jiffy_arch_data
+#define __jiffy_arch_data
+#endif
+
 /*
  * The 64-bit value is not atomic - you MUST NOT read it
  * without sampling the sequence number in jiffies_lock.
  * get_jiffies_64() will do this for you as appropriate.
  */
 extern u64 __cacheline_aligned_in_smp jiffies_64;
-extern unsigned long volatile __cacheline_aligned_in_smp jiffies;
+extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies;
 
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void);
diff --git a/include/linux/kbuild.h b/include/linux/kbuild.h
index 22a72198c14b..4e80f3a9ad58 100644
--- a/include/linux/kbuild.h
+++ b/include/linux/kbuild.h
@@ -2,14 +2,14 @@
 #define __LINUX_KBUILD_H
 
 #define DEFINE(sym, val) \
-        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+	asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))
 
-#define BLANK() asm volatile("\n->" : : )
+#define BLANK() asm volatile("\n.ascii \"->\"" : : )
 
 #define OFFSET(sym, str, mem) \
 	DEFINE(sym, offsetof(struct str, mem))
 
 #define COMMENT(x) \
-	asm volatile("\n->#" x)
+	asm volatile("\n.ascii \"->#" x "\"")
 
 #endif
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 13bc08aba704..1c91f26e2996 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -490,9 +490,13 @@ extern int root_mountflags;
 
 extern bool early_boot_irqs_disabled;
 
-/* Values used for system_state */
+/*
+ * Values used for system_state. Ordering of the states must not be changed
+ * as code checks for <, <=, >, >= STATE.
+ */
 extern enum system_states {
 	SYSTEM_BOOTING,
+	SYSTEM_SCHEDULING,
 	SYSTEM_RUNNING,
 	SYSTEM_HALT,
 	SYSTEM_POWER_OFF,
diff --git a/include/linux/key.h b/include/linux/key.h
index 0c9b93b0d1f7..78e25aabedaf 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -173,7 +173,6 @@ struct key {
 #ifdef KEY_DEBUGGING
 	unsigned		magic;
 #define KEY_DEBUG_MAGIC		0x18273645u
-#define KEY_DEBUG_MAGIC_X	0xf8e9dacbu
 #endif
 
 	unsigned long		flags;		/* status flags (change with bitops) */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 30f90c1a0aaf..541df0b5b815 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -349,6 +349,9 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
 					     int write, void __user *buffer,
 					     size_t *length, loff_t *ppos);
 #endif
+extern void wait_for_kprobe_optimizer(void);
+#else
+static inline void wait_for_kprobe_optimizer(void) { }
 #endif /* CONFIG_OPTPROBES */
 #ifdef CONFIG_KPROBES_ON_FTRACE
 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4d629471869b..8c0664309815 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -384,8 +384,6 @@ struct kvm {
 	struct mutex slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
-	struct srcu_struct srcu;
-	struct srcu_struct irq_srcu;
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 
 	/*
@@ -438,6 +436,8 @@ struct kvm {
 	struct list_head devices;
 	struct dentry *debugfs_dentry;
 	struct kvm_stat_data **debugfs_stat_data;
+	struct srcu_struct srcu;
+	struct srcu_struct irq_srcu;
 };
 
 #define kvm_err(fmt, ...) \
@@ -499,6 +499,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 	return NULL;
 }
 
+static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu *tmp;
+	int idx;
+
+	kvm_for_each_vcpu(idx, tmp, vcpu->kvm)
+		if (tmp == vcpu)
+			return idx;
+	BUG();
+}
+
 #define kvm_for_each_memslot(memslot, slots)	\
 	for (memslot = &slots->memslots[0];	\
 	      memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\
@@ -1167,7 +1178,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 void kvm_unregister_device_ops(u32 type);
 
 extern struct kvm_device_ops kvm_mpic_ops;
-extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
 
diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index 0c1de05098c8..76c2fbc59f35 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -46,7 +46,7 @@ struct kvm_kernel_irqfd_resampler {
 struct kvm_kernel_irqfd {
 	/* Used for MSI fast-path */
 	struct kvm *kvm;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	/* Update side is protected by irqfds.lock */
 	struct kvm_kernel_irq_routing_entry irq_entry;
 	seqcount_t irq_entry_sc;
diff --git a/include/linux/llist.h b/include/linux/llist.h
index 171baa90f6f6..d11738110a7a 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -110,6 +110,25 @@ static inline void init_llist_head(struct llist_head *list)
 	for ((pos) = (node); pos; (pos) = (pos)->next)
 
 /**
+ * llist_for_each_safe - iterate over some deleted entries of a lock-less list
+ *			 safe against removal of list entry
+ * @pos:	the &struct llist_node to use as a loop cursor
+ * @n:		another &struct llist_node to use as temporary storage
+ * @node:	the first entry of deleted list entries
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being deleted from list, so start with an entry
+ * instead of list head.
+ *
+ * If being used on entries deleted from lock-less list directly, the
+ * traverse order is from the newest to the oldest added entry.  If
+ * you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each_safe(pos, n, node)			\
+	for ((pos) = (node); (pos) && ((n) = (pos)->next, true); (pos) = (n))
+
+/**
  * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type
  * @pos:	the type * to use as a loop cursor.
  * @node:	the fist entry of deleted list entries.
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 140edab64446..05728396a1a1 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -18,6 +18,7 @@
 
 /* Dummy declarations */
 struct svc_rqst;
+struct rpc_task;
 
 /*
  * This is the set of functions for lockd->nfsd communication
@@ -43,6 +44,7 @@ struct nlmclnt_initdata {
 	u32			nfs_version;
 	int			noresvport;
 	struct net		*net;
+	const struct nlmclnt_operations	*nlmclnt_ops;
 };
 
 /*
@@ -52,8 +54,26 @@ struct nlmclnt_initdata {
 extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init);
 extern void	nlmclnt_done(struct nlm_host *host);
 
-extern int	nlmclnt_proc(struct nlm_host *host, int cmd,
-					struct file_lock *fl);
+/*
+ * NLM client operations provide a means to modify RPC processing of NLM
+ * requests.  Callbacks receive a pointer to data passed into the call to
+ * nlmclnt_proc().
+ */
+struct nlmclnt_operations {
+	/* Called on successful allocation of nlm_rqst, use for allocation or
+	 * reference counting. */
+	void (*nlmclnt_alloc_call)(void *);
+
+	/* Called in rpc_task_prepare for unlock.  A return value of true
+	 * indicates the callback has put the task to sleep on a waitqueue
+	 * and NLM should not call rpc_call_start(). */
+	bool (*nlmclnt_unlock_prepare)(struct rpc_task*, void *);
+
+	/* Called when the nlm_rqst is freed, callbacks should clean up here */
+	void (*nlmclnt_release_call)(void *);
+};
+
+extern int	nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, void *data);
 extern int	lockd_up(struct net *net);
 extern void	lockd_down(struct net *net);
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index b37dee3acaba..41f7b6a04d69 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -69,6 +69,7 @@ struct nlm_host {
 	char			*h_addrbuf;	/* address eyecatcher */
 	struct net		*net;		/* host net */
 	char			nodename[UNX_MAXNODENAME + 1];
+	const struct nlmclnt_operations	*h_nlmclnt_ops;	/* Callback ops for NLM users */
 };
 
 /*
@@ -142,6 +143,7 @@ struct nlm_rqst {
 	struct nlm_block *	a_block;
 	unsigned int		a_retries;	/* Retry count */
 	u8			a_owner[NLMCLNT_OHSIZE];
+	void *	a_callback_data; /* sent to nlmclnt_operations callbacks */
 };
 
 /*
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 4ce24a376262..8098695e5d8d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -425,12 +425,20 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end)
 }
 #endif
 
+extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
+		phys_addr_t end_addr);
 #else
 static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
 {
 	return 0;
 }
 
+static inline unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
+		phys_addr_t end_addr)
+{
+	return 0;
+}
+
 #endif /* CONFIG_HAVE_MEMBLOCK */
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index b4ee8f62ce8d..8e2828d48d7f 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -470,6 +470,7 @@ struct mlx4_update_qp_params {
 	u16	rate_val;
 };
 
+struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn);
 int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
 		   enum mlx4_update_qp_attr attr,
 		   struct mlx4_update_qp_params *params);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index dd9a263ed368..a940ec6a046c 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -787,8 +787,14 @@ enum {
 };
 
 enum {
-	CQE_RSS_HTYPE_IP	= 0x3 << 6,
-	CQE_RSS_HTYPE_L4	= 0x3 << 2,
+	CQE_RSS_HTYPE_IP	= 0x3 << 2,
+	/* cqe->rss_hash_type[3:2] - IP destination selected for hash
+	 * (00 = none,  01 = IPv4, 10 = IPv6, 11 = Reserved)
+	 */
+	CQE_RSS_HTYPE_L4	= 0x3 << 6,
+	/* cqe->rss_hash_type[7:6] - L4 destination selected for hash
+	 * (00 = none, 01 = TCP. 10 = UDP, 11 = IPSEC.SPI
+	 */
 };
 
 enum {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index bcdf739ee41a..93273d9ea4d1 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -787,7 +787,12 @@ enum {
 
 typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
 
+enum {
+	MLX5_CMD_ENT_STATE_PENDING_COMP,
+};
+
 struct mlx5_cmd_work_ent {
+	unsigned long		state;
 	struct mlx5_cmd_msg    *in;
 	struct mlx5_cmd_msg    *out;
 	void		       *uout;
@@ -976,7 +981,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec);
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
 void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 		       int nent, u64 mask, const char *name,
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 1b166d2e19c5..b25e7baa273e 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -109,7 +109,6 @@ struct mlx5_flow_table_attr {
 	int max_fte;
 	u32 level;
 	u32 flags;
-	u32 underlay_qpn;
 };
 
 struct mlx5_flow_table *
@@ -167,4 +166,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
 			  u64 *bytes, u64 *packets, u64 *lastuse);
+int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
+int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
+
 #endif
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 32de0724b400..edafedb7b509 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -766,6 +766,12 @@ enum {
 	MLX5_CAP_PORT_TYPE_ETH = 0x1,
 };
 
+enum {
+	MLX5_CAP_UMR_FENCE_STRONG	= 0x0,
+	MLX5_CAP_UMR_FENCE_SMALL	= 0x1,
+	MLX5_CAP_UMR_FENCE_NONE		= 0x2,
+};
+
 struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_0[0x80];
 
@@ -875,7 +881,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_202[0x1];
 	u8         ipoib_enhanced_offloads[0x1];
 	u8         ipoib_basic_offloads[0x1];
-	u8         reserved_at_205[0xa];
+	u8         reserved_at_205[0x5];
+	u8         umr_fence[0x2];
+	u8         reserved_at_20c[0x3];
 	u8         drain_sigerr[0x1];
 	u8         cmdif_checksum[0x2];
 	u8         sigerr_cqe[0x1];
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7cb17c6b97de..6f543a47fc92 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1393,12 +1393,6 @@ int clear_page_dirty_for_io(struct page *page);
 
 int get_cmdline(struct task_struct *task, char *buffer, int buflen);
 
-/* Is the vma a continuation of the stack vma above it? */
-static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
-{
-	return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
-}
-
 static inline bool vma_is_anonymous(struct vm_area_struct *vma)
 {
 	return !vma->vm_ops;
@@ -1414,28 +1408,6 @@ bool vma_is_shmem(struct vm_area_struct *vma);
 static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
 #endif
 
-static inline int stack_guard_page_start(struct vm_area_struct *vma,
-					     unsigned long addr)
-{
-	return (vma->vm_flags & VM_GROWSDOWN) &&
-		(vma->vm_start == addr) &&
-		!vma_growsdown(vma->vm_prev, addr);
-}
-
-/* Is the vma a continuation of the stack vma below it? */
-static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
-{
-	return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
-}
-
-static inline int stack_guard_page_end(struct vm_area_struct *vma,
-					   unsigned long addr)
-{
-	return (vma->vm_flags & VM_GROWSUP) &&
-		(vma->vm_end == addr) &&
-		!vma_growsup(vma->vm_next, addr);
-}
-
 int vma_is_stack_for_current(struct vm_area_struct *vma);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
@@ -2222,6 +2194,7 @@ void page_cache_async_readahead(struct address_space *mapping,
 				pgoff_t offset,
 				unsigned long size);
 
+extern unsigned long stack_guard_gap;
 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
 
@@ -2250,6 +2223,30 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m
 	return vma;
 }
 
+static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
+{
+	unsigned long vm_start = vma->vm_start;
+
+	if (vma->vm_flags & VM_GROWSDOWN) {
+		vm_start -= stack_guard_gap;
+		if (vm_start > vma->vm_start)
+			vm_start = 0;
+	}
+	return vm_start;
+}
+
+static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
+{
+	unsigned long vm_end = vma->vm_end;
+
+	if (vma->vm_flags & VM_GROWSUP) {
+		vm_end += stack_guard_gap;
+		if (vm_end < vma->vm_end)
+			vm_end = -PAGE_SIZE;
+	}
+	return vm_end;
+}
+
 static inline unsigned long vma_pages(struct vm_area_struct *vma)
 {
 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
@@ -2327,6 +2324,17 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_REMOTE	0x2000	/* we are working on non-current tsk/mm */
 #define FOLL_COW	0x4000	/* internal GUP flag */
 
+static inline int vm_fault_to_errno(int vm_fault, int foll_flags)
+{
+	if (vm_fault & VM_FAULT_OOM)
+		return -ENOMEM;
+	if (vm_fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
+		return (foll_flags & FOLL_HWPOISON) ? -EHWPOISON : -EFAULT;
+	if (vm_fault & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
+		return -EFAULT;
+	return 0;
+}
+
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index 136dfdf63ba1..fc412fbd80bd 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -14,6 +14,10 @@
 
 #include <asm/page.h>
 
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+#include <asm/tlbbatch.h>
+#endif
+
 #define USE_SPLIT_PTE_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
 #define USE_SPLIT_PMD_PTLOCKS	(USE_SPLIT_PTE_PTLOCKS && \
 		IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK))
@@ -67,12 +71,15 @@ struct page_frag {
 struct tlbflush_unmap_batch {
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	/*
-	 * Each bit set is a CPU that potentially has a TLB entry for one of
-	 * the PFNs being flushed. See set_tlb_ubc_flush_pending().
+	 * The arch code makes the following promise: generic code can modify a
+	 * PTE, then call arch_tlbbatch_add_mm() (which internally provides all
+	 * needed barriers), then call arch_tlbbatch_flush(), and the entries
+	 * will be flushed on all CPUs by the time that arch_tlbbatch_flush()
+	 * returns.
 	 */
-	struct cpumask cpumask;
+	struct arch_tlbflush_unmap_batch arch;
 
-	/* True if any bit in cpumask is set */
+	/* True if a flush is needed. */
 	bool flush_required;
 
 	/*
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ebaccd4e7d8c..ef6a13b7bd3e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -678,6 +678,7 @@ typedef struct pglist_data {
 	 * is the first PFN that needs to be initialised.
 	 */
 	unsigned long first_deferred_pfn;
+	unsigned long static_init_size;
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 566fda587fcf..3f74ef2281e8 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -467,6 +467,7 @@ enum dmi_field {
 	DMI_PRODUCT_VERSION,
 	DMI_PRODUCT_SERIAL,
 	DMI_PRODUCT_UUID,
+	DMI_PRODUCT_FAMILY,
 	DMI_BOARD_VENDOR,
 	DMI_BOARD_NAME,
 	DMI_BOARD_VERSION,
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 52666d90ca94..1ee7b30dafec 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -60,9 +60,11 @@ struct kernel_param_ops {
  * Flags available for kernel_param
  *
  * UNSAFE - the parameter is dangerous and setting it will taint the kernel
+ * HWPARAM - Hardware param not permitted in lockdown mode
  */
 enum {
-	KERNEL_PARAM_FL_UNSAFE = (1 << 0)
+	KERNEL_PARAM_FL_UNSAFE	= (1 << 0),
+	KERNEL_PARAM_FL_HWPARAM	= (1 << 1),
 };
 
 struct kernel_param {
@@ -451,6 +453,67 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp);
 			    perm, -1, 0);				\
 	__MODULE_PARM_TYPE(name, "array of " #type)
 
+enum hwparam_type {
+	hwparam_ioport,		/* Module parameter configures an I/O port */
+	hwparam_iomem,		/* Module parameter configures an I/O mem address */
+	hwparam_ioport_or_iomem, /* Module parameter could be either, depending on other option */
+	hwparam_irq,		/* Module parameter configures an IRQ */
+	hwparam_dma,		/* Module parameter configures a DMA channel */
+	hwparam_dma_addr,	/* Module parameter configures a DMA buffer address */
+	hwparam_other,		/* Module parameter configures some other value */
+};
+
+/**
+ * module_param_hw_named - A parameter representing a hw parameters
+ * @name: a valid C identifier which is the parameter name.
+ * @value: the actual lvalue to alter.
+ * @type: the type of the parameter
+ * @hwtype: what the value represents (enum hwparam_type)
+ * @perm: visibility in sysfs.
+ *
+ * Usually it's a good idea to have variable names and user-exposed names the
+ * same, but that's harder if the variable must be non-static or is inside a
+ * structure.  This allows exposure under a different name.
+ */
+#define module_param_hw_named(name, value, type, hwtype, perm)		\
+	param_check_##type(name, &(value));				\
+	__module_param_call(MODULE_PARAM_PREFIX, name,			\
+			    &param_ops_##type, &value,			\
+			    perm, -1,					\
+			    KERNEL_PARAM_FL_HWPARAM | (hwparam_##hwtype & 0));	\
+	__MODULE_PARM_TYPE(name, #type)
+
+#define module_param_hw(name, type, hwtype, perm)		\
+	module_param_hw_named(name, name, type, hwtype, perm)
+
+/**
+ * module_param_hw_array - A parameter representing an array of hw parameters
+ * @name: the name of the array variable
+ * @type: the type, as per module_param()
+ * @hwtype: what the value represents (enum hwparam_type)
+ * @nump: optional pointer filled in with the number written
+ * @perm: visibility in sysfs
+ *
+ * Input and output are as comma-separated values.  Commas inside values
+ * don't work properly (eg. an array of charp).
+ *
+ * ARRAY_SIZE(@name) is used to determine the number of elements in the
+ * array, so the definition must be visible.
+ */
+#define module_param_hw_array(name, type, hwtype, nump, perm)		\
+	param_check_##type(name, &(name)[0]);				\
+	static const struct kparam_array __param_arr_##name		\
+	= { .max = ARRAY_SIZE(name), .num = nump,			\
+	    .ops = &param_ops_##type,					\
+	    .elemsize = sizeof(name[0]), .elem = name };		\
+	__module_param_call(MODULE_PARAM_PREFIX, name,			\
+			    &param_array_ops,				\
+			    .arr = &__param_arr_##name,			\
+			    perm, -1,					\
+			    KERNEL_PARAM_FL_HWPARAM | (hwparam_##hwtype & 0));	\
+	__MODULE_PARM_TYPE(name, "array of " #type)
+
+
 extern const struct kernel_param_ops param_array_ops;
 
 extern const struct kernel_param_ops param_ops_string;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 79b176eca04a..f8a2ef239c60 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -388,7 +388,7 @@ static inline void mtd_set_of_node(struct mtd_info *mtd,
 
 static inline struct device_node *mtd_get_of_node(struct mtd_info *mtd)
 {
-	return mtd->dev.of_node;
+	return dev_of_node(&mtd->dev);
 }
 
 static inline int mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops)
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 9591e0fbe5bd..8f67b1581683 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -366,26 +366,6 @@ struct onfi_ext_param_page {
 	 */
 } __packed;
 
-struct nand_onfi_vendor_micron {
-	u8 two_plane_read;
-	u8 read_cache;
-	u8 read_unique_id;
-	u8 dq_imped;
-	u8 dq_imped_num_settings;
-	u8 dq_imped_feat_addr;
-	u8 rb_pulldown_strength;
-	u8 rb_pulldown_strength_feat_addr;
-	u8 rb_pulldown_strength_num_settings;
-	u8 otp_mode;
-	u8 otp_page_start;
-	u8 otp_data_prot_addr;
-	u8 otp_num_pages;
-	u8 otp_feat_addr;
-	u8 read_retry_options;
-	u8 reserved[72];
-	u8 param_revision;
-} __packed;
-
 struct jedec_ecc_info {
 	u8 ecc_bits;
 	u8 codeword_size;
@@ -465,6 +445,17 @@ struct nand_jedec_params {
 } __packed;
 
 /**
+ * struct nand_id - NAND id structure
+ * @data: buffer containing the id bytes. Currently 8 bytes large, but can
+ *	  be extended if required.
+ * @len: ID length.
+ */
+struct nand_id {
+	u8 data[8];
+	int len;
+};
+
+/**
  * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices
  * @lock:               protection lock
  * @active:		the mtd device which holds the controller currently
@@ -525,7 +516,7 @@ static inline void nand_hw_control_init(struct nand_hw_control *nfc)
  *			out-of-band data).
  * @read_page:	function to read a page according to the ECC generator
  *		requirements; returns maximum number of bitflips corrected in
- *		any single ECC step, 0 if bitflips uncorrectable, -EIO hw error
+ *		any single ECC step, -EIO hw error
  * @read_subpage:	function to read parts of the page covered by ECC;
  *			returns same as read_page()
  * @write_subpage:	function to write parts of the page covered by ECC.
@@ -721,6 +712,20 @@ nand_get_sdr_timings(const struct nand_data_interface *conf)
 }
 
 /**
+ * struct nand_manufacturer_ops - NAND Manufacturer operations
+ * @detect: detect the NAND memory organization and capabilities
+ * @init: initialize all vendor specific fields (like the ->read_retry()
+ *	  implementation) if any.
+ * @cleanup: the ->init() function may have allocated resources, ->cleanup()
+ *	     is here to let vendor specific code release those resources.
+ */
+struct nand_manufacturer_ops {
+	void (*detect)(struct nand_chip *chip);
+	int (*init)(struct nand_chip *chip);
+	void (*cleanup)(struct nand_chip *chip);
+};
+
+/**
  * struct nand_chip - NAND Private Flash Chip Data
  * @mtd:		MTD device registered to the MTD framework
  * @IO_ADDR_R:		[BOARDSPECIFIC] address to read the 8 I/O lines of the
@@ -750,6 +755,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf)
  *			setting the read-retry mode. Mostly needed for MLC NAND.
  * @ecc:		[BOARDSPECIFIC] ECC control structure
  * @buffers:		buffer structure for read/write
+ * @buf_align:		minimum buffer alignment required by a platform
  * @hwcontrol:		platform-specific hardware control structure
  * @erase:		[REPLACEABLE] erase function
  * @scan_bbt:		[REPLACEABLE] function to scan bad block table
@@ -793,6 +799,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf)
  * @pagebuf_bitflips:	[INTERN] holds the bitflip count for the page which is
  *			currently in data_buf.
  * @subpagesize:	[INTERN] holds the subpagesize
+ * @id:			[INTERN] holds NAND ID
  * @onfi_version:	[INTERN] holds the chip ONFI version (BCD encoded),
  *			non 0 if ONFI supported.
  * @jedec_version:	[INTERN] holds the chip JEDEC version (BCD encoded),
@@ -822,7 +829,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf)
  * @errstat:		[OPTIONAL] hardware specific function to perform
  *			additional error status checks (determine if errors are
  *			correctable).
- * @write_page:		[REPLACEABLE] High-level page write function
+ * @manufacturer:	[INTERN] Contains manufacturer information
  */
 
 struct nand_chip {
@@ -847,9 +854,6 @@ struct nand_chip {
 	int (*scan_bbt)(struct mtd_info *mtd);
 	int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state,
 			int status, int page);
-	int (*write_page)(struct mtd_info *mtd, struct nand_chip *chip,
-			uint32_t offset, int data_len, const uint8_t *buf,
-			int oob_required, int page, int cached, int raw);
 	int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip,
 			int feature_addr, uint8_t *subfeature_para);
 	int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip,
@@ -881,6 +885,7 @@ struct nand_chip {
 	int badblockpos;
 	int badblockbits;
 
+	struct nand_id id;
 	int onfi_version;
 	int jedec_version;
 	union {
@@ -901,6 +906,7 @@ struct nand_chip {
 
 	struct nand_ecc_ctrl ecc;
 	struct nand_buffers *buffers;
+	unsigned long buf_align;
 	struct nand_hw_control hwcontrol;
 
 	uint8_t *bbt;
@@ -910,6 +916,11 @@ struct nand_chip {
 	struct nand_bbt_descr *badblock_pattern;
 
 	void *priv;
+
+	struct {
+		const struct nand_manufacturer *desc;
+		void *priv;
+	} manufacturer;
 };
 
 extern const struct mtd_ooblayout_ops nand_ooblayout_sp_ops;
@@ -946,6 +957,17 @@ static inline void nand_set_controller_data(struct nand_chip *chip, void *priv)
 	chip->priv = priv;
 }
 
+static inline void nand_set_manufacturer_data(struct nand_chip *chip,
+					      void *priv)
+{
+	chip->manufacturer.priv = priv;
+}
+
+static inline void *nand_get_manufacturer_data(struct nand_chip *chip)
+{
+	return chip->manufacturer.priv;
+}
+
 /*
  * NAND Flash Manufacturer ID Codes
  */
@@ -1049,17 +1071,33 @@ struct nand_flash_dev {
 };
 
 /**
- * struct nand_manufacturers - NAND Flash Manufacturer ID Structure
+ * struct nand_manufacturer - NAND Flash Manufacturer structure
  * @name:	Manufacturer name
  * @id:		manufacturer ID code of device.
+ * @ops:	manufacturer operations
 */
-struct nand_manufacturers {
+struct nand_manufacturer {
 	int id;
 	char *name;
+	const struct nand_manufacturer_ops *ops;
 };
 
+const struct nand_manufacturer *nand_get_manufacturer(u8 id);
+
+static inline const char *
+nand_manufacturer_name(const struct nand_manufacturer *manufacturer)
+{
+	return manufacturer ? manufacturer->name : "Unknown";
+}
+
 extern struct nand_flash_dev nand_flash_ids[];
-extern struct nand_manufacturers nand_manuf_ids[];
+
+extern const struct nand_manufacturer_ops toshiba_nand_manuf_ops;
+extern const struct nand_manufacturer_ops samsung_nand_manuf_ops;
+extern const struct nand_manufacturer_ops hynix_nand_manuf_ops;
+extern const struct nand_manufacturer_ops micron_nand_manuf_ops;
+extern const struct nand_manufacturer_ops amd_nand_manuf_ops;
+extern const struct nand_manufacturer_ops macronix_nand_manuf_ops;
 
 int nand_default_bbt(struct mtd_info *mtd);
 int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs);
@@ -1226,4 +1264,6 @@ int nand_reset(struct nand_chip *chip, int chipnr);
 /* Free resources held by the NAND device */
 void nand_cleanup(struct nand_chip *chip);
 
+/* Default extended ID decoding function */
+void nand_decode_ext_id(struct nand_chip *chip);
 #endif /* __LINUX_MTD_NAND_H */
diff --git a/include/linux/namei.h b/include/linux/namei.h
index f29abda31e6d..8b4794e83196 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -44,6 +44,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_JUMPED		0x1000
 #define LOOKUP_ROOT		0x2000
 #define LOOKUP_EMPTY		0x4000
+#define LOOKUP_DOWN		0x8000
 
 extern int path_pts(struct path *path);
 
diff --git a/include/linux/nd.h b/include/linux/nd.h
index fa66aeed441a..194b8e002ea7 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -48,7 +48,7 @@ struct nd_namespace_common {
 	struct device dev;
 	struct device *claim;
 	int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset,
-			void *buf, size_t size, int rw);
+			void *buf, size_t size, int rw, unsigned long flags);
 };
 
 static inline struct nd_namespace_common *to_ndns(struct device *dev)
@@ -134,9 +134,10 @@ static inline struct nd_namespace_blk *to_nd_namespace_blk(const struct device *
  * @buf is up-to-date upon return from this routine.
  */
 static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns,
-		resource_size_t offset, void *buf, size_t size)
+		resource_size_t offset, void *buf, size_t size,
+		unsigned long flags)
 {
-	return ndns->rw_bytes(ndns, offset, buf, size, READ);
+	return ndns->rw_bytes(ndns, offset, buf, size, READ, flags);
 }
 
 /**
@@ -152,9 +153,10 @@ static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns,
  * to media is handled internal to the @ndns driver, if at all.
  */
 static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns,
-		resource_size_t offset, void *buf, size_t size)
+		resource_size_t offset, void *buf, size_t size,
+		unsigned long flags)
 {
-	return ndns->rw_bytes(ndns, offset, buf, size, WRITE);
+	return ndns->rw_bytes(ndns, offset, buf, size, WRITE, flags);
 }
 
 #define MODULE_ALIAS_ND_DEVICE(type) \
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9c23bd2efb56..4ed952c17fc7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -914,8 +914,7 @@ struct xfrmdev_ops {
  *
  * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu);
  *	Called when a user wants to change the Maximum Transfer Unit
- *	of a device. If not defined, any request to change MTU will
- *	will return an error.
+ *	of a device.
  *
  * void (*ndo_tx_timeout)(struct net_device *dev);
  *	Callback used when the transmitter has not made any progress
@@ -1596,8 +1595,8 @@ enum netdev_priv_flags {
  *	@rtnl_link_state:	This enum represents the phases of creating
  *				a new link
  *
- *	@destructor:		Called from unregister,
- *				can be used to call free_netdev
+ *	@needs_free_netdev:	Should unregister perform free_netdev?
+ *	@priv_destructor:	Called from unregister
  *	@npinfo:		XXX: need comments on this one
  * 	@nd_net:		Network namespace this network device is inside
  *
@@ -1858,7 +1857,8 @@ struct net_device {
 		RTNL_LINK_INITIALIZING,
 	} rtnl_link_state:16;
 
-	void (*destructor)(struct net_device *dev);
+	bool needs_free_netdev;
+	void (*priv_destructor)(struct net_device *dev);
 
 #ifdef CONFIG_NETPOLL
 	struct netpoll_info __rcu	*npinfo;
@@ -3296,11 +3296,15 @@ int dev_get_phys_port_id(struct net_device *dev,
 int dev_get_phys_port_name(struct net_device *dev,
 			   char *name, size_t len);
 int dev_change_proto_down(struct net_device *dev, bool proto_down);
-int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
-		      int fd, u32 flags);
 struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
 struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				    struct netdev_queue *txq, int *ret);
+
+typedef int (*xdp_op_t)(struct net_device *dev, struct netdev_xdp *xdp);
+int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
+		      int fd, u32 flags);
+bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op);
+
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 bool is_skb_forwardable(const struct net_device *dev,
@@ -4257,6 +4261,11 @@ static inline const char *netdev_name(const struct net_device *dev)
 	return dev->name;
 }
 
+static inline bool netdev_unregistering(const struct net_device *dev)
+{
+	return dev->reg_state == NETREG_UNREGISTERING;
+}
+
 static inline const char *netdev_reg_state(const struct net_device *dev)
 {
 	switch (dev->reg_state) {
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index be378cf47fcc..b3044c2c62cb 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -294,7 +294,7 @@ int xt_match_to_user(const struct xt_entry_match *m,
 int xt_target_to_user(const struct xt_entry_target *t,
 		      struct xt_entry_target __user *u);
 int xt_data_to_user(void __user *dst, const void *src,
-		    int usersize, int size);
+		    int usersize, int size, int aligned_size);
 
 void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
 				 struct xt_counters_info *info, bool compat);
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index a30efb437e6d..e0cbf17af780 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -125,4 +125,9 @@ extern unsigned int ebt_do_table(struct sk_buff *skb,
 /* True if the target is not a standard target */
 #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0)
 
+static inline bool ebt_invalid_target(int target)
+{
+	return (target < -NUM_STANDARD_TARGETS || target >= 0);
+}
+
 #endif
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 287f34161086..bb0eb2c9acca 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -76,6 +76,7 @@ struct nfs_open_context {
 #define NFS_CONTEXT_ERROR_WRITE		(0)
 #define NFS_CONTEXT_RESEND_WRITES	(1)
 #define NFS_CONTEXT_BAD			(2)
+#define NFS_CONTEXT_UNLOCK	(3)
 	int error;
 
 	struct list_head list;
@@ -499,25 +500,13 @@ extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned
  */
 extern int nfs_sync_inode(struct inode *inode);
 extern int nfs_wb_all(struct inode *inode);
-extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder);
+extern int nfs_wb_page(struct inode *inode, struct page *page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 extern int  nfs_commit_inode(struct inode *, int);
-extern struct nfs_commit_data *nfs_commitdata_alloc(void);
+extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail);
 extern void nfs_commit_free(struct nfs_commit_data *data);
 
 static inline int
-nfs_wb_launder_page(struct inode *inode, struct page *page)
-{
-	return nfs_wb_single_page(inode, page, true);
-}
-
-static inline int
-nfs_wb_page(struct inode *inode, struct page *page)
-{
-	return nfs_wb_single_page(inode, page, false);
-}
-
-static inline int
 nfs_have_writebacks(struct inode *inode)
 {
 	return NFS_I(inode)->nrequests != 0;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e1502c55741e..e418a1096662 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -221,6 +221,7 @@ struct nfs_server {
 	u32			mountd_version;
 	unsigned short		mountd_port;
 	unsigned short		mountd_protocol;
+	struct rpc_wait_queue	uoc_rpcwaitq;
 };
 
 /* Server capabilities */
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 957049f72290..247cc3d3498f 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -64,7 +64,6 @@ struct nfs_pageio_ops {
 };
 
 struct nfs_rw_ops {
-	const fmode_t rw_mode;
 	struct nfs_pgio_header *(*rw_alloc_header)(void);
 	void (*rw_free_header)(struct nfs_pgio_header *);
 	int  (*rw_done)(struct rpc_task *, struct nfs_pgio_header *,
@@ -124,7 +123,8 @@ extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 			     const struct nfs_pgio_completion_ops *compl_ops,
 			     const struct nfs_rw_ops *rw_ops,
 			     size_t bsize,
-			     int how);
+			     int how,
+			     gfp_t gfp_flags);
 extern	int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
 				   struct nfs_page *);
 extern  int nfs_pageio_resend(struct nfs_pageio_descriptor *,
@@ -141,6 +141,7 @@ extern int nfs_page_group_lock(struct nfs_page *, bool);
 extern void nfs_page_group_lock_wait(struct nfs_page *);
 extern void nfs_page_group_unlock(struct nfs_page *);
 extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
+extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);
 
 /*
  * Lock the page of an asynchronous request
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 348f7c158084..b28c83475ee8 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1383,6 +1383,7 @@ struct nfs42_copy_res {
 	struct nfs42_write_res		write_res;
 	bool				consecutive;
 	bool				synchronous;
+	struct nfs_commitres		commit_res;
 };
 
 struct nfs42_seek_args {
@@ -1427,6 +1428,7 @@ struct nfs_pgio_header {
 	struct list_head	pages;
 	struct nfs_page		*req;
 	struct nfs_writeverf	verf;		/* Used for writes */
+	fmode_t			rw_mode;
 	struct pnfs_layout_segment *lseg;
 	loff_t			io_start;
 	const struct rpc_call_ops *mds_ops;
@@ -1550,6 +1552,7 @@ struct nfs_rpc_ops {
 	const struct inode_operations *dir_inode_ops;
 	const struct inode_operations *file_inode_ops;
 	const struct file_operations *file_ops;
+	const struct nlmclnt_operations *nlmclnt_ops;
 
 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fsinfo *);
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 0db37158a61d..6c8c5d8041b7 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -27,8 +27,8 @@
 
 /* FC Port role bitmask - can merge with FC Port Roles in fc transport */
 #define FC_PORT_ROLE_NVME_INITIATOR	0x10
-#define FC_PORT_ROLE_NVME_TARGET	0x11
-#define FC_PORT_ROLE_NVME_DISCOVERY	0x12
+#define FC_PORT_ROLE_NVME_TARGET	0x20
+#define FC_PORT_ROLE_NVME_DISCOVERY	0x40
 
 
 /**
@@ -642,15 +642,7 @@ enum {
 		 * sequence in one LLDD operation. Errors during Data
 		 * sequence transmit must not allow RSP sequence to be sent.
 		 */
-	NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED = (1 << 1),
-		/* Bit 1: When 0, the LLDD will deliver FCP CMD
-		 * on the CPU it should be affinitized to. Thus work will
-		 * be scheduled on the cpu received on. When 1, the LLDD
-		 * may not deliver the CMD on the CPU it should be worked
-		 * on. The transport should pick a cpu to schedule the work
-		 * on.
-		 */
-	NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 2),
+	NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 1),
 		/* Bit 2: When 0, the LLDD is calling the cmd rcv handler
 		 * in a non-isr context, allowing the transport to finish
 		 * op completion in the calling context. When 1, the LLDD
@@ -658,7 +650,7 @@ enum {
 		 * requiring the transport to transition to a workqueue
 		 * for op completion.
 		 */
-	NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 3),
+	NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 2),
 		/* Bit 3: When 0, the LLDD is calling the op done handler
 		 * in a non-isr context, allowing the transport to finish
 		 * op completion in the calling context. When 1, the LLDD
diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h
index e997c4a49a88..bc711a10be05 100644
--- a/include/linux/nvme-fc.h
+++ b/include/linux/nvme-fc.h
@@ -177,7 +177,6 @@ struct fcnvme_lsdesc_rjt {
 };
 
 
-#define FCNVME_ASSOC_HOSTID_LEN		16
 #define FCNVME_ASSOC_HOSTNQN_LEN	256
 #define FCNVME_ASSOC_SUBNQN_LEN		256
 
@@ -191,7 +190,7 @@ struct fcnvme_lsdesc_cr_assoc_cmd {
 	__be16	cntlid;
 	__be16	sqsize;
 	__be32	rsvd52;
-	u8	hostid[FCNVME_ASSOC_HOSTID_LEN];
+	uuid_t	hostid;
 	u8	hostnqn[FCNVME_ASSOC_HOSTNQN_LEN];
 	u8	subnqn[FCNVME_ASSOC_SUBNQN_LEN];
 	u8	rsvd632[384];
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b625bacf37ef..6b8ee9e628e1 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -16,6 +16,7 @@
 #define _LINUX_NVME_H
 
 #include <linux/types.h>
+#include <linux/uuid.h>
 
 /* NQN names in commands fields specified one size */
 #define NVMF_NQN_FIELD_LEN	256
@@ -86,7 +87,7 @@ enum {
 	NVMF_RDMA_CMS_RDMA_CM	= 1, /* Sockets based endpoint addressing */
 };
 
-#define NVMF_AQ_DEPTH		32
+#define NVME_AQ_DEPTH		32
 
 enum {
 	NVME_REG_CAP	= 0x0000,	/* Controller Capabilities */
@@ -101,6 +102,7 @@ enum {
 	NVME_REG_ACQ	= 0x0030,	/* Admin CQ Base Address */
 	NVME_REG_CMBLOC = 0x0038,	/* Controller Memory Buffer Location */
 	NVME_REG_CMBSZ	= 0x003c,	/* Controller Memory Buffer Size */
+	NVME_REG_DBS	= 0x1000,	/* SQ 0 Tail Doorbell */
 };
 
 #define NVME_CAP_MQES(cap)	((cap) & 0xffff)
@@ -207,9 +209,15 @@ struct nvme_id_ctrl {
 	__u8			tnvmcap[16];
 	__u8			unvmcap[16];
 	__le32			rpmbs;
-	__u8			rsvd316[4];
+	__le16			edstt;
+	__u8			dsto;
+	__u8			fwug;
 	__le16			kas;
-	__u8			rsvd322[190];
+	__le16			hctma;
+	__le16			mntmt;
+	__le16			mxtmt;
+	__le32			sanicap;
+	__u8			rsvd332[180];
 	__u8			sqes;
 	__u8			cqes;
 	__le16			maxcmd;
@@ -245,6 +253,7 @@ enum {
 	NVME_CTRL_ONCS_WRITE_ZEROES		= 1 << 3,
 	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 	NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
+	NVME_CTRL_OACS_DIRECTIVES		= 1 << 5,
 	NVME_CTRL_OACS_DBBUF_SUPP		= 1 << 7,
 };
 
@@ -274,7 +283,7 @@ struct nvme_id_ns {
 	__le16			nabsn;
 	__le16			nabo;
 	__le16			nabspf;
-	__u16			rsvd46;
+	__le16			noiob;
 	__u8			nvmcap[16];
 	__u8			rsvd64[40];
 	__u8			nguid[16];
@@ -288,6 +297,7 @@ enum {
 	NVME_ID_CNS_NS			= 0x00,
 	NVME_ID_CNS_CTRL		= 0x01,
 	NVME_ID_CNS_NS_ACTIVE_LIST	= 0x02,
+	NVME_ID_CNS_NS_DESC_LIST	= 0x03,
 	NVME_ID_CNS_NS_PRESENT_LIST	= 0x10,
 	NVME_ID_CNS_NS_PRESENT		= 0x11,
 	NVME_ID_CNS_CTRL_NS_LIST	= 0x12,
@@ -295,6 +305,19 @@ enum {
 };
 
 enum {
+	NVME_DIR_IDENTIFY		= 0x00,
+	NVME_DIR_STREAMS		= 0x01,
+	NVME_DIR_SND_ID_OP_ENABLE	= 0x01,
+	NVME_DIR_SND_ST_OP_REL_ID	= 0x01,
+	NVME_DIR_SND_ST_OP_REL_RSC	= 0x02,
+	NVME_DIR_RCV_ID_OP_PARAM	= 0x01,
+	NVME_DIR_RCV_ST_OP_PARAM	= 0x01,
+	NVME_DIR_RCV_ST_OP_STATUS	= 0x02,
+	NVME_DIR_RCV_ST_OP_RESOURCE	= 0x03,
+	NVME_DIR_ENDIR			= 0x01,
+};
+
+enum {
 	NVME_NS_FEAT_THIN	= 1 << 0,
 	NVME_NS_FLBAS_LBA_MASK	= 0xf,
 	NVME_NS_FLBAS_META_EXT	= 0x10,
@@ -314,6 +337,22 @@ enum {
 	NVME_NS_DPS_PI_TYPE3	= 3,
 };
 
+struct nvme_ns_id_desc {
+	__u8 nidt;
+	__u8 nidl;
+	__le16 reserved;
+};
+
+#define NVME_NIDT_EUI64_LEN	8
+#define NVME_NIDT_NGUID_LEN	16
+#define NVME_NIDT_UUID_LEN	16
+
+enum {
+	NVME_NIDT_EUI64		= 0x01,
+	NVME_NIDT_NGUID		= 0x02,
+	NVME_NIDT_UUID		= 0x03,
+};
+
 struct nvme_smart_log {
 	__u8			critical_warning;
 	__u8			temperature[2];
@@ -535,6 +574,7 @@ enum {
 	NVME_RW_PRINFO_PRCHK_APP	= 1 << 11,
 	NVME_RW_PRINFO_PRCHK_GUARD	= 1 << 12,
 	NVME_RW_PRINFO_PRACT		= 1 << 13,
+	NVME_RW_DTYPE_STREAMS		= 1 << 4,
 };
 
 struct nvme_dsm_cmd {
@@ -586,6 +626,11 @@ struct nvme_feat_auto_pst {
 	__le64 entries[32];
 };
 
+enum {
+	NVME_HOST_MEM_ENABLE	= (1 << 0),
+	NVME_HOST_MEM_RETURN	= (1 << 1),
+};
+
 /* Admin commands */
 
 enum nvme_admin_opcode {
@@ -604,6 +649,8 @@ enum nvme_admin_opcode {
 	nvme_admin_download_fw		= 0x11,
 	nvme_admin_ns_attach		= 0x15,
 	nvme_admin_keep_alive		= 0x18,
+	nvme_admin_directive_send	= 0x19,
+	nvme_admin_directive_recv	= 0x1a,
 	nvme_admin_dbbuf		= 0x7C,
 	nvme_admin_format_nvm		= 0x80,
 	nvme_admin_security_send	= 0x81,
@@ -658,6 +705,8 @@ struct nvme_identify {
 	__u32			rsvd11[5];
 };
 
+#define NVME_IDENTIFY_DATA_SIZE 4096
+
 struct nvme_features {
 	__u8			opcode;
 	__u8			flags;
@@ -667,7 +716,16 @@ struct nvme_features {
 	union nvme_data_ptr	dptr;
 	__le32			fid;
 	__le32			dword11;
-	__u32			rsvd12[4];
+	__le32                  dword12;
+	__le32                  dword13;
+	__le32                  dword14;
+	__le32                  dword15;
+};
+
+struct nvme_host_mem_buf_desc {
+	__le64			addr;
+	__le32			size;
+	__u32			rsvd;
 };
 
 struct nvme_create_cq {
@@ -756,6 +814,24 @@ struct nvme_get_log_page_command {
 	__u32			rsvd14[2];
 };
 
+struct nvme_directive_cmd {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2[2];
+	union nvme_data_ptr	dptr;
+	__le32			numd;
+	__u8			doper;
+	__u8			dtype;
+	__le16			dspec;
+	__u8			endir;
+	__u8			tdtype;
+	__u16			rsvd15;
+
+	__u32			rsvd16[3];
+};
+
 /*
  * Fabrics subcommands.
  */
@@ -843,7 +919,7 @@ struct nvmf_connect_command {
 };
 
 struct nvmf_connect_data {
-	__u8		hostid[16];
+	uuid_t		hostid;
 	__le16		cntlid;
 	char		resv4[238];
 	char		subsysnqn[NVMF_NQN_FIELD_LEN];
@@ -886,6 +962,18 @@ struct nvme_dbbuf {
 	__u32			rsvd12[6];
 };
 
+struct streams_directive_params {
+	__u16	msl;
+	__u16	nssa;
+	__u16	nsso;
+	__u8	rsvd[10];
+	__u32	sws;
+	__u16	sgs;
+	__u16	nsa;
+	__u16	nso;
+	__u8	rsvd2[6];
+};
+
 struct nvme_command {
 	union {
 		struct nvme_common_command common;
@@ -906,6 +994,7 @@ struct nvme_command {
 		struct nvmf_property_set_command prop_set;
 		struct nvmf_property_get_command prop_get;
 		struct nvme_dbbuf dbbuf;
+		struct nvme_directive_cmd directive;
 	};
 };
 
@@ -1050,4 +1139,8 @@ struct nvme_completion {
 #define NVME_VS(major, minor, tertiary) \
 	(((major) << 16) | ((minor) << 8) | (tertiary))
 
+#define NVME_MAJOR(ver)		((ver) >> 16)
+#define NVME_MINOR(ver)		(((ver) >> 8) & 0xff)
+#define NVME_TERTIARY(ver)	((ver) & 0xff)
+
 #endif /* _LINUX_NVME_H */
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 271b3fdf0070..1dfbfd0d8040 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -54,6 +54,11 @@ extern char __dtb_end[];
 extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname,
 				     int depth, void *data),
 			   void *data);
+extern int of_scan_flat_dt_subnodes(unsigned long node,
+				    int (*it)(unsigned long node,
+					      const char *uname,
+					      void *data),
+				    void *data);
 extern int of_get_flat_dt_subnode_by_name(unsigned long node,
 					  const char *uname);
 extern const void *of_get_flat_dt_prop(unsigned long node, const char *name,
@@ -62,6 +67,7 @@ extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
 extern int of_flat_dt_match(unsigned long node, const char *const *matches);
 extern unsigned long of_get_flat_dt_root(void);
 extern int of_get_flat_dt_size(void);
+extern uint32_t of_get_flat_dt_phandle(unsigned long node);
 
 extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data);
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index ec6b11deb773..1e0deb8e8494 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -8,7 +8,7 @@
 #include <linux/ioport.h>
 #include <linux/of.h>
 
-typedef int const (*of_irq_init_cb_t)(struct device_node *, struct device_node *);
+typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *);
 
 /*
  * Workarounds only applied to 32bit powermac machines
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index dc8224ae28d5..e0d1946270f3 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -64,6 +64,7 @@ extern struct platform_device *of_platform_device_create(struct device_node *np,
 						   const char *bus_id,
 						   struct device *parent);
 
+extern int of_platform_device_destroy(struct device *dev, void *data);
 extern int of_platform_bus_probe(struct device_node *root,
 				 const struct of_device_id *matches,
 				 struct device *parent);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 316a19f6b635..e7bbd9d4dc6c 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -524,7 +524,7 @@ void page_endio(struct page *page, bool is_write, int err);
 /*
  * Add an arbitrary waiter to a page's wait queue
  */
-extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter);
+extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
 
 /*
  * Fault everything in given userspace address range in.
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 7a4e83a8c89c..dd86c97f2454 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -105,7 +105,7 @@ static inline void acpiphp_remove_slots(struct pci_bus *bus) { }
 static inline void acpiphp_check_host_bridge(struct acpi_device *adev) { }
 #endif
 
-extern const u8 pci_acpi_dsm_uuid[];
+extern const guid_t pci_acpi_dsm_guid;
 #define DEVICE_LABEL_DSM	0x07
 #define RESET_DELAY_DSM		0x08
 #define FUNCTION_DELAY_DSM	0x09
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33c2b0b77429..8039f9f0ca05 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -183,6 +183,11 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9),
 	/* Do not use FLR even if device advertises PCI_AF_CAP */
 	PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10),
+	/*
+	 * Resume before calling the driver's system suspend hooks, disabling
+	 * the direct_complete optimization.
+	 */
+	PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11),
 };
 
 enum pci_irq_reroute_variant {
@@ -1342,9 +1347,9 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 			       unsigned int max_vecs, unsigned int flags,
 			       const struct irq_affinity *aff_desc)
 {
-	if (min_vecs > 1)
-		return -EINVAL;
-	return 1;
+	if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq)
+		return 1;
+	return -ENOSPC;
 }
 
 static inline void pci_free_irq_vectors(struct pci_dev *dev)
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 279e3c5326e3..7620eb127cff 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -42,8 +42,6 @@
  * @PIN_CONFIG_BIAS_PULL_UP: the pin will be pulled up (usually with high
  *	impedance to VDD). If the argument is != 0 pull-up is enabled,
  *	if it is 0, pull-up is total, i.e. the pin is connected to VDD.
- * @PIN_CONFIG_BIDIRECTIONAL: the pin will be configured to allow simultaneous
- *	input and output operations.
  * @PIN_CONFIG_DRIVE_OPEN_DRAIN: the pin will be driven with open drain (open
  *	collector) which means it is usually wired with other output ports
  *	which are then pulled up with an external resistor. Setting this
@@ -98,7 +96,6 @@ enum pin_config_param {
 	PIN_CONFIG_BIAS_PULL_DOWN,
 	PIN_CONFIG_BIAS_PULL_PIN_DEFAULT,
 	PIN_CONFIG_BIAS_PULL_UP,
-	PIN_CONFIG_BIDIRECTIONAL,
 	PIN_CONFIG_DRIVE_OPEN_DRAIN,
 	PIN_CONFIG_DRIVE_OPEN_SOURCE,
 	PIN_CONFIG_DRIVE_PUSH_PULL,
diff --git a/include/linux/platform_data/video-imxfb.h b/include/linux/platform_data/video-imxfb.h
index a5c0a71ec914..cf9348b376ac 100644
--- a/include/linux/platform_data/video-imxfb.h
+++ b/include/linux/platform_data/video-imxfb.h
@@ -50,6 +50,7 @@
 struct imx_fb_videomode {
 	struct fb_videomode mode;
 	u32 pcr;
+	bool aus_mode;
 	unsigned char	bpp;
 };
 
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index a3447932df1f..4c2cba7ec1d4 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -106,8 +106,8 @@ extern void __pm_stay_awake(struct wakeup_source *ws);
 extern void pm_stay_awake(struct device *dev);
 extern void __pm_relax(struct wakeup_source *ws);
 extern void pm_relax(struct device *dev);
-extern void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec);
-extern void pm_wakeup_event(struct device *dev, unsigned int msec);
+extern void pm_wakeup_ws_event(struct wakeup_source *ws, unsigned int msec, bool hard);
+extern void pm_wakeup_dev_event(struct device *dev, unsigned int msec, bool hard);
 
 #else /* !CONFIG_PM_SLEEP */
 
@@ -182,9 +182,11 @@ static inline void __pm_relax(struct wakeup_source *ws) {}
 
 static inline void pm_relax(struct device *dev) {}
 
-static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) {}
+static inline void pm_wakeup_ws_event(struct wakeup_source *ws,
+				      unsigned int msec, bool hard) {}
 
-static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {}
+static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec,
+				       bool hard) {}
 
 #endif /* !CONFIG_PM_SLEEP */
 
@@ -201,4 +203,19 @@ static inline void wakeup_source_trash(struct wakeup_source *ws)
 	wakeup_source_drop(ws);
 }
 
+static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
+{
+	return pm_wakeup_ws_event(ws, msec, false);
+}
+
+static inline void pm_wakeup_event(struct device *dev, unsigned int msec)
+{
+	return pm_wakeup_dev_event(dev, msec, false);
+}
+
+static inline void pm_wakeup_hard_event(struct device *dev)
+{
+	return pm_wakeup_dev_event(dev, 0, true);
+}
+
 #endif /* _LINUX_PM_WAKEUP_H */
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 75ffc5729e4c..2889f09a1c60 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -75,7 +75,7 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
 struct poll_table_entry {
 	struct file *filp;
 	unsigned long key;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	wait_queue_head_t *wait_address;
 };
 
diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h
index 522757ac9cd4..a7ed29baf44a 100644
--- a/include/linux/power/max17042_battery.h
+++ b/include/linux/power/max17042_battery.h
@@ -24,8 +24,15 @@
 #define __MAX17042_BATTERY_H_
 
 #define MAX17042_STATUS_BattAbsent	(1 << 3)
-#define MAX17042_BATTERY_FULL	(100)
+#define MAX17042_BATTERY_FULL		(95)   /* Recommend. FullSOCThr value */
 #define MAX17042_DEFAULT_SNS_RESISTOR	(10000)
+#define MAX17042_DEFAULT_VMIN		(3000)
+#define MAX17042_DEFAULT_VMAX		(4500) /* LiHV cell max */
+#define MAX17042_DEFAULT_TEMP_MIN	(0)    /* For sys without temp sensor */
+#define MAX17042_DEFAULT_TEMP_MAX	(700)  /* 70 degrees Celcius */
+
+/* Consider RepCap which is less then 10 units below FullCAP full */
+#define MAX17042_FULL_THRESHOLD		10
 
 #define MAX17042_CHARACTERIZATION_DATA_SIZE 48
 
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 6c70444da3b9..6b2e0dd88569 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -34,11 +34,13 @@
 struct ptr_ring {
 	int producer ____cacheline_aligned_in_smp;
 	spinlock_t producer_lock;
-	int consumer ____cacheline_aligned_in_smp;
+	int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
+	int consumer_tail; /* next entry to invalidate */
 	spinlock_t consumer_lock;
 	/* Shared consumer/producer data */
 	/* Read-only by both the producer and the consumer */
 	int size ____cacheline_aligned_in_smp; /* max entries in queue */
+	int batch; /* number of entries to consume in a batch */
 	void **queue;
 };
 
@@ -170,7 +172,7 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
 static inline void *__ptr_ring_peek(struct ptr_ring *r)
 {
 	if (likely(r->size))
-		return r->queue[r->consumer];
+		return r->queue[r->consumer_head];
 	return NULL;
 }
 
@@ -231,9 +233,38 @@ static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
 /* Must only be called after __ptr_ring_peek returned !NULL */
 static inline void __ptr_ring_discard_one(struct ptr_ring *r)
 {
-	r->queue[r->consumer++] = NULL;
-	if (unlikely(r->consumer >= r->size))
-		r->consumer = 0;
+	/* Fundamentally, what we want to do is update consumer
+	 * index and zero out the entry so producer can reuse it.
+	 * Doing it naively at each consume would be as simple as:
+	 *       r->queue[r->consumer++] = NULL;
+	 *       if (unlikely(r->consumer >= r->size))
+	 *               r->consumer = 0;
+	 * but that is suboptimal when the ring is full as producer is writing
+	 * out new entries in the same cache line.  Defer these updates until a
+	 * batch of entries has been consumed.
+	 */
+	int head = r->consumer_head++;
+
+	/* Once we have processed enough entries invalidate them in
+	 * the ring all at once so producer can reuse their space in the ring.
+	 * We also do this when we reach end of the ring - not mandatory
+	 * but helps keep the implementation simple.
+	 */
+	if (unlikely(r->consumer_head - r->consumer_tail >= r->batch ||
+		     r->consumer_head >= r->size)) {
+		/* Zero out entries in the reverse order: this way we touch the
+		 * cache line that producer might currently be reading the last;
+		 * producer won't make progress and touch other cache lines
+		 * besides the first one until we write out all entries.
+		 */
+		while (likely(head >= r->consumer_tail))
+			r->queue[head--] = NULL;
+		r->consumer_tail = r->consumer_head;
+	}
+	if (unlikely(r->consumer_head >= r->size)) {
+		r->consumer_head = 0;
+		r->consumer_tail = 0;
+	}
 }
 
 static inline void *__ptr_ring_consume(struct ptr_ring *r)
@@ -345,14 +376,27 @@ static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp)
 	return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp);
 }
 
+static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
+{
+	r->size = size;
+	r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
+	/* We need to set batch at least to 1 to make logic
+	 * in __ptr_ring_discard_one work correctly.
+	 * Batching too much (because ring is small) would cause a lot of
+	 * burstiness. Needs tuning, for now disable batching.
+	 */
+	if (r->batch > r->size / 2 || !r->batch)
+		r->batch = 1;
+}
+
 static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
 {
 	r->queue = __ptr_ring_init_queue_alloc(size, gfp);
 	if (!r->queue)
 		return -ENOMEM;
 
-	r->size = size;
-	r->producer = r->consumer = 0;
+	__ptr_ring_set_size(r, size);
+	r->producer = r->consumer_head = r->consumer_tail = 0;
 	spin_lock_init(&r->producer_lock);
 	spin_lock_init(&r->consumer_lock);
 
@@ -373,9 +417,10 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
 		else if (destroy)
 			destroy(ptr);
 
-	r->size = size;
+	__ptr_ring_set_size(r, size);
 	r->producer = producer;
-	r->consumer = 0;
+	r->consumer_head = 0;
+	r->consumer_tail = 0;
 	old = r->queue;
 	r->queue = queue;
 
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 422bc2e4cb6a..0e5fcc11b1b8 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -54,7 +54,8 @@ extern int ptrace_request(struct task_struct *child, long request,
 			  unsigned long addr, unsigned long data);
 extern void ptrace_notify(int exit_code);
 extern void __ptrace_link(struct task_struct *child,
-			  struct task_struct *new_parent);
+			  struct task_struct *new_parent,
+			  const struct cred *ptracer_cred);
 extern void __ptrace_unlink(struct task_struct *child);
 extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
 #define PTRACE_MODE_READ	0x01
@@ -206,7 +207,7 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
 
 	if (unlikely(ptrace) && current->ptrace) {
 		child->ptrace = current->ptrace;
-		__ptrace_link(child, current->parent);
+		__ptrace_link(child, current->parent, current->ptracer_cred);
 
 		if (child->ptrace & PT_SEIZED)
 			task_set_jobctl_pending(child, JOBCTL_TRAP_STOP);
@@ -215,6 +216,8 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
 
 		set_tsk_thread_flag(child, TIF_SIGPENDING);
 	}
+	else
+		child->ptracer_cred = NULL;
 }
 
 /**
@@ -388,10 +391,6 @@ static inline void user_single_step_siginfo(struct task_struct *tsk,
 #define current_pt_regs() task_pt_regs(current)
 #endif
 
-#ifndef ptrace_signal_deliver
-#define ptrace_signal_deliver() ((void)0)
-#endif
-
 /*
  * unlike current_pt_regs(), this one is equal to task_pt_regs(current)
  * on *all* architectures; the only reason to have a per-arch definition
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 9c6f768b7d32..dda22f45fc1b 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -44,6 +44,7 @@ void inode_sub_rsv_space(struct inode *inode, qsize_t number);
 void inode_reclaim_rsv_space(struct inode *inode, qsize_t number);
 
 int dquot_initialize(struct inode *inode);
+bool dquot_initialize_needed(struct inode *inode);
 void dquot_drop(struct inode *inode);
 struct dquot *dqget(struct super_block *sb, struct kqid qid);
 static inline struct dquot *dqgrab(struct dquot *dquot)
@@ -207,6 +208,11 @@ static inline int dquot_initialize(struct inode *inode)
 	return 0;
 }
 
+static inline bool dquot_initialize_needed(struct inode *inode)
+{
+	return false;
+}
+
 static inline void dquot_drop(struct inode *inode)
 {
 }
diff --git a/include/linux/rcu_node_tree.h b/include/linux/rcu_node_tree.h
new file mode 100644
index 000000000000..426cee67f0e2
--- /dev/null
+++ b/include/linux/rcu_node_tree.h
@@ -0,0 +1,103 @@
+/*
+ * RCU node combining tree definitions.  These are used to compute
+ * global attributes while avoiding common-case global contention.  A key
+ * property that these computations rely on is a tournament-style approach
+ * where only one of the tasks contending a lower level in the tree need
+ * advance to the next higher level.  If properly configured, this allows
+ * unlimited scalability while maintaining a constant level of contention
+ * on the root node.
+ *
+ * This seemingly RCU-private file must be available to SRCU users
+ * because the size of the TREE SRCU srcu_struct structure depends
+ * on these definitions.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright IBM Corporation, 2017
+ *
+ * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#ifndef __LINUX_RCU_NODE_TREE_H
+#define __LINUX_RCU_NODE_TREE_H
+
+/*
+ * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
+ * CONFIG_RCU_FANOUT_LEAF.
+ * In theory, it should be possible to add more levels straightforwardly.
+ * In practice, this did work well going from three levels to four.
+ * Of course, your mileage may vary.
+ */
+
+#ifdef CONFIG_RCU_FANOUT
+#define RCU_FANOUT CONFIG_RCU_FANOUT
+#else /* #ifdef CONFIG_RCU_FANOUT */
+# ifdef CONFIG_64BIT
+# define RCU_FANOUT 64
+# else
+# define RCU_FANOUT 32
+# endif
+#endif /* #else #ifdef CONFIG_RCU_FANOUT */
+
+#ifdef CONFIG_RCU_FANOUT_LEAF
+#define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
+#else /* #ifdef CONFIG_RCU_FANOUT_LEAF */
+#define RCU_FANOUT_LEAF 16
+#endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */
+
+#define RCU_FANOUT_1	      (RCU_FANOUT_LEAF)
+#define RCU_FANOUT_2	      (RCU_FANOUT_1 * RCU_FANOUT)
+#define RCU_FANOUT_3	      (RCU_FANOUT_2 * RCU_FANOUT)
+#define RCU_FANOUT_4	      (RCU_FANOUT_3 * RCU_FANOUT)
+
+#if NR_CPUS <= RCU_FANOUT_1
+#  define RCU_NUM_LVLS	      1
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_NODES	      NUM_RCU_LVL_0
+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0 }
+#  define RCU_NODE_NAME_INIT  { "rcu_node_0" }
+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0" }
+#elif NR_CPUS <= RCU_FANOUT_2
+#  define RCU_NUM_LVLS	      2
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1)
+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
+#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1" }
+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1" }
+#elif NR_CPUS <= RCU_FANOUT_3
+#  define RCU_NUM_LVLS	      3
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+#  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2)
+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
+#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
+#elif NR_CPUS <= RCU_FANOUT_4
+#  define RCU_NUM_LVLS	      4
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
+#  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+#  define NUM_RCU_LVL_3	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
+#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
+#else
+# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
+#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
+
+#endif /* __LINUX_RCU_NODE_TREE_H */
diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
new file mode 100644
index 000000000000..c3ad00e63556
--- /dev/null
+++ b/include/linux/rcu_segcblist.h
@@ -0,0 +1,94 @@
+/*
+ * RCU segmented callback lists
+ *
+ * This seemingly RCU-private file must be available to SRCU users
+ * because the size of the TREE SRCU srcu_struct structure depends
+ * on these definitions.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright IBM Corporation, 2017
+ *
+ * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#ifndef __INCLUDE_LINUX_RCU_SEGCBLIST_H
+#define __INCLUDE_LINUX_RCU_SEGCBLIST_H
+
+/* Simple unsegmented callback lists. */
+struct rcu_cblist {
+	struct rcu_head *head;
+	struct rcu_head **tail;
+	long len;
+	long len_lazy;
+};
+
+#define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head }
+
+/* Complicated segmented callback lists.  ;-) */
+
+/*
+ * Index values for segments in rcu_segcblist structure.
+ *
+ * The segments are as follows:
+ *
+ * [head, *tails[RCU_DONE_TAIL]):
+ *	Callbacks whose grace period has elapsed, and thus can be invoked.
+ * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]):
+ *	Callbacks waiting for the current GP from the current CPU's viewpoint.
+ * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]):
+ *	Callbacks that arrived before the next GP started, again from
+ *	the current CPU's viewpoint.  These can be handled by the next GP.
+ * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]):
+ *	Callbacks that might have arrived after the next GP started.
+ *	There is some uncertainty as to when a given GP starts and
+ *	ends, but a CPU knows the exact times if it is the one starting
+ *	or ending the GP.  Other CPUs know that the previous GP ends
+ *	before the next one starts.
+ *
+ * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also
+ * empty.
+ *
+ * The ->gp_seq[] array contains the grace-period number at which the
+ * corresponding segment of callbacks will be ready to invoke.  A given
+ * element of this array is meaningful only when the corresponding segment
+ * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks
+ * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have
+ * not yet been assigned a grace-period number).
+ */
+#define RCU_DONE_TAIL		0	/* Also RCU_WAIT head. */
+#define RCU_WAIT_TAIL		1	/* Also RCU_NEXT_READY head. */
+#define RCU_NEXT_READY_TAIL	2	/* Also RCU_NEXT head. */
+#define RCU_NEXT_TAIL		3
+#define RCU_CBLIST_NSEGS	4
+
+struct rcu_segcblist {
+	struct rcu_head *head;
+	struct rcu_head **tails[RCU_CBLIST_NSEGS];
+	unsigned long gp_seq[RCU_CBLIST_NSEGS];
+	long len;
+	long len_lazy;
+};
+
+#define RCU_SEGCBLIST_INITIALIZER(n) \
+{ \
+	.head = NULL, \
+	.tails[RCU_DONE_TAIL] = &n.head, \
+	.tails[RCU_WAIT_TAIL] = &n.head, \
+	.tails[RCU_NEXT_READY_TAIL] = &n.head, \
+	.tails[RCU_NEXT_TAIL] = &n.head, \
+}
+
+#endif /* __INCLUDE_LINUX_RCU_SEGCBLIST_H */
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 4f7a9561b8c4..b1fd8bf85fdc 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -509,7 +509,8 @@ static inline void hlist_add_tail_rcu(struct hlist_node *n,
 {
 	struct hlist_node *i, *last = NULL;
 
-	for (i = hlist_first_rcu(h); i; i = hlist_next_rcu(i))
+	/* Note: write side code, so rcu accessors are not needed. */
+	for (i = h->first; i; i = i->next)
 		last = i;
 
 	if (last) {
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index dea8f17b2fe3..f816fc72b51e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -34,104 +34,15 @@
 #define __LINUX_RCUPDATE_H
 
 #include <linux/types.h>
-#include <linux/cache.h>
-#include <linux/spinlock.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/seqlock.h>
-#include <linux/lockdep.h>
-#include <linux/debugobjects.h>
-#include <linux/bug.h>
 #include <linux/compiler.h>
-#include <linux/ktime.h>
+#include <linux/atomic.h>
 #include <linux/irqflags.h>
+#include <linux/preempt.h>
+#include <linux/bottom_half.h>
+#include <linux/lockdep.h>
+#include <asm/processor.h>
+#include <linux/cpumask.h>
 
-#include <asm/barrier.h>
-
-#ifndef CONFIG_TINY_RCU
-extern int rcu_expedited; /* for sysctl */
-extern int rcu_normal;    /* also for sysctl */
-#endif /* #ifndef CONFIG_TINY_RCU */
-
-#ifdef CONFIG_TINY_RCU
-/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
-static inline bool rcu_gp_is_normal(void)  /* Internal RCU use. */
-{
-	return true;
-}
-static inline bool rcu_gp_is_expedited(void)  /* Internal RCU use. */
-{
-	return false;
-}
-
-static inline void rcu_expedite_gp(void)
-{
-}
-
-static inline void rcu_unexpedite_gp(void)
-{
-}
-#else /* #ifdef CONFIG_TINY_RCU */
-bool rcu_gp_is_normal(void);     /* Internal RCU use. */
-bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
-void rcu_expedite_gp(void);
-void rcu_unexpedite_gp(void);
-#endif /* #else #ifdef CONFIG_TINY_RCU */
-
-enum rcutorture_type {
-	RCU_FLAVOR,
-	RCU_BH_FLAVOR,
-	RCU_SCHED_FLAVOR,
-	RCU_TASKS_FLAVOR,
-	SRCU_FLAVOR,
-	INVALID_RCU_FLAVOR
-};
-
-#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
-void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
-			    unsigned long *gpnum, unsigned long *completed);
-void rcutorture_record_test_transition(void);
-void rcutorture_record_progress(unsigned long vernum);
-void do_trace_rcu_torture_read(const char *rcutorturename,
-			       struct rcu_head *rhp,
-			       unsigned long secs,
-			       unsigned long c_old,
-			       unsigned long c);
-bool rcu_irq_enter_disabled(void);
-#else
-static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
-					  int *flags,
-					  unsigned long *gpnum,
-					  unsigned long *completed)
-{
-	*flags = 0;
-	*gpnum = 0;
-	*completed = 0;
-}
-static inline void rcutorture_record_test_transition(void)
-{
-}
-static inline void rcutorture_record_progress(unsigned long vernum)
-{
-}
-static inline bool rcu_irq_enter_disabled(void)
-{
-	return false;
-}
-#ifdef CONFIG_RCU_TRACE
-void do_trace_rcu_torture_read(const char *rcutorturename,
-			       struct rcu_head *rhp,
-			       unsigned long secs,
-			       unsigned long c_old,
-			       unsigned long c);
-#else
-#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
-	do { } while (0)
-#endif
-#endif
-
-#define UINT_CMP_GE(a, b)	(UINT_MAX / 2 >= (a) - (b))
-#define UINT_CMP_LT(a, b)	(UINT_MAX / 2 < (a) - (b))
 #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
 #define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))
 #define ulong2long(a)		(*(long *)(&(a)))
@@ -139,115 +50,14 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
 /* Exported common interfaces */
 
 #ifdef CONFIG_PREEMPT_RCU
-
-/**
- * call_rcu() - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all pre-existing RCU read-side
- * critical sections have completed.  However, the callback function
- * might well execute concurrently with RCU read-side critical sections
- * that started after call_rcu() was invoked.  RCU read-side critical
- * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
- * and may be nested.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing RCU read-side critical section.  On systems with more
- * than one CPU, this means that when "func()" is invoked, each CPU is
- * guaranteed to have executed a full memory barrier since the end of its
- * last RCU read-side critical section whose beginning preceded the call
- * to call_rcu().  It also means that each CPU executing an RCU read-side
- * critical section that continues beyond the start of "func()" must have
- * executed a memory barrier after the call_rcu() but before the beginning
- * of that RCU read-side critical section.  Note that these guarantees
- * include CPUs that are offline, idle, or executing in user mode, as
- * well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting RCU callback function "func()", then both CPU A and CPU B are
- * guaranteed to execute a full memory barrier during the time interval
- * between the call to call_rcu() and the invocation of "func()" -- even
- * if CPU A and CPU B are the same CPU (but again only if the system has
- * more than one CPU).
- */
-void call_rcu(struct rcu_head *head,
-	      rcu_callback_t func);
-
+void call_rcu(struct rcu_head *head, rcu_callback_t func);
 #else /* #ifdef CONFIG_PREEMPT_RCU */
-
-/* In classic RCU, call_rcu() is just call_rcu_sched(). */
 #define	call_rcu	call_rcu_sched
-
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
 
-/**
- * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_bh() assumes
- * that the read-side critical sections end on completion of a softirq
- * handler. This means that read-side critical sections in process
- * context must not be interrupted by softirqs. This interface is to be
- * used when most of the read-side critical sections are in softirq context.
- * RCU read-side critical sections are delimited by :
- *  - rcu_read_lock() and  rcu_read_unlock(), if in interrupt context.
- *  OR
- *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
- *  These may be nested.
- *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
- */
-void call_rcu_bh(struct rcu_head *head,
-		 rcu_callback_t func);
-
-/**
- * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_sched() assumes
- * that the read-side critical sections end on enabling of preemption
- * or on voluntary preemption.
- * RCU read-side critical sections are delimited by :
- *  - rcu_read_lock_sched() and  rcu_read_unlock_sched(),
- *  OR
- *  anything that disables preemption.
- *  These may be nested.
- *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
- */
-void call_rcu_sched(struct rcu_head *head,
-		    rcu_callback_t func);
-
+void call_rcu_bh(struct rcu_head *head, rcu_callback_t func);
+void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
 void synchronize_sched(void);
-
-/**
- * call_rcu_tasks() - Queue an RCU for invocation task-based grace period
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_tasks() assumes
- * that the read-side critical sections end at a voluntary context
- * switch (not a preemption!), entry into idle, or transition to usermode
- * execution.  As such, there are no read-side primitives analogous to
- * rcu_read_lock() and rcu_read_unlock() because this primitive is intended
- * to determine that all tasks have passed through a safe state, not so
- * much for data-strcuture synchronization.
- *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
- */
 void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
 void synchronize_rcu_tasks(void);
 void rcu_barrier_tasks(void);
@@ -301,22 +111,12 @@ void rcu_check_callbacks(int user);
 void rcu_report_dead(unsigned int cpu);
 void rcu_cpu_starting(unsigned int cpu);
 
-#ifndef CONFIG_TINY_RCU
-void rcu_end_inkernel_boot(void);
-#else /* #ifndef CONFIG_TINY_RCU */
-static inline void rcu_end_inkernel_boot(void) { }
-#endif /* #ifndef CONFIG_TINY_RCU */
-
 #ifdef CONFIG_RCU_STALL_COMMON
 void rcu_sysrq_start(void);
 void rcu_sysrq_end(void);
 #else /* #ifdef CONFIG_RCU_STALL_COMMON */
-static inline void rcu_sysrq_start(void)
-{
-}
-static inline void rcu_sysrq_end(void)
-{
-}
+static inline void rcu_sysrq_start(void) { }
+static inline void rcu_sysrq_end(void) { }
 #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
@@ -330,9 +130,7 @@ static inline void rcu_user_exit(void) { }
 #ifdef CONFIG_RCU_NOCB_CPU
 void rcu_init_nohz(void);
 #else /* #ifdef CONFIG_RCU_NOCB_CPU */
-static inline void rcu_init_nohz(void)
-{
-}
+static inline void rcu_init_nohz(void) { }
 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
 
 /**
@@ -368,15 +166,20 @@ static inline void rcu_init_nohz(void)
 #ifdef CONFIG_TASKS_RCU
 #define TASKS_RCU(x) x
 extern struct srcu_struct tasks_rcu_exit_srcu;
-#define rcu_note_voluntary_context_switch(t) \
+#define rcu_note_voluntary_context_switch_lite(t) \
 	do { \
-		rcu_all_qs(); \
 		if (READ_ONCE((t)->rcu_tasks_holdout)) \
 			WRITE_ONCE((t)->rcu_tasks_holdout, false); \
 	} while (0)
+#define rcu_note_voluntary_context_switch(t) \
+	do { \
+		rcu_all_qs(); \
+		rcu_note_voluntary_context_switch_lite(t); \
+	} while (0)
 #else /* #ifdef CONFIG_TASKS_RCU */
 #define TASKS_RCU(x) do { } while (0)
-#define rcu_note_voluntary_context_switch(t)	rcu_all_qs()
+#define rcu_note_voluntary_context_switch_lite(t)	do { } while (0)
+#define rcu_note_voluntary_context_switch(t)		rcu_all_qs()
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
 
 /**
@@ -392,10 +195,6 @@ do { \
 		rcu_note_voluntary_context_switch(current); \
 } while (0)
 
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP)
-bool __rcu_is_watching(void);
-#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
-
 /*
  * Infrastructure to implement the synchronize_() primitives in
  * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
@@ -409,10 +208,6 @@ bool __rcu_is_watching(void);
 #error "Unknown RCU implementation specified to kernel configuration"
 #endif
 
-#define RCU_SCHEDULER_INACTIVE	0
-#define RCU_SCHEDULER_INIT	1
-#define RCU_SCHEDULER_RUNNING	2
-
 /*
  * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
  * initialization and destruction of rcu_head on the stack. rcu_head structures
@@ -425,30 +220,16 @@ void destroy_rcu_head(struct rcu_head *head);
 void init_rcu_head_on_stack(struct rcu_head *head);
 void destroy_rcu_head_on_stack(struct rcu_head *head);
 #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-static inline void init_rcu_head(struct rcu_head *head)
-{
-}
-
-static inline void destroy_rcu_head(struct rcu_head *head)
-{
-}
-
-static inline void init_rcu_head_on_stack(struct rcu_head *head)
-{
-}
-
-static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
-{
-}
+static inline void init_rcu_head(struct rcu_head *head) { }
+static inline void destroy_rcu_head(struct rcu_head *head) { }
+static inline void init_rcu_head_on_stack(struct rcu_head *head) { }
+static inline void destroy_rcu_head_on_stack(struct rcu_head *head) { }
 #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
 #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
 bool rcu_lockdep_current_cpu_online(void);
 #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
-static inline bool rcu_lockdep_current_cpu_online(void)
-{
-	return true;
-}
+static inline bool rcu_lockdep_current_cpu_online(void) { return true; }
 #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -468,18 +249,8 @@ extern struct lockdep_map rcu_bh_lock_map;
 extern struct lockdep_map rcu_sched_lock_map;
 extern struct lockdep_map rcu_callback_map;
 int debug_lockdep_rcu_enabled(void);
-
 int rcu_read_lock_held(void);
 int rcu_read_lock_bh_held(void);
-
-/**
- * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
- *
- * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
- * RCU-sched read-side critical section.  In absence of
- * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
- * critical section unless it can prove otherwise.
- */
 int rcu_read_lock_sched_held(void);
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -526,9 +297,7 @@ static inline void rcu_preempt_sleep_check(void)
 			 "Illegal context switch in RCU read-side critical section");
 }
 #else /* #ifdef CONFIG_PROVE_RCU */
-static inline void rcu_preempt_sleep_check(void)
-{
-}
+static inline void rcu_preempt_sleep_check(void) { }
 #endif /* #else #ifdef CONFIG_PROVE_RCU */
 
 #define rcu_sleep_check()						\
@@ -1079,52 +848,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #define kfree_rcu(ptr, rcu_head)					\
 	__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
 
-#ifdef CONFIG_TINY_RCU
-static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
-{
-	*nextevt = KTIME_MAX;
-	return 0;
-}
-#endif /* #ifdef CONFIG_TINY_RCU */
-
-#if defined(CONFIG_RCU_NOCB_CPU_ALL)
-static inline bool rcu_is_nocb_cpu(int cpu) { return true; }
-#elif defined(CONFIG_RCU_NOCB_CPU)
-bool rcu_is_nocb_cpu(int cpu);
-#else
-static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
-#endif
-
-
-/* Only for use by adaptive-ticks code. */
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-bool rcu_sys_is_idle(void);
-void rcu_sysidle_force_exit(void);
-#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-
-static inline bool rcu_sys_is_idle(void)
-{
-	return false;
-}
-
-static inline void rcu_sysidle_force_exit(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-
-
-/*
- * Dump the ftrace buffer, but only one time per callsite per boot.
- */
-#define rcu_ftrace_dump(oops_dump_mode) \
-do { \
-	static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
-	\
-	if (!atomic_read(&___rfd_beenhere) && \
-	    !atomic_xchg(&___rfd_beenhere, 1)) \
-		ftrace_dump(oops_dump_mode); \
-} while (0)
 
 /*
  * Place this after a lock-acquisition primitive to guarantee that
@@ -1132,11 +855,11 @@ do { \
  * if the UNLOCK and LOCK are executed by the same CPU or if the
  * UNLOCK and LOCK operate on the same lock variable.
  */
-#ifdef CONFIG_PPC
+#ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE
 #define smp_mb__after_unlock_lock()	smp_mb()  /* Full ordering for lock. */
-#else /* #ifdef CONFIG_PPC */
+#else /* #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
 #define smp_mb__after_unlock_lock()	do { } while (0)
-#endif /* #else #ifdef CONFIG_PPC */
+#endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
 
 
 #endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index b452953e21c8..5becbbccb998 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -25,7 +25,7 @@
 #ifndef __LINUX_TINY_H
 #define __LINUX_TINY_H
 
-#include <linux/cache.h>
+#include <linux/ktime.h>
 
 struct rcu_dynticks;
 static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
@@ -33,6 +33,9 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 	return 0;
 }
 
+/* Never flag non-existent other CPUs! */
+static inline bool rcu_eqs_special_set(int cpu) { return false; }
+
 static inline unsigned long get_state_synchronize_rcu(void)
 {
 	return 0;
@@ -87,160 +90,44 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 	call_rcu(head, func);
 }
 
-static inline void rcu_note_context_switch(void)
-{
-	rcu_sched_qs();
-}
-
-/*
- * Take advantage of the fact that there is only one CPU, which
- * allows us to ignore virtualization-based context switches.
- */
-static inline void rcu_virt_note_context_switch(int cpu)
-{
-}
-
-/*
- * Return the number of grace periods started.
- */
-static inline unsigned long rcu_batches_started(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of bottom-half grace periods started.
- */
-static inline unsigned long rcu_batches_started_bh(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of sched grace periods started.
- */
-static inline unsigned long rcu_batches_started_sched(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of grace periods completed.
- */
-static inline unsigned long rcu_batches_completed(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of bottom-half grace periods completed.
- */
-static inline unsigned long rcu_batches_completed_bh(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of sched grace periods completed.
- */
-static inline unsigned long rcu_batches_completed_sched(void)
-{
-	return 0;
-}
+#define rcu_note_context_switch(preempt) \
+	do { \
+		rcu_sched_qs(); \
+		rcu_note_voluntary_context_switch_lite(current); \
+	} while (0)
 
-/*
- * Return the number of expedited grace periods completed.
- */
-static inline unsigned long rcu_exp_batches_completed(void)
+static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 {
+	*nextevt = KTIME_MAX;
 	return 0;
 }
 
 /*
- * Return the number of expedited sched grace periods completed.
+ * Take advantage of the fact that there is only one CPU, which
+ * allows us to ignore virtualization-based context switches.
  */
-static inline unsigned long rcu_exp_batches_completed_sched(void)
-{
-	return 0;
-}
-
-static inline void rcu_force_quiescent_state(void)
-{
-}
-
-static inline void rcu_bh_force_quiescent_state(void)
-{
-}
-
-static inline void rcu_sched_force_quiescent_state(void)
-{
-}
-
-static inline void show_rcu_gp_kthreads(void)
-{
-}
-
-static inline void rcu_cpu_stall_reset(void)
-{
-}
-
-static inline void rcu_idle_enter(void)
-{
-}
-
-static inline void rcu_idle_exit(void)
-{
-}
-
-static inline void rcu_irq_enter(void)
-{
-}
-
-static inline void rcu_irq_exit_irqson(void)
-{
-}
-
-static inline void rcu_irq_enter_irqson(void)
-{
-}
-
-static inline void rcu_irq_exit(void)
-{
-}
-
-static inline void exit_rcu(void)
-{
-}
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static inline void rcu_virt_note_context_switch(int cpu) { }
+static inline void rcu_cpu_stall_reset(void) { }
+static inline void rcu_idle_enter(void) { }
+static inline void rcu_idle_exit(void) { }
+static inline void rcu_irq_enter(void) { }
+static inline bool rcu_irq_enter_disabled(void) { return false; }
+static inline void rcu_irq_exit_irqson(void) { }
+static inline void rcu_irq_enter_irqson(void) { }
+static inline void rcu_irq_exit(void) { }
+static inline void exit_rcu(void) { }
+
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
 extern int rcu_scheduler_active __read_mostly;
 void rcu_scheduler_starting(void);
-#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-static inline void rcu_scheduler_starting(void)
-{
-}
-#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
-
-static inline bool rcu_is_watching(void)
-{
-	return __rcu_is_watching();
-}
-
-#else /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
-
-static inline bool rcu_is_watching(void)
-{
-	return true;
-}
-
-#endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
-
-static inline void rcu_all_qs(void)
-{
-	barrier(); /* Avoid RCU read-side critical sections leaking across. */
-}
+#else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
+static inline void rcu_scheduler_starting(void) { }
+#endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
+static inline void rcu_end_inkernel_boot(void) { }
+static inline bool rcu_is_watching(void) { return true; }
+
+/* Avoid RCU read-side critical sections leaking across. */
+static inline void rcu_all_qs(void) { barrier(); }
 
 /* RCUtree hotplug events */
 #define rcutree_prepare_cpu      NULL
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 63a4e4cf40a5..37d6fd3b7ff8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -30,7 +30,7 @@
 #ifndef __LINUX_RCUTREE_H
 #define __LINUX_RCUTREE_H
 
-void rcu_note_context_switch(void);
+void rcu_note_context_switch(bool preempt);
 int rcu_needs_cpu(u64 basem, u64 *nextevt);
 void rcu_cpu_stall_reset(void);
 
@@ -41,7 +41,7 @@ void rcu_cpu_stall_reset(void);
  */
 static inline void rcu_virt_note_context_switch(int cpu)
 {
-	rcu_note_context_switch();
+	rcu_note_context_switch(false);
 }
 
 void synchronize_rcu_bh(void);
@@ -79,36 +79,20 @@ void cond_synchronize_rcu(unsigned long oldstate);
 unsigned long get_state_synchronize_sched(void);
 void cond_synchronize_sched(unsigned long oldstate);
 
-extern unsigned long rcutorture_testseq;
-extern unsigned long rcutorture_vernum;
-unsigned long rcu_batches_started(void);
-unsigned long rcu_batches_started_bh(void);
-unsigned long rcu_batches_started_sched(void);
-unsigned long rcu_batches_completed(void);
-unsigned long rcu_batches_completed_bh(void);
-unsigned long rcu_batches_completed_sched(void);
-unsigned long rcu_exp_batches_completed(void);
-unsigned long rcu_exp_batches_completed_sched(void);
-void show_rcu_gp_kthreads(void);
-
-void rcu_force_quiescent_state(void);
-void rcu_bh_force_quiescent_state(void);
-void rcu_sched_force_quiescent_state(void);
-
 void rcu_idle_enter(void);
 void rcu_idle_exit(void);
 void rcu_irq_enter(void);
 void rcu_irq_exit(void);
 void rcu_irq_enter_irqson(void);
 void rcu_irq_exit_irqson(void);
+bool rcu_irq_enter_disabled(void);
 
 void exit_rcu(void);
 
 void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
-
+void rcu_end_inkernel_boot(void);
 bool rcu_is_watching(void);
-
 void rcu_all_qs(void);
 
 /* RCUtree hotplug events */
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index b34aa649d204..591792c8e5b0 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -41,6 +41,7 @@ static inline unsigned int refcount_read(const refcount_t *r)
 	return atomic_read(&r->refs);
 }
 
+#ifdef CONFIG_REFCOUNT_FULL
 extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r);
 extern void refcount_add(unsigned int i, refcount_t *r);
 
@@ -48,10 +49,45 @@ extern __must_check bool refcount_inc_not_zero(refcount_t *r);
 extern void refcount_inc(refcount_t *r);
 
 extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r);
-extern void refcount_sub(unsigned int i, refcount_t *r);
 
 extern __must_check bool refcount_dec_and_test(refcount_t *r);
 extern void refcount_dec(refcount_t *r);
+#else
+static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r)
+{
+	return atomic_add_unless(&r->refs, i, 0);
+}
+
+static inline void refcount_add(unsigned int i, refcount_t *r)
+{
+	atomic_add(i, &r->refs);
+}
+
+static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
+{
+	return atomic_add_unless(&r->refs, 1, 0);
+}
+
+static inline void refcount_inc(refcount_t *r)
+{
+	atomic_inc(&r->refs);
+}
+
+static inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+{
+	return atomic_sub_and_test(i, &r->refs);
+}
+
+static inline __must_check bool refcount_dec_and_test(refcount_t *r)
+{
+	return atomic_dec_and_test(&r->refs);
+}
+
+static inline void refcount_dec(refcount_t *r)
+{
+	atomic_dec(&r->refs);
+}
+#endif /* CONFIG_REFCOUNT_FULL */
 
 extern __must_check bool refcount_dec_if_one(refcount_t *r);
 extern __must_check bool refcount_dec_not_one(refcount_t *r);
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 1abba5ce2a2f..44fd002f7cd5 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -37,6 +37,9 @@ struct rt_mutex {
 	int			line;
 	void			*magic;
 #endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	dep_map;
+#endif
 };
 
 struct rt_mutex_waiter;
@@ -58,19 +61,33 @@ struct hrtimer_sleeper;
 #ifdef CONFIG_DEBUG_RT_MUTEXES
 # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
 	, .name = #mutexname, .file = __FILE__, .line = __LINE__
-# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, __func__)
+
+# define rt_mutex_init(mutex) \
+do { \
+	static struct lock_class_key __key; \
+	__rt_mutex_init(mutex, __func__, &__key); \
+} while (0)
+
  extern void rt_mutex_debug_task_free(struct task_struct *tsk);
 #else
 # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
-# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, NULL)
+# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, NULL, NULL)
 # define rt_mutex_debug_task_free(t)			do { } while (0)
 #endif
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \
+	, .dep_map = { .name = #mutexname }
+#else
+#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)
+#endif
+
 #define __RT_MUTEX_INITIALIZER(mutexname) \
 	{ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
 	, .waiters = RB_ROOT \
 	, .owner = NULL \
-	__DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
+	__DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
+	__DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)}
 
 #define DEFINE_RT_MUTEX(mutexname) \
 	struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
@@ -86,7 +103,7 @@ static inline int rt_mutex_is_locked(struct rt_mutex *lock)
 	return lock->owner != NULL;
 }
 
-extern void __rt_mutex_init(struct rt_mutex *lock, const char *name);
+extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key);
 extern void rt_mutex_destroy(struct rt_mutex *lock);
 
 extern void rt_mutex_lock(struct rt_mutex *lock);
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index cb3c8fe6acd7..4b3286ac60c8 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -278,6 +278,8 @@ size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
 			    const void *buf, size_t buflen, off_t skip);
 size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
 			  void *buf, size_t buflen, off_t skip);
+size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
+		       size_t buflen, off_t skip);
 
 /*
  * Maximum number of entries that will be allocated in one piece, if
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b69fc650201..1f0f427e0292 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -421,7 +421,8 @@ struct sched_dl_entity {
 	u64				dl_runtime;	/* Maximum runtime for each instance	*/
 	u64				dl_deadline;	/* Relative deadline of each instance	*/
 	u64				dl_period;	/* Separation of two instances (period) */
-	u64				dl_bw;		/* dl_runtime / dl_deadline		*/
+	u64				dl_bw;		/* dl_runtime / dl_period		*/
+	u64				dl_density;	/* dl_runtime / dl_deadline		*/
 
 	/*
 	 * Actual scheduling parameters. Initialized with the values above,
@@ -445,16 +446,33 @@ struct sched_dl_entity {
 	 *
 	 * @dl_yielded tells if task gave up the CPU before consuming
 	 * all its available runtime during the last job.
+	 *
+	 * @dl_non_contending tells if the task is inactive while still
+	 * contributing to the active utilization. In other words, it
+	 * indicates if the inactive timer has been armed and its handler
+	 * has not been executed yet. This flag is useful to avoid race
+	 * conditions between the inactive timer handler and the wakeup
+	 * code.
 	 */
 	int				dl_throttled;
 	int				dl_boosted;
 	int				dl_yielded;
+	int				dl_non_contending;
 
 	/*
 	 * Bandwidth enforcement timer. Each -deadline task has its
 	 * own bandwidth to be enforced, thus we need one timer per task.
 	 */
 	struct hrtimer			dl_timer;
+
+	/*
+	 * Inactive timer, responsible for decreasing the active utilization
+	 * at the "0-lag time". When a -deadline task blocks, it contributes
+	 * to GRUB's active utilization until the "0-lag time", hence a
+	 * timer is needed to decrease the active utilization at the correct
+	 * time.
+	 */
+	struct hrtimer inactive_timer;
 };
 
 union rcu_special {
@@ -1096,8 +1114,6 @@ static inline struct pid *task_session(struct task_struct *task)
  *                     current.
  * task_xid_nr_ns()  : id seen from the ns specified;
  *
- * set_task_vxid()   : assigns a virtual id to a task;
- *
  * see also pid_nr() etc in include/linux/pid.h
  */
 pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns);
diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index 34fe92ce1ebd..a55600ffdf4b 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -23,10 +23,6 @@ extern u64 sched_clock_cpu(int cpu);
 extern void sched_clock_init(void);
 
 #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-static inline void sched_clock_init_late(void)
-{
-}
-
 static inline void sched_clock_tick(void)
 {
 }
@@ -39,7 +35,7 @@ static inline void sched_clock_idle_sleep_event(void)
 {
 }
 
-static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
+static inline void sched_clock_idle_wakeup_event(void)
 {
 }
 
@@ -53,7 +49,6 @@ static inline u64 local_clock(void)
 	return sched_clock();
 }
 #else
-extern void sched_clock_init_late(void);
 extern int sched_clock_stable(void);
 extern void clear_sched_clock_stable(void);
 
@@ -63,10 +58,10 @@ extern void clear_sched_clock_stable(void);
  */
 extern u64 __sched_clock_offset;
 
-
 extern void sched_clock_tick(void);
+extern void sched_clock_tick_stable(void);
 extern void sched_clock_idle_sleep_event(void);
-extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+extern void sched_clock_idle_wakeup_event(void);
 
 /*
  * As outlined in clock.c, provides a fast, high resolution, nanosecond
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 4995b717500b..7d3f75db23e5 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -23,11 +23,11 @@ static inline void set_cpu_sd_state_idle(void) { }
 #endif
 
 #ifdef CONFIG_NO_HZ_COMMON
-void calc_load_enter_idle(void);
-void calc_load_exit_idle(void);
+void calc_load_nohz_start(void);
+void calc_load_nohz_stop(void);
 #else
-static inline void calc_load_enter_idle(void) { }
-static inline void calc_load_exit_idle(void) { }
+static inline void calc_load_nohz_start(void) { }
+static inline void calc_load_nohz_stop(void) { }
 #endif /* CONFIG_NO_HZ_COMMON */
 
 #if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index a978d7189cfd..f0f065c5afcf 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -95,8 +95,6 @@ static inline void put_task_struct(struct task_struct *t)
 }
 
 struct task_struct *task_rcu_dereference(struct task_struct **ptask);
-struct task_struct *try_get_task_struct(struct task_struct **ptask);
-
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
 extern int arch_task_struct_size __read_mostly;
diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index cda76c6506ca..e69402d4a8ae 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -195,6 +195,7 @@ int serdev_device_open(struct serdev_device *);
 void serdev_device_close(struct serdev_device *);
 unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int);
 void serdev_device_set_flow_control(struct serdev_device *, bool);
+int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t);
 void serdev_device_wait_until_sent(struct serdev_device *, long);
 int serdev_device_get_tiocm(struct serdev_device *);
 int serdev_device_set_tiocm(struct serdev_device *, int, int);
@@ -236,6 +237,12 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev
 	return 0;
 }
 static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {}
+static inline int serdev_device_write_buf(struct serdev_device *serdev,
+					  const unsigned char *buf,
+					  size_t count)
+{
+	return -ENODEV;
+}
 static inline void serdev_device_wait_until_sent(struct serdev_device *sdev, long timeout) {}
 static inline int serdev_device_get_tiocm(struct serdev_device *serdev)
 {
@@ -301,7 +308,7 @@ struct tty_driver;
 struct device *serdev_tty_port_register(struct tty_port *port,
 					struct device *parent,
 					struct tty_driver *drv, int idx);
-void serdev_tty_port_unregister(struct tty_port *port);
+int serdev_tty_port_unregister(struct tty_port *port);
 #else
 static inline struct device *serdev_tty_port_register(struct tty_port *port,
 					   struct device *parent,
@@ -309,14 +316,10 @@ static inline struct device *serdev_tty_port_register(struct tty_port *port,
 {
 	return ERR_PTR(-ENODEV);
 }
-static inline void serdev_tty_port_unregister(struct tty_port *port) {}
-#endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */
-
-static inline int serdev_device_write_buf(struct serdev_device *serdev,
-					  const unsigned char *data,
-					  size_t count)
+static inline int serdev_tty_port_unregister(struct tty_port *port)
 {
-	return serdev_device_write(serdev, data, count, 0);
+	return -ENODEV;
 }
+#endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */
 
 #endif /*_LINUX_SERDEV_H */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 3c37a8c51921..04a7f7993e67 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -28,7 +28,7 @@
 #define SLAB_STORE_USER		0x00010000UL	/* DEBUG: Store the last owner for bug hunting */
 #define SLAB_PANIC		0x00040000UL	/* Panic if kmem_cache_create() fails */
 /*
- * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS!
+ * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
  *
  * This delays freeing the SLAB page by a grace period, it does _NOT_
  * delay object freeing. This means that if you do kmem_cache_free()
@@ -61,8 +61,10 @@
  *
  * rcu_read_lock before reading the address, then rcu_read_unlock after
  * taking the spinlock within the structure expected at that address.
+ *
+ * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
  */
-#define SLAB_DESTROY_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */
+#define SLAB_TYPESAFE_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */
 #define SLAB_MEM_SPREAD		0x00100000UL	/* Spread some memory over cpuset */
 #define SLAB_TRACE		0x00200000UL	/* Trace allocations and frees */
 
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 07ef550c6627..93315d6b21a8 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -84,6 +84,7 @@ struct kmem_cache {
 	int red_left_pad;	/* Left redzone padding size */
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;	/* For sysfs */
+	struct work_struct kobj_remove_work;
 #endif
 #ifdef CONFIG_MEMCG
 	struct memcg_cache_params memcg_params;
diff --git a/include/linux/soc/renesas/rcar-rst.h b/include/linux/soc/renesas/rcar-rst.h
index a18e0783946b..787e7ad53d45 100644
--- a/include/linux/soc/renesas/rcar-rst.h
+++ b/include/linux/soc/renesas/rcar-rst.h
@@ -1,6 +1,11 @@
 #ifndef __LINUX_SOC_RENESAS_RCAR_RST_H__
 #define __LINUX_SOC_RENESAS_RCAR_RST_H__
 
+#if defined(CONFIG_ARCH_RCAR_GEN1) || defined(CONFIG_ARCH_RCAR_GEN2) || \
+    defined(CONFIG_ARCH_R8A7795) || defined(CONFIG_ARCH_R8A7796)
 int rcar_rst_read_mode_pins(u32 *mode);
+#else
+static inline int rcar_rst_read_mode_pins(u32 *mode) { return -ENODEV; }
+#endif
 
 #endif /* __LINUX_SOC_RENESAS_RCAR_RST_H__ */
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 59248dcc6ef3..d9510e8522d4 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -369,6 +369,26 @@ static __always_inline int spin_trylock_irq(spinlock_t *lock)
 	raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
 })
 
+/**
+ * spin_unlock_wait - Interpose between successive critical sections
+ * @lock: the spinlock whose critical sections are to be interposed.
+ *
+ * Semantically this is equivalent to a spin_lock() immediately
+ * followed by a spin_unlock().  However, most architectures have
+ * more efficient implementations in which the spin_unlock_wait()
+ * cannot block concurrent lock acquisition, and in some cases
+ * where spin_unlock_wait() does not write to the lock variable.
+ * Nevertheless, spin_unlock_wait() can have high overhead, so if
+ * you feel the need to use it, please check to see if there is
+ * a better way to get your job done.
+ *
+ * The ordering guarantees provided by spin_unlock_wait() are:
+ *
+ * 1.  All accesses preceding the spin_unlock_wait() happen before
+ *     any accesses in later critical sections for this same lock.
+ * 2.  All accesses following the spin_unlock_wait() happen after
+ *     any accesses in earlier critical sections for this same lock.
+ */
 static __always_inline void spin_unlock_wait(spinlock_t *lock)
 {
 	raw_spin_unlock_wait(&lock->rlock);
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index a598cf3ac70c..39af9bc0f653 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -22,7 +22,7 @@
  *	   Lai Jiangshan <laijs@cn.fujitsu.com>
  *
  * For detailed explanation of Read-Copy Update mechanism see -
- * 		Documentation/RCU/ *.txt
+ *		Documentation/RCU/ *.txt
  *
  */
 
@@ -32,35 +32,9 @@
 #include <linux/mutex.h>
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
+#include <linux/rcu_segcblist.h>
 
-struct srcu_array {
-	unsigned long lock_count[2];
-	unsigned long unlock_count[2];
-};
-
-struct rcu_batch {
-	struct rcu_head *head, **tail;
-};
-
-#define RCU_BATCH_INIT(name) { NULL, &(name.head) }
-
-struct srcu_struct {
-	unsigned long completed;
-	struct srcu_array __percpu *per_cpu_ref;
-	spinlock_t queue_lock; /* protect ->batch_queue, ->running */
-	bool running;
-	/* callbacks just queued */
-	struct rcu_batch batch_queue;
-	/* callbacks try to do the first check_zero */
-	struct rcu_batch batch_check0;
-	/* callbacks done with the first check_zero and the flip */
-	struct rcu_batch batch_check1;
-	struct rcu_batch batch_done;
-	struct delayed_work work;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-};
+struct srcu_struct;
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
@@ -82,74 +56,23 @@ int init_srcu_struct(struct srcu_struct *sp);
 #define __SRCU_DEP_MAP_INIT(srcu_name)
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
-void process_srcu(struct work_struct *work);
-
-#define __SRCU_STRUCT_INIT(name)					\
-	{								\
-		.completed = -300,					\
-		.per_cpu_ref = &name##_srcu_array,			\
-		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
-		.running = false,					\
-		.batch_queue = RCU_BATCH_INIT(name.batch_queue),	\
-		.batch_check0 = RCU_BATCH_INIT(name.batch_check0),	\
-		.batch_check1 = RCU_BATCH_INIT(name.batch_check1),	\
-		.batch_done = RCU_BATCH_INIT(name.batch_done),		\
-		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
-		__SRCU_DEP_MAP_INIT(name)				\
-	}
-
-/*
- * Define and initialize a srcu struct at build time.
- * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
- *
- * Note that although DEFINE_STATIC_SRCU() hides the name from other
- * files, the per-CPU variable rules nevertheless require that the
- * chosen name be globally unique.  These rules also prohibit use of
- * DEFINE_STATIC_SRCU() within a function.  If these rules are too
- * restrictive, declare the srcu_struct manually.  For example, in
- * each file:
- *
- *	static struct srcu_struct my_srcu;
- *
- * Then, before the first use of each my_srcu, manually initialize it:
- *
- *	init_srcu_struct(&my_srcu);
- *
- * See include/linux/percpu-defs.h for the rules on per-CPU variables.
- */
-#define __DEFINE_SRCU(name, is_static)					\
-	static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\
-	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
-#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
-#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
+#ifdef CONFIG_TINY_SRCU
+#include <linux/srcutiny.h>
+#elif defined(CONFIG_TREE_SRCU)
+#include <linux/srcutree.h>
+#elif defined(CONFIG_SRCU)
+#error "Unknown SRCU implementation specified to kernel configuration"
+#else
+/* Dummy definition for things like notifiers.  Actual use gets link error. */
+struct srcu_struct { };
+#endif
 
-/**
- * call_srcu() - Queue a callback for invocation after an SRCU grace period
- * @sp: srcu_struct in queue the callback
- * @head: structure to be used for queueing the SRCU callback.
- * @func: function to be invoked after the SRCU grace period
- *
- * The callback function will be invoked some time after a full SRCU
- * grace period elapses, in other words after all pre-existing SRCU
- * read-side critical sections have completed.  However, the callback
- * function might well execute concurrently with other SRCU read-side
- * critical sections that started after call_srcu() was invoked.  SRCU
- * read-side critical sections are delimited by srcu_read_lock() and
- * srcu_read_unlock(), and may be nested.
- *
- * The callback will be invoked from process context, but must nevertheless
- * be fast and must not block.
- */
 void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
 		void (*func)(struct rcu_head *head));
-
 void cleanup_srcu_struct(struct srcu_struct *sp);
 int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
 void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
 void synchronize_srcu(struct srcu_struct *sp);
-void synchronize_srcu_expedited(struct srcu_struct *sp);
-unsigned long srcu_batches_completed(struct srcu_struct *sp);
-void srcu_barrier(struct srcu_struct *sp);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
@@ -232,9 +155,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 {
 	int retval;
 
-	preempt_disable();
 	retval = __srcu_read_lock(sp);
-	preempt_enable();
 	rcu_lock_acquire(&(sp)->dep_map);
 	return retval;
 }
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
new file mode 100644
index 000000000000..cfbfc540cafc
--- /dev/null
+++ b/include/linux/srcutiny.h
@@ -0,0 +1,90 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion,
+ *	tiny variant.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2017
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ */
+
+#ifndef _LINUX_SRCU_TINY_H
+#define _LINUX_SRCU_TINY_H
+
+#include <linux/swait.h>
+
+struct srcu_struct {
+	short srcu_lock_nesting[2];	/* srcu_read_lock() nesting depth. */
+	short srcu_idx;			/* Current reader array element. */
+	u8 srcu_gp_running;		/* GP workqueue running? */
+	u8 srcu_gp_waiting;		/* GP waiting for readers? */
+	struct swait_queue_head srcu_wq;
+					/* Last srcu_read_unlock() wakes GP. */
+	struct rcu_head *srcu_cb_head;	/* Pending callbacks: Head. */
+	struct rcu_head **srcu_cb_tail;	/* Pending callbacks: Tail. */
+	struct work_struct srcu_work;	/* For driving grace periods. */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+};
+
+void srcu_drive_gp(struct work_struct *wp);
+
+#define __SRCU_STRUCT_INIT(name)					\
+{									\
+	.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq),	\
+	.srcu_cb_tail = &name.srcu_cb_head,				\
+	.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp),	\
+	__SRCU_DEP_MAP_INIT(name)					\
+}
+
+/*
+ * This odd _STATIC_ arrangement is needed for API compatibility with
+ * Tree SRCU, which needs some per-CPU data.
+ */
+#define DEFINE_SRCU(name) \
+	struct srcu_struct name = __SRCU_STRUCT_INIT(name)
+#define DEFINE_STATIC_SRCU(name) \
+	static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
+
+void synchronize_srcu(struct srcu_struct *sp);
+
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct.  Can be invoked from irq/bh handlers, but the matching
+ * __srcu_read_unlock() must be in the same handler instance.  Returns an
+ * index that must be passed to the matching srcu_read_unlock().
+ */
+static inline int __srcu_read_lock(struct srcu_struct *sp)
+{
+	int idx;
+
+	idx = READ_ONCE(sp->srcu_idx);
+	WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1);
+	return idx;
+}
+
+static inline void synchronize_srcu_expedited(struct srcu_struct *sp)
+{
+	synchronize_srcu(sp);
+}
+
+static inline void srcu_barrier(struct srcu_struct *sp)
+{
+	synchronize_srcu(sp);
+}
+
+#endif
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
new file mode 100644
index 000000000000..42973f787e7e
--- /dev/null
+++ b/include/linux/srcutree.h
@@ -0,0 +1,145 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion,
+ *	tree variant.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2017
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ */
+
+#ifndef _LINUX_SRCU_TREE_H
+#define _LINUX_SRCU_TREE_H
+
+#include <linux/rcu_node_tree.h>
+#include <linux/completion.h>
+
+struct srcu_node;
+struct srcu_struct;
+
+/*
+ * Per-CPU structure feeding into leaf srcu_node, similar in function
+ * to rcu_node.
+ */
+struct srcu_data {
+	/* Read-side state. */
+	unsigned long srcu_lock_count[2];	/* Locks per CPU. */
+	unsigned long srcu_unlock_count[2];	/* Unlocks per CPU. */
+
+	/* Update-side state. */
+	raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp;
+	struct rcu_segcblist srcu_cblist;	/* List of callbacks.*/
+	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
+	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
+	bool srcu_cblist_invoking;		/* Invoking these CBs? */
+	struct delayed_work work;		/* Context for CB invoking. */
+	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
+	struct srcu_node *mynode;		/* Leaf srcu_node. */
+	unsigned long grpmask;			/* Mask for leaf srcu_node */
+						/*  ->srcu_data_have_cbs[]. */
+	int cpu;
+	struct srcu_struct *sp;
+};
+
+/*
+ * Node in SRCU combining tree, similar in function to rcu_data.
+ */
+struct srcu_node {
+	raw_spinlock_t __private lock;
+	unsigned long srcu_have_cbs[4];		/* GP seq for children */
+						/*  having CBs, but only */
+						/*  is > ->srcu_gq_seq. */
+	unsigned long srcu_data_have_cbs[4];	/* Which srcu_data structs */
+						/*  have CBs for given GP? */
+	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
+	struct srcu_node *srcu_parent;		/* Next up in tree. */
+	int grplo;				/* Least CPU for node. */
+	int grphi;				/* Biggest CPU for node. */
+};
+
+/*
+ * Per-SRCU-domain structure, similar in function to rcu_state.
+ */
+struct srcu_struct {
+	struct srcu_node node[NUM_RCU_NODES];	/* Combining tree. */
+	struct srcu_node *level[RCU_NUM_LVLS + 1];
+						/* First node at each level. */
+	struct mutex srcu_cb_mutex;		/* Serialize CB preparation. */
+	raw_spinlock_t __private lock;		/* Protect counters */
+	struct mutex srcu_gp_mutex;		/* Serialize GP work. */
+	unsigned int srcu_idx;			/* Current rdr array element. */
+	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
+	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
+	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
+	unsigned long srcu_last_gp_end;		/* Last GP end timestamp (ns) */
+	struct srcu_data __percpu *sda;		/* Per-CPU srcu_data array. */
+	unsigned long srcu_barrier_seq;		/* srcu_barrier seq #. */
+	struct mutex srcu_barrier_mutex;	/* Serialize barrier ops. */
+	struct completion srcu_barrier_completion;
+						/* Awaken barrier rq at end. */
+	atomic_t srcu_barrier_cpu_cnt;		/* # CPUs not yet posting a */
+						/*  callback for the barrier */
+						/*  operation. */
+	struct delayed_work work;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+};
+
+/* Values for state variable (bottom bits of ->srcu_gp_seq). */
+#define SRCU_STATE_IDLE		0
+#define SRCU_STATE_SCAN1	1
+#define SRCU_STATE_SCAN2	2
+
+void process_srcu(struct work_struct *work);
+
+#define __SRCU_STRUCT_INIT(name)					\
+	{								\
+		.sda = &name##_srcu_data,				\
+		.lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock),		\
+		.srcu_gp_seq_needed = 0 - 1,				\
+		__SRCU_DEP_MAP_INIT(name)				\
+	}
+
+/*
+ * Define and initialize a srcu struct at build time.
+ * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ *
+ * Note that although DEFINE_STATIC_SRCU() hides the name from other
+ * files, the per-CPU variable rules nevertheless require that the
+ * chosen name be globally unique.  These rules also prohibit use of
+ * DEFINE_STATIC_SRCU() within a function.  If these rules are too
+ * restrictive, declare the srcu_struct manually.  For example, in
+ * each file:
+ *
+ *	static struct srcu_struct my_srcu;
+ *
+ * Then, before the first use of each my_srcu, manually initialize it:
+ *
+ *	init_srcu_struct(&my_srcu);
+ *
+ * See include/linux/percpu-defs.h for the rules on per-CPU variables.
+ */
+#define __DEFINE_SRCU(name, is_static)					\
+	static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\
+	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
+#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
+#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
+
+void synchronize_srcu_expedited(struct srcu_struct *sp);
+void srcu_barrier(struct srcu_struct *sp);
+
+#endif
diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h
index 245fc59b7324..b7e85b341a54 100644
--- a/include/linux/sunrpc/rpc_rdma.h
+++ b/include/linux/sunrpc/rpc_rdma.h
@@ -143,6 +143,9 @@ enum rpcrdma_proc {
 #define rdma_done	cpu_to_be32(RDMA_DONE)
 #define rdma_error	cpu_to_be32(RDMA_ERROR)
 
+#define err_vers	cpu_to_be32(ERR_VERS)
+#define err_chunk	cpu_to_be32(ERR_CHUNK)
+
 /*
  * Private extension to RPC-over-RDMA Version One.
  * Message passed during RDMA-CM connection set-up.
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 7ba040c797ec..9d7529ffc4ce 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -13,7 +13,7 @@
 #include <linux/ktime.h>
 #include <linux/sunrpc/types.h>
 #include <linux/spinlock.h>
-#include <linux/wait.h>
+#include <linux/wait_bit.h>
 #include <linux/workqueue.h>
 #include <linux/sunrpc/xdr.h>
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index e770abeed32d..11cef5a7bc87 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -474,6 +474,7 @@ void		   svc_pool_map_put(void);
 struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
 			struct svc_serv_ops *);
 int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
+int		   svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int);
 int		   svc_pool_stats_open(struct svc_serv *serv, struct file *file);
 void		   svc_destroy(struct svc_serv *);
 void		   svc_shutdown_net(struct svc_serv *, struct net *);
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index b105f73e3ca2..f3787d800ba4 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -48,6 +48,12 @@
 #include <rdma/rdma_cm.h>
 #define SVCRDMA_DEBUG
 
+/* Default and maximum inline threshold sizes */
+enum {
+	RPCRDMA_DEF_INLINE_THRESH = 4096,
+	RPCRDMA_MAX_INLINE_THRESH = 65536
+};
+
 /* RPC/RDMA parameters and stats */
 extern unsigned int svcrdma_ord;
 extern unsigned int svcrdma_max_requests;
@@ -85,27 +91,11 @@ struct svc_rdma_op_ctxt {
 	enum dma_data_direction direction;
 	int count;
 	unsigned int mapped_sges;
-	struct ib_sge sge[RPCSVC_MAXPAGES];
+	struct ib_send_wr send_wr;
+	struct ib_sge sge[1 + RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE];
 	struct page *pages[RPCSVC_MAXPAGES];
 };
 
-/*
- * NFS_ requests are mapped on the client side by the chunk lists in
- * the RPCRDMA header. During the fetching of the RPC from the client
- * and the writing of the reply to the client, the memory in the
- * client and the memory in the server must be mapped as contiguous
- * vaddr/len for access by the hardware. These data strucures keep
- * these mappings.
- *
- * For an RDMA_WRITE, the 'sge' maps the RPC REPLY. For RDMA_READ, the
- * 'sge' in the svc_rdma_req_map maps the server side RPC reply and the
- * 'ch' field maps the read-list of the RPCRDMA header to the 'sge'
- * mapping of the reply.
- */
-struct svc_rdma_chunk_sge {
-	int start;		/* sge no for this chunk */
-	int count;		/* sge count for this chunk */
-};
 struct svc_rdma_fastreg_mr {
 	struct ib_mr *mr;
 	struct scatterlist *sg;
@@ -114,15 +104,7 @@ struct svc_rdma_fastreg_mr {
 	enum dma_data_direction direction;
 	struct list_head frmr_list;
 };
-struct svc_rdma_req_map {
-	struct list_head free;
-	unsigned long count;
-	union {
-		struct kvec sge[RPCSVC_MAXPAGES];
-		struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
-		unsigned long lkey[RPCSVC_MAXPAGES];
-	};
-};
+
 #define RDMACTXT_F_LAST_CTXT	2
 
 #define	SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */
@@ -144,14 +126,15 @@ struct svcxprt_rdma {
 	u32		     sc_max_requests;	/* Max requests */
 	u32		     sc_max_bc_requests;/* Backward credits */
 	int                  sc_max_req_size;	/* Size of each RQ WR buf */
+	u8		     sc_port_num;
 
 	struct ib_pd         *sc_pd;
 
 	spinlock_t	     sc_ctxt_lock;
 	struct list_head     sc_ctxts;
 	int		     sc_ctxt_used;
-	spinlock_t	     sc_map_lock;
-	struct list_head     sc_maps;
+	spinlock_t	     sc_rw_ctxt_lock;
+	struct list_head     sc_rw_ctxts;
 
 	struct list_head     sc_rq_dto_q;
 	spinlock_t	     sc_rq_dto_lock;
@@ -181,9 +164,7 @@ struct svcxprt_rdma {
 /* The default ORD value is based on two outstanding full-size writes with a
  * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ.  */
 #define RPCRDMA_ORD             (64/4)
-#define RPCRDMA_SQ_DEPTH_MULT   8
 #define RPCRDMA_MAX_REQUESTS    32
-#define RPCRDMA_MAX_REQ_SIZE    4096
 
 /* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our
  * current NFSv4.1 implementation supports one backchannel slot.
@@ -201,19 +182,11 @@ static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma,
 
 /* svc_rdma_backchannel.c */
 extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
-				    struct rpcrdma_msg *rmsgp,
+				    __be32 *rdma_resp,
 				    struct xdr_buf *rcvbuf);
 
 /* svc_rdma_marshal.c */
 extern int svc_rdma_xdr_decode_req(struct xdr_buf *);
-extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
-				     struct rpcrdma_msg *,
-				     enum rpcrdma_errcode, __be32 *);
-extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
-extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
-extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
-					    __be32, __be64, u32);
-extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp);
 
 /* svc_rdma_recvfrom.c */
 extern int svc_rdma_recvfrom(struct svc_rqst *);
@@ -224,16 +197,25 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
 				struct svc_rdma_op_ctxt *, int *, u32 *,
 				u32, u32, u64, bool);
 
+/* svc_rdma_rw.c */
+extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
+extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
+				     __be32 *wr_ch, struct xdr_buf *xdr);
+extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
+				     __be32 *rp_ch, bool writelist,
+				     struct xdr_buf *xdr);
+
 /* svc_rdma_sendto.c */
-extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *,
-			    struct svc_rdma_req_map *, bool);
+extern int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma,
+				  struct svc_rdma_op_ctxt *ctxt,
+				  __be32 *rdma_resp, unsigned int len);
+extern int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
+				 struct svc_rdma_op_ctxt *ctxt,
+				 int num_sge, u32 inv_rkey);
 extern int svc_rdma_sendto(struct svc_rqst *);
-extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
-				int);
 
 /* svc_rdma_transport.c */
 extern void svc_rdma_wc_send(struct ib_cq *, struct ib_wc *);
-extern void svc_rdma_wc_write(struct ib_cq *, struct ib_wc *);
 extern void svc_rdma_wc_reg(struct ib_cq *, struct ib_wc *);
 extern void svc_rdma_wc_read(struct ib_cq *, struct ib_wc *);
 extern void svc_rdma_wc_inv(struct ib_cq *, struct ib_wc *);
@@ -244,9 +226,6 @@ extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
 extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
-extern struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *);
-extern void svc_rdma_put_req_map(struct svcxprt_rdma *,
-				 struct svc_rdma_req_map *);
 extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
 extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
 			      struct svc_rdma_fastreg_mr *);
diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
new file mode 100644
index 000000000000..0f175b8f6456
--- /dev/null
+++ b/include/linux/tee_drv.h
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __TEE_DRV_H
+#define __TEE_DRV_H
+
+#include <linux/types.h>
+#include <linux/idr.h>
+#include <linux/list.h>
+#include <linux/tee.h>
+
+/*
+ * The file describes the API provided by the generic TEE driver to the
+ * specific TEE driver.
+ */
+
+#define TEE_SHM_MAPPED		0x1	/* Memory mapped by the kernel */
+#define TEE_SHM_DMA_BUF		0x2	/* Memory with dma-buf handle */
+
+struct tee_device;
+struct tee_shm;
+struct tee_shm_pool;
+
+/**
+ * struct tee_context - driver specific context on file pointer data
+ * @teedev:	pointer to this drivers struct tee_device
+ * @list_shm:	List of shared memory object owned by this context
+ * @data:	driver specific context data, managed by the driver
+ */
+struct tee_context {
+	struct tee_device *teedev;
+	struct list_head list_shm;
+	void *data;
+};
+
+struct tee_param_memref {
+	size_t shm_offs;
+	size_t size;
+	struct tee_shm *shm;
+};
+
+struct tee_param_value {
+	u64 a;
+	u64 b;
+	u64 c;
+};
+
+struct tee_param {
+	u64 attr;
+	union {
+		struct tee_param_memref memref;
+		struct tee_param_value value;
+	} u;
+};
+
+/**
+ * struct tee_driver_ops - driver operations vtable
+ * @get_version:	returns version of driver
+ * @open:		called when the device file is opened
+ * @release:		release this open file
+ * @open_session:	open a new session
+ * @close_session:	close a session
+ * @invoke_func:	invoke a trusted function
+ * @cancel_req:		request cancel of an ongoing invoke or open
+ * @supp_revc:		called for supplicant to get a command
+ * @supp_send:		called for supplicant to send a response
+ */
+struct tee_driver_ops {
+	void (*get_version)(struct tee_device *teedev,
+			    struct tee_ioctl_version_data *vers);
+	int (*open)(struct tee_context *ctx);
+	void (*release)(struct tee_context *ctx);
+	int (*open_session)(struct tee_context *ctx,
+			    struct tee_ioctl_open_session_arg *arg,
+			    struct tee_param *param);
+	int (*close_session)(struct tee_context *ctx, u32 session);
+	int (*invoke_func)(struct tee_context *ctx,
+			   struct tee_ioctl_invoke_arg *arg,
+			   struct tee_param *param);
+	int (*cancel_req)(struct tee_context *ctx, u32 cancel_id, u32 session);
+	int (*supp_recv)(struct tee_context *ctx, u32 *func, u32 *num_params,
+			 struct tee_param *param);
+	int (*supp_send)(struct tee_context *ctx, u32 ret, u32 num_params,
+			 struct tee_param *param);
+};
+
+/**
+ * struct tee_desc - Describes the TEE driver to the subsystem
+ * @name:	name of driver
+ * @ops:	driver operations vtable
+ * @owner:	module providing the driver
+ * @flags:	Extra properties of driver, defined by TEE_DESC_* below
+ */
+#define TEE_DESC_PRIVILEGED	0x1
+struct tee_desc {
+	const char *name;
+	const struct tee_driver_ops *ops;
+	struct module *owner;
+	u32 flags;
+};
+
+/**
+ * tee_device_alloc() - Allocate a new struct tee_device instance
+ * @teedesc:	Descriptor for this driver
+ * @dev:	Parent device for this device
+ * @pool:	Shared memory pool, NULL if not used
+ * @driver_data: Private driver data for this device
+ *
+ * Allocates a new struct tee_device instance. The device is
+ * removed by tee_device_unregister().
+ *
+ * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure
+ */
+struct tee_device *tee_device_alloc(const struct tee_desc *teedesc,
+				    struct device *dev,
+				    struct tee_shm_pool *pool,
+				    void *driver_data);
+
+/**
+ * tee_device_register() - Registers a TEE device
+ * @teedev:	Device to register
+ *
+ * tee_device_unregister() need to be called to remove the @teedev if
+ * this function fails.
+ *
+ * @returns < 0 on failure
+ */
+int tee_device_register(struct tee_device *teedev);
+
+/**
+ * tee_device_unregister() - Removes a TEE device
+ * @teedev:	Device to unregister
+ *
+ * This function should be called to remove the @teedev even if
+ * tee_device_register() hasn't been called yet. Does nothing if
+ * @teedev is NULL.
+ */
+void tee_device_unregister(struct tee_device *teedev);
+
+/**
+ * struct tee_shm_pool_mem_info - holds information needed to create a shared
+ * memory pool
+ * @vaddr:	Virtual address of start of pool
+ * @paddr:	Physical address of start of pool
+ * @size:	Size in bytes of the pool
+ */
+struct tee_shm_pool_mem_info {
+	unsigned long vaddr;
+	phys_addr_t paddr;
+	size_t size;
+};
+
+/**
+ * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved
+ * memory range
+ * @priv_info:	 Information for driver private shared memory pool
+ * @dmabuf_info: Information for dma-buf shared memory pool
+ *
+ * Start and end of pools will must be page aligned.
+ *
+ * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied
+ * in @dmabuf, others will use the range provided by @priv.
+ *
+ * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure.
+ */
+struct tee_shm_pool *
+tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info,
+			   struct tee_shm_pool_mem_info *dmabuf_info);
+
+/**
+ * tee_shm_pool_free() - Free a shared memory pool
+ * @pool:	The shared memory pool to free
+ *
+ * The must be no remaining shared memory allocated from this pool when
+ * this function is called.
+ */
+void tee_shm_pool_free(struct tee_shm_pool *pool);
+
+/**
+ * tee_get_drvdata() - Return driver_data pointer
+ * @returns the driver_data pointer supplied to tee_register().
+ */
+void *tee_get_drvdata(struct tee_device *teedev);
+
+/**
+ * tee_shm_alloc() - Allocate shared memory
+ * @ctx:	Context that allocates the shared memory
+ * @size:	Requested size of shared memory
+ * @flags:	Flags setting properties for the requested shared memory.
+ *
+ * Memory allocated as global shared memory is automatically freed when the
+ * TEE file pointer is closed. The @flags field uses the bits defined by
+ * TEE_SHM_* above. TEE_SHM_MAPPED must currently always be set. If
+ * TEE_SHM_DMA_BUF global shared memory will be allocated and associated
+ * with a dma-buf handle, else driver private memory.
+ *
+ * @returns a pointer to 'struct tee_shm'
+ */
+struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags);
+
+/**
+ * tee_shm_free() - Free shared memory
+ * @shm:	Handle to shared memory to free
+ */
+void tee_shm_free(struct tee_shm *shm);
+
+/**
+ * tee_shm_put() - Decrease reference count on a shared memory handle
+ * @shm:	Shared memory handle
+ */
+void tee_shm_put(struct tee_shm *shm);
+
+/**
+ * tee_shm_va2pa() - Get physical address of a virtual address
+ * @shm:	Shared memory handle
+ * @va:		Virtual address to tranlsate
+ * @pa:		Returned physical address
+ * @returns 0 on success and < 0 on failure
+ */
+int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa);
+
+/**
+ * tee_shm_pa2va() - Get virtual address of a physical address
+ * @shm:	Shared memory handle
+ * @pa:		Physical address to tranlsate
+ * @va:		Returned virtual address
+ * @returns 0 on success and < 0 on failure
+ */
+int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va);
+
+/**
+ * tee_shm_get_va() - Get virtual address of a shared memory plus an offset
+ * @shm:	Shared memory handle
+ * @offs:	Offset from start of this shared memory
+ * @returns virtual address of the shared memory + offs if offs is within
+ *	the bounds of this shared memory, else an ERR_PTR
+ */
+void *tee_shm_get_va(struct tee_shm *shm, size_t offs);
+
+/**
+ * tee_shm_get_pa() - Get physical address of a shared memory plus an offset
+ * @shm:	Shared memory handle
+ * @offs:	Offset from start of this shared memory
+ * @pa:		Physical address to return
+ * @returns 0 if offs is within the bounds of this shared memory, else an
+ *	error code.
+ */
+int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa);
+
+/**
+ * tee_shm_get_id() - Get id of a shared memory object
+ * @shm:	Shared memory handle
+ * @returns id
+ */
+int tee_shm_get_id(struct tee_shm *shm);
+
+/**
+ * tee_shm_get_from_id() - Find shared memory object and increase reference
+ * count
+ * @ctx:	Context owning the shared memory
+ * @id:		Id of shared memory object
+ * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure
+ */
+struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id);
+
+#endif /*__TEE_DRV_H*/
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 110f4532188c..f7043ccca81c 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -29,7 +29,6 @@
  */
 struct tk_read_base {
 	struct clocksource	*clock;
-	u64			(*read)(struct clocksource *cs);
 	u64			mask;
 	u64			cycle_last;
 	u32			mult;
@@ -58,7 +57,7 @@ struct tk_read_base {
  *			interval.
  * @xtime_remainder:	Shifted nano seconds left over when rounding
  *			@cycle_interval
- * @raw_interval:	Raw nano seconds accumulated per NTP interval.
+ * @raw_interval:	Shifted raw nano seconds accumulated per NTP interval.
  * @ntp_error:		Difference between accumulated time and NTP time in ntp
  *			shifted nano seconds.
  * @ntp_error_shift:	Shift conversion between clock shifted nano seconds and
@@ -100,7 +99,7 @@ struct timekeeper {
 	u64			cycle_interval;
 	u64			xtime_interval;
 	s64			xtime_remainder;
-	u32			raw_interval;
+	u64			raw_interval;
 	/* The ntp_tick_length() value currently being used.
 	 * This cached copy ensures we consistently apply the tick
 	 * length for an entire tick, as ntp_tick_length may change
diff --git a/include/linux/tty.h b/include/linux/tty.h
index d07cd2105a6c..eccb4ec30a8a 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -558,6 +558,15 @@ extern struct device *tty_port_register_device_attr(struct tty_port *port,
 		struct tty_driver *driver, unsigned index,
 		struct device *device, void *drvdata,
 		const struct attribute_group **attr_grp);
+extern struct device *tty_port_register_device_serdev(struct tty_port *port,
+		struct tty_driver *driver, unsigned index,
+		struct device *device);
+extern struct device *tty_port_register_device_attr_serdev(struct tty_port *port,
+		struct tty_driver *driver, unsigned index,
+		struct device *device, void *drvdata,
+		const struct attribute_group **attr_grp);
+extern void tty_port_unregister_device(struct tty_port *port,
+		struct tty_driver *driver, unsigned index);
 extern int tty_port_alloc_xmit_buf(struct tty_port *port);
 extern void tty_port_free_xmit_buf(struct tty_port *port);
 extern void tty_port_destroy(struct tty_port *port);
diff --git a/include/linux/types.h b/include/linux/types.h
index 1e7bd24848fc..258099a4ed82 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -209,7 +209,7 @@ struct ustat {
  * naturally due ABI requirements, but some architectures (like CRIS) have
  * weird ABI and we need to ask it explicitly.
  *
- * The alignment is required to guarantee that bits 0 and 1 of @next will be
+ * The alignment is required to guarantee that bit 0 of @next will be
  * clear under normal conditions -- as long as we use call_rcu(),
  * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback.
  *
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index a469999a106d..50398b69ca44 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -148,6 +148,7 @@ struct usb_hcd {
 	unsigned		rh_registered:1;/* is root hub registered? */
 	unsigned		rh_pollable:1;	/* may we poll the root hub? */
 	unsigned		msix_enabled:1;	/* driver has MSI-X enabled? */
+	unsigned		msi_enabled:1;	/* driver has MSI enabled? */
 	unsigned		remove_phy:1;	/* auto-remove USB phy */
 
 	/* The next flag is a stopgap, to be removed when all the HCDs
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 7dffa5624ea6..97116379db5f 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -206,6 +206,7 @@ struct cdc_state {
 };
 
 extern int usbnet_generic_cdc_bind(struct usbnet *, struct usb_interface *);
+extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf);
 extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *);
 extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *);
 extern void usbnet_cdc_status(struct usbnet *, struct urb *);
diff --git a/include/linux/uuid.h b/include/linux/uuid.h
index 4dff73a89758..d1defe4ab167 100644
--- a/include/linux/uuid.h
+++ b/include/linux/uuid.h
@@ -18,29 +18,16 @@
 
 #include <uapi/linux/uuid.h>
 
-/*
- * V1 (time-based) UUID definition [RFC 4122].
- * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns
- *   increments since midnight 15th October 1582
- *   - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID
- *     time
- * - the clock sequence is a 14-bit counter to avoid duplicate times
- */
-struct uuid_v1 {
-	__be32		time_low;			/* low part of timestamp */
-	__be16		time_mid;			/* mid part of timestamp */
-	__be16		time_hi_and_version;		/* high part of timestamp and version  */
-#define UUID_TO_UNIX_TIME	0x01b21dd213814000ULL
-#define UUID_TIMEHI_MASK	0x0fff
-#define UUID_VERSION_TIME	0x1000	/* time-based UUID */
-#define UUID_VERSION_NAME	0x3000	/* name-based UUID */
-#define UUID_VERSION_RANDOM	0x4000	/* (pseudo-)random generated UUID */
-	u8		clock_seq_hi_and_reserved;	/* clock seq hi and variant */
-#define UUID_CLOCKHI_MASK	0x3f
-#define UUID_VARIANT_STD	0x80
-	u8		clock_seq_low;			/* clock seq low */
-	u8		node[6];			/* spatially unique node ID (MAC addr) */
-};
+typedef struct {
+	__u8 b[16];
+} uuid_t;
+
+#define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)			\
+((uuid_t)								\
+{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \
+   ((b) >> 8) & 0xff, (b) & 0xff,					\
+   ((c) >> 8) & 0xff, (c) & 0xff,					\
+   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
 
 /*
  * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee")
@@ -48,27 +35,73 @@ struct uuid_v1 {
  */
 #define	UUID_STRING_LEN		36
 
-static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2)
+extern const guid_t guid_null;
+extern const uuid_t uuid_null;
+
+static inline bool guid_equal(const guid_t *u1, const guid_t *u2)
+{
+	return memcmp(u1, u2, sizeof(guid_t)) == 0;
+}
+
+static inline void guid_copy(guid_t *dst, const guid_t *src)
+{
+	memcpy(dst, src, sizeof(guid_t));
+}
+
+static inline bool guid_is_null(const guid_t *guid)
+{
+	return guid_equal(guid, &guid_null);
+}
+
+static inline bool uuid_equal(const uuid_t *u1, const uuid_t *u2)
+{
+	return memcmp(u1, u2, sizeof(uuid_t)) == 0;
+}
+
+static inline void uuid_copy(uuid_t *dst, const uuid_t *src)
 {
-	return memcmp(&u1, &u2, sizeof(uuid_le));
+	memcpy(dst, src, sizeof(uuid_t));
 }
 
-static inline int uuid_be_cmp(const uuid_be u1, const uuid_be u2)
+static inline bool uuid_is_null(const uuid_t *uuid)
 {
-	return memcmp(&u1, &u2, sizeof(uuid_be));
+	return uuid_equal(uuid, &uuid_null);
 }
 
 void generate_random_uuid(unsigned char uuid[16]);
 
-extern void uuid_le_gen(uuid_le *u);
-extern void uuid_be_gen(uuid_be *u);
+extern void guid_gen(guid_t *u);
+extern void uuid_gen(uuid_t *u);
 
 bool __must_check uuid_is_valid(const char *uuid);
 
-extern const u8 uuid_le_index[16];
-extern const u8 uuid_be_index[16];
+extern const u8 guid_index[16];
+extern const u8 uuid_index[16];
+
+int guid_parse(const char *uuid, guid_t *u);
+int uuid_parse(const char *uuid, uuid_t *u);
+
+/* backwards compatibility, don't use in new code */
+typedef uuid_t uuid_be;
+#define UUID_BE(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+	UUID_INIT(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7)
+#define NULL_UUID_BE 							\
+	UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
+	     0x00, 0x00, 0x00, 0x00)
 
-int uuid_le_to_bin(const char *uuid, uuid_le *u);
-int uuid_be_to_bin(const char *uuid, uuid_be *u);
+#define uuid_le_gen(u)		guid_gen(u)
+#define uuid_be_gen(u)		uuid_gen(u)
+#define uuid_le_to_bin(guid, u)	guid_parse(guid, u)
+#define uuid_be_to_bin(uuid, u)	uuid_parse(uuid, u)
+
+static inline int uuid_le_cmp(const guid_t u1, const guid_t u2)
+{
+	return memcmp(&u1, &u2, sizeof(guid_t));
+}
+
+static inline int uuid_be_cmp(const uuid_t u1, const uuid_t u2)
+{
+	return memcmp(&u1, &u2, sizeof(uuid_t));
+}
 
 #endif
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index edf9b2cad277..f57076b958b7 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -183,7 +183,7 @@ struct virqfd {
 	void			(*thread)(void *, void *);
 	void			*data;
 	struct work_struct	inject;
-	wait_queue_t		wait;
+	wait_queue_entry_t		wait;
 	poll_table		pt;
 	struct work_struct	shutdown;
 	struct virqfd		**pvirqfd;
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 7edfbdb55a99..28b0e965360f 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -44,6 +44,12 @@ int virtqueue_add_inbuf(struct virtqueue *vq,
 			void *data,
 			gfp_t gfp);
 
+int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
+			    struct scatterlist sg[], unsigned int num,
+			    void *data,
+			    void *ctx,
+			    gfp_t gfp);
+
 int virtqueue_add_sgs(struct virtqueue *vq,
 		      struct scatterlist *sgs[],
 		      unsigned int out_sgs,
@@ -59,6 +65,9 @@ bool virtqueue_notify(struct virtqueue *vq);
 
 void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
 
+void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
+			    void **ctx);
+
 void virtqueue_disable_cb(struct virtqueue *vq);
 
 bool virtqueue_enable_cb(struct virtqueue *vq);
@@ -156,9 +165,13 @@ int virtio_device_restore(struct virtio_device *dev);
  * @feature_table_legacy: same as feature_table but when working in legacy mode.
  * @feature_table_size_legacy: number of entries in feature table legacy array.
  * @probe: the function to call when a device is found.  Returns 0 or -errno.
+ * @scan: optional function to call after successful probe; intended
+ *    for virtio-scsi to invoke a scan.
  * @remove: the function to call when a device is removed.
  * @config_changed: optional function to call when the device configuration
  *    changes; may be called in interrupt context.
+ * @freeze: optional function to call during suspend/hibernation.
+ * @restore: optional function to call on resume.
  */
 struct virtio_driver {
 	struct device_driver driver;
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 8355bab175e1..0133d8a12ccd 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -72,7 +72,8 @@ struct virtio_config_ops {
 	void (*reset)(struct virtio_device *vdev);
 	int (*find_vqs)(struct virtio_device *, unsigned nvqs,
 			struct virtqueue *vqs[], vq_callback_t *callbacks[],
-			const char * const names[], struct irq_affinity *desc);
+			const char * const names[], const bool *ctx,
+			struct irq_affinity *desc);
 	void (*del_vqs)(struct virtio_device *);
 	u64 (*get_features)(struct virtio_device *vdev);
 	int (*finalize_features)(struct virtio_device *vdev);
@@ -173,12 +174,32 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
 	vq_callback_t *callbacks[] = { c };
 	const char *names[] = { n };
 	struct virtqueue *vq;
-	int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL);
+	int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL,
+					 NULL);
 	if (err < 0)
 		return ERR_PTR(err);
 	return vq;
 }
 
+static inline
+int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+			struct virtqueue *vqs[], vq_callback_t *callbacks[],
+			const char * const names[],
+			struct irq_affinity *desc)
+{
+	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc);
+}
+
+static inline
+int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
+			struct virtqueue *vqs[], vq_callback_t *callbacks[],
+			const char * const names[], const bool *ctx,
+			struct irq_affinity *desc)
+{
+	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx,
+				      desc);
+}
+
 /**
  * virtio_device_ready - enable vq use in probe function
  * @vdev: the device
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index e8d36938f09a..270cfa81830e 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -71,6 +71,7 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
 					 struct virtio_device *vdev,
 					 bool weak_barriers,
 					 bool may_reduce_num,
+					 bool ctx,
 					 bool (*notify)(struct virtqueue *vq),
 					 void (*callback)(struct virtqueue *vq),
 					 const char *name);
@@ -80,6 +81,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 					struct vring vring,
 					struct virtio_device *vdev,
 					bool weak_barriers,
+					bool ctx,
 					bool (*notify)(struct virtqueue *),
 					void (*callback)(struct virtqueue *),
 					const char *name);
@@ -93,6 +95,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
 				      bool weak_barriers,
+				      bool ctx,
 				      void *pages,
 				      bool (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index d84ae90ccd5c..be3ab2d13adf 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -93,10 +93,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #endif
 #endif
 #ifdef CONFIG_DEBUG_TLBFLUSH
-#ifdef CONFIG_SMP
 		NR_TLB_REMOTE_FLUSH,	/* cpu tried to flush others' tlbs */
 		NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */
-#endif /* CONFIG_SMP */
 		NR_TLB_LOCAL_FLUSH_ALL,
 		NR_TLB_LOCAL_FLUSH_ONE,
 #endif /* CONFIG_DEBUG_TLBFLUSH */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 0328ce003992..2d92dd002abd 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -6,7 +6,6 @@
 #include <linux/list.h>
 #include <linux/llist.h>
 #include <asm/page.h>		/* pgprot_t */
-#include <asm/pgtable.h>	/* PAGE_KERNEL */
 #include <linux/rbtree.h>
 
 struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
@@ -83,22 +82,14 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
 			const void *caller);
 #ifndef CONFIG_MMU
 extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags);
-#else
-extern void *__vmalloc_node(unsigned long size, unsigned long align,
-			    gfp_t gfp_mask, pgprot_t prot,
-			    int node, const void *caller);
-
-/*
- * We really want to have this inlined due to caller tracking. This
- * function is used by the highlevel vmalloc apis and so we want to track
- * their callers and inlining will achieve that.
- */
-static inline void *__vmalloc_node_flags(unsigned long size,
-					int node, gfp_t flags)
+static inline void *__vmalloc_node_flags_caller(unsigned long size, int node,
+						gfp_t flags, void *caller)
 {
-	return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
-					node, __builtin_return_address(0));
+	return __vmalloc_node_flags(size, node, flags);
 }
+#else
+extern void *__vmalloc_node_flags_caller(unsigned long size,
+					 int node, gfp_t flags, void *caller);
 #endif
 
 extern void vfree(const void *addr);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index db076ca7f11d..b289c96151ee 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -10,38 +10,30 @@
 #include <asm/current.h>
 #include <uapi/linux/wait.h>
 
-typedef struct __wait_queue wait_queue_t;
-typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
-int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
+typedef struct wait_queue_entry wait_queue_entry_t;
 
-/* __wait_queue::flags */
+typedef int (*wait_queue_func_t)(struct wait_queue_entry *wq_entry, unsigned mode, int flags, void *key);
+int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int flags, void *key);
+
+/* wait_queue_entry::flags */
 #define WQ_FLAG_EXCLUSIVE	0x01
 #define WQ_FLAG_WOKEN		0x02
 
-struct __wait_queue {
+/*
+ * A single wait-queue entry structure:
+ */
+struct wait_queue_entry {
 	unsigned int		flags;
 	void			*private;
 	wait_queue_func_t	func;
-	struct list_head	task_list;
-};
-
-struct wait_bit_key {
-	void			*flags;
-	int			bit_nr;
-#define WAIT_ATOMIC_T_BIT_NR	-1
-	unsigned long		timeout;
+	struct list_head	entry;
 };
 
-struct wait_bit_queue {
-	struct wait_bit_key	key;
-	wait_queue_t		wait;
-};
-
-struct __wait_queue_head {
+struct wait_queue_head {
 	spinlock_t		lock;
-	struct list_head	task_list;
+	struct list_head	head;
 };
-typedef struct __wait_queue_head wait_queue_head_t;
+typedef struct wait_queue_head wait_queue_head_t;
 
 struct task_struct;
 
@@ -49,82 +41,76 @@ struct task_struct;
  * Macros for declaration and initialisaton of the datatypes
  */
 
-#define __WAITQUEUE_INITIALIZER(name, tsk) {				\
-	.private	= tsk,						\
-	.func		= default_wake_function,			\
-	.task_list	= { NULL, NULL } }
+#define __WAITQUEUE_INITIALIZER(name, tsk) {					\
+	.private	= tsk,							\
+	.func		= default_wake_function,				\
+	.entry		= { NULL, NULL } }
 
-#define DECLARE_WAITQUEUE(name, tsk)					\
-	wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
+#define DECLARE_WAITQUEUE(name, tsk)						\
+	struct wait_queue_entry name = __WAITQUEUE_INITIALIZER(name, tsk)
 
-#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {				\
-	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),		\
-	.task_list	= { &(name).task_list, &(name).task_list } }
+#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {					\
+	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),			\
+	.head		= { &(name).head, &(name).head } }
 
 #define DECLARE_WAIT_QUEUE_HEAD(name) \
-	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
-
-#define __WAIT_BIT_KEY_INITIALIZER(word, bit)				\
-	{ .flags = word, .bit_nr = bit, }
+	struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
 
-#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p)				\
-	{ .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, }
+extern void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *);
 
-extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *);
-
-#define init_waitqueue_head(q)				\
-	do {						\
-		static struct lock_class_key __key;	\
-							\
-		__init_waitqueue_head((q), #q, &__key);	\
+#define init_waitqueue_head(wq_head)						\
+	do {									\
+		static struct lock_class_key __key;				\
+										\
+		__init_waitqueue_head((wq_head), #wq_head, &__key);		\
 	} while (0)
 
 #ifdef CONFIG_LOCKDEP
 # define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
 	({ init_waitqueue_head(&name); name; })
 # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \
-	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name)
+	struct wait_queue_head name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name)
 #else
 # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name)
 #endif
 
-static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
+static inline void init_waitqueue_entry(struct wait_queue_entry *wq_entry, struct task_struct *p)
 {
-	q->flags	= 0;
-	q->private	= p;
-	q->func		= default_wake_function;
+	wq_entry->flags		= 0;
+	wq_entry->private	= p;
+	wq_entry->func		= default_wake_function;
 }
 
 static inline void
-init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
+init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t func)
 {
-	q->flags	= 0;
-	q->private	= NULL;
-	q->func		= func;
+	wq_entry->flags		= 0;
+	wq_entry->private	= NULL;
+	wq_entry->func		= func;
 }
 
 /**
  * waitqueue_active -- locklessly test for waiters on the queue
- * @q: the waitqueue to test for waiters
+ * @wq_head: the waitqueue to test for waiters
  *
  * returns true if the wait list is not empty
  *
  * NOTE: this function is lockless and requires care, incorrect usage _will_
  * lead to sporadic and non-obvious failure.
  *
- * Use either while holding wait_queue_head_t::lock or when used for wakeups
+ * Use either while holding wait_queue_head::lock or when used for wakeups
  * with an extra smp_mb() like:
  *
  *      CPU0 - waker                    CPU1 - waiter
  *
  *                                      for (;;) {
- *      @cond = true;                     prepare_to_wait(&wq, &wait, state);
+ *      @cond = true;                     prepare_to_wait(&wq_head, &wait, state);
  *      smp_mb();                         // smp_mb() from set_current_state()
- *      if (waitqueue_active(wq))         if (@cond)
- *        wake_up(wq);                      break;
+ *      if (waitqueue_active(wq_head))         if (@cond)
+ *        wake_up(wq_head);                      break;
  *                                        schedule();
  *                                      }
- *                                      finish_wait(&wq, &wait);
+ *                                      finish_wait(&wq_head, &wait);
  *
  * Because without the explicit smp_mb() it's possible for the
  * waitqueue_active() load to get hoisted over the @cond store such that we'll
@@ -133,20 +119,20 @@ init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
  * Also note that this 'optimization' trades a spin_lock() for an smp_mb(),
  * which (when the lock is uncontended) are of roughly equal cost.
  */
-static inline int waitqueue_active(wait_queue_head_t *q)
+static inline int waitqueue_active(struct wait_queue_head *wq_head)
 {
-	return !list_empty(&q->task_list);
+	return !list_empty(&wq_head->head);
 }
 
 /**
  * wq_has_sleeper - check if there are any waiting processes
- * @wq: wait queue head
+ * @wq_head: wait queue head
  *
- * Returns true if wq has waiting processes
+ * Returns true if wq_head has waiting processes
  *
  * Please refer to the comment for waitqueue_active.
  */
-static inline bool wq_has_sleeper(wait_queue_head_t *wq)
+static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
 {
 	/*
 	 * We need to be sure we are in sync with the
@@ -156,63 +142,51 @@ static inline bool wq_has_sleeper(wait_queue_head_t *wq)
 	 * waiting side.
 	 */
 	smp_mb();
-	return waitqueue_active(wq);
+	return waitqueue_active(wq_head);
 }
 
-extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
-extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait);
-extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
+extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 
-static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
+static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	list_add(&new->task_list, &head->task_list);
+	list_add(&wq_entry->entry, &wq_head->head);
 }
 
 /*
  * Used for wake-one threads:
  */
 static inline void
-__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+__add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	__add_wait_queue(q, wait);
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	__add_wait_queue(wq_head, wq_entry);
 }
 
-static inline void __add_wait_queue_tail(wait_queue_head_t *head,
-					 wait_queue_t *new)
+static inline void __add_wait_queue_entry_tail(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	list_add_tail(&new->task_list, &head->task_list);
+	list_add_tail(&wq_entry->entry, &wq_head->head);
 }
 
 static inline void
-__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+__add_wait_queue_entry_tail_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	__add_wait_queue_tail(q, wait);
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	__add_wait_queue_entry_tail(wq_head, wq_entry);
 }
 
 static inline void
-__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
+__remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	list_del(&old->task_list);
+	list_del(&wq_entry->entry);
 }
 
-typedef int wait_bit_action_f(struct wait_bit_key *, int mode);
-void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
-void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
-void __wake_up_bit(wait_queue_head_t *, void *, int);
-int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
-int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
-void wake_up_bit(void *, int);
-void wake_up_atomic_t(atomic_t *);
-int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned);
-int out_of_line_wait_on_bit_timeout(void *, int, wait_bit_action_f *, unsigned, unsigned long);
-int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned);
-int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned);
-wait_queue_head_t *bit_waitqueue(void *, int);
+void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
+void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
+void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
 
 #define wake_up(x)			__wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_NORMAL, nr, NULL)
@@ -228,28 +202,28 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 /*
  * Wakeup macros to be used to report events to the targets.
  */
-#define wake_up_poll(x, m)						\
+#define wake_up_poll(x, m)							\
 	__wake_up(x, TASK_NORMAL, 1, (void *) (m))
-#define wake_up_locked_poll(x, m)					\
+#define wake_up_locked_poll(x, m)						\
 	__wake_up_locked_key((x), TASK_NORMAL, (void *) (m))
-#define wake_up_interruptible_poll(x, m)				\
+#define wake_up_interruptible_poll(x, m)					\
 	__wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m))
-#define wake_up_interruptible_sync_poll(x, m)				\
+#define wake_up_interruptible_sync_poll(x, m)					\
 	__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
 
-#define ___wait_cond_timeout(condition)					\
-({									\
-	bool __cond = (condition);					\
-	if (__cond && !__ret)						\
-		__ret = 1;						\
-	__cond || !__ret;						\
+#define ___wait_cond_timeout(condition)						\
+({										\
+	bool __cond = (condition);						\
+	if (__cond && !__ret)							\
+		__ret = 1;							\
+	__cond || !__ret;							\
 })
 
-#define ___wait_is_interruptible(state)					\
-	(!__builtin_constant_p(state) ||				\
-		state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE)	\
+#define ___wait_is_interruptible(state)						\
+	(!__builtin_constant_p(state) ||					\
+		state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE)		\
 
-extern void init_wait_entry(wait_queue_t *__wait, int flags);
+extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);
 
 /*
  * The below macro ___wait_event() has an explicit shadow of the __ret
@@ -263,108 +237,108 @@ extern void init_wait_entry(wait_queue_t *__wait, int flags);
  * otherwise.
  */
 
-#define ___wait_event(wq, condition, state, exclusive, ret, cmd)	\
-({									\
-	__label__ __out;						\
-	wait_queue_t __wait;						\
-	long __ret = ret;	/* explicit shadow */			\
-									\
-	init_wait_entry(&__wait, exclusive ? WQ_FLAG_EXCLUSIVE : 0);	\
-	for (;;) {							\
-		long __int = prepare_to_wait_event(&wq, &__wait, state);\
-									\
-		if (condition)						\
-			break;						\
-									\
-		if (___wait_is_interruptible(state) && __int) {		\
-			__ret = __int;					\
-			goto __out;					\
-		}							\
-									\
-		cmd;							\
-	}								\
-	finish_wait(&wq, &__wait);					\
-__out:	__ret;								\
+#define ___wait_event(wq_head, condition, state, exclusive, ret, cmd)		\
+({										\
+	__label__ __out;							\
+	struct wait_queue_entry __wq_entry;					\
+	long __ret = ret;	/* explicit shadow */				\
+										\
+	init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);	\
+	for (;;) {								\
+		long __int = prepare_to_wait_event(&wq_head, &__wq_entry, state);\
+										\
+		if (condition)							\
+			break;							\
+										\
+		if (___wait_is_interruptible(state) && __int) {			\
+			__ret = __int;						\
+			goto __out;						\
+		}								\
+										\
+		cmd;								\
+	}									\
+	finish_wait(&wq_head, &__wq_entry);					\
+__out:	__ret;									\
 })
 
-#define __wait_event(wq, condition)					\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+#define __wait_event(wq_head, condition)					\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			    schedule())
 
 /**
  * wait_event - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  */
-#define wait_event(wq, condition)					\
-do {									\
-	might_sleep();							\
-	if (condition)							\
-		break;							\
-	__wait_event(wq, condition);					\
+#define wait_event(wq_head, condition)						\
+do {										\
+	might_sleep();								\
+	if (condition)								\
+		break;								\
+	__wait_event(wq_head, condition);					\
 } while (0)
 
-#define __io_wait_event(wq, condition)					\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+#define __io_wait_event(wq_head, condition)					\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			    io_schedule())
 
 /*
  * io_wait_event() -- like wait_event() but with io_schedule()
  */
-#define io_wait_event(wq, condition)					\
-do {									\
-	might_sleep();							\
-	if (condition)							\
-		break;							\
-	__io_wait_event(wq, condition);					\
+#define io_wait_event(wq_head, condition)					\
+do {										\
+	might_sleep();								\
+	if (condition)								\
+		break;								\
+	__io_wait_event(wq_head, condition);					\
 } while (0)
 
-#define __wait_event_freezable(wq, condition)				\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
+#define __wait_event_freezable(wq_head, condition)				\
+	___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 			    schedule(); try_to_freeze())
 
 /**
  * wait_event_freezable - sleep (or freeze) until a condition gets true
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE -- so as not to contribute
  * to system load) until the @condition evaluates to true. The
- * @condition is checked each time the waitqueue @wq is woken up.
+ * @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  */
-#define wait_event_freezable(wq, condition)				\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_freezable(wq, condition);		\
-	__ret;								\
+#define wait_event_freezable(wq_head, condition)				\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_freezable(wq_head, condition);		\
+	__ret;									\
 })
 
-#define __wait_event_timeout(wq, condition, timeout)			\
-	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_UNINTERRUPTIBLE, 0, timeout,			\
+#define __wait_event_timeout(wq_head, condition, timeout)			\
+	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
+		      TASK_UNINTERRUPTIBLE, 0, timeout,				\
 		      __ret = schedule_timeout(__ret))
 
 /**
  * wait_event_timeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout, in jiffies
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -375,83 +349,83 @@ do {									\
  * or the remaining jiffies (at least 1) if the @condition evaluated
  * to %true before the @timeout elapsed.
  */
-#define wait_event_timeout(wq, condition, timeout)			\
-({									\
-	long __ret = timeout;						\
-	might_sleep();							\
-	if (!___wait_cond_timeout(condition))				\
-		__ret = __wait_event_timeout(wq, condition, timeout);	\
-	__ret;								\
+#define wait_event_timeout(wq_head, condition, timeout)				\
+({										\
+	long __ret = timeout;							\
+	might_sleep();								\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_timeout(wq_head, condition, timeout);	\
+	__ret;									\
 })
 
-#define __wait_event_freezable_timeout(wq, condition, timeout)		\
-	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_INTERRUPTIBLE, 0, timeout,			\
+#define __wait_event_freezable_timeout(wq_head, condition, timeout)		\
+	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
+		      TASK_INTERRUPTIBLE, 0, timeout,				\
 		      __ret = schedule_timeout(__ret); try_to_freeze())
 
 /*
  * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid
  * increasing load and is freezable.
  */
-#define wait_event_freezable_timeout(wq, condition, timeout)		\
-({									\
-	long __ret = timeout;						\
-	might_sleep();							\
-	if (!___wait_cond_timeout(condition))				\
-		__ret = __wait_event_freezable_timeout(wq, condition, timeout);	\
-	__ret;								\
+#define wait_event_freezable_timeout(wq_head, condition, timeout)		\
+({										\
+	long __ret = timeout;							\
+	might_sleep();								\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_freezable_timeout(wq_head, condition, timeout); \
+	__ret;									\
 })
 
-#define __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2)		\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 1, 0,	\
+#define __wait_event_exclusive_cmd(wq_head, condition, cmd1, cmd2)		\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 1, 0,	\
 			    cmd1; schedule(); cmd2)
 /*
  * Just like wait_event_cmd(), except it sets exclusive flag
  */
-#define wait_event_exclusive_cmd(wq, condition, cmd1, cmd2)		\
-do {									\
-	if (condition)							\
-		break;							\
-	__wait_event_exclusive_cmd(wq, condition, cmd1, cmd2);		\
+#define wait_event_exclusive_cmd(wq_head, condition, cmd1, cmd2)		\
+do {										\
+	if (condition)								\
+		break;								\
+	__wait_event_exclusive_cmd(wq_head, condition, cmd1, cmd2);		\
 } while (0)
 
-#define __wait_event_cmd(wq, condition, cmd1, cmd2)			\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+#define __wait_event_cmd(wq_head, condition, cmd1, cmd2)			\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			    cmd1; schedule(); cmd2)
 
 /**
  * wait_event_cmd - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @cmd1: the command will be executed before sleep
  * @cmd2: the command will be executed after sleep
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  */
-#define wait_event_cmd(wq, condition, cmd1, cmd2)			\
-do {									\
-	if (condition)							\
-		break;							\
-	__wait_event_cmd(wq, condition, cmd1, cmd2);			\
+#define wait_event_cmd(wq_head, condition, cmd1, cmd2)				\
+do {										\
+	if (condition)								\
+		break;								\
+	__wait_event_cmd(wq_head, condition, cmd1, cmd2);			\
 } while (0)
 
-#define __wait_event_interruptible(wq, condition)			\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
+#define __wait_event_interruptible(wq_head, condition)				\
+	___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 		      schedule())
 
 /**
  * wait_event_interruptible - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -459,29 +433,29 @@ do {									\
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible(wq, condition)				\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_interruptible(wq, condition);	\
-	__ret;								\
+#define wait_event_interruptible(wq_head, condition)				\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_interruptible(wq_head, condition);		\
+	__ret;									\
 })
 
-#define __wait_event_interruptible_timeout(wq, condition, timeout)	\
-	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_INTERRUPTIBLE, 0, timeout,			\
+#define __wait_event_interruptible_timeout(wq_head, condition, timeout)		\
+	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
+		      TASK_INTERRUPTIBLE, 0, timeout,				\
 		      __ret = schedule_timeout(__ret))
 
 /**
  * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout, in jiffies
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -493,50 +467,49 @@ do {									\
  * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was
  * interrupted by a signal.
  */
-#define wait_event_interruptible_timeout(wq, condition, timeout)	\
-({									\
-	long __ret = timeout;						\
-	might_sleep();							\
-	if (!___wait_cond_timeout(condition))				\
-		__ret = __wait_event_interruptible_timeout(wq,		\
-						condition, timeout);	\
-	__ret;								\
+#define wait_event_interruptible_timeout(wq_head, condition, timeout)		\
+({										\
+	long __ret = timeout;							\
+	might_sleep();								\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_interruptible_timeout(wq_head,		\
+						condition, timeout);		\
+	__ret;									\
 })
 
-#define __wait_event_hrtimeout(wq, condition, timeout, state)		\
-({									\
-	int __ret = 0;							\
-	struct hrtimer_sleeper __t;					\
-									\
-	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC,		\
-			      HRTIMER_MODE_REL);			\
-	hrtimer_init_sleeper(&__t, current);				\
-	if ((timeout) != KTIME_MAX)				\
-		hrtimer_start_range_ns(&__t.timer, timeout,		\
-				       current->timer_slack_ns,		\
-				       HRTIMER_MODE_REL);		\
-									\
-	__ret = ___wait_event(wq, condition, state, 0, 0,		\
-		if (!__t.task) {					\
-			__ret = -ETIME;					\
-			break;						\
-		}							\
-		schedule());						\
-									\
-	hrtimer_cancel(&__t.timer);					\
-	destroy_hrtimer_on_stack(&__t.timer);				\
-	__ret;								\
+#define __wait_event_hrtimeout(wq_head, condition, timeout, state)		\
+({										\
+	int __ret = 0;								\
+	struct hrtimer_sleeper __t;						\
+										\
+	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);	\
+	hrtimer_init_sleeper(&__t, current);					\
+	if ((timeout) != KTIME_MAX)						\
+		hrtimer_start_range_ns(&__t.timer, timeout,			\
+				       current->timer_slack_ns,			\
+				       HRTIMER_MODE_REL);			\
+										\
+	__ret = ___wait_event(wq_head, condition, state, 0, 0,			\
+		if (!__t.task) {						\
+			__ret = -ETIME;						\
+			break;							\
+		}								\
+		schedule());							\
+										\
+	hrtimer_cancel(&__t.timer);						\
+	destroy_hrtimer_on_stack(&__t.timer);					\
+	__ret;									\
 })
 
 /**
  * wait_event_hrtimeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout, as a ktime_t
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -544,25 +517,25 @@ do {									\
  * The function returns 0 if @condition became true, or -ETIME if the timeout
  * elapsed.
  */
-#define wait_event_hrtimeout(wq, condition, timeout)			\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_hrtimeout(wq, condition, timeout,	\
-					       TASK_UNINTERRUPTIBLE);	\
-	__ret;								\
+#define wait_event_hrtimeout(wq_head, condition, timeout)			\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_hrtimeout(wq_head, condition, timeout,	\
+					       TASK_UNINTERRUPTIBLE);		\
+	__ret;									\
 })
 
 /**
  * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout, as a ktime_t
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -570,73 +543,73 @@ do {									\
  * The function returns 0 if @condition became true, -ERESTARTSYS if it was
  * interrupted by a signal, or -ETIME if the timeout elapsed.
  */
-#define wait_event_interruptible_hrtimeout(wq, condition, timeout)	\
-({									\
-	long __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_hrtimeout(wq, condition, timeout,	\
-					       TASK_INTERRUPTIBLE);	\
-	__ret;								\
+#define wait_event_interruptible_hrtimeout(wq, condition, timeout)		\
+({										\
+	long __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_hrtimeout(wq, condition, timeout,		\
+					       TASK_INTERRUPTIBLE);		\
+	__ret;									\
 })
 
-#define __wait_event_interruptible_exclusive(wq, condition)		\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,		\
+#define __wait_event_interruptible_exclusive(wq, condition)			\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,			\
 		      schedule())
 
-#define wait_event_interruptible_exclusive(wq, condition)		\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_interruptible_exclusive(wq, condition);\
-	__ret;								\
+#define wait_event_interruptible_exclusive(wq, condition)			\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_interruptible_exclusive(wq, condition);	\
+	__ret;									\
 })
 
-#define __wait_event_killable_exclusive(wq, condition)			\
-	___wait_event(wq, condition, TASK_KILLABLE, 1, 0,		\
+#define __wait_event_killable_exclusive(wq, condition)				\
+	___wait_event(wq, condition, TASK_KILLABLE, 1, 0,			\
 		      schedule())
 
-#define wait_event_killable_exclusive(wq, condition)			\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_killable_exclusive(wq, condition);	\
-	__ret;								\
+#define wait_event_killable_exclusive(wq, condition)				\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_killable_exclusive(wq, condition);		\
+	__ret;									\
 })
 
 
-#define __wait_event_freezable_exclusive(wq, condition)			\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,		\
+#define __wait_event_freezable_exclusive(wq, condition)				\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,			\
 			schedule(); try_to_freeze())
 
-#define wait_event_freezable_exclusive(wq, condition)			\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_freezable_exclusive(wq, condition);\
-	__ret;								\
+#define wait_event_freezable_exclusive(wq, condition)				\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_freezable_exclusive(wq, condition);	\
+	__ret;									\
 })
 
-extern int do_wait_intr(wait_queue_head_t *, wait_queue_t *);
-extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
-
-#define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \
-({									\
-	int __ret;							\
-	DEFINE_WAIT(__wait);						\
-	if (exclusive)							\
-		__wait.flags |= WQ_FLAG_EXCLUSIVE;			\
-	do {								\
-		__ret = fn(&(wq), &__wait);				\
-		if (__ret)						\
-			break;						\
-	} while (!(condition));						\
-	__remove_wait_queue(&(wq), &__wait);				\
-	__set_current_state(TASK_RUNNING);				\
-	__ret;								\
+extern int do_wait_intr(wait_queue_head_t *, wait_queue_entry_t *);
+extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
+
+#define __wait_event_interruptible_locked(wq, condition, exclusive, fn)		\
+({										\
+	int __ret;								\
+	DEFINE_WAIT(__wait);							\
+	if (exclusive)								\
+		__wait.flags |= WQ_FLAG_EXCLUSIVE;				\
+	do {									\
+		__ret = fn(&(wq), &__wait);					\
+		if (__ret)							\
+			break;							\
+	} while (!(condition));							\
+	__remove_wait_queue(&(wq), &__wait);					\
+	__set_current_state(TASK_RUNNING);					\
+	__ret;									\
 })
 
 
@@ -663,8 +636,8 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_locked(wq, condition)			\
-	((condition)							\
+#define wait_event_interruptible_locked(wq, condition)				\
+	((condition)								\
 	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr))
 
 /**
@@ -690,8 +663,8 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_locked_irq(wq, condition)		\
-	((condition)							\
+#define wait_event_interruptible_locked_irq(wq, condition)			\
+	((condition)								\
 	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr_irq))
 
 /**
@@ -721,8 +694,8 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_exclusive_locked(wq, condition)	\
-	((condition)							\
+#define wait_event_interruptible_exclusive_locked(wq, condition)		\
+	((condition)								\
 	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr))
 
 /**
@@ -752,12 +725,12 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_exclusive_locked_irq(wq, condition)	\
-	((condition)							\
+#define wait_event_interruptible_exclusive_locked_irq(wq, condition)		\
+	((condition)								\
 	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr_irq))
 
 
-#define __wait_event_killable(wq, condition)				\
+#define __wait_event_killable(wq, condition)					\
 	___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule())
 
 /**
@@ -775,21 +748,21 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_killable(wq, condition)				\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_killable(wq, condition);		\
-	__ret;								\
+#define wait_event_killable(wq_head, condition)					\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_killable(wq_head, condition);		\
+	__ret;									\
 })
 
 
-#define __wait_event_lock_irq(wq, condition, lock, cmd)			\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
-			    spin_unlock_irq(&lock);			\
-			    cmd;					\
-			    schedule();					\
+#define __wait_event_lock_irq(wq_head, condition, lock, cmd)			\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+			    spin_unlock_irq(&lock);				\
+			    cmd;						\
+			    schedule();						\
 			    spin_lock_irq(&lock))
 
 /**
@@ -797,7 +770,7 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  *			     condition is checked under the lock. This
  *			     is expected to be called with the lock
  *			     taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before cmd
  *	  and schedule() and reacquired afterwards.
@@ -806,7 +779,7 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -815,11 +788,11 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  * dropped before invoking the cmd and going to sleep and is reacquired
  * afterwards.
  */
-#define wait_event_lock_irq_cmd(wq, condition, lock, cmd)		\
-do {									\
-	if (condition)							\
-		break;							\
-	__wait_event_lock_irq(wq, condition, lock, cmd);		\
+#define wait_event_lock_irq_cmd(wq_head, condition, lock, cmd)			\
+do {										\
+	if (condition)								\
+		break;								\
+	__wait_event_lock_irq(wq_head, condition, lock, cmd);			\
 } while (0)
 
 /**
@@ -827,14 +800,14 @@ do {									\
  *			 condition is checked under the lock. This
  *			 is expected to be called with the lock
  *			 taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before schedule()
  *	  and reacquired afterwards.
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -842,26 +815,26 @@ do {									\
  * This is supposed to be called while holding the lock. The lock is
  * dropped before going to sleep and is reacquired afterwards.
  */
-#define wait_event_lock_irq(wq, condition, lock)			\
-do {									\
-	if (condition)							\
-		break;							\
-	__wait_event_lock_irq(wq, condition, lock, );			\
+#define wait_event_lock_irq(wq_head, condition, lock)				\
+do {										\
+	if (condition)								\
+		break;								\
+	__wait_event_lock_irq(wq_head, condition, lock, );			\
 } while (0)
 
 
-#define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd)	\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
-		      spin_unlock_irq(&lock);				\
-		      cmd;						\
-		      schedule();					\
+#define __wait_event_interruptible_lock_irq(wq_head, condition, lock, cmd)	\
+	___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0,		\
+		      spin_unlock_irq(&lock);					\
+		      cmd;							\
+		      schedule();						\
 		      spin_lock_irq(&lock))
 
 /**
  * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
  *		The condition is checked under the lock. This is expected to
  *		be called with the lock taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before cmd and
  *	  schedule() and reacquired afterwards.
@@ -870,7 +843,7 @@ do {									\
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received. The @condition is
- * checked each time the waitqueue @wq is woken up.
+ * checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -882,27 +855,27 @@ do {									\
  * The macro will return -ERESTARTSYS if it was interrupted by a signal
  * and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd)	\
-({									\
-	int __ret = 0;							\
-	if (!(condition))						\
-		__ret = __wait_event_interruptible_lock_irq(wq,		\
-						condition, lock, cmd);	\
-	__ret;								\
+#define wait_event_interruptible_lock_irq_cmd(wq_head, condition, lock, cmd)	\
+({										\
+	int __ret = 0;								\
+	if (!(condition))							\
+		__ret = __wait_event_interruptible_lock_irq(wq_head,		\
+						condition, lock, cmd);		\
+	__ret;									\
 })
 
 /**
  * wait_event_interruptible_lock_irq - sleep until a condition gets true.
  *		The condition is checked under the lock. This is expected
  *		to be called with the lock taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before schedule()
  *	  and reacquired afterwards.
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or signal is received. The @condition is
- * checked each time the waitqueue @wq is woken up.
+ * checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -913,28 +886,28 @@ do {									\
  * The macro will return -ERESTARTSYS if it was interrupted by a signal
  * and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_lock_irq(wq, condition, lock)		\
-({									\
-	int __ret = 0;							\
-	if (!(condition))						\
-		__ret = __wait_event_interruptible_lock_irq(wq,		\
-						condition, lock,);	\
-	__ret;								\
+#define wait_event_interruptible_lock_irq(wq_head, condition, lock)		\
+({										\
+	int __ret = 0;								\
+	if (!(condition))							\
+		__ret = __wait_event_interruptible_lock_irq(wq_head,		\
+						condition, lock,);		\
+	__ret;									\
 })
 
-#define __wait_event_interruptible_lock_irq_timeout(wq, condition,	\
-						    lock, timeout)	\
-	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_INTERRUPTIBLE, 0, timeout,			\
-		      spin_unlock_irq(&lock);				\
-		      __ret = schedule_timeout(__ret);			\
+#define __wait_event_interruptible_lock_irq_timeout(wq_head, condition,		\
+						    lock, timeout)		\
+	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
+		      TASK_INTERRUPTIBLE, 0, timeout,				\
+		      spin_unlock_irq(&lock);					\
+		      __ret = schedule_timeout(__ret);				\
 		      spin_lock_irq(&lock));
 
 /**
  * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets
  *		true or a timeout elapses. The condition is checked under
  *		the lock. This is expected to be called with the lock taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before schedule()
  *	  and reacquired afterwards.
@@ -942,7 +915,7 @@ do {									\
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or signal is received. The @condition is
- * checked each time the waitqueue @wq is woken up.
+ * checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -954,263 +927,42 @@ do {									\
  * was interrupted by a signal, and the remaining jiffies otherwise
  * if the condition evaluated to true before the timeout elapsed.
  */
-#define wait_event_interruptible_lock_irq_timeout(wq, condition, lock,	\
-						  timeout)		\
-({									\
-	long __ret = timeout;						\
-	if (!___wait_cond_timeout(condition))				\
-		__ret = __wait_event_interruptible_lock_irq_timeout(	\
-					wq, condition, lock, timeout);	\
-	__ret;								\
+#define wait_event_interruptible_lock_irq_timeout(wq_head, condition, lock,	\
+						  timeout)			\
+({										\
+	long __ret = timeout;							\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_interruptible_lock_irq_timeout(		\
+					wq_head, condition, lock, timeout);	\
+	__ret;									\
 })
 
 /*
  * Waitqueues which are removed from the waitqueue_head at wakeup time
  */
-void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
-void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
-long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
-void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
-long wait_woken(wait_queue_t *wait, unsigned mode, long timeout);
-int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
-int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
-int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
-
-#define DEFINE_WAIT_FUNC(name, function)				\
-	wait_queue_t name = {						\
-		.private	= current,				\
-		.func		= function,				\
-		.task_list	= LIST_HEAD_INIT((name).task_list),	\
+void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+void prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
+int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
+int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
+
+#define DEFINE_WAIT_FUNC(name, function)					\
+	struct wait_queue_entry name = {					\
+		.private	= current,					\
+		.func		= function,					\
+		.entry		= LIST_HEAD_INIT((name).entry),			\
 	}
 
 #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
 
-#define DEFINE_WAIT_BIT(name, word, bit)				\
-	struct wait_bit_queue name = {					\
-		.key = __WAIT_BIT_KEY_INITIALIZER(word, bit),		\
-		.wait	= {						\
-			.private	= current,			\
-			.func		= wake_bit_function,		\
-			.task_list	=				\
-				LIST_HEAD_INIT((name).wait.task_list),	\
-		},							\
-	}
-
-#define init_wait(wait)							\
-	do {								\
-		(wait)->private = current;				\
-		(wait)->func = autoremove_wake_function;		\
-		INIT_LIST_HEAD(&(wait)->task_list);			\
-		(wait)->flags = 0;					\
+#define init_wait(wait)								\
+	do {									\
+		(wait)->private = current;					\
+		(wait)->func = autoremove_wake_function;			\
+		INIT_LIST_HEAD(&(wait)->entry);					\
+		(wait)->flags = 0;						\
 	} while (0)
 
-
-extern int bit_wait(struct wait_bit_key *, int);
-extern int bit_wait_io(struct wait_bit_key *, int);
-extern int bit_wait_timeout(struct wait_bit_key *, int);
-extern int bit_wait_io_timeout(struct wait_bit_key *, int);
-
-/**
- * wait_on_bit - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that waits on a bit.
- * For instance, if one were to have waiters on a bitflag, one would
- * call wait_on_bit() in threads waiting for the bit to clear.
- * One uses wait_on_bit() where one is waiting for the bit to clear,
- * but has no intention of setting it.
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
- */
-static inline int
-wait_on_bit(unsigned long *word, int bit, unsigned mode)
-{
-	might_sleep();
-	if (!test_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit(word, bit,
-				       bit_wait,
-				       mode);
-}
-
-/**
- * wait_on_bit_io - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared.  This is similar to wait_on_bit(), but calls
- * io_schedule() instead of schedule() for the actual waiting.
- *
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
- */
-static inline int
-wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
-{
-	might_sleep();
-	if (!test_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit(word, bit,
-				       bit_wait_io,
-				       mode);
-}
-
-/**
- * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- * @timeout: timeout, in jiffies
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared. This is similar to wait_on_bit(), except also takes a
- * timeout parameter.
- *
- * Returned value will be zero if the bit was cleared before the
- * @timeout elapsed, or non-zero if the @timeout elapsed or process
- * received a signal and the mode permitted wakeup on that signal.
- */
-static inline int
-wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
-		    unsigned long timeout)
-{
-	might_sleep();
-	if (!test_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit_timeout(word, bit,
-					       bit_wait_timeout,
-					       mode, timeout);
-}
-
-/**
- * wait_on_bit_action - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @action: the function used to sleep, which may take special actions
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared, and allow the waiting action to be specified.
- * This is like wait_on_bit() but allows fine control of how the waiting
- * is done.
- *
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
- */
-static inline int
-wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
-		   unsigned mode)
-{
-	might_sleep();
-	if (!test_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit(word, bit, action, mode);
-}
-
-/**
- * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that waits on a bit
- * when one intends to set it, for instance, trying to lock bitflags.
- * For instance, if one were to have waiters trying to set bitflag
- * and waiting for it to clear before setting it, one would call
- * wait_on_bit() in threads waiting to be able to set the bit.
- * One uses wait_on_bit_lock() where one is waiting for the bit to
- * clear with the intention of setting it, and when done, clearing it.
- *
- * Returns zero if the bit was (eventually) found to be clear and was
- * set.  Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
- */
-static inline int
-wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
-{
-	might_sleep();
-	if (!test_and_set_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode);
-}
-
-/**
- * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared and then to atomically set it.  This is similar
- * to wait_on_bit(), but calls io_schedule() instead of schedule()
- * for the actual waiting.
- *
- * Returns zero if the bit was (eventually) found to be clear and was
- * set.  Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
- */
-static inline int
-wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode)
-{
-	might_sleep();
-	if (!test_and_set_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode);
-}
-
-/**
- * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @action: the function used to sleep, which may take special actions
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared and then to set it, and allow the waiting action
- * to be specified.
- * This is like wait_on_bit() but allows fine control of how the waiting
- * is done.
- *
- * Returns zero if the bit was (eventually) found to be clear and was
- * set.  Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
- */
-static inline int
-wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action,
-			unsigned mode)
-{
-	might_sleep();
-	if (!test_and_set_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit_lock(word, bit, action, mode);
-}
-
-/**
- * wait_on_atomic_t - Wait for an atomic_t to become 0
- * @val: The atomic value being waited on, a kernel virtual address
- * @action: the function used to sleep, which may take special actions
- * @mode: the task state to sleep in
- *
- * Wait for an atomic_t to become 0.  We abuse the bit-wait waitqueue table for
- * the purpose of getting a waitqueue, but we set the key to a bit number
- * outside of the target 'word'.
- */
-static inline
-int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
-{
-	might_sleep();
-	if (atomic_read(val) == 0)
-		return 0;
-	return out_of_line_wait_on_atomic_t(val, action, mode);
-}
-
 #endif /* _LINUX_WAIT_H */
diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
new file mode 100644
index 000000000000..12b26660d7e9
--- /dev/null
+++ b/include/linux/wait_bit.h
@@ -0,0 +1,261 @@
+#ifndef _LINUX_WAIT_BIT_H
+#define _LINUX_WAIT_BIT_H
+
+/*
+ * Linux wait-bit related types and methods:
+ */
+#include <linux/wait.h>
+
+struct wait_bit_key {
+	void			*flags;
+	int			bit_nr;
+#define WAIT_ATOMIC_T_BIT_NR	-1
+	unsigned long		timeout;
+};
+
+struct wait_bit_queue_entry {
+	struct wait_bit_key	key;
+	struct wait_queue_entry	wq_entry;
+};
+
+#define __WAIT_BIT_KEY_INITIALIZER(word, bit)					\
+	{ .flags = word, .bit_nr = bit, }
+
+#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p)					\
+	{ .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, }
+
+typedef int wait_bit_action_f(struct wait_bit_key *key, int mode);
+void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit);
+int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
+int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
+void wake_up_bit(void *word, int bit);
+void wake_up_atomic_t(atomic_t *p);
+int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode);
+int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout);
+int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode);
+int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode);
+struct wait_queue_head *bit_waitqueue(void *word, int bit);
+extern void __init wait_bit_init(void);
+
+int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
+
+#define DEFINE_WAIT_BIT(name, word, bit)					\
+	struct wait_bit_queue_entry name = {					\
+		.key = __WAIT_BIT_KEY_INITIALIZER(word, bit),			\
+		.wq_entry = {							\
+			.private	= current,				\
+			.func		= wake_bit_function,			\
+			.entry		=					\
+				LIST_HEAD_INIT((name).wq_entry.entry),		\
+		},								\
+	}
+
+extern int bit_wait(struct wait_bit_key *key, int bit);
+extern int bit_wait_io(struct wait_bit_key *key, int bit);
+extern int bit_wait_timeout(struct wait_bit_key *key, int bit);
+extern int bit_wait_io_timeout(struct wait_bit_key *key, int bit);
+
+/**
+ * wait_on_bit - wait for a bit to be cleared
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ *
+ * There is a standard hashed waitqueue table for generic use. This
+ * is the part of the hashtable's accessor API that waits on a bit.
+ * For instance, if one were to have waiters on a bitflag, one would
+ * call wait_on_bit() in threads waiting for the bit to clear.
+ * One uses wait_on_bit() where one is waiting for the bit to clear,
+ * but has no intention of setting it.
+ * Returned value will be zero if the bit was cleared, or non-zero
+ * if the process received a signal and the mode permitted wakeup
+ * on that signal.
+ */
+static inline int
+wait_on_bit(unsigned long *word, int bit, unsigned mode)
+{
+	might_sleep();
+	if (!test_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit(word, bit,
+				       bit_wait,
+				       mode);
+}
+
+/**
+ * wait_on_bit_io - wait for a bit to be cleared
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared.  This is similar to wait_on_bit(), but calls
+ * io_schedule() instead of schedule() for the actual waiting.
+ *
+ * Returned value will be zero if the bit was cleared, or non-zero
+ * if the process received a signal and the mode permitted wakeup
+ * on that signal.
+ */
+static inline int
+wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
+{
+	might_sleep();
+	if (!test_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit(word, bit,
+				       bit_wait_io,
+				       mode);
+}
+
+/**
+ * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ * @timeout: timeout, in jiffies
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared. This is similar to wait_on_bit(), except also takes a
+ * timeout parameter.
+ *
+ * Returned value will be zero if the bit was cleared before the
+ * @timeout elapsed, or non-zero if the @timeout elapsed or process
+ * received a signal and the mode permitted wakeup on that signal.
+ */
+static inline int
+wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
+		    unsigned long timeout)
+{
+	might_sleep();
+	if (!test_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit_timeout(word, bit,
+					       bit_wait_timeout,
+					       mode, timeout);
+}
+
+/**
+ * wait_on_bit_action - wait for a bit to be cleared
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @action: the function used to sleep, which may take special actions
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared, and allow the waiting action to be specified.
+ * This is like wait_on_bit() but allows fine control of how the waiting
+ * is done.
+ *
+ * Returned value will be zero if the bit was cleared, or non-zero
+ * if the process received a signal and the mode permitted wakeup
+ * on that signal.
+ */
+static inline int
+wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
+		   unsigned mode)
+{
+	might_sleep();
+	if (!test_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit(word, bit, action, mode);
+}
+
+/**
+ * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ *
+ * There is a standard hashed waitqueue table for generic use. This
+ * is the part of the hashtable's accessor API that waits on a bit
+ * when one intends to set it, for instance, trying to lock bitflags.
+ * For instance, if one were to have waiters trying to set bitflag
+ * and waiting for it to clear before setting it, one would call
+ * wait_on_bit() in threads waiting to be able to set the bit.
+ * One uses wait_on_bit_lock() where one is waiting for the bit to
+ * clear with the intention of setting it, and when done, clearing it.
+ *
+ * Returns zero if the bit was (eventually) found to be clear and was
+ * set.  Returns non-zero if a signal was delivered to the process and
+ * the @mode allows that signal to wake the process.
+ */
+static inline int
+wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
+{
+	might_sleep();
+	if (!test_and_set_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode);
+}
+
+/**
+ * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared and then to atomically set it.  This is similar
+ * to wait_on_bit(), but calls io_schedule() instead of schedule()
+ * for the actual waiting.
+ *
+ * Returns zero if the bit was (eventually) found to be clear and was
+ * set.  Returns non-zero if a signal was delivered to the process and
+ * the @mode allows that signal to wake the process.
+ */
+static inline int
+wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode)
+{
+	might_sleep();
+	if (!test_and_set_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode);
+}
+
+/**
+ * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @action: the function used to sleep, which may take special actions
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared and then to set it, and allow the waiting action
+ * to be specified.
+ * This is like wait_on_bit() but allows fine control of how the waiting
+ * is done.
+ *
+ * Returns zero if the bit was (eventually) found to be clear and was
+ * set.  Returns non-zero if a signal was delivered to the process and
+ * the @mode allows that signal to wake the process.
+ */
+static inline int
+wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action,
+			unsigned mode)
+{
+	might_sleep();
+	if (!test_and_set_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit_lock(word, bit, action, mode);
+}
+
+/**
+ * wait_on_atomic_t - Wait for an atomic_t to become 0
+ * @val: The atomic value being waited on, a kernel virtual address
+ * @action: the function used to sleep, which may take special actions
+ * @mode: the task state to sleep in
+ *
+ * Wait for an atomic_t to become 0.  We abuse the bit-wait waitqueue table for
+ * the purpose of getting a waitqueue, but we set the key to a bit number
+ * outside of the target 'word'.
+ */
+static inline
+int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
+{
+	might_sleep();
+	if (atomic_read(val) == 0)
+		return 0;
+	return out_of_line_wait_on_atomic_t(val, action, mode);
+}
+
+#endif /* _LINUX_WAIT_BIT_H */
diff --git a/include/media/cec-notifier.h b/include/media/cec-notifier.h
index eb50ce54b759..298f996969df 100644
--- a/include/media/cec-notifier.h
+++ b/include/media/cec-notifier.h
@@ -29,7 +29,7 @@ struct edid;
 struct cec_adapter;
 struct cec_notifier;
 
-#ifdef CONFIG_MEDIA_CEC_NOTIFIER
+#if IS_REACHABLE(CONFIG_CEC_CORE) && IS_ENABLED(CONFIG_CEC_NOTIFIER)
 
 /**
  * cec_notifier_get - find or create a new cec_notifier for the given device.
@@ -106,6 +106,16 @@ static inline void cec_notifier_set_phys_addr_from_edid(struct cec_notifier *n,
 {
 }
 
+static inline void cec_notifier_register(struct cec_notifier *n,
+			 struct cec_adapter *adap,
+			 void (*callback)(struct cec_adapter *adap, u16 pa))
+{
+}
+
+static inline void cec_notifier_unregister(struct cec_notifier *n)
+{
+}
+
 #endif
 
 #endif
diff --git a/include/media/cec.h b/include/media/cec.h
index b8eb895731d5..201f060978da 100644
--- a/include/media/cec.h
+++ b/include/media/cec.h
@@ -173,7 +173,7 @@ struct cec_adapter {
 	bool passthrough;
 	struct cec_log_addrs log_addrs;
 
-#ifdef CONFIG_MEDIA_CEC_NOTIFIER
+#ifdef CONFIG_CEC_NOTIFIER
 	struct cec_notifier *notifier;
 #endif
 
@@ -206,7 +206,7 @@ static inline bool cec_is_sink(const struct cec_adapter *adap)
 #define cec_phys_addr_exp(pa) \
 	((pa) >> 12), ((pa) >> 8) & 0xf, ((pa) >> 4) & 0xf, (pa) & 0xf
 
-#if IS_ENABLED(CONFIG_CEC_CORE)
+#if IS_REACHABLE(CONFIG_CEC_CORE)
 struct cec_adapter *cec_allocate_adapter(const struct cec_adap_ops *ops,
 		void *priv, const char *name, u32 caps, u8 available_las);
 int cec_register_adapter(struct cec_adapter *adap, struct device *parent);
@@ -300,7 +300,7 @@ u16 cec_phys_addr_for_input(u16 phys_addr, u8 input);
  */
 int cec_phys_addr_validate(u16 phys_addr, u16 *parent, u16 *port);
 
-#ifdef CONFIG_MEDIA_CEC_NOTIFIER
+#ifdef CONFIG_CEC_NOTIFIER
 void cec_register_cec_notifier(struct cec_adapter *adap,
 			       struct cec_notifier *notifier);
 #endif
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index fd60eccb59a6..75e612a45824 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -62,7 +62,7 @@ struct unix_sock {
 #define UNIX_GC_CANDIDATE	0
 #define UNIX_GC_MAYBE_CYCLE	1
 	struct socket_wq	peer_wq;
-	wait_queue_t		peer_wake;
+	wait_queue_entry_t		peer_wake;
 };
 
 static inline struct unix_sock *unix_sk(const struct sock *sk)
diff --git a/include/net/dst.h b/include/net/dst.h
index 049af33da3b6..cfc043784166 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -107,10 +107,16 @@ struct dst_entry {
 	};
 };
 
+struct dst_metrics {
+	u32		metrics[RTAX_MAX];
+	atomic_t	refcnt;
+};
+extern const struct dst_metrics dst_default_metrics;
+
 u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
-extern const u32 dst_default_metrics[];
 
 #define DST_METRICS_READ_ONLY		0x1UL
+#define DST_METRICS_REFCOUNTED		0x2UL
 #define DST_METRICS_FLAGS		0x3UL
 #define __DST_METRICS_PTR(Y)	\
 	((u32 *)((Y) & ~DST_METRICS_FLAGS))
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 6692c5758b33..f7f6aa789c61 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -114,11 +114,11 @@ struct fib_info {
 	__be32			fib_prefsrc;
 	u32			fib_tb_id;
 	u32			fib_priority;
-	u32			*fib_metrics;
-#define fib_mtu fib_metrics[RTAX_MTU-1]
-#define fib_window fib_metrics[RTAX_WINDOW-1]
-#define fib_rtt fib_metrics[RTAX_RTT-1]
-#define fib_advmss fib_metrics[RTAX_ADVMSS-1]
+	struct dst_metrics	*fib_metrics;
+#define fib_mtu fib_metrics->metrics[RTAX_MTU-1]
+#define fib_window fib_metrics->metrics[RTAX_WINDOW-1]
+#define fib_rtt fib_metrics->metrics[RTAX_RTT-1]
+#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1]
 	int			fib_nhs;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	int			fib_weight;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index dbf0abba33b8..3e505bbff8ca 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1007,6 +1007,7 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
  */
 extern const struct proto_ops inet6_stream_ops;
 extern const struct proto_ops inet6_dgram_ops;
+extern const struct proto_ops inet6_sockraw_ops;
 
 struct group_source_req;
 struct group_filter;
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index e04fa7691e5d..c519bb5b5bb8 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -9,6 +9,7 @@
 
 #ifndef _NF_CONNTRACK_HELPER_H
 #define _NF_CONNTRACK_HELPER_H
+#include <linux/refcount.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_expect.h>
@@ -26,6 +27,7 @@ struct nf_conntrack_helper {
 	struct hlist_node hnode;	/* Internal use. */
 
 	char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */
+	refcount_t refcnt;
 	struct module *me;		/* pointer to self */
 	const struct nf_conntrack_expect_policy *expect_policy;
 
@@ -79,6 +81,8 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name,
 struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name,
 							       u16 l3num,
 							       u8 protonum);
+void nf_conntrack_helper_put(struct nf_conntrack_helper *helper);
+
 void nf_ct_helper_init(struct nf_conntrack_helper *helper,
 		       u16 l3num, u16 protonum, const char *name,
 		       u16 default_port, u16 spec_port, u32 id,
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 028faec8fc27..8a8bab8d7b15 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -176,7 +176,7 @@ struct nft_data_desc {
 int nft_data_init(const struct nft_ctx *ctx,
 		  struct nft_data *data, unsigned int size,
 		  struct nft_data_desc *desc, const struct nlattr *nla);
-void nft_data_uninit(const struct nft_data *data, enum nft_data_types type);
+void nft_data_release(const struct nft_data *data, enum nft_data_types type);
 int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
 		  enum nft_data_types type, unsigned int len);
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 66349e49d468..f33e3d134e0b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -995,7 +995,7 @@ struct smc_hashinfo;
 struct module;
 
 /*
- * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
+ * caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes
  * un-modified. Special care is taken when initializing object to zero.
  */
 static inline void sk_prot_clear_nulls(struct sock *sk, int size)
diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h
index f31fb6331a53..3248beaf16b0 100644
--- a/include/net/tc_act/tc_csum.h
+++ b/include/net/tc_act/tc_csum.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <net/act_api.h>
+#include <linux/tc_act/tc_csum.h>
 
 struct tcf_csum {
 	struct tc_action common;
@@ -11,4 +12,18 @@ struct tcf_csum {
 };
 #define to_tcf_csum(a) ((struct tcf_csum *)a)
 
+static inline bool is_tcf_csum(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (a->ops && a->ops->type == TCA_ACT_CSUM)
+		return true;
+#endif
+	return false;
+}
+
+static inline u32 tcf_csum_update_flags(const struct tc_action *a)
+{
+	return to_tcf_csum(a)->update_flags;
+}
+
 #endif /* __NET_TC_CSUM_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 38a7427ae902..be6223c586fa 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -924,7 +924,7 @@ struct tcp_congestion_ops {
 	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
 	/* call when ack arrives (optional) */
 	void (*in_ack_event)(struct sock *sk, u32 flags);
-	/* new value of cwnd after loss (optional) */
+	/* new value of cwnd after loss (required) */
 	u32  (*undo_cwnd)(struct sock *sk);
 	/* hook for packet ack accounting (optional) */
 	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
diff --git a/include/net/wext.h b/include/net/wext.h
index 345911965dbb..454ff763eeba 100644
--- a/include/net/wext.h
+++ b/include/net/wext.h
@@ -6,7 +6,7 @@
 struct net;
 
 #ifdef CONFIG_WEXT_CORE
-int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
 		      void __user *arg);
 int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
 			     unsigned long arg);
@@ -14,7 +14,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
 struct iw_statistics *get_wireless_stats(struct net_device *dev);
 int call_commit_handler(struct net_device *dev);
 #else
-static inline int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+static inline int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
 				    void __user *arg)
 {
 	return -EINVAL;
diff --git a/include/net/x25.h b/include/net/x25.h
index c383aa4edbf0..6d30a01d281d 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -298,10 +298,10 @@ void x25_check_rbuf(struct sock *);
 
 /* sysctl_net_x25.c */
 #ifdef CONFIG_SYSCTL
-void x25_register_sysctl(void);
+int x25_register_sysctl(void);
 void x25_unregister_sysctl(void);
 #else
-static inline void x25_register_sysctl(void) {};
+static inline int x25_register_sysctl(void) { return 0; };
 static inline void x25_unregister_sysctl(void) {};
 #endif /* CONFIG_SYSCTL */
 
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 6793a30c66b1..62f5a259e597 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -979,10 +979,6 @@ struct xfrm_dst {
 	struct flow_cache_object flo;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	int num_pols, num_xfrms;
-#ifdef CONFIG_XFRM_SUB_POLICY
-	struct flowi *origin;
-	struct xfrm_selector *partner;
-#endif
 	u32 xfrm_genid;
 	u32 policy_genid;
 	u32 route_mtu_cached;
@@ -998,12 +994,6 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
 	dst_release(xdst->route);
 	if (likely(xdst->u.dst.xfrm))
 		xfrm_state_put(xdst->u.dst.xfrm);
-#ifdef CONFIG_XFRM_SUB_POLICY
-	kfree(xdst->origin);
-	xdst->origin = NULL;
-	kfree(xdst->partner);
-	xdst->partner = NULL;
-#endif
 }
 #endif
 
@@ -1860,8 +1850,9 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
 }
 #endif
 
-#ifdef CONFIG_XFRM_OFFLOAD
 void __net_init xfrm_dev_init(void);
+
+#ifdef CONFIG_XFRM_OFFLOAD
 int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features);
 int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 		       struct xfrm_user_offload *xuo);
@@ -1887,10 +1878,6 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
 	}
 }
 #else
-static inline void __net_init xfrm_dev_init(void)
-{
-}
-
 static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
 {
 	return 0;
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index f5f70e345318..355b81f4242d 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -158,7 +158,6 @@ enum sa_path_rec_type {
 };
 
 struct sa_path_rec_ib {
-	__be64       service_id;
 	__be16       dlid;
 	__be16       slid;
 	u8           raw_traffic;
@@ -174,7 +173,6 @@ struct sa_path_rec_roce {
 };
 
 struct sa_path_rec_opa {
-	__be64       service_id;
 	__be32       dlid;
 	__be32       slid;
 	u8           raw_traffic;
@@ -189,6 +187,7 @@ struct sa_path_rec_opa {
 struct sa_path_rec {
 	union ib_gid dgid;
 	union ib_gid sgid;
+	__be64       service_id;
 	/* reserved */
 	__be32       flow_label;
 	u8           hop_limit;
@@ -262,7 +261,7 @@ static inline void path_conv_opa_to_ib(struct sa_path_rec *ib,
 		ib->ib.dlid	= htons(ntohl(opa->opa.dlid));
 		ib->ib.slid	= htons(ntohl(opa->opa.slid));
 	}
-	ib->ib.service_id	= opa->opa.service_id;
+	ib->service_id		= opa->service_id;
 	ib->ib.raw_traffic	= opa->opa.raw_traffic;
 }
 
@@ -281,7 +280,7 @@ static inline void path_conv_ib_to_opa(struct sa_path_rec *opa,
 	}
 	opa->opa.slid		= slid;
 	opa->opa.dlid		= dlid;
-	opa->opa.service_id	= ib->ib.service_id;
+	opa->service_id		= ib->service_id;
 	opa->opa.raw_traffic	= ib->ib.raw_traffic;
 }
 
@@ -591,15 +590,6 @@ static inline bool sa_path_is_roce(struct sa_path_rec *rec)
 		(rec->rec_type == SA_PATH_REC_TYPE_ROCE_V2));
 }
 
-static inline void sa_path_set_service_id(struct sa_path_rec *rec,
-					  __be64 service_id)
-{
-	if (rec->rec_type == SA_PATH_REC_TYPE_IB)
-		rec->ib.service_id = service_id;
-	else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
-		rec->opa.service_id = service_id;
-}
-
 static inline void sa_path_set_slid(struct sa_path_rec *rec, __be32 slid)
 {
 	if (rec->rec_type == SA_PATH_REC_TYPE_IB)
@@ -625,15 +615,6 @@ static inline void sa_path_set_raw_traffic(struct sa_path_rec *rec,
 		rec->opa.raw_traffic = raw_traffic;
 }
 
-static inline __be64 sa_path_get_service_id(struct sa_path_rec *rec)
-{
-	if (rec->rec_type == SA_PATH_REC_TYPE_IB)
-		return rec->ib.service_id;
-	else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
-		return rec->opa.service_id;
-	return 0;
-}
-
 static inline __be32 sa_path_get_slid(struct sa_path_rec *rec)
 {
 	if (rec->rec_type == SA_PATH_REC_TYPE_IB)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index f0cb4906478a..ba8314ec5768 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -62,6 +62,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/uaccess.h>
 #include <linux/cgroup_rdma.h>
+#include <uapi/rdma/ib_user_verbs.h>
 
 extern struct workqueue_struct *ib_wq;
 extern struct workqueue_struct *ib_comp_wq;
@@ -1889,8 +1890,6 @@ enum ib_mad_result {
 	IB_MAD_RESULT_CONSUMED = 1 << 2  /* Packet consumed: stop processing */
 };
 
-#define IB_DEVICE_NAME_MAX 64
-
 struct ib_port_cache {
 	struct ib_pkey_cache  *pkey;
 	struct ib_gid_table   *gid;
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 585266144329..348c102cb5f6 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -10,9 +10,6 @@ struct ibnl_client_cbs {
 	struct module *module;
 };
 
-int ibnl_init(void);
-void ibnl_cleanup(void);
-
 /**
  * Add a a client to the list of IB netlink exporters.
  * @index: Index of the added client
@@ -77,11 +74,4 @@ int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
 int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
 			unsigned int group, gfp_t flags);
 
-/**
- * Check if there are any listeners to the netlink group
- * @group: the netlink group ID
- * Returns 0 on success or a negative for no listeners.
- */
-int ibnl_chk_listeners(unsigned int group);
-
 #endif /* _RDMA_NETLINK_H */
diff --git a/include/scsi/fc/Kbuild b/include/scsi/fc/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/include/scsi/fc/Kbuild
+++ /dev/null
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index a09cca829082..a29d3086eb56 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -157,7 +157,7 @@ struct osd_request {
 
 	osd_req_done_fn *async_done;
 	void *async_private;
-	int async_error;
+	blk_status_t async_error;
 	int req_errors;
 };
 
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index b379f93a2c48..da9bf2bcdf1a 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -166,6 +166,7 @@ extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
 extern void scsi_kunmap_atomic_sg(void *virt);
 
 extern int scsi_init_io(struct scsi_cmnd *cmd);
+extern void scsi_initialize_rq(struct request *rq);
 
 extern int scsi_dma_map(struct scsi_cmnd *cmd);
 extern void scsi_dma_unmap(struct scsi_cmnd *cmd);
diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
index 6ba66e01f6df..ce78ec8e367d 100644
--- a/include/scsi/scsi_proto.h
+++ b/include/scsi/scsi_proto.h
@@ -112,6 +112,7 @@
 #define WRITE_16              0x8a
 #define READ_ATTRIBUTE        0x8c
 #define WRITE_ATTRIBUTE	      0x8d
+#define WRITE_VERIFY_16	      0x8e
 #define VERIFY_16	      0x8f
 #define SYNCHRONIZE_CACHE_16  0x91
 #define WRITE_SAME_16	      0x93
diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h
index f0c76f9dc285..e0afa445ee4e 100644
--- a/include/scsi/scsi_request.h
+++ b/include/scsi/scsi_request.h
@@ -27,6 +27,6 @@ static inline void scsi_req_free_cmd(struct scsi_request *req)
 		kfree(req->cmd);
 }
 
-void scsi_req_init(struct request *);
+void scsi_req_init(struct scsi_request *req);
 
 #endif /* _SCSI_SCSI_REQUEST_H */
diff --git a/include/soc/fsl/qe/immap_qe.h b/include/soc/fsl/qe/immap_qe.h
index c76ef30b05ba..7baaabd5ec2c 100644
--- a/include/soc/fsl/qe/immap_qe.h
+++ b/include/soc/fsl/qe/immap_qe.h
@@ -464,25 +464,6 @@ struct qe_immap {
 } __attribute__ ((packed));
 
 extern struct qe_immap __iomem *qe_immr;
-extern phys_addr_t get_qe_base(void);
-
-/*
- * Returns the offset within the QE address space of the given pointer.
- *
- * Note that the QE does not support 36-bit physical addresses, so if
- * get_qe_base() returns a number above 4GB, the caller will probably fail.
- */
-static inline phys_addr_t immrbar_virt_to_phys(void *address)
-{
-	void *q = (void *)qe_immr;
-
-	/* Is it a MURAM address? */
-	if ((address >= q) && (address < (q + QE_IMMAP_SIZE)))
-		return get_qe_base() + (address - q);
-
-	/* It's an address returned by kmalloc */
-	return virt_to_phys(address);
-}
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_IMMAP_QE_H */
diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h
index 70339d7958c0..0cd4c11479b1 100644
--- a/include/soc/fsl/qe/qe.h
+++ b/include/soc/fsl/qe/qe.h
@@ -243,6 +243,7 @@ static inline int qe_alive_during_sleep(void)
 #define qe_muram_free cpm_muram_free
 #define qe_muram_addr cpm_muram_addr
 #define qe_muram_offset cpm_muram_offset
+#define qe_muram_dma cpm_muram_dma
 
 #define qe_setbits32(_addr, _v) iowrite32be(ioread32be(_addr) |  (_v), (_addr))
 #define qe_clrbits32(_addr, _v) iowrite32be(ioread32be(_addr) & ~(_v), (_addr))
diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index 275581d483dd..5f17fb770477 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -557,6 +557,7 @@ struct iscsi_conn {
 #define LOGIN_FLAGS_READ_ACTIVE		1
 #define LOGIN_FLAGS_CLOSED		2
 #define LOGIN_FLAGS_READY		4
+#define LOGIN_FLAGS_INITIAL_PDU		8
 	unsigned long		login_flags;
 	struct delayed_work	login_work;
 	struct delayed_work	login_cleanup_work;
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 1b0f447ce850..e475531565fd 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -10,6 +10,7 @@
  * backend module.
  */
 #define TRANSPORT_FLAG_PASSTHROUGH_ALUA		0x2
+#define TRANSPORT_FLAG_PASSTHROUGH_PGR          0x4
 
 struct request_queue;
 struct scatterlist;
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index ccfad0e9c2cd..0c1dce2ac6f0 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -664,6 +664,7 @@ struct se_dev_attrib {
 	int		pi_prot_format;
 	enum target_prot_type pi_prot_type;
 	enum target_prot_type hw_pi_prot_type;
+	int		pi_prot_verify;
 	int		enforce_pr_isids;
 	int		force_pr_aptpl;
 	int		is_nonrot;
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index a3c3cab643a9..e37973526153 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -12,6 +12,7 @@ struct btrfs_root;
 struct btrfs_fs_info;
 struct btrfs_inode;
 struct extent_map;
+struct btrfs_file_extent_item;
 struct btrfs_ordered_extent;
 struct btrfs_delayed_ref_node;
 struct btrfs_delayed_tree_ref;
@@ -24,6 +25,7 @@ struct extent_buffer;
 struct btrfs_work;
 struct __btrfs_workqueue;
 struct btrfs_qgroup_extent_record;
+struct btrfs_qgroup;
 
 #define show_ref_type(type)						\
 	__print_symbolic(type,						\
@@ -54,6 +56,12 @@ struct btrfs_qgroup_extent_record;
 	      (obj >= BTRFS_ROOT_TREE_OBJECTID &&			\
 	       obj <= BTRFS_QUOTA_TREE_OBJECTID)) ? __show_root_type(obj) : "-"
 
+#define show_fi_type(type)						\
+	__print_symbolic(type,						\
+		 { BTRFS_FILE_EXTENT_INLINE,	"INLINE" },		\
+		 { BTRFS_FILE_EXTENT_REG,	"REG"	 },		\
+		 { BTRFS_FILE_EXTENT_PREALLOC,	"PREALLOC"})
+
 #define BTRFS_GROUP_FLAGS	\
 	{ BTRFS_BLOCK_GROUP_DATA,	"DATA"},	\
 	{ BTRFS_BLOCK_GROUP_SYSTEM,	"SYSTEM"},	\
@@ -213,7 +221,7 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		__entry->block_start	= map->block_start;
 		__entry->block_len	= map->block_len;
 		__entry->flags		= map->flags;
-		__entry->refs		= atomic_read(&map->refs);
+		__entry->refs		= refcount_read(&map->refs);
 		__entry->compress_type	= map->compress_type;
 	),
 
@@ -232,6 +240,138 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		  __entry->refs, __entry->compress_type)
 );
 
+/* file extent item */
+DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular,
+
+	TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
+		 struct btrfs_file_extent_item *fi, u64 start),
+
+	TP_ARGS(bi, l, fi, start),
+
+	TP_STRUCT__entry_btrfs(
+		__field(	u64,	root_obj	)
+		__field(	u64,	ino		)
+		__field(	loff_t,	isize		)
+		__field(	u64,	disk_isize	)
+		__field(	u64,	num_bytes	)
+		__field(	u64,	ram_bytes	)
+		__field(	u64,	disk_bytenr	)
+		__field(	u64,	disk_num_bytes	)
+		__field(	u64,	extent_offset	)
+		__field(	u8,	extent_type	)
+		__field(	u8,	compression	)
+		__field(	u64,	extent_start	)
+		__field(	u64,	extent_end	)
+	),
+
+	TP_fast_assign_btrfs(bi->root->fs_info,
+		__entry->root_obj	= bi->root->objectid;
+		__entry->ino		= btrfs_ino(bi);
+		__entry->isize		= bi->vfs_inode.i_size;
+		__entry->disk_isize	= bi->disk_i_size;
+		__entry->num_bytes	= btrfs_file_extent_num_bytes(l, fi);
+		__entry->ram_bytes	= btrfs_file_extent_ram_bytes(l, fi);
+		__entry->disk_bytenr	= btrfs_file_extent_disk_bytenr(l, fi);
+		__entry->disk_num_bytes	= btrfs_file_extent_disk_num_bytes(l, fi);
+		__entry->extent_offset	= btrfs_file_extent_offset(l, fi);
+		__entry->extent_type	= btrfs_file_extent_type(l, fi);
+		__entry->compression	= btrfs_file_extent_compression(l, fi);
+		__entry->extent_start	= start;
+		__entry->extent_end	= (start + __entry->num_bytes);
+	),
+
+	TP_printk_btrfs(
+		"root=%llu(%s) inode=%llu size=%llu disk_isize=%llu "
+		"file extent range=[%llu %llu] "
+		"(num_bytes=%llu ram_bytes=%llu disk_bytenr=%llu "
+		"disk_num_bytes=%llu extent_offset=%llu type=%s "
+		"compression=%u",
+		show_root_type(__entry->root_obj), __entry->ino,
+		__entry->isize,
+		__entry->disk_isize, __entry->extent_start,
+		__entry->extent_end, __entry->num_bytes, __entry->ram_bytes,
+		__entry->disk_bytenr, __entry->disk_num_bytes,
+		__entry->extent_offset, show_fi_type(__entry->extent_type),
+		__entry->compression)
+);
+
+DECLARE_EVENT_CLASS(
+	btrfs__file_extent_item_inline,
+
+	TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
+		 struct btrfs_file_extent_item *fi, int slot, u64 start),
+
+	TP_ARGS(bi, l, fi, slot,  start),
+
+	TP_STRUCT__entry_btrfs(
+		__field(	u64,	root_obj	)
+		__field(	u64,	ino		)
+		__field(	loff_t,	isize		)
+		__field(	u64,	disk_isize	)
+		__field(	u8,	extent_type	)
+		__field(	u8,	compression	)
+		__field(	u64,	extent_start	)
+		__field(	u64,	extent_end	)
+	),
+
+	TP_fast_assign_btrfs(
+		bi->root->fs_info,
+		__entry->root_obj	= bi->root->objectid;
+		__entry->ino		= btrfs_ino(bi);
+		__entry->isize		= bi->vfs_inode.i_size;
+		__entry->disk_isize	= bi->disk_i_size;
+		__entry->extent_type	= btrfs_file_extent_type(l, fi);
+		__entry->compression	= btrfs_file_extent_compression(l, fi);
+		__entry->extent_start	= start;
+		__entry->extent_end	= (start + btrfs_file_extent_inline_len(l, slot, fi));
+	),
+
+	TP_printk_btrfs(
+		"root=%llu(%s) inode=%llu size=%llu disk_isize=%llu "
+		"file extent range=[%llu %llu] "
+		"extent_type=%s compression=%u",
+		show_root_type(__entry->root_obj), __entry->ino, __entry->isize,
+		__entry->disk_isize, __entry->extent_start,
+		__entry->extent_end, show_fi_type(__entry->extent_type),
+		__entry->compression)
+);
+
+DEFINE_EVENT(
+	btrfs__file_extent_item_regular, btrfs_get_extent_show_fi_regular,
+
+	TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
+		 struct btrfs_file_extent_item *fi, u64 start),
+
+	TP_ARGS(bi, l, fi, start)
+);
+
+DEFINE_EVENT(
+	btrfs__file_extent_item_regular, btrfs_truncate_show_fi_regular,
+
+	TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
+		 struct btrfs_file_extent_item *fi, u64 start),
+
+	TP_ARGS(bi, l, fi, start)
+);
+
+DEFINE_EVENT(
+	btrfs__file_extent_item_inline, btrfs_get_extent_show_fi_inline,
+
+	TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
+		 struct btrfs_file_extent_item *fi, int slot, u64 start),
+
+	TP_ARGS(bi, l, fi, slot, start)
+);
+
+DEFINE_EVENT(
+	btrfs__file_extent_item_inline, btrfs_truncate_show_fi_inline,
+
+	TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
+		 struct btrfs_file_extent_item *fi, int slot, u64 start),
+
+	TP_ARGS(bi, l, fi, slot, start)
+);
+
 #define show_ordered_flags(flags)					   \
 	__print_flags(flags, "|",					   \
 		{ (1 << BTRFS_ORDERED_IO_DONE), 	"IO_DONE" 	}, \
@@ -275,7 +415,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 		__entry->bytes_left	= ordered->bytes_left;
 		__entry->flags		= ordered->flags;
 		__entry->compress_type	= ordered->compress_type;
-		__entry->refs		= atomic_read(&ordered->refs);
+		__entry->refs		= refcount_read(&ordered->refs);
 		__entry->root_objectid	=
 				BTRFS_I(inode)->root->root_key.objectid;
 		__entry->truncated_len	= ordered->truncated_len;
@@ -1475,6 +1615,49 @@ TRACE_EVENT(qgroup_update_counters,
 		  __entry->cur_new_count)
 );
 
+TRACE_EVENT(qgroup_update_reserve,
+
+	TP_PROTO(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup,
+		 s64 diff),
+
+	TP_ARGS(fs_info, qgroup, diff),
+
+	TP_STRUCT__entry_btrfs(
+		__field(	u64,	qgid			)
+		__field(	u64,	cur_reserved		)
+		__field(	s64,	diff			)
+	),
+
+	TP_fast_assign_btrfs(fs_info,
+		__entry->qgid		= qgroup->qgroupid;
+		__entry->cur_reserved	= qgroup->reserved;
+		__entry->diff		= diff;
+	),
+
+	TP_printk_btrfs("qgid=%llu cur_reserved=%llu diff=%lld",
+		__entry->qgid, __entry->cur_reserved, __entry->diff)
+);
+
+TRACE_EVENT(qgroup_meta_reserve,
+
+	TP_PROTO(struct btrfs_root *root, s64 diff),
+
+	TP_ARGS(root, diff),
+
+	TP_STRUCT__entry_btrfs(
+		__field(	u64,	refroot			)
+		__field(	s64,	diff			)
+	),
+
+	TP_fast_assign_btrfs(root->fs_info,
+		__entry->refroot	= root->objectid;
+		__entry->diff		= diff;
+	),
+
+	TP_printk_btrfs("refroot=%llu(%s) diff=%lld",
+		show_root_type(__entry->refroot), __entry->diff)
+);
+
 #endif /* _TRACE_BTRFS_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index e3facb356838..91dc089d65b7 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -742,6 +742,7 @@ TRACE_EVENT(rcu_torture_read,
  *	"OnlineQ": _rcu_barrier() found online CPU with callbacks.
  *	"OnlineNQ": _rcu_barrier() found online CPU, no callbacks.
  *	"IRQ": An rcu_barrier_callback() callback posted on remote CPU.
+ *	"IRQNQ": An rcu_barrier_callback() callback found no callbacks.
  *	"CB": An rcu_barrier_callback() invoked a callback, not the last.
  *	"LastCB": An rcu_barrier_callback() invoked the last callback.
  *	"Inc2": _rcu_barrier() piggyback check counter incremented.
diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h
index 2b4a8ff72d0d..6cde5b3514c2 100644
--- a/include/trace/events/thermal.h
+++ b/include/trace/events/thermal.h
@@ -151,9 +151,9 @@ TRACE_EVENT(thermal_power_cpu_limit,
 TRACE_EVENT(thermal_power_devfreq_get_power,
 	TP_PROTO(struct thermal_cooling_device *cdev,
 		 struct devfreq_dev_status *status, unsigned long freq,
-		u32 dynamic_power, u32 static_power),
+		u32 dynamic_power, u32 static_power, u32 power),
 
-	TP_ARGS(cdev, status,  freq, dynamic_power, static_power),
+	TP_ARGS(cdev, status,  freq, dynamic_power, static_power, power),
 
 	TP_STRUCT__entry(
 		__string(type,         cdev->type    )
@@ -161,6 +161,7 @@ TRACE_EVENT(thermal_power_devfreq_get_power,
 		__field(u32,           load          )
 		__field(u32,           dynamic_power )
 		__field(u32,           static_power  )
+		__field(u32,           power)
 	),
 
 	TP_fast_assign(
@@ -169,11 +170,13 @@ TRACE_EVENT(thermal_power_devfreq_get_power,
 		__entry->load = (100 * status->busy_time) / status->total_time;
 		__entry->dynamic_power = dynamic_power;
 		__entry->static_power = static_power;
+		__entry->power = power;
 	),
 
-	TP_printk("type=%s freq=%lu load=%u dynamic_power=%u static_power=%u",
+	TP_printk("type=%s freq=%lu load=%u dynamic_power=%u static_power=%u power=%u",
 		__get_str(type), __entry->freq,
-		__entry->load, __entry->dynamic_power, __entry->static_power)
+		__entry->load, __entry->dynamic_power, __entry->static_power,
+		__entry->power)
 );
 
 TRACE_EVENT(thermal_power_devfreq_limit,
diff --git a/include/uapi/Kbuild b/include/uapi/Kbuild
deleted file mode 100644
index 245aa6e05e6a..000000000000
--- a/include/uapi/Kbuild
+++ /dev/null
@@ -1,15 +0,0 @@
-# UAPI Header export list
-# Top-level Makefile calls into asm-$(ARCH)
-# List only non-arch directories below
-
-
-header-y += asm-generic/
-header-y += linux/
-header-y += sound/
-header-y += mtd/
-header-y += rdma/
-header-y += video/
-header-y += drm/
-header-y += xen/
-header-y += scsi/
-header-y += misc/
diff --git a/include/uapi/asm-generic/Kbuild b/include/uapi/asm-generic/Kbuild
deleted file mode 100644
index b73de7bb7a62..000000000000
--- a/include/uapi/asm-generic/Kbuild
+++ /dev/null
@@ -1,36 +0,0 @@
-# UAPI Header export list
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += errno-base.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += int-l64.h
-header-y += int-ll64.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman-common.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += shmparam.h
-header-y += siginfo.h
-header-y += signal-defs.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += ucontext.h
-header-y += unistd.h
diff --git a/include/uapi/asm-generic/Kbuild.asm b/include/uapi/asm-generic/Kbuild.asm
index fcd50b759217..21381449d98a 100644
--- a/include/uapi/asm-generic/Kbuild.asm
+++ b/include/uapi/asm-generic/Kbuild.asm
@@ -1,49 +1,33 @@
 #
-# Headers that are optional in usr/include/asm/
-#
-opt-header += kvm.h
-opt-header += kvm_para.h
-opt-header += a.out.h
-
-#
 # Headers that are mandatory in usr/include/asm/
 #
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
-
-header-y += $(foreach hdr,$(opt-header), \
-	      $(if \
-		$(wildcard \
-			$(srctree)/arch/$(SRCARCH)/include/uapi/asm/$(hdr) \
-			$(srctree)/arch/$(SRCARCH)/include/asm/$(hdr) \
-		), \
-		$(hdr) \
-		))
+mandatory-y += auxvec.h
+mandatory-y += bitsperlong.h
+mandatory-y += byteorder.h
+mandatory-y += errno.h
+mandatory-y += fcntl.h
+mandatory-y += ioctl.h
+mandatory-y += ioctls.h
+mandatory-y += ipcbuf.h
+mandatory-y += mman.h
+mandatory-y += msgbuf.h
+mandatory-y += param.h
+mandatory-y += poll.h
+mandatory-y += posix_types.h
+mandatory-y += ptrace.h
+mandatory-y += resource.h
+mandatory-y += sembuf.h
+mandatory-y += setup.h
+mandatory-y += shmbuf.h
+mandatory-y += sigcontext.h
+mandatory-y += siginfo.h
+mandatory-y += signal.h
+mandatory-y += socket.h
+mandatory-y += sockios.h
+mandatory-y += stat.h
+mandatory-y += statfs.h
+mandatory-y += swab.h
+mandatory-y += termbits.h
+mandatory-y += termios.h
+mandatory-y += types.h
+mandatory-y += unistd.h
diff --git a/include/uapi/drm/Kbuild b/include/uapi/drm/Kbuild
deleted file mode 100644
index c97addd08f8c..000000000000
--- a/include/uapi/drm/Kbuild
+++ /dev/null
@@ -1,23 +0,0 @@
-# UAPI Header export list
-header-y += drm.h
-header-y += drm_fourcc.h
-header-y += drm_mode.h
-header-y += drm_sarea.h
-header-y += amdgpu_drm.h
-header-y += exynos_drm.h
-header-y += i810_drm.h
-header-y += i915_drm.h
-header-y += mga_drm.h
-header-y += nouveau_drm.h
-header-y += omap_drm.h
-header-y += qxl_drm.h
-header-y += r128_drm.h
-header-y += radeon_drm.h
-header-y += savage_drm.h
-header-y += sis_drm.h
-header-y += tegra_drm.h
-header-y += via_drm.h
-header-y += vmwgfx_drm.h
-header-y += msm_drm.h
-header-y += vc4_drm.h
-header-y += virtgpu_drm.h
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 516a9f285730..6c249e5cfb09 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -295,7 +295,10 @@ union drm_amdgpu_gem_wait_idle {
 };
 
 struct drm_amdgpu_wait_cs_in {
-	/** Command submission handle */
+	/* Command submission handle
+         * handle equals 0 means none to wait for
+         * handle equals ~0ull means wait for the latest sequence number
+         */
 	__u64 handle;
 	/** Absolute timeout to wait */
 	__u64 timeout;
@@ -764,6 +767,25 @@ struct drm_amdgpu_info_device {
 	__u64 cntl_sb_buf_gpu_addr;
 	/* NGG Parameter Cache */
 	__u64 param_buf_gpu_addr;
+	__u32 prim_buf_size;
+	__u32 pos_buf_size;
+	__u32 cntl_sb_buf_size;
+	__u32 param_buf_size;
+	/* wavefront size*/
+	__u32 wave_front_size;
+	/* shader visible vgprs*/
+	__u32 num_shader_visible_vgprs;
+	/* CU per shader array*/
+	__u32 num_cu_per_sh;
+	/* number of tcc blocks*/
+	__u32 num_tcc_blocks;
+	/* gs vgt table depth*/
+	__u32 gs_vgt_table_depth;
+	/* gs primitive buffer depth*/
+	__u32 gs_prim_buffer_depth;
+	/* max gs wavefront per vgt*/
+	__u32 max_gs_waves_per_vgt;
+	__u32 _pad1;
 };
 
 struct drm_amdgpu_info_hw_ip {
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 662c592b74dd..ca2787d9bf0f 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -1,495 +1,13 @@
 # UAPI Header export list
-header-y += android/
-header-y += byteorder/
-header-y += can/
-header-y += caif/
-header-y += dvb/
-header-y += hdlc/
-header-y += hsi/
-header-y += iio/
-header-y += isdn/
-header-y += mmc/
-header-y += nfsd/
-header-y += raid/
-header-y += spi/
-header-y += sunrpc/
-header-y += tc_act/
-header-y += tc_ematch/
-header-y += netfilter/
-header-y += netfilter_arp/
-header-y += netfilter_bridge/
-header-y += netfilter_ipv4/
-header-y += netfilter_ipv6/
-header-y += usb/
-header-y += wimax/
 
-genhdr-y += version.h
-
-ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/a.out.h \
-		  $(srctree)/arch/$(SRCARCH)/include/asm/a.out.h),)
-header-y += a.out.h
+ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/a.out.h),)
+no-export-headers += a.out.h
 endif
 
-header-y += acct.h
-header-y += adb.h
-header-y += adfs_fs.h
-header-y += affs_hardblocks.h
-header-y += agpgart.h
-header-y += aio_abi.h
-header-y += am437x-vpfe.h
-header-y += apm_bios.h
-header-y += arcfb.h
-header-y += atalk.h
-header-y += atmapi.h
-header-y += atmarp.h
-header-y += atmbr2684.h
-header-y += atmclip.h
-header-y += atmdev.h
-header-y += atm_eni.h
-header-y += atm.h
-header-y += atm_he.h
-header-y += atm_idt77105.h
-header-y += atmioc.h
-header-y += atmlec.h
-header-y += atmmpc.h
-header-y += atm_nicstar.h
-header-y += atmppp.h
-header-y += atmsap.h
-header-y += atmsvc.h
-header-y += atm_tcp.h
-header-y += atm_zatm.h
-header-y += audit.h
-header-y += auto_fs4.h
-header-y += auto_fs.h
-header-y += auxvec.h
-header-y += ax25.h
-header-y += b1lli.h
-header-y += batman_adv.h
-header-y += baycom.h
-header-y += bcm933xx_hcs.h
-header-y += bfs_fs.h
-header-y += binfmts.h
-header-y += blkpg.h
-header-y += blktrace_api.h
-header-y += blkzoned.h
-header-y += bpf_common.h
-header-y += bpf_perf_event.h
-header-y += bpf.h
-header-y += bpqether.h
-header-y += bsg.h
-header-y += bt-bmc.h
-header-y += btrfs.h
-header-y += can.h
-header-y += capability.h
-header-y += capi.h
-header-y += cciss_defs.h
-header-y += cciss_ioctl.h
-header-y += cdrom.h
-header-y += cec.h
-header-y += cec-funcs.h
-header-y += cgroupstats.h
-header-y += chio.h
-header-y += cm4000_cs.h
-header-y += cn_proc.h
-header-y += coda.h
-header-y += coda_psdev.h
-header-y += coff.h
-header-y += connector.h
-header-y += const.h
-header-y += cramfs_fs.h
-header-y += cuda.h
-header-y += cyclades.h
-header-y += cycx_cfm.h
-header-y += dcbnl.h
-header-y += dccp.h
-header-y += devlink.h
-header-y += dlmconstants.h
-header-y += dlm_device.h
-header-y += dlm.h
-header-y += dlm_netlink.h
-header-y += dlm_plock.h
-header-y += dm-ioctl.h
-header-y += dm-log-userspace.h
-header-y += dma-buf.h
-header-y += dn.h
-header-y += dqblk_xfs.h
-header-y += edd.h
-header-y += efs_fs_sb.h
-header-y += elfcore.h
-header-y += elf-em.h
-header-y += elf-fdpic.h
-header-y += elf.h
-header-y += errno.h
-header-y += errqueue.h
-header-y += ethtool.h
-header-y += eventpoll.h
-header-y += fadvise.h
-header-y += falloc.h
-header-y += fanotify.h
-header-y += fb.h
-header-y += fcntl.h
-header-y += fd.h
-header-y += fdreg.h
-header-y += fib_rules.h
-header-y += fiemap.h
-header-y += filter.h
-header-y += firewire-cdev.h
-header-y += firewire-constants.h
-header-y += flat.h
-header-y += fou.h
-header-y += fs.h
-header-y += fsl_hypervisor.h
-header-y += fuse.h
-header-y += futex.h
-header-y += gameport.h
-header-y += genetlink.h
-header-y += gen_stats.h
-header-y += gfs2_ondisk.h
-header-y += gigaset_dev.h
-header-y += gpio.h
-header-y += gsmmux.h
-header-y += gtp.h
-header-y += hdlcdrv.h
-header-y += hdlc.h
-header-y += hdreg.h
-header-y += hiddev.h
-header-y += hid.h
-header-y += hidraw.h
-header-y += hpet.h
-header-y += hsr_netlink.h
-header-y += hyperv.h
-header-y += hysdn_if.h
-header-y += i2c-dev.h
-header-y += i2c.h
-header-y += i2o-dev.h
-header-y += i8k.h
-header-y += icmp.h
-header-y += icmpv6.h
-header-y += if_addr.h
-header-y += if_addrlabel.h
-header-y += if_alg.h
-header-y += if_arcnet.h
-header-y += if_arp.h
-header-y += if_bonding.h
-header-y += if_bridge.h
-header-y += if_cablemodem.h
-header-y += if_eql.h
-header-y += if_ether.h
-header-y += if_fc.h
-header-y += if_fddi.h
-header-y += if_frad.h
-header-y += if.h
-header-y += if_hippi.h
-header-y += if_infiniband.h
-header-y += if_link.h
-header-y += if_ltalk.h
-header-y += if_macsec.h
-header-y += if_packet.h
-header-y += if_phonet.h
-header-y += if_plip.h
-header-y += if_ppp.h
-header-y += if_pppol2tp.h
-header-y += if_pppox.h
-header-y += if_slip.h
-header-y += if_team.h
-header-y += if_tun.h
-header-y += if_tunnel.h
-header-y += if_vlan.h
-header-y += if_x25.h
-header-y += ife.h
-header-y += igmp.h
-header-y += ila.h
-header-y += in6.h
-header-y += inet_diag.h
-header-y += in.h
-header-y += inotify.h
-header-y += input.h
-header-y += input-event-codes.h
-header-y += in_route.h
-header-y += ioctl.h
-header-y += ip6_tunnel.h
-header-y += ipc.h
-header-y += ip.h
-header-y += ipmi.h
-header-y += ipmi_msgdefs.h
-header-y += ipsec.h
-header-y += ipv6.h
-header-y += ipv6_route.h
-header-y += ip_vs.h
-header-y += ipx.h
-header-y += irda.h
-header-y += irqnr.h
-header-y += isdn_divertif.h
-header-y += isdn.h
-header-y += isdnif.h
-header-y += isdn_ppp.h
-header-y += iso_fs.h
-header-y += ivtvfb.h
-header-y += ivtv.h
-header-y += ixjuser.h
-header-y += jffs2.h
-header-y += joystick.h
-header-y += kcmp.h
-header-y += kdev_t.h
-header-y += kd.h
-header-y += kernelcapi.h
-header-y += kernel.h
-header-y += kernel-page-flags.h
-header-y += kexec.h
-header-y += keyboard.h
-header-y += keyctl.h
-
-ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm.h \
-		  $(srctree)/arch/$(SRCARCH)/include/asm/kvm.h),)
-header-y += kvm.h
+ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm.h),)
+no-export-headers += kvm.h
 endif
 
-
-ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm_para.h \
-		  $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h),)
-header-y += kvm_para.h
+ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm_para.h),)
+no-export-headers += kvm_para.h
 endif
-
-header-y += hw_breakpoint.h
-header-y += l2tp.h
-header-y += libc-compat.h
-header-y += lirc.h
-header-y += limits.h
-header-y += llc.h
-header-y += loop.h
-header-y += lp.h
-header-y += lwtunnel.h
-header-y += magic.h
-header-y += major.h
-header-y += map_to_7segment.h
-header-y += matroxfb.h
-header-y += mdio.h
-header-y += media.h
-header-y += media-bus-format.h
-header-y += mei.h
-header-y += membarrier.h
-header-y += memfd.h
-header-y += mempolicy.h
-header-y += meye.h
-header-y += mic_common.h
-header-y += mic_ioctl.h
-header-y += mii.h
-header-y += minix_fs.h
-header-y += mman.h
-header-y += mmtimer.h
-header-y += mpls.h
-header-y += mpls_iptunnel.h
-header-y += mqueue.h
-header-y += mroute6.h
-header-y += mroute.h
-header-y += msdos_fs.h
-header-y += msg.h
-header-y += mtio.h
-header-y += nbd.h
-header-y += ncp_fs.h
-header-y += ncp.h
-header-y += ncp_mount.h
-header-y += ncp_no.h
-header-y += ndctl.h
-header-y += neighbour.h
-header-y += netconf.h
-header-y += netdevice.h
-header-y += net_dropmon.h
-header-y += netfilter_arp.h
-header-y += netfilter_bridge.h
-header-y += netfilter_decnet.h
-header-y += netfilter.h
-header-y += netfilter_ipv4.h
-header-y += netfilter_ipv6.h
-header-y += net.h
-header-y += netlink_diag.h
-header-y += netlink.h
-header-y += netrom.h
-header-y += net_namespace.h
-header-y += net_tstamp.h
-header-y += nfc.h
-header-y += psample.h
-header-y += nfs2.h
-header-y += nfs3.h
-header-y += nfs4.h
-header-y += nfs4_mount.h
-header-y += nfsacl.h
-header-y += nfs_fs.h
-header-y += nfs.h
-header-y += nfs_idmap.h
-header-y += nfs_mount.h
-header-y += nl80211.h
-header-y += n_r3964.h
-header-y += nubus.h
-header-y += nvme_ioctl.h
-header-y += nvram.h
-header-y += omap3isp.h
-header-y += omapfb.h
-header-y += oom.h
-header-y += openvswitch.h
-header-y += packet_diag.h
-header-y += param.h
-header-y += parport.h
-header-y += patchkey.h
-header-y += pci.h
-header-y += pci_regs.h
-header-y += pcitest.h
-header-y += perf_event.h
-header-y += personality.h
-header-y += pfkeyv2.h
-header-y += pg.h
-header-y += phantom.h
-header-y += phonet.h
-header-y += pktcdvd.h
-header-y += pkt_cls.h
-header-y += pkt_sched.h
-header-y += pmu.h
-header-y += poll.h
-header-y += posix_acl.h
-header-y += posix_acl_xattr.h
-header-y += posix_types.h
-header-y += ppdev.h
-header-y += ppp-comp.h
-header-y += ppp_defs.h
-header-y += ppp-ioctl.h
-header-y += pps.h
-header-y += prctl.h
-header-y += psci.h
-header-y += ptp_clock.h
-header-y += ptrace.h
-header-y += qnx4_fs.h
-header-y += qnxtypes.h
-header-y += quota.h
-header-y += radeonfb.h
-header-y += random.h
-header-y += raw.h
-header-y += rds.h
-header-y += reboot.h
-header-y += reiserfs_fs.h
-header-y += reiserfs_xattr.h
-header-y += resource.h
-header-y += rfkill.h
-header-y += rio_cm_cdev.h
-header-y += rio_mport_cdev.h
-header-y += romfs_fs.h
-header-y += rose.h
-header-y += route.h
-header-y += rtc.h
-header-y += rtnetlink.h
-header-y += scc.h
-header-y += sched.h
-header-y += scif_ioctl.h
-header-y += screen_info.h
-header-y += sctp.h
-header-y += sdla.h
-header-y += seccomp.h
-header-y += securebits.h
-header-y += seg6_genl.h
-header-y += seg6.h
-header-y += seg6_hmac.h
-header-y += seg6_iptunnel.h
-header-y += selinux_netlink.h
-header-y += sem.h
-header-y += serial_core.h
-header-y += serial.h
-header-y += serial_reg.h
-header-y += serio.h
-header-y += shm.h
-header-y += signalfd.h
-header-y += signal.h
-header-y += smiapp.h
-header-y += snmp.h
-header-y += sock_diag.h
-header-y += socket.h
-header-y += sockios.h
-header-y += sonet.h
-header-y += sonypi.h
-header-y += soundcard.h
-header-y += sound.h
-header-y += stat.h
-header-y += stddef.h
-header-y += string.h
-header-y += suspend_ioctls.h
-header-y += swab.h
-header-y += synclink.h
-header-y += sync_file.h
-header-y += sysctl.h
-header-y += sysinfo.h
-header-y += target_core_user.h
-header-y += taskstats.h
-header-y += tcp.h
-header-y += tcp_metrics.h
-header-y += telephony.h
-header-y += termios.h
-header-y += thermal.h
-header-y += time.h
-header-y += timerfd.h
-header-y += times.h
-header-y += timex.h
-header-y += tiocl.h
-header-y += tipc_config.h
-header-y += tipc_netlink.h
-header-y += tipc.h
-header-y += toshiba.h
-header-y += tty_flags.h
-header-y += tty.h
-header-y += types.h
-header-y += udf_fs_i.h
-header-y += udp.h
-header-y += uhid.h
-header-y += uinput.h
-header-y += uio.h
-header-y += uleds.h
-header-y += ultrasound.h
-header-y += un.h
-header-y += unistd.h
-header-y += unix_diag.h
-header-y += usbdevice_fs.h
-header-y += usbip.h
-header-y += userio.h
-header-y += utime.h
-header-y += utsname.h
-header-y += uuid.h
-header-y += uvcvideo.h
-header-y += v4l2-common.h
-header-y += v4l2-controls.h
-header-y += v4l2-dv-timings.h
-header-y += v4l2-mediabus.h
-header-y += v4l2-subdev.h
-header-y += veth.h
-header-y += vfio.h
-header-y += vhost.h
-header-y += videodev2.h
-header-y += virtio_9p.h
-header-y += virtio_balloon.h
-header-y += virtio_blk.h
-header-y += virtio_config.h
-header-y += virtio_console.h
-header-y += virtio_gpu.h
-header-y += virtio_ids.h
-header-y += virtio_input.h
-header-y += virtio_mmio.h
-header-y += virtio_net.h
-header-y += virtio_pci.h
-header-y += virtio_ring.h
-header-y += virtio_rng.h
-header-y += virtio_scsi.h
-header-y += virtio_types.h
-header-y += virtio_vsock.h
-header-y += virtio_crypto.h
-header-y += vm_sockets.h
-header-y += vsockmon.h
-header-y += vt.h
-header-y += vtpm_proxy.h
-header-y += wait.h
-header-y += wanrouter.h
-header-y += watchdog.h
-header-y += wimax.h
-header-y += wireless.h
-header-y += x25.h
-header-y += xattr.h
-header-y += xfrm.h
-header-y += xilinx-v4l2-controls.h
-header-y += zorro.h
-header-y += zorro_ids.h
-header-y += userfaultfd.h
diff --git a/include/uapi/linux/a.out.h b/include/uapi/linux/a.out.h
index 7caf44c7fa51..295cd3ef6330 100644
--- a/include/uapi/linux/a.out.h
+++ b/include/uapi/linux/a.out.h
@@ -112,24 +112,7 @@ enum machine_type {
 #define N_TXTADDR(x) (N_MAGIC(x) == QMAGIC ? PAGE_SIZE : 0)
 #endif
 
-/* Address of data segment in memory after it is loaded.
-   Note that it is up to you to define SEGMENT_SIZE
-   on machines not listed here.  */
-#if defined(vax) || defined(hp300) || defined(pyr)
-#define SEGMENT_SIZE page_size
-#endif
-#ifdef	sony
-#define	SEGMENT_SIZE	0x2000
-#endif	/* Sony.  */
-#ifdef is68k
-#define SEGMENT_SIZE 0x20000
-#endif
-#if defined(m68k) && defined(PORTAR)
-#define PAGE_SIZE 0x400
-#define SEGMENT_SIZE PAGE_SIZE
-#endif
-
-#ifdef linux
+/* Address of data segment in memory after it is loaded. */
 #ifndef __KERNEL__
 #include <unistd.h>
 #endif
@@ -142,7 +125,6 @@ enum machine_type {
 #endif
 #endif
 #endif
-#endif
 
 #define _N_SEGMENT_ROUND(x) ALIGN(x, SEGMENT_SIZE)
 
@@ -260,13 +242,7 @@ struct relocation_info
   unsigned int r_extern:1;
   /* Four bits that aren't used, but when writing an object file
      it is desirable to clear them.  */
-#ifdef NS32K
-  unsigned r_bsr:1;
-  unsigned r_disp:1;
-  unsigned r_pad:2;
-#else
   unsigned int r_pad:4;
-#endif
 };
 #endif /* no N_RELOCATION_INFO_DECLARED.  */
 
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index bb2554f7fbd1..a2d4a8ac94ca 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -79,7 +79,7 @@ struct io_event {
 struct iocb {
 	/* these are internal to the kernel/libc. */
 	__u64	aio_data;	/* data to be returned in event's data */
-	__u32	PADDED(aio_key, aio_reserved1);
+	__u32	PADDED(aio_key, aio_rw_flags);
 				/* the kernel sets aio_key to the req # */
 
 	/* common fields */
diff --git a/include/uapi/linux/android/Kbuild b/include/uapi/linux/android/Kbuild
deleted file mode 100644
index ca011eec252a..000000000000
--- a/include/uapi/linux/android/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += binder.h
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h
index aa63451ef20a..1953f8d6063b 100644
--- a/include/uapi/linux/auto_fs.h
+++ b/include/uapi/linux/auto_fs.h
@@ -26,7 +26,7 @@
 #define AUTOFS_MIN_PROTO_VERSION	AUTOFS_PROTO_VERSION
 
 /*
- * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed
+ * The wait_queue_entry_token (autofs_wqt_t) is part of a structure which is passed
  * back to the kernel via ioctl from userspace. On architectures where 32- and
  * 64-bit userspace binaries can be executed it's important that the size of
  * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we
@@ -49,7 +49,7 @@ struct autofs_packet_hdr {
 
 struct autofs_packet_missing {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_token;
+	autofs_wqt_t wait_queue_entry_token;
 	int len;
 	char name[NAME_MAX+1];
 };	
diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h
index 7c6da423d54e..65b72d0222e7 100644
--- a/include/uapi/linux/auto_fs4.h
+++ b/include/uapi/linux/auto_fs4.h
@@ -108,7 +108,7 @@ enum autofs_notify {
 /* v4 multi expire (via pipe) */
 struct autofs_packet_expire_multi {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_token;
+	autofs_wqt_t wait_queue_entry_token;
 	int len;
 	char name[NAME_MAX+1];
 };
@@ -123,7 +123,7 @@ union autofs_packet_union {
 /* autofs v5 common packet struct */
 struct autofs_v5_packet {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_token;
+	autofs_wqt_t wait_queue_entry_token;
 	__u32 dev;
 	__u64 ino;
 	__u32 uid;
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 22b6ad31c706..e3bb0635e94a 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -5,7 +5,7 @@
  * Bcache on disk data structures
  */
 
-#include <asm/types.h>
+#include <linux/types.h>
 
 #define BITMASK(name, type, field, offset, size)		\
 static inline __u64 name(const type *k)				\
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 945a1f5f63c5..94dfa9def355 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -132,6 +132,13 @@ enum bpf_attach_type {
  */
 #define BPF_F_ALLOW_OVERRIDE	(1U << 0)
 
+/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will perform strict alignment checking as if the kernel
+ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
+ * and NET_IP_ALIGN defined to 2.
+ */
+#define BPF_F_STRICT_ALIGNMENT	(1U << 0)
+
 #define BPF_PSEUDO_MAP_FD	1
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
@@ -177,6 +184,7 @@ union bpf_attr {
 		__u32		log_size;	/* size of user buffer */
 		__aligned_u64	log_buf;	/* user supplied buffer */
 		__u32		kern_version;	/* checked when prog_type=kprobe */
+		__u32		prog_flags;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index dcfc3a5a9cb1..a456e5309238 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -291,10 +291,10 @@ struct btrfs_ioctl_feature_flags {
 struct btrfs_balance_args {
 	__u64 profiles;
 	union {
-		__le64 usage;
+		__u64 usage;
 		struct {
-			__le32 usage_min;
-			__le32 usage_max;
+			__u32 usage_min;
+			__u32 usage_max;
 		};
 	};
 	__u64 devid;
@@ -324,8 +324,8 @@ struct btrfs_balance_args {
 	 * Process chunks that cross stripes_min..stripes_max devices,
 	 * BTRFS_BALANCE_ARGS_STRIPES_RANGE
 	 */
-	__le32 stripes_min;
-	__le32 stripes_max;
+	__u32 stripes_min;
+	__u32 stripes_max;
 
 	__u64 unused[6];
 } __attribute__ ((__packed__));
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index d5ad15a106a7..10689e1fdf11 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -1,6 +1,9 @@
 #ifndef _BTRFS_CTREE_H_
 #define _BTRFS_CTREE_H_
 
+#include <linux/btrfs.h>
+#include <linux/types.h>
+
 /*
  * This header contains the structure definitions and constants used
  * by file system objects that can be retrieved using
diff --git a/include/uapi/linux/byteorder/Kbuild b/include/uapi/linux/byteorder/Kbuild
deleted file mode 100644
index 619225b9ff2e..000000000000
--- a/include/uapi/linux/byteorder/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-# UAPI Header export list
-header-y += big_endian.h
-header-y += little_endian.h
diff --git a/include/uapi/linux/caif/Kbuild b/include/uapi/linux/caif/Kbuild
deleted file mode 100644
index 43396612d3a3..000000000000
--- a/include/uapi/linux/caif/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-# UAPI Header export list
-header-y += caif_socket.h
-header-y += if_caif.h
diff --git a/include/uapi/linux/can/Kbuild b/include/uapi/linux/can/Kbuild
deleted file mode 100644
index 21c91bf25a29..000000000000
--- a/include/uapi/linux/can/Kbuild
+++ /dev/null
@@ -1,6 +0,0 @@
-# UAPI Header export list
-header-y += bcm.h
-header-y += error.h
-header-y += gw.h
-header-y += netlink.h
-header-y += raw.h
diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h
index b4def5c630e7..fdcbb3c29083 100644
--- a/include/uapi/linux/cryptouser.h
+++ b/include/uapi/linux/cryptouser.h
@@ -18,6 +18,8 @@
  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include <linux/types.h>
+
 /* Netlink configuration messages.  */
 enum {
 	CRYPTO_MSG_BASE = 0x10,
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 4bf9f1eabffc..2f6c77aebe1a 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	35
+#define DM_VERSION_MINOR	36
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2016-06-23)"
+#define DM_VERSION_EXTRA	"-ioctl (2017-06-09)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
diff --git a/include/uapi/linux/dvb/Kbuild b/include/uapi/linux/dvb/Kbuild
deleted file mode 100644
index d40942cfc627..000000000000
--- a/include/uapi/linux/dvb/Kbuild
+++ /dev/null
@@ -1,9 +0,0 @@
-# UAPI Header export list
-header-y += audio.h
-header-y += ca.h
-header-y += dmx.h
-header-y += frontend.h
-header-y += net.h
-header-y += osd.h
-header-y += version.h
-header-y += video.h
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index d179d7767f51..7d4a594d5d58 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1486,8 +1486,10 @@ enum ethtool_link_mode_bit_indices {
  * it was forced up into this mode or autonegotiated.
  */
 
-/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal. */
-/* Update drivers/net/phy/phy.c:phy_speed_to_str() when adding new values */
+/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal.
+ * Update drivers/net/phy/phy.c:phy_speed_to_str() and
+ * drivers/net/bonding/bond_3ad.c:__get_link_speed() when adding new values.
+ */
 #define SPEED_10		10
 #define SPEED_100		100
 #define SPEED_1000		1000
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 813afd6eee71..ec69d55bcec7 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -43,6 +43,27 @@
 /* (1U << 31) is reserved for signed error codes */
 
 /*
+ * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
+ * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
+ * the specific file.
+ */
+#define F_GET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 11)
+#define F_SET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 12)
+#define F_GET_FILE_RW_HINT	(F_LINUX_SPECIFIC_BASE + 13)
+#define F_SET_FILE_RW_HINT	(F_LINUX_SPECIFIC_BASE + 14)
+
+/*
+ * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
+ * used to clear any hints previously set.
+ */
+#define RWF_WRITE_LIFE_NOT_SET	0
+#define RWH_WRITE_LIFE_NONE	1
+#define RWH_WRITE_LIFE_SHORT	2
+#define RWH_WRITE_LIFE_MEDIUM	3
+#define RWH_WRITE_LIFE_LONG	4
+#define RWH_WRITE_LIFE_EXTREME	5
+
+/*
  * Types of directory notifications that may be requested.
  */
 #define DN_ACCESS	0x00000001	/* File accessed */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 24e61a54feaa..27d8c36c04af 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -360,5 +360,9 @@ struct fscrypt_key {
 #define RWF_HIPRI			0x00000001 /* high priority request, poll if possible */
 #define RWF_DSYNC			0x00000002 /* per-IO O_DSYNC */
 #define RWF_SYNC			0x00000004 /* per-IO O_SYNC */
+#define RWF_NOWAIT			0x00000008 /* per-IO, return -EAGAIN if operation would block */
+
+#define RWF_SUPPORTED			(RWF_HIPRI | RWF_DSYNC | RWF_SYNC |\
+					 RWF_NOWAIT)
 
 #endif /* _UAPI_LINUX_FS_H */
diff --git a/include/uapi/linux/hdlc/Kbuild b/include/uapi/linux/hdlc/Kbuild
deleted file mode 100644
index 8c1d2cb75e33..000000000000
--- a/include/uapi/linux/hdlc/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += ioctl.h
diff --git a/include/uapi/linux/hsi/Kbuild b/include/uapi/linux/hsi/Kbuild
deleted file mode 100644
index a16a00544258..000000000000
--- a/include/uapi/linux/hsi/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += hsi_char.h cs-protocol.h
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8e56ac70e0d1..15ac20382aba 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -888,9 +888,18 @@ enum {
 /* XDP section */
 
 #define XDP_FLAGS_UPDATE_IF_NOEXIST	(1U << 0)
-#define XDP_FLAGS_SKB_MODE		(2U << 0)
+#define XDP_FLAGS_SKB_MODE		(1U << 1)
+#define XDP_FLAGS_DRV_MODE		(1U << 2)
 #define XDP_FLAGS_MASK			(XDP_FLAGS_UPDATE_IF_NOEXIST | \
-					 XDP_FLAGS_SKB_MODE)
+					 XDP_FLAGS_SKB_MODE | \
+					 XDP_FLAGS_DRV_MODE)
+
+/* These are stored into IFLA_XDP_ATTACHED on dump. */
+enum {
+	XDP_ATTACHED_NONE = 0,
+	XDP_ATTACHED_DRV,
+	XDP_ATTACHED_SKB,
+};
 
 enum {
 	IFLA_XDP_UNSPEC,
diff --git a/include/uapi/linux/iio/Kbuild b/include/uapi/linux/iio/Kbuild
deleted file mode 100644
index 86f76d84c44f..000000000000
--- a/include/uapi/linux/iio/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-# UAPI Header export list
-header-y += events.h
-header-y += types.h
diff --git a/include/uapi/linux/isdn/Kbuild b/include/uapi/linux/isdn/Kbuild
deleted file mode 100644
index 89e52850bf29..000000000000
--- a/include/uapi/linux/isdn/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += capicmd.h
diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
index 201c6644b237..ef16df06642a 100644
--- a/include/uapi/linux/keyctl.h
+++ b/include/uapi/linux/keyctl.h
@@ -70,8 +70,8 @@ struct keyctl_dh_params {
 };
 
 struct keyctl_kdf_params {
-	char *hashname;
-	char *otherinfo;
+	char __user *hashname;
+	char __user *otherinfo;
 	__u32 otherinfolen;
 	__u32 __spare[8];
 };
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index c8125ec1f4f2..a3960f98679c 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -22,6 +22,7 @@ enum {
 	LO_FLAGS_AUTOCLEAR	= 4,
 	LO_FLAGS_PARTSCAN	= 8,
 	LO_FLAGS_DIRECT_IO	= 16,
+	LO_FLAGS_BLOCKSIZE	= 32,
 };
 
 #include <asm/posix_types.h>	/* for __kernel_old_dev_t */
@@ -59,6 +60,8 @@ struct loop_info64 {
 	__u64		   lo_init[2];
 };
 
+#define LO_INFO_BLOCKSIZE(l) (l)->lo_init[0]
+
 /*
  * Loop filter types
  */
diff --git a/include/uapi/linux/mmc/Kbuild b/include/uapi/linux/mmc/Kbuild
deleted file mode 100644
index 8c1d2cb75e33..000000000000
--- a/include/uapi/linux/mmc/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += ioctl.h
diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h
index 155e33f81913..a50527ebf671 100644
--- a/include/uapi/linux/nbd.h
+++ b/include/uapi/linux/nbd.h
@@ -41,10 +41,14 @@ enum {
 #define NBD_FLAG_HAS_FLAGS	(1 << 0) /* nbd-server supports flags */
 #define NBD_FLAG_READ_ONLY	(1 << 1) /* device is read-only */
 #define NBD_FLAG_SEND_FLUSH	(1 << 2) /* can flush writeback cache */
+#define NBD_FLAG_SEND_FUA	(1 << 3) /* send FUA (forced unit access) */
 /* there is a gap here to match userspace */
 #define NBD_FLAG_SEND_TRIM	(1 << 5) /* send trim/discard */
 #define NBD_FLAG_CAN_MULTI_CONN	(1 << 8)	/* Server supports multiple connections per export. */
 
+/* values for cmd flags in the upper 16 bits of request type */
+#define NBD_CMD_FLAG_FUA	(1 << 16) /* FUA (forced unit access) op */
+
 /* These are client behavior specific flags. */
 #define NBD_CFLAG_DESTROY_ON_DISCONNECT	(1 << 0) /* delete the nbd device on
 						    disconnect. */
diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild
deleted file mode 100644
index 03f194aeadc5..000000000000
--- a/include/uapi/linux/netfilter/Kbuild
+++ /dev/null
@@ -1,89 +0,0 @@
-# UAPI Header export list
-header-y += ipset/
-header-y += nf_conntrack_common.h
-header-y += nf_conntrack_ftp.h
-header-y += nf_conntrack_sctp.h
-header-y += nf_conntrack_tcp.h
-header-y += nf_conntrack_tuple_common.h
-header-y += nf_log.h
-header-y += nf_tables.h
-header-y += nf_tables_compat.h
-header-y += nf_nat.h
-header-y += nfnetlink.h
-header-y += nfnetlink_acct.h
-header-y += nfnetlink_compat.h
-header-y += nfnetlink_conntrack.h
-header-y += nfnetlink_cthelper.h
-header-y += nfnetlink_cttimeout.h
-header-y += nfnetlink_log.h
-header-y += nfnetlink_queue.h
-header-y += x_tables.h
-header-y += xt_AUDIT.h
-header-y += xt_CHECKSUM.h
-header-y += xt_CLASSIFY.h
-header-y += xt_CONNMARK.h
-header-y += xt_CONNSECMARK.h
-header-y += xt_CT.h
-header-y += xt_DSCP.h
-header-y += xt_HMARK.h
-header-y += xt_IDLETIMER.h
-header-y += xt_LED.h
-header-y += xt_LOG.h
-header-y += xt_MARK.h
-header-y += xt_NFLOG.h
-header-y += xt_NFQUEUE.h
-header-y += xt_RATEEST.h
-header-y += xt_SECMARK.h
-header-y += xt_SYNPROXY.h
-header-y += xt_TCPMSS.h
-header-y += xt_TCPOPTSTRIP.h
-header-y += xt_TEE.h
-header-y += xt_TPROXY.h
-header-y += xt_addrtype.h
-header-y += xt_bpf.h
-header-y += xt_cgroup.h
-header-y += xt_cluster.h
-header-y += xt_comment.h
-header-y += xt_connbytes.h
-header-y += xt_connlabel.h
-header-y += xt_connlimit.h
-header-y += xt_connmark.h
-header-y += xt_conntrack.h
-header-y += xt_cpu.h
-header-y += xt_dccp.h
-header-y += xt_devgroup.h
-header-y += xt_dscp.h
-header-y += xt_ecn.h
-header-y += xt_esp.h
-header-y += xt_hashlimit.h
-header-y += xt_helper.h
-header-y += xt_ipcomp.h
-header-y += xt_iprange.h
-header-y += xt_ipvs.h
-header-y += xt_l2tp.h
-header-y += xt_length.h
-header-y += xt_limit.h
-header-y += xt_mac.h
-header-y += xt_mark.h
-header-y += xt_multiport.h
-header-y += xt_nfacct.h
-header-y += xt_osf.h
-header-y += xt_owner.h
-header-y += xt_physdev.h
-header-y += xt_pkttype.h
-header-y += xt_policy.h
-header-y += xt_quota.h
-header-y += xt_rateest.h
-header-y += xt_realm.h
-header-y += xt_recent.h
-header-y += xt_rpfilter.h
-header-y += xt_sctp.h
-header-y += xt_set.h
-header-y += xt_socket.h
-header-y += xt_state.h
-header-y += xt_statistic.h
-header-y += xt_string.h
-header-y += xt_tcpmss.h
-header-y += xt_tcpudp.h
-header-y += xt_time.h
-header-y += xt_u32.h
diff --git a/include/uapi/linux/netfilter/ipset/Kbuild b/include/uapi/linux/netfilter/ipset/Kbuild
deleted file mode 100644
index d2680423d9ab..000000000000
--- a/include/uapi/linux/netfilter/ipset/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-# UAPI Header export list
-header-y += ip_set.h
-header-y += ip_set_bitmap.h
-header-y += ip_set_hash.h
-header-y += ip_set_list.h
diff --git a/include/uapi/linux/netfilter_arp/Kbuild b/include/uapi/linux/netfilter_arp/Kbuild
deleted file mode 100644
index 62d5637cc0ac..000000000000
--- a/include/uapi/linux/netfilter_arp/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-# UAPI Header export list
-header-y += arp_tables.h
-header-y += arpt_mangle.h
diff --git a/include/uapi/linux/netfilter_bridge/Kbuild b/include/uapi/linux/netfilter_bridge/Kbuild
deleted file mode 100644
index 0fbad8ef96de..000000000000
--- a/include/uapi/linux/netfilter_bridge/Kbuild
+++ /dev/null
@@ -1,18 +0,0 @@
-# UAPI Header export list
-header-y += ebt_802_3.h
-header-y += ebt_among.h
-header-y += ebt_arp.h
-header-y += ebt_arpreply.h
-header-y += ebt_ip.h
-header-y += ebt_ip6.h
-header-y += ebt_limit.h
-header-y += ebt_log.h
-header-y += ebt_mark_m.h
-header-y += ebt_mark_t.h
-header-y += ebt_nat.h
-header-y += ebt_nflog.h
-header-y += ebt_pkttype.h
-header-y += ebt_redirect.h
-header-y += ebt_stp.h
-header-y += ebt_vlan.h
-header-y += ebtables.h
diff --git a/include/uapi/linux/netfilter_ipv4/Kbuild b/include/uapi/linux/netfilter_ipv4/Kbuild
deleted file mode 100644
index ecb291df390e..000000000000
--- a/include/uapi/linux/netfilter_ipv4/Kbuild
+++ /dev/null
@@ -1,10 +0,0 @@
-# UAPI Header export list
-header-y += ip_tables.h
-header-y += ipt_CLUSTERIP.h
-header-y += ipt_ECN.h
-header-y += ipt_LOG.h
-header-y += ipt_REJECT.h
-header-y += ipt_TTL.h
-header-y += ipt_ah.h
-header-y += ipt_ecn.h
-header-y += ipt_ttl.h
diff --git a/include/uapi/linux/netfilter_ipv6/Kbuild b/include/uapi/linux/netfilter_ipv6/Kbuild
deleted file mode 100644
index 75a668ca2353..000000000000
--- a/include/uapi/linux/netfilter_ipv6/Kbuild
+++ /dev/null
@@ -1,13 +0,0 @@
-# UAPI Header export list
-header-y += ip6_tables.h
-header-y += ip6t_HL.h
-header-y += ip6t_LOG.h
-header-y += ip6t_NPT.h
-header-y += ip6t_REJECT.h
-header-y += ip6t_ah.h
-header-y += ip6t_frag.h
-header-y += ip6t_hl.h
-header-y += ip6t_ipv6header.h
-header-y += ip6t_mh.h
-header-y += ip6t_opts.h
-header-y += ip6t_rt.h
diff --git a/include/uapi/linux/nfsd/Kbuild b/include/uapi/linux/nfsd/Kbuild
deleted file mode 100644
index c11bc404053c..000000000000
--- a/include/uapi/linux/nfsd/Kbuild
+++ /dev/null
@@ -1,6 +0,0 @@
-# UAPI Header export list
-header-y += cld.h
-header-y += debug.h
-header-y += export.h
-header-y += nfsfh.h
-header-y += stats.h
diff --git a/include/uapi/linux/nfsd/cld.h b/include/uapi/linux/nfsd/cld.h
index f14a9ab06f1f..ec260274be0c 100644
--- a/include/uapi/linux/nfsd/cld.h
+++ b/include/uapi/linux/nfsd/cld.h
@@ -22,6 +22,8 @@
 #ifndef _NFSD_CLD_H
 #define _NFSD_CLD_H
 
+#include <linux/types.h>
+
 /* latest upcall version available */
 #define CLD_UPCALL_VERSION 1
 
@@ -37,18 +39,18 @@ enum cld_command {
 
 /* representation of long-form NFSv4 client ID */
 struct cld_name {
-	uint16_t	cn_len;				/* length of cm_id */
+	__u16		cn_len;				/* length of cm_id */
 	unsigned char	cn_id[NFS4_OPAQUE_LIMIT];	/* client-provided */
 } __attribute__((packed));
 
 /* message struct for communication with userspace */
 struct cld_msg {
-	uint8_t		cm_vers;		/* upcall version */
-	uint8_t		cm_cmd;			/* upcall command */
-	int16_t		cm_status;		/* return code */
-	uint32_t	cm_xid;			/* transaction id */
+	__u8		cm_vers;		/* upcall version */
+	__u8		cm_cmd;			/* upcall command */
+	__s16		cm_status;		/* return code */
+	__u32		cm_xid;			/* transaction id */
 	union {
-		int64_t		cm_gracetime;	/* grace period start time */
+		__s64		cm_gracetime;	/* grace period start time */
 		struct cld_name	cm_name;
 	} __attribute__((packed)) cm_u;
 } __attribute__((packed));
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 61b7d36dfe34..156ee4cab82e 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -343,6 +343,7 @@ enum ovs_key_attr {
 #define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
 
 enum ovs_tunnel_key_attr {
+	/* OVS_TUNNEL_KEY_ATTR_NONE, standard nl API requires this attribute! */
 	OVS_TUNNEL_KEY_ATTR_ID,                 /* be64 Tunnel ID */
 	OVS_TUNNEL_KEY_ATTR_IPV4_SRC,           /* be32 src IP address. */
 	OVS_TUNNEL_KEY_ATTR_IPV4_DST,           /* be32 dst IP address. */
diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h
index 57d7c0f916b6..645ef3cf3dd0 100644
--- a/include/uapi/linux/pr.h
+++ b/include/uapi/linux/pr.h
@@ -1,6 +1,8 @@
 #ifndef _UAPI_PR_H
 #define _UAPI_PR_H
 
+#include <linux/types.h>
+
 enum pr_type {
 	PR_WRITE_EXCLUSIVE		= 1,
 	PR_EXCLUSIVE_ACCESS		= 2,
diff --git a/include/uapi/linux/qrtr.h b/include/uapi/linux/qrtr.h
index 66c0748d26e2..9d76c566f66e 100644
--- a/include/uapi/linux/qrtr.h
+++ b/include/uapi/linux/qrtr.h
@@ -2,6 +2,7 @@
 #define _LINUX_QRTR_H
 
 #include <linux/socket.h>
+#include <linux/types.h>
 
 struct sockaddr_qrtr {
 	__kernel_sa_family_t sq_family;
diff --git a/include/uapi/linux/raid/Kbuild b/include/uapi/linux/raid/Kbuild
deleted file mode 100644
index e2c3d25405d7..000000000000
--- a/include/uapi/linux/raid/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-# UAPI Header export list
-header-y += md_p.h
-header-y += md_u.h
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 5f0fe019a720..e2a6c7b3510b 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -47,5 +47,6 @@
  * For the sched_{set,get}attr() calls
  */
 #define SCHED_FLAG_RESET_ON_FORK	0x01
+#define SCHED_FLAG_RECLAIM		0x02
 
 #endif /* _UAPI_LINUX_SCHED_H */
diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
index 0063919fea34..87712bfaa9dd 100644
--- a/include/uapi/linux/smc_diag.h
+++ b/include/uapi/linux/smc_diag.h
@@ -3,7 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/inet_diag.h>
-#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
 
 /* Request structure */
 struct smc_diag_req {
diff --git a/include/uapi/linux/spi/Kbuild b/include/uapi/linux/spi/Kbuild
deleted file mode 100644
index 0cc747eff165..000000000000
--- a/include/uapi/linux/spi/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += spidev.h
diff --git a/include/uapi/linux/sunrpc/Kbuild b/include/uapi/linux/sunrpc/Kbuild
deleted file mode 100644
index 8e02e47c20fb..000000000000
--- a/include/uapi/linux/sunrpc/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += debug.h
diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild
deleted file mode 100644
index ba62ddf0e58a..000000000000
--- a/include/uapi/linux/tc_act/Kbuild
+++ /dev/null
@@ -1,16 +0,0 @@
-# UAPI Header export list
-header-y += tc_csum.h
-header-y += tc_defact.h
-header-y += tc_gact.h
-header-y += tc_ipt.h
-header-y += tc_mirred.h
-header-y += tc_sample.h
-header-y += tc_nat.h
-header-y += tc_pedit.h
-header-y += tc_skbedit.h
-header-y += tc_vlan.h
-header-y += tc_bpf.h
-header-y += tc_connmark.h
-header-y += tc_ife.h
-header-y += tc_tunnel_key.h
-header-y += tc_skbmod.h
diff --git a/include/uapi/linux/tc_ematch/Kbuild b/include/uapi/linux/tc_ematch/Kbuild
deleted file mode 100644
index 53fca3925535..000000000000
--- a/include/uapi/linux/tc_ematch/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-# UAPI Header export list
-header-y += tc_em_cmp.h
-header-y += tc_em_meta.h
-header-y += tc_em_nbyte.h
-header-y += tc_em_text.h
diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h
new file mode 100644
index 000000000000..370d8845ab21
--- /dev/null
+++ b/include/uapi/linux/tee.h
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __TEE_H
+#define __TEE_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * This file describes the API provided by a TEE driver to user space.
+ *
+ * Each TEE driver defines a TEE specific protocol which is used for the
+ * data passed back and forth using TEE_IOC_CMD.
+ */
+
+/* Helpers to make the ioctl defines */
+#define TEE_IOC_MAGIC	0xa4
+#define TEE_IOC_BASE	0
+
+/* Flags relating to shared memory */
+#define TEE_IOCTL_SHM_MAPPED	0x1	/* memory mapped in normal world */
+#define TEE_IOCTL_SHM_DMA_BUF	0x2	/* dma-buf handle on shared memory */
+
+#define TEE_MAX_ARG_SIZE	1024
+
+#define TEE_GEN_CAP_GP		(1 << 0)/* GlobalPlatform compliant TEE */
+
+/*
+ * TEE Implementation ID
+ */
+#define TEE_IMPL_ID_OPTEE	1
+
+/*
+ * OP-TEE specific capabilities
+ */
+#define TEE_OPTEE_CAP_TZ	(1 << 0)
+
+/**
+ * struct tee_ioctl_version_data - TEE version
+ * @impl_id:	[out] TEE implementation id
+ * @impl_caps:	[out] Implementation specific capabilities
+ * @gen_caps:	[out] Generic capabilities, defined by TEE_GEN_CAPS_* above
+ *
+ * Identifies the TEE implementation, @impl_id is one of TEE_IMPL_ID_* above.
+ * @impl_caps is implementation specific, for example TEE_OPTEE_CAP_*
+ * is valid when @impl_id == TEE_IMPL_ID_OPTEE.
+ */
+struct tee_ioctl_version_data {
+	__u32 impl_id;
+	__u32 impl_caps;
+	__u32 gen_caps;
+};
+
+/**
+ * TEE_IOC_VERSION - query version of TEE
+ *
+ * Takes a tee_ioctl_version_data struct and returns with the TEE version
+ * data filled in.
+ */
+#define TEE_IOC_VERSION		_IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 0, \
+				     struct tee_ioctl_version_data)
+
+/**
+ * struct tee_ioctl_shm_alloc_data - Shared memory allocate argument
+ * @size:	[in/out] Size of shared memory to allocate
+ * @flags:	[in/out] Flags to/from allocation.
+ * @id:		[out] Identifier of the shared memory
+ *
+ * The flags field should currently be zero as input. Updated by the call
+ * with actual flags as defined by TEE_IOCTL_SHM_* above.
+ * This structure is used as argument for TEE_IOC_SHM_ALLOC below.
+ */
+struct tee_ioctl_shm_alloc_data {
+	__u64 size;
+	__u32 flags;
+	__s32 id;
+};
+
+/**
+ * TEE_IOC_SHM_ALLOC - allocate shared memory
+ *
+ * Allocates shared memory between the user space process and secure OS.
+ *
+ * Returns a file descriptor on success or < 0 on failure
+ *
+ * The returned file descriptor is used to map the shared memory into user
+ * space. The shared memory is freed when the descriptor is closed and the
+ * memory is unmapped.
+ */
+#define TEE_IOC_SHM_ALLOC	_IOWR(TEE_IOC_MAGIC, TEE_IOC_BASE + 1, \
+				     struct tee_ioctl_shm_alloc_data)
+
+/**
+ * struct tee_ioctl_buf_data - Variable sized buffer
+ * @buf_ptr:	[in] A __user pointer to a buffer
+ * @buf_len:	[in] Length of the buffer above
+ *
+ * Used as argument for TEE_IOC_OPEN_SESSION, TEE_IOC_INVOKE,
+ * TEE_IOC_SUPPL_RECV, and TEE_IOC_SUPPL_SEND below.
+ */
+struct tee_ioctl_buf_data {
+	__u64 buf_ptr;
+	__u64 buf_len;
+};
+
+/*
+ * Attributes for struct tee_ioctl_param, selects field in the union
+ */
+#define TEE_IOCTL_PARAM_ATTR_TYPE_NONE		0	/* parameter not used */
+
+/*
+ * These defines value parameters (struct tee_ioctl_param_value)
+ */
+#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT	1
+#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT	2
+#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT	3	/* input and output */
+
+/*
+ * These defines shared memory reference parameters (struct
+ * tee_ioctl_param_memref)
+ */
+#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT	5
+#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT	6
+#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT	7	/* input and output */
+
+/*
+ * Mask for the type part of the attribute, leaves room for more types
+ */
+#define TEE_IOCTL_PARAM_ATTR_TYPE_MASK		0xff
+
+/*
+ * Matches TEEC_LOGIN_* in GP TEE Client API
+ * Are only defined for GP compliant TEEs
+ */
+#define TEE_IOCTL_LOGIN_PUBLIC			0
+#define TEE_IOCTL_LOGIN_USER			1
+#define TEE_IOCTL_LOGIN_GROUP			2
+#define TEE_IOCTL_LOGIN_APPLICATION		4
+#define TEE_IOCTL_LOGIN_USER_APPLICATION	5
+#define TEE_IOCTL_LOGIN_GROUP_APPLICATION	6
+
+/**
+ * struct tee_ioctl_param - parameter
+ * @attr: attributes
+ * @a: if a memref, offset into the shared memory object, else a value parameter
+ * @b: if a memref, size of the buffer, else a value parameter
+ * @c: if a memref, shared memory identifier, else a value parameter
+ *
+ * @attr & TEE_PARAM_ATTR_TYPE_MASK indicates if memref or value is used in
+ * the union. TEE_PARAM_ATTR_TYPE_VALUE_* indicates value and
+ * TEE_PARAM_ATTR_TYPE_MEMREF_* indicates memref. TEE_PARAM_ATTR_TYPE_NONE
+ * indicates that none of the members are used.
+ *
+ * Shared memory is allocated with TEE_IOC_SHM_ALLOC which returns an
+ * identifier representing the shared memory object. A memref can reference
+ * a part of a shared memory by specifying an offset (@a) and size (@b) of
+ * the object. To supply the entire shared memory object set the offset
+ * (@a) to 0 and size (@b) to the previously returned size of the object.
+ */
+struct tee_ioctl_param {
+	__u64 attr;
+	__u64 a;
+	__u64 b;
+	__u64 c;
+};
+
+#define TEE_IOCTL_UUID_LEN		16
+
+/**
+ * struct tee_ioctl_open_session_arg - Open session argument
+ * @uuid:	[in] UUID of the Trusted Application
+ * @clnt_uuid:	[in] UUID of client
+ * @clnt_login:	[in] Login class of client, TEE_IOCTL_LOGIN_* above
+ * @cancel_id:	[in] Cancellation id, a unique value to identify this request
+ * @session:	[out] Session id
+ * @ret:	[out] return value
+ * @ret_origin	[out] origin of the return value
+ * @num_params	[in] number of parameters following this struct
+ */
+struct tee_ioctl_open_session_arg {
+	__u8 uuid[TEE_IOCTL_UUID_LEN];
+	__u8 clnt_uuid[TEE_IOCTL_UUID_LEN];
+	__u32 clnt_login;
+	__u32 cancel_id;
+	__u32 session;
+	__u32 ret;
+	__u32 ret_origin;
+	__u32 num_params;
+	/* num_params tells the actual number of element in params */
+	struct tee_ioctl_param params[];
+};
+
+/**
+ * TEE_IOC_OPEN_SESSION - opens a session to a Trusted Application
+ *
+ * Takes a struct tee_ioctl_buf_data which contains a struct
+ * tee_ioctl_open_session_arg followed by any array of struct
+ * tee_ioctl_param
+ */
+#define TEE_IOC_OPEN_SESSION	_IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 2, \
+				     struct tee_ioctl_buf_data)
+
+/**
+ * struct tee_ioctl_invoke_func_arg - Invokes a function in a Trusted
+ * Application
+ * @func:	[in] Trusted Application function, specific to the TA
+ * @session:	[in] Session id
+ * @cancel_id:	[in] Cancellation id, a unique value to identify this request
+ * @ret:	[out] return value
+ * @ret_origin	[out] origin of the return value
+ * @num_params	[in] number of parameters following this struct
+ */
+struct tee_ioctl_invoke_arg {
+	__u32 func;
+	__u32 session;
+	__u32 cancel_id;
+	__u32 ret;
+	__u32 ret_origin;
+	__u32 num_params;
+	/* num_params tells the actual number of element in params */
+	struct tee_ioctl_param params[];
+};
+
+/**
+ * TEE_IOC_INVOKE - Invokes a function in a Trusted Application
+ *
+ * Takes a struct tee_ioctl_buf_data which contains a struct
+ * tee_invoke_func_arg followed by any array of struct tee_param
+ */
+#define TEE_IOC_INVOKE		_IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 3, \
+				     struct tee_ioctl_buf_data)
+
+/**
+ * struct tee_ioctl_cancel_arg - Cancels an open session or invoke ioctl
+ * @cancel_id:	[in] Cancellation id, a unique value to identify this request
+ * @session:	[in] Session id, if the session is opened, else set to 0
+ */
+struct tee_ioctl_cancel_arg {
+	__u32 cancel_id;
+	__u32 session;
+};
+
+/**
+ * TEE_IOC_CANCEL - Cancels an open session or invoke
+ */
+#define TEE_IOC_CANCEL		_IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 4, \
+				     struct tee_ioctl_cancel_arg)
+
+/**
+ * struct tee_ioctl_close_session_arg - Closes an open session
+ * @session:	[in] Session id
+ */
+struct tee_ioctl_close_session_arg {
+	__u32 session;
+};
+
+/**
+ * TEE_IOC_CLOSE_SESSION - Closes a session
+ */
+#define TEE_IOC_CLOSE_SESSION	_IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 5, \
+				     struct tee_ioctl_close_session_arg)
+
+/**
+ * struct tee_iocl_supp_recv_arg - Receive a request for a supplicant function
+ * @func:	[in] supplicant function
+ * @num_params	[in/out] number of parameters following this struct
+ *
+ * @num_params is the number of params that tee-supplicant has room to
+ * receive when input, @num_params is the number of actual params
+ * tee-supplicant receives when output.
+ */
+struct tee_iocl_supp_recv_arg {
+	__u32 func;
+	__u32 num_params;
+	/* num_params tells the actual number of element in params */
+	struct tee_ioctl_param params[];
+};
+
+/**
+ * TEE_IOC_SUPPL_RECV - Receive a request for a supplicant function
+ *
+ * Takes a struct tee_ioctl_buf_data which contains a struct
+ * tee_iocl_supp_recv_arg followed by any array of struct tee_param
+ */
+#define TEE_IOC_SUPPL_RECV	_IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 6, \
+				     struct tee_ioctl_buf_data)
+
+/**
+ * struct tee_iocl_supp_send_arg - Send a response to a received request
+ * @ret:	[out] return value
+ * @num_params	[in] number of parameters following this struct
+ */
+struct tee_iocl_supp_send_arg {
+	__u32 ret;
+	__u32 num_params;
+	/* num_params tells the actual number of element in params */
+	struct tee_ioctl_param params[];
+};
+
+/**
+ * TEE_IOC_SUPPL_SEND - Receive a request for a supplicant function
+ *
+ * Takes a struct tee_ioctl_buf_data which contains a struct
+ * tee_iocl_supp_send_arg followed by any array of struct tee_param
+ */
+#define TEE_IOC_SUPPL_SEND	_IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 7, \
+				     struct tee_ioctl_buf_data)
+
+/*
+ * Five syscalls are used when communicating with the TEE driver.
+ * open(): opens the device associated with the driver
+ * ioctl(): as described above operating on the file descriptor from open()
+ * close(): two cases
+ *   - closes the device file descriptor
+ *   - closes a file descriptor connected to allocated shared memory
+ * mmap(): maps shared memory into user space using information from struct
+ *	   tee_ioctl_shm_alloc_data
+ * munmap(): unmaps previously shared memory
+ */
+
+#endif /*__TEE_H*/
diff --git a/include/uapi/linux/usb/Kbuild b/include/uapi/linux/usb/Kbuild
deleted file mode 100644
index 4cc4d6e7e523..000000000000
--- a/include/uapi/linux/usb/Kbuild
+++ /dev/null
@@ -1,12 +0,0 @@
-# UAPI Header export list
-header-y += audio.h
-header-y += cdc.h
-header-y += cdc-wdm.h
-header-y += ch11.h
-header-y += ch9.h
-header-y += functionfs.h
-header-y += g_printer.h
-header-y += gadgetfs.h
-header-y += midi.h
-header-y += tmc.h
-header-y += video.h
diff --git a/include/uapi/linux/usb/ch11.h b/include/uapi/linux/usb/ch11.h
index 361297e96f58..576c704e3fb8 100644
--- a/include/uapi/linux/usb/ch11.h
+++ b/include/uapi/linux/usb/ch11.h
@@ -22,6 +22,9 @@
  */
 #define USB_MAXCHILDREN		31
 
+/* See USB 3.1 spec Table 10-5 */
+#define USB_SS_MAXPORTS		15
+
 /*
  * Hub request types
  */
diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h
index 3738e5fb6a4d..8ef82f433877 100644
--- a/include/uapi/linux/uuid.h
+++ b/include/uapi/linux/uuid.h
@@ -22,33 +22,21 @@
 
 typedef struct {
 	__u8 b[16];
-} uuid_le;
+} guid_t;
 
-typedef struct {
-	__u8 b[16];
-} uuid_be;
-
-#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
-((uuid_le)								\
+#define GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)			\
+((guid_t)								\
 {{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
    (b) & 0xff, ((b) >> 8) & 0xff,					\
    (c) & 0xff, ((c) >> 8) & 0xff,					\
    (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
 
-#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
-((uuid_be)								\
-{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \
-   ((b) >> 8) & 0xff, (b) & 0xff,					\
-   ((c) >> 8) & 0xff, (c) & 0xff,					\
-   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
-
+/* backwards compatibility, don't use in new code */
+typedef guid_t uuid_le;
+#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
+	GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)
 #define NULL_UUID_LE							\
 	UUID_LE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
-		0x00, 0x00, 0x00, 0x00)
-
-#define NULL_UUID_BE							\
-	UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
-		0x00, 0x00, 0x00, 0x00)
-
+	     0x00, 0x00, 0x00, 0x00)
 
 #endif /* _UAPI_LINUX_UUID_H_ */
diff --git a/include/uapi/linux/wimax/Kbuild b/include/uapi/linux/wimax/Kbuild
deleted file mode 100644
index 1c97be49971f..000000000000
--- a/include/uapi/linux/wimax/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += i2400m.h
diff --git a/include/uapi/misc/Kbuild b/include/uapi/misc/Kbuild
deleted file mode 100644
index e96cae7d58c9..000000000000
--- a/include/uapi/misc/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# misc Header export list
-header-y += cxl.h
diff --git a/include/uapi/mtd/Kbuild b/include/uapi/mtd/Kbuild
deleted file mode 100644
index 5a691e10cd0e..000000000000
--- a/include/uapi/mtd/Kbuild
+++ /dev/null
@@ -1,6 +0,0 @@
-# UAPI Header export list
-header-y += inftl-user.h
-header-y += mtd-abi.h
-header-y += mtd-user.h
-header-y += nftl-user.h
-header-y += ubi-user.h
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
deleted file mode 100644
index 1e0af1ff75c3..000000000000
--- a/include/uapi/rdma/Kbuild
+++ /dev/null
@@ -1,20 +0,0 @@
-# UAPI Header export list
-header-y += ib_user_cm.h
-header-y += rdma_user_ioctl.h
-header-y += ib_user_mad.h
-header-y += ib_user_sa.h
-header-y += ib_user_verbs.h
-header-y += rdma_netlink.h
-header-y += rdma_user_cm.h
-header-y += hfi/
-header-y += rdma_user_rxe.h
-header-y += cxgb3-abi.h
-header-y += cxgb4-abi.h
-header-y += mlx4-abi.h
-header-y += mlx5-abi.h
-header-y += mthca-abi.h
-header-y += nes-abi.h
-header-y += ocrdma-abi.h
-header-y += hns-abi.h
-header-y += vmw_pvrdma-abi.h
-header-y += qedr-abi.h
diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index e2c8a3f0ccec..74018bd18d72 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -39,6 +39,8 @@
 #ifndef __BNXT_RE_UVERBS_ABI_H__
 #define __BNXT_RE_UVERBS_ABI_H__
 
+#include <linux/types.h>
+
 #define BNXT_RE_ABI_VERSION	1
 
 struct bnxt_re_uctx_resp {
diff --git a/include/uapi/rdma/hfi/Kbuild b/include/uapi/rdma/hfi/Kbuild
deleted file mode 100644
index b65b0b3a5f63..000000000000
--- a/include/uapi/rdma/hfi/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-# UAPI Header export list
-header-y += hfi1_user.h
-header-y += hfi1_ioctl.h
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 477d629f539d..270c350bedc6 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -1135,4 +1135,6 @@ struct ib_uverbs_ex_destroy_rwq_ind_table  {
 	__u32 ind_tbl_handle;
 };
 
+#define IB_DEVICE_NAME_MAX 64
+
 #endif /* IB_USER_VERBS_H */
diff --git a/include/uapi/scsi/Kbuild b/include/uapi/scsi/Kbuild
deleted file mode 100644
index d791e0ad509d..000000000000
--- a/include/uapi/scsi/Kbuild
+++ /dev/null
@@ -1,6 +0,0 @@
-# UAPI Header export list
-header-y += fc/
-header-y += scsi_bsg_fc.h
-header-y += scsi_netlink.h
-header-y += scsi_netlink_fc.h
-header-y += cxlflash_ioctl.h
diff --git a/include/uapi/scsi/fc/Kbuild b/include/uapi/scsi/fc/Kbuild
deleted file mode 100644
index 5ead9fac265c..000000000000
--- a/include/uapi/scsi/fc/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-# UAPI Header export list
-header-y += fc_els.h
-header-y += fc_fs.h
-header-y += fc_gs.h
-header-y += fc_ns.h
diff --git a/include/uapi/sound/Kbuild b/include/uapi/sound/Kbuild
deleted file mode 100644
index 9578d8bdbf31..000000000000
--- a/include/uapi/sound/Kbuild
+++ /dev/null
@@ -1,16 +0,0 @@
-# UAPI Header export list
-header-y += asequencer.h
-header-y += asoc.h
-header-y += asound.h
-header-y += asound_fm.h
-header-y += compress_offload.h
-header-y += compress_params.h
-header-y += emu10k1.h
-header-y += firewire.h
-header-y += hdsp.h
-header-y += hdspm.h
-header-y += sb16_csp.h
-header-y += sfnt_info.h
-header-y += tlv.h
-header-y += usb_stream.h
-header-y += snd_sst_tokens.h
diff --git a/include/uapi/video/Kbuild b/include/uapi/video/Kbuild
deleted file mode 100644
index ac7203bb32cc..000000000000
--- a/include/uapi/video/Kbuild
+++ /dev/null
@@ -1,4 +0,0 @@
-# UAPI Header export list
-header-y += edid.h
-header-y += sisfb.h
-header-y += uvesafb.h
diff --git a/include/uapi/xen/Kbuild b/include/uapi/xen/Kbuild
deleted file mode 100644
index 5c459628e8c7..000000000000
--- a/include/uapi/xen/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-# UAPI Header export list
-header-y += evtchn.h
-header-y += gntalloc.h
-header-y += gntdev.h
-header-y += privcmd.h
diff --git a/include/video/Kbuild b/include/video/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/include/video/Kbuild
+++ /dev/null
diff --git a/init/Kconfig b/init/Kconfig
index a92f27da4a27..ee0f03b69d11 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -472,315 +472,7 @@ config TASK_IO_ACCOUNTING
 
 endmenu # "CPU/Task time and stats accounting"
 
-menu "RCU Subsystem"
-
-config TREE_RCU
-	bool
-	default y if !PREEMPT && SMP
-	help
-	  This option selects the RCU implementation that is
-	  designed for very large SMP system with hundreds or
-	  thousands of CPUs.  It also scales down nicely to
-	  smaller systems.
-
-config PREEMPT_RCU
-	bool
-	default y if PREEMPT
-	help
-	  This option selects the RCU implementation that is
-	  designed for very large SMP systems with hundreds or
-	  thousands of CPUs, but for which real-time response
-	  is also required.  It also scales down nicely to
-	  smaller systems.
-
-	  Select this option if you are unsure.
-
-config TINY_RCU
-	bool
-	default y if !PREEMPT && !SMP
-	help
-	  This option selects the RCU implementation that is
-	  designed for UP systems from which real-time response
-	  is not required.  This option greatly reduces the
-	  memory footprint of RCU.
-
-config RCU_EXPERT
-	bool "Make expert-level adjustments to RCU configuration"
-	default n
-	help
-	  This option needs to be enabled if you wish to make
-	  expert-level adjustments to RCU configuration.  By default,
-	  no such adjustments can be made, which has the often-beneficial
-	  side-effect of preventing "make oldconfig" from asking you all
-	  sorts of detailed questions about how you would like numerous
-	  obscure RCU options to be set up.
-
-	  Say Y if you need to make expert-level adjustments to RCU.
-
-	  Say N if you are unsure.
-
-config SRCU
-	bool
-	help
-	  This option selects the sleepable version of RCU. This version
-	  permits arbitrary sleeping or blocking within RCU read-side critical
-	  sections.
-
-config TASKS_RCU
-	bool
-	default n
-	select SRCU
-	help
-	  This option enables a task-based RCU implementation that uses
-	  only voluntary context switch (not preemption!), idle, and
-	  user-mode execution as quiescent states.
-
-config RCU_STALL_COMMON
-	def_bool ( TREE_RCU || PREEMPT_RCU || RCU_TRACE )
-	help
-	  This option enables RCU CPU stall code that is common between
-	  the TINY and TREE variants of RCU.  The purpose is to allow
-	  the tiny variants to disable RCU CPU stall warnings, while
-	  making these warnings mandatory for the tree variants.
-
-config CONTEXT_TRACKING
-       bool
-
-config CONTEXT_TRACKING_FORCE
-	bool "Force context tracking"
-	depends on CONTEXT_TRACKING
-	default y if !NO_HZ_FULL
-	help
-	  The major pre-requirement for full dynticks to work is to
-	  support the context tracking subsystem. But there are also
-	  other dependencies to provide in order to make the full
-	  dynticks working.
-
-	  This option stands for testing when an arch implements the
-	  context tracking backend but doesn't yet fullfill all the
-	  requirements to make the full dynticks feature working.
-	  Without the full dynticks, there is no way to test the support
-	  for context tracking and the subsystems that rely on it: RCU
-	  userspace extended quiescent state and tickless cputime
-	  accounting. This option copes with the absence of the full
-	  dynticks subsystem by forcing the context tracking on all
-	  CPUs in the system.
-
-	  Say Y only if you're working on the development of an
-	  architecture backend for the context tracking.
-
-	  Say N otherwise, this option brings an overhead that you
-	  don't want in production.
-
-
-config RCU_FANOUT
-	int "Tree-based hierarchical RCU fanout value"
-	range 2 64 if 64BIT
-	range 2 32 if !64BIT
-	depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
-	default 64 if 64BIT
-	default 32 if !64BIT
-	help
-	  This option controls the fanout of hierarchical implementations
-	  of RCU, allowing RCU to work efficiently on machines with
-	  large numbers of CPUs.  This value must be at least the fourth
-	  root of NR_CPUS, which allows NR_CPUS to be insanely large.
-	  The default value of RCU_FANOUT should be used for production
-	  systems, but if you are stress-testing the RCU implementation
-	  itself, small RCU_FANOUT values allow you to test large-system
-	  code paths on small(er) systems.
-
-	  Select a specific number if testing RCU itself.
-	  Take the default if unsure.
-
-config RCU_FANOUT_LEAF
-	int "Tree-based hierarchical RCU leaf-level fanout value"
-	range 2 64 if 64BIT
-	range 2 32 if !64BIT
-	depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
-	default 16
-	help
-	  This option controls the leaf-level fanout of hierarchical
-	  implementations of RCU, and allows trading off cache misses
-	  against lock contention.  Systems that synchronize their
-	  scheduling-clock interrupts for energy-efficiency reasons will
-	  want the default because the smaller leaf-level fanout keeps
-	  lock contention levels acceptably low.  Very large systems
-	  (hundreds or thousands of CPUs) will instead want to set this
-	  value to the maximum value possible in order to reduce the
-	  number of cache misses incurred during RCU's grace-period
-	  initialization.  These systems tend to run CPU-bound, and thus
-	  are not helped by synchronized interrupts, and thus tend to
-	  skew them, which reduces lock contention enough that large
-	  leaf-level fanouts work well.
-
-	  Select a specific number if testing RCU itself.
-
-	  Select the maximum permissible value for large systems.
-
-	  Take the default if unsure.
-
-config RCU_FAST_NO_HZ
-	bool "Accelerate last non-dyntick-idle CPU's grace periods"
-	depends on NO_HZ_COMMON && SMP && RCU_EXPERT
-	default n
-	help
-	  This option permits CPUs to enter dynticks-idle state even if
-	  they have RCU callbacks queued, and prevents RCU from waking
-	  these CPUs up more than roughly once every four jiffies (by
-	  default, you can adjust this using the rcutree.rcu_idle_gp_delay
-	  parameter), thus improving energy efficiency.  On the other
-	  hand, this option increases the duration of RCU grace periods,
-	  for example, slowing down synchronize_rcu().
-
-	  Say Y if energy efficiency is critically important, and you
-	  	don't care about increased grace-period durations.
-
-	  Say N if you are unsure.
-
-config TREE_RCU_TRACE
-	def_bool RCU_TRACE && ( TREE_RCU || PREEMPT_RCU )
-	select DEBUG_FS
-	help
-	  This option provides tracing for the TREE_RCU and
-	  PREEMPT_RCU implementations, permitting Makefile to
-	  trivially select kernel/rcutree_trace.c.
-
-config RCU_BOOST
-	bool "Enable RCU priority boosting"
-	depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
-	default n
-	help
-	  This option boosts the priority of preempted RCU readers that
-	  block the current preemptible RCU grace period for too long.
-	  This option also prevents heavy loads from blocking RCU
-	  callback invocation for all flavors of RCU.
-
-	  Say Y here if you are working with real-time apps or heavy loads
-	  Say N here if you are unsure.
-
-config RCU_KTHREAD_PRIO
-	int "Real-time priority to use for RCU worker threads"
-	range 1 99 if RCU_BOOST
-	range 0 99 if !RCU_BOOST
-	default 1 if RCU_BOOST
-	default 0 if !RCU_BOOST
-	depends on RCU_EXPERT
-	help
-	  This option specifies the SCHED_FIFO priority value that will be
-	  assigned to the rcuc/n and rcub/n threads and is also the value
-	  used for RCU_BOOST (if enabled). If you are working with a
-	  real-time application that has one or more CPU-bound threads
-	  running at a real-time priority level, you should set
-	  RCU_KTHREAD_PRIO to a priority higher than the highest-priority
-	  real-time CPU-bound application thread.  The default RCU_KTHREAD_PRIO
-	  value of 1 is appropriate in the common case, which is real-time
-	  applications that do not have any CPU-bound threads.
-
-	  Some real-time applications might not have a single real-time
-	  thread that saturates a given CPU, but instead might have
-	  multiple real-time threads that, taken together, fully utilize
-	  that CPU.  In this case, you should set RCU_KTHREAD_PRIO to
-	  a priority higher than the lowest-priority thread that is
-	  conspiring to prevent the CPU from running any non-real-time
-	  tasks.  For example, if one thread at priority 10 and another
-	  thread at priority 5 are between themselves fully consuming
-	  the CPU time on a given CPU, then RCU_KTHREAD_PRIO should be
-	  set to priority 6 or higher.
-
-	  Specify the real-time priority, or take the default if unsure.
-
-config RCU_BOOST_DELAY
-	int "Milliseconds to delay boosting after RCU grace-period start"
-	range 0 3000
-	depends on RCU_BOOST
-	default 500
-	help
-	  This option specifies the time to wait after the beginning of
-	  a given grace period before priority-boosting preempted RCU
-	  readers blocking that grace period.  Note that any RCU reader
-	  blocking an expedited RCU grace period is boosted immediately.
-
-	  Accept the default if unsure.
-
-config RCU_NOCB_CPU
-	bool "Offload RCU callback processing from boot-selected CPUs"
-	depends on TREE_RCU || PREEMPT_RCU
-	depends on RCU_EXPERT || NO_HZ_FULL
-	default n
-	help
-	  Use this option to reduce OS jitter for aggressive HPC or
-	  real-time workloads.	It can also be used to offload RCU
-	  callback invocation to energy-efficient CPUs in battery-powered
-	  asymmetric multiprocessors.
-
-	  This option offloads callback invocation from the set of
-	  CPUs specified at boot time by the rcu_nocbs parameter.
-	  For each such CPU, a kthread ("rcuox/N") will be created to
-	  invoke callbacks, where the "N" is the CPU being offloaded,
-	  and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and
-	  "s" for RCU-sched.  Nothing prevents this kthread from running
-	  on the specified CPUs, but (1) the kthreads may be preempted
-	  between each callback, and (2) affinity or cgroups can be used
-	  to force the kthreads to run on whatever set of CPUs is desired.
-
-	  Say Y here if you want to help to debug reduced OS jitter.
-	  Say N here if you are unsure.
-
-choice
-	prompt "Build-forced no-CBs CPUs"
-	default RCU_NOCB_CPU_NONE
-	depends on RCU_NOCB_CPU
-	help
-	  This option allows no-CBs CPUs (whose RCU callbacks are invoked
-	  from kthreads rather than from softirq context) to be specified
-	  at build time.  Additional no-CBs CPUs may be specified by
-	  the rcu_nocbs= boot parameter.
-
-config RCU_NOCB_CPU_NONE
-	bool "No build_forced no-CBs CPUs"
-	help
-	  This option does not force any of the CPUs to be no-CBs CPUs.
-	  Only CPUs designated by the rcu_nocbs= boot parameter will be
-	  no-CBs CPUs, whose RCU callbacks will be invoked by per-CPU
-	  kthreads whose names begin with "rcuo".  All other CPUs will
-	  invoke their own RCU callbacks in softirq context.
-
-	  Select this option if you want to choose no-CBs CPUs at
-	  boot time, for example, to allow testing of different no-CBs
-	  configurations without having to rebuild the kernel each time.
-
-config RCU_NOCB_CPU_ZERO
-	bool "CPU 0 is a build_forced no-CBs CPU"
-	help
-	  This option forces CPU 0 to be a no-CBs CPU, so that its RCU
-	  callbacks are invoked by a per-CPU kthread whose name begins
-	  with "rcuo".	Additional CPUs may be designated as no-CBs
-	  CPUs using the rcu_nocbs= boot parameter will be no-CBs CPUs.
-	  All other CPUs will invoke their own RCU callbacks in softirq
-	  context.
-
-	  Select this if CPU 0 needs to be a no-CBs CPU for real-time
-	  or energy-efficiency reasons, but the real reason it exists
-	  is to ensure that randconfig testing covers mixed systems.
-
-config RCU_NOCB_CPU_ALL
-	bool "All CPUs are build_forced no-CBs CPUs"
-	help
-	  This option forces all CPUs to be no-CBs CPUs.  The rcu_nocbs=
-	  boot parameter will be ignored.  All CPUs' RCU callbacks will
-	  be executed in the context of per-CPU rcuo kthreads created for
-	  this purpose.  Assuming that the kthreads whose names start with
-	  "rcuo" are bound to "housekeeping" CPUs, this reduces OS jitter
-	  on the remaining CPUs, but might decrease memory locality during
-	  RCU-callback invocation, thus potentially degrading throughput.
-
-	  Select this if all CPUs need to be no-CBs CPUs for real-time
-	  or energy-efficiency reasons.
-
-endchoice
-
-endmenu # "RCU Subsystem"
+source "kernel/rcu/Kconfig"
 
 config BUILD_BIN2C
 	bool
@@ -1117,6 +809,7 @@ config CGROUP_HUGETLB
 
 config CPUSETS
 	bool "Cpuset controller"
+	depends on SMP
 	help
 	  This option will let you create and manage CPUSETs which
 	  allow dynamically partitioning a system into sets of CPUs and
diff --git a/init/main.c b/init/main.c
index f866510472d7..df58a416dd1d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -389,6 +389,7 @@ static __initdata DECLARE_COMPLETION(kthreadd_done);
 
 static noinline void __ref rest_init(void)
 {
+	struct task_struct *tsk;
 	int pid;
 
 	rcu_scheduler_starting();
@@ -397,12 +398,32 @@ static noinline void __ref rest_init(void)
 	 * the init task will end up wanting to create kthreads, which, if
 	 * we schedule it before we create kthreadd, will OOPS.
 	 */
-	kernel_thread(kernel_init, NULL, CLONE_FS);
+	pid = kernel_thread(kernel_init, NULL, CLONE_FS);
+	/*
+	 * Pin init on the boot CPU. Task migration is not properly working
+	 * until sched_init_smp() has been run. It will set the allowed
+	 * CPUs for init to the non isolated CPUs.
+	 */
+	rcu_read_lock();
+	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+	set_cpus_allowed_ptr(tsk, cpumask_of(smp_processor_id()));
+	rcu_read_unlock();
+
 	numa_default_policy();
 	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
 	rcu_read_lock();
 	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
 	rcu_read_unlock();
+
+	/*
+	 * Enable might_sleep() and smp_processor_id() checks.
+	 * They cannot be enabled earlier because with CONFIG_PRREMPT=y
+	 * kernel_thread() would trigger might_sleep() splats. With
+	 * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled
+	 * already, but it's stuck on the kthreadd_done completion.
+	 */
+	system_state = SYSTEM_SCHEDULING;
+
 	complete(&kthreadd_done);
 
 	/*
@@ -1015,10 +1036,6 @@ static noinline void __init kernel_init_freeable(void)
 	 * init can allocate pages on any node
 	 */
 	set_mems_allowed(node_states[N_MEMORY]);
-	/*
-	 * init can run on any cpu.
-	 */
-	set_cpus_allowed_ptr(current, cpu_all_mask);
 
 	cad_pid = task_pid(current);
 
diff --git a/kernel/async.c b/kernel/async.c
index d2edd6efec56..2cbd3dd5940d 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -114,14 +114,14 @@ static void async_run_entry_fn(struct work_struct *work)
 	ktime_t uninitialized_var(calltime), delta, rettime;
 
 	/* 1) run (and print duration) */
-	if (initcall_debug && system_state == SYSTEM_BOOTING) {
+	if (initcall_debug && system_state < SYSTEM_RUNNING) {
 		pr_debug("calling  %lli_%pF @ %i\n",
 			(long long)entry->cookie,
 			entry->func, task_pid_nr(current));
 		calltime = ktime_get();
 	}
 	entry->func(entry->data, entry->cookie);
-	if (initcall_debug && system_state == SYSTEM_BOOTING) {
+	if (initcall_debug && system_state < SYSTEM_RUNNING) {
 		rettime = ktime_get();
 		delta = ktime_sub(rettime, calltime);
 		pr_debug("initcall %lli_%pF returned 0 after %lld usecs\n",
@@ -284,14 +284,14 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain
 {
 	ktime_t uninitialized_var(starttime), delta, endtime;
 
-	if (initcall_debug && system_state == SYSTEM_BOOTING) {
+	if (initcall_debug && system_state < SYSTEM_RUNNING) {
 		pr_debug("async_waiting @ %i\n", task_pid_nr(current));
 		starttime = ktime_get();
 	}
 
 	wait_event(async_done, lowest_in_progress(domain) >= cookie);
 
-	if (initcall_debug && system_state == SYSTEM_BOOTING) {
+	if (initcall_debug && system_state < SYSTEM_RUNNING) {
 		endtime = ktime_get();
 		delta = ktime_sub(endtime, starttime);
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 5e00b2333c26..172dc8ee0e3b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -86,6 +86,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	array->map.key_size = attr->key_size;
 	array->map.value_size = attr->value_size;
 	array->map.max_entries = attr->max_entries;
+	array->map.map_flags = attr->map_flags;
 	array->elem_size = elem_size;
 
 	if (!percpu)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 39cfafd895b8..b09185f0f17d 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -432,6 +432,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
 	trie->map.key_size = attr->key_size;
 	trie->map.value_size = attr->value_size;
 	trie->map.max_entries = attr->max_entries;
+	trie->map.map_flags = attr->map_flags;
 	trie->data_size = attr->key_size -
 			  offsetof(struct bpf_lpm_trie_key, data);
 	trie->max_prefixlen = trie->data_size * 8;
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 4dfd6f2ec2f9..31147d730abf 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -88,6 +88,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	smap->map.key_size = attr->key_size;
 	smap->map.value_size = value_size;
 	smap->map.max_entries = attr->max_entries;
+	smap->map.map_flags = attr->map_flags;
 	smap->n_buckets = n_buckets;
 	smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index fd2411fd6914..265a0d854e33 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -783,7 +783,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
 EXPORT_SYMBOL_GPL(bpf_prog_get_type);
 
 /* last field in 'union bpf_attr' used by this command */
-#define	BPF_PROG_LOAD_LAST_FIELD kern_version
+#define	BPF_PROG_LOAD_LAST_FIELD prog_flags
 
 static int bpf_prog_load(union bpf_attr *attr)
 {
@@ -796,6 +796,9 @@ static int bpf_prog_load(union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_PROG_LOAD))
 		return -EINVAL;
 
+	if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
+		return -EINVAL;
+
 	/* copy eBPF program license from user space */
 	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
 			      sizeof(license) - 1) < 0)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c5b56c92f8e2..a8a725697bed 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -140,7 +140,7 @@ struct bpf_verifier_stack_elem {
 	struct bpf_verifier_stack_elem *next;
 };
 
-#define BPF_COMPLEXITY_LIMIT_INSNS	65536
+#define BPF_COMPLEXITY_LIMIT_INSNS	98304
 #define BPF_COMPLEXITY_LIMIT_STACK	1024
 
 #define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA)
@@ -241,6 +241,12 @@ static void print_verifier_state(struct bpf_verifier_state *state)
 		if (reg->max_value != BPF_REGISTER_MAX_RANGE)
 			verbose(",max_value=%llu",
 				(unsigned long long)reg->max_value);
+		if (reg->min_align)
+			verbose(",min_align=%u", reg->min_align);
+		if (reg->aux_off)
+			verbose(",aux_off=%u", reg->aux_off);
+		if (reg->aux_off_align)
+			verbose(",aux_off_align=%u", reg->aux_off_align);
 	}
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] == STACK_SPILL)
@@ -457,16 +463,22 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
 
+static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
+{
+	BUG_ON(regno >= MAX_BPF_REG);
+
+	memset(&regs[regno], 0, sizeof(regs[regno]));
+	regs[regno].type = NOT_INIT;
+	regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
+	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+}
+
 static void init_reg_state(struct bpf_reg_state *regs)
 {
 	int i;
 
-	for (i = 0; i < MAX_BPF_REG; i++) {
-		regs[i].type = NOT_INIT;
-		regs[i].imm = 0;
-		regs[i].min_value = BPF_REGISTER_MIN_RANGE;
-		regs[i].max_value = BPF_REGISTER_MAX_RANGE;
-	}
+	for (i = 0; i < MAX_BPF_REG; i++)
+		mark_reg_not_init(regs, i);
 
 	/* frame pointer */
 	regs[BPF_REG_FP].type = FRAME_PTR;
@@ -492,6 +504,7 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
 {
 	regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
 	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+	regs[regno].min_align = 0;
 }
 
 static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs,
@@ -779,17 +792,37 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 }
 
 static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
-				   int off, int size)
+				   int off, int size, bool strict)
 {
-	if (reg->id && size != 1) {
-		verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n");
-		return -EACCES;
+	int ip_align;
+	int reg_off;
+
+	/* Byte size accesses are always allowed. */
+	if (!strict || size == 1)
+		return 0;
+
+	reg_off = reg->off;
+	if (reg->id) {
+		if (reg->aux_off_align % size) {
+			verbose("Packet access is only %u byte aligned, %d byte access not allowed\n",
+				reg->aux_off_align, size);
+			return -EACCES;
+		}
+		reg_off += reg->aux_off;
 	}
 
-	/* skb->data is NET_IP_ALIGN-ed */
-	if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
+	/* For platforms that do not have a Kconfig enabling
+	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
+	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
+	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
+	 * to this code only in strict mode where we want to emulate
+	 * the NET_IP_ALIGN==2 checking.  Therefore use an
+	 * unconditional IP align value of '2'.
+	 */
+	ip_align = 2;
+	if ((ip_align + reg_off + off) % size != 0) {
 		verbose("misaligned packet access off %d+%d+%d size %d\n",
-			NET_IP_ALIGN, reg->off, off, size);
+			ip_align, reg_off, off, size);
 		return -EACCES;
 	}
 
@@ -797,9 +830,9 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
 }
 
 static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
-				   int size)
+				   int size, bool strict)
 {
-	if (size != 1) {
+	if (strict && size != 1) {
 		verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
 		return -EACCES;
 	}
@@ -807,16 +840,17 @@ static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
 	return 0;
 }
 
-static int check_ptr_alignment(const struct bpf_reg_state *reg,
+static int check_ptr_alignment(struct bpf_verifier_env *env,
+			       const struct bpf_reg_state *reg,
 			       int off, int size)
 {
+	bool strict = env->strict_alignment;
+
 	switch (reg->type) {
 	case PTR_TO_PACKET:
-		return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
-		       check_pkt_ptr_alignment(reg, off, size);
+		return check_pkt_ptr_alignment(reg, off, size, strict);
 	case PTR_TO_MAP_VALUE_ADJ:
-		return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
-		       check_val_ptr_alignment(reg, size);
+		return check_val_ptr_alignment(reg, size, strict);
 	default:
 		if (off % size != 0) {
 			verbose("misaligned access off %d size %d\n",
@@ -849,7 +883,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 	if (size < 0)
 		return size;
 
-	err = check_ptr_alignment(reg, off, size);
+	err = check_ptr_alignment(env, reg, off, size);
 	if (err)
 		return err;
 
@@ -883,6 +917,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 							 value_regno);
 			/* note that reg.[id|off|range] == 0 */
 			state->regs[value_regno].type = reg_type;
+			state->regs[value_regno].aux_off = 0;
+			state->regs[value_regno].aux_off_align = 0;
 		}
 
 	} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
@@ -953,6 +989,11 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	if (err)
 		return err;
 
+	if (is_pointer_value(env, insn->src_reg)) {
+		verbose("R%d leaks addr into mem\n", insn->src_reg);
+		return -EACCES;
+	}
+
 	/* check whether atomic_add can read the memory */
 	err = check_mem_access(env, insn->dst_reg, insn->off,
 			       BPF_SIZE(insn->code), BPF_READ, -1);
@@ -1313,7 +1354,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 	struct bpf_verifier_state *state = &env->cur_state;
 	const struct bpf_func_proto *fn = NULL;
 	struct bpf_reg_state *regs = state->regs;
-	struct bpf_reg_state *reg;
 	struct bpf_call_arg_meta meta;
 	bool changes_data;
 	int i, err;
@@ -1380,11 +1420,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 	}
 
 	/* reset caller saved regs */
-	for (i = 0; i < CALLER_SAVED_REGS; i++) {
-		reg = regs + caller_saved[i];
-		reg->type = NOT_INIT;
-		reg->imm = 0;
-	}
+	for (i = 0; i < CALLER_SAVED_REGS; i++)
+		mark_reg_not_init(regs, caller_saved[i]);
 
 	/* update return register */
 	if (fn->ret_type == RET_INTEGER) {
@@ -1455,6 +1492,8 @@ add_imm:
 		 */
 		dst_reg->off += imm;
 	} else {
+		bool had_id;
+
 		if (src_reg->type == PTR_TO_PACKET) {
 			/* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */
 			tmp_reg = *dst_reg;  /* save r7 state */
@@ -1488,14 +1527,23 @@ add_imm:
 				src_reg->imm);
 			return -EACCES;
 		}
+
+		had_id = (dst_reg->id != 0);
+
 		/* dst_reg stays as pkt_ptr type and since some positive
 		 * integer value was added to the pointer, increment its 'id'
 		 */
 		dst_reg->id = ++env->id_gen;
 
-		/* something was added to pkt_ptr, set range and off to zero */
+		/* something was added to pkt_ptr, set range to zero */
+		dst_reg->aux_off += dst_reg->off;
 		dst_reg->off = 0;
 		dst_reg->range = 0;
+		if (had_id)
+			dst_reg->aux_off_align = min(dst_reg->aux_off_align,
+						     src_reg->min_align);
+		else
+			dst_reg->aux_off_align = src_reg->min_align;
 	}
 	return 0;
 }
@@ -1669,6 +1717,13 @@ static void check_reg_overflow(struct bpf_reg_state *reg)
 		reg->min_value = BPF_REGISTER_MIN_RANGE;
 }
 
+static u32 calc_align(u32 imm)
+{
+	if (!imm)
+		return 1U << 31;
+	return imm - ((imm - 1) & imm);
+}
+
 static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 				    struct bpf_insn *insn)
 {
@@ -1676,8 +1731,10 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 	s64 min_val = BPF_REGISTER_MIN_RANGE;
 	u64 max_val = BPF_REGISTER_MAX_RANGE;
 	u8 opcode = BPF_OP(insn->code);
+	u32 dst_align, src_align;
 
 	dst_reg = &regs[insn->dst_reg];
+	src_align = 0;
 	if (BPF_SRC(insn->code) == BPF_X) {
 		check_reg_overflow(&regs[insn->src_reg]);
 		min_val = regs[insn->src_reg].min_value;
@@ -1693,12 +1750,18 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 		    regs[insn->src_reg].type != UNKNOWN_VALUE) {
 			min_val = BPF_REGISTER_MIN_RANGE;
 			max_val = BPF_REGISTER_MAX_RANGE;
+			src_align = 0;
+		} else {
+			src_align = regs[insn->src_reg].min_align;
 		}
 	} else if (insn->imm < BPF_REGISTER_MAX_RANGE &&
 		   (s64)insn->imm > BPF_REGISTER_MIN_RANGE) {
 		min_val = max_val = insn->imm;
+		src_align = calc_align(insn->imm);
 	}
 
+	dst_align = dst_reg->min_align;
+
 	/* We don't know anything about what was done to this register, mark it
 	 * as unknown.
 	 */
@@ -1723,18 +1786,21 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 			dst_reg->min_value += min_val;
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
 			dst_reg->max_value += max_val;
+		dst_reg->min_align = min(src_align, dst_align);
 		break;
 	case BPF_SUB:
 		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
 			dst_reg->min_value -= min_val;
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
 			dst_reg->max_value -= max_val;
+		dst_reg->min_align = min(src_align, dst_align);
 		break;
 	case BPF_MUL:
 		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
 			dst_reg->min_value *= min_val;
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
 			dst_reg->max_value *= max_val;
+		dst_reg->min_align = max(src_align, dst_align);
 		break;
 	case BPF_AND:
 		/* Disallow AND'ing of negative numbers, ain't nobody got time
@@ -1746,17 +1812,23 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 		else
 			dst_reg->min_value = 0;
 		dst_reg->max_value = max_val;
+		dst_reg->min_align = max(src_align, dst_align);
 		break;
 	case BPF_LSH:
 		/* Gotta have special overflow logic here, if we're shifting
 		 * more than MAX_RANGE then just assume we have an invalid
 		 * range.
 		 */
-		if (min_val > ilog2(BPF_REGISTER_MAX_RANGE))
+		if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) {
 			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
-		else if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
-			dst_reg->min_value <<= min_val;
-
+			dst_reg->min_align = 1;
+		} else {
+			if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
+				dst_reg->min_value <<= min_val;
+			if (!dst_reg->min_align)
+				dst_reg->min_align = 1;
+			dst_reg->min_align <<= min_val;
+		}
 		if (max_val > ilog2(BPF_REGISTER_MAX_RANGE))
 			dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
 		else if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
@@ -1766,11 +1838,19 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 		/* RSH by a negative number is undefined, and the BPF_RSH is an
 		 * unsigned shift, so make the appropriate casts.
 		 */
-		if (min_val < 0 || dst_reg->min_value < 0)
+		if (min_val < 0 || dst_reg->min_value < 0) {
 			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
-		else
+		} else {
 			dst_reg->min_value =
 				(u64)(dst_reg->min_value) >> min_val;
+		}
+		if (min_val < 0) {
+			dst_reg->min_align = 1;
+		} else {
+			dst_reg->min_align >>= (u64) min_val;
+			if (!dst_reg->min_align)
+				dst_reg->min_align = 1;
+		}
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
 			dst_reg->max_value >>= max_val;
 		break;
@@ -1872,6 +1952,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 			regs[insn->dst_reg].imm = insn->imm;
 			regs[insn->dst_reg].max_value = insn->imm;
 			regs[insn->dst_reg].min_value = insn->imm;
+			regs[insn->dst_reg].min_align = calc_align(insn->imm);
 		}
 
 	} else if (opcode > BPF_END) {
@@ -2368,7 +2449,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
 	struct bpf_reg_state *regs = env->cur_state.regs;
 	u8 mode = BPF_MODE(insn->code);
-	struct bpf_reg_state *reg;
 	int i, err;
 
 	if (!may_access_skb(env->prog->type)) {
@@ -2401,11 +2481,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	}
 
 	/* reset caller saved regs to unreadable */
-	for (i = 0; i < CALLER_SAVED_REGS; i++) {
-		reg = regs + caller_saved[i];
-		reg->type = NOT_INIT;
-		reg->imm = 0;
-	}
+	for (i = 0; i < CALLER_SAVED_REGS; i++)
+		mark_reg_not_init(regs, caller_saved[i]);
 
 	/* mark destination R0 register as readable, since it contains
 	 * the value fetched from the packet
@@ -2564,6 +2641,7 @@ peek_stack:
 				env->explored_states[t + 1] = STATE_LIST_MARK;
 		} else {
 			/* conditional jump with two edges */
+			env->explored_states[t] = STATE_LIST_MARK;
 			ret = push_insn(t, t + 1, FALLTHROUGH, env);
 			if (ret == 1)
 				goto peek_stack;
@@ -2615,7 +2693,8 @@ err_free:
 /* the following conditions reduce the number of explored insns
  * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
  */
-static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
+static bool compare_ptrs_to_packet(struct bpf_verifier_env *env,
+				   struct bpf_reg_state *old,
 				   struct bpf_reg_state *cur)
 {
 	if (old->id != cur->id)
@@ -2658,7 +2737,7 @@ static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
 	 * 'if (R4 > data_end)' and all further insn were already good with r=20,
 	 * so they will be good with r=30 and we can prune the search.
 	 */
-	if (old->off <= cur->off &&
+	if (!env->strict_alignment && old->off <= cur->off &&
 	    old->off >= old->range && cur->off >= cur->range)
 		return true;
 
@@ -2722,8 +2801,14 @@ static bool states_equal(struct bpf_verifier_env *env,
 		     rcur->type != NOT_INIT))
 			continue;
 
+		/* Don't care about the reg->id in this case. */
+		if (rold->type == PTR_TO_MAP_VALUE_OR_NULL &&
+		    rcur->type == PTR_TO_MAP_VALUE_OR_NULL &&
+		    rold->map_ptr == rcur->map_ptr)
+			continue;
+
 		if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
-		    compare_ptrs_to_packet(rold, rcur))
+		    compare_ptrs_to_packet(env, rold, rcur))
 			continue;
 
 		return false;
@@ -2856,8 +2941,15 @@ static int do_check(struct bpf_verifier_env *env)
 			goto process_bpf_exit;
 		}
 
-		if (log_level && do_print_state) {
-			verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx);
+		if (need_resched())
+			cond_resched();
+
+		if (log_level > 1 || (log_level && do_print_state)) {
+			if (log_level > 1)
+				verbose("%d:", insn_idx);
+			else
+				verbose("\nfrom %d to %d:",
+					prev_insn_idx, insn_idx);
 			print_verifier_state(&env->cur_state);
 			do_print_state = false;
 		}
@@ -3495,6 +3587,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 		log_level = 0;
 	}
 
+	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+		env->strict_alignment = true;
+
 	ret = replace_map_fd_with_map_ptr(env);
 	if (ret < 0)
 		goto skip_full_check;
@@ -3600,6 +3696,10 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
 
 	log_level = 0;
 
+	env->strict_alignment = false;
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+		env->strict_alignment = true;
+
 	env->explored_states = kcalloc(env->prog->len,
 				       sizeof(struct bpf_verifier_state_list *),
 				       GFP_KERNEL);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index c3c9a0e1b3c9..8d4e85eae42c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4265,6 +4265,11 @@ static void kill_css(struct cgroup_subsys_state *css)
 {
 	lockdep_assert_held(&cgroup_mutex);
 
+	if (css->flags & CSS_DYING)
+		return;
+
+	css->flags |= CSS_DYING;
+
 	/*
 	 * This must happen before css is disassociated with its cgroup.
 	 * See seq_css() for details.
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index f6501f4f6040..ae643412948a 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -176,9 +176,9 @@ typedef enum {
 } cpuset_flagbits_t;
 
 /* convenient tests for these bits */
-static inline bool is_cpuset_online(const struct cpuset *cs)
+static inline bool is_cpuset_online(struct cpuset *cs)
 {
-	return test_bit(CS_ONLINE, &cs->flags);
+	return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css);
 }
 
 static inline int is_cpu_exclusive(const struct cpuset *cs)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9ae6fbe5b5cf..cb5103413bd8 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1658,13 +1658,13 @@ static ssize_t write_cpuhp_target(struct device *dev,
 	ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
 	mutex_unlock(&cpuhp_state_mutex);
 	if (ret)
-		return ret;
+		goto out;
 
 	if (st->state < target)
 		ret = do_cpu_up(dev->id, target);
 	else
 		ret = do_cpu_down(dev->id, target);
-
+out:
 	unlock_device_hotplug();
 	return ret ? ret : count;
 }
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index c04917cad1bf..1b2be63c8528 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -229,12 +229,18 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 		}
 
 		if (regs) {
+			mm_segment_t fs;
+
 			if (crosstask)
 				goto exit_put;
 
 			if (add_mark)
 				perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
+
+			fs = get_fs();
+			set_fs(USER_DS);
 			perf_callchain_user(&ctx, regs);
+			set_fs(fs);
 		}
 	}
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6e75a5c9412d..bc63f8db1b0d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -925,11 +925,6 @@ static inline int is_cgroup_event(struct perf_event *event)
 	return 0;
 }
 
-static inline u64 perf_cgroup_event_cgrp_time(struct perf_event *event)
-{
-	return 0;
-}
-
 static inline void update_cgrp_time_from_event(struct perf_event *event)
 {
 }
@@ -5729,9 +5724,6 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 	__output_copy(handle, values, n * sizeof(u64));
 }
 
-/*
- * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
- */
 static void perf_output_read_group(struct perf_output_handle *handle,
 			    struct perf_event *event,
 			    u64 enabled, u64 running)
@@ -5776,6 +5768,13 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
 				 PERF_FORMAT_TOTAL_TIME_RUNNING)
 
+/*
+ * XXX PERF_SAMPLE_READ vs inherited events seems difficult.
+ *
+ * The problem is that its both hard and excessively expensive to iterate the
+ * child list, not to mention that its impossible to IPI the children running
+ * on another CPU, from interrupt/NMI context.
+ */
 static void perf_output_read(struct perf_output_handle *handle,
 			     struct perf_event *event)
 {
@@ -7316,6 +7315,21 @@ int perf_event_account_interrupt(struct perf_event *event)
 	return __perf_event_account_interrupt(event, 1);
 }
 
+static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
+{
+	/*
+	 * Due to interrupt latency (AKA "skid"), we may enter the
+	 * kernel before taking an overflow, even if the PMU is only
+	 * counting user events.
+	 * To avoid leaking information to userspace, we must always
+	 * reject kernel samples when exclude_kernel is set.
+	 */
+	if (event->attr.exclude_kernel && !user_mode(regs))
+		return false;
+
+	return true;
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7337,6 +7351,12 @@ static int __perf_event_overflow(struct perf_event *event,
 	ret = __perf_event_account_interrupt(event, throttle);
 
 	/*
+	 * For security, drop the skid kernel samples if necessary.
+	 */
+	if (!sample_is_allowed(event, regs))
+		return ret;
+
+	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
 	 */
@@ -9172,7 +9192,7 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
 
 static struct pmu *perf_init_event(struct perf_event *event)
 {
-	struct pmu *pmu = NULL;
+	struct pmu *pmu;
 	int idx;
 	int ret;
 
@@ -9441,9 +9461,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	local64_set(&hwc->period_left, hwc->sample_period);
 
 	/*
-	 * we currently do not support PERF_FORMAT_GROUP on inherited events
+	 * We currently do not support PERF_SAMPLE_READ on inherited events.
+	 * See perf_output_read().
 	 */
-	if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
+	if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ))
 		goto err_ns;
 
 	if (!has_branch_stack(event))
@@ -9456,9 +9477,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	}
 
 	pmu = perf_init_event(event);
-	if (!pmu)
-		goto err_ns;
-	else if (IS_ERR(pmu)) {
+	if (IS_ERR(pmu)) {
 		err = PTR_ERR(pmu);
 		goto err_ns;
 	}
@@ -9471,8 +9490,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		event->addr_filters_offs = kcalloc(pmu->nr_addr_filters,
 						   sizeof(unsigned long),
 						   GFP_KERNEL);
-		if (!event->addr_filters_offs)
+		if (!event->addr_filters_offs) {
+			err = -ENOMEM;
 			goto err_per_task;
+		}
 
 		/* force hw sync on the address filters */
 		event->addr_filters_gen = 1;
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 2831480c63a2..ee97196bb151 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -580,7 +580,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
 	int ret = -ENOMEM, max_order = 0;
 
 	if (!has_aux(event))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) {
 		/*
diff --git a/kernel/exit.c b/kernel/exit.c
index 516acdb0e0ec..c63226283aef 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -318,19 +318,6 @@ void rcuwait_wake_up(struct rcuwait *w)
 	rcu_read_unlock();
 }
 
-struct task_struct *try_get_task_struct(struct task_struct **ptask)
-{
-	struct task_struct *task;
-
-	rcu_read_lock();
-	task = task_rcu_dereference(ptask);
-	if (task)
-		get_task_struct(task);
-	rcu_read_unlock();
-
-	return task;
-}
-
 /*
  * Determine if a process group is "orphaned", according to the POSIX
  * definition in 2.2.2.52.  Orphaned process groups are not to be affected
@@ -1004,7 +991,7 @@ struct wait_opts {
 	int __user		*wo_stat;
 	struct rusage __user	*wo_rusage;
 
-	wait_queue_t		child_wait;
+	wait_queue_entry_t		child_wait;
 	int			notask_error;
 };
 
@@ -1541,7 +1528,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
 	return 0;
 }
 
-static int child_wait_callback(wait_queue_t *wait, unsigned mode,
+static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode,
 				int sync, void *key)
 {
 	struct wait_opts *wo = container_of(wait, struct wait_opts,
diff --git a/kernel/extable.c b/kernel/extable.c
index 2676d7f8baf6..0fbdd8582f08 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -75,7 +75,7 @@ int core_kernel_text(unsigned long addr)
 	    addr < (unsigned long)_etext)
 		return 1;
 
-	if (system_state == SYSTEM_BOOTING &&
+	if (system_state < SYSTEM_RUNNING &&
 	    init_kernel_text(addr))
 		return 1;
 	return 0;
diff --git a/kernel/fork.c b/kernel/fork.c
index 08ba696aa561..e53770d2bf95 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -560,7 +560,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	set_task_stack_end_magic(tsk);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
-	tsk->stack_canary = get_random_int();
+	tsk->stack_canary = get_random_long();
 #endif
 
 	/*
@@ -1337,7 +1337,7 @@ void __cleanup_sighand(struct sighand_struct *sighand)
 	if (atomic_dec_and_test(&sighand->count)) {
 		signalfd_cleanup(sighand);
 		/*
-		 * sighand_cachep is SLAB_DESTROY_BY_RCU so we can free it
+		 * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it
 		 * without an RCU grace period, see __lock_task_sighand().
 		 */
 		kmem_cache_free(sighand_cachep, sighand);
@@ -1577,6 +1577,18 @@ static __latent_entropy struct task_struct *copy_process(
 	if (!p)
 		goto fork_out;
 
+	/*
+	 * This _must_ happen before we call free_task(), i.e. before we jump
+	 * to any of the bad_fork_* labels. This is to avoid freeing
+	 * p->set_child_tid which is (ab)used as a kthread's data pointer for
+	 * kernel threads (PF_KTHREAD).
+	 */
+	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
+	/*
+	 * Clear TID on mm_release()?
+	 */
+	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
+
 	ftrace_graph_init_task(p);
 
 	rt_mutex_init_task(p);
@@ -1743,11 +1755,6 @@ static __latent_entropy struct task_struct *copy_process(
 		}
 	}
 
-	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
-	/*
-	 * Clear TID on mm_release()?
-	 */
-	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
 #ifdef CONFIG_BLOCK
 	p->plug = NULL;
 #endif
@@ -1845,11 +1852,13 @@ static __latent_entropy struct task_struct *copy_process(
 	*/
 	recalc_sigpending();
 	if (signal_pending(current)) {
-		spin_unlock(&current->sighand->siglock);
-		write_unlock_irq(&tasklist_lock);
 		retval = -ERESTARTNOINTR;
 		goto bad_fork_cancel_cgroup;
 	}
+	if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
+		retval = -ENOMEM;
+		goto bad_fork_cancel_cgroup;
+	}
 
 	if (likely(p->pid)) {
 		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
@@ -1907,6 +1916,8 @@ static __latent_entropy struct task_struct *copy_process(
 	return p;
 
 bad_fork_cancel_cgroup:
+	spin_unlock(&current->sighand->siglock);
+	write_unlock_irq(&tasklist_lock);
 	cgroup_cancel_fork(p);
 bad_fork_free_pid:
 	cgroup_threadgroup_change_end(current);
@@ -2176,7 +2187,7 @@ void __init proc_caches_init(void)
 {
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
 			SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor);
 	signal_cachep = kmem_cache_create("signal_cache",
 			sizeof(struct signal_struct), 0,
diff --git a/kernel/futex.c b/kernel/futex.c
index 357348a6cf6b..d6cf71d08f21 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -225,7 +225,7 @@ struct futex_pi_state {
  * @requeue_pi_key:	the requeue_pi target futex key
  * @bitset:		bitset for the optional bitmasked wakeup
  *
- * We use this hashed waitqueue, instead of a normal wait_queue_t, so
+ * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
  * we can wake only the relevant ones (hashed queues may be shared).
  *
  * A futex_q has a woken state, just like tasks have TASK_RUNNING.
diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
index 2f9df37940a0..c51a49c9be70 100644
--- a/kernel/gcov/base.c
+++ b/kernel/gcov/base.c
@@ -98,6 +98,12 @@ void __gcov_merge_icall_topn(gcov_type *counters, unsigned int n_counters)
 }
 EXPORT_SYMBOL(__gcov_merge_icall_topn);
 
+void __gcov_exit(void)
+{
+	/* Unused. */
+}
+EXPORT_SYMBOL(__gcov_exit);
+
 /**
  * gcov_enable_events - enable event reporting through gcov_event()
  *
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index 6a5c239c7669..46a18e72bce6 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,9 @@
 #include <linux/vmalloc.h>
 #include "gcov.h"
 
-#if (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
+#if (__GNUC__ >= 7)
+#define GCOV_COUNTERS			9
+#elif (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
 #define GCOV_COUNTERS			10
 #elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
 #define GCOV_COUNTERS			9
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 686be4b73018..c94da688ee9b 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -880,8 +880,8 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
 	if (!desc)
 		return;
 
-	__irq_do_set_handler(desc, handle, 1, NULL);
 	desc->irq_common_data.handler_data = data;
+	__irq_do_set_handler(desc, handle, 1, NULL);
 
 	irq_put_desc_busunlock(desc, flags);
 }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 070be980c37a..425170d4439b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1312,8 +1312,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 			ret = __irq_set_trigger(desc,
 						new->flags & IRQF_TRIGGER_MASK);
 
-			if (ret)
+			if (ret) {
+				irq_release_resources(desc);
 				goto out_mask;
+			}
 		}
 
 		desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index ae1a3ba24df5..154ffb489b93 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -38,6 +38,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/compiler.h>
 #include <linux/hugetlb.h>
+#include <linux/frame.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
@@ -874,7 +875,7 @@ int kexec_load_disabled;
  * only when panic_cpu holds the current CPU number; this is the only CPU
  * which processes crash_kexec routines.
  */
-void __crash_kexec(struct pt_regs *regs)
+void __noclone __crash_kexec(struct pt_regs *regs)
 {
 	/* Take the kexec_mutex here to prevent sys_kexec_load
 	 * running on one cpu from replacing the crash kernel
@@ -896,6 +897,7 @@ void __crash_kexec(struct pt_regs *regs)
 		mutex_unlock(&kexec_mutex);
 	}
 }
+STACK_FRAME_NON_STANDARD(__crash_kexec);
 
 void crash_kexec(struct pt_regs *regs)
 {
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7367e0ec6f81..adfe3b4cfe05 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -122,7 +122,7 @@ static void *alloc_insn_page(void)
 	return module_alloc(PAGE_SIZE);
 }
 
-static void free_insn_page(void *page)
+void __weak free_insn_page(void *page)
 {
 	module_memfree(page);
 }
@@ -595,7 +595,7 @@ static void kprobe_optimizer(struct work_struct *work)
 }
 
 /* Wait for completing optimization and unoptimization */
-static void wait_for_kprobe_optimizer(void)
+void wait_for_kprobe_optimizer(void)
 {
 	mutex_lock(&kprobe_mutex);
 
@@ -2183,6 +2183,12 @@ static int kprobes_module_callback(struct notifier_block *nb,
 				 * The vaddr this probe is installed will soon
 				 * be vfreed buy not synced to disk. Hence,
 				 * disarming the breakpoint isn't needed.
+				 *
+				 * Note, this will also move any optimized probes
+				 * that are pending to be removed from their
+				 * corresponding lists to the freeing_list and
+				 * will not be touched by the delayed
+				 * kprobe_optimizer work handler.
 				 */
 				kill_kprobe(p);
 			}
diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig
index 045022557936..ec4565122e65 100644
--- a/kernel/livepatch/Kconfig
+++ b/kernel/livepatch/Kconfig
@@ -10,6 +10,7 @@ config LIVEPATCH
 	depends on SYSFS
 	depends on KALLSYMS_ALL
 	depends on HAVE_LIVEPATCH
+	depends on !TRIM_UNUSED_KSYMS
 	help
 	  Say Y here if you want to support kernel live patching.
 	  This option has no runtime impact until a kernel "patch"
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index f8269036bf0b..52c4e907c14b 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -59,7 +59,11 @@ static void notrace klp_ftrace_handler(unsigned long ip,
 
 	ops = container_of(fops, struct klp_ops, fops);
 
-	rcu_read_lock();
+	/*
+	 * A variant of synchronize_sched() is used to allow patching functions
+	 * where RCU is not watching, see klp_synchronize_transition().
+	 */
+	preempt_disable_notrace();
 
 	func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
 				      stack_node);
@@ -115,7 +119,7 @@ static void notrace klp_ftrace_handler(unsigned long ip,
 
 	klp_arch_set_pc(regs, (unsigned long)func->new_func);
 unlock:
-	rcu_read_unlock();
+	preempt_enable_notrace();
 }
 
 /*
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index adc0cc64aa4b..b004a1fb6032 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -49,6 +49,28 @@ static void klp_transition_work_fn(struct work_struct *work)
 static DECLARE_DELAYED_WORK(klp_transition_work, klp_transition_work_fn);
 
 /*
+ * This function is just a stub to implement a hard force
+ * of synchronize_sched(). This requires synchronizing
+ * tasks even in userspace and idle.
+ */
+static void klp_sync(struct work_struct *work)
+{
+}
+
+/*
+ * We allow to patch also functions where RCU is not watching,
+ * e.g. before user_exit(). We can not rely on the RCU infrastructure
+ * to do the synchronization. Instead hard force the sched synchronization.
+ *
+ * This approach allows to use RCU functions for manipulating func_stack
+ * safely.
+ */
+static void klp_synchronize_transition(void)
+{
+	schedule_on_each_cpu(klp_sync);
+}
+
+/*
  * The transition to the target patch state is complete.  Clean up the data
  * structures.
  */
@@ -73,7 +95,7 @@ static void klp_complete_transition(void)
 		 * func->transition gets cleared, the handler may choose a
 		 * removed function.
 		 */
-		synchronize_rcu();
+		klp_synchronize_transition();
 	}
 
 	if (klp_transition_patch->immediate)
@@ -92,7 +114,7 @@ static void klp_complete_transition(void)
 
 	/* Prevent klp_ftrace_handler() from seeing KLP_UNDEFINED state */
 	if (klp_target_state == KLP_PATCHED)
-		synchronize_rcu();
+		klp_synchronize_transition();
 
 	read_lock(&tasklist_lock);
 	for_each_process_thread(g, task) {
@@ -136,7 +158,11 @@ void klp_cancel_transition(void)
  */
 void klp_update_patch_state(struct task_struct *task)
 {
-	rcu_read_lock();
+	/*
+	 * A variant of synchronize_sched() is used to allow patching functions
+	 * where RCU is not watching, see klp_synchronize_transition().
+	 */
+	preempt_disable_notrace();
 
 	/*
 	 * This test_and_clear_tsk_thread_flag() call also serves as a read
@@ -153,7 +179,7 @@ void klp_update_patch_state(struct task_struct *task)
 	if (test_and_clear_tsk_thread_flag(task, TIF_PATCH_PENDING))
 		task->patch_state = READ_ONCE(klp_target_state);
 
-	rcu_read_unlock();
+	preempt_enable_notrace();
 }
 
 /*
@@ -539,7 +565,7 @@ void klp_reverse_transition(void)
 		clear_tsk_thread_flag(idle_task(cpu), TIF_PATCH_PENDING);
 
 	/* Let any remaining calls to klp_update_patch_state() complete */
-	synchronize_rcu();
+	klp_synchronize_transition();
 
 	klp_start_transition();
 }
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 0a1b3c748478..7d2499bec5fe 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1157,18 +1157,18 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
 	if (debug_locks_silent)
 		return 0;
 
-	printk("\n");
-	printk("======================================================\n");
-	printk("[ INFO: possible circular locking dependency detected ]\n");
+	pr_warn("\n");
+	pr_warn("======================================================\n");
+	pr_warn("WARNING: possible circular locking dependency detected\n");
 	print_kernel_ident();
-	printk("-------------------------------------------------------\n");
-	printk("%s/%d is trying to acquire lock:\n",
+	pr_warn("------------------------------------------------------\n");
+	pr_warn("%s/%d is trying to acquire lock:\n",
 		curr->comm, task_pid_nr(curr));
 	print_lock(check_src);
-	printk("\nbut task is already holding lock:\n");
+	pr_warn("\nbut task is already holding lock:\n");
 	print_lock(check_tgt);
-	printk("\nwhich lock already depends on the new lock.\n\n");
-	printk("\nthe existing dependency chain (in reverse order) is:\n");
+	pr_warn("\nwhich lock already depends on the new lock.\n\n");
+	pr_warn("\nthe existing dependency chain (in reverse order) is:\n");
 
 	print_circular_bug_entry(entry, depth);
 
@@ -1495,13 +1495,13 @@ print_bad_irq_dependency(struct task_struct *curr,
 	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
-	printk("\n");
-	printk("======================================================\n");
-	printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
+	pr_warn("\n");
+	pr_warn("=====================================================\n");
+	pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n",
 		irqclass, irqclass);
 	print_kernel_ident();
-	printk("------------------------------------------------------\n");
-	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
+	pr_warn("-----------------------------------------------------\n");
+	pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
 		curr->comm, task_pid_nr(curr),
 		curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
 		curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
@@ -1509,46 +1509,46 @@ print_bad_irq_dependency(struct task_struct *curr,
 		curr->softirqs_enabled);
 	print_lock(next);
 
-	printk("\nand this task is already holding:\n");
+	pr_warn("\nand this task is already holding:\n");
 	print_lock(prev);
-	printk("which would create a new lock dependency:\n");
+	pr_warn("which would create a new lock dependency:\n");
 	print_lock_name(hlock_class(prev));
-	printk(KERN_CONT " ->");
+	pr_cont(" ->");
 	print_lock_name(hlock_class(next));
-	printk(KERN_CONT "\n");
+	pr_cont("\n");
 
-	printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
+	pr_warn("\nbut this new dependency connects a %s-irq-safe lock:\n",
 		irqclass);
 	print_lock_name(backwards_entry->class);
-	printk("\n... which became %s-irq-safe at:\n", irqclass);
+	pr_warn("\n... which became %s-irq-safe at:\n", irqclass);
 
 	print_stack_trace(backwards_entry->class->usage_traces + bit1, 1);
 
-	printk("\nto a %s-irq-unsafe lock:\n", irqclass);
+	pr_warn("\nto a %s-irq-unsafe lock:\n", irqclass);
 	print_lock_name(forwards_entry->class);
-	printk("\n... which became %s-irq-unsafe at:\n", irqclass);
-	printk("...");
+	pr_warn("\n... which became %s-irq-unsafe at:\n", irqclass);
+	pr_warn("...");
 
 	print_stack_trace(forwards_entry->class->usage_traces + bit2, 1);
 
-	printk("\nother info that might help us debug this:\n\n");
+	pr_warn("\nother info that might help us debug this:\n\n");
 	print_irq_lock_scenario(backwards_entry, forwards_entry,
 				hlock_class(prev), hlock_class(next));
 
 	lockdep_print_held_locks(curr);
 
-	printk("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
+	pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
 	if (!save_trace(&prev_root->trace))
 		return 0;
 	print_shortest_lock_dependencies(backwards_entry, prev_root);
 
-	printk("\nthe dependencies between the lock to be acquired");
-	printk(" and %s-irq-unsafe lock:\n", irqclass);
+	pr_warn("\nthe dependencies between the lock to be acquired");
+	pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
 	if (!save_trace(&next_root->trace))
 		return 0;
 	print_shortest_lock_dependencies(forwards_entry, next_root);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
 	return 0;
@@ -1724,22 +1724,22 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
 	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
-	printk("\n");
-	printk("=============================================\n");
-	printk("[ INFO: possible recursive locking detected ]\n");
+	pr_warn("\n");
+	pr_warn("============================================\n");
+	pr_warn("WARNING: possible recursive locking detected\n");
 	print_kernel_ident();
-	printk("---------------------------------------------\n");
-	printk("%s/%d is trying to acquire lock:\n",
+	pr_warn("--------------------------------------------\n");
+	pr_warn("%s/%d is trying to acquire lock:\n",
 		curr->comm, task_pid_nr(curr));
 	print_lock(next);
-	printk("\nbut task is already holding lock:\n");
+	pr_warn("\nbut task is already holding lock:\n");
 	print_lock(prev);
 
-	printk("\nother info that might help us debug this:\n");
+	pr_warn("\nother info that might help us debug this:\n");
 	print_deadlock_scenario(next, prev);
 	lockdep_print_held_locks(curr);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
 	return 0;
@@ -2074,21 +2074,21 @@ static void print_collision(struct task_struct *curr,
 			struct held_lock *hlock_next,
 			struct lock_chain *chain)
 {
-	printk("\n");
-	printk("======================\n");
-	printk("[chain_key collision ]\n");
+	pr_warn("\n");
+	pr_warn("============================\n");
+	pr_warn("WARNING: chain_key collision\n");
 	print_kernel_ident();
-	printk("----------------------\n");
-	printk("%s/%d: ", current->comm, task_pid_nr(current));
-	printk("Hash chain already cached but the contents don't match!\n");
+	pr_warn("----------------------------\n");
+	pr_warn("%s/%d: ", current->comm, task_pid_nr(current));
+	pr_warn("Hash chain already cached but the contents don't match!\n");
 
-	printk("Held locks:");
+	pr_warn("Held locks:");
 	print_chain_keys_held_locks(curr, hlock_next);
 
-	printk("Locks in cached chain:");
+	pr_warn("Locks in cached chain:");
 	print_chain_keys_chain(chain);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 }
 #endif
@@ -2373,16 +2373,16 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
 	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
-	printk("\n");
-	printk("=================================\n");
-	printk("[ INFO: inconsistent lock state ]\n");
+	pr_warn("\n");
+	pr_warn("================================\n");
+	pr_warn("WARNING: inconsistent lock state\n");
 	print_kernel_ident();
-	printk("---------------------------------\n");
+	pr_warn("--------------------------------\n");
 
-	printk("inconsistent {%s} -> {%s} usage.\n",
+	pr_warn("inconsistent {%s} -> {%s} usage.\n",
 		usage_str[prev_bit], usage_str[new_bit]);
 
-	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
+	pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
 		curr->comm, task_pid_nr(curr),
 		trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
 		trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
@@ -2390,16 +2390,16 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
 		trace_softirqs_enabled(curr));
 	print_lock(this);
 
-	printk("{%s} state was registered at:\n", usage_str[prev_bit]);
+	pr_warn("{%s} state was registered at:\n", usage_str[prev_bit]);
 	print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1);
 
 	print_irqtrace_events(curr);
-	printk("\nother info that might help us debug this:\n");
+	pr_warn("\nother info that might help us debug this:\n");
 	print_usage_bug_scenario(this);
 
 	lockdep_print_held_locks(curr);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
 	return 0;
@@ -2438,28 +2438,28 @@ print_irq_inversion_bug(struct task_struct *curr,
 	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
-	printk("\n");
-	printk("=========================================================\n");
-	printk("[ INFO: possible irq lock inversion dependency detected ]\n");
+	pr_warn("\n");
+	pr_warn("========================================================\n");
+	pr_warn("WARNING: possible irq lock inversion dependency detected\n");
 	print_kernel_ident();
-	printk("---------------------------------------------------------\n");
-	printk("%s/%d just changed the state of lock:\n",
+	pr_warn("--------------------------------------------------------\n");
+	pr_warn("%s/%d just changed the state of lock:\n",
 		curr->comm, task_pid_nr(curr));
 	print_lock(this);
 	if (forwards)
-		printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
+		pr_warn("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
 	else
-		printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
+		pr_warn("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
 	print_lock_name(other->class);
-	printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
+	pr_warn("\n\nand interrupts could create inverse lock ordering between them.\n\n");
 
-	printk("\nother info that might help us debug this:\n");
+	pr_warn("\nother info that might help us debug this:\n");
 
 	/* Find a middle lock (if one exists) */
 	depth = get_lock_depth(other);
 	do {
 		if (depth == 0 && (entry != root)) {
-			printk("lockdep:%s bad path found in chain graph\n", __func__);
+			pr_warn("lockdep:%s bad path found in chain graph\n", __func__);
 			break;
 		}
 		middle = entry;
@@ -2475,12 +2475,12 @@ print_irq_inversion_bug(struct task_struct *curr,
 
 	lockdep_print_held_locks(curr);
 
-	printk("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
+	pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
 	if (!save_trace(&root->trace))
 		return 0;
 	print_shortest_lock_dependencies(other, root);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
 	return 0;
@@ -3189,25 +3189,25 @@ print_lock_nested_lock_not_held(struct task_struct *curr,
 	if (debug_locks_silent)
 		return 0;
 
-	printk("\n");
-	printk("==================================\n");
-	printk("[ BUG: Nested lock was not taken ]\n");
+	pr_warn("\n");
+	pr_warn("==================================\n");
+	pr_warn("WARNING: Nested lock was not taken\n");
 	print_kernel_ident();
-	printk("----------------------------------\n");
+	pr_warn("----------------------------------\n");
 
-	printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr));
+	pr_warn("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr));
 	print_lock(hlock);
 
-	printk("\nbut this task is not holding:\n");
-	printk("%s\n", hlock->nest_lock->name);
+	pr_warn("\nbut this task is not holding:\n");
+	pr_warn("%s\n", hlock->nest_lock->name);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
-	printk("\nother info that might help us debug this:\n");
+	pr_warn("\nother info that might help us debug this:\n");
 	lockdep_print_held_locks(curr);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
 	return 0;
@@ -3402,21 +3402,21 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
 	if (debug_locks_silent)
 		return 0;
 
-	printk("\n");
-	printk("=====================================\n");
-	printk("[ BUG: bad unlock balance detected! ]\n");
+	pr_warn("\n");
+	pr_warn("=====================================\n");
+	pr_warn("WARNING: bad unlock balance detected!\n");
 	print_kernel_ident();
-	printk("-------------------------------------\n");
-	printk("%s/%d is trying to release lock (",
+	pr_warn("-------------------------------------\n");
+	pr_warn("%s/%d is trying to release lock (",
 		curr->comm, task_pid_nr(curr));
 	print_lockdep_cache(lock);
-	printk(KERN_CONT ") at:\n");
+	pr_cont(") at:\n");
 	print_ip_sym(ip);
-	printk("but there are no more locks to release!\n");
-	printk("\nother info that might help us debug this:\n");
+	pr_warn("but there are no more locks to release!\n");
+	pr_warn("\nother info that might help us debug this:\n");
 	lockdep_print_held_locks(curr);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
 	return 0;
@@ -3974,21 +3974,21 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
 	if (debug_locks_silent)
 		return 0;
 
-	printk("\n");
-	printk("=================================\n");
-	printk("[ BUG: bad contention detected! ]\n");
+	pr_warn("\n");
+	pr_warn("=================================\n");
+	pr_warn("WARNING: bad contention detected!\n");
 	print_kernel_ident();
-	printk("---------------------------------\n");
-	printk("%s/%d is trying to contend lock (",
+	pr_warn("---------------------------------\n");
+	pr_warn("%s/%d is trying to contend lock (",
 		curr->comm, task_pid_nr(curr));
 	print_lockdep_cache(lock);
-	printk(KERN_CONT ") at:\n");
+	pr_cont(") at:\n");
 	print_ip_sym(ip);
-	printk("but there are no locks held!\n");
-	printk("\nother info that might help us debug this:\n");
+	pr_warn("but there are no locks held!\n");
+	pr_warn("\nother info that might help us debug this:\n");
 	lockdep_print_held_locks(curr);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
 	return 0;
@@ -4318,17 +4318,17 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
 	if (debug_locks_silent)
 		return;
 
-	printk("\n");
-	printk("=========================\n");
-	printk("[ BUG: held lock freed! ]\n");
+	pr_warn("\n");
+	pr_warn("=========================\n");
+	pr_warn("WARNING: held lock freed!\n");
 	print_kernel_ident();
-	printk("-------------------------\n");
-	printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
+	pr_warn("-------------------------\n");
+	pr_warn("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
 		curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
 	print_lock(hlock);
 	lockdep_print_held_locks(curr);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 }
 
@@ -4376,14 +4376,14 @@ static void print_held_locks_bug(void)
 	if (debug_locks_silent)
 		return;
 
-	printk("\n");
-	printk("=====================================\n");
-	printk("[ BUG: %s/%d still has locks held! ]\n",
+	pr_warn("\n");
+	pr_warn("====================================\n");
+	pr_warn("WARNING: %s/%d still has locks held!\n",
 	       current->comm, task_pid_nr(current));
 	print_kernel_ident();
-	printk("-------------------------------------\n");
+	pr_warn("------------------------------------\n");
 	lockdep_print_held_locks(current);
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 }
 
@@ -4402,10 +4402,10 @@ void debug_show_all_locks(void)
 	int unlock = 1;
 
 	if (unlikely(!debug_locks)) {
-		printk("INFO: lockdep is turned off.\n");
+		pr_warn("INFO: lockdep is turned off.\n");
 		return;
 	}
-	printk("\nShowing all locks held in the system:\n");
+	pr_warn("\nShowing all locks held in the system:\n");
 
 	/*
 	 * Here we try to get the tasklist_lock as hard as possible,
@@ -4416,18 +4416,18 @@ void debug_show_all_locks(void)
 retry:
 	if (!read_trylock(&tasklist_lock)) {
 		if (count == 10)
-			printk("hm, tasklist_lock locked, retrying... ");
+			pr_warn("hm, tasklist_lock locked, retrying... ");
 		if (count) {
 			count--;
-			printk(" #%d", 10-count);
+			pr_cont(" #%d", 10-count);
 			mdelay(200);
 			goto retry;
 		}
-		printk(" ignoring it.\n");
+		pr_cont(" ignoring it.\n");
 		unlock = 0;
 	} else {
 		if (count != 10)
-			printk(KERN_CONT " locked it.\n");
+			pr_cont(" locked it.\n");
 	}
 
 	do_each_thread(g, p) {
@@ -4445,8 +4445,8 @@ retry:
 				unlock = 1;
 	} while_each_thread(g, p);
 
-	printk("\n");
-	printk("=============================================\n\n");
+	pr_warn("\n");
+	pr_warn("=============================================\n\n");
 
 	if (unlock)
 		read_unlock(&tasklist_lock);
@@ -4475,12 +4475,12 @@ asmlinkage __visible void lockdep_sys_exit(void)
 	if (unlikely(curr->lockdep_depth)) {
 		if (!debug_locks_off())
 			return;
-		printk("\n");
-		printk("================================================\n");
-		printk("[ BUG: lock held when returning to user space! ]\n");
+		pr_warn("\n");
+		pr_warn("================================================\n");
+		pr_warn("WARNING: lock held when returning to user space!\n");
 		print_kernel_ident();
-		printk("------------------------------------------------\n");
-		printk("%s/%d is leaving the kernel with locks still held!\n",
+		pr_warn("------------------------------------------------\n");
+		pr_warn("%s/%d is leaving the kernel with locks still held!\n",
 				curr->comm, curr->pid);
 		lockdep_print_held_locks(curr);
 	}
@@ -4490,19 +4490,15 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
 {
 	struct task_struct *curr = current;
 
-#ifndef CONFIG_PROVE_RCU_REPEATEDLY
-	if (!debug_locks_off())
-		return;
-#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
 	/* Note: the following can be executed concurrently, so be careful. */
-	printk("\n");
-	pr_err("===============================\n");
-	pr_err("[ ERR: suspicious RCU usage.  ]\n");
+	pr_warn("\n");
+	pr_warn("=============================\n");
+	pr_warn("WARNING: suspicious RCU usage\n");
 	print_kernel_ident();
-	pr_err("-------------------------------\n");
-	pr_err("%s:%d %s!\n", file, line, s);
-	pr_err("\nother info that might help us debug this:\n\n");
-	pr_err("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
+	pr_warn("-----------------------------\n");
+	pr_warn("%s:%d %s!\n", file, line, s);
+	pr_warn("\nother info that might help us debug this:\n\n");
+	pr_warn("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
 	       !rcu_lockdep_current_cpu_online()
 			? "RCU used illegally from offline CPU!\n"
 			: !rcu_is_watching()
@@ -4529,10 +4525,10 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
 	 * rcu_read_lock_bh() and so on from extended quiescent states.
 	 */
 	if (!rcu_is_watching())
-		printk("RCU used illegally from extended quiescent state!\n");
+		pr_warn("RCU used illegally from extended quiescent state!\n");
 
 	lockdep_print_held_locks(curr);
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 }
 EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c
index 32fe775a2eaf..ac35e648b0e5 100644
--- a/kernel/locking/rtmutex-debug.c
+++ b/kernel/locking/rtmutex-debug.c
@@ -102,10 +102,11 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
 		return;
 	}
 
-	printk("\n============================================\n");
-	printk(  "[ BUG: circular locking deadlock detected! ]\n");
-	printk("%s\n", print_tainted());
-	printk(  "--------------------------------------------\n");
+	pr_warn("\n");
+	pr_warn("============================================\n");
+	pr_warn("WARNING: circular locking deadlock detected!\n");
+	pr_warn("%s\n", print_tainted());
+	pr_warn("--------------------------------------------\n");
 	printk("%s/%d is deadlocking current task %s/%d\n\n",
 	       task->comm, task_pid_nr(task),
 	       current->comm, task_pid_nr(current));
@@ -165,12 +166,16 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
 	memset(waiter, 0x22, sizeof(*waiter));
 }
 
-void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
+void debug_rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key)
 {
 	/*
 	 * Make sure we are not reinitializing a held lock:
 	 */
 	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
 	lock->name = name;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	lockdep_init_map(&lock->dep_map, name, key, 0);
+#endif
 }
 
diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h
index b585af9a1b50..5078c6ddf4a5 100644
--- a/kernel/locking/rtmutex-debug.h
+++ b/kernel/locking/rtmutex-debug.h
@@ -11,7 +11,7 @@
 
 extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
 extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
-extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
+extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key);
 extern void debug_rt_mutex_lock(struct rt_mutex *lock);
 extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
 extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index b95509416909..78069895032a 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1481,6 +1481,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock)
 {
 	might_sleep();
 
+	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock);
@@ -1496,9 +1497,16 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
  */
 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
 {
+	int ret;
+
 	might_sleep();
 
-	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
+	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
+	if (ret)
+		mutex_release(&lock->dep_map, 1, _RET_IP_);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
 
@@ -1526,11 +1534,18 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
 int
 rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
 {
+	int ret;
+
 	might_sleep();
 
-	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
+	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
 				       RT_MUTEX_MIN_CHAINWALK,
 				       rt_mutex_slowlock);
+	if (ret)
+		mutex_release(&lock->dep_map, 1, _RET_IP_);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
 
@@ -1547,10 +1562,16 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
  */
 int __sched rt_mutex_trylock(struct rt_mutex *lock)
 {
+	int ret;
+
 	if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
 		return 0;
 
-	return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
+	ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
+	if (ret)
+		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(rt_mutex_trylock);
 
@@ -1561,6 +1582,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_trylock);
  */
 void __sched rt_mutex_unlock(struct rt_mutex *lock)
 {
+	mutex_release(&lock->dep_map, 1, _RET_IP_);
 	rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
@@ -1620,7 +1642,6 @@ void rt_mutex_destroy(struct rt_mutex *lock)
 	lock->magic = NULL;
 #endif
 }
-
 EXPORT_SYMBOL_GPL(rt_mutex_destroy);
 
 /**
@@ -1632,14 +1653,16 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
  *
  * Initializing of a locked rt lock is not allowed
  */
-void __rt_mutex_init(struct rt_mutex *lock, const char *name)
+void __rt_mutex_init(struct rt_mutex *lock, const char *name,
+		     struct lock_class_key *key)
 {
 	lock->owner = NULL;
 	raw_spin_lock_init(&lock->wait_lock);
 	lock->waiters = RB_ROOT;
 	lock->waiters_leftmost = NULL;
 
-	debug_rt_mutex_init(lock, name);
+	if (name && key)
+		debug_rt_mutex_init(lock, name, key);
 }
 EXPORT_SYMBOL_GPL(__rt_mutex_init);
 
@@ -1660,7 +1683,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
 void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
 				struct task_struct *proxy_owner)
 {
-	__rt_mutex_init(lock, NULL);
+	__rt_mutex_init(lock, NULL, NULL);
 	debug_rt_mutex_proxy_lock(lock, proxy_owner);
 	rt_mutex_set_owner(lock, proxy_owner);
 }
@@ -1785,12 +1808,14 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
 	int ret;
 
 	raw_spin_lock_irq(&lock->wait_lock);
-
-	set_current_state(TASK_INTERRUPTIBLE);
-
 	/* sleep on the mutex */
+	set_current_state(TASK_INTERRUPTIBLE);
 	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
-
+	/*
+	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+	 * have to fix that up.
+	 */
+	fixup_rt_mutex_waiters(lock);
 	raw_spin_unlock_irq(&lock->wait_lock);
 
 	return ret;
@@ -1822,15 +1847,25 @@ bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
 
 	raw_spin_lock_irq(&lock->wait_lock);
 	/*
+	 * Do an unconditional try-lock, this deals with the lock stealing
+	 * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
+	 * sets a NULL owner.
+	 *
+	 * We're not interested in the return value, because the subsequent
+	 * test on rt_mutex_owner() will infer that. If the trylock succeeded,
+	 * we will own the lock and it will have removed the waiter. If we
+	 * failed the trylock, we're still not owner and we need to remove
+	 * ourselves.
+	 */
+	try_to_take_rt_mutex(lock, current, waiter);
+	/*
 	 * Unless we're the owner; we're still enqueued on the wait_list.
 	 * So check if we became owner, if not, take us off the wait_list.
 	 */
 	if (rt_mutex_owner(lock) != current) {
 		remove_waiter(lock, waiter);
-		fixup_rt_mutex_waiters(lock);
 		cleanup = true;
 	}
-
 	/*
 	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
 	 * have to fix that up.
diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h
index 6607802efa8b..5c253caffe91 100644
--- a/kernel/locking/rtmutex.h
+++ b/kernel/locking/rtmutex.h
@@ -17,7 +17,7 @@
 #define debug_rt_mutex_proxy_lock(l,p)			do { } while (0)
 #define debug_rt_mutex_proxy_unlock(l)			do { } while (0)
 #define debug_rt_mutex_unlock(l)			do { } while (0)
-#define debug_rt_mutex_init(m, n)			do { } while (0)
+#define debug_rt_mutex_init(m, n, k)			do { } while (0)
 #define debug_rt_mutex_deadlock(d, a ,l)		do { } while (0)
 #define debug_rt_mutex_print_deadlock(w)		do { } while (0)
 #define debug_rt_mutex_reset_waiter(w)			do { } while (0)
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index d1f3e9f558b8..74a5a7255b4d 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -277,7 +277,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 	 * if reparented.
 	 */
 	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
+		set_current_state(TASK_INTERRUPTIBLE);
 		if (pid_ns->nr_hashed == init_pids)
 			break;
 		schedule();
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 3b1e0f3ad07f..fa46606f3356 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1425,7 +1425,7 @@ static unsigned int nr_meta_pages;
  * Numbers of normal and highmem page frames allocated for hibernation image
  * before suspending devices.
  */
-unsigned int alloc_normal, alloc_highmem;
+static unsigned int alloc_normal, alloc_highmem;
 /*
  * Memory bitmap used for marking saveable pages (during hibernation) or
  * hibernation image pages (during restore)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index f80fd33639e0..57d22571f306 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -225,14 +225,14 @@ static struct block_device *hib_resume_bdev;
 struct hib_bio_batch {
 	atomic_t		count;
 	wait_queue_head_t	wait;
-	int			error;
+	blk_status_t		error;
 };
 
 static void hib_init_batch(struct hib_bio_batch *hb)
 {
 	atomic_set(&hb->count, 0);
 	init_waitqueue_head(&hb->wait);
-	hb->error = 0;
+	hb->error = BLK_STS_OK;
 }
 
 static void hib_end_io(struct bio *bio)
@@ -240,7 +240,7 @@ static void hib_end_io(struct bio *bio)
 	struct hib_bio_batch *hb = bio->bi_private;
 	struct page *page = bio->bi_io_vec[0].bv_page;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
 				imajor(bio->bi_bdev->bd_inode),
 				iminor(bio->bi_bdev->bd_inode),
@@ -253,8 +253,8 @@ static void hib_end_io(struct bio *bio)
 		flush_icache_range((unsigned long)page_address(page),
 				   (unsigned long)page_address(page) + PAGE_SIZE);
 
-	if (bio->bi_error && !hb->error)
-		hb->error = bio->bi_error;
+	if (bio->bi_status && !hb->error)
+		hb->error = bio->bi_status;
 	if (atomic_dec_and_test(&hb->count))
 		wake_up(&hb->wait);
 
@@ -293,10 +293,10 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr,
 	return error;
 }
 
-static int hib_wait_io(struct hib_bio_batch *hb)
+static blk_status_t hib_wait_io(struct hib_bio_batch *hb)
 {
 	wait_event(hb->wait, atomic_read(&hb->count) == 0);
-	return hb->error;
+	return blk_status_to_errno(hb->error);
 }
 
 /*
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a1aecf44ab07..bd53ea579dc8 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -269,7 +269,6 @@ static struct console *exclusive_console;
 #define MAX_CMDLINECONSOLES 8
 
 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
-static int console_cmdline_cnt;
 
 static int preferred_console = -1;
 int console_set_on_cmdline;
@@ -1176,7 +1175,7 @@ static void boot_delay_msec(int level)
 	unsigned long long k;
 	unsigned long timeout;
 
-	if ((boot_delay == 0 || system_state != SYSTEM_BOOTING)
+	if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING)
 		|| suppress_message_printing(level)) {
 		return;
 	}
@@ -1906,25 +1905,12 @@ static int __add_preferred_console(char *name, int idx, char *options,
 	 *	See if this tty is not yet registered, and
 	 *	if we have a slot free.
 	 */
-	for (i = 0, c = console_cmdline; i < console_cmdline_cnt; i++, c++) {
+	for (i = 0, c = console_cmdline;
+	     i < MAX_CMDLINECONSOLES && c->name[0];
+	     i++, c++) {
 		if (strcmp(c->name, name) == 0 && c->index == idx) {
-			if (brl_options)
-				return 0;
-
-			/*
-			 * Maintain an invariant that will help to find if
-			 * the matching console is preferred, see
-			 * register_console():
-			 *
-			 * The last non-braille console is always
-			 * the preferred one.
-			 */
-			if (i != console_cmdline_cnt - 1)
-				swap(console_cmdline[i],
-				     console_cmdline[console_cmdline_cnt - 1]);
-
-			preferred_console = console_cmdline_cnt - 1;
-
+			if (!brl_options)
+				preferred_console = i;
 			return 0;
 		}
 	}
@@ -1937,7 +1923,6 @@ static int __add_preferred_console(char *name, int idx, char *options,
 	braille_set_options(c, brl_options);
 
 	c->index = idx;
-	console_cmdline_cnt++;
 	return 0;
 }
 /*
@@ -2477,23 +2462,12 @@ void register_console(struct console *newcon)
 	}
 
 	/*
-	 * See if this console matches one we selected on the command line.
-	 *
-	 * There may be several entries in the console_cmdline array matching
-	 * with the same console, one with newcon->match(), another by
-	 * name/index:
-	 *
-	 *	pl011,mmio,0x87e024000000,115200 -- added from SPCR
-	 *	ttyAMA0 -- added from command line
-	 *
-	 * Traverse the console_cmdline array in reverse order to be
-	 * sure that if this console is preferred then it will be the first
-	 * matching entry.  We use the invariant that is maintained in
-	 * __add_preferred_console().
+	 *	See if this console matches one we selected on
+	 *	the command line.
 	 */
-	for (i = console_cmdline_cnt - 1; i >= 0; i--) {
-		c = console_cmdline + i;
-
+	for (i = 0, c = console_cmdline;
+	     i < MAX_CMDLINECONSOLES && c->name[0];
+	     i++, c++) {
 		if (!newcon->match ||
 		    newcon->match(newcon, c->name, c->index, c->options) != 0) {
 			/* default matching */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 266ddcc1d8bb..60f356d91060 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -60,19 +60,25 @@ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
 }
 
 
+void __ptrace_link(struct task_struct *child, struct task_struct *new_parent,
+		   const struct cred *ptracer_cred)
+{
+	BUG_ON(!list_empty(&child->ptrace_entry));
+	list_add(&child->ptrace_entry, &new_parent->ptraced);
+	child->parent = new_parent;
+	child->ptracer_cred = get_cred(ptracer_cred);
+}
+
 /*
  * ptrace a task: make the debugger its new parent and
  * move it to the ptrace list.
  *
  * Must be called with the tasklist lock write-held.
  */
-void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
+static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
 {
-	BUG_ON(!list_empty(&child->ptrace_entry));
-	list_add(&child->ptrace_entry, &new_parent->ptraced);
-	child->parent = new_parent;
 	rcu_read_lock();
-	child->ptracer_cred = get_cred(__task_cred(new_parent));
+	__ptrace_link(child, new_parent, __task_cred(new_parent));
 	rcu_read_unlock();
 }
 
@@ -386,7 +392,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 		flags |= PT_SEIZED;
 	task->ptrace = flags;
 
-	__ptrace_link(task, current);
+	ptrace_link(task, current);
 
 	/* SEIZE doesn't trap tracee on attach */
 	if (!seize)
@@ -459,7 +465,7 @@ static int ptrace_traceme(void)
 		 */
 		if (!ret && !(current->real_parent->flags & PF_EXITING)) {
 			current->ptrace = PT_PTRACED;
-			__ptrace_link(current, current->real_parent);
+			ptrace_link(current, current->real_parent);
 		}
 	}
 	write_unlock_irq(&tasklist_lock);
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
new file mode 100644
index 000000000000..be90c945063f
--- /dev/null
+++ b/kernel/rcu/Kconfig
@@ -0,0 +1,242 @@
+#
+# RCU-related configuration options
+#
+
+menu "RCU Subsystem"
+
+config TREE_RCU
+	bool
+	default y if !PREEMPT && SMP
+	help
+	  This option selects the RCU implementation that is
+	  designed for very large SMP system with hundreds or
+	  thousands of CPUs.  It also scales down nicely to
+	  smaller systems.
+
+config PREEMPT_RCU
+	bool
+	default y if PREEMPT
+	help
+	  This option selects the RCU implementation that is
+	  designed for very large SMP systems with hundreds or
+	  thousands of CPUs, but for which real-time response
+	  is also required.  It also scales down nicely to
+	  smaller systems.
+
+	  Select this option if you are unsure.
+
+config TINY_RCU
+	bool
+	default y if !PREEMPT && !SMP
+	help
+	  This option selects the RCU implementation that is
+	  designed for UP systems from which real-time response
+	  is not required.  This option greatly reduces the
+	  memory footprint of RCU.
+
+config RCU_EXPERT
+	bool "Make expert-level adjustments to RCU configuration"
+	default n
+	help
+	  This option needs to be enabled if you wish to make
+	  expert-level adjustments to RCU configuration.  By default,
+	  no such adjustments can be made, which has the often-beneficial
+	  side-effect of preventing "make oldconfig" from asking you all
+	  sorts of detailed questions about how you would like numerous
+	  obscure RCU options to be set up.
+
+	  Say Y if you need to make expert-level adjustments to RCU.
+
+	  Say N if you are unsure.
+
+config SRCU
+	bool
+	help
+	  This option selects the sleepable version of RCU. This version
+	  permits arbitrary sleeping or blocking within RCU read-side critical
+	  sections.
+
+config TINY_SRCU
+	bool
+	default y if SRCU && TINY_RCU
+	help
+	  This option selects the single-CPU non-preemptible version of SRCU.
+
+config TREE_SRCU
+	bool
+	default y if SRCU && !TINY_RCU
+	help
+	  This option selects the full-fledged version of SRCU.
+
+config TASKS_RCU
+	bool
+	default n
+	select SRCU
+	help
+	  This option enables a task-based RCU implementation that uses
+	  only voluntary context switch (not preemption!), idle, and
+	  user-mode execution as quiescent states.
+
+config RCU_STALL_COMMON
+	def_bool ( TREE_RCU || PREEMPT_RCU )
+	help
+	  This option enables RCU CPU stall code that is common between
+	  the TINY and TREE variants of RCU.  The purpose is to allow
+	  the tiny variants to disable RCU CPU stall warnings, while
+	  making these warnings mandatory for the tree variants.
+
+config RCU_NEED_SEGCBLIST
+	def_bool ( TREE_RCU || PREEMPT_RCU || TREE_SRCU )
+
+config CONTEXT_TRACKING
+       bool
+
+config CONTEXT_TRACKING_FORCE
+	bool "Force context tracking"
+	depends on CONTEXT_TRACKING
+	default y if !NO_HZ_FULL
+	help
+	  The major pre-requirement for full dynticks to work is to
+	  support the context tracking subsystem. But there are also
+	  other dependencies to provide in order to make the full
+	  dynticks working.
+
+	  This option stands for testing when an arch implements the
+	  context tracking backend but doesn't yet fullfill all the
+	  requirements to make the full dynticks feature working.
+	  Without the full dynticks, there is no way to test the support
+	  for context tracking and the subsystems that rely on it: RCU
+	  userspace extended quiescent state and tickless cputime
+	  accounting. This option copes with the absence of the full
+	  dynticks subsystem by forcing the context tracking on all
+	  CPUs in the system.
+
+	  Say Y only if you're working on the development of an
+	  architecture backend for the context tracking.
+
+	  Say N otherwise, this option brings an overhead that you
+	  don't want in production.
+
+
+config RCU_FANOUT
+	int "Tree-based hierarchical RCU fanout value"
+	range 2 64 if 64BIT
+	range 2 32 if !64BIT
+	depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
+	default 64 if 64BIT
+	default 32 if !64BIT
+	help
+	  This option controls the fanout of hierarchical implementations
+	  of RCU, allowing RCU to work efficiently on machines with
+	  large numbers of CPUs.  This value must be at least the fourth
+	  root of NR_CPUS, which allows NR_CPUS to be insanely large.
+	  The default value of RCU_FANOUT should be used for production
+	  systems, but if you are stress-testing the RCU implementation
+	  itself, small RCU_FANOUT values allow you to test large-system
+	  code paths on small(er) systems.
+
+	  Select a specific number if testing RCU itself.
+	  Take the default if unsure.
+
+config RCU_FANOUT_LEAF
+	int "Tree-based hierarchical RCU leaf-level fanout value"
+	range 2 64 if 64BIT
+	range 2 32 if !64BIT
+	depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
+	default 16
+	help
+	  This option controls the leaf-level fanout of hierarchical
+	  implementations of RCU, and allows trading off cache misses
+	  against lock contention.  Systems that synchronize their
+	  scheduling-clock interrupts for energy-efficiency reasons will
+	  want the default because the smaller leaf-level fanout keeps
+	  lock contention levels acceptably low.  Very large systems
+	  (hundreds or thousands of CPUs) will instead want to set this
+	  value to the maximum value possible in order to reduce the
+	  number of cache misses incurred during RCU's grace-period
+	  initialization.  These systems tend to run CPU-bound, and thus
+	  are not helped by synchronized interrupts, and thus tend to
+	  skew them, which reduces lock contention enough that large
+	  leaf-level fanouts work well.  That said, setting leaf-level
+	  fanout to a large number will likely cause problematic
+	  lock contention on the leaf-level rcu_node structures unless
+	  you boot with the skew_tick kernel parameter.
+
+	  Select a specific number if testing RCU itself.
+
+	  Select the maximum permissible value for large systems, but
+	  please understand that you may also need to set the skew_tick
+	  kernel boot parameter to avoid contention on the rcu_node
+	  structure's locks.
+
+	  Take the default if unsure.
+
+config RCU_FAST_NO_HZ
+	bool "Accelerate last non-dyntick-idle CPU's grace periods"
+	depends on NO_HZ_COMMON && SMP && RCU_EXPERT
+	default n
+	help
+	  This option permits CPUs to enter dynticks-idle state even if
+	  they have RCU callbacks queued, and prevents RCU from waking
+	  these CPUs up more than roughly once every four jiffies (by
+	  default, you can adjust this using the rcutree.rcu_idle_gp_delay
+	  parameter), thus improving energy efficiency.  On the other
+	  hand, this option increases the duration of RCU grace periods,
+	  for example, slowing down synchronize_rcu().
+
+	  Say Y if energy efficiency is critically important, and you
+	  	don't care about increased grace-period durations.
+
+	  Say N if you are unsure.
+
+config RCU_BOOST
+	bool "Enable RCU priority boosting"
+	depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
+	default n
+	help
+	  This option boosts the priority of preempted RCU readers that
+	  block the current preemptible RCU grace period for too long.
+	  This option also prevents heavy loads from blocking RCU
+	  callback invocation for all flavors of RCU.
+
+	  Say Y here if you are working with real-time apps or heavy loads
+	  Say N here if you are unsure.
+
+config RCU_BOOST_DELAY
+	int "Milliseconds to delay boosting after RCU grace-period start"
+	range 0 3000
+	depends on RCU_BOOST
+	default 500
+	help
+	  This option specifies the time to wait after the beginning of
+	  a given grace period before priority-boosting preempted RCU
+	  readers blocking that grace period.  Note that any RCU reader
+	  blocking an expedited RCU grace period is boosted immediately.
+
+	  Accept the default if unsure.
+
+config RCU_NOCB_CPU
+	bool "Offload RCU callback processing from boot-selected CPUs"
+	depends on TREE_RCU || PREEMPT_RCU
+	depends on RCU_EXPERT || NO_HZ_FULL
+	default n
+	help
+	  Use this option to reduce OS jitter for aggressive HPC or
+	  real-time workloads.	It can also be used to offload RCU
+	  callback invocation to energy-efficient CPUs in battery-powered
+	  asymmetric multiprocessors.
+
+	  This option offloads callback invocation from the set of
+	  CPUs specified at boot time by the rcu_nocbs parameter.
+	  For each such CPU, a kthread ("rcuox/N") will be created to
+	  invoke callbacks, where the "N" is the CPU being offloaded,
+	  and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and
+	  "s" for RCU-sched.  Nothing prevents this kthread from running
+	  on the specified CPUs, but (1) the kthreads may be preempted
+	  between each callback, and (2) affinity or cgroups can be used
+	  to force the kthreads to run on whatever set of CPUs is desired.
+
+	  Say Y here if you want to help to debug reduced OS jitter.
+	  Say N here if you are unsure.
+
+endmenu # "RCU Subsystem"
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
new file mode 100644
index 000000000000..0ec7d1d33a14
--- /dev/null
+++ b/kernel/rcu/Kconfig.debug
@@ -0,0 +1,82 @@
+#
+# RCU-related debugging configuration options
+#
+
+menu "RCU Debugging"
+
+config PROVE_RCU
+	def_bool PROVE_LOCKING
+
+config TORTURE_TEST
+	tristate
+	default n
+
+config RCU_PERF_TEST
+	tristate "performance tests for RCU"
+	depends on DEBUG_KERNEL
+	select TORTURE_TEST
+	select SRCU
+	select TASKS_RCU
+	default n
+	help
+	  This option provides a kernel module that runs performance
+	  tests on the RCU infrastructure.  The kernel module may be built
+	  after the fact on the running kernel to be tested, if desired.
+
+	  Say Y here if you want RCU performance tests to be built into
+	  the kernel.
+	  Say M if you want the RCU performance tests to build as a module.
+	  Say N if you are unsure.
+
+config RCU_TORTURE_TEST
+	tristate "torture tests for RCU"
+	depends on DEBUG_KERNEL
+	select TORTURE_TEST
+	select SRCU
+	select TASKS_RCU
+	default n
+	help
+	  This option provides a kernel module that runs torture tests
+	  on the RCU infrastructure.  The kernel module may be built
+	  after the fact on the running kernel to be tested, if desired.
+
+	  Say Y here if you want RCU torture tests to be built into
+	  the kernel.
+	  Say M if you want the RCU torture tests to build as a module.
+	  Say N if you are unsure.
+
+config RCU_CPU_STALL_TIMEOUT
+	int "RCU CPU stall timeout in seconds"
+	depends on RCU_STALL_COMMON
+	range 3 300
+	default 21
+	help
+	  If a given RCU grace period extends more than the specified
+	  number of seconds, a CPU stall warning is printed.  If the
+	  RCU grace period persists, additional CPU stall warnings are
+	  printed at more widely spaced intervals.
+
+config RCU_TRACE
+	bool "Enable tracing for RCU"
+	depends on DEBUG_KERNEL
+	default y if TREE_RCU
+	select TRACE_CLOCK
+	help
+	  This option enables additional tracepoints for ftrace-style
+	  event tracing.
+
+	  Say Y here if you want to enable RCU tracing
+	  Say N if you are unsure.
+
+config RCU_EQS_DEBUG
+	bool "Provide debugging asserts for adding NO_HZ support to an arch"
+	depends on DEBUG_KERNEL
+	help
+	  This option provides consistency checks in RCU's handling of
+	  NO_HZ.  These checks have proven quite helpful in detecting
+	  bugs in arch-specific NO_HZ code.
+
+	  Say N here if you need ultimate kernel/user switch latencies
+	  Say Y if you are unsure
+
+endmenu # "RCU Debugging"
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 18dfc485225c..13c0fc852767 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -3,10 +3,11 @@
 KCOV_INSTRUMENT := n
 
 obj-y += update.o sync.o
-obj-$(CONFIG_SRCU) += srcu.o
+obj-$(CONFIG_TREE_SRCU) += srcutree.o
+obj-$(CONFIG_TINY_SRCU) += srcutiny.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o
 obj-$(CONFIG_TREE_RCU) += tree.o
 obj-$(CONFIG_PREEMPT_RCU) += tree.o
-obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o
 obj-$(CONFIG_TINY_RCU) += tiny.o
+obj-$(CONFIG_RCU_NEED_SEGCBLIST) += rcu_segcblist.o
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 0d6ff3e471be..808b8c85f626 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -56,6 +56,83 @@
 #define DYNTICK_TASK_EXIT_IDLE	   (DYNTICK_TASK_NEST_VALUE + \
 				    DYNTICK_TASK_FLAG)
 
+
+/*
+ * Grace-period counter management.
+ */
+
+#define RCU_SEQ_CTR_SHIFT	2
+#define RCU_SEQ_STATE_MASK	((1 << RCU_SEQ_CTR_SHIFT) - 1)
+
+/*
+ * Return the counter portion of a sequence number previously returned
+ * by rcu_seq_snap() or rcu_seq_current().
+ */
+static inline unsigned long rcu_seq_ctr(unsigned long s)
+{
+	return s >> RCU_SEQ_CTR_SHIFT;
+}
+
+/*
+ * Return the state portion of a sequence number previously returned
+ * by rcu_seq_snap() or rcu_seq_current().
+ */
+static inline int rcu_seq_state(unsigned long s)
+{
+	return s & RCU_SEQ_STATE_MASK;
+}
+
+/*
+ * Set the state portion of the pointed-to sequence number.
+ * The caller is responsible for preventing conflicting updates.
+ */
+static inline void rcu_seq_set_state(unsigned long *sp, int newstate)
+{
+	WARN_ON_ONCE(newstate & ~RCU_SEQ_STATE_MASK);
+	WRITE_ONCE(*sp, (*sp & ~RCU_SEQ_STATE_MASK) + newstate);
+}
+
+/* Adjust sequence number for start of update-side operation. */
+static inline void rcu_seq_start(unsigned long *sp)
+{
+	WRITE_ONCE(*sp, *sp + 1);
+	smp_mb(); /* Ensure update-side operation after counter increment. */
+	WARN_ON_ONCE(rcu_seq_state(*sp) != 1);
+}
+
+/* Adjust sequence number for end of update-side operation. */
+static inline void rcu_seq_end(unsigned long *sp)
+{
+	smp_mb(); /* Ensure update-side operation before counter increment. */
+	WARN_ON_ONCE(!rcu_seq_state(*sp));
+	WRITE_ONCE(*sp, (*sp | RCU_SEQ_STATE_MASK) + 1);
+}
+
+/* Take a snapshot of the update side's sequence number. */
+static inline unsigned long rcu_seq_snap(unsigned long *sp)
+{
+	unsigned long s;
+
+	s = (READ_ONCE(*sp) + 2 * RCU_SEQ_STATE_MASK + 1) & ~RCU_SEQ_STATE_MASK;
+	smp_mb(); /* Above access must not bleed into critical section. */
+	return s;
+}
+
+/* Return the current value the update side's sequence number, no ordering. */
+static inline unsigned long rcu_seq_current(unsigned long *sp)
+{
+	return READ_ONCE(*sp);
+}
+
+/*
+ * Given a snapshot from rcu_seq_snap(), determine whether or not a
+ * full update-side operation has occurred.
+ */
+static inline bool rcu_seq_done(unsigned long *sp, unsigned long s)
+{
+	return ULONG_CMP_GE(READ_ONCE(*sp), s);
+}
+
 /*
  * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
  * by call_rcu() and rcu callback execution, and are therefore not part of the
@@ -109,12 +186,12 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
 
 	rcu_lock_acquire(&rcu_callback_map);
 	if (__is_kfree_rcu_offset(offset)) {
-		RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset));
+		RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset);)
 		kfree((void *)head - offset);
 		rcu_lock_release(&rcu_callback_map);
 		return true;
 	} else {
-		RCU_TRACE(trace_rcu_invoke_callback(rn, head));
+		RCU_TRACE(trace_rcu_invoke_callback(rn, head);)
 		head->func(head);
 		rcu_lock_release(&rcu_callback_map);
 		return false;
@@ -135,6 +212,18 @@ int rcu_jiffies_till_stall_check(void);
  */
 #define TPS(x)  tracepoint_string(x)
 
+/*
+ * Dump the ftrace buffer, but only one time per callsite per boot.
+ */
+#define rcu_ftrace_dump(oops_dump_mode) \
+do { \
+	static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
+	\
+	if (!atomic_read(&___rfd_beenhere) && \
+	    !atomic_xchg(&___rfd_beenhere, 1)) \
+		ftrace_dump(oops_dump_mode); \
+} while (0)
+
 void rcu_early_boot_tests(void);
 void rcu_test_sync_prims(void);
 
@@ -144,4 +233,341 @@ void rcu_test_sync_prims(void);
  */
 extern void resched_cpu(int cpu);
 
+#if defined(SRCU) || !defined(TINY_RCU)
+
+#include <linux/rcu_node_tree.h>
+
+extern int rcu_num_lvls;
+extern int num_rcu_lvl[];
+extern int rcu_num_nodes;
+static bool rcu_fanout_exact;
+static int rcu_fanout_leaf;
+
+/*
+ * Compute the per-level fanout, either using the exact fanout specified
+ * or balancing the tree, depending on the rcu_fanout_exact boot parameter.
+ */
+static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
+{
+	int i;
+
+	if (rcu_fanout_exact) {
+		levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
+		for (i = rcu_num_lvls - 2; i >= 0; i--)
+			levelspread[i] = RCU_FANOUT;
+	} else {
+		int ccur;
+		int cprv;
+
+		cprv = nr_cpu_ids;
+		for (i = rcu_num_lvls - 1; i >= 0; i--) {
+			ccur = levelcnt[i];
+			levelspread[i] = (cprv + ccur - 1) / ccur;
+			cprv = ccur;
+		}
+	}
+}
+
+/*
+ * Do a full breadth-first scan of the rcu_node structures for the
+ * specified rcu_state structure.
+ */
+#define rcu_for_each_node_breadth_first(rsp, rnp) \
+	for ((rnp) = &(rsp)->node[0]; \
+	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
+
+/*
+ * Do a breadth-first scan of the non-leaf rcu_node structures for the
+ * specified rcu_state structure.  Note that if there is a singleton
+ * rcu_node tree with but one rcu_node structure, this loop is a no-op.
+ */
+#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
+	for ((rnp) = &(rsp)->node[0]; \
+	     (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
+
+/*
+ * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
+ * structure.  Note that if there is a singleton rcu_node tree with but
+ * one rcu_node structure, this loop -will- visit the rcu_node structure.
+ * It is still a leaf node, even if it is also the root node.
+ */
+#define rcu_for_each_leaf_node(rsp, rnp) \
+	for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
+	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
+
+/*
+ * Iterate over all possible CPUs in a leaf RCU node.
+ */
+#define for_each_leaf_node_possible_cpu(rnp, cpu) \
+	for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \
+	     cpu <= rnp->grphi; \
+	     cpu = cpumask_next((cpu), cpu_possible_mask))
+
+/*
+ * Wrappers for the rcu_node::lock acquire and release.
+ *
+ * Because the rcu_nodes form a tree, the tree traversal locking will observe
+ * different lock values, this in turn means that an UNLOCK of one level
+ * followed by a LOCK of another level does not imply a full memory barrier;
+ * and most importantly transitivity is lost.
+ *
+ * In order to restore full ordering between tree levels, augment the regular
+ * lock acquire functions with smp_mb__after_unlock_lock().
+ *
+ * As ->lock of struct rcu_node is a __private field, therefore one should use
+ * these wrappers rather than directly call raw_spin_{lock,unlock}* on ->lock.
+ */
+#define raw_spin_lock_rcu_node(p)					\
+do {									\
+	raw_spin_lock(&ACCESS_PRIVATE(p, lock));			\
+	smp_mb__after_unlock_lock();					\
+} while (0)
+
+#define raw_spin_unlock_rcu_node(p) raw_spin_unlock(&ACCESS_PRIVATE(p, lock))
+
+#define raw_spin_lock_irq_rcu_node(p)					\
+do {									\
+	raw_spin_lock_irq(&ACCESS_PRIVATE(p, lock));			\
+	smp_mb__after_unlock_lock();					\
+} while (0)
+
+#define raw_spin_unlock_irq_rcu_node(p)					\
+	raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
+
+#define raw_spin_lock_irqsave_rcu_node(p, flags)			\
+do {									\
+	raw_spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags);	\
+	smp_mb__after_unlock_lock();					\
+} while (0)
+
+#define raw_spin_unlock_irqrestore_rcu_node(p, flags)			\
+	raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)	\
+
+#define raw_spin_trylock_rcu_node(p)					\
+({									\
+	bool ___locked = raw_spin_trylock(&ACCESS_PRIVATE(p, lock));	\
+									\
+	if (___locked)							\
+		smp_mb__after_unlock_lock();				\
+	___locked;							\
+})
+
+#endif /* #if defined(SRCU) || !defined(TINY_RCU) */
+
+#ifdef CONFIG_TINY_RCU
+/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
+static inline bool rcu_gp_is_normal(void)  /* Internal RCU use. */
+{
+	return true;
+}
+static inline bool rcu_gp_is_expedited(void)  /* Internal RCU use. */
+{
+	return false;
+}
+
+static inline void rcu_expedite_gp(void)
+{
+}
+
+static inline void rcu_unexpedite_gp(void)
+{
+}
+#else /* #ifdef CONFIG_TINY_RCU */
+bool rcu_gp_is_normal(void);     /* Internal RCU use. */
+bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
+void rcu_expedite_gp(void);
+void rcu_unexpedite_gp(void);
+void rcupdate_announce_bootup_oddness(void);
+#endif /* #else #ifdef CONFIG_TINY_RCU */
+
+#define RCU_SCHEDULER_INACTIVE	0
+#define RCU_SCHEDULER_INIT	1
+#define RCU_SCHEDULER_RUNNING	2
+
+#ifdef CONFIG_TINY_RCU
+static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
+#else /* #ifdef CONFIG_TINY_RCU */
+void rcu_request_urgent_qs_task(struct task_struct *t);
+#endif /* #else #ifdef CONFIG_TINY_RCU */
+
+enum rcutorture_type {
+	RCU_FLAVOR,
+	RCU_BH_FLAVOR,
+	RCU_SCHED_FLAVOR,
+	RCU_TASKS_FLAVOR,
+	SRCU_FLAVOR,
+	INVALID_RCU_FLAVOR
+};
+
+#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
+void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
+			    unsigned long *gpnum, unsigned long *completed);
+void rcutorture_record_test_transition(void);
+void rcutorture_record_progress(unsigned long vernum);
+void do_trace_rcu_torture_read(const char *rcutorturename,
+			       struct rcu_head *rhp,
+			       unsigned long secs,
+			       unsigned long c_old,
+			       unsigned long c);
+#else
+static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
+					  int *flags,
+					  unsigned long *gpnum,
+					  unsigned long *completed)
+{
+	*flags = 0;
+	*gpnum = 0;
+	*completed = 0;
+}
+static inline void rcutorture_record_test_transition(void)
+{
+}
+static inline void rcutorture_record_progress(unsigned long vernum)
+{
+}
+#ifdef CONFIG_RCU_TRACE
+void do_trace_rcu_torture_read(const char *rcutorturename,
+			       struct rcu_head *rhp,
+			       unsigned long secs,
+			       unsigned long c_old,
+			       unsigned long c);
+#else
+#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+	do { } while (0)
+#endif
+#endif
+
+#ifdef CONFIG_TINY_SRCU
+
+static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
+					   struct srcu_struct *sp, int *flags,
+					   unsigned long *gpnum,
+					   unsigned long *completed)
+{
+	if (test_type != SRCU_FLAVOR)
+		return;
+	*flags = 0;
+	*completed = sp->srcu_idx;
+	*gpnum = *completed;
+}
+
+#elif defined(CONFIG_TREE_SRCU)
+
+void srcutorture_get_gp_data(enum rcutorture_type test_type,
+			     struct srcu_struct *sp, int *flags,
+			     unsigned long *gpnum, unsigned long *completed);
+
+#endif
+
+#ifdef CONFIG_TINY_RCU
+
+/*
+ * Return the number of grace periods started.
+ */
+static inline unsigned long rcu_batches_started(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of bottom-half grace periods started.
+ */
+static inline unsigned long rcu_batches_started_bh(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of sched grace periods started.
+ */
+static inline unsigned long rcu_batches_started_sched(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of grace periods completed.
+ */
+static inline unsigned long rcu_batches_completed(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of bottom-half grace periods completed.
+ */
+static inline unsigned long rcu_batches_completed_bh(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of sched grace periods completed.
+ */
+static inline unsigned long rcu_batches_completed_sched(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of expedited grace periods completed.
+ */
+static inline unsigned long rcu_exp_batches_completed(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of expedited sched grace periods completed.
+ */
+static inline unsigned long rcu_exp_batches_completed_sched(void)
+{
+	return 0;
+}
+
+static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
+{
+	return 0;
+}
+
+static inline void rcu_force_quiescent_state(void)
+{
+}
+
+static inline void rcu_bh_force_quiescent_state(void)
+{
+}
+
+static inline void rcu_sched_force_quiescent_state(void)
+{
+}
+
+static inline void show_rcu_gp_kthreads(void)
+{
+}
+
+#else /* #ifdef CONFIG_TINY_RCU */
+extern unsigned long rcutorture_testseq;
+extern unsigned long rcutorture_vernum;
+unsigned long rcu_batches_started(void);
+unsigned long rcu_batches_started_bh(void);
+unsigned long rcu_batches_started_sched(void);
+unsigned long rcu_batches_completed(void);
+unsigned long rcu_batches_completed_bh(void);
+unsigned long rcu_batches_completed_sched(void);
+unsigned long rcu_exp_batches_completed(void);
+unsigned long rcu_exp_batches_completed_sched(void);
+unsigned long srcu_batches_completed(struct srcu_struct *sp);
+void show_rcu_gp_kthreads(void);
+void rcu_force_quiescent_state(void);
+void rcu_bh_force_quiescent_state(void);
+void rcu_sched_force_quiescent_state(void);
+#endif /* #else #ifdef CONFIG_TINY_RCU */
+
+#ifdef CONFIG_RCU_NOCB_CPU
+bool rcu_is_nocb_cpu(int cpu);
+#else
+static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
+#endif
+
 #endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
new file mode 100644
index 000000000000..2b62a38b080f
--- /dev/null
+++ b/kernel/rcu/rcu_segcblist.c
@@ -0,0 +1,505 @@
+/*
+ * RCU segmented callback lists, function definitions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright IBM Corporation, 2017
+ *
+ * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+
+#include "rcu_segcblist.h"
+
+/* Initialize simple callback list. */
+void rcu_cblist_init(struct rcu_cblist *rclp)
+{
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+	rclp->len = 0;
+	rclp->len_lazy = 0;
+}
+
+/*
+ * Debug function to actually count the number of callbacks.
+ * If the number exceeds the limit specified, return -1.
+ */
+long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
+{
+	int cnt = 0;
+	struct rcu_head **rhpp = &rclp->head;
+
+	for (;;) {
+		if (!*rhpp)
+			return cnt;
+		if (++cnt > lim)
+			return -1;
+		rhpp = &(*rhpp)->next;
+	}
+}
+
+/*
+ * Dequeue the oldest rcu_head structure from the specified callback
+ * list.  This function assumes that the callback is non-lazy, but
+ * the caller can later invoke rcu_cblist_dequeued_lazy() if it
+ * finds otherwise (and if it cares about laziness).  This allows
+ * different users to have different ways of determining laziness.
+ */
+struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
+{
+	struct rcu_head *rhp;
+
+	rhp = rclp->head;
+	if (!rhp)
+		return NULL;
+	rclp->len--;
+	rclp->head = rhp->next;
+	if (!rclp->head)
+		rclp->tail = &rclp->head;
+	return rhp;
+}
+
+/*
+ * Initialize an rcu_segcblist structure.
+ */
+void rcu_segcblist_init(struct rcu_segcblist *rsclp)
+{
+	int i;
+
+	BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
+	BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
+	rsclp->head = NULL;
+	for (i = 0; i < RCU_CBLIST_NSEGS; i++)
+		rsclp->tails[i] = &rsclp->head;
+	rsclp->len = 0;
+	rsclp->len_lazy = 0;
+}
+
+/*
+ * Disable the specified rcu_segcblist structure, so that callbacks can
+ * no longer be posted to it.  This structure must be empty.
+ */
+void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
+{
+	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
+	rsclp->tails[RCU_NEXT_TAIL] = NULL;
+}
+
+/*
+ * Is the specified segment of the specified rcu_segcblist structure
+ * empty of callbacks?
+ */
+bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
+{
+	if (seg == RCU_DONE_TAIL)
+		return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
+	return rsclp->tails[seg - 1] == rsclp->tails[seg];
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * are ready to be invoked?
+ */
+bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * are still pending, that is, not yet ready to be invoked?
+ */
+bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
+}
+
+/*
+ * Dequeue and return the first ready-to-invoke callback.  If there
+ * are no ready-to-invoke callbacks, return NULL.  Disables interrupts
+ * to avoid interference.  Does not protect from interference from other
+ * CPUs or tasks.
+ */
+struct rcu_head *rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
+{
+	unsigned long flags;
+	int i;
+	struct rcu_head *rhp;
+
+	local_irq_save(flags);
+	if (!rcu_segcblist_ready_cbs(rsclp)) {
+		local_irq_restore(flags);
+		return NULL;
+	}
+	rhp = rsclp->head;
+	BUG_ON(!rhp);
+	rsclp->head = rhp->next;
+	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
+		if (rsclp->tails[i] != &rhp->next)
+			break;
+		rsclp->tails[i] = &rsclp->head;
+	}
+	smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
+	WRITE_ONCE(rsclp->len, rsclp->len - 1);
+	local_irq_restore(flags);
+	return rhp;
+}
+
+/*
+ * Account for the fact that a previously dequeued callback turned out
+ * to be marked as lazy.
+ */
+void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	rsclp->len_lazy--;
+	local_irq_restore(flags);
+}
+
+/*
+ * Return a pointer to the first callback in the specified rcu_segcblist
+ * structure.  This is useful for diagnostics.
+ */
+struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
+{
+	if (rcu_segcblist_is_enabled(rsclp))
+		return rsclp->head;
+	return NULL;
+}
+
+/*
+ * Return a pointer to the first pending callback in the specified
+ * rcu_segcblist structure.  This is useful just after posting a given
+ * callback -- if that callback is the first pending callback, then
+ * you cannot rely on someone else having already started up the required
+ * grace period.
+ */
+struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
+{
+	if (rcu_segcblist_is_enabled(rsclp))
+		return *rsclp->tails[RCU_DONE_TAIL];
+	return NULL;
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * have not yet been processed beyond having been posted, that is,
+ * does it contain callbacks in its last segment?
+ */
+bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
+}
+
+/*
+ * Enqueue the specified callback onto the specified rcu_segcblist
+ * structure, updating accounting as needed.  Note that the ->len
+ * field may be accessed locklessly, hence the WRITE_ONCE().
+ * The ->len field is used by rcu_barrier() and friends to determine
+ * if it must post a callback on this structure, and it is OK
+ * for rcu_barrier() to sometimes post callbacks needlessly, but
+ * absolutely not OK for it to ever miss posting a callback.
+ */
+void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
+			   struct rcu_head *rhp, bool lazy)
+{
+	WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
+	if (lazy)
+		rsclp->len_lazy++;
+	smp_mb(); /* Ensure counts are updated before callback is enqueued. */
+	rhp->next = NULL;
+	*rsclp->tails[RCU_NEXT_TAIL] = rhp;
+	rsclp->tails[RCU_NEXT_TAIL] = &rhp->next;
+}
+
+/*
+ * Entrain the specified callback onto the specified rcu_segcblist at
+ * the end of the last non-empty segment.  If the entire rcu_segcblist
+ * is empty, make no change, but return false.
+ *
+ * This is intended for use by rcu_barrier()-like primitives, -not-
+ * for normal grace-period use.  IMPORTANT:  The callback you enqueue
+ * will wait for all prior callbacks, NOT necessarily for a grace
+ * period.  You have been warned.
+ */
+bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
+			   struct rcu_head *rhp, bool lazy)
+{
+	int i;
+
+	if (rcu_segcblist_n_cbs(rsclp) == 0)
+		return false;
+	WRITE_ONCE(rsclp->len, rsclp->len + 1);
+	if (lazy)
+		rsclp->len_lazy++;
+	smp_mb(); /* Ensure counts are updated before callback is entrained. */
+	rhp->next = NULL;
+	for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--)
+		if (rsclp->tails[i] != rsclp->tails[i - 1])
+			break;
+	*rsclp->tails[i] = rhp;
+	for (; i <= RCU_NEXT_TAIL; i++)
+		rsclp->tails[i] = &rhp->next;
+	return true;
+}
+
+/*
+ * Extract only the counts from the specified rcu_segcblist structure,
+ * and place them in the specified rcu_cblist structure.  This function
+ * supports both callback orphaning and invocation, hence the separation
+ * of counts and callbacks.  (Callbacks ready for invocation must be
+ * orphaned and adopted separately from pending callbacks, but counts
+ * apply to all callbacks.  Locking must be used to make sure that
+ * both orphaned-callbacks lists are consistent.)
+ */
+void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
+					       struct rcu_cblist *rclp)
+{
+	rclp->len_lazy += rsclp->len_lazy;
+	rclp->len += rsclp->len;
+	rsclp->len_lazy = 0;
+	WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
+}
+
+/*
+ * Extract only those callbacks ready to be invoked from the specified
+ * rcu_segcblist structure and place them in the specified rcu_cblist
+ * structure.
+ */
+void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
+				    struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rcu_segcblist_ready_cbs(rsclp))
+		return; /* Nothing to do. */
+	*rclp->tail = rsclp->head;
+	rsclp->head = *rsclp->tails[RCU_DONE_TAIL];
+	*rsclp->tails[RCU_DONE_TAIL] = NULL;
+	rclp->tail = rsclp->tails[RCU_DONE_TAIL];
+	for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
+		if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
+			rsclp->tails[i] = &rsclp->head;
+}
+
+/*
+ * Extract only those callbacks still pending (not yet ready to be
+ * invoked) from the specified rcu_segcblist structure and place them in
+ * the specified rcu_cblist structure.  Note that this loses information
+ * about any callbacks that might have been partway done waiting for
+ * their grace period.  Too bad!  They will have to start over.
+ */
+void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
+				    struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rcu_segcblist_pend_cbs(rsclp))
+		return; /* Nothing to do. */
+	*rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
+	rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
+	*rsclp->tails[RCU_DONE_TAIL] = NULL;
+	for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
+		rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL];
+}
+
+/*
+ * Insert counts from the specified rcu_cblist structure in the
+ * specified rcu_segcblist structure.
+ */
+void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
+				struct rcu_cblist *rclp)
+{
+	rsclp->len_lazy += rclp->len_lazy;
+	/* ->len sampled locklessly. */
+	WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
+	rclp->len_lazy = 0;
+	rclp->len = 0;
+}
+
+/*
+ * Move callbacks from the specified rcu_cblist to the beginning of the
+ * done-callbacks segment of the specified rcu_segcblist.
+ */
+void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
+				   struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rclp->head)
+		return; /* No callbacks to move. */
+	*rclp->tail = rsclp->head;
+	rsclp->head = rclp->head;
+	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
+		if (&rsclp->head == rsclp->tails[i])
+			rsclp->tails[i] = rclp->tail;
+		else
+			break;
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+}
+
+/*
+ * Move callbacks from the specified rcu_cblist to the end of the
+ * new-callbacks segment of the specified rcu_segcblist.
+ */
+void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
+				   struct rcu_cblist *rclp)
+{
+	if (!rclp->head)
+		return; /* Nothing to do. */
+	*rsclp->tails[RCU_NEXT_TAIL] = rclp->head;
+	rsclp->tails[RCU_NEXT_TAIL] = rclp->tail;
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+}
+
+/*
+ * Advance the callbacks in the specified rcu_segcblist structure based
+ * on the current value passed in for the grace-period counter.
+ */
+void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
+{
+	int i, j;
+
+	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
+		return;
+
+	/*
+	 * Find all callbacks whose ->gp_seq numbers indicate that they
+	 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
+	 */
+	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
+		if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
+			break;
+		rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i];
+	}
+
+	/* If no callbacks moved, nothing more need be done. */
+	if (i == RCU_WAIT_TAIL)
+		return;
+
+	/* Clean up tail pointers that might have been misordered above. */
+	for (j = RCU_WAIT_TAIL; j < i; j++)
+		rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL];
+
+	/*
+	 * Callbacks moved, so clean up the misordered ->tails[] pointers
+	 * that now point into the middle of the list of ready-to-invoke
+	 * callbacks.  The overall effect is to copy down the later pointers
+	 * into the gap that was created by the now-ready segments.
+	 */
+	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
+		if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
+			break;  /* No more callbacks. */
+		rsclp->tails[j] = rsclp->tails[i];
+		rsclp->gp_seq[j] = rsclp->gp_seq[i];
+	}
+}
+
+/*
+ * "Accelerate" callbacks based on more-accurate grace-period information.
+ * The reason for this is that RCU does not synchronize the beginnings and
+ * ends of grace periods, and that callbacks are posted locally.  This in
+ * turn means that the callbacks must be labelled conservatively early
+ * on, as getting exact information would degrade both performance and
+ * scalability.  When more accurate grace-period information becomes
+ * available, previously posted callbacks can be "accelerated", marking
+ * them to complete at the end of the earlier grace period.
+ *
+ * This function operates on an rcu_segcblist structure, and also the
+ * grace-period sequence number seq at which new callbacks would become
+ * ready to invoke.  Returns true if there are callbacks that won't be
+ * ready to invoke until seq, false otherwise.
+ */
+bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
+{
+	int i;
+
+	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
+		return false;
+
+	/*
+	 * Find the segment preceding the oldest segment of callbacks
+	 * whose ->gp_seq[] completion is at or after that passed in via
+	 * "seq", skipping any empty segments.  This oldest segment, along
+	 * with any later segments, can be merged in with any newly arrived
+	 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
+	 * as their ->gp_seq[] grace-period completion sequence number.
+	 */
+	for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
+		if (rsclp->tails[i] != rsclp->tails[i - 1] &&
+		    ULONG_CMP_LT(rsclp->gp_seq[i], seq))
+			break;
+
+	/*
+	 * If all the segments contain callbacks that correspond to
+	 * earlier grace-period sequence numbers than "seq", leave.
+	 * Assuming that the rcu_segcblist structure has enough
+	 * segments in its arrays, this can only happen if some of
+	 * the non-done segments contain callbacks that really are
+	 * ready to invoke.  This situation will get straightened
+	 * out by the next call to rcu_segcblist_advance().
+	 *
+	 * Also advance to the oldest segment of callbacks whose
+	 * ->gp_seq[] completion is at or after that passed in via "seq",
+	 * skipping any empty segments.
+	 */
+	if (++i >= RCU_NEXT_TAIL)
+		return false;
+
+	/*
+	 * Merge all later callbacks, including newly arrived callbacks,
+	 * into the segment located by the for-loop above.  Assign "seq"
+	 * as the ->gp_seq[] value in order to correctly handle the case
+	 * where there were no pending callbacks in the rcu_segcblist
+	 * structure other than in the RCU_NEXT_TAIL segment.
+	 */
+	for (; i < RCU_NEXT_TAIL; i++) {
+		rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL];
+		rsclp->gp_seq[i] = seq;
+	}
+	return true;
+}
+
+/*
+ * Scan the specified rcu_segcblist structure for callbacks that need
+ * a grace period later than the one specified by "seq".  We don't look
+ * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
+ * have a grace-period sequence number.
+ */
+bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
+				    unsigned long seq)
+{
+	int i;
+
+	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
+		if (rsclp->tails[i - 1] != rsclp->tails[i] &&
+		    ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
+			return true;
+	return false;
+}
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
new file mode 100644
index 000000000000..6e36e36478cd
--- /dev/null
+++ b/kernel/rcu/rcu_segcblist.h
@@ -0,0 +1,164 @@
+/*
+ * RCU segmented callback lists, internal-to-rcu header file
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright IBM Corporation, 2017
+ *
+ * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#include <linux/rcu_segcblist.h>
+
+/*
+ * Account for the fact that a previously dequeued callback turned out
+ * to be marked as lazy.
+ */
+static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp)
+{
+	rclp->len_lazy--;
+}
+
+/*
+ * Interim function to return rcu_cblist head pointer.  Longer term, the
+ * rcu_cblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
+{
+	return rclp->head;
+}
+
+/*
+ * Interim function to return rcu_cblist head pointer.  Longer term, the
+ * rcu_cblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
+{
+	WARN_ON_ONCE(!rclp->head);
+	return rclp->tail;
+}
+
+void rcu_cblist_init(struct rcu_cblist *rclp);
+long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim);
+struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp);
+
+/*
+ * Is the specified rcu_segcblist structure empty?
+ *
+ * But careful!  The fact that the ->head field is NULL does not
+ * necessarily imply that there are no callbacks associated with
+ * this structure.  When callbacks are being invoked, they are
+ * removed as a group.  If callback invocation must be preempted,
+ * the remaining callbacks will be added back to the list.  Either
+ * way, the counts are updated later.
+ *
+ * So it is often the case that rcu_segcblist_n_cbs() should be used
+ * instead.
+ */
+static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp)
+{
+	return !rsclp->head;
+}
+
+/* Return number of callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
+{
+	return READ_ONCE(rsclp->len);
+}
+
+/* Return number of lazy callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp)
+{
+	return rsclp->len_lazy;
+}
+
+/* Return number of lazy callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp)
+{
+	return rsclp->len - rsclp->len_lazy;
+}
+
+/*
+ * Is the specified rcu_segcblist enabled, for example, not corresponding
+ * to an offline or callback-offloaded CPU?
+ */
+static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
+{
+	return !!rsclp->tails[RCU_NEXT_TAIL];
+}
+
+/*
+ * Are all segments following the specified segment of the specified
+ * rcu_segcblist structure empty of callbacks?  (The specified
+ * segment might well contain callbacks.)
+ */
+static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
+{
+	return !*rsclp->tails[seg];
+}
+
+/*
+ * Interim function to return rcu_segcblist head pointer.  Longer term, the
+ * rcu_segcblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp)
+{
+	return rsclp->head;
+}
+
+/*
+ * Interim function to return rcu_segcblist head pointer.  Longer term, the
+ * rcu_segcblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
+{
+	WARN_ON_ONCE(rcu_segcblist_empty(rsclp));
+	return rsclp->tails[RCU_NEXT_TAIL];
+}
+
+void rcu_segcblist_init(struct rcu_segcblist *rsclp);
+void rcu_segcblist_disable(struct rcu_segcblist *rsclp);
+bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg);
+bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp);
+bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp);
+struct rcu_head *rcu_segcblist_dequeue(struct rcu_segcblist *rsclp);
+void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp);
+struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp);
+struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp);
+bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp);
+void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
+			   struct rcu_head *rhp, bool lazy);
+bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
+			   struct rcu_head *rhp, bool lazy);
+void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
+				 struct rcu_cblist *rclp);
+void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
+				    struct rcu_cblist *rclp);
+void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
+				    struct rcu_cblist *rclp);
+void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
+				struct rcu_cblist *rclp);
+void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
+				   struct rcu_cblist *rclp);
+void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
+				   struct rcu_cblist *rclp);
+void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq);
+bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
+bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
+				    unsigned long seq);
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index a4a86fb47e4a..3cc18110b612 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -48,6 +48,8 @@
 #include <linux/torture.h>
 #include <linux/vmalloc.h>
 
+#include "rcu.h"
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
 
@@ -59,12 +61,16 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
 #define VERBOSE_PERFOUT_ERRSTRING(s) \
 	do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)
 
+torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
+torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader");
 torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
 torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
-torture_param(int, nreaders, -1, "Number of RCU reader threads");
+torture_param(int, nreaders, 0, "Number of RCU reader threads");
 torture_param(int, nwriters, -1, "Number of RCU updater threads");
-torture_param(bool, shutdown, false, "Shutdown at end of performance tests.");
+torture_param(bool, shutdown, !IS_ENABLED(MODULE),
+	      "Shutdown at end of performance tests.");
 torture_param(bool, verbose, true, "Enable verbose debugging printk()s");
+torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
 
 static char *perf_type = "rcu";
 module_param(perf_type, charp, 0444);
@@ -86,13 +92,16 @@ static u64 t_rcu_perf_writer_started;
 static u64 t_rcu_perf_writer_finished;
 static unsigned long b_rcu_perf_writer_started;
 static unsigned long b_rcu_perf_writer_finished;
+static DEFINE_PER_CPU(atomic_t, n_async_inflight);
 
 static int rcu_perf_writer_state;
 #define RTWS_INIT		0
-#define RTWS_EXP_SYNC		1
-#define RTWS_SYNC		2
-#define RTWS_IDLE		2
-#define RTWS_STOPPING		3
+#define RTWS_ASYNC		1
+#define RTWS_BARRIER		2
+#define RTWS_EXP_SYNC		3
+#define RTWS_SYNC		4
+#define RTWS_IDLE		5
+#define RTWS_STOPPING		6
 
 #define MAX_MEAS 10000
 #define MIN_MEAS 100
@@ -114,6 +123,8 @@ struct rcu_perf_ops {
 	unsigned long (*started)(void);
 	unsigned long (*completed)(void);
 	unsigned long (*exp_completed)(void);
+	void (*async)(struct rcu_head *head, rcu_callback_t func);
+	void (*gp_barrier)(void);
 	void (*sync)(void);
 	void (*exp_sync)(void);
 	const char *name;
@@ -153,6 +164,8 @@ static struct rcu_perf_ops rcu_ops = {
 	.started	= rcu_batches_started,
 	.completed	= rcu_batches_completed,
 	.exp_completed	= rcu_exp_batches_completed,
+	.async		= call_rcu,
+	.gp_barrier	= rcu_barrier,
 	.sync		= synchronize_rcu,
 	.exp_sync	= synchronize_rcu_expedited,
 	.name		= "rcu"
@@ -181,6 +194,8 @@ static struct rcu_perf_ops rcu_bh_ops = {
 	.started	= rcu_batches_started_bh,
 	.completed	= rcu_batches_completed_bh,
 	.exp_completed	= rcu_exp_batches_completed_sched,
+	.async		= call_rcu_bh,
+	.gp_barrier	= rcu_barrier_bh,
 	.sync		= synchronize_rcu_bh,
 	.exp_sync	= synchronize_rcu_bh_expedited,
 	.name		= "rcu_bh"
@@ -208,6 +223,16 @@ static unsigned long srcu_perf_completed(void)
 	return srcu_batches_completed(srcu_ctlp);
 }
 
+static void srcu_call_rcu(struct rcu_head *head, rcu_callback_t func)
+{
+	call_srcu(srcu_ctlp, head, func);
+}
+
+static void srcu_rcu_barrier(void)
+{
+	srcu_barrier(srcu_ctlp);
+}
+
 static void srcu_perf_synchronize(void)
 {
 	synchronize_srcu(srcu_ctlp);
@@ -226,11 +251,42 @@ static struct rcu_perf_ops srcu_ops = {
 	.started	= NULL,
 	.completed	= srcu_perf_completed,
 	.exp_completed	= srcu_perf_completed,
+	.async		= srcu_call_rcu,
+	.gp_barrier	= srcu_rcu_barrier,
 	.sync		= srcu_perf_synchronize,
 	.exp_sync	= srcu_perf_synchronize_expedited,
 	.name		= "srcu"
 };
 
+static struct srcu_struct srcud;
+
+static void srcu_sync_perf_init(void)
+{
+	srcu_ctlp = &srcud;
+	init_srcu_struct(srcu_ctlp);
+}
+
+static void srcu_sync_perf_cleanup(void)
+{
+	cleanup_srcu_struct(srcu_ctlp);
+}
+
+static struct rcu_perf_ops srcud_ops = {
+	.ptype		= SRCU_FLAVOR,
+	.init		= srcu_sync_perf_init,
+	.cleanup	= srcu_sync_perf_cleanup,
+	.readlock	= srcu_perf_read_lock,
+	.readunlock	= srcu_perf_read_unlock,
+	.started	= NULL,
+	.completed	= srcu_perf_completed,
+	.exp_completed	= srcu_perf_completed,
+	.async		= srcu_call_rcu,
+	.gp_barrier	= srcu_rcu_barrier,
+	.sync		= srcu_perf_synchronize,
+	.exp_sync	= srcu_perf_synchronize_expedited,
+	.name		= "srcud"
+};
+
 /*
  * Definitions for sched perf testing.
  */
@@ -254,6 +310,8 @@ static struct rcu_perf_ops sched_ops = {
 	.started	= rcu_batches_started_sched,
 	.completed	= rcu_batches_completed_sched,
 	.exp_completed	= rcu_exp_batches_completed_sched,
+	.async		= call_rcu_sched,
+	.gp_barrier	= rcu_barrier_sched,
 	.sync		= synchronize_sched,
 	.exp_sync	= synchronize_sched_expedited,
 	.name		= "sched"
@@ -281,6 +339,8 @@ static struct rcu_perf_ops tasks_ops = {
 	.readunlock	= tasks_perf_read_unlock,
 	.started	= rcu_no_completed,
 	.completed	= rcu_no_completed,
+	.async		= call_rcu_tasks,
+	.gp_barrier	= rcu_barrier_tasks,
 	.sync		= synchronize_rcu_tasks,
 	.exp_sync	= synchronize_rcu_tasks,
 	.name		= "tasks"
@@ -344,6 +404,15 @@ rcu_perf_reader(void *arg)
 }
 
 /*
+ * Callback function for asynchronous grace periods from rcu_perf_writer().
+ */
+static void rcu_perf_async_cb(struct rcu_head *rhp)
+{
+	atomic_dec(this_cpu_ptr(&n_async_inflight));
+	kfree(rhp);
+}
+
+/*
  * RCU perf writer kthread.  Repeatedly does a grace period.
  */
 static int
@@ -352,6 +421,7 @@ rcu_perf_writer(void *arg)
 	int i = 0;
 	int i_max;
 	long me = (long)arg;
+	struct rcu_head *rhp = NULL;
 	struct sched_param sp;
 	bool started = false, done = false, alldone = false;
 	u64 t;
@@ -380,9 +450,27 @@ rcu_perf_writer(void *arg)
 	}
 
 	do {
+		if (writer_holdoff)
+			udelay(writer_holdoff);
 		wdp = &wdpp[i];
 		*wdp = ktime_get_mono_fast_ns();
-		if (gp_exp) {
+		if (gp_async) {
+retry:
+			if (!rhp)
+				rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
+			if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
+				rcu_perf_writer_state = RTWS_ASYNC;
+				atomic_inc(this_cpu_ptr(&n_async_inflight));
+				cur_ops->async(rhp, rcu_perf_async_cb);
+				rhp = NULL;
+			} else if (!kthread_should_stop()) {
+				rcu_perf_writer_state = RTWS_BARRIER;
+				cur_ops->gp_barrier();
+				goto retry;
+			} else {
+				kfree(rhp); /* Because we are stopping. */
+			}
+		} else if (gp_exp) {
 			rcu_perf_writer_state = RTWS_EXP_SYNC;
 			cur_ops->exp_sync();
 		} else {
@@ -429,6 +517,10 @@ rcu_perf_writer(void *arg)
 			i++;
 		rcu_perf_wait_shutdown();
 	} while (!torture_must_stop());
+	if (gp_async) {
+		rcu_perf_writer_state = RTWS_BARRIER;
+		cur_ops->gp_barrier();
+	}
 	rcu_perf_writer_state = RTWS_STOPPING;
 	writer_n_durations[me] = i_max;
 	torture_kthread_stopping("rcu_perf_writer");
@@ -452,6 +544,17 @@ rcu_perf_cleanup(void)
 	u64 *wdp;
 	u64 *wdpp;
 
+	/*
+	 * Would like warning at start, but everything is expedited
+	 * during the mid-boot phase, so have to wait till the end.
+	 */
+	if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
+		VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
+	if (rcu_gp_is_normal() && gp_exp)
+		VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
+	if (gp_exp && gp_async)
+		VERBOSE_PERFOUT_ERRSTRING("No expedited async GPs, so went with async!");
+
 	if (torture_cleanup_begin())
 		return;
 
@@ -554,7 +657,7 @@ rcu_perf_init(void)
 	long i;
 	int firsterr = 0;
 	static struct rcu_perf_ops *perf_ops[] = {
-		&rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops,
+		&rcu_ops, &rcu_bh_ops, &srcu_ops, &srcud_ops, &sched_ops,
 		RCUPERF_TASKS_OPS
 	};
 
@@ -624,16 +727,6 @@ rcu_perf_init(void)
 		firsterr = -ENOMEM;
 		goto unwind;
 	}
-	if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp) {
-		VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
-		firsterr = -EINVAL;
-		goto unwind;
-	}
-	if (rcu_gp_is_normal() && gp_exp) {
-		VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
-		firsterr = -EINVAL;
-		goto unwind;
-	}
 	for (i = 0; i < nrealwriters; i++) {
 		writer_durations[i] =
 			kcalloc(MAX_MEAS, sizeof(*writer_durations[i]),
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index cccc417a8135..b8f7f8ce8575 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -52,6 +52,8 @@
 #include <linux/torture.h>
 #include <linux/vmalloc.h>
 
+#include "rcu.h"
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
 
@@ -559,19 +561,22 @@ static void srcu_torture_barrier(void)
 
 static void srcu_torture_stats(void)
 {
-	int cpu;
-	int idx = srcu_ctlp->completed & 0x1;
+	int __maybe_unused cpu;
+	int idx;
 
-	pr_alert("%s%s per-CPU(idx=%d):",
+#ifdef CONFIG_TREE_SRCU
+	idx = srcu_ctlp->srcu_idx & 0x1;
+	pr_alert("%s%s Tree SRCU per-CPU(idx=%d):",
 		 torture_type, TORTURE_FLAG, idx);
 	for_each_possible_cpu(cpu) {
 		unsigned long l0, l1;
 		unsigned long u0, u1;
 		long c0, c1;
-		struct srcu_array *counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu);
+		struct srcu_data *counts;
 
-		u0 = counts->unlock_count[!idx];
-		u1 = counts->unlock_count[idx];
+		counts = per_cpu_ptr(srcu_ctlp->sda, cpu);
+		u0 = counts->srcu_unlock_count[!idx];
+		u1 = counts->srcu_unlock_count[idx];
 
 		/*
 		 * Make sure that a lock is always counted if the corresponding
@@ -579,14 +584,21 @@ static void srcu_torture_stats(void)
 		 */
 		smp_rmb();
 
-		l0 = counts->lock_count[!idx];
-		l1 = counts->lock_count[idx];
+		l0 = counts->srcu_lock_count[!idx];
+		l1 = counts->srcu_lock_count[idx];
 
 		c0 = l0 - u0;
 		c1 = l1 - u1;
 		pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
 	}
 	pr_cont("\n");
+#elif defined(CONFIG_TINY_SRCU)
+	idx = READ_ONCE(srcu_ctlp->srcu_idx) & 0x1;
+	pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n",
+		 torture_type, TORTURE_FLAG, idx,
+		 READ_ONCE(srcu_ctlp->srcu_lock_nesting[!idx]),
+		 READ_ONCE(srcu_ctlp->srcu_lock_nesting[idx]));
+#endif
 }
 
 static void srcu_torture_synchronize_expedited(void)
@@ -1333,12 +1345,14 @@ rcu_torture_stats_print(void)
 		cur_ops->stats();
 	if (rtcv_snap == rcu_torture_current_version &&
 	    rcu_torture_current != NULL) {
-		int __maybe_unused flags;
-		unsigned long __maybe_unused gpnum;
-		unsigned long __maybe_unused completed;
+		int __maybe_unused flags = 0;
+		unsigned long __maybe_unused gpnum = 0;
+		unsigned long __maybe_unused completed = 0;
 
 		rcutorture_get_gp_data(cur_ops->ttype,
 				       &flags, &gpnum, &completed);
+		srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
+					&flags, &gpnum, &completed);
 		wtp = READ_ONCE(writer_task);
 		pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx\n",
 			 rcu_torture_writer_state_getname(),
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
deleted file mode 100644
index ef3bcfb15b39..000000000000
--- a/kernel/rcu/srcu.c
+++ /dev/null
@@ -1,656 +0,0 @@
-/*
- * Sleepable Read-Copy Update mechanism for mutual exclusion.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * Copyright (C) IBM Corporation, 2006
- * Copyright (C) Fujitsu, 2012
- *
- * Author: Paul McKenney <paulmck@us.ibm.com>
- *	   Lai Jiangshan <laijs@cn.fujitsu.com>
- *
- * For detailed explanation of Read-Copy Update mechanism see -
- * 		Documentation/RCU/ *.txt
- *
- */
-
-#include <linux/export.h>
-#include <linux/mutex.h>
-#include <linux/percpu.h>
-#include <linux/preempt.h>
-#include <linux/rcupdate_wait.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/delay.h>
-#include <linux/srcu.h>
-
-#include "rcu.h"
-
-/*
- * Initialize an rcu_batch structure to empty.
- */
-static inline void rcu_batch_init(struct rcu_batch *b)
-{
-	b->head = NULL;
-	b->tail = &b->head;
-}
-
-/*
- * Enqueue a callback onto the tail of the specified rcu_batch structure.
- */
-static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
-{
-	*b->tail = head;
-	b->tail = &head->next;
-}
-
-/*
- * Is the specified rcu_batch structure empty?
- */
-static inline bool rcu_batch_empty(struct rcu_batch *b)
-{
-	return b->tail == &b->head;
-}
-
-/*
- * Remove the callback at the head of the specified rcu_batch structure
- * and return a pointer to it, or return NULL if the structure is empty.
- */
-static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
-{
-	struct rcu_head *head;
-
-	if (rcu_batch_empty(b))
-		return NULL;
-
-	head = b->head;
-	b->head = head->next;
-	if (b->tail == &head->next)
-		rcu_batch_init(b);
-
-	return head;
-}
-
-/*
- * Move all callbacks from the rcu_batch structure specified by "from" to
- * the structure specified by "to".
- */
-static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
-{
-	if (!rcu_batch_empty(from)) {
-		*to->tail = from->head;
-		to->tail = from->tail;
-		rcu_batch_init(from);
-	}
-}
-
-static int init_srcu_struct_fields(struct srcu_struct *sp)
-{
-	sp->completed = 0;
-	spin_lock_init(&sp->queue_lock);
-	sp->running = false;
-	rcu_batch_init(&sp->batch_queue);
-	rcu_batch_init(&sp->batch_check0);
-	rcu_batch_init(&sp->batch_check1);
-	rcu_batch_init(&sp->batch_done);
-	INIT_DELAYED_WORK(&sp->work, process_srcu);
-	sp->per_cpu_ref = alloc_percpu(struct srcu_array);
-	return sp->per_cpu_ref ? 0 : -ENOMEM;
-}
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-
-int __init_srcu_struct(struct srcu_struct *sp, const char *name,
-		       struct lock_class_key *key)
-{
-	/* Don't re-initialize a lock while it is held. */
-	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
-	lockdep_init_map(&sp->dep_map, name, key, 0);
-	return init_srcu_struct_fields(sp);
-}
-EXPORT_SYMBOL_GPL(__init_srcu_struct);
-
-#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-/**
- * init_srcu_struct - initialize a sleep-RCU structure
- * @sp: structure to initialize.
- *
- * Must invoke this on a given srcu_struct before passing that srcu_struct
- * to any other function.  Each srcu_struct represents a separate domain
- * of SRCU protection.
- */
-int init_srcu_struct(struct srcu_struct *sp)
-{
-	return init_srcu_struct_fields(sp);
-}
-EXPORT_SYMBOL_GPL(init_srcu_struct);
-
-#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-/*
- * Returns approximate total of the readers' ->lock_count[] values for the
- * rank of per-CPU counters specified by idx.
- */
-static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx)
-{
-	int cpu;
-	unsigned long sum = 0;
-
-	for_each_possible_cpu(cpu) {
-		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
-
-		sum += READ_ONCE(cpuc->lock_count[idx]);
-	}
-	return sum;
-}
-
-/*
- * Returns approximate total of the readers' ->unlock_count[] values for the
- * rank of per-CPU counters specified by idx.
- */
-static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx)
-{
-	int cpu;
-	unsigned long sum = 0;
-
-	for_each_possible_cpu(cpu) {
-		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
-
-		sum += READ_ONCE(cpuc->unlock_count[idx]);
-	}
-	return sum;
-}
-
-/*
- * Return true if the number of pre-existing readers is determined to
- * be zero.
- */
-static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
-{
-	unsigned long unlocks;
-
-	unlocks = srcu_readers_unlock_idx(sp, idx);
-
-	/*
-	 * Make sure that a lock is always counted if the corresponding unlock
-	 * is counted. Needs to be a smp_mb() as the read side may contain a
-	 * read from a variable that is written to before the synchronize_srcu()
-	 * in the write side. In this case smp_mb()s A and B act like the store
-	 * buffering pattern.
-	 *
-	 * This smp_mb() also pairs with smp_mb() C to prevent accesses after the
-	 * synchronize_srcu() from being executed before the grace period ends.
-	 */
-	smp_mb(); /* A */
-
-	/*
-	 * If the locks are the same as the unlocks, then there must have
-	 * been no readers on this index at some time in between. This does not
-	 * mean that there are no more readers, as one could have read the
-	 * current index but not have incremented the lock counter yet.
-	 *
-	 * Possible bug: There is no guarantee that there haven't been ULONG_MAX
-	 * increments of ->lock_count[] since the unlocks were counted, meaning
-	 * that this could return true even if there are still active readers.
-	 * Since there are no memory barriers around srcu_flip(), the CPU is not
-	 * required to increment ->completed before running
-	 * srcu_readers_unlock_idx(), which means that there could be an
-	 * arbitrarily large number of critical sections that execute after
-	 * srcu_readers_unlock_idx() but use the old value of ->completed.
-	 */
-	return srcu_readers_lock_idx(sp, idx) == unlocks;
-}
-
-/**
- * srcu_readers_active - returns true if there are readers. and false
- *                       otherwise
- * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
- *
- * Note that this is not an atomic primitive, and can therefore suffer
- * severe errors when invoked on an active srcu_struct.  That said, it
- * can be useful as an error check at cleanup time.
- */
-static bool srcu_readers_active(struct srcu_struct *sp)
-{
-	int cpu;
-	unsigned long sum = 0;
-
-	for_each_possible_cpu(cpu) {
-		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
-
-		sum += READ_ONCE(cpuc->lock_count[0]);
-		sum += READ_ONCE(cpuc->lock_count[1]);
-		sum -= READ_ONCE(cpuc->unlock_count[0]);
-		sum -= READ_ONCE(cpuc->unlock_count[1]);
-	}
-	return sum;
-}
-
-/**
- * cleanup_srcu_struct - deconstruct a sleep-RCU structure
- * @sp: structure to clean up.
- *
- * Must invoke this after you are finished using a given srcu_struct that
- * was initialized via init_srcu_struct(), else you leak memory.
- */
-void cleanup_srcu_struct(struct srcu_struct *sp)
-{
-	if (WARN_ON(srcu_readers_active(sp)))
-		return; /* Leakage unless caller handles error. */
-	free_percpu(sp->per_cpu_ref);
-	sp->per_cpu_ref = NULL;
-}
-EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
-
-/*
- * Counts the new reader in the appropriate per-CPU element of the
- * srcu_struct.  Must be called from process context.
- * Returns an index that must be passed to the matching srcu_read_unlock().
- */
-int __srcu_read_lock(struct srcu_struct *sp)
-{
-	int idx;
-
-	idx = READ_ONCE(sp->completed) & 0x1;
-	__this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
-	smp_mb(); /* B */  /* Avoid leaking the critical section. */
-	return idx;
-}
-EXPORT_SYMBOL_GPL(__srcu_read_lock);
-
-/*
- * Removes the count for the old reader from the appropriate per-CPU
- * element of the srcu_struct.  Note that this may well be a different
- * CPU than that which was incremented by the corresponding srcu_read_lock().
- * Must be called from process context.
- */
-void __srcu_read_unlock(struct srcu_struct *sp, int idx)
-{
-	smp_mb(); /* C */  /* Avoid leaking the critical section. */
-	this_cpu_inc(sp->per_cpu_ref->unlock_count[idx]);
-}
-EXPORT_SYMBOL_GPL(__srcu_read_unlock);
-
-/*
- * We use an adaptive strategy for synchronize_srcu() and especially for
- * synchronize_srcu_expedited().  We spin for a fixed time period
- * (defined below) to allow SRCU readers to exit their read-side critical
- * sections.  If there are still some readers after 10 microseconds,
- * we repeatedly block for 1-millisecond time periods.  This approach
- * has done well in testing, so there is no need for a config parameter.
- */
-#define SRCU_RETRY_CHECK_DELAY		5
-#define SYNCHRONIZE_SRCU_TRYCOUNT	2
-#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT	12
-
-/*
- * @@@ Wait until all pre-existing readers complete.  Such readers
- * will have used the index specified by "idx".
- * the caller should ensures the ->completed is not changed while checking
- * and idx = (->completed & 1) ^ 1
- */
-static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
-{
-	for (;;) {
-		if (srcu_readers_active_idx_check(sp, idx))
-			return true;
-		if (--trycount <= 0)
-			return false;
-		udelay(SRCU_RETRY_CHECK_DELAY);
-	}
-}
-
-/*
- * Increment the ->completed counter so that future SRCU readers will
- * use the other rank of the ->(un)lock_count[] arrays.  This allows
- * us to wait for pre-existing readers in a starvation-free manner.
- */
-static void srcu_flip(struct srcu_struct *sp)
-{
-	WRITE_ONCE(sp->completed, sp->completed + 1);
-
-	/*
-	 * Ensure that if the updater misses an __srcu_read_unlock()
-	 * increment, that task's next __srcu_read_lock() will see the
-	 * above counter update.  Note that both this memory barrier
-	 * and the one in srcu_readers_active_idx_check() provide the
-	 * guarantee for __srcu_read_lock().
-	 */
-	smp_mb(); /* D */  /* Pairs with C. */
-}
-
-/*
- * Enqueue an SRCU callback on the specified srcu_struct structure,
- * initiating grace-period processing if it is not already running.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing SRCU read-side critical section.  On systems with
- * more than one CPU, this means that when "func()" is invoked, each CPU
- * is guaranteed to have executed a full memory barrier since the end of
- * its last corresponding SRCU read-side critical section whose beginning
- * preceded the call to call_rcu().  It also means that each CPU executing
- * an SRCU read-side critical section that continues beyond the start of
- * "func()" must have executed a memory barrier after the call_rcu()
- * but before the beginning of that SRCU read-side critical section.
- * Note that these guarantees include CPUs that are offline, idle, or
- * executing in user mode, as well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting SRCU callback function "func()", then both CPU A and CPU
- * B are guaranteed to execute a full memory barrier during the time
- * interval between the call to call_rcu() and the invocation of "func()".
- * This guarantee applies even if CPU A and CPU B are the same CPU (but
- * again only if the system has more than one CPU).
- *
- * Of course, these guarantees apply only for invocations of call_srcu(),
- * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
- * srcu_struct structure.
- */
-void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
-	       rcu_callback_t func)
-{
-	unsigned long flags;
-
-	head->next = NULL;
-	head->func = func;
-	spin_lock_irqsave(&sp->queue_lock, flags);
-	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
-	rcu_batch_queue(&sp->batch_queue, head);
-	if (!sp->running) {
-		sp->running = true;
-		queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
-	}
-	spin_unlock_irqrestore(&sp->queue_lock, flags);
-}
-EXPORT_SYMBOL_GPL(call_srcu);
-
-static void srcu_advance_batches(struct srcu_struct *sp, int trycount);
-static void srcu_reschedule(struct srcu_struct *sp);
-
-/*
- * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
- */
-static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
-{
-	struct rcu_synchronize rcu;
-	struct rcu_head *head = &rcu.head;
-	bool done = false;
-
-	RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) ||
-			 lock_is_held(&rcu_bh_lock_map) ||
-			 lock_is_held(&rcu_lock_map) ||
-			 lock_is_held(&rcu_sched_lock_map),
-			 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
-
-	might_sleep();
-	init_completion(&rcu.completion);
-
-	head->next = NULL;
-	head->func = wakeme_after_rcu;
-	spin_lock_irq(&sp->queue_lock);
-	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
-	if (!sp->running) {
-		/* steal the processing owner */
-		sp->running = true;
-		rcu_batch_queue(&sp->batch_check0, head);
-		spin_unlock_irq(&sp->queue_lock);
-
-		srcu_advance_batches(sp, trycount);
-		if (!rcu_batch_empty(&sp->batch_done)) {
-			BUG_ON(sp->batch_done.head != head);
-			rcu_batch_dequeue(&sp->batch_done);
-			done = true;
-		}
-		/* give the processing owner to work_struct */
-		srcu_reschedule(sp);
-	} else {
-		rcu_batch_queue(&sp->batch_queue, head);
-		spin_unlock_irq(&sp->queue_lock);
-	}
-
-	if (!done) {
-		wait_for_completion(&rcu.completion);
-		smp_mb(); /* Caller's later accesses after GP. */
-	}
-
-}
-
-/**
- * synchronize_srcu - wait for prior SRCU read-side critical-section completion
- * @sp: srcu_struct with which to synchronize.
- *
- * Wait for the count to drain to zero of both indexes. To avoid the
- * possible starvation of synchronize_srcu(), it waits for the count of
- * the index=((->completed & 1) ^ 1) to drain to zero at first,
- * and then flip the completed and wait for the count of the other index.
- *
- * Can block; must be called from process context.
- *
- * Note that it is illegal to call synchronize_srcu() from the corresponding
- * SRCU read-side critical section; doing so will result in deadlock.
- * However, it is perfectly legal to call synchronize_srcu() on one
- * srcu_struct from some other srcu_struct's read-side critical section,
- * as long as the resulting graph of srcu_structs is acyclic.
- *
- * There are memory-ordering constraints implied by synchronize_srcu().
- * On systems with more than one CPU, when synchronize_srcu() returns,
- * each CPU is guaranteed to have executed a full memory barrier since
- * the end of its last corresponding SRCU-sched read-side critical section
- * whose beginning preceded the call to synchronize_srcu().  In addition,
- * each CPU having an SRCU read-side critical section that extends beyond
- * the return from synchronize_srcu() is guaranteed to have executed a
- * full memory barrier after the beginning of synchronize_srcu() and before
- * the beginning of that SRCU read-side critical section.  Note that these
- * guarantees include CPUs that are offline, idle, or executing in user mode,
- * as well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked synchronize_srcu(), which returned
- * to its caller on CPU B, then both CPU A and CPU B are guaranteed
- * to have executed a full memory barrier during the execution of
- * synchronize_srcu().  This guarantee applies even if CPU A and CPU B
- * are the same CPU, but again only if the system has more than one CPU.
- *
- * Of course, these memory-ordering guarantees apply only when
- * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
- * passed the same srcu_struct structure.
- */
-void synchronize_srcu(struct srcu_struct *sp)
-{
-	__synchronize_srcu(sp, (rcu_gp_is_expedited() && !rcu_gp_is_normal())
-			   ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
-			   : SYNCHRONIZE_SRCU_TRYCOUNT);
-}
-EXPORT_SYMBOL_GPL(synchronize_srcu);
-
-/**
- * synchronize_srcu_expedited - Brute-force SRCU grace period
- * @sp: srcu_struct with which to synchronize.
- *
- * Wait for an SRCU grace period to elapse, but be more aggressive about
- * spinning rather than blocking when waiting.
- *
- * Note that synchronize_srcu_expedited() has the same deadlock and
- * memory-ordering properties as does synchronize_srcu().
- */
-void synchronize_srcu_expedited(struct srcu_struct *sp)
-{
-	__synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT);
-}
-EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
-
-/**
- * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
- * @sp: srcu_struct on which to wait for in-flight callbacks.
- */
-void srcu_barrier(struct srcu_struct *sp)
-{
-	synchronize_srcu(sp);
-}
-EXPORT_SYMBOL_GPL(srcu_barrier);
-
-/**
- * srcu_batches_completed - return batches completed.
- * @sp: srcu_struct on which to report batch completion.
- *
- * Report the number of batches, correlated with, but not necessarily
- * precisely the same as, the number of grace periods that have elapsed.
- */
-unsigned long srcu_batches_completed(struct srcu_struct *sp)
-{
-	return sp->completed;
-}
-EXPORT_SYMBOL_GPL(srcu_batches_completed);
-
-#define SRCU_CALLBACK_BATCH	10
-#define SRCU_INTERVAL		1
-
-/*
- * Move any new SRCU callbacks to the first stage of the SRCU grace
- * period pipeline.
- */
-static void srcu_collect_new(struct srcu_struct *sp)
-{
-	if (!rcu_batch_empty(&sp->batch_queue)) {
-		spin_lock_irq(&sp->queue_lock);
-		rcu_batch_move(&sp->batch_check0, &sp->batch_queue);
-		spin_unlock_irq(&sp->queue_lock);
-	}
-}
-
-/*
- * Core SRCU state machine.  Advance callbacks from ->batch_check0 to
- * ->batch_check1 and then to ->batch_done as readers drain.
- */
-static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
-{
-	int idx = 1 ^ (sp->completed & 1);
-
-	/*
-	 * Because readers might be delayed for an extended period after
-	 * fetching ->completed for their index, at any point in time there
-	 * might well be readers using both idx=0 and idx=1.  We therefore
-	 * need to wait for readers to clear from both index values before
-	 * invoking a callback.
-	 */
-
-	if (rcu_batch_empty(&sp->batch_check0) &&
-	    rcu_batch_empty(&sp->batch_check1))
-		return; /* no callbacks need to be advanced */
-
-	if (!try_check_zero(sp, idx, trycount))
-		return; /* failed to advance, will try after SRCU_INTERVAL */
-
-	/*
-	 * The callbacks in ->batch_check1 have already done with their
-	 * first zero check and flip back when they were enqueued on
-	 * ->batch_check0 in a previous invocation of srcu_advance_batches().
-	 * (Presumably try_check_zero() returned false during that
-	 * invocation, leaving the callbacks stranded on ->batch_check1.)
-	 * They are therefore ready to invoke, so move them to ->batch_done.
-	 */
-	rcu_batch_move(&sp->batch_done, &sp->batch_check1);
-
-	if (rcu_batch_empty(&sp->batch_check0))
-		return; /* no callbacks need to be advanced */
-	srcu_flip(sp);
-
-	/*
-	 * The callbacks in ->batch_check0 just finished their
-	 * first check zero and flip, so move them to ->batch_check1
-	 * for future checking on the other idx.
-	 */
-	rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
-
-	/*
-	 * SRCU read-side critical sections are normally short, so check
-	 * at least twice in quick succession after a flip.
-	 */
-	trycount = trycount < 2 ? 2 : trycount;
-	if (!try_check_zero(sp, idx^1, trycount))
-		return; /* failed to advance, will try after SRCU_INTERVAL */
-
-	/*
-	 * The callbacks in ->batch_check1 have now waited for all
-	 * pre-existing readers using both idx values.  They are therefore
-	 * ready to invoke, so move them to ->batch_done.
-	 */
-	rcu_batch_move(&sp->batch_done, &sp->batch_check1);
-}
-
-/*
- * Invoke a limited number of SRCU callbacks that have passed through
- * their grace period.  If there are more to do, SRCU will reschedule
- * the workqueue.  Note that needed memory barriers have been executed
- * in this task's context by srcu_readers_active_idx_check().
- */
-static void srcu_invoke_callbacks(struct srcu_struct *sp)
-{
-	int i;
-	struct rcu_head *head;
-
-	for (i = 0; i < SRCU_CALLBACK_BATCH; i++) {
-		head = rcu_batch_dequeue(&sp->batch_done);
-		if (!head)
-			break;
-		local_bh_disable();
-		head->func(head);
-		local_bh_enable();
-	}
-}
-
-/*
- * Finished one round of SRCU grace period.  Start another if there are
- * more SRCU callbacks queued, otherwise put SRCU into not-running state.
- */
-static void srcu_reschedule(struct srcu_struct *sp)
-{
-	bool pending = true;
-
-	if (rcu_batch_empty(&sp->batch_done) &&
-	    rcu_batch_empty(&sp->batch_check1) &&
-	    rcu_batch_empty(&sp->batch_check0) &&
-	    rcu_batch_empty(&sp->batch_queue)) {
-		spin_lock_irq(&sp->queue_lock);
-		if (rcu_batch_empty(&sp->batch_done) &&
-		    rcu_batch_empty(&sp->batch_check1) &&
-		    rcu_batch_empty(&sp->batch_check0) &&
-		    rcu_batch_empty(&sp->batch_queue)) {
-			sp->running = false;
-			pending = false;
-		}
-		spin_unlock_irq(&sp->queue_lock);
-	}
-
-	if (pending)
-		queue_delayed_work(system_power_efficient_wq,
-				   &sp->work, SRCU_INTERVAL);
-}
-
-/*
- * This is the work-queue function that handles SRCU grace periods.
- */
-void process_srcu(struct work_struct *work)
-{
-	struct srcu_struct *sp;
-
-	sp = container_of(work, struct srcu_struct, work.work);
-
-	srcu_collect_new(sp);
-	srcu_advance_batches(sp, 1);
-	srcu_invoke_callbacks(sp);
-	srcu_reschedule(sp);
-}
-EXPORT_SYMBOL_GPL(process_srcu);
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
new file mode 100644
index 000000000000..1a1c1047d2ed
--- /dev/null
+++ b/kernel/rcu/srcutiny.c
@@ -0,0 +1,195 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion,
+ *	tiny version for non-preemptible single-CPU use.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2017
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <linux/mutex.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate_wait.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/srcu.h>
+
+#include <linux/rcu_node_tree.h>
+#include "rcu_segcblist.h"
+#include "rcu.h"
+
+static int init_srcu_struct_fields(struct srcu_struct *sp)
+{
+	sp->srcu_lock_nesting[0] = 0;
+	sp->srcu_lock_nesting[1] = 0;
+	init_swait_queue_head(&sp->srcu_wq);
+	sp->srcu_cb_head = NULL;
+	sp->srcu_cb_tail = &sp->srcu_cb_head;
+	sp->srcu_gp_running = false;
+	sp->srcu_gp_waiting = false;
+	sp->srcu_idx = 0;
+	INIT_WORK(&sp->srcu_work, srcu_drive_gp);
+	return 0;
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+int __init_srcu_struct(struct srcu_struct *sp, const char *name,
+		       struct lock_class_key *key)
+{
+	/* Don't re-initialize a lock while it is held. */
+	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
+	lockdep_init_map(&sp->dep_map, name, key, 0);
+	return init_srcu_struct_fields(sp);
+}
+EXPORT_SYMBOL_GPL(__init_srcu_struct);
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+/*
+ * init_srcu_struct - initialize a sleep-RCU structure
+ * @sp: structure to initialize.
+ *
+ * Must invoke this on a given srcu_struct before passing that srcu_struct
+ * to any other function.  Each srcu_struct represents a separate domain
+ * of SRCU protection.
+ */
+int init_srcu_struct(struct srcu_struct *sp)
+{
+	return init_srcu_struct_fields(sp);
+}
+EXPORT_SYMBOL_GPL(init_srcu_struct);
+
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+/*
+ * cleanup_srcu_struct - deconstruct a sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory.
+ */
+void cleanup_srcu_struct(struct srcu_struct *sp)
+{
+	WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]);
+	flush_work(&sp->srcu_work);
+	WARN_ON(sp->srcu_gp_running);
+	WARN_ON(sp->srcu_gp_waiting);
+	WARN_ON(sp->srcu_cb_head);
+	WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail);
+}
+EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+
+/*
+ * Removes the count for the old reader from the appropriate element of
+ * the srcu_struct.
+ */
+void __srcu_read_unlock(struct srcu_struct *sp, int idx)
+{
+	int newval = sp->srcu_lock_nesting[idx] - 1;
+
+	WRITE_ONCE(sp->srcu_lock_nesting[idx], newval);
+	if (!newval && READ_ONCE(sp->srcu_gp_waiting))
+		swake_up(&sp->srcu_wq);
+}
+EXPORT_SYMBOL_GPL(__srcu_read_unlock);
+
+/*
+ * Workqueue handler to drive one grace period and invoke any callbacks
+ * that become ready as a result.  Single-CPU and !PREEMPT operation
+ * means that we get away with murder on synchronization.  ;-)
+ */
+void srcu_drive_gp(struct work_struct *wp)
+{
+	int idx;
+	struct rcu_head *lh;
+	struct rcu_head *rhp;
+	struct srcu_struct *sp;
+
+	sp = container_of(wp, struct srcu_struct, srcu_work);
+	if (sp->srcu_gp_running || !READ_ONCE(sp->srcu_cb_head))
+		return; /* Already running or nothing to do. */
+
+	/* Remove recently arrived callbacks and wait for readers. */
+	WRITE_ONCE(sp->srcu_gp_running, true);
+	local_irq_disable();
+	lh = sp->srcu_cb_head;
+	sp->srcu_cb_head = NULL;
+	sp->srcu_cb_tail = &sp->srcu_cb_head;
+	local_irq_enable();
+	idx = sp->srcu_idx;
+	WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx);
+	WRITE_ONCE(sp->srcu_gp_waiting, true);  /* srcu_read_unlock() wakes! */
+	swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx]));
+	WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
+
+	/* Invoke the callbacks we removed above. */
+	while (lh) {
+		rhp = lh;
+		lh = lh->next;
+		local_bh_disable();
+		rhp->func(rhp);
+		local_bh_enable();
+	}
+
+	/*
+	 * Enable rescheduling, and if there are more callbacks,
+	 * reschedule ourselves.  This can race with a call_srcu()
+	 * at interrupt level, but the ->srcu_gp_running checks will
+	 * straighten that out.
+	 */
+	WRITE_ONCE(sp->srcu_gp_running, false);
+	if (READ_ONCE(sp->srcu_cb_head))
+		schedule_work(&sp->srcu_work);
+}
+EXPORT_SYMBOL_GPL(srcu_drive_gp);
+
+/*
+ * Enqueue an SRCU callback on the specified srcu_struct structure,
+ * initiating grace-period processing if it is not already running.
+ */
+void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
+	       rcu_callback_t func)
+{
+	unsigned long flags;
+
+	rhp->func = func;
+	rhp->next = NULL;
+	local_irq_save(flags);
+	*sp->srcu_cb_tail = rhp;
+	sp->srcu_cb_tail = &rhp->next;
+	local_irq_restore(flags);
+	if (!READ_ONCE(sp->srcu_gp_running))
+		schedule_work(&sp->srcu_work);
+}
+EXPORT_SYMBOL_GPL(call_srcu);
+
+/*
+ * synchronize_srcu - wait for prior SRCU read-side critical-section completion
+ */
+void synchronize_srcu(struct srcu_struct *sp)
+{
+	struct rcu_synchronize rs;
+
+	init_rcu_head_on_stack(&rs.head);
+	init_completion(&rs.completion);
+	call_srcu(sp, &rs.head, wakeme_after_rcu);
+	wait_for_completion(&rs.completion);
+	destroy_rcu_head_on_stack(&rs.head);
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu);
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
new file mode 100644
index 000000000000..d0ca524bf042
--- /dev/null
+++ b/kernel/rcu/srcutree.c
@@ -0,0 +1,1227 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ * Copyright (C) Fujitsu, 2012
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ *	   Lai Jiangshan <laijs@cn.fujitsu.com>
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ *		Documentation/RCU/ *.txt
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate_wait.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/srcu.h>
+
+#include "rcu.h"
+#include "rcu_segcblist.h"
+
+/* Holdoff in nanoseconds for auto-expediting. */
+#define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000)
+static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF;
+module_param(exp_holdoff, ulong, 0444);
+
+/* Overflow-check frequency.  N bits roughly says every 2**N grace periods. */
+static ulong counter_wrap_check = (ULONG_MAX >> 2);
+module_param(counter_wrap_check, ulong, 0444);
+
+static void srcu_invoke_callbacks(struct work_struct *work);
+static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
+
+/*
+ * Initialize SRCU combining tree.  Note that statically allocated
+ * srcu_struct structures might already have srcu_read_lock() and
+ * srcu_read_unlock() running against them.  So if the is_static parameter
+ * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
+ */
+static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
+{
+	int cpu;
+	int i;
+	int level = 0;
+	int levelspread[RCU_NUM_LVLS];
+	struct srcu_data *sdp;
+	struct srcu_node *snp;
+	struct srcu_node *snp_first;
+
+	/* Work out the overall tree geometry. */
+	sp->level[0] = &sp->node[0];
+	for (i = 1; i < rcu_num_lvls; i++)
+		sp->level[i] = sp->level[i - 1] + num_rcu_lvl[i - 1];
+	rcu_init_levelspread(levelspread, num_rcu_lvl);
+
+	/* Each pass through this loop initializes one srcu_node structure. */
+	rcu_for_each_node_breadth_first(sp, snp) {
+		raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock));
+		WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
+			     ARRAY_SIZE(snp->srcu_data_have_cbs));
+		for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
+			snp->srcu_have_cbs[i] = 0;
+			snp->srcu_data_have_cbs[i] = 0;
+		}
+		snp->srcu_gp_seq_needed_exp = 0;
+		snp->grplo = -1;
+		snp->grphi = -1;
+		if (snp == &sp->node[0]) {
+			/* Root node, special case. */
+			snp->srcu_parent = NULL;
+			continue;
+		}
+
+		/* Non-root node. */
+		if (snp == sp->level[level + 1])
+			level++;
+		snp->srcu_parent = sp->level[level - 1] +
+				   (snp - sp->level[level]) /
+				   levelspread[level - 1];
+	}
+
+	/*
+	 * Initialize the per-CPU srcu_data array, which feeds into the
+	 * leaves of the srcu_node tree.
+	 */
+	WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) !=
+		     ARRAY_SIZE(sdp->srcu_unlock_count));
+	level = rcu_num_lvls - 1;
+	snp_first = sp->level[level];
+	for_each_possible_cpu(cpu) {
+		sdp = per_cpu_ptr(sp->sda, cpu);
+		raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
+		rcu_segcblist_init(&sdp->srcu_cblist);
+		sdp->srcu_cblist_invoking = false;
+		sdp->srcu_gp_seq_needed = sp->srcu_gp_seq;
+		sdp->srcu_gp_seq_needed_exp = sp->srcu_gp_seq;
+		sdp->mynode = &snp_first[cpu / levelspread[level]];
+		for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
+			if (snp->grplo < 0)
+				snp->grplo = cpu;
+			snp->grphi = cpu;
+		}
+		sdp->cpu = cpu;
+		INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks);
+		sdp->sp = sp;
+		sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
+		if (is_static)
+			continue;
+
+		/* Dynamically allocated, better be no srcu_read_locks()! */
+		for (i = 0; i < ARRAY_SIZE(sdp->srcu_lock_count); i++) {
+			sdp->srcu_lock_count[i] = 0;
+			sdp->srcu_unlock_count[i] = 0;
+		}
+	}
+}
+
+/*
+ * Initialize non-compile-time initialized fields, including the
+ * associated srcu_node and srcu_data structures.  The is_static
+ * parameter is passed through to init_srcu_struct_nodes(), and
+ * also tells us that ->sda has already been wired up to srcu_data.
+ */
+static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
+{
+	mutex_init(&sp->srcu_cb_mutex);
+	mutex_init(&sp->srcu_gp_mutex);
+	sp->srcu_idx = 0;
+	sp->srcu_gp_seq = 0;
+	sp->srcu_barrier_seq = 0;
+	mutex_init(&sp->srcu_barrier_mutex);
+	atomic_set(&sp->srcu_barrier_cpu_cnt, 0);
+	INIT_DELAYED_WORK(&sp->work, process_srcu);
+	if (!is_static)
+		sp->sda = alloc_percpu(struct srcu_data);
+	init_srcu_struct_nodes(sp, is_static);
+	sp->srcu_gp_seq_needed_exp = 0;
+	sp->srcu_last_gp_end = ktime_get_mono_fast_ns();
+	smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */
+	return sp->sda ? 0 : -ENOMEM;
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+int __init_srcu_struct(struct srcu_struct *sp, const char *name,
+		       struct lock_class_key *key)
+{
+	/* Don't re-initialize a lock while it is held. */
+	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
+	lockdep_init_map(&sp->dep_map, name, key, 0);
+	raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
+	return init_srcu_struct_fields(sp, false);
+}
+EXPORT_SYMBOL_GPL(__init_srcu_struct);
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+/**
+ * init_srcu_struct - initialize a sleep-RCU structure
+ * @sp: structure to initialize.
+ *
+ * Must invoke this on a given srcu_struct before passing that srcu_struct
+ * to any other function.  Each srcu_struct represents a separate domain
+ * of SRCU protection.
+ */
+int init_srcu_struct(struct srcu_struct *sp)
+{
+	raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
+	return init_srcu_struct_fields(sp, false);
+}
+EXPORT_SYMBOL_GPL(init_srcu_struct);
+
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+/*
+ * First-use initialization of statically allocated srcu_struct
+ * structure.  Wiring up the combining tree is more than can be
+ * done with compile-time initialization, so this check is added
+ * to each update-side SRCU primitive.  Use sp->lock, which -is-
+ * compile-time initialized, to resolve races involving multiple
+ * CPUs trying to garner first-use privileges.
+ */
+static void check_init_srcu_struct(struct srcu_struct *sp)
+{
+	unsigned long flags;
+
+	WARN_ON_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INIT);
+	/* The smp_load_acquire() pairs with the smp_store_release(). */
+	if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
+		return; /* Already initialized. */
+	raw_spin_lock_irqsave_rcu_node(sp, flags);
+	if (!rcu_seq_state(sp->srcu_gp_seq_needed)) {
+		raw_spin_unlock_irqrestore_rcu_node(sp, flags);
+		return;
+	}
+	init_srcu_struct_fields(sp, true);
+	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
+}
+
+/*
+ * Returns approximate total of the readers' ->srcu_lock_count[] values
+ * for the rank of per-CPU counters specified by idx.
+ */
+static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx)
+{
+	int cpu;
+	unsigned long sum = 0;
+
+	for_each_possible_cpu(cpu) {
+		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);
+
+		sum += READ_ONCE(cpuc->srcu_lock_count[idx]);
+	}
+	return sum;
+}
+
+/*
+ * Returns approximate total of the readers' ->srcu_unlock_count[] values
+ * for the rank of per-CPU counters specified by idx.
+ */
+static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx)
+{
+	int cpu;
+	unsigned long sum = 0;
+
+	for_each_possible_cpu(cpu) {
+		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);
+
+		sum += READ_ONCE(cpuc->srcu_unlock_count[idx]);
+	}
+	return sum;
+}
+
+/*
+ * Return true if the number of pre-existing readers is determined to
+ * be zero.
+ */
+static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
+{
+	unsigned long unlocks;
+
+	unlocks = srcu_readers_unlock_idx(sp, idx);
+
+	/*
+	 * Make sure that a lock is always counted if the corresponding
+	 * unlock is counted. Needs to be a smp_mb() as the read side may
+	 * contain a read from a variable that is written to before the
+	 * synchronize_srcu() in the write side. In this case smp_mb()s
+	 * A and B act like the store buffering pattern.
+	 *
+	 * This smp_mb() also pairs with smp_mb() C to prevent accesses
+	 * after the synchronize_srcu() from being executed before the
+	 * grace period ends.
+	 */
+	smp_mb(); /* A */
+
+	/*
+	 * If the locks are the same as the unlocks, then there must have
+	 * been no readers on this index at some time in between. This does
+	 * not mean that there are no more readers, as one could have read
+	 * the current index but not have incremented the lock counter yet.
+	 *
+	 * So suppose that the updater is preempted here for so long
+	 * that more than ULONG_MAX non-nested readers come and go in
+	 * the meantime.  It turns out that this cannot result in overflow
+	 * because if a reader modifies its unlock count after we read it
+	 * above, then that reader's next load of ->srcu_idx is guaranteed
+	 * to get the new value, which will cause it to operate on the
+	 * other bank of counters, where it cannot contribute to the
+	 * overflow of these counters.  This means that there is a maximum
+	 * of 2*NR_CPUS increments, which cannot overflow given current
+	 * systems, especially not on 64-bit systems.
+	 *
+	 * OK, how about nesting?  This does impose a limit on nesting
+	 * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient,
+	 * especially on 64-bit systems.
+	 */
+	return srcu_readers_lock_idx(sp, idx) == unlocks;
+}
+
+/**
+ * srcu_readers_active - returns true if there are readers. and false
+ *                       otherwise
+ * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
+ *
+ * Note that this is not an atomic primitive, and can therefore suffer
+ * severe errors when invoked on an active srcu_struct.  That said, it
+ * can be useful as an error check at cleanup time.
+ */
+static bool srcu_readers_active(struct srcu_struct *sp)
+{
+	int cpu;
+	unsigned long sum = 0;
+
+	for_each_possible_cpu(cpu) {
+		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);
+
+		sum += READ_ONCE(cpuc->srcu_lock_count[0]);
+		sum += READ_ONCE(cpuc->srcu_lock_count[1]);
+		sum -= READ_ONCE(cpuc->srcu_unlock_count[0]);
+		sum -= READ_ONCE(cpuc->srcu_unlock_count[1]);
+	}
+	return sum;
+}
+
+#define SRCU_INTERVAL		1
+
+/*
+ * Return grace-period delay, zero if there are expedited grace
+ * periods pending, SRCU_INTERVAL otherwise.
+ */
+static unsigned long srcu_get_delay(struct srcu_struct *sp)
+{
+	if (ULONG_CMP_LT(READ_ONCE(sp->srcu_gp_seq),
+			 READ_ONCE(sp->srcu_gp_seq_needed_exp)))
+		return 0;
+	return SRCU_INTERVAL;
+}
+
+/**
+ * cleanup_srcu_struct - deconstruct a sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory.
+ */
+void cleanup_srcu_struct(struct srcu_struct *sp)
+{
+	int cpu;
+
+	if (WARN_ON(!srcu_get_delay(sp)))
+		return; /* Leakage unless caller handles error. */
+	if (WARN_ON(srcu_readers_active(sp)))
+		return; /* Leakage unless caller handles error. */
+	flush_delayed_work(&sp->work);
+	for_each_possible_cpu(cpu)
+		flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
+	if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
+	    WARN_ON(srcu_readers_active(sp))) {
+		pr_info("cleanup_srcu_struct: Active srcu_struct %p state: %d\n", sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
+		return; /* Caller forgot to stop doing call_srcu()? */
+	}
+	free_percpu(sp->sda);
+	sp->sda = NULL;
+}
+EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct.
+ * Returns an index that must be passed to the matching srcu_read_unlock().
+ */
+int __srcu_read_lock(struct srcu_struct *sp)
+{
+	int idx;
+
+	idx = READ_ONCE(sp->srcu_idx) & 0x1;
+	this_cpu_inc(sp->sda->srcu_lock_count[idx]);
+	smp_mb(); /* B */  /* Avoid leaking the critical section. */
+	return idx;
+}
+EXPORT_SYMBOL_GPL(__srcu_read_lock);
+
+/*
+ * Removes the count for the old reader from the appropriate per-CPU
+ * element of the srcu_struct.  Note that this may well be a different
+ * CPU than that which was incremented by the corresponding srcu_read_lock().
+ */
+void __srcu_read_unlock(struct srcu_struct *sp, int idx)
+{
+	smp_mb(); /* C */  /* Avoid leaking the critical section. */
+	this_cpu_inc(sp->sda->srcu_unlock_count[idx]);
+}
+EXPORT_SYMBOL_GPL(__srcu_read_unlock);
+
+/*
+ * We use an adaptive strategy for synchronize_srcu() and especially for
+ * synchronize_srcu_expedited().  We spin for a fixed time period
+ * (defined below) to allow SRCU readers to exit their read-side critical
+ * sections.  If there are still some readers after a few microseconds,
+ * we repeatedly block for 1-millisecond time periods.
+ */
+#define SRCU_RETRY_CHECK_DELAY		5
+
+/*
+ * Start an SRCU grace period.
+ */
+static void srcu_gp_start(struct srcu_struct *sp)
+{
+	struct srcu_data *sdp = this_cpu_ptr(sp->sda);
+	int state;
+
+	lockdep_assert_held(&sp->lock);
+	WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
+	rcu_segcblist_advance(&sdp->srcu_cblist,
+			      rcu_seq_current(&sp->srcu_gp_seq));
+	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
+				       rcu_seq_snap(&sp->srcu_gp_seq));
+	smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
+	rcu_seq_start(&sp->srcu_gp_seq);
+	state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
+	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
+}
+
+/*
+ * Track online CPUs to guide callback workqueue placement.
+ */
+DEFINE_PER_CPU(bool, srcu_online);
+
+void srcu_online_cpu(unsigned int cpu)
+{
+	WRITE_ONCE(per_cpu(srcu_online, cpu), true);
+}
+
+void srcu_offline_cpu(unsigned int cpu)
+{
+	WRITE_ONCE(per_cpu(srcu_online, cpu), false);
+}
+
+/*
+ * Place the workqueue handler on the specified CPU if online, otherwise
+ * just run it whereever.  This is useful for placing workqueue handlers
+ * that are to invoke the specified CPU's callbacks.
+ */
+static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
+				       struct delayed_work *dwork,
+				       unsigned long delay)
+{
+	bool ret;
+
+	preempt_disable();
+	if (READ_ONCE(per_cpu(srcu_online, cpu)))
+		ret = queue_delayed_work_on(cpu, wq, dwork, delay);
+	else
+		ret = queue_delayed_work(wq, dwork, delay);
+	preempt_enable();
+	return ret;
+}
+
+/*
+ * Schedule callback invocation for the specified srcu_data structure,
+ * if possible, on the corresponding CPU.
+ */
+static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
+{
+	srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq,
+				   &sdp->work, delay);
+}
+
+/*
+ * Schedule callback invocation for all srcu_data structures associated
+ * with the specified srcu_node structure that have callbacks for the
+ * just-completed grace period, the one corresponding to idx.  If possible,
+ * schedule this invocation on the corresponding CPUs.
+ */
+static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp,
+				  unsigned long mask, unsigned long delay)
+{
+	int cpu;
+
+	for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
+		if (!(mask & (1 << (cpu - snp->grplo))))
+			continue;
+		srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), delay);
+	}
+}
+
+/*
+ * Note the end of an SRCU grace period.  Initiates callback invocation
+ * and starts a new grace period if needed.
+ *
+ * The ->srcu_cb_mutex acquisition does not protect any data, but
+ * instead prevents more than one grace period from starting while we
+ * are initiating callback invocation.  This allows the ->srcu_have_cbs[]
+ * array to have a finite number of elements.
+ */
+static void srcu_gp_end(struct srcu_struct *sp)
+{
+	unsigned long cbdelay;
+	bool cbs;
+	int cpu;
+	unsigned long flags;
+	unsigned long gpseq;
+	int idx;
+	int idxnext;
+	unsigned long mask;
+	struct srcu_data *sdp;
+	struct srcu_node *snp;
+
+	/* Prevent more than one additional grace period. */
+	mutex_lock(&sp->srcu_cb_mutex);
+
+	/* End the current grace period. */
+	raw_spin_lock_irq_rcu_node(sp);
+	idx = rcu_seq_state(sp->srcu_gp_seq);
+	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
+	cbdelay = srcu_get_delay(sp);
+	sp->srcu_last_gp_end = ktime_get_mono_fast_ns();
+	rcu_seq_end(&sp->srcu_gp_seq);
+	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
+	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
+		sp->srcu_gp_seq_needed_exp = gpseq;
+	raw_spin_unlock_irq_rcu_node(sp);
+	mutex_unlock(&sp->srcu_gp_mutex);
+	/* A new grace period can start at this point.  But only one. */
+
+	/* Initiate callback invocation as needed. */
+	idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
+	idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
+	rcu_for_each_node_breadth_first(sp, snp) {
+		raw_spin_lock_irq_rcu_node(snp);
+		cbs = false;
+		if (snp >= sp->level[rcu_num_lvls - 1])
+			cbs = snp->srcu_have_cbs[idx] == gpseq;
+		snp->srcu_have_cbs[idx] = gpseq;
+		rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
+		if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq))
+			snp->srcu_gp_seq_needed_exp = gpseq;
+		mask = snp->srcu_data_have_cbs[idx];
+		snp->srcu_data_have_cbs[idx] = 0;
+		raw_spin_unlock_irq_rcu_node(snp);
+		if (cbs)
+			srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
+
+		/* Occasionally prevent srcu_data counter wrap. */
+		if (!(gpseq & counter_wrap_check))
+			for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
+				sdp = per_cpu_ptr(sp->sda, cpu);
+				raw_spin_lock_irqsave_rcu_node(sdp, flags);
+				if (ULONG_CMP_GE(gpseq,
+						 sdp->srcu_gp_seq_needed + 100))
+					sdp->srcu_gp_seq_needed = gpseq;
+				raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
+			}
+	}
+
+	/* Callback initiation done, allow grace periods after next. */
+	mutex_unlock(&sp->srcu_cb_mutex);
+
+	/* Start a new grace period if needed. */
+	raw_spin_lock_irq_rcu_node(sp);
+	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
+	if (!rcu_seq_state(gpseq) &&
+	    ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
+		srcu_gp_start(sp);
+		raw_spin_unlock_irq_rcu_node(sp);
+		/* Throttle expedited grace periods: Should be rare! */
+		srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
+				    ? 0 : SRCU_INTERVAL);
+	} else {
+		raw_spin_unlock_irq_rcu_node(sp);
+	}
+}
+
+/*
+ * Funnel-locking scheme to scalably mediate many concurrent expedited
+ * grace-period requests.  This function is invoked for the first known
+ * expedited request for a grace period that has already been requested,
+ * but without expediting.  To start a completely new grace period,
+ * whether expedited or not, use srcu_funnel_gp_start() instead.
+ */
+static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
+				  unsigned long s)
+{
+	unsigned long flags;
+
+	for (; snp != NULL; snp = snp->srcu_parent) {
+		if (rcu_seq_done(&sp->srcu_gp_seq, s) ||
+		    ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
+			return;
+		raw_spin_lock_irqsave_rcu_node(snp, flags);
+		if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
+			raw_spin_unlock_irqrestore_rcu_node(snp, flags);
+			return;
+		}
+		WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
+		raw_spin_unlock_irqrestore_rcu_node(snp, flags);
+	}
+	raw_spin_lock_irqsave_rcu_node(sp, flags);
+	if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
+		sp->srcu_gp_seq_needed_exp = s;
+	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
+}
+
+/*
+ * Funnel-locking scheme to scalably mediate many concurrent grace-period
+ * requests.  The winner has to do the work of actually starting grace
+ * period s.  Losers must either ensure that their desired grace-period
+ * number is recorded on at least their leaf srcu_node structure, or they
+ * must take steps to invoke their own callbacks.
+ */
+static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
+				 unsigned long s, bool do_norm)
+{
+	unsigned long flags;
+	int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
+	struct srcu_node *snp = sdp->mynode;
+	unsigned long snp_seq;
+
+	/* Each pass through the loop does one level of the srcu_node tree. */
+	for (; snp != NULL; snp = snp->srcu_parent) {
+		if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode)
+			return; /* GP already done and CBs recorded. */
+		raw_spin_lock_irqsave_rcu_node(snp, flags);
+		if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
+			snp_seq = snp->srcu_have_cbs[idx];
+			if (snp == sdp->mynode && snp_seq == s)
+				snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
+			raw_spin_unlock_irqrestore_rcu_node(snp, flags);
+			if (snp == sdp->mynode && snp_seq != s) {
+				srcu_schedule_cbs_sdp(sdp, do_norm
+							   ? SRCU_INTERVAL
+							   : 0);
+				return;
+			}
+			if (!do_norm)
+				srcu_funnel_exp_start(sp, snp, s);
+			return;
+		}
+		snp->srcu_have_cbs[idx] = s;
+		if (snp == sdp->mynode)
+			snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
+		if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
+			snp->srcu_gp_seq_needed_exp = s;
+		raw_spin_unlock_irqrestore_rcu_node(snp, flags);
+	}
+
+	/* Top of tree, must ensure the grace period will be started. */
+	raw_spin_lock_irqsave_rcu_node(sp, flags);
+	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) {
+		/*
+		 * Record need for grace period s.  Pair with load
+		 * acquire setting up for initialization.
+		 */
+		smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/
+	}
+	if (!do_norm && ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
+		sp->srcu_gp_seq_needed_exp = s;
+
+	/* If grace period not already done and none in progress, start it. */
+	if (!rcu_seq_done(&sp->srcu_gp_seq, s) &&
+	    rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) {
+		WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
+		srcu_gp_start(sp);
+		queue_delayed_work(system_power_efficient_wq, &sp->work,
+				   srcu_get_delay(sp));
+	}
+	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
+}
+
+/*
+ * Wait until all readers counted by array index idx complete, but
+ * loop an additional time if there is an expedited grace period pending.
+ * The caller must ensure that ->srcu_idx is not changed while checking.
+ */
+static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
+{
+	for (;;) {
+		if (srcu_readers_active_idx_check(sp, idx))
+			return true;
+		if (--trycount + !srcu_get_delay(sp) <= 0)
+			return false;
+		udelay(SRCU_RETRY_CHECK_DELAY);
+	}
+}
+
+/*
+ * Increment the ->srcu_idx counter so that future SRCU readers will
+ * use the other rank of the ->srcu_(un)lock_count[] arrays.  This allows
+ * us to wait for pre-existing readers in a starvation-free manner.
+ */
+static void srcu_flip(struct srcu_struct *sp)
+{
+	/*
+	 * Ensure that if this updater saw a given reader's increment
+	 * from __srcu_read_lock(), that reader was using an old value
+	 * of ->srcu_idx.  Also ensure that if a given reader sees the
+	 * new value of ->srcu_idx, this updater's earlier scans cannot
+	 * have seen that reader's increments (which is OK, because this
+	 * grace period need not wait on that reader).
+	 */
+	smp_mb(); /* E */  /* Pairs with B and C. */
+
+	WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1);
+
+	/*
+	 * Ensure that if the updater misses an __srcu_read_unlock()
+	 * increment, that task's next __srcu_read_lock() will see the
+	 * above counter update.  Note that both this memory barrier
+	 * and the one in srcu_readers_active_idx_check() provide the
+	 * guarantee for __srcu_read_lock().
+	 */
+	smp_mb(); /* D */  /* Pairs with C. */
+}
+
+/*
+ * If SRCU is likely idle, return true, otherwise return false.
+ *
+ * Note that it is OK for several current from-idle requests for a new
+ * grace period from idle to specify expediting because they will all end
+ * up requesting the same grace period anyhow.  So no loss.
+ *
+ * Note also that if any CPU (including the current one) is still invoking
+ * callbacks, this function will nevertheless say "idle".  This is not
+ * ideal, but the overhead of checking all CPUs' callback lists is even
+ * less ideal, especially on large systems.  Furthermore, the wakeup
+ * can happen before the callback is fully removed, so we have no choice
+ * but to accept this type of error.
+ *
+ * This function is also subject to counter-wrap errors, but let's face
+ * it, if this function was preempted for enough time for the counters
+ * to wrap, it really doesn't matter whether or not we expedite the grace
+ * period.  The extra overhead of a needlessly expedited grace period is
+ * negligible when amoritized over that time period, and the extra latency
+ * of a needlessly non-expedited grace period is similarly negligible.
+ */
+static bool srcu_might_be_idle(struct srcu_struct *sp)
+{
+	unsigned long curseq;
+	unsigned long flags;
+	struct srcu_data *sdp;
+	unsigned long t;
+
+	/* If the local srcu_data structure has callbacks, not idle.  */
+	local_irq_save(flags);
+	sdp = this_cpu_ptr(sp->sda);
+	if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
+		local_irq_restore(flags);
+		return false; /* Callbacks already present, so not idle. */
+	}
+	local_irq_restore(flags);
+
+	/*
+	 * No local callbacks, so probabalistically probe global state.
+	 * Exact information would require acquiring locks, which would
+	 * kill scalability, hence the probabalistic nature of the probe.
+	 */
+
+	/* First, see if enough time has passed since the last GP. */
+	t = ktime_get_mono_fast_ns();
+	if (exp_holdoff == 0 ||
+	    time_in_range_open(t, sp->srcu_last_gp_end,
+			       sp->srcu_last_gp_end + exp_holdoff))
+		return false; /* Too soon after last GP. */
+
+	/* Next, check for probable idleness. */
+	curseq = rcu_seq_current(&sp->srcu_gp_seq);
+	smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
+	if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed)))
+		return false; /* Grace period in progress, so not idle. */
+	smp_mb(); /* Order ->srcu_gp_seq with prior access. */
+	if (curseq != rcu_seq_current(&sp->srcu_gp_seq))
+		return false; /* GP # changed, so not idle. */
+	return true; /* With reasonable probability, idle! */
+}
+
+/*
+ * SRCU callback function to leak a callback.
+ */
+static void srcu_leak_callback(struct rcu_head *rhp)
+{
+}
+
+/*
+ * Enqueue an SRCU callback on the srcu_data structure associated with
+ * the current CPU and the specified srcu_struct structure, initiating
+ * grace-period processing if it is not already running.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing SRCU read-side critical section.  On systems with
+ * more than one CPU, this means that when "func()" is invoked, each CPU
+ * is guaranteed to have executed a full memory barrier since the end of
+ * its last corresponding SRCU read-side critical section whose beginning
+ * preceded the call to call_rcu().  It also means that each CPU executing
+ * an SRCU read-side critical section that continues beyond the start of
+ * "func()" must have executed a memory barrier after the call_rcu()
+ * but before the beginning of that SRCU read-side critical section.
+ * Note that these guarantees include CPUs that are offline, idle, or
+ * executing in user mode, as well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting SRCU callback function "func()", then both CPU A and CPU
+ * B are guaranteed to execute a full memory barrier during the time
+ * interval between the call to call_rcu() and the invocation of "func()".
+ * This guarantee applies even if CPU A and CPU B are the same CPU (but
+ * again only if the system has more than one CPU).
+ *
+ * Of course, these guarantees apply only for invocations of call_srcu(),
+ * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
+ * srcu_struct structure.
+ */
+void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
+		 rcu_callback_t func, bool do_norm)
+{
+	unsigned long flags;
+	bool needexp = false;
+	bool needgp = false;
+	unsigned long s;
+	struct srcu_data *sdp;
+
+	check_init_srcu_struct(sp);
+	if (debug_rcu_head_queue(rhp)) {
+		/* Probable double call_srcu(), so leak the callback. */
+		WRITE_ONCE(rhp->func, srcu_leak_callback);
+		WARN_ONCE(1, "call_srcu(): Leaked duplicate callback\n");
+		return;
+	}
+	rhp->func = func;
+	local_irq_save(flags);
+	sdp = this_cpu_ptr(sp->sda);
+	raw_spin_lock_rcu_node(sdp);
+	rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false);
+	rcu_segcblist_advance(&sdp->srcu_cblist,
+			      rcu_seq_current(&sp->srcu_gp_seq));
+	s = rcu_seq_snap(&sp->srcu_gp_seq);
+	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s);
+	if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
+		sdp->srcu_gp_seq_needed = s;
+		needgp = true;
+	}
+	if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) {
+		sdp->srcu_gp_seq_needed_exp = s;
+		needexp = true;
+	}
+	raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
+	if (needgp)
+		srcu_funnel_gp_start(sp, sdp, s, do_norm);
+	else if (needexp)
+		srcu_funnel_exp_start(sp, sdp->mynode, s);
+}
+
+/**
+ * call_srcu() - Queue a callback for invocation after an SRCU grace period
+ * @sp: srcu_struct in queue the callback
+ * @head: structure to be used for queueing the SRCU callback.
+ * @func: function to be invoked after the SRCU grace period
+ *
+ * The callback function will be invoked some time after a full SRCU
+ * grace period elapses, in other words after all pre-existing SRCU
+ * read-side critical sections have completed.  However, the callback
+ * function might well execute concurrently with other SRCU read-side
+ * critical sections that started after call_srcu() was invoked.  SRCU
+ * read-side critical sections are delimited by srcu_read_lock() and
+ * srcu_read_unlock(), and may be nested.
+ *
+ * The callback will be invoked from process context, but must nevertheless
+ * be fast and must not block.
+ */
+void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
+	       rcu_callback_t func)
+{
+	__call_srcu(sp, rhp, func, true);
+}
+EXPORT_SYMBOL_GPL(call_srcu);
+
+/*
+ * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
+ */
+static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm)
+{
+	struct rcu_synchronize rcu;
+
+	RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) ||
+			 lock_is_held(&rcu_bh_lock_map) ||
+			 lock_is_held(&rcu_lock_map) ||
+			 lock_is_held(&rcu_sched_lock_map),
+			 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
+
+	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
+		return;
+	might_sleep();
+	check_init_srcu_struct(sp);
+	init_completion(&rcu.completion);
+	init_rcu_head_on_stack(&rcu.head);
+	__call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm);
+	wait_for_completion(&rcu.completion);
+	destroy_rcu_head_on_stack(&rcu.head);
+}
+
+/**
+ * synchronize_srcu_expedited - Brute-force SRCU grace period
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Wait for an SRCU grace period to elapse, but be more aggressive about
+ * spinning rather than blocking when waiting.
+ *
+ * Note that synchronize_srcu_expedited() has the same deadlock and
+ * memory-ordering properties as does synchronize_srcu().
+ */
+void synchronize_srcu_expedited(struct srcu_struct *sp)
+{
+	__synchronize_srcu(sp, rcu_gp_is_normal());
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
+
+/**
+ * synchronize_srcu - wait for prior SRCU read-side critical-section completion
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Wait for the count to drain to zero of both indexes. To avoid the
+ * possible starvation of synchronize_srcu(), it waits for the count of
+ * the index=((->srcu_idx & 1) ^ 1) to drain to zero at first,
+ * and then flip the srcu_idx and wait for the count of the other index.
+ *
+ * Can block; must be called from process context.
+ *
+ * Note that it is illegal to call synchronize_srcu() from the corresponding
+ * SRCU read-side critical section; doing so will result in deadlock.
+ * However, it is perfectly legal to call synchronize_srcu() on one
+ * srcu_struct from some other srcu_struct's read-side critical section,
+ * as long as the resulting graph of srcu_structs is acyclic.
+ *
+ * There are memory-ordering constraints implied by synchronize_srcu().
+ * On systems with more than one CPU, when synchronize_srcu() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since
+ * the end of its last corresponding SRCU-sched read-side critical section
+ * whose beginning preceded the call to synchronize_srcu().  In addition,
+ * each CPU having an SRCU read-side critical section that extends beyond
+ * the return from synchronize_srcu() is guaranteed to have executed a
+ * full memory barrier after the beginning of synchronize_srcu() and before
+ * the beginning of that SRCU read-side critical section.  Note that these
+ * guarantees include CPUs that are offline, idle, or executing in user mode,
+ * as well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_srcu(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_srcu().  This guarantee applies even if CPU A and CPU B
+ * are the same CPU, but again only if the system has more than one CPU.
+ *
+ * Of course, these memory-ordering guarantees apply only when
+ * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
+ * passed the same srcu_struct structure.
+ *
+ * If SRCU is likely idle, expedite the first request.  This semantic
+ * was provided by Classic SRCU, and is relied upon by its users, so TREE
+ * SRCU must also provide it.  Note that detecting idleness is heuristic
+ * and subject to both false positives and negatives.
+ */
+void synchronize_srcu(struct srcu_struct *sp)
+{
+	if (srcu_might_be_idle(sp) || rcu_gp_is_expedited())
+		synchronize_srcu_expedited(sp);
+	else
+		__synchronize_srcu(sp, true);
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu);
+
+/*
+ * Callback function for srcu_barrier() use.
+ */
+static void srcu_barrier_cb(struct rcu_head *rhp)
+{
+	struct srcu_data *sdp;
+	struct srcu_struct *sp;
+
+	sdp = container_of(rhp, struct srcu_data, srcu_barrier_head);
+	sp = sdp->sp;
+	if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt))
+		complete(&sp->srcu_barrier_completion);
+}
+
+/**
+ * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
+ * @sp: srcu_struct on which to wait for in-flight callbacks.
+ */
+void srcu_barrier(struct srcu_struct *sp)
+{
+	int cpu;
+	struct srcu_data *sdp;
+	unsigned long s = rcu_seq_snap(&sp->srcu_barrier_seq);
+
+	check_init_srcu_struct(sp);
+	mutex_lock(&sp->srcu_barrier_mutex);
+	if (rcu_seq_done(&sp->srcu_barrier_seq, s)) {
+		smp_mb(); /* Force ordering following return. */
+		mutex_unlock(&sp->srcu_barrier_mutex);
+		return; /* Someone else did our work for us. */
+	}
+	rcu_seq_start(&sp->srcu_barrier_seq);
+	init_completion(&sp->srcu_barrier_completion);
+
+	/* Initial count prevents reaching zero until all CBs are posted. */
+	atomic_set(&sp->srcu_barrier_cpu_cnt, 1);
+
+	/*
+	 * Each pass through this loop enqueues a callback, but only
+	 * on CPUs already having callbacks enqueued.  Note that if
+	 * a CPU already has callbacks enqueue, it must have already
+	 * registered the need for a future grace period, so all we
+	 * need do is enqueue a callback that will use the same
+	 * grace period as the last callback already in the queue.
+	 */
+	for_each_possible_cpu(cpu) {
+		sdp = per_cpu_ptr(sp->sda, cpu);
+		raw_spin_lock_irq_rcu_node(sdp);
+		atomic_inc(&sp->srcu_barrier_cpu_cnt);
+		sdp->srcu_barrier_head.func = srcu_barrier_cb;
+		debug_rcu_head_queue(&sdp->srcu_barrier_head);
+		if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
+					   &sdp->srcu_barrier_head, 0)) {
+			debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
+			atomic_dec(&sp->srcu_barrier_cpu_cnt);
+		}
+		raw_spin_unlock_irq_rcu_node(sdp);
+	}
+
+	/* Remove the initial count, at which point reaching zero can happen. */
+	if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt))
+		complete(&sp->srcu_barrier_completion);
+	wait_for_completion(&sp->srcu_barrier_completion);
+
+	rcu_seq_end(&sp->srcu_barrier_seq);
+	mutex_unlock(&sp->srcu_barrier_mutex);
+}
+EXPORT_SYMBOL_GPL(srcu_barrier);
+
+/**
+ * srcu_batches_completed - return batches completed.
+ * @sp: srcu_struct on which to report batch completion.
+ *
+ * Report the number of batches, correlated with, but not necessarily
+ * precisely the same as, the number of grace periods that have elapsed.
+ */
+unsigned long srcu_batches_completed(struct srcu_struct *sp)
+{
+	return sp->srcu_idx;
+}
+EXPORT_SYMBOL_GPL(srcu_batches_completed);
+
+/*
+ * Core SRCU state machine.  Push state bits of ->srcu_gp_seq
+ * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has
+ * completed in that state.
+ */
+static void srcu_advance_state(struct srcu_struct *sp)
+{
+	int idx;
+
+	mutex_lock(&sp->srcu_gp_mutex);
+
+	/*
+	 * Because readers might be delayed for an extended period after
+	 * fetching ->srcu_idx for their index, at any point in time there
+	 * might well be readers using both idx=0 and idx=1.  We therefore
+	 * need to wait for readers to clear from both index values before
+	 * invoking a callback.
+	 *
+	 * The load-acquire ensures that we see the accesses performed
+	 * by the prior grace period.
+	 */
+	idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
+	if (idx == SRCU_STATE_IDLE) {
+		raw_spin_lock_irq_rcu_node(sp);
+		if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
+			WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq));
+			raw_spin_unlock_irq_rcu_node(sp);
+			mutex_unlock(&sp->srcu_gp_mutex);
+			return;
+		}
+		idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
+		if (idx == SRCU_STATE_IDLE)
+			srcu_gp_start(sp);
+		raw_spin_unlock_irq_rcu_node(sp);
+		if (idx != SRCU_STATE_IDLE) {
+			mutex_unlock(&sp->srcu_gp_mutex);
+			return; /* Someone else started the grace period. */
+		}
+	}
+
+	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
+		idx = 1 ^ (sp->srcu_idx & 1);
+		if (!try_check_zero(sp, idx, 1)) {
+			mutex_unlock(&sp->srcu_gp_mutex);
+			return; /* readers present, retry later. */
+		}
+		srcu_flip(sp);
+		rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2);
+	}
+
+	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
+
+		/*
+		 * SRCU read-side critical sections are normally short,
+		 * so check at least twice in quick succession after a flip.
+		 */
+		idx = 1 ^ (sp->srcu_idx & 1);
+		if (!try_check_zero(sp, idx, 2)) {
+			mutex_unlock(&sp->srcu_gp_mutex);
+			return; /* readers present, retry later. */
+		}
+		srcu_gp_end(sp);  /* Releases ->srcu_gp_mutex. */
+	}
+}
+
+/*
+ * Invoke a limited number of SRCU callbacks that have passed through
+ * their grace period.  If there are more to do, SRCU will reschedule
+ * the workqueue.  Note that needed memory barriers have been executed
+ * in this task's context by srcu_readers_active_idx_check().
+ */
+static void srcu_invoke_callbacks(struct work_struct *work)
+{
+	bool more;
+	struct rcu_cblist ready_cbs;
+	struct rcu_head *rhp;
+	struct srcu_data *sdp;
+	struct srcu_struct *sp;
+
+	sdp = container_of(work, struct srcu_data, work.work);
+	sp = sdp->sp;
+	rcu_cblist_init(&ready_cbs);
+	raw_spin_lock_irq_rcu_node(sdp);
+	rcu_segcblist_advance(&sdp->srcu_cblist,
+			      rcu_seq_current(&sp->srcu_gp_seq));
+	if (sdp->srcu_cblist_invoking ||
+	    !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
+		raw_spin_unlock_irq_rcu_node(sdp);
+		return;  /* Someone else on the job or nothing to do. */
+	}
+
+	/* We are on the job!  Extract and invoke ready callbacks. */
+	sdp->srcu_cblist_invoking = true;
+	rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
+	raw_spin_unlock_irq_rcu_node(sdp);
+	rhp = rcu_cblist_dequeue(&ready_cbs);
+	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
+		debug_rcu_head_unqueue(rhp);
+		local_bh_disable();
+		rhp->func(rhp);
+		local_bh_enable();
+	}
+
+	/*
+	 * Update counts, accelerate new callbacks, and if needed,
+	 * schedule another round of callback invocation.
+	 */
+	raw_spin_lock_irq_rcu_node(sdp);
+	rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs);
+	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
+				       rcu_seq_snap(&sp->srcu_gp_seq));
+	sdp->srcu_cblist_invoking = false;
+	more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
+	raw_spin_unlock_irq_rcu_node(sdp);
+	if (more)
+		srcu_schedule_cbs_sdp(sdp, 0);
+}
+
+/*
+ * Finished one round of SRCU grace period.  Start another if there are
+ * more SRCU callbacks queued, otherwise put SRCU into not-running state.
+ */
+static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
+{
+	bool pushgp = true;
+
+	raw_spin_lock_irq_rcu_node(sp);
+	if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
+		if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) {
+			/* All requests fulfilled, time to go idle. */
+			pushgp = false;
+		}
+	} else if (!rcu_seq_state(sp->srcu_gp_seq)) {
+		/* Outstanding request and no GP.  Start one. */
+		srcu_gp_start(sp);
+	}
+	raw_spin_unlock_irq_rcu_node(sp);
+
+	if (pushgp)
+		queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
+}
+
+/*
+ * This is the work-queue function that handles SRCU grace periods.
+ */
+void process_srcu(struct work_struct *work)
+{
+	struct srcu_struct *sp;
+
+	sp = container_of(work, struct srcu_struct, work.work);
+
+	srcu_advance_state(sp);
+	srcu_reschedule(sp, srcu_get_delay(sp));
+}
+EXPORT_SYMBOL_GPL(process_srcu);
+
+void srcutorture_get_gp_data(enum rcutorture_type test_type,
+			     struct srcu_struct *sp, int *flags,
+			     unsigned long *gpnum, unsigned long *completed)
+{
+	if (test_type != SRCU_FLAVOR)
+		return;
+	*flags = 0;
+	*completed = rcu_seq_ctr(sp->srcu_gp_seq);
+	*gpnum = rcu_seq_ctr(sp->srcu_gp_seq_needed);
+}
+EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
+
+static int __init srcu_bootup_announce(void)
+{
+	pr_info("Hierarchical SRCU implementation.\n");
+	if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF)
+		pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
+	return 0;
+}
+early_initcall(srcu_bootup_announce);
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 6ad330dbbae2..f8488965250f 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -35,15 +35,26 @@
 #include <linux/time.h>
 #include <linux/cpu.h>
 #include <linux/prefetch.h>
-#include <linux/trace_events.h>
 
 #include "rcu.h"
 
-/* Forward declarations for tiny_plugin.h. */
-struct rcu_ctrlblk;
-static void __call_rcu(struct rcu_head *head,
-		       rcu_callback_t func,
-		       struct rcu_ctrlblk *rcp);
+/* Global control variables for rcupdate callback mechanism. */
+struct rcu_ctrlblk {
+	struct rcu_head *rcucblist;	/* List of pending callbacks (CBs). */
+	struct rcu_head **donetail;	/* ->next pointer of last "done" CB. */
+	struct rcu_head **curtail;	/* ->next pointer of last CB. */
+};
+
+/* Definition for rcupdate control block. */
+static struct rcu_ctrlblk rcu_sched_ctrlblk = {
+	.donetail	= &rcu_sched_ctrlblk.rcucblist,
+	.curtail	= &rcu_sched_ctrlblk.rcucblist,
+};
+
+static struct rcu_ctrlblk rcu_bh_ctrlblk = {
+	.donetail	= &rcu_bh_ctrlblk.rcucblist,
+	.curtail	= &rcu_bh_ctrlblk.rcucblist,
+};
 
 #include "tiny_plugin.h"
 
@@ -59,19 +70,6 @@ void rcu_barrier_sched(void)
 }
 EXPORT_SYMBOL(rcu_barrier_sched);
 
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
-
-/*
- * Test whether RCU thinks that the current CPU is idle.
- */
-bool notrace __rcu_is_watching(void)
-{
-	return true;
-}
-EXPORT_SYMBOL(__rcu_is_watching);
-
-#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
-
 /*
  * Helper function for rcu_sched_qs() and rcu_bh_qs().
  * Also irqs are disabled to avoid confusion due to interrupt handlers
@@ -79,7 +77,6 @@ EXPORT_SYMBOL(__rcu_is_watching);
  */
 static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 {
-	RCU_TRACE(reset_cpu_stall_ticks(rcp));
 	if (rcp->donetail != rcp->curtail) {
 		rcp->donetail = rcp->curtail;
 		return 1;
@@ -125,7 +122,6 @@ void rcu_bh_qs(void)
  */
 void rcu_check_callbacks(int user)
 {
-	RCU_TRACE(check_cpu_stalls());
 	if (user)
 		rcu_sched_qs();
 	else if (!in_softirq())
@@ -140,10 +136,8 @@ void rcu_check_callbacks(int user)
  */
 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 {
-	const char *rn = NULL;
 	struct rcu_head *next, *list;
 	unsigned long flags;
-	RCU_TRACE(int cb_count = 0);
 
 	/* Move the ready-to-invoke callbacks to a local list. */
 	local_irq_save(flags);
@@ -152,7 +146,6 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 		local_irq_restore(flags);
 		return;
 	}
-	RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1));
 	list = rcp->rcucblist;
 	rcp->rcucblist = *rcp->donetail;
 	*rcp->donetail = NULL;
@@ -162,22 +155,15 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 	local_irq_restore(flags);
 
 	/* Invoke the callbacks on the local list. */
-	RCU_TRACE(rn = rcp->name);
 	while (list) {
 		next = list->next;
 		prefetch(next);
 		debug_rcu_head_unqueue(list);
 		local_bh_disable();
-		__rcu_reclaim(rn, list);
+		__rcu_reclaim("", list);
 		local_bh_enable();
 		list = next;
-		RCU_TRACE(cb_count++);
 	}
-	RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
-	RCU_TRACE(trace_rcu_batch_end(rcp->name,
-				      cb_count, 0, need_resched(),
-				      is_idle_task(current),
-				      false));
 }
 
 static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
@@ -221,7 +207,6 @@ static void __call_rcu(struct rcu_head *head,
 	local_irq_save(flags);
 	*rcp->curtail = head;
 	rcp->curtail = &head->next;
-	RCU_TRACE(rcp->qlen++);
 	local_irq_restore(flags);
 
 	if (unlikely(is_idle_task(current))) {
@@ -254,8 +239,5 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
 void __init rcu_init(void)
 {
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
-	RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk));
-	RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk));
-
 	rcu_early_boot_tests();
 }
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index c64b827ecbca..f0a01b2a3062 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -22,37 +22,7 @@
  * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  */
 
-#include <linux/kthread.h>
-#include <linux/init.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-/* Global control variables for rcupdate callback mechanism. */
-struct rcu_ctrlblk {
-	struct rcu_head *rcucblist;	/* List of pending callbacks (CBs). */
-	struct rcu_head **donetail;	/* ->next pointer of last "done" CB. */
-	struct rcu_head **curtail;	/* ->next pointer of last CB. */
-	RCU_TRACE(long qlen);		/* Number of pending CBs. */
-	RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
-	RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
-	RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
-	RCU_TRACE(const char *name);	/* Name of RCU type. */
-};
-
-/* Definition for rcupdate control block. */
-static struct rcu_ctrlblk rcu_sched_ctrlblk = {
-	.donetail	= &rcu_sched_ctrlblk.rcucblist,
-	.curtail	= &rcu_sched_ctrlblk.rcucblist,
-	RCU_TRACE(.name = "rcu_sched")
-};
-
-static struct rcu_ctrlblk rcu_bh_ctrlblk = {
-	.donetail	= &rcu_bh_ctrlblk.rcucblist,
-	.curtail	= &rcu_bh_ctrlblk.rcucblist,
-	RCU_TRACE(.name = "rcu_bh")
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
 #include <linux/kernel_stat.h>
 
 int rcu_scheduler_active __read_mostly;
@@ -65,105 +35,13 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active);
  * to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage.
  * The reason for this is that Tiny RCU does not need kthreads, so does
  * not have to care about the fact that the scheduler is half-initialized
- * at a certain phase of the boot process.
+ * at a certain phase of the boot process.  Unless SRCU is in the mix.
  */
 void __init rcu_scheduler_starting(void)
 {
 	WARN_ON(nr_context_switches() > 0);
-	rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
-}
-
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-#ifdef CONFIG_RCU_TRACE
-
-static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	rcp->qlen -= n;
-	local_irq_restore(flags);
-}
-
-/*
- * Dump statistics for TINY_RCU, such as they are.
- */
-static int show_tiny_stats(struct seq_file *m, void *unused)
-{
-	seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
-	seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
-	return 0;
-}
-
-static int show_tiny_stats_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_tiny_stats, NULL);
-}
-
-static const struct file_operations show_tiny_stats_fops = {
-	.owner = THIS_MODULE,
-	.open = show_tiny_stats_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static struct dentry *rcudir;
-
-static int __init rcutiny_trace_init(void)
-{
-	struct dentry *retval;
-
-	rcudir = debugfs_create_dir("rcu", NULL);
-	if (!rcudir)
-		goto free_out;
-	retval = debugfs_create_file("rcudata", 0444, rcudir,
-				     NULL, &show_tiny_stats_fops);
-	if (!retval)
-		goto free_out;
-	return 0;
-free_out:
-	debugfs_remove_recursive(rcudir);
-	return 1;
-}
-device_initcall(rcutiny_trace_init);
-
-static void check_cpu_stall(struct rcu_ctrlblk *rcp)
-{
-	unsigned long j;
-	unsigned long js;
-
-	if (rcu_cpu_stall_suppress)
-		return;
-	rcp->ticks_this_gp++;
-	j = jiffies;
-	js = READ_ONCE(rcp->jiffies_stall);
-	if (rcp->rcucblist && ULONG_CMP_GE(j, js)) {
-		pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
-		       rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE,
-		       jiffies - rcp->gp_start, rcp->qlen);
-		dump_stack();
-		WRITE_ONCE(rcp->jiffies_stall,
-			   jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
-	} else if (ULONG_CMP_GE(j, js)) {
-		WRITE_ONCE(rcp->jiffies_stall,
-			   jiffies + rcu_jiffies_till_stall_check());
-	}
-}
-
-static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
-{
-	rcp->ticks_this_gp = 0;
-	rcp->gp_start = jiffies;
-	WRITE_ONCE(rcp->jiffies_stall,
-		   jiffies + rcu_jiffies_till_stall_check());
-}
-
-static void check_cpu_stalls(void)
-{
-	RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
-	RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
+	rcu_scheduler_active = IS_ENABLED(CONFIG_SRCU)
+		? RCU_SCHEDULER_INIT : RCU_SCHEDULER_RUNNING;
 }
 
-#endif /* #ifdef CONFIG_RCU_TRACE */
+#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a6dcf3bd244f..51d4c3acf32d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -98,8 +98,8 @@ struct rcu_state sname##_state = { \
 	.gpnum = 0UL - 300UL, \
 	.completed = 0UL - 300UL, \
 	.orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
-	.orphan_nxttail = &sname##_state.orphan_nxtlist, \
-	.orphan_donetail = &sname##_state.orphan_donelist, \
+	.orphan_pend = RCU_CBLIST_INITIALIZER(sname##_state.orphan_pend), \
+	.orphan_done = RCU_CBLIST_INITIALIZER(sname##_state.orphan_done), \
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
 	.name = RCU_STATE_NAME(sname), \
 	.abbr = sabbr, \
@@ -124,7 +124,7 @@ static int rcu_fanout_leaf = RCU_FANOUT_LEAF;
 module_param(rcu_fanout_leaf, int, 0444);
 int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
 /* Number of rcu_nodes at specified level. */
-static int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
+int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
 int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
 /* panic() on RCU Stall sysctl. */
 int sysctl_panic_on_rcu_stall __read_mostly;
@@ -168,39 +168,21 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp,
 static void sync_sched_exp_online_cleanup(int cpu);
 
 /* rcuc/rcub kthread realtime priority */
-#ifdef CONFIG_RCU_KTHREAD_PRIO
-static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
-#else /* #ifdef CONFIG_RCU_KTHREAD_PRIO */
 static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
-#endif /* #else #ifdef CONFIG_RCU_KTHREAD_PRIO */
 module_param(kthread_prio, int, 0644);
 
 /* Delay in jiffies for grace-period initialization delays, debug only. */
 
-#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT
-static int gp_preinit_delay = CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY;
-module_param(gp_preinit_delay, int, 0644);
-#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */
-static const int gp_preinit_delay;
-#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */
-
-#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT
-static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY;
-module_param(gp_init_delay, int, 0644);
-#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
-static const int gp_init_delay;
-#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
-
-#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP
-static int gp_cleanup_delay = CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY;
-module_param(gp_cleanup_delay, int, 0644);
-#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */
-static const int gp_cleanup_delay;
-#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */
+static int gp_preinit_delay;
+module_param(gp_preinit_delay, int, 0444);
+static int gp_init_delay;
+module_param(gp_init_delay, int, 0444);
+static int gp_cleanup_delay;
+module_param(gp_cleanup_delay, int, 0444);
 
 /*
  * Number of grace periods between delays, normalized by the duration of
- * the delay.  The longer the the delay, the more the grace periods between
+ * the delay.  The longer the delay, the more the grace periods between
  * each delay.  The reason for this normalization is that it means that,
  * for non-zero delays, the overall slowdown of grace periods is constant
  * regardless of the duration of the delay.  This arrangement balances
@@ -250,6 +232,7 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
  */
 void rcu_sched_qs(void)
 {
+	RCU_LOCKDEP_WARN(preemptible(), "rcu_sched_qs() invoked with preemption enabled!!!");
 	if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s))
 		return;
 	trace_rcu_grace_period(TPS("rcu_sched"),
@@ -265,6 +248,7 @@ void rcu_sched_qs(void)
 
 void rcu_bh_qs(void)
 {
+	RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!");
 	if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
 		trace_rcu_grace_period(TPS("rcu_bh"),
 				       __this_cpu_read(rcu_bh_data.gpnum),
@@ -273,15 +257,19 @@ void rcu_bh_qs(void)
 	}
 }
 
-static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
+/*
+ * Steal a bit from the bottom of ->dynticks for idle entry/exit
+ * control.  Initially this is for TLB flushing.
+ */
+#define RCU_DYNTICK_CTRL_MASK 0x1
+#define RCU_DYNTICK_CTRL_CTR  (RCU_DYNTICK_CTRL_MASK + 1)
+#ifndef rcu_eqs_special_exit
+#define rcu_eqs_special_exit() do { } while (0)
+#endif
 
 static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
-	.dynticks = ATOMIC_INIT(1),
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-	.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
-	.dynticks_idle = ATOMIC_INIT(1),
-#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
 };
 
 /*
@@ -305,15 +293,20 @@ bool rcu_irq_enter_disabled(void)
 static void rcu_dynticks_eqs_enter(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-	int special;
+	int seq;
 
 	/*
-	 * CPUs seeing atomic_inc_return() must see prior RCU read-side
+	 * CPUs seeing atomic_add_return() must see prior RCU read-side
 	 * critical sections, and we also must force ordering with the
 	 * next idle sojourn.
 	 */
-	special = atomic_inc_return(&rdtp->dynticks);
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && special & 0x1);
+	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
+	/* Better be in an extended quiescent state! */
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+		     (seq & RCU_DYNTICK_CTRL_CTR));
+	/* Better not have special action (TLB flush) pending! */
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+		     (seq & RCU_DYNTICK_CTRL_MASK));
 }
 
 /*
@@ -323,15 +316,22 @@ static void rcu_dynticks_eqs_enter(void)
 static void rcu_dynticks_eqs_exit(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-	int special;
+	int seq;
 
 	/*
-	 * CPUs seeing atomic_inc_return() must see prior idle sojourns,
+	 * CPUs seeing atomic_add_return() must see prior idle sojourns,
 	 * and we also must force ordering with the next RCU read-side
 	 * critical section.
 	 */
-	special = atomic_inc_return(&rdtp->dynticks);
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(special & 0x1));
+	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+		     !(seq & RCU_DYNTICK_CTRL_CTR));
+	if (seq & RCU_DYNTICK_CTRL_MASK) {
+		atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdtp->dynticks);
+		smp_mb__after_atomic(); /* _exit after clearing mask. */
+		/* Prefer duplicate flushes to losing a flush. */
+		rcu_eqs_special_exit();
+	}
 }
 
 /*
@@ -348,9 +348,9 @@ static void rcu_dynticks_eqs_online(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
-	if (atomic_read(&rdtp->dynticks) & 0x1)
+	if (atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR)
 		return;
-	atomic_add(0x1, &rdtp->dynticks);
+	atomic_add(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
 }
 
 /*
@@ -362,7 +362,7 @@ bool rcu_dynticks_curr_cpu_in_eqs(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
-	return !(atomic_read(&rdtp->dynticks) & 0x1);
+	return !(atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR);
 }
 
 /*
@@ -373,7 +373,7 @@ int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 {
 	int snap = atomic_add_return(0, &rdtp->dynticks);
 
-	return snap;
+	return snap & ~RCU_DYNTICK_CTRL_MASK;
 }
 
 /*
@@ -382,7 +382,7 @@ int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
  */
 static bool rcu_dynticks_in_eqs(int snap)
 {
-	return !(snap & 0x1);
+	return !(snap & RCU_DYNTICK_CTRL_CTR);
 }
 
 /*
@@ -402,14 +402,34 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_dynticks *rdtp, int snap)
 static void rcu_dynticks_momentary_idle(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-	int special = atomic_add_return(2, &rdtp->dynticks);
+	int special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
+					&rdtp->dynticks);
 
 	/* It is illegal to call this from idle state. */
-	WARN_ON_ONCE(!(special & 0x1));
+	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
 }
 
-DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
-EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
+/*
+ * Set the special (bottom) bit of the specified CPU so that it
+ * will take special action (such as flushing its TLB) on the
+ * next exit from an extended quiescent state.  Returns true if
+ * the bit was successfully set, or false if the CPU was not in
+ * an extended quiescent state.
+ */
+bool rcu_eqs_special_set(int cpu)
+{
+	int old;
+	int new;
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+	do {
+		old = atomic_read(&rdtp->dynticks);
+		if (old & RCU_DYNTICK_CTRL_CTR)
+			return false;
+		new = old | RCU_DYNTICK_CTRL_MASK;
+	} while (atomic_cmpxchg(&rdtp->dynticks, old, new) != old);
+	return true;
+}
 
 /*
  * Let the RCU core know that this CPU has gone through the scheduler,
@@ -418,44 +438,14 @@ EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
  * memory barriers to let the RCU core know about it, regardless of what
  * this CPU might (or might not) do in the near future.
  *
- * We inform the RCU core by emulating a zero-duration dyntick-idle
- * period, which we in turn do by incrementing the ->dynticks counter
- * by two.
+ * We inform the RCU core by emulating a zero-duration dyntick-idle period.
  *
  * The caller must have disabled interrupts.
  */
 static void rcu_momentary_dyntick_idle(void)
 {
-	struct rcu_data *rdp;
-	int resched_mask;
-	struct rcu_state *rsp;
-
-	/*
-	 * Yes, we can lose flag-setting operations.  This is OK, because
-	 * the flag will be set again after some delay.
-	 */
-	resched_mask = raw_cpu_read(rcu_sched_qs_mask);
-	raw_cpu_write(rcu_sched_qs_mask, 0);
-
-	/* Find the flavor that needs a quiescent state. */
-	for_each_rcu_flavor(rsp) {
-		rdp = raw_cpu_ptr(rsp->rda);
-		if (!(resched_mask & rsp->flavor_mask))
-			continue;
-		smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */
-		if (READ_ONCE(rdp->mynode->completed) !=
-		    READ_ONCE(rdp->cond_resched_completed))
-			continue;
-
-		/*
-		 * Pretend to be momentarily idle for the quiescent state.
-		 * This allows the grace-period kthread to record the
-		 * quiescent state, with no need for this CPU to do anything
-		 * further.
-		 */
-		rcu_dynticks_momentary_idle();
-		break;
-	}
+	raw_cpu_write(rcu_dynticks.rcu_need_heavy_qs, false);
+	rcu_dynticks_momentary_idle();
 }
 
 /*
@@ -463,14 +453,22 @@ static void rcu_momentary_dyntick_idle(void)
  * and requires special handling for preemptible RCU.
  * The caller must have disabled interrupts.
  */
-void rcu_note_context_switch(void)
+void rcu_note_context_switch(bool preempt)
 {
 	barrier(); /* Avoid RCU read-side critical sections leaking down. */
 	trace_rcu_utilization(TPS("Start context switch"));
 	rcu_sched_qs();
-	rcu_preempt_note_context_switch();
-	if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
+	rcu_preempt_note_context_switch(preempt);
+	/* Load rcu_urgent_qs before other flags. */
+	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
+		goto out;
+	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
+	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
 		rcu_momentary_dyntick_idle();
+	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
+	if (!preempt)
+		rcu_note_voluntary_context_switch_lite(current);
+out:
 	trace_rcu_utilization(TPS("End context switch"));
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 }
@@ -493,35 +491,35 @@ void rcu_all_qs(void)
 {
 	unsigned long flags;
 
+	if (!raw_cpu_read(rcu_dynticks.rcu_urgent_qs))
+		return;
+	preempt_disable();
+	/* Load rcu_urgent_qs before other flags. */
+	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) {
+		preempt_enable();
+		return;
+	}
+	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
 	barrier(); /* Avoid RCU read-side critical sections leaking down. */
-	if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) {
+	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) {
 		local_irq_save(flags);
 		rcu_momentary_dyntick_idle();
 		local_irq_restore(flags);
 	}
-	if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))) {
-		/*
-		 * Yes, we just checked a per-CPU variable with preemption
-		 * enabled, so we might be migrated to some other CPU at
-		 * this point.  That is OK because in that case, the
-		 * migration will supply the needed quiescent state.
-		 * We might end up needlessly disabling preemption and
-		 * invoking rcu_sched_qs() on the destination CPU, but
-		 * the probability and cost are both quite low, so this
-		 * should not be a problem in practice.
-		 */
-		preempt_disable();
+	if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)))
 		rcu_sched_qs();
-		preempt_enable();
-	}
-	this_cpu_inc(rcu_qs_ctr);
+	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
+	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(rcu_all_qs);
 
-static long blimit = 10;	/* Maximum callbacks per rcu_do_batch. */
-static long qhimark = 10000;	/* If this many pending, ignore blimit. */
-static long qlowmark = 100;	/* Once only this many pending, use blimit. */
+#define DEFAULT_RCU_BLIMIT 10     /* Maximum callbacks per rcu_do_batch. */
+static long blimit = DEFAULT_RCU_BLIMIT;
+#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
+static long qhimark = DEFAULT_RCU_QHIMARK;
+#define DEFAULT_RCU_QLOMARK 100   /* Once only this many pending, use blimit. */
+static long qlowmark = DEFAULT_RCU_QLOMARK;
 
 module_param(blimit, long, 0444);
 module_param(qhimark, long, 0444);
@@ -544,10 +542,7 @@ module_param(jiffies_till_sched_qs, ulong, 0644);
 
 static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 				  struct rcu_data *rdp);
-static void force_qs_rnp(struct rcu_state *rsp,
-			 int (*f)(struct rcu_data *rsp, bool *isidle,
-				  unsigned long *maxj),
-			 bool *isidle, unsigned long *maxj);
+static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(void);
 
@@ -704,15 +699,11 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
 	default:
 		break;
 	}
-	if (rsp != NULL) {
-		*flags = READ_ONCE(rsp->gp_flags);
-		*gpnum = READ_ONCE(rsp->gpnum);
-		*completed = READ_ONCE(rsp->completed);
+	if (rsp == NULL)
 		return;
-	}
-	*flags = 0;
-	*gpnum = 0;
-	*completed = 0;
+	*flags = READ_ONCE(rsp->gp_flags);
+	*gpnum = READ_ONCE(rsp->gpnum);
+	*completed = READ_ONCE(rsp->completed);
 }
 EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
 
@@ -728,16 +719,6 @@ void rcutorture_record_progress(unsigned long vernum)
 EXPORT_SYMBOL_GPL(rcutorture_record_progress);
 
 /*
- * Does the CPU have callbacks ready to be invoked?
- */
-static int
-cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
-{
-	return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
-	       rdp->nxttail[RCU_NEXT_TAIL] != NULL;
-}
-
-/*
  * Return the root node of the specified rcu_state structure.
  */
 static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
@@ -756,6 +737,7 @@ static int rcu_future_needs_gp(struct rcu_state *rsp)
 	int idx = (READ_ONCE(rnp->completed) + 1) & 0x1;
 	int *fp = &rnp->need_future_gp[idx];
 
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_future_needs_gp() invoked with irqs enabled!!!");
 	return READ_ONCE(*fp);
 }
 
@@ -767,21 +749,18 @@ static int rcu_future_needs_gp(struct rcu_state *rsp)
 static bool
 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 {
-	int i;
-
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "cpu_needs_another_gp() invoked with irqs enabled!!!");
 	if (rcu_gp_in_progress(rsp))
 		return false;  /* No, a grace period is already in progress. */
 	if (rcu_future_needs_gp(rsp))
 		return true;  /* Yes, a no-CBs CPU needs one. */
-	if (!rdp->nxttail[RCU_NEXT_TAIL])
+	if (!rcu_segcblist_is_enabled(&rdp->cblist))
 		return false;  /* No, this is a no-CBs (or offline) CPU. */
-	if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
+	if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
 		return true;  /* Yes, CPU has newly registered callbacks. */
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
-		if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
-		    ULONG_CMP_LT(READ_ONCE(rsp->completed),
-				 rdp->nxtcompleted[i]))
-			return true;  /* Yes, CBs for future grace period. */
+	if (rcu_segcblist_future_gp_needed(&rdp->cblist,
+					   READ_ONCE(rsp->completed)))
+		return true;  /* Yes, CBs for future grace period. */
 	return false; /* No grace period needed. */
 }
 
@@ -797,6 +776,7 @@ static void rcu_eqs_enter_common(bool user)
 	struct rcu_data *rdp;
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_eqs_enter_common() invoked with irqs enabled!!!");
 	trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0);
 	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
 	    !user && !is_idle_task(current)) {
@@ -867,7 +847,6 @@ void rcu_idle_enter(void)
 
 	local_irq_save(flags);
 	rcu_eqs_enter(false);
-	rcu_sysidle_enter(0);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_enter);
@@ -917,7 +896,6 @@ void rcu_irq_exit(void)
 		trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1);
 		rdtp->dynticks_nesting--;
 	}
-	rcu_sysidle_enter(1);
 }
 
 /*
@@ -970,6 +948,7 @@ static void rcu_eqs_exit(bool user)
 	struct rcu_dynticks *rdtp;
 	long long oldval;
 
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_eqs_exit() invoked with irqs enabled!!!");
 	rdtp = this_cpu_ptr(&rcu_dynticks);
 	oldval = rdtp->dynticks_nesting;
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
@@ -998,7 +977,6 @@ void rcu_idle_exit(void)
 
 	local_irq_save(flags);
 	rcu_eqs_exit(false);
-	rcu_sysidle_exit(0);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_exit);
@@ -1050,7 +1028,6 @@ void rcu_irq_enter(void)
 		trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
 	else
 		rcu_eqs_exit_common(oldval, true);
-	rcu_sysidle_exit(1);
 }
 
 /*
@@ -1133,22 +1110,11 @@ void rcu_nmi_exit(void)
 }
 
 /**
- * __rcu_is_watching - are RCU read-side critical sections safe?
- *
- * Return true if RCU is watching the running CPU, which means that
- * this CPU can safely enter RCU read-side critical sections.  Unlike
- * rcu_is_watching(), the caller of __rcu_is_watching() must have at
- * least disabled preemption.
- */
-bool notrace __rcu_is_watching(void)
-{
-	return !rcu_dynticks_curr_cpu_in_eqs();
-}
-
-/**
  * rcu_is_watching - see if RCU thinks that the current CPU is idle
  *
- * If the current CPU is in its idle loop and is neither in an interrupt
+ * Return true if RCU is watching the running CPU, which means that this
+ * CPU can safely enter RCU read-side critical sections.  In other words,
+ * if the current CPU is in its idle loop and is neither in an interrupt
  * or NMI handler, return true.
  */
 bool notrace rcu_is_watching(void)
@@ -1156,12 +1122,30 @@ bool notrace rcu_is_watching(void)
 	bool ret;
 
 	preempt_disable_notrace();
-	ret = __rcu_is_watching();
+	ret = !rcu_dynticks_curr_cpu_in_eqs();
 	preempt_enable_notrace();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(rcu_is_watching);
 
+/*
+ * If a holdout task is actually running, request an urgent quiescent
+ * state from its CPU.  This is unsynchronized, so migrations can cause
+ * the request to go to the wrong CPU.  Which is OK, all that will happen
+ * is that the CPU's next context switch will be a bit slower and next
+ * time around this task will generate another request.
+ */
+void rcu_request_urgent_qs_task(struct task_struct *t)
+{
+	int cpu;
+
+	barrier();
+	cpu = task_cpu(t);
+	if (!task_curr(t))
+		return; /* This task is not running on that CPU. */
+	smp_store_release(per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, cpu), true);
+}
+
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
 
 /*
@@ -1222,11 +1206,9 @@ static int rcu_is_cpu_rrupt_from_idle(void)
  * credit them with an implicit quiescent state.  Return 1 if this CPU
  * is in dynticks idle mode, which is an extended quiescent state.
  */
-static int dyntick_save_progress_counter(struct rcu_data *rdp,
-					 bool *isidle, unsigned long *maxj)
+static int dyntick_save_progress_counter(struct rcu_data *rdp)
 {
 	rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks);
-	rcu_sysidle_check_cpu(rdp, isidle, maxj);
 	if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
 		if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
@@ -1243,11 +1225,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
  * idle state since the last call to dyntick_save_progress_counter()
  * for this same CPU, or by virtue of having been offline.
  */
-static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
-				    bool *isidle, unsigned long *maxj)
+static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 {
 	unsigned long jtsq;
-	int *rcrmp;
+	bool *rnhqp;
+	bool *ruqp;
 	unsigned long rjtsc;
 	struct rcu_node *rnp;
 
@@ -1283,11 +1265,15 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	 * might not be the case for nohz_full CPUs looping in the kernel.
 	 */
 	rnp = rdp->mynode;
+	ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
 	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
-	    READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_qs_ctr, rdp->cpu) &&
+	    READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) &&
 	    READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc"));
 		return 1;
+	} else {
+		/* Load rcu_qs_ctr before store to rcu_urgent_qs. */
+		smp_store_release(ruqp, true);
 	}
 
 	/* Check for the CPU being offline. */
@@ -1304,7 +1290,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	 * in-kernel CPU-bound tasks cannot advance grace periods.
 	 * So if the grace period is old enough, make the CPU pay attention.
 	 * Note that the unsynchronized assignments to the per-CPU
-	 * rcu_sched_qs_mask variable are safe.  Yes, setting of
+	 * rcu_need_heavy_qs variable are safe.  Yes, setting of
 	 * bits can be lost, but they will be set again on the next
 	 * force-quiescent-state pass.  So lost bit sets do not result
 	 * in incorrect behavior, merely in a grace period lasting
@@ -1318,16 +1304,13 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	 * is set too high, we override with half of the RCU CPU stall
 	 * warning delay.
 	 */
-	rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu);
-	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) ||
-	    time_after(jiffies, rdp->rsp->jiffies_resched)) {
-		if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) {
-			WRITE_ONCE(rdp->cond_resched_completed,
-				   READ_ONCE(rdp->mynode->completed));
-			smp_mb(); /* ->cond_resched_completed before *rcrmp. */
-			WRITE_ONCE(*rcrmp,
-				   READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask);
-		}
+	rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu);
+	if (!READ_ONCE(*rnhqp) &&
+	    (time_after(jiffies, rdp->rsp->gp_start + jtsq) ||
+	     time_after(jiffies, rdp->rsp->jiffies_resched))) {
+		WRITE_ONCE(*rnhqp, true);
+		/* Store rcu_need_heavy_qs before rcu_urgent_qs. */
+		smp_store_release(ruqp, true);
 		rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
 	}
 
@@ -1487,7 +1470,8 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
 
 	print_cpu_stall_info_end();
 	for_each_possible_cpu(cpu)
-		totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
+		totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda,
+							    cpu)->cblist);
 	pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n",
 	       smp_processor_id(), (long)(jiffies - rsp->gp_start),
 	       (long)rsp->gpnum, (long)rsp->completed, totqlen);
@@ -1541,7 +1525,8 @@ static void print_cpu_stall(struct rcu_state *rsp)
 	print_cpu_stall_info(rsp, smp_processor_id());
 	print_cpu_stall_info_end();
 	for_each_possible_cpu(cpu)
-		totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
+		totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda,
+							    cpu)->cblist);
 	pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
 		jiffies - rsp->gp_start,
 		(long)rsp->gpnum, (long)rsp->completed, totqlen);
@@ -1644,30 +1629,6 @@ void rcu_cpu_stall_reset(void)
 }
 
 /*
- * Initialize the specified rcu_data structure's default callback list
- * to empty.  The default callback list is the one that is not used by
- * no-callbacks CPUs.
- */
-static void init_default_callback_list(struct rcu_data *rdp)
-{
-	int i;
-
-	rdp->nxtlist = NULL;
-	for (i = 0; i < RCU_NEXT_SIZE; i++)
-		rdp->nxttail[i] = &rdp->nxtlist;
-}
-
-/*
- * Initialize the specified rcu_data structure's callback list to empty.
- */
-static void init_callback_list(struct rcu_data *rdp)
-{
-	if (init_nocb_callback_list(rdp))
-		return;
-	init_default_callback_list(rdp);
-}
-
-/*
  * Determine the value that ->completed will have at the end of the
  * next subsequent grace period.  This is used to tag callbacks so that
  * a CPU can invoke callbacks in a timely fashion even if that CPU has
@@ -1679,6 +1640,8 @@ static void init_callback_list(struct rcu_data *rdp)
 static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
 				       struct rcu_node *rnp)
 {
+	lockdep_assert_held(&rnp->lock);
+
 	/*
 	 * If RCU is idle, we just wait for the next grace period.
 	 * But we can only be sure that RCU is idle if we are looking
@@ -1721,10 +1684,11 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 		    unsigned long *c_out)
 {
 	unsigned long c;
-	int i;
 	bool ret = false;
 	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
 
+	lockdep_assert_held(&rnp->lock);
+
 	/*
 	 * Pick up grace-period number for new callbacks.  If this
 	 * grace period is already marked as needed, return to the caller.
@@ -1767,13 +1731,11 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 	/*
 	 * Get a new grace-period number.  If there really is no grace
 	 * period in progress, it will be smaller than the one we obtained
-	 * earlier.  Adjust callbacks as needed.  Note that even no-CBs
-	 * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
+	 * earlier.  Adjust callbacks as needed.
 	 */
 	c = rcu_cbs_completed(rdp->rsp, rnp_root);
-	for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
-		if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
-			rdp->nxtcompleted[i] = c;
+	if (!rcu_is_nocb_cpu(rdp->cpu))
+		(void)rcu_segcblist_accelerate(&rdp->cblist, c);
 
 	/*
 	 * If the needed for the required grace period is already
@@ -1805,9 +1767,7 @@ out:
 
 /*
  * Clean up any old requests for the just-ended grace period.  Also return
- * whether any additional grace periods have been requested.  Also invoke
- * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads
- * waiting for this grace period to complete.
+ * whether any additional grace periods have been requested.
  */
 static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
 {
@@ -1853,57 +1813,29 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
 static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 			       struct rcu_data *rdp)
 {
-	unsigned long c;
-	int i;
-	bool ret;
+	bool ret = false;
 
-	/* If the CPU has no callbacks, nothing to do. */
-	if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
-		return false;
+	lockdep_assert_held(&rnp->lock);
 
-	/*
-	 * Starting from the sublist containing the callbacks most
-	 * recently assigned a ->completed number and working down, find the
-	 * first sublist that is not assignable to an upcoming grace period.
-	 * Such a sublist has something in it (first two tests) and has
-	 * a ->completed number assigned that will complete sooner than
-	 * the ->completed number for newly arrived callbacks (last test).
-	 *
-	 * The key point is that any later sublist can be assigned the
-	 * same ->completed number as the newly arrived callbacks, which
-	 * means that the callbacks in any of these later sublist can be
-	 * grouped into a single sublist, whether or not they have already
-	 * been assigned a ->completed number.
-	 */
-	c = rcu_cbs_completed(rsp, rnp);
-	for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
-		if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
-		    !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
-			break;
-
-	/*
-	 * If there are no sublist for unassigned callbacks, leave.
-	 * At the same time, advance "i" one sublist, so that "i" will
-	 * index into the sublist where all the remaining callbacks should
-	 * be grouped into.
-	 */
-	if (++i >= RCU_NEXT_TAIL)
+	/* If no pending (not yet ready to invoke) callbacks, nothing to do. */
+	if (!rcu_segcblist_pend_cbs(&rdp->cblist))
 		return false;
 
 	/*
-	 * Assign all subsequent callbacks' ->completed number to the next
-	 * full grace period and group them all in the sublist initially
-	 * indexed by "i".
+	 * Callbacks are often registered with incomplete grace-period
+	 * information.  Something about the fact that getting exact
+	 * information requires acquiring a global lock...  RCU therefore
+	 * makes a conservative estimate of the grace period number at which
+	 * a given callback will become ready to invoke.	The following
+	 * code checks this estimate and improves it when possible, thus
+	 * accelerating callback invocation to an earlier grace-period
+	 * number.
 	 */
-	for (; i <= RCU_NEXT_TAIL; i++) {
-		rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
-		rdp->nxtcompleted[i] = c;
-	}
-	/* Record any needed additional grace periods. */
-	ret = rcu_start_future_gp(rnp, rdp, NULL);
+	if (rcu_segcblist_accelerate(&rdp->cblist, rcu_cbs_completed(rsp, rnp)))
+		ret = rcu_start_future_gp(rnp, rdp, NULL);
 
 	/* Trace depending on how much we were able to accelerate. */
-	if (!*rdp->nxttail[RCU_WAIT_TAIL])
+	if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
 	else
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
@@ -1923,32 +1855,17 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 			    struct rcu_data *rdp)
 {
-	int i, j;
+	lockdep_assert_held(&rnp->lock);
 
-	/* If the CPU has no callbacks, nothing to do. */
-	if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
+	/* If no pending (not yet ready to invoke) callbacks, nothing to do. */
+	if (!rcu_segcblist_pend_cbs(&rdp->cblist))
 		return false;
 
 	/*
 	 * Find all callbacks whose ->completed numbers indicate that they
 	 * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
 	 */
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
-		if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
-			break;
-		rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
-	}
-	/* Clean up any sublist tail pointers that were misordered above. */
-	for (j = RCU_WAIT_TAIL; j < i; j++)
-		rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
-
-	/* Copy down callbacks to fill in empty sublists. */
-	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
-		if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
-			break;
-		rdp->nxttail[j] = rdp->nxttail[i];
-		rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
-	}
+	rcu_segcblist_advance(&rdp->cblist, rnp->completed);
 
 	/* Classify any remaining callbacks. */
 	return rcu_accelerate_cbs(rsp, rnp, rdp);
@@ -1966,6 +1883,8 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 	bool ret;
 	bool need_gp;
 
+	lockdep_assert_held(&rnp->lock);
+
 	/* Handle the ends of any preceding grace periods first. */
 	if (rdp->completed == rnp->completed &&
 	    !unlikely(READ_ONCE(rdp->gpwrap))) {
@@ -1993,7 +1912,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
 		need_gp = !!(rnp->qsmask & rdp->grpmask);
 		rdp->cpu_no_qs.b.norm = need_gp;
-		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
+		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);
 		rdp->core_needs_qs = need_gp;
 		zero_cpu_stall_ticks(rdp);
 		WRITE_ONCE(rdp->gpwrap, false);
@@ -2172,25 +2091,16 @@ static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
  */
 static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time)
 {
-	bool isidle = false;
-	unsigned long maxj;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
 	WRITE_ONCE(rsp->gp_activity, jiffies);
 	rsp->n_force_qs++;
 	if (first_time) {
 		/* Collect dyntick-idle snapshots. */
-		if (is_sysidle_rcu_state(rsp)) {
-			isidle = true;
-			maxj = jiffies - ULONG_MAX / 4;
-		}
-		force_qs_rnp(rsp, dyntick_save_progress_counter,
-			     &isidle, &maxj);
-		rcu_sysidle_report_gp(rsp, isidle, maxj);
+		force_qs_rnp(rsp, dyntick_save_progress_counter);
 	} else {
 		/* Handle dyntick-idle and offline CPUs. */
-		isidle = true;
-		force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
+		force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
 	}
 	/* Clear flag to prevent immediate re-entry. */
 	if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
@@ -2398,6 +2308,7 @@ static bool
 rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 		      struct rcu_data *rdp)
 {
+	lockdep_assert_held(&rnp->lock);
 	if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
 		/*
 		 * Either we have not yet spawned the grace-period
@@ -2459,6 +2370,7 @@ static bool rcu_start_gp(struct rcu_state *rsp)
 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	__releases(rcu_get_root(rsp)->lock)
 {
+	lockdep_assert_held(&rcu_get_root(rsp)->lock);
 	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
 	WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
 	raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
@@ -2483,6 +2395,8 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
 	unsigned long oldmask = 0;
 	struct rcu_node *rnp_c;
 
+	lockdep_assert_held(&rnp->lock);
+
 	/* Walk up the rcu_node hierarchy. */
 	for (;;) {
 		if (!(rnp->qsmask & mask) || rnp->gpnum != gps) {
@@ -2543,6 +2457,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
 	unsigned long mask;
 	struct rcu_node *rnp_p;
 
+	lockdep_assert_held(&rnp->lock);
 	if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p ||
 	    rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -2591,7 +2506,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
 		 * within the current grace period.
 		 */
 		rdp->cpu_no_qs.b.norm = true;	/* need qs for new gp. */
-		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
+		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
@@ -2656,6 +2571,8 @@ static void
 rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 			  struct rcu_node *rnp, struct rcu_data *rdp)
 {
+	lockdep_assert_held(&rsp->orphan_lock);
+
 	/* No-CBs CPUs do not have orphanable callbacks. */
 	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu))
 		return;
@@ -2665,13 +2582,8 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 	 * because _rcu_barrier() excludes CPU-hotplug operations, so it
 	 * cannot be running now.  Thus no memory barrier is required.
 	 */
-	if (rdp->nxtlist != NULL) {
-		rsp->qlen_lazy += rdp->qlen_lazy;
-		rsp->qlen += rdp->qlen;
-		rdp->n_cbs_orphaned += rdp->qlen;
-		rdp->qlen_lazy = 0;
-		WRITE_ONCE(rdp->qlen, 0);
-	}
+	rdp->n_cbs_orphaned += rcu_segcblist_n_cbs(&rdp->cblist);
+	rcu_segcblist_extract_count(&rdp->cblist, &rsp->orphan_done);
 
 	/*
 	 * Next, move those callbacks still needing a grace period to
@@ -2679,31 +2591,18 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 	 * Some of the callbacks might have gone partway through a grace
 	 * period, but that is too bad.  They get to start over because we
 	 * cannot assume that grace periods are synchronized across CPUs.
-	 * We don't bother updating the ->nxttail[] array yet, instead
-	 * we just reset the whole thing later on.
 	 */
-	if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) {
-		*rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL];
-		rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL];
-		*rdp->nxttail[RCU_DONE_TAIL] = NULL;
-	}
+	rcu_segcblist_extract_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
 
 	/*
 	 * Then move the ready-to-invoke callbacks to the orphanage,
 	 * where some other CPU will pick them up.  These will not be
 	 * required to pass though another grace period: They are done.
 	 */
-	if (rdp->nxtlist != NULL) {
-		*rsp->orphan_donetail = rdp->nxtlist;
-		rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL];
-	}
+	rcu_segcblist_extract_done_cbs(&rdp->cblist, &rsp->orphan_done);
 
-	/*
-	 * Finally, initialize the rcu_data structure's list to empty and
-	 * disallow further callbacks on this CPU.
-	 */
-	init_callback_list(rdp);
-	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
+	/* Finally, disallow further callbacks on this CPU.  */
+	rcu_segcblist_disable(&rdp->cblist);
 }
 
 /*
@@ -2712,22 +2611,20 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
  */
 static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 {
-	int i;
 	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
 
+	lockdep_assert_held(&rsp->orphan_lock);
+
 	/* No-CBs CPUs are handled specially. */
 	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
 	    rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
 		return;
 
 	/* Do the accounting first. */
-	rdp->qlen_lazy += rsp->qlen_lazy;
-	rdp->qlen += rsp->qlen;
-	rdp->n_cbs_adopted += rsp->qlen;
-	if (rsp->qlen_lazy != rsp->qlen)
+	rdp->n_cbs_adopted += rsp->orphan_done.len;
+	if (rsp->orphan_done.len_lazy != rsp->orphan_done.len)
 		rcu_idle_count_callbacks_posted();
-	rsp->qlen_lazy = 0;
-	rsp->qlen = 0;
+	rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done);
 
 	/*
 	 * We do not need a memory barrier here because the only way we
@@ -2735,24 +2632,13 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 	 * we are the task doing the rcu_barrier().
 	 */
 
-	/* First adopt the ready-to-invoke callbacks. */
-	if (rsp->orphan_donelist != NULL) {
-		*rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL];
-		*rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist;
-		for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--)
-			if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
-				rdp->nxttail[i] = rsp->orphan_donetail;
-		rsp->orphan_donelist = NULL;
-		rsp->orphan_donetail = &rsp->orphan_donelist;
-	}
-
-	/* And then adopt the callbacks that still need a grace period. */
-	if (rsp->orphan_nxtlist != NULL) {
-		*rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist;
-		rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail;
-		rsp->orphan_nxtlist = NULL;
-		rsp->orphan_nxttail = &rsp->orphan_nxtlist;
-	}
+	/* First adopt the ready-to-invoke callbacks, then the done ones. */
+	rcu_segcblist_insert_done_cbs(&rdp->cblist, &rsp->orphan_done);
+	WARN_ON_ONCE(rsp->orphan_done.head);
+	rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
+	WARN_ON_ONCE(rsp->orphan_pend.head);
+	WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) !=
+		     !rcu_segcblist_n_cbs(&rdp->cblist));
 }
 
 /*
@@ -2760,14 +2646,14 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
  */
 static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
-	RCU_TRACE(unsigned long mask);
-	RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda));
-	RCU_TRACE(struct rcu_node *rnp = rdp->mynode);
+	RCU_TRACE(unsigned long mask;)
+	RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda);)
+	RCU_TRACE(struct rcu_node *rnp = rdp->mynode;)
 
 	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
 		return;
 
-	RCU_TRACE(mask = rdp->grpmask);
+	RCU_TRACE(mask = rdp->grpmask;)
 	trace_rcu_grace_period(rsp->name,
 			       rnp->gpnum + 1 - !!(rnp->qsmask & mask),
 			       TPS("cpuofl"));
@@ -2795,6 +2681,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
 	long mask;
 	struct rcu_node *rnp = rnp_leaf;
 
+	lockdep_assert_held(&rnp->lock);
 	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
 	    rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
 		return;
@@ -2840,9 +2727,11 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 	rcu_adopt_orphan_cbs(rsp, flags);
 	raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
 
-	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
-		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
-		  cpu, rdp->qlen, rdp->nxtlist);
+	WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
+		  !rcu_segcblist_empty(&rdp->cblist),
+		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
+		  cpu, rcu_segcblist_n_cbs(&rdp->cblist),
+		  rcu_segcblist_first_cb(&rdp->cblist));
 }
 
 /*
@@ -2852,14 +2741,17 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 {
 	unsigned long flags;
-	struct rcu_head *next, *list, **tail;
-	long bl, count, count_lazy;
-	int i;
+	struct rcu_head *rhp;
+	struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
+	long bl, count;
 
 	/* If no callbacks are ready, just return. */
-	if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
-		trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
-		trace_rcu_batch_end(rsp->name, 0, !!READ_ONCE(rdp->nxtlist),
+	if (!rcu_segcblist_ready_cbs(&rdp->cblist)) {
+		trace_rcu_batch_start(rsp->name,
+				      rcu_segcblist_n_lazy_cbs(&rdp->cblist),
+				      rcu_segcblist_n_cbs(&rdp->cblist), 0);
+		trace_rcu_batch_end(rsp->name, 0,
+				    !rcu_segcblist_empty(&rdp->cblist),
 				    need_resched(), is_idle_task(current),
 				    rcu_is_callbacks_kthread());
 		return;
@@ -2867,73 +2759,61 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 
 	/*
 	 * Extract the list of ready callbacks, disabling to prevent
-	 * races with call_rcu() from interrupt handlers.
+	 * races with call_rcu() from interrupt handlers.  Leave the
+	 * callback counts, as rcu_barrier() needs to be conservative.
 	 */
 	local_irq_save(flags);
 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
 	bl = rdp->blimit;
-	trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl);
-	list = rdp->nxtlist;
-	rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
-	*rdp->nxttail[RCU_DONE_TAIL] = NULL;
-	tail = rdp->nxttail[RCU_DONE_TAIL];
-	for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
-		if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
-			rdp->nxttail[i] = &rdp->nxtlist;
+	trace_rcu_batch_start(rsp->name, rcu_segcblist_n_lazy_cbs(&rdp->cblist),
+			      rcu_segcblist_n_cbs(&rdp->cblist), bl);
+	rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
 	local_irq_restore(flags);
 
 	/* Invoke callbacks. */
-	count = count_lazy = 0;
-	while (list) {
-		next = list->next;
-		prefetch(next);
-		debug_rcu_head_unqueue(list);
-		if (__rcu_reclaim(rsp->name, list))
-			count_lazy++;
-		list = next;
-		/* Stop only if limit reached and CPU has something to do. */
-		if (++count >= bl &&
+	rhp = rcu_cblist_dequeue(&rcl);
+	for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) {
+		debug_rcu_head_unqueue(rhp);
+		if (__rcu_reclaim(rsp->name, rhp))
+			rcu_cblist_dequeued_lazy(&rcl);
+		/*
+		 * Stop only if limit reached and CPU has something to do.
+		 * Note: The rcl structure counts down from zero.
+		 */
+		if (-rcl.len >= bl &&
 		    (need_resched() ||
 		     (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
 			break;
 	}
 
 	local_irq_save(flags);
-	trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
-			    is_idle_task(current),
-			    rcu_is_callbacks_kthread());
-
-	/* Update count, and requeue any remaining callbacks. */
-	if (list != NULL) {
-		*tail = rdp->nxtlist;
-		rdp->nxtlist = list;
-		for (i = 0; i < RCU_NEXT_SIZE; i++)
-			if (&rdp->nxtlist == rdp->nxttail[i])
-				rdp->nxttail[i] = tail;
-			else
-				break;
-	}
+	count = -rcl.len;
+	trace_rcu_batch_end(rsp->name, count, !!rcl.head, need_resched(),
+			    is_idle_task(current), rcu_is_callbacks_kthread());
+
+	/* Update counts and requeue any remaining callbacks. */
+	rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);
 	smp_mb(); /* List handling before counting for rcu_barrier(). */
-	rdp->qlen_lazy -= count_lazy;
-	WRITE_ONCE(rdp->qlen, rdp->qlen - count);
 	rdp->n_cbs_invoked += count;
+	rcu_segcblist_insert_count(&rdp->cblist, &rcl);
 
 	/* Reinstate batch limit if we have worked down the excess. */
-	if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
+	count = rcu_segcblist_n_cbs(&rdp->cblist);
+	if (rdp->blimit == LONG_MAX && count <= qlowmark)
 		rdp->blimit = blimit;
 
 	/* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
-	if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
+	if (count == 0 && rdp->qlen_last_fqs_check != 0) {
 		rdp->qlen_last_fqs_check = 0;
 		rdp->n_force_qs_snap = rsp->n_force_qs;
-	} else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
-		rdp->qlen_last_fqs_check = rdp->qlen;
-	WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));
+	} else if (count < rdp->qlen_last_fqs_check - qhimark)
+		rdp->qlen_last_fqs_check = count;
+	WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0));
 
 	local_irq_restore(flags);
 
 	/* Re-invoke RCU core processing if there are callbacks remaining. */
-	if (cpu_has_callbacks_ready_to_invoke(rdp))
+	if (rcu_segcblist_ready_cbs(&rdp->cblist))
 		invoke_rcu_core();
 }
 
@@ -2992,10 +2872,7 @@ void rcu_check_callbacks(int user)
  *
  * The caller must have suppressed start of new grace periods.
  */
-static void force_qs_rnp(struct rcu_state *rsp,
-			 int (*f)(struct rcu_data *rsp, bool *isidle,
-				  unsigned long *maxj),
-			 bool *isidle, unsigned long *maxj)
+static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
 {
 	int cpu;
 	unsigned long flags;
@@ -3034,7 +2911,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
 		for_each_leaf_node_possible_cpu(rnp, cpu) {
 			unsigned long bit = leaf_node_cpu_bit(rnp, cpu);
 			if ((rnp->qsmask & bit) != 0) {
-				if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
+				if (f(per_cpu_ptr(rsp->rda, cpu)))
 					mask |= bit;
 			}
 		}
@@ -3099,7 +2976,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 	bool needwake;
 	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
 
-	WARN_ON_ONCE(rdp->beenonline == 0);
+	WARN_ON_ONCE(!rdp->beenonline);
 
 	/* Update RCU state based on any recent quiescent states. */
 	rcu_check_quiescent_state(rsp, rdp);
@@ -3117,7 +2994,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 	}
 
 	/* If there are callbacks ready, invoke them. */
-	if (cpu_has_callbacks_ready_to_invoke(rdp))
+	if (rcu_segcblist_ready_cbs(&rdp->cblist))
 		invoke_rcu_callbacks(rsp, rdp);
 
 	/* Do any needed deferred wakeups of rcuo kthreads. */
@@ -3189,7 +3066,8 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 	 * invoking force_quiescent_state() if the newly enqueued callback
 	 * is the only one waiting for a grace period to complete.
 	 */
-	if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
+	if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) >
+		     rdp->qlen_last_fqs_check + qhimark)) {
 
 		/* Are we ignoring a completed grace period? */
 		note_gp_changes(rsp, rdp);
@@ -3207,10 +3085,10 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 			/* Give the grace period a kick. */
 			rdp->blimit = LONG_MAX;
 			if (rsp->n_force_qs == rdp->n_force_qs_snap &&
-			    *rdp->nxttail[RCU_DONE_TAIL] != head)
+			    rcu_segcblist_first_pend_cb(&rdp->cblist) != head)
 				force_quiescent_state(rsp);
 			rdp->n_force_qs_snap = rsp->n_force_qs;
-			rdp->qlen_last_fqs_check = rdp->qlen;
+			rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
 		}
 	}
 }
@@ -3239,9 +3117,14 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
 	WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1));
 
 	if (debug_rcu_head_queue(head)) {
-		/* Probable double call_rcu(), so leak the callback. */
+		/*
+		 * Probable double call_rcu(), so leak the callback.
+		 * Use rcu:rcu_callback trace event to find the previous
+		 * time callback was passed to __call_rcu().
+		 */
+		WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pF()!!!\n",
+			  head, head->func);
 		WRITE_ONCE(head->func, rcu_leak_callback);
-		WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n");
 		return;
 	}
 	head->func = func;
@@ -3250,7 +3133,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
 	rdp = this_cpu_ptr(rsp->rda);
 
 	/* Add the callback to our list. */
-	if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
+	if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist)) || cpu != -1) {
 		int offline;
 
 		if (cpu != -1)
@@ -3269,31 +3152,45 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
 		 */
 		BUG_ON(cpu != -1);
 		WARN_ON_ONCE(!rcu_is_watching());
-		if (!likely(rdp->nxtlist))
-			init_default_callback_list(rdp);
+		if (rcu_segcblist_empty(&rdp->cblist))
+			rcu_segcblist_init(&rdp->cblist);
 	}
-	WRITE_ONCE(rdp->qlen, rdp->qlen + 1);
-	if (lazy)
-		rdp->qlen_lazy++;
-	else
+	rcu_segcblist_enqueue(&rdp->cblist, head, lazy);
+	if (!lazy)
 		rcu_idle_count_callbacks_posted();
-	smp_mb();  /* Count before adding callback for rcu_barrier(). */
-	*rdp->nxttail[RCU_NEXT_TAIL] = head;
-	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
 
 	if (__is_kfree_rcu_offset((unsigned long)func))
 		trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
-					 rdp->qlen_lazy, rdp->qlen);
+					 rcu_segcblist_n_lazy_cbs(&rdp->cblist),
+					 rcu_segcblist_n_cbs(&rdp->cblist));
 	else
-		trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
+		trace_rcu_callback(rsp->name, head,
+				   rcu_segcblist_n_lazy_cbs(&rdp->cblist),
+				   rcu_segcblist_n_cbs(&rdp->cblist));
 
 	/* Go handle any RCU core processing required. */
 	__call_rcu_core(rsp, rdp, head, flags);
 	local_irq_restore(flags);
 }
 
-/*
- * Queue an RCU-sched callback for invocation after a grace period.
+/**
+ * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_sched() assumes
+ * that the read-side critical sections end on enabling of preemption
+ * or on voluntary preemption.
+ * RCU read-side critical sections are delimited by :
+ *  - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
+ *  - anything that disables preemption.
+ *
+ *  These may be nested.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
  */
 void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
 {
@@ -3301,8 +3198,26 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
-/*
- * Queue an RCU callback for invocation after a quicker grace period.
+/**
+ * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_bh() assumes
+ * that the read-side critical sections end on completion of a softirq
+ * handler. This means that read-side critical sections in process
+ * context must not be interrupted by softirqs. This interface is to be
+ * used when most of the read-side critical sections are in softirq context.
+ * RCU read-side critical sections are delimited by :
+ *  - rcu_read_lock() and  rcu_read_unlock(), if in interrupt context.
+ *  OR
+ *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
+ *  These may be nested.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
  */
 void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
 {
@@ -3378,12 +3293,6 @@ static inline int rcu_blocking_is_gp(void)
  * to have executed a full memory barrier during the execution of
  * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
  * again only if the system has more than one CPU).
- *
- * This primitive provides the guarantees made by the (now removed)
- * synchronize_kernel() API.  In contrast, synchronize_rcu() only
- * guarantees that rcu_read_lock() sections will have completed.
- * In "classic RCU", these two guarantees happen to be one and
- * the same, but can differ in realtime RCU implementations.
  */
 void synchronize_sched(void)
 {
@@ -3531,41 +3440,6 @@ void cond_synchronize_sched(unsigned long oldstate)
 }
 EXPORT_SYMBOL_GPL(cond_synchronize_sched);
 
-/* Adjust sequence number for start of update-side operation. */
-static void rcu_seq_start(unsigned long *sp)
-{
-	WRITE_ONCE(*sp, *sp + 1);
-	smp_mb(); /* Ensure update-side operation after counter increment. */
-	WARN_ON_ONCE(!(*sp & 0x1));
-}
-
-/* Adjust sequence number for end of update-side operation. */
-static void rcu_seq_end(unsigned long *sp)
-{
-	smp_mb(); /* Ensure update-side operation before counter increment. */
-	WRITE_ONCE(*sp, *sp + 1);
-	WARN_ON_ONCE(*sp & 0x1);
-}
-
-/* Take a snapshot of the update side's sequence number. */
-static unsigned long rcu_seq_snap(unsigned long *sp)
-{
-	unsigned long s;
-
-	s = (READ_ONCE(*sp) + 3) & ~0x1;
-	smp_mb(); /* Above access must not bleed into critical section. */
-	return s;
-}
-
-/*
- * Given a snapshot from rcu_seq_snap(), determine whether or not a
- * full update-side operation has occurred.
- */
-static bool rcu_seq_done(unsigned long *sp, unsigned long s)
-{
-	return ULONG_CMP_GE(READ_ONCE(*sp), s);
-}
-
 /*
  * Check to see if there is any immediate RCU-related work to be done
  * by the current CPU, for the specified type of RCU, returning 1 if so.
@@ -3589,7 +3463,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 	/* Is the RCU core waiting for a quiescent state from this CPU? */
 	if (rcu_scheduler_fully_active &&
 	    rdp->core_needs_qs && rdp->cpu_no_qs.b.norm &&
-	    rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) {
+	    rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_dynticks.rcu_qs_ctr)) {
 		rdp->n_rp_core_needs_qs++;
 	} else if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) {
 		rdp->n_rp_report_qs++;
@@ -3597,7 +3471,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 	}
 
 	/* Does this CPU have callbacks ready to invoke? */
-	if (cpu_has_callbacks_ready_to_invoke(rdp)) {
+	if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
 		rdp->n_rp_cb_ready++;
 		return 1;
 	}
@@ -3661,10 +3535,10 @@ static bool __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy)
 
 	for_each_rcu_flavor(rsp) {
 		rdp = this_cpu_ptr(rsp->rda);
-		if (!rdp->nxtlist)
+		if (rcu_segcblist_empty(&rdp->cblist))
 			continue;
 		hc = true;
-		if (rdp->qlen != rdp->qlen_lazy || !all_lazy) {
+		if (rcu_segcblist_n_nonlazy_cbs(&rdp->cblist) || !all_lazy) {
 			al = false;
 			break;
 		}
@@ -3711,8 +3585,14 @@ static void rcu_barrier_func(void *type)
 	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
 
 	_rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence);
-	atomic_inc(&rsp->barrier_cpu_count);
-	rsp->call(&rdp->barrier_head, rcu_barrier_callback);
+	rdp->barrier_head.func = rcu_barrier_callback;
+	debug_rcu_head_queue(&rdp->barrier_head);
+	if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
+		atomic_inc(&rsp->barrier_cpu_count);
+	} else {
+		debug_rcu_head_unqueue(&rdp->barrier_head);
+		_rcu_barrier_trace(rsp, "IRQNQ", -1, rsp->barrier_sequence);
+	}
 }
 
 /*
@@ -3773,7 +3653,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 				__call_rcu(&rdp->barrier_head,
 					   rcu_barrier_callback, rsp, cpu, 0);
 			}
-		} else if (READ_ONCE(rdp->qlen)) {
+		} else if (rcu_segcblist_n_cbs(&rdp->cblist)) {
 			_rcu_barrier_trace(rsp, "OnlineQ", cpu,
 					   rsp->barrier_sequence);
 			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
@@ -3831,6 +3711,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
 	long mask;
 	struct rcu_node *rnp = rnp_leaf;
 
+	lockdep_assert_held(&rnp->lock);
 	for (;;) {
 		mask = rnp->grpmask;
 		rnp = rnp->parent;
@@ -3882,10 +3763,10 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->qlen_last_fqs_check = 0;
 	rdp->n_force_qs_snap = rsp->n_force_qs;
 	rdp->blimit = blimit;
-	if (!rdp->nxtlist)
-		init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
+	if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */
+	    !init_nocb_callback_list(rdp))
+		rcu_segcblist_init(&rdp->cblist);  /* Re-enable callbacks. */
 	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-	rcu_sysidle_init_percpu_data(rdp->dynticks);
 	rcu_dynticks_eqs_online();
 	raw_spin_unlock_rcu_node(rnp);		/* irqs remain disabled. */
 
@@ -3902,12 +3783,16 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
 	rdp->completed = rnp->completed;
 	rdp->cpu_no_qs.b.norm = true;
-	rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu);
+	rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu);
 	rdp->core_needs_qs = false;
 	trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
+/*
+ * Invoked early in the CPU-online process, when pretty much all
+ * services are available.  The incoming CPU is not present.
+ */
 int rcutree_prepare_cpu(unsigned int cpu)
 {
 	struct rcu_state *rsp;
@@ -3921,6 +3806,9 @@ int rcutree_prepare_cpu(unsigned int cpu)
 	return 0;
 }
 
+/*
+ * Update RCU priority boot kthread affinity for CPU-hotplug changes.
+ */
 static void rcutree_affinity_setting(unsigned int cpu, int outgoing)
 {
 	struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
@@ -3928,20 +3816,34 @@ static void rcutree_affinity_setting(unsigned int cpu, int outgoing)
 	rcu_boost_kthread_setaffinity(rdp->mynode, outgoing);
 }
 
+/*
+ * Near the end of the CPU-online process.  Pretty much all services
+ * enabled, and the CPU is now very much alive.
+ */
 int rcutree_online_cpu(unsigned int cpu)
 {
 	sync_sched_exp_online_cleanup(cpu);
 	rcutree_affinity_setting(cpu, -1);
+	if (IS_ENABLED(CONFIG_TREE_SRCU))
+		srcu_online_cpu(cpu);
 	return 0;
 }
 
+/*
+ * Near the beginning of the process.  The CPU is still very much alive
+ * with pretty much all services enabled.
+ */
 int rcutree_offline_cpu(unsigned int cpu)
 {
 	rcutree_affinity_setting(cpu, cpu);
+	if (IS_ENABLED(CONFIG_TREE_SRCU))
+		srcu_offline_cpu(cpu);
 	return 0;
 }
 
-
+/*
+ * Near the end of the offline process.  We do only tracing here.
+ */
 int rcutree_dying_cpu(unsigned int cpu)
 {
 	struct rcu_state *rsp;
@@ -3951,6 +3853,9 @@ int rcutree_dying_cpu(unsigned int cpu)
 	return 0;
 }
 
+/*
+ * The outgoing CPU is gone and we are running elsewhere.
+ */
 int rcutree_dead_cpu(unsigned int cpu)
 {
 	struct rcu_state *rsp;
@@ -3968,6 +3873,10 @@ int rcutree_dead_cpu(unsigned int cpu)
  * incoming CPUs are not allowed to use RCU read-side critical sections
  * until this function is called.  Failing to observe this restriction
  * will result in lockdep splats.
+ *
+ * Note that this function is special in that it is invoked directly
+ * from the incoming CPU rather than from the cpuhp_step mechanism.
+ * This is because this function must be invoked at a precise location.
  */
 void rcu_cpu_starting(unsigned int cpu)
 {
@@ -3993,9 +3902,6 @@ void rcu_cpu_starting(unsigned int cpu)
  * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
  * function.  We now remove it from the rcu_node tree's ->qsmaskinit
  * bit masks.
- * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
- * function.  We now remove it from the rcu_node tree's ->qsmaskinit
- * bit masks.
  */
 static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
 {
@@ -4011,6 +3917,14 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
+/*
+ * The outgoing function has no further need of RCU, so remove it from
+ * the list of CPUs that RCU must track.
+ *
+ * Note that this function is special in that it is invoked directly
+ * from the outgoing CPU rather than from the cpuhp_step mechanism.
+ * This is because this function must be invoked at a precise location.
+ */
 void rcu_report_dead(unsigned int cpu)
 {
 	struct rcu_state *rsp;
@@ -4025,6 +3939,10 @@ void rcu_report_dead(unsigned int cpu)
 }
 #endif
 
+/*
+ * On non-huge systems, use expedited RCU grace periods to make suspend
+ * and hibernation run faster.
+ */
 static int rcu_pm_notify(struct notifier_block *self,
 			 unsigned long action, void *hcpu)
 {
@@ -4095,7 +4013,7 @@ early_initcall(rcu_spawn_gp_kthread);
  * task is booting the system, and such primitives are no-ops).  After this
  * function is called, any synchronous grace-period primitives are run as
  * expedited, with the requesting task driving the grace period forward.
- * A later core_initcall() rcu_exp_runtime_mode() will switch to full
+ * A later core_initcall() rcu_set_runtime_mode() will switch to full
  * runtime RCU functionality.
  */
 void rcu_scheduler_starting(void)
@@ -4108,31 +4026,6 @@ void rcu_scheduler_starting(void)
 }
 
 /*
- * Compute the per-level fanout, either using the exact fanout specified
- * or balancing the tree, depending on the rcu_fanout_exact boot parameter.
- */
-static void __init rcu_init_levelspread(int *levelspread, const int *levelcnt)
-{
-	int i;
-
-	if (rcu_fanout_exact) {
-		levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
-		for (i = rcu_num_lvls - 2; i >= 0; i--)
-			levelspread[i] = RCU_FANOUT;
-	} else {
-		int ccur;
-		int cprv;
-
-		cprv = nr_cpu_ids;
-		for (i = rcu_num_lvls - 1; i >= 0; i--) {
-			ccur = levelcnt[i];
-			levelspread[i] = (cprv + ccur - 1) / ccur;
-			cprv = ccur;
-		}
-	}
-}
-
-/*
  * Helper function for rcu_init() that initializes one rcu_state structure.
  */
 static void __init rcu_init_one(struct rcu_state *rsp)
@@ -4141,9 +4034,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 	static const char * const fqs[] = RCU_FQS_NAME_INIT;
 	static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 	static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
-	static u8 fl_mask = 0x1;
 
-	int levelcnt[RCU_NUM_LVLS];		/* # nodes in each level. */
 	int levelspread[RCU_NUM_LVLS];		/* kids/node in each level. */
 	int cpustride = 1;
 	int i;
@@ -4158,20 +4049,16 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 
 	/* Initialize the level-tracking arrays. */
 
-	for (i = 0; i < rcu_num_lvls; i++)
-		levelcnt[i] = num_rcu_lvl[i];
 	for (i = 1; i < rcu_num_lvls; i++)
-		rsp->level[i] = rsp->level[i - 1] + levelcnt[i - 1];
-	rcu_init_levelspread(levelspread, levelcnt);
-	rsp->flavor_mask = fl_mask;
-	fl_mask <<= 1;
+		rsp->level[i] = rsp->level[i - 1] + num_rcu_lvl[i - 1];
+	rcu_init_levelspread(levelspread, num_rcu_lvl);
 
 	/* Initialize the elements themselves, starting from the leaves. */
 
 	for (i = rcu_num_lvls - 1; i >= 0; i--) {
 		cpustride *= levelspread[i];
 		rnp = rsp->level[i];
-		for (j = 0; j < levelcnt[i]; j++, rnp++) {
+		for (j = 0; j < num_rcu_lvl[i]; j++, rnp++) {
 			raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock));
 			lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock),
 						   &rcu_node_class[i], buf[i]);
@@ -4344,6 +4231,8 @@ void __init rcu_init(void)
 	for_each_online_cpu(cpu) {
 		rcutree_prepare_cpu(cpu);
 		rcu_cpu_starting(cpu);
+		if (IS_ENABLED(CONFIG_TREE_SRCU))
+			srcu_online_cpu(cpu);
 	}
 }
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index ec62a05bfdb3..9af0f31d6847 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -30,80 +30,9 @@
 #include <linux/seqlock.h>
 #include <linux/swait.h>
 #include <linux/stop_machine.h>
+#include <linux/rcu_node_tree.h>
 
-/*
- * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
- * CONFIG_RCU_FANOUT_LEAF.
- * In theory, it should be possible to add more levels straightforwardly.
- * In practice, this did work well going from three levels to four.
- * Of course, your mileage may vary.
- */
-
-#ifdef CONFIG_RCU_FANOUT
-#define RCU_FANOUT CONFIG_RCU_FANOUT
-#else /* #ifdef CONFIG_RCU_FANOUT */
-# ifdef CONFIG_64BIT
-# define RCU_FANOUT 64
-# else
-# define RCU_FANOUT 32
-# endif
-#endif /* #else #ifdef CONFIG_RCU_FANOUT */
-
-#ifdef CONFIG_RCU_FANOUT_LEAF
-#define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
-#else /* #ifdef CONFIG_RCU_FANOUT_LEAF */
-# ifdef CONFIG_64BIT
-# define RCU_FANOUT_LEAF 64
-# else
-# define RCU_FANOUT_LEAF 32
-# endif
-#endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */
-
-#define RCU_FANOUT_1	      (RCU_FANOUT_LEAF)
-#define RCU_FANOUT_2	      (RCU_FANOUT_1 * RCU_FANOUT)
-#define RCU_FANOUT_3	      (RCU_FANOUT_2 * RCU_FANOUT)
-#define RCU_FANOUT_4	      (RCU_FANOUT_3 * RCU_FANOUT)
-
-#if NR_CPUS <= RCU_FANOUT_1
-#  define RCU_NUM_LVLS	      1
-#  define NUM_RCU_LVL_0	      1
-#  define NUM_RCU_NODES	      NUM_RCU_LVL_0
-#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0 }
-#  define RCU_NODE_NAME_INIT  { "rcu_node_0" }
-#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0" }
-#elif NR_CPUS <= RCU_FANOUT_2
-#  define RCU_NUM_LVLS	      2
-#  define NUM_RCU_LVL_0	      1
-#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1)
-#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
-#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1" }
-#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1" }
-#elif NR_CPUS <= RCU_FANOUT_3
-#  define RCU_NUM_LVLS	      3
-#  define NUM_RCU_LVL_0	      1
-#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
-#  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2)
-#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
-#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
-#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
-#elif NR_CPUS <= RCU_FANOUT_4
-#  define RCU_NUM_LVLS	      4
-#  define NUM_RCU_LVL_0	      1
-#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
-#  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
-#  define NUM_RCU_LVL_3	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
-#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
-#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
-#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
-#else
-# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
-#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
-
-extern int rcu_num_lvls;
-extern int rcu_num_nodes;
+#include "rcu_segcblist.h"
 
 /*
  * Dynticks per-CPU state.
@@ -113,14 +42,9 @@ struct rcu_dynticks {
 				    /* Process level is worth LLONG_MAX/2. */
 	int dynticks_nmi_nesting;   /* Track NMI nesting level. */
 	atomic_t dynticks;	    /* Even value for idle, else odd. */
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-	long long dynticks_idle_nesting;
-				    /* irq/process nesting level from idle. */
-	atomic_t dynticks_idle;	    /* Even value for idle, else odd. */
-				    /*  "Idle" excludes userspace execution. */
-	unsigned long dynticks_idle_jiffies;
-				    /* End of last non-NMI non-idle period. */
-#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+	bool rcu_need_heavy_qs;     /* GP old, need heavy quiescent state. */
+	unsigned long rcu_qs_ctr;   /* Light universal quiescent state ctr. */
+	bool rcu_urgent_qs;	    /* GP old need light quiescent state. */
 #ifdef CONFIG_RCU_FAST_NO_HZ
 	bool all_lazy;		    /* Are all CPU's CBs lazy? */
 	unsigned long nonlazy_posted;
@@ -228,19 +152,6 @@ struct rcu_node {
 				/* Number of tasks boosted for expedited GP. */
 	unsigned long n_normal_boosts;
 				/* Number of tasks boosted for normal GP. */
-	unsigned long n_balk_blkd_tasks;
-				/* Refused to boost: no blocked tasks. */
-	unsigned long n_balk_exp_gp_tasks;
-				/* Refused to boost: nothing blocking GP. */
-	unsigned long n_balk_boost_tasks;
-				/* Refused to boost: already boosting. */
-	unsigned long n_balk_notblocked;
-				/* Refused to boost: RCU RS CS still running. */
-	unsigned long n_balk_notyet;
-				/* Refused to boost: not yet time. */
-	unsigned long n_balk_nos;
-				/* Refused to boost: not sure why, though. */
-				/*  This can happen due to race conditions. */
 #ifdef CONFIG_RCU_NOCB_CPU
 	struct swait_queue_head nocb_gp_wq[2];
 				/* Place for rcu_nocb_kthread() to wait GP. */
@@ -262,41 +173,6 @@ struct rcu_node {
 #define leaf_node_cpu_bit(rnp, cpu) (1UL << ((cpu) - (rnp)->grplo))
 
 /*
- * Do a full breadth-first scan of the rcu_node structures for the
- * specified rcu_state structure.
- */
-#define rcu_for_each_node_breadth_first(rsp, rnp) \
-	for ((rnp) = &(rsp)->node[0]; \
-	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
-
-/*
- * Do a breadth-first scan of the non-leaf rcu_node structures for the
- * specified rcu_state structure.  Note that if there is a singleton
- * rcu_node tree with but one rcu_node structure, this loop is a no-op.
- */
-#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
-	for ((rnp) = &(rsp)->node[0]; \
-	     (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
-
-/*
- * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
- * structure.  Note that if there is a singleton rcu_node tree with but
- * one rcu_node structure, this loop -will- visit the rcu_node structure.
- * It is still a leaf node, even if it is also the root node.
- */
-#define rcu_for_each_leaf_node(rsp, rnp) \
-	for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
-	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
-
-/*
- * Iterate over all possible CPUs in a leaf RCU node.
- */
-#define for_each_leaf_node_possible_cpu(rnp, cpu) \
-	for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \
-	     cpu <= rnp->grphi; \
-	     cpu = cpumask_next((cpu), cpu_possible_mask))
-
-/*
  * Union to allow "aggregate OR" operation on the need for a quiescent
  * state by the normal and expedited grace periods.
  */
@@ -336,34 +212,9 @@ struct rcu_data {
 					/* period it is aware of. */
 
 	/* 2) batch handling */
-	/*
-	 * If nxtlist is not NULL, it is partitioned as follows.
-	 * Any of the partitions might be empty, in which case the
-	 * pointer to that partition will be equal to the pointer for
-	 * the following partition.  When the list is empty, all of
-	 * the nxttail elements point to the ->nxtlist pointer itself,
-	 * which in that case is NULL.
-	 *
-	 * [nxtlist, *nxttail[RCU_DONE_TAIL]):
-	 *	Entries that batch # <= ->completed
-	 *	The grace period for these entries has completed, and
-	 *	the other grace-period-completed entries may be moved
-	 *	here temporarily in rcu_process_callbacks().
-	 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
-	 *	Entries that batch # <= ->completed - 1: waiting for current GP
-	 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
-	 *	Entries known to have arrived before current GP ended
-	 * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]):
-	 *	Entries that might have arrived after current GP ended
-	 *	Note that the value of *nxttail[RCU_NEXT_TAIL] will
-	 *	always be NULL, as this is the end of the list.
-	 */
-	struct rcu_head *nxtlist;
-	struct rcu_head **nxttail[RCU_NEXT_SIZE];
-	unsigned long	nxtcompleted[RCU_NEXT_SIZE];
-					/* grace periods for sublists. */
-	long		qlen_lazy;	/* # of lazy queued callbacks */
-	long		qlen;		/* # of queued callbacks, incl lazy */
+	struct rcu_segcblist cblist;	/* Segmented callback list, with */
+					/* different callbacks waiting for */
+					/* different grace periods. */
 	long		qlen_last_fqs_check;
 					/* qlen at last check for QS forcing */
 	unsigned long	n_cbs_invoked;	/* count of RCU cbs invoked. */
@@ -440,9 +291,9 @@ struct rcu_data {
 };
 
 /* Values for nocb_defer_wakeup field in struct rcu_data. */
-#define RCU_NOGP_WAKE_NOT	0
-#define RCU_NOGP_WAKE		1
-#define RCU_NOGP_WAKE_FORCE	2
+#define RCU_NOCB_WAKE_NOT	0
+#define RCU_NOCB_WAKE		1
+#define RCU_NOCB_WAKE_FORCE	2
 
 #define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
 					/* For jiffies_till_first_fqs and */
@@ -482,7 +333,6 @@ struct rcu_state {
 	struct rcu_node *level[RCU_NUM_LVLS + 1];
 						/* Hierarchy levels (+1 to */
 						/*  shut bogus gcc warning) */
-	u8 flavor_mask;				/* bit in flavor mask. */
 	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */
 	call_rcu_func_t call;			/* call_rcu() flavor. */
 	int ncpus;				/* # CPUs seen so far. */
@@ -502,14 +352,11 @@ struct rcu_state {
 
 	raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp;
 						/* Protect following fields. */
-	struct rcu_head *orphan_nxtlist;	/* Orphaned callbacks that */
+	struct rcu_cblist orphan_pend;		/* Orphaned callbacks that */
 						/*  need a grace period. */
-	struct rcu_head **orphan_nxttail;	/* Tail of above. */
-	struct rcu_head *orphan_donelist;	/* Orphaned callbacks that */
+	struct rcu_cblist orphan_done;		/* Orphaned callbacks that */
 						/*  are ready to invoke. */
-	struct rcu_head **orphan_donetail;	/* Tail of above. */
-	long qlen_lazy;				/* Number of lazy callbacks. */
-	long qlen;				/* Total number of callbacks. */
+						/* (Contains counts.) */
 	/* End of fields guarded by orphan_lock. */
 
 	struct mutex barrier_mutex;		/* Guards barrier fields. */
@@ -596,6 +443,7 @@ extern struct rcu_state rcu_preempt_state;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 
 int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
+bool rcu_eqs_special_set(int cpu);
 
 #ifdef CONFIG_RCU_BOOST
 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
@@ -608,7 +456,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
 
 /* Forward declarations for rcutree_plugin.h */
 static void rcu_bootup_announce(void);
-static void rcu_preempt_note_context_switch(void);
+static void rcu_preempt_note_context_switch(bool preempt);
 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
 #ifdef CONFIG_HOTPLUG_CPU
 static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
@@ -660,89 +508,17 @@ static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp);
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
 static bool init_nocb_callback_list(struct rcu_data *rdp);
-static void rcu_sysidle_enter(int irq);
-static void rcu_sysidle_exit(int irq);
-static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
-				  unsigned long *maxj);
-static bool is_sysidle_rcu_state(struct rcu_state *rsp);
-static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
-				  unsigned long maxj);
 static void rcu_bind_gp_kthread(void);
-static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
 static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
 static void rcu_dynticks_task_enter(void);
 static void rcu_dynticks_task_exit(void);
 
-#endif /* #ifndef RCU_TREE_NONCORE */
+#ifdef CONFIG_SRCU
+void srcu_online_cpu(unsigned int cpu);
+void srcu_offline_cpu(unsigned int cpu);
+#else /* #ifdef CONFIG_SRCU */
+void srcu_online_cpu(unsigned int cpu) { }
+void srcu_offline_cpu(unsigned int cpu) { }
+#endif /* #else #ifdef CONFIG_SRCU */
 
-#ifdef CONFIG_RCU_TRACE
-/* Read out queue lengths for tracing. */
-static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
-{
-#ifdef CONFIG_RCU_NOCB_CPU
-	*ql = atomic_long_read(&rdp->nocb_q_count);
-	*qll = atomic_long_read(&rdp->nocb_q_count_lazy);
-#else /* #ifdef CONFIG_RCU_NOCB_CPU */
-	*ql = 0;
-	*qll = 0;
-#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
-}
-#endif /* #ifdef CONFIG_RCU_TRACE */
-
-/*
- * Wrappers for the rcu_node::lock acquire and release.
- *
- * Because the rcu_nodes form a tree, the tree traversal locking will observe
- * different lock values, this in turn means that an UNLOCK of one level
- * followed by a LOCK of another level does not imply a full memory barrier;
- * and most importantly transitivity is lost.
- *
- * In order to restore full ordering between tree levels, augment the regular
- * lock acquire functions with smp_mb__after_unlock_lock().
- *
- * As ->lock of struct rcu_node is a __private field, therefore one should use
- * these wrappers rather than directly call raw_spin_{lock,unlock}* on ->lock.
- */
-static inline void raw_spin_lock_rcu_node(struct rcu_node *rnp)
-{
-	raw_spin_lock(&ACCESS_PRIVATE(rnp, lock));
-	smp_mb__after_unlock_lock();
-}
-
-static inline void raw_spin_unlock_rcu_node(struct rcu_node *rnp)
-{
-	raw_spin_unlock(&ACCESS_PRIVATE(rnp, lock));
-}
-
-static inline void raw_spin_lock_irq_rcu_node(struct rcu_node *rnp)
-{
-	raw_spin_lock_irq(&ACCESS_PRIVATE(rnp, lock));
-	smp_mb__after_unlock_lock();
-}
-
-static inline void raw_spin_unlock_irq_rcu_node(struct rcu_node *rnp)
-{
-	raw_spin_unlock_irq(&ACCESS_PRIVATE(rnp, lock));
-}
-
-#define raw_spin_lock_irqsave_rcu_node(rnp, flags)			\
-do {									\
-	typecheck(unsigned long, flags);				\
-	raw_spin_lock_irqsave(&ACCESS_PRIVATE(rnp, lock), flags);	\
-	smp_mb__after_unlock_lock();					\
-} while (0)
-
-#define raw_spin_unlock_irqrestore_rcu_node(rnp, flags)			\
-do {									\
-	typecheck(unsigned long, flags);				\
-	raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(rnp, lock), flags);	\
-} while (0)
-
-static inline bool raw_spin_trylock_rcu_node(struct rcu_node *rnp)
-{
-	bool locked = raw_spin_trylock(&ACCESS_PRIVATE(rnp, lock));
-
-	if (locked)
-		smp_mb__after_unlock_lock();
-	return locked;
-}
+#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index a7b639ccd46e..dd21ca47e4b4 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -147,7 +147,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
  *
  * Caller must hold the rcu_state's exp_mutex.
  */
-static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
+static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
 {
 	return rnp->exp_tasks == NULL &&
 	       READ_ONCE(rnp->expmask) == 0;
@@ -292,7 +292,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 			trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
 						  rnp->grplo, rnp->grphi,
 						  TPS("wait"));
-			wait_event(rnp->exp_wq[(s >> 1) & 0x3],
+			wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
 				   sync_exp_work_done(rsp,
 						      &rdp->exp_workdone2, s));
 			return true;
@@ -331,6 +331,8 @@ static void sync_sched_exp_handler(void *data)
 		return;
 	}
 	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
+	/* Store .exp before .rcu_urgent_qs. */
+	smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
 	resched_cpu(smp_processor_id());
 }
 
@@ -531,7 +533,8 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
 				rnp->exp_seq_rq = s;
 			spin_unlock(&rnp->exp_lock);
 		}
-		wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]);
+		smp_mb(); /* All above changes before wakeup. */
+		wake_up_all(&rnp->exp_wq[rcu_seq_ctr(rsp->expedited_sequence) & 0x3]);
 	}
 	trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
 	mutex_unlock(&rsp->exp_wake_mutex);
@@ -609,9 +612,9 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
 	/* Wait for expedited grace period to complete. */
 	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
 	rnp = rcu_get_root(rsp);
-	wait_event(rnp->exp_wq[(s >> 1) & 0x3],
-		   sync_exp_work_done(rsp,
-				      &rdp->exp_workdone0, s));
+	wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
+		   sync_exp_work_done(rsp, &rdp->exp_workdone0, s));
+	smp_mb(); /* Workqueue actions happen before return. */
 
 	/* Let the next expedited grace period start. */
 	mutex_unlock(&rsp->exp_mutex);
@@ -735,15 +738,3 @@ void synchronize_rcu_expedited(void)
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
-
-/*
- * Switch to run-time mode once Tree RCU has fully initialized.
- */
-static int __init rcu_exp_runtime_mode(void)
-{
-	rcu_test_sync_prims();
-	rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
-	rcu_test_sync_prims();
-	return 0;
-}
-core_initcall(rcu_exp_runtime_mode);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0a62a8f1caac..908b309d60d7 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -70,7 +70,7 @@ static bool __read_mostly rcu_nocb_poll;    /* Offload kthread are to poll. */
 static void __init rcu_bootup_announce_oddness(void)
 {
 	if (IS_ENABLED(CONFIG_RCU_TRACE))
-		pr_info("\tRCU debugfs-based tracing is enabled.\n");
+		pr_info("\tRCU event tracing is enabled.\n");
 	if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) ||
 	    (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32))
 		pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
@@ -90,8 +90,32 @@ static void __init rcu_bootup_announce_oddness(void)
 		pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
 	if (nr_cpu_ids != NR_CPUS)
 		pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
-	if (IS_ENABLED(CONFIG_RCU_BOOST))
-		pr_info("\tRCU kthread priority: %d.\n", kthread_prio);
+#ifdef CONFIG_RCU_BOOST
+	pr_info("\tRCU priority boosting: priority %d delay %d ms.\n", kthread_prio, CONFIG_RCU_BOOST_DELAY);
+#endif
+	if (blimit != DEFAULT_RCU_BLIMIT)
+		pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit);
+	if (qhimark != DEFAULT_RCU_QHIMARK)
+		pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark);
+	if (qlowmark != DEFAULT_RCU_QLOMARK)
+		pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark);
+	if (jiffies_till_first_fqs != ULONG_MAX)
+		pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
+	if (jiffies_till_next_fqs != ULONG_MAX)
+		pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs);
+	if (rcu_kick_kthreads)
+		pr_info("\tKick kthreads if too-long grace period.\n");
+	if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
+		pr_info("\tRCU callback double-/use-after-free debug enabled.\n");
+	if (gp_preinit_delay)
+		pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay);
+	if (gp_init_delay)
+		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
+	if (gp_cleanup_delay)
+		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
+	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
+		pr_info("\tRCU debug extended QS entry/exit.\n");
+	rcupdate_announce_bootup_oddness();
 }
 
 #ifdef CONFIG_PREEMPT_RCU
@@ -155,6 +179,8 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
 			 (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
 	struct task_struct *t = current;
 
+	lockdep_assert_held(&rnp->lock);
+
 	/*
 	 * Decide where to queue the newly blocked task.  In theory,
 	 * this could be an if-statement.  In practice, when I tried
@@ -263,6 +289,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
  */
 static void rcu_preempt_qs(void)
 {
+	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n");
 	if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) {
 		trace_rcu_grace_period(TPS("rcu_preempt"),
 				       __this_cpu_read(rcu_data_p->gpnum),
@@ -286,12 +313,14 @@ static void rcu_preempt_qs(void)
  *
  * Caller must disable interrupts.
  */
-static void rcu_preempt_note_context_switch(void)
+static void rcu_preempt_note_context_switch(bool preempt)
 {
 	struct task_struct *t = current;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_preempt_note_context_switch() invoked with interrupts enabled!!!\n");
+	WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
 	if (t->rcu_read_lock_nesting > 0 &&
 	    !t->rcu_read_unlock_special.b.blocked) {
 
@@ -607,6 +636,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
  */
 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 {
+	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
 	WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
 	if (rcu_preempt_has_tasks(rnp))
 		rnp->gp_tasks = rnp->blkd_tasks.next;
@@ -643,8 +673,37 @@ static void rcu_preempt_do_callbacks(void)
 
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
-/*
- * Queue a preemptible-RCU callback for invocation after a grace period.
+/**
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed.  However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked.  RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical section.  On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu().  It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section.  Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
  */
 void call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
@@ -663,8 +722,13 @@ EXPORT_SYMBOL_GPL(call_rcu);
  * synchronize_rcu() was waiting.  RCU read-side critical sections are
  * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
  *
- * See the description of synchronize_sched() for more detailed information
- * on memory ordering guarantees.
+ * See the description of synchronize_sched() for more detailed
+ * information on memory-ordering guarantees.  However, please note
+ * that -only- the memory-ordering guarantees apply.  For example,
+ * synchronize_rcu() is -not- guaranteed to wait on things like code
+ * protected by preempt_disable(), instead, synchronize_rcu() is -only-
+ * guaranteed to wait on RCU read-side critical sections, that is, sections
+ * of code protected by rcu_read_lock().
  */
 void synchronize_rcu(void)
 {
@@ -738,7 +802,7 @@ static void __init rcu_bootup_announce(void)
  * Because preemptible RCU does not exist, we never have to check for
  * CPUs being in quiescent states.
  */
-static void rcu_preempt_note_context_switch(void)
+static void rcu_preempt_note_context_switch(bool preempt)
 {
 }
 
@@ -835,33 +899,6 @@ void exit_rcu(void)
 
 #include "../locking/rtmutex_common.h"
 
-#ifdef CONFIG_RCU_TRACE
-
-static void rcu_initiate_boost_trace(struct rcu_node *rnp)
-{
-	if (!rcu_preempt_has_tasks(rnp))
-		rnp->n_balk_blkd_tasks++;
-	else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
-		rnp->n_balk_exp_gp_tasks++;
-	else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
-		rnp->n_balk_boost_tasks++;
-	else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
-		rnp->n_balk_notblocked++;
-	else if (rnp->gp_tasks != NULL &&
-		 ULONG_CMP_LT(jiffies, rnp->boost_time))
-		rnp->n_balk_notyet++;
-	else
-		rnp->n_balk_nos++;
-}
-
-#else /* #ifdef CONFIG_RCU_TRACE */
-
-static void rcu_initiate_boost_trace(struct rcu_node *rnp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_RCU_TRACE */
-
 static void rcu_wake_cond(struct task_struct *t, int status)
 {
 	/*
@@ -992,8 +1029,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 {
 	struct task_struct *t;
 
+	lockdep_assert_held(&rnp->lock);
 	if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
-		rnp->n_balk_exp_gp_tasks++;
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
@@ -1009,7 +1046,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 		if (t)
 			rcu_wake_cond(t, rnp->boost_kthread_status);
 	} else {
-		rcu_initiate_boost_trace(rnp);
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	}
 }
@@ -1260,8 +1296,7 @@ static void rcu_prepare_kthreads(int cpu)
 int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 {
 	*nextevt = KTIME_MAX;
-	return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
-	       ? 0 : rcu_cpu_has_callbacks(NULL);
+	return rcu_cpu_has_callbacks(NULL);
 }
 
 /*
@@ -1350,10 +1385,10 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
 		 */
 		if ((rdp->completed != rnp->completed ||
 		     unlikely(READ_ONCE(rdp->gpwrap))) &&
-		    rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
+		    rcu_segcblist_pend_cbs(&rdp->cblist))
 			note_gp_changes(rsp, rdp);
 
-		if (cpu_has_callbacks_ready_to_invoke(rdp))
+		if (rcu_segcblist_ready_cbs(&rdp->cblist))
 			cbs_ready = true;
 	}
 	return cbs_ready;
@@ -1372,10 +1407,7 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 	unsigned long dj;
 
-	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) {
-		*nextevt = KTIME_MAX;
-		return 0;
-	}
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_needs_cpu() invoked with irqs enabled!!!");
 
 	/* Snapshot to detect later posting of non-lazy callback. */
 	rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
@@ -1424,8 +1456,8 @@ static void rcu_prepare_for_idle(void)
 	struct rcu_state *rsp;
 	int tne;
 
-	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) ||
-	    rcu_is_nocb_cpu(smp_processor_id()))
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_prepare_for_idle() invoked with irqs enabled!!!");
+	if (rcu_is_nocb_cpu(smp_processor_id()))
 		return;
 
 	/* Handle nohz enablement switches conservatively. */
@@ -1461,7 +1493,7 @@ static void rcu_prepare_for_idle(void)
 	rdtp->last_accelerate = jiffies;
 	for_each_rcu_flavor(rsp) {
 		rdp = this_cpu_ptr(rsp->rda);
-		if (!*rdp->nxttail[RCU_DONE_TAIL])
+		if (rcu_segcblist_pend_cbs(&rdp->cblist))
 			continue;
 		rnp = rdp->mynode;
 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
@@ -1479,8 +1511,8 @@ static void rcu_prepare_for_idle(void)
  */
 static void rcu_cleanup_after_idle(void)
 {
-	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) ||
-	    rcu_is_nocb_cpu(smp_processor_id()))
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_cleanup_after_idle() invoked with irqs enabled!!!");
+	if (rcu_is_nocb_cpu(smp_processor_id()))
 		return;
 	if (rcu_try_advance_all_cbs())
 		invoke_rcu_core();
@@ -1529,7 +1561,7 @@ static void rcu_oom_notify_cpu(void *unused)
 
 	for_each_rcu_flavor(rsp) {
 		rdp = raw_cpu_ptr(rsp->rda);
-		if (rdp->qlen_lazy != 0) {
+		if (rcu_segcblist_n_lazy_cbs(&rdp->cblist)) {
 			atomic_inc(&oom_callback_count);
 			rsp->call(&rdp->oom_head, rcu_oom_callback);
 		}
@@ -1709,7 +1741,7 @@ __setup("rcu_nocbs=", rcu_nocb_setup);
 
 static int __init parse_rcu_nocb_poll(char *arg)
 {
-	rcu_nocb_poll = 1;
+	rcu_nocb_poll = true;
 	return 0;
 }
 early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
@@ -1747,7 +1779,6 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
 	init_swait_queue_head(&rnp->nocb_gp_wq[1]);
 }
 
-#ifndef CONFIG_RCU_NOCB_CPU_ALL
 /* Is the specified CPU a no-CBs CPU? */
 bool rcu_is_nocb_cpu(int cpu)
 {
@@ -1755,7 +1786,6 @@ bool rcu_is_nocb_cpu(int cpu)
 		return cpumask_test_cpu(cpu, rcu_nocb_mask);
 	return false;
 }
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
 
 /*
  * Kick the leader kthread for this NOCB group.
@@ -1769,6 +1799,7 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
 	if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
 		/* Prior smp_mb__after_atomic() orders against prior enqueue. */
 		WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
+		smp_mb(); /* ->nocb_leader_sleep before swake_up(). */
 		swake_up(&rdp_leader->nocb_wq);
 	}
 }
@@ -1860,7 +1891,9 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeEmpty"));
 		} else {
-			rdp->nocb_defer_wakeup = RCU_NOGP_WAKE;
+			WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE);
+			/* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */
+			smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeEmptyIsDeferred"));
 		}
@@ -1872,7 +1905,9 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeOvf"));
 		} else {
-			rdp->nocb_defer_wakeup = RCU_NOGP_WAKE_FORCE;
+			WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_FORCE);
+			/* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */
+			smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeOvfIsDeferred"));
 		}
@@ -1930,30 +1965,26 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 						     struct rcu_data *rdp,
 						     unsigned long flags)
 {
-	long ql = rsp->qlen;
-	long qll = rsp->qlen_lazy;
+	long ql = rsp->orphan_done.len;
+	long qll = rsp->orphan_done.len_lazy;
 
 	/* If this is not a no-CBs CPU, tell the caller to do it the old way. */
 	if (!rcu_is_nocb_cpu(smp_processor_id()))
 		return false;
-	rsp->qlen = 0;
-	rsp->qlen_lazy = 0;
 
 	/* First, enqueue the donelist, if any.  This preserves CB ordering. */
-	if (rsp->orphan_donelist != NULL) {
-		__call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
-					rsp->orphan_donetail, ql, qll, flags);
-		ql = qll = 0;
-		rsp->orphan_donelist = NULL;
-		rsp->orphan_donetail = &rsp->orphan_donelist;
+	if (rsp->orphan_done.head) {
+		__call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_done),
+					rcu_cblist_tail(&rsp->orphan_done),
+					ql, qll, flags);
 	}
-	if (rsp->orphan_nxtlist != NULL) {
-		__call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
-					rsp->orphan_nxttail, ql, qll, flags);
-		ql = qll = 0;
-		rsp->orphan_nxtlist = NULL;
-		rsp->orphan_nxttail = &rsp->orphan_nxtlist;
+	if (rsp->orphan_pend.head) {
+		__call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_pend),
+					rcu_cblist_tail(&rsp->orphan_pend),
+					ql, qll, flags);
 	}
+	rcu_cblist_init(&rsp->orphan_done);
+	rcu_cblist_init(&rsp->orphan_pend);
 	return true;
 }
 
@@ -2023,6 +2054,7 @@ wait_again:
 	 * nocb_gp_head, where they await a grace period.
 	 */
 	gotcbs = false;
+	smp_mb(); /* wakeup before ->nocb_head reads. */
 	for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
 		rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head);
 		if (!rdp->nocb_gp_head)
@@ -2201,8 +2233,8 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
 	if (!rcu_nocb_need_deferred_wakeup(rdp))
 		return;
 	ndw = READ_ONCE(rdp->nocb_defer_wakeup);
-	WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_NOT);
-	wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE);
+	WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
+	wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE);
 	trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
 }
 
@@ -2212,10 +2244,6 @@ void __init rcu_init_nohz(void)
 	bool need_rcu_nocb_mask = true;
 	struct rcu_state *rsp;
 
-#ifdef CONFIG_RCU_NOCB_CPU_NONE
-	need_rcu_nocb_mask = false;
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
-
 #if defined(CONFIG_NO_HZ_FULL)
 	if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
 		need_rcu_nocb_mask = true;
@@ -2231,14 +2259,6 @@ void __init rcu_init_nohz(void)
 	if (!have_rcu_nocb_mask)
 		return;
 
-#ifdef CONFIG_RCU_NOCB_CPU_ZERO
-	pr_info("\tOffload RCU callbacks from CPU 0\n");
-	cpumask_set_cpu(0, rcu_nocb_mask);
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
-#ifdef CONFIG_RCU_NOCB_CPU_ALL
-	pr_info("\tOffload RCU callbacks from all CPUs\n");
-	cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
 #if defined(CONFIG_NO_HZ_FULL)
 	if (tick_nohz_full_running)
 		cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
@@ -2395,16 +2415,16 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
 		return false;
 
 	/* If there are early-boot callbacks, move them to nocb lists. */
-	if (rdp->nxtlist) {
-		rdp->nocb_head = rdp->nxtlist;
-		rdp->nocb_tail = rdp->nxttail[RCU_NEXT_TAIL];
-		atomic_long_set(&rdp->nocb_q_count, rdp->qlen);
-		atomic_long_set(&rdp->nocb_q_count_lazy, rdp->qlen_lazy);
-		rdp->nxtlist = NULL;
-		rdp->qlen = 0;
-		rdp->qlen_lazy = 0;
+	if (!rcu_segcblist_empty(&rdp->cblist)) {
+		rdp->nocb_head = rcu_segcblist_head(&rdp->cblist);
+		rdp->nocb_tail = rcu_segcblist_tail(&rdp->cblist);
+		atomic_long_set(&rdp->nocb_q_count,
+				rcu_segcblist_n_cbs(&rdp->cblist));
+		atomic_long_set(&rdp->nocb_q_count_lazy,
+				rcu_segcblist_n_lazy_cbs(&rdp->cblist));
+		rcu_segcblist_init(&rdp->cblist);
 	}
-	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
+	rcu_segcblist_disable(&rdp->cblist);
 	return true;
 }
 
@@ -2491,421 +2511,6 @@ static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
 #endif /* #ifdef CONFIG_NO_HZ_FULL */
 }
 
-
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-
-static int full_sysidle_state;		/* Current system-idle state. */
-#define RCU_SYSIDLE_NOT		0	/* Some CPU is not idle. */
-#define RCU_SYSIDLE_SHORT	1	/* All CPUs idle for brief period. */
-#define RCU_SYSIDLE_LONG	2	/* All CPUs idle for long enough. */
-#define RCU_SYSIDLE_FULL	3	/* All CPUs idle, ready for sysidle. */
-#define RCU_SYSIDLE_FULL_NOTED	4	/* Actually entered sysidle state. */
-
-/*
- * Invoked to note exit from irq or task transition to idle.  Note that
- * usermode execution does -not- count as idle here!  After all, we want
- * to detect full-system idle states, not RCU quiescent states and grace
- * periods.  The caller must have disabled interrupts.
- */
-static void rcu_sysidle_enter(int irq)
-{
-	unsigned long j;
-	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-
-	/* If there are no nohz_full= CPUs, no need to track this. */
-	if (!tick_nohz_full_enabled())
-		return;
-
-	/* Adjust nesting, check for fully idle. */
-	if (irq) {
-		rdtp->dynticks_idle_nesting--;
-		WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
-		if (rdtp->dynticks_idle_nesting != 0)
-			return;  /* Still not fully idle. */
-	} else {
-		if ((rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) ==
-		    DYNTICK_TASK_NEST_VALUE) {
-			rdtp->dynticks_idle_nesting = 0;
-		} else {
-			rdtp->dynticks_idle_nesting -= DYNTICK_TASK_NEST_VALUE;
-			WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
-			return;  /* Still not fully idle. */
-		}
-	}
-
-	/* Record start of fully idle period. */
-	j = jiffies;
-	WRITE_ONCE(rdtp->dynticks_idle_jiffies, j);
-	smp_mb__before_atomic();
-	atomic_inc(&rdtp->dynticks_idle);
-	smp_mb__after_atomic();
-	WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1);
-}
-
-/*
- * Unconditionally force exit from full system-idle state.  This is
- * invoked when a normal CPU exits idle, but must be called separately
- * for the timekeeping CPU (tick_do_timer_cpu).  The reason for this
- * is that the timekeeping CPU is permitted to take scheduling-clock
- * interrupts while the system is in system-idle state, and of course
- * rcu_sysidle_exit() has no way of distinguishing a scheduling-clock
- * interrupt from any other type of interrupt.
- */
-void rcu_sysidle_force_exit(void)
-{
-	int oldstate = READ_ONCE(full_sysidle_state);
-	int newoldstate;
-
-	/*
-	 * Each pass through the following loop attempts to exit full
-	 * system-idle state.  If contention proves to be a problem,
-	 * a trylock-based contention tree could be used here.
-	 */
-	while (oldstate > RCU_SYSIDLE_SHORT) {
-		newoldstate = cmpxchg(&full_sysidle_state,
-				      oldstate, RCU_SYSIDLE_NOT);
-		if (oldstate == newoldstate &&
-		    oldstate == RCU_SYSIDLE_FULL_NOTED) {
-			rcu_kick_nohz_cpu(tick_do_timer_cpu);
-			return; /* We cleared it, done! */
-		}
-		oldstate = newoldstate;
-	}
-	smp_mb(); /* Order initial oldstate fetch vs. later non-idle work. */
-}
-
-/*
- * Invoked to note entry to irq or task transition from idle.  Note that
- * usermode execution does -not- count as idle here!  The caller must
- * have disabled interrupts.
- */
-static void rcu_sysidle_exit(int irq)
-{
-	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-
-	/* If there are no nohz_full= CPUs, no need to track this. */
-	if (!tick_nohz_full_enabled())
-		return;
-
-	/* Adjust nesting, check for already non-idle. */
-	if (irq) {
-		rdtp->dynticks_idle_nesting++;
-		WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
-		if (rdtp->dynticks_idle_nesting != 1)
-			return; /* Already non-idle. */
-	} else {
-		/*
-		 * Allow for irq misnesting.  Yes, it really is possible
-		 * to enter an irq handler then never leave it, and maybe
-		 * also vice versa.  Handle both possibilities.
-		 */
-		if (rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) {
-			rdtp->dynticks_idle_nesting += DYNTICK_TASK_NEST_VALUE;
-			WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
-			return; /* Already non-idle. */
-		} else {
-			rdtp->dynticks_idle_nesting = DYNTICK_TASK_EXIT_IDLE;
-		}
-	}
-
-	/* Record end of idle period. */
-	smp_mb__before_atomic();
-	atomic_inc(&rdtp->dynticks_idle);
-	smp_mb__after_atomic();
-	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));
-
-	/*
-	 * If we are the timekeeping CPU, we are permitted to be non-idle
-	 * during a system-idle state.  This must be the case, because
-	 * the timekeeping CPU has to take scheduling-clock interrupts
-	 * during the time that the system is transitioning to full
-	 * system-idle state.  This means that the timekeeping CPU must
-	 * invoke rcu_sysidle_force_exit() directly if it does anything
-	 * more than take a scheduling-clock interrupt.
-	 */
-	if (smp_processor_id() == tick_do_timer_cpu)
-		return;
-
-	/* Update system-idle state: We are clearly no longer fully idle! */
-	rcu_sysidle_force_exit();
-}
-
-/*
- * Check to see if the current CPU is idle.  Note that usermode execution
- * does not count as idle.  The caller must have disabled interrupts,
- * and must be running on tick_do_timer_cpu.
- */
-static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
-				  unsigned long *maxj)
-{
-	int cur;
-	unsigned long j;
-	struct rcu_dynticks *rdtp = rdp->dynticks;
-
-	/* If there are no nohz_full= CPUs, don't check system-wide idleness. */
-	if (!tick_nohz_full_enabled())
-		return;
-
-	/*
-	 * If some other CPU has already reported non-idle, if this is
-	 * not the flavor of RCU that tracks sysidle state, or if this
-	 * is an offline or the timekeeping CPU, nothing to do.
-	 */
-	if (!*isidle || rdp->rsp != rcu_state_p ||
-	    cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
-		return;
-	/* Verify affinity of current kthread. */
-	WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
-
-	/* Pick up current idle and NMI-nesting counter and check. */
-	cur = atomic_read(&rdtp->dynticks_idle);
-	if (cur & 0x1) {
-		*isidle = false; /* We are not idle! */
-		return;
-	}
-	smp_mb(); /* Read counters before timestamps. */
-
-	/* Pick up timestamps. */
-	j = READ_ONCE(rdtp->dynticks_idle_jiffies);
-	/* If this CPU entered idle more recently, update maxj timestamp. */
-	if (ULONG_CMP_LT(*maxj, j))
-		*maxj = j;
-}
-
-/*
- * Is this the flavor of RCU that is handling full-system idle?
- */
-static bool is_sysidle_rcu_state(struct rcu_state *rsp)
-{
-	return rsp == rcu_state_p;
-}
-
-/*
- * Return a delay in jiffies based on the number of CPUs, rcu_node
- * leaf fanout, and jiffies tick rate.  The idea is to allow larger
- * systems more time to transition to full-idle state in order to
- * avoid the cache thrashing that otherwise occur on the state variable.
- * Really small systems (less than a couple of tens of CPUs) should
- * instead use a single global atomically incremented counter, and later
- * versions of this will automatically reconfigure themselves accordingly.
- */
-static unsigned long rcu_sysidle_delay(void)
-{
-	if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
-		return 0;
-	return DIV_ROUND_UP(nr_cpu_ids * HZ, rcu_fanout_leaf * 1000);
-}
-
-/*
- * Advance the full-system-idle state.  This is invoked when all of
- * the non-timekeeping CPUs are idle.
- */
-static void rcu_sysidle(unsigned long j)
-{
-	/* Check the current state. */
-	switch (READ_ONCE(full_sysidle_state)) {
-	case RCU_SYSIDLE_NOT:
-
-		/* First time all are idle, so note a short idle period. */
-		WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_SHORT);
-		break;
-
-	case RCU_SYSIDLE_SHORT:
-
-		/*
-		 * Idle for a bit, time to advance to next state?
-		 * cmpxchg failure means race with non-idle, let them win.
-		 */
-		if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
-			(void)cmpxchg(&full_sysidle_state,
-				      RCU_SYSIDLE_SHORT, RCU_SYSIDLE_LONG);
-		break;
-
-	case RCU_SYSIDLE_LONG:
-
-		/*
-		 * Do an additional check pass before advancing to full.
-		 * cmpxchg failure means race with non-idle, let them win.
-		 */
-		if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
-			(void)cmpxchg(&full_sysidle_state,
-				      RCU_SYSIDLE_LONG, RCU_SYSIDLE_FULL);
-		break;
-
-	default:
-		break;
-	}
-}
-
-/*
- * Found a non-idle non-timekeeping CPU, so kick the system-idle state
- * back to the beginning.
- */
-static void rcu_sysidle_cancel(void)
-{
-	smp_mb();
-	if (full_sysidle_state > RCU_SYSIDLE_SHORT)
-		WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_NOT);
-}
-
-/*
- * Update the sysidle state based on the results of a force-quiescent-state
- * scan of the CPUs' dyntick-idle state.
- */
-static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
-			       unsigned long maxj, bool gpkt)
-{
-	if (rsp != rcu_state_p)
-		return;  /* Wrong flavor, ignore. */
-	if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
-		return;  /* Running state machine from timekeeping CPU. */
-	if (isidle)
-		rcu_sysidle(maxj);    /* More idle! */
-	else
-		rcu_sysidle_cancel(); /* Idle is over. */
-}
-
-/*
- * Wrapper for rcu_sysidle_report() when called from the grace-period
- * kthread's context.
- */
-static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
-				  unsigned long maxj)
-{
-	/* If there are no nohz_full= CPUs, no need to track this. */
-	if (!tick_nohz_full_enabled())
-		return;
-
-	rcu_sysidle_report(rsp, isidle, maxj, true);
-}
-
-/* Callback and function for forcing an RCU grace period. */
-struct rcu_sysidle_head {
-	struct rcu_head rh;
-	int inuse;
-};
-
-static void rcu_sysidle_cb(struct rcu_head *rhp)
-{
-	struct rcu_sysidle_head *rshp;
-
-	/*
-	 * The following memory barrier is needed to replace the
-	 * memory barriers that would normally be in the memory
-	 * allocator.
-	 */
-	smp_mb();  /* grace period precedes setting inuse. */
-
-	rshp = container_of(rhp, struct rcu_sysidle_head, rh);
-	WRITE_ONCE(rshp->inuse, 0);
-}
-
-/*
- * Check to see if the system is fully idle, other than the timekeeping CPU.
- * The caller must have disabled interrupts.  This is not intended to be
- * called unless tick_nohz_full_enabled().
- */
-bool rcu_sys_is_idle(void)
-{
-	static struct rcu_sysidle_head rsh;
-	int rss = READ_ONCE(full_sysidle_state);
-
-	if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu))
-		return false;
-
-	/* Handle small-system case by doing a full scan of CPUs. */
-	if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) {
-		int oldrss = rss - 1;
-
-		/*
-		 * One pass to advance to each state up to _FULL.
-		 * Give up if any pass fails to advance the state.
-		 */
-		while (rss < RCU_SYSIDLE_FULL && oldrss < rss) {
-			int cpu;
-			bool isidle = true;
-			unsigned long maxj = jiffies - ULONG_MAX / 4;
-			struct rcu_data *rdp;
-
-			/* Scan all the CPUs looking for nonidle CPUs. */
-			for_each_possible_cpu(cpu) {
-				rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
-				rcu_sysidle_check_cpu(rdp, &isidle, &maxj);
-				if (!isidle)
-					break;
-			}
-			rcu_sysidle_report(rcu_state_p, isidle, maxj, false);
-			oldrss = rss;
-			rss = READ_ONCE(full_sysidle_state);
-		}
-	}
-
-	/* If this is the first observation of an idle period, record it. */
-	if (rss == RCU_SYSIDLE_FULL) {
-		rss = cmpxchg(&full_sysidle_state,
-			      RCU_SYSIDLE_FULL, RCU_SYSIDLE_FULL_NOTED);
-		return rss == RCU_SYSIDLE_FULL;
-	}
-
-	smp_mb(); /* ensure rss load happens before later caller actions. */
-
-	/* If already fully idle, tell the caller (in case of races). */
-	if (rss == RCU_SYSIDLE_FULL_NOTED)
-		return true;
-
-	/*
-	 * If we aren't there yet, and a grace period is not in flight,
-	 * initiate a grace period.  Either way, tell the caller that
-	 * we are not there yet.  We use an xchg() rather than an assignment
-	 * to make up for the memory barriers that would otherwise be
-	 * provided by the memory allocator.
-	 */
-	if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL &&
-	    !rcu_gp_in_progress(rcu_state_p) &&
-	    !rsh.inuse && xchg(&rsh.inuse, 1) == 0)
-		call_rcu(&rsh.rh, rcu_sysidle_cb);
-	return false;
-}
-
-/*
- * Initialize dynticks sysidle state for CPUs coming online.
- */
-static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
-{
-	rdtp->dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE;
-}
-
-#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-
-static void rcu_sysidle_enter(int irq)
-{
-}
-
-static void rcu_sysidle_exit(int irq)
-{
-}
-
-static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
-				  unsigned long *maxj)
-{
-}
-
-static bool is_sysidle_rcu_state(struct rcu_state *rsp)
-{
-	return false;
-}
-
-static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
-				  unsigned long maxj)
-{
-}
-
-static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-
 /*
  * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
  * grace-period kthread will do force_quiescent_state() processing?
@@ -2936,13 +2541,7 @@ static void rcu_bind_gp_kthread(void)
 
 	if (!tick_nohz_full_enabled())
 		return;
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-	cpu = tick_do_timer_cpu;
-	if (cpu >= 0 && cpu < nr_cpu_ids)
-		set_cpus_allowed_ptr(current, cpumask_of(cpu));
-#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 	housekeeping_affine(current);
-#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 }
 
 /* Record the current task on dyntick-idle entry. */
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
deleted file mode 100644
index 8751a748499a..000000000000
--- a/kernel/rcu/tree_trace.c
+++ /dev/null
@@ -1,494 +0,0 @@
-/*
- * Read-Copy Update tracing for hierarchical implementation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * Copyright IBM Corporation, 2008
- * Author: Paul E. McKenney
- *
- * Papers:  http://www.rdrop.com/users/paulmck/RCU
- *
- * For detailed explanation of Read-Copy Update mechanism see -
- *		Documentation/RCU
- *
- */
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/smp.h>
-#include <linux/rcupdate.h>
-#include <linux/interrupt.h>
-#include <linux/sched.h>
-#include <linux/atomic.h>
-#include <linux/bitops.h>
-#include <linux/completion.h>
-#include <linux/percpu.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/mutex.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-#define RCU_TREE_NONCORE
-#include "tree.h"
-
-DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
-
-static int r_open(struct inode *inode, struct file *file,
-					const struct seq_operations *op)
-{
-	int ret = seq_open(file, op);
-	if (!ret) {
-		struct seq_file *m = (struct seq_file *)file->private_data;
-		m->private = inode->i_private;
-	}
-	return ret;
-}
-
-static void *r_start(struct seq_file *m, loff_t *pos)
-{
-	struct rcu_state *rsp = (struct rcu_state *)m->private;
-	*pos = cpumask_next(*pos - 1, cpu_possible_mask);
-	if ((*pos) < nr_cpu_ids)
-		return per_cpu_ptr(rsp->rda, *pos);
-	return NULL;
-}
-
-static void *r_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	(*pos)++;
-	return r_start(m, pos);
-}
-
-static void r_stop(struct seq_file *m, void *v)
-{
-}
-
-static int show_rcubarrier(struct seq_file *m, void *v)
-{
-	struct rcu_state *rsp = (struct rcu_state *)m->private;
-	seq_printf(m, "bcc: %d bseq: %lu\n",
-		   atomic_read(&rsp->barrier_cpu_count),
-		   rsp->barrier_sequence);
-	return 0;
-}
-
-static int rcubarrier_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_rcubarrier, inode->i_private);
-}
-
-static const struct file_operations rcubarrier_fops = {
-	.owner = THIS_MODULE,
-	.open = rcubarrier_open,
-	.read = seq_read,
-	.llseek = no_llseek,
-	.release = single_release,
-};
-
-#ifdef CONFIG_RCU_BOOST
-
-static char convert_kthread_status(unsigned int kthread_status)
-{
-	if (kthread_status > RCU_KTHREAD_MAX)
-		return '?';
-	return "SRWOY"[kthread_status];
-}
-
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
-{
-	long ql, qll;
-
-	if (!rdp->beenonline)
-		return;
-	seq_printf(m, "%3d%cc=%ld g=%ld cnq=%d/%d:%d",
-		   rdp->cpu,
-		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
-		   ulong2long(rdp->completed), ulong2long(rdp->gpnum),
-		   rdp->cpu_no_qs.b.norm,
-		   rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu),
-		   rdp->core_needs_qs);
-	seq_printf(m, " dt=%d/%llx/%d df=%lu",
-		   rcu_dynticks_snap(rdp->dynticks),
-		   rdp->dynticks->dynticks_nesting,
-		   rdp->dynticks->dynticks_nmi_nesting,
-		   rdp->dynticks_fqs);
-	seq_printf(m, " of=%lu", rdp->offline_fqs);
-	rcu_nocb_q_lengths(rdp, &ql, &qll);
-	qll += rdp->qlen_lazy;
-	ql += rdp->qlen;
-	seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c",
-		   qll, ql,
-		   ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
-			rdp->nxttail[RCU_NEXT_TAIL]],
-		   ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
-			rdp->nxttail[RCU_NEXT_READY_TAIL]],
-		   ".W"[rdp->nxttail[RCU_DONE_TAIL] !=
-			rdp->nxttail[RCU_WAIT_TAIL]],
-		   ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]);
-#ifdef CONFIG_RCU_BOOST
-	seq_printf(m, " kt=%d/%c ktl=%x",
-		   per_cpu(rcu_cpu_has_work, rdp->cpu),
-		   convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
-					  rdp->cpu)),
-		   per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff);
-#endif /* #ifdef CONFIG_RCU_BOOST */
-	seq_printf(m, " b=%ld", rdp->blimit);
-	seq_printf(m, " ci=%lu nci=%lu co=%lu ca=%lu\n",
-		   rdp->n_cbs_invoked, rdp->n_nocbs_invoked,
-		   rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
-}
-
-static int show_rcudata(struct seq_file *m, void *v)
-{
-	print_one_rcu_data(m, (struct rcu_data *)v);
-	return 0;
-}
-
-static const struct seq_operations rcudate_op = {
-	.start = r_start,
-	.next  = r_next,
-	.stop  = r_stop,
-	.show  = show_rcudata,
-};
-
-static int rcudata_open(struct inode *inode, struct file *file)
-{
-	return r_open(inode, file, &rcudate_op);
-}
-
-static const struct file_operations rcudata_fops = {
-	.owner = THIS_MODULE,
-	.open = rcudata_open,
-	.read = seq_read,
-	.llseek = no_llseek,
-	.release = seq_release,
-};
-
-static int show_rcuexp(struct seq_file *m, void *v)
-{
-	int cpu;
-	struct rcu_state *rsp = (struct rcu_state *)m->private;
-	struct rcu_data *rdp;
-	unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
-
-	for_each_possible_cpu(cpu) {
-		rdp = per_cpu_ptr(rsp->rda, cpu);
-		s0 += atomic_long_read(&rdp->exp_workdone0);
-		s1 += atomic_long_read(&rdp->exp_workdone1);
-		s2 += atomic_long_read(&rdp->exp_workdone2);
-		s3 += atomic_long_read(&rdp->exp_workdone3);
-	}
-	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu enq=%d sc=%lu\n",
-		   rsp->expedited_sequence, s0, s1, s2, s3,
-		   atomic_read(&rsp->expedited_need_qs),
-		   rsp->expedited_sequence / 2);
-	return 0;
-}
-
-static int rcuexp_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_rcuexp, inode->i_private);
-}
-
-static const struct file_operations rcuexp_fops = {
-	.owner = THIS_MODULE,
-	.open = rcuexp_open,
-	.read = seq_read,
-	.llseek = no_llseek,
-	.release = single_release,
-};
-
-#ifdef CONFIG_RCU_BOOST
-
-static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
-{
-	seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu ",
-		   rnp->grplo, rnp->grphi,
-		   "T."[list_empty(&rnp->blkd_tasks)],
-		   "N."[!rnp->gp_tasks],
-		   "E."[!rnp->exp_tasks],
-		   "B."[!rnp->boost_tasks],
-		   convert_kthread_status(rnp->boost_kthread_status),
-		   rnp->n_tasks_boosted, rnp->n_exp_boosts,
-		   rnp->n_normal_boosts);
-	seq_printf(m, "j=%04x bt=%04x\n",
-		   (int)(jiffies & 0xffff),
-		   (int)(rnp->boost_time & 0xffff));
-	seq_printf(m, "    balk: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n",
-		   rnp->n_balk_blkd_tasks,
-		   rnp->n_balk_exp_gp_tasks,
-		   rnp->n_balk_boost_tasks,
-		   rnp->n_balk_notblocked,
-		   rnp->n_balk_notyet,
-		   rnp->n_balk_nos);
-}
-
-static int show_rcu_node_boost(struct seq_file *m, void *unused)
-{
-	struct rcu_node *rnp;
-
-	rcu_for_each_leaf_node(&rcu_preempt_state, rnp)
-		print_one_rcu_node_boost(m, rnp);
-	return 0;
-}
-
-static int rcu_node_boost_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_rcu_node_boost, NULL);
-}
-
-static const struct file_operations rcu_node_boost_fops = {
-	.owner = THIS_MODULE,
-	.open = rcu_node_boost_open,
-	.read = seq_read,
-	.llseek = no_llseek,
-	.release = single_release,
-};
-
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
-{
-	unsigned long gpnum;
-	int level = 0;
-	struct rcu_node *rnp;
-
-	gpnum = rsp->gpnum;
-	seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x ",
-		   ulong2long(rsp->completed), ulong2long(gpnum),
-		   rsp->gp_state,
-		   (long)(rsp->jiffies_force_qs - jiffies),
-		   (int)(jiffies & 0xffff));
-	seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
-		   rsp->n_force_qs, rsp->n_force_qs_ngp,
-		   rsp->n_force_qs - rsp->n_force_qs_ngp,
-		   READ_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen);
-	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
-		if (rnp->level != level) {
-			seq_puts(m, "\n");
-			level = rnp->level;
-		}
-		seq_printf(m, "%lx/%lx->%lx %c%c>%c %d:%d ^%d    ",
-			   rnp->qsmask, rnp->qsmaskinit, rnp->qsmaskinitnext,
-			   ".G"[rnp->gp_tasks != NULL],
-			   ".E"[rnp->exp_tasks != NULL],
-			   ".T"[!list_empty(&rnp->blkd_tasks)],
-			   rnp->grplo, rnp->grphi, rnp->grpnum);
-	}
-	seq_puts(m, "\n");
-}
-
-static int show_rcuhier(struct seq_file *m, void *v)
-{
-	struct rcu_state *rsp = (struct rcu_state *)m->private;
-	print_one_rcu_state(m, rsp);
-	return 0;
-}
-
-static int rcuhier_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_rcuhier, inode->i_private);
-}
-
-static const struct file_operations rcuhier_fops = {
-	.owner = THIS_MODULE,
-	.open = rcuhier_open,
-	.read = seq_read,
-	.llseek = no_llseek,
-	.release = single_release,
-};
-
-static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
-{
-	unsigned long flags;
-	unsigned long completed;
-	unsigned long gpnum;
-	unsigned long gpage;
-	unsigned long gpmax;
-	struct rcu_node *rnp = &rsp->node[0];
-
-	raw_spin_lock_irqsave_rcu_node(rnp, flags);
-	completed = READ_ONCE(rsp->completed);
-	gpnum = READ_ONCE(rsp->gpnum);
-	if (completed == gpnum)
-		gpage = 0;
-	else
-		gpage = jiffies - rsp->gp_start;
-	gpmax = rsp->gp_max;
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	seq_printf(m, "completed=%ld  gpnum=%ld  age=%ld  max=%ld\n",
-		   ulong2long(completed), ulong2long(gpnum), gpage, gpmax);
-}
-
-static int show_rcugp(struct seq_file *m, void *v)
-{
-	struct rcu_state *rsp = (struct rcu_state *)m->private;
-	show_one_rcugp(m, rsp);
-	return 0;
-}
-
-static int rcugp_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_rcugp, inode->i_private);
-}
-
-static const struct file_operations rcugp_fops = {
-	.owner = THIS_MODULE,
-	.open = rcugp_open,
-	.read = seq_read,
-	.llseek = no_llseek,
-	.release = single_release,
-};
-
-static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
-{
-	if (!rdp->beenonline)
-		return;
-	seq_printf(m, "%3d%cnp=%ld ",
-		   rdp->cpu,
-		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
-		   rdp->n_rcu_pending);
-	seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ",
-		   rdp->n_rp_core_needs_qs,
-		   rdp->n_rp_report_qs,
-		   rdp->n_rp_cb_ready,
-		   rdp->n_rp_cpu_needs_gp);
-	seq_printf(m, "gpc=%ld gps=%ld nn=%ld ndw%ld\n",
-		   rdp->n_rp_gp_completed,
-		   rdp->n_rp_gp_started,
-		   rdp->n_rp_nocb_defer_wakeup,
-		   rdp->n_rp_need_nothing);
-}
-
-static int show_rcu_pending(struct seq_file *m, void *v)
-{
-	print_one_rcu_pending(m, (struct rcu_data *)v);
-	return 0;
-}
-
-static const struct seq_operations rcu_pending_op = {
-	.start = r_start,
-	.next  = r_next,
-	.stop  = r_stop,
-	.show  = show_rcu_pending,
-};
-
-static int rcu_pending_open(struct inode *inode, struct file *file)
-{
-	return r_open(inode, file, &rcu_pending_op);
-}
-
-static const struct file_operations rcu_pending_fops = {
-	.owner = THIS_MODULE,
-	.open = rcu_pending_open,
-	.read = seq_read,
-	.llseek = no_llseek,
-	.release = seq_release,
-};
-
-static int show_rcutorture(struct seq_file *m, void *unused)
-{
-	seq_printf(m, "rcutorture test sequence: %lu %s\n",
-		   rcutorture_testseq >> 1,
-		   (rcutorture_testseq & 0x1) ? "(test in progress)" : "");
-	seq_printf(m, "rcutorture update version number: %lu\n",
-		   rcutorture_vernum);
-	return 0;
-}
-
-static int rcutorture_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_rcutorture, NULL);
-}
-
-static const struct file_operations rcutorture_fops = {
-	.owner = THIS_MODULE,
-	.open = rcutorture_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static struct dentry *rcudir;
-
-static int __init rcutree_trace_init(void)
-{
-	struct rcu_state *rsp;
-	struct dentry *retval;
-	struct dentry *rspdir;
-
-	rcudir = debugfs_create_dir("rcu", NULL);
-	if (!rcudir)
-		goto free_out;
-
-	for_each_rcu_flavor(rsp) {
-		rspdir = debugfs_create_dir(rsp->name, rcudir);
-		if (!rspdir)
-			goto free_out;
-
-		retval = debugfs_create_file("rcudata", 0444,
-				rspdir, rsp, &rcudata_fops);
-		if (!retval)
-			goto free_out;
-
-		retval = debugfs_create_file("rcuexp", 0444,
-				rspdir, rsp, &rcuexp_fops);
-		if (!retval)
-			goto free_out;
-
-		retval = debugfs_create_file("rcu_pending", 0444,
-				rspdir, rsp, &rcu_pending_fops);
-		if (!retval)
-			goto free_out;
-
-		retval = debugfs_create_file("rcubarrier", 0444,
-				rspdir, rsp, &rcubarrier_fops);
-		if (!retval)
-			goto free_out;
-
-#ifdef CONFIG_RCU_BOOST
-		if (rsp == &rcu_preempt_state) {
-			retval = debugfs_create_file("rcuboost", 0444,
-				rspdir, NULL, &rcu_node_boost_fops);
-			if (!retval)
-				goto free_out;
-		}
-#endif
-
-		retval = debugfs_create_file("rcugp", 0444,
-				rspdir, rsp, &rcugp_fops);
-		if (!retval)
-			goto free_out;
-
-		retval = debugfs_create_file("rcuhier", 0444,
-				rspdir, rsp, &rcuhier_fops);
-		if (!retval)
-			goto free_out;
-	}
-
-	retval = debugfs_create_file("rcutorture", 0444, rcudir,
-						NULL, &rcutorture_fops);
-	if (!retval)
-		goto free_out;
-	return 0;
-free_out:
-	debugfs_remove_recursive(rcudir);
-	return 1;
-}
-device_initcall(rcutree_trace_init);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 55c8530316c7..00e77c470017 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -62,7 +62,9 @@
 #define MODULE_PARAM_PREFIX "rcupdate."
 
 #ifndef CONFIG_TINY_RCU
+extern int rcu_expedited; /* from sysctl */
 module_param(rcu_expedited, int, 0);
+extern int rcu_normal; /* from sysctl */
 module_param(rcu_normal, int, 0);
 static int rcu_normal_after_boot;
 module_param(rcu_normal_after_boot, int, 0);
@@ -124,7 +126,7 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held);
  * non-expedited counterparts?  Intended for use within RCU.  Note
  * that if the user specifies both rcu_expedited and rcu_normal, then
  * rcu_normal wins.  (Except during the time period during boot from
- * when the first task is spawned until the rcu_exp_runtime_mode()
+ * when the first task is spawned until the rcu_set_runtime_mode()
  * core_initcall() is invoked, at which point everything is expedited.)
  */
 bool rcu_gp_is_normal(void)
@@ -190,6 +192,39 @@ void rcu_end_inkernel_boot(void)
 
 #endif /* #ifndef CONFIG_TINY_RCU */
 
+/*
+ * Test each non-SRCU synchronous grace-period wait API.  This is
+ * useful just after a change in mode for these primitives, and
+ * during early boot.
+ */
+void rcu_test_sync_prims(void)
+{
+	if (!IS_ENABLED(CONFIG_PROVE_RCU))
+		return;
+	synchronize_rcu();
+	synchronize_rcu_bh();
+	synchronize_sched();
+	synchronize_rcu_expedited();
+	synchronize_rcu_bh_expedited();
+	synchronize_sched_expedited();
+}
+
+#if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU)
+
+/*
+ * Switch to run-time mode once RCU has fully initialized.
+ */
+static int __init rcu_set_runtime_mode(void)
+{
+	rcu_test_sync_prims();
+	rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
+	rcu_test_sync_prims();
+	return 0;
+}
+core_initcall(rcu_set_runtime_mode);
+
+#endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */
+
 #ifdef CONFIG_PREEMPT_RCU
 
 /*
@@ -346,6 +381,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
 		   struct rcu_synchronize *rs_array)
 {
 	int i;
+	int j;
 
 	/* Initialize and register callbacks for each flavor specified. */
 	for (i = 0; i < n; i++) {
@@ -357,7 +393,11 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
 		}
 		init_rcu_head_on_stack(&rs_array[i].head);
 		init_completion(&rs_array[i].completion);
-		(crcu_array[i])(&rs_array[i].head, wakeme_after_rcu);
+		for (j = 0; j < i; j++)
+			if (crcu_array[j] == crcu_array[i])
+				break;
+		if (j == i)
+			(crcu_array[i])(&rs_array[i].head, wakeme_after_rcu);
 	}
 
 	/* Wait for all callbacks to be invoked. */
@@ -366,7 +406,11 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
 		    (crcu_array[i] == call_rcu ||
 		     crcu_array[i] == call_rcu_bh))
 			continue;
-		wait_for_completion(&rs_array[i].completion);
+		for (j = 0; j < i; j++)
+			if (crcu_array[j] == crcu_array[i])
+				break;
+		if (j == i)
+			wait_for_completion(&rs_array[i].completion);
 		destroy_rcu_head_on_stack(&rs_array[i].head);
 	}
 }
@@ -527,15 +571,30 @@ static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
 DEFINE_SRCU(tasks_rcu_exit_srcu);
 
 /* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
-static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
+#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
+static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
 module_param(rcu_task_stall_timeout, int, 0644);
 
 static void rcu_spawn_tasks_kthread(void);
 static struct task_struct *rcu_tasks_kthread_ptr;
 
-/*
- * Post an RCU-tasks callback.  First call must be from process context
- * after the scheduler if fully operational.
+/**
+ * call_rcu_tasks() - Queue an RCU for invocation task-based grace period
+ * @rhp: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_tasks() assumes
+ * that the read-side critical sections end at a voluntary context
+ * switch (not a preemption!), entry into idle, or transition to usermode
+ * execution.  As such, there are no read-side primitives analogous to
+ * rcu_read_lock() and rcu_read_unlock() because this primitive is intended
+ * to determine that all tasks have passed through a safe state, not so
+ * much for data-strcuture synchronization.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
  */
 void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
 {
@@ -632,6 +691,7 @@ static void check_holdout_task(struct task_struct *t,
 		put_task_struct(t);
 		return;
 	}
+	rcu_request_urgent_qs_task(t);
 	if (!needreport)
 		return;
 	if (*firstreport) {
@@ -817,23 +877,23 @@ static void rcu_spawn_tasks_kthread(void)
 
 #endif /* #ifdef CONFIG_TASKS_RCU */
 
+#ifndef CONFIG_TINY_RCU
+
 /*
- * Test each non-SRCU synchronous grace-period wait API.  This is
- * useful just after a change in mode for these primitives, and
- * during early boot.
+ * Print any non-default Tasks RCU settings.
  */
-void rcu_test_sync_prims(void)
+static void __init rcu_tasks_bootup_oddness(void)
 {
-	if (!IS_ENABLED(CONFIG_PROVE_RCU))
-		return;
-	synchronize_rcu();
-	synchronize_rcu_bh();
-	synchronize_sched();
-	synchronize_rcu_expedited();
-	synchronize_rcu_bh_expedited();
-	synchronize_sched_expedited();
+#ifdef CONFIG_TASKS_RCU
+	if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
+		pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
+	else
+		pr_info("\tTasks RCU enabled.\n");
+#endif /* #ifdef CONFIG_TASKS_RCU */
 }
 
+#endif /* #ifndef CONFIG_TINY_RCU */
+
 #ifdef CONFIG_PROVE_RCU
 
 /*
@@ -918,3 +978,25 @@ late_initcall(rcu_verify_early_boot_tests);
 #else
 void rcu_early_boot_tests(void) {}
 #endif /* CONFIG_PROVE_RCU */
+
+#ifndef CONFIG_TINY_RCU
+
+/*
+ * Print any significant non-default boot-time settings.
+ */
+void __init rcupdate_announce_bootup_oddness(void)
+{
+	if (rcu_normal)
+		pr_info("\tNo expedited grace period (rcu_normal).\n");
+	else if (rcu_normal_after_boot)
+		pr_info("\tNo expedited grace period (rcu_normal_after_boot).\n");
+	else if (rcu_expedited)
+		pr_info("\tAll grace periods are expedited (rcu_expedited).\n");
+	if (rcu_cpu_stall_suppress)
+		pr_info("\tRCU CPU stall warnings suppressed (rcu_cpu_stall_suppress).\n");
+	if (rcu_cpu_stall_timeout != CONFIG_RCU_CPU_STALL_TIMEOUT)
+		pr_info("\tRCU CPU stall warnings timeout set to %d (rcu_cpu_stall_timeout).\n", rcu_cpu_stall_timeout);
+	rcu_tasks_bootup_oddness();
+}
+
+#endif /* #ifndef CONFIG_TINY_RCU */
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 89ab6758667b..53f0164ed362 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -16,9 +16,9 @@ CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
 endif
 
 obj-y += core.o loadavg.o clock.o cputime.o
-obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
-obj-y += wait.o swait.o completion.o idle.o
-obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o
+obj-y += idle_task.o fair.o rt.o deadline.o
+obj-y += wait.o wait_bit.o swait.o completion.o idle.o
+obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index 00a45c45beca..ca0f8fc945c6 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -64,6 +64,7 @@
 #include <linux/workqueue.h>
 #include <linux/compiler.h>
 #include <linux/tick.h>
+#include <linux/init.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -124,14 +125,27 @@ int sched_clock_stable(void)
 	return static_branch_likely(&__sched_clock_stable);
 }
 
+static void __scd_stamp(struct sched_clock_data *scd)
+{
+	scd->tick_gtod = ktime_get_ns();
+	scd->tick_raw = sched_clock();
+}
+
 static void __set_sched_clock_stable(void)
 {
-	struct sched_clock_data *scd = this_scd();
+	struct sched_clock_data *scd;
 
 	/*
+	 * Since we're still unstable and the tick is already running, we have
+	 * to disable IRQs in order to get a consistent scd->tick* reading.
+	 */
+	local_irq_disable();
+	scd = this_scd();
+	/*
 	 * Attempt to make the (initial) unstable->stable transition continuous.
 	 */
 	__sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw);
+	local_irq_enable();
 
 	printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n",
 			scd->tick_gtod, __gtod_offset,
@@ -141,8 +155,38 @@ static void __set_sched_clock_stable(void)
 	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
+/*
+ * If we ever get here, we're screwed, because we found out -- typically after
+ * the fact -- that TSC wasn't good. This means all our clocksources (including
+ * ktime) could have reported wrong values.
+ *
+ * What we do here is an attempt to fix up and continue sort of where we left
+ * off in a coherent manner.
+ *
+ * The only way to fully avoid random clock jumps is to boot with:
+ * "tsc=unstable".
+ */
 static void __sched_clock_work(struct work_struct *work)
 {
+	struct sched_clock_data *scd;
+	int cpu;
+
+	/* take a current timestamp and set 'now' */
+	preempt_disable();
+	scd = this_scd();
+	__scd_stamp(scd);
+	scd->clock = scd->tick_gtod + __gtod_offset;
+	preempt_enable();
+
+	/* clone to all CPUs */
+	for_each_possible_cpu(cpu)
+		per_cpu(sched_clock_data, cpu) = *scd;
+
+	printk(KERN_WARNING "TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.\n");
+	printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
+			scd->tick_gtod, __gtod_offset,
+			scd->tick_raw,  __sched_clock_offset);
+
 	static_branch_disable(&__sched_clock_stable);
 }
 
@@ -150,27 +194,11 @@ static DECLARE_WORK(sched_clock_work, __sched_clock_work);
 
 static void __clear_sched_clock_stable(void)
 {
-	struct sched_clock_data *scd = this_scd();
-
-	/*
-	 * Attempt to make the stable->unstable transition continuous.
-	 *
-	 * Trouble is, this is typically called from the TSC watchdog
-	 * timer, which is late per definition. This means the tick
-	 * values can already be screwy.
-	 *
-	 * Still do what we can.
-	 */
-	__gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod);
-
-	printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
-			scd->tick_gtod, __gtod_offset,
-			scd->tick_raw,  __sched_clock_offset);
+	if (!sched_clock_stable())
+		return;
 
 	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
-
-	if (sched_clock_stable())
-		schedule_work(&sched_clock_work);
+	schedule_work(&sched_clock_work);
 }
 
 void clear_sched_clock_stable(void)
@@ -183,7 +211,11 @@ void clear_sched_clock_stable(void)
 		__clear_sched_clock_stable();
 }
 
-void sched_clock_init_late(void)
+/*
+ * We run this as late_initcall() such that it runs after all built-in drivers,
+ * notably: acpi_processor and intel_idle, which can mark the TSC as unstable.
+ */
+static int __init sched_clock_init_late(void)
 {
 	sched_clock_running = 2;
 	/*
@@ -197,7 +229,10 @@ void sched_clock_init_late(void)
 
 	if (__sched_clock_stable_early)
 		__set_sched_clock_stable();
+
+	return 0;
 }
+late_initcall(sched_clock_init_late);
 
 /*
  * min, max except they take wrapping into account
@@ -347,21 +382,38 @@ void sched_clock_tick(void)
 {
 	struct sched_clock_data *scd;
 
+	if (sched_clock_stable())
+		return;
+
+	if (unlikely(!sched_clock_running))
+		return;
+
 	WARN_ON_ONCE(!irqs_disabled());
 
+	scd = this_scd();
+	__scd_stamp(scd);
+	sched_clock_local(scd);
+}
+
+void sched_clock_tick_stable(void)
+{
+	u64 gtod, clock;
+
+	if (!sched_clock_stable())
+		return;
+
 	/*
-	 * Update these values even if sched_clock_stable(), because it can
-	 * become unstable at any point in time at which point we need some
-	 * values to fall back on.
+	 * Called under watchdog_lock.
 	 *
-	 * XXX arguably we can skip this if we expose tsc_clocksource_reliable
+	 * The watchdog just found this TSC to (still) be stable, so now is a
+	 * good moment to update our __gtod_offset. Because once we find the
+	 * TSC to be unstable, any computation will be computing crap.
 	 */
-	scd = this_scd();
-	scd->tick_raw  = sched_clock();
-	scd->tick_gtod = ktime_get_ns();
-
-	if (!sched_clock_stable() && likely(sched_clock_running))
-		sched_clock_local(scd);
+	local_irq_disable();
+	gtod = ktime_get_ns();
+	clock = sched_clock();
+	__gtod_offset = (clock + __sched_clock_offset) - gtod;
+	local_irq_enable();
 }
 
 /*
@@ -374,15 +426,21 @@ void sched_clock_idle_sleep_event(void)
 EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
 
 /*
- * We just idled delta nanoseconds (called with irqs disabled):
+ * We just idled; resync with ktime.
  */
-void sched_clock_idle_wakeup_event(u64 delta_ns)
+void sched_clock_idle_wakeup_event(void)
 {
-	if (timekeeping_suspended)
+	unsigned long flags;
+
+	if (sched_clock_stable())
+		return;
+
+	if (unlikely(timekeeping_suspended))
 		return;
 
+	local_irq_save(flags);
 	sched_clock_tick();
-	touch_softlockup_watchdog_sched();
+	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 53f9558fa925..13fc5ae9bf2f 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -66,7 +66,7 @@ do_wait_for_common(struct completion *x,
 	if (!x->done) {
 		DECLARE_WAITQUEUE(wait, current);
 
-		__add_wait_queue_tail_exclusive(&x->wait, &wait);
+		__add_wait_queue_entry_tail_exclusive(&x->wait, &wait);
 		do {
 			if (signal_pending_state(state, current)) {
 				timeout = -ERESTARTSYS;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c51147a1204c..17c667b427b4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10,6 +10,7 @@
 #include <uapi/linux/sched/types.h>
 #include <linux/sched/loadavg.h>
 #include <linux/sched/hotplug.h>
+#include <linux/wait_bit.h>
 #include <linux/cpuset.h>
 #include <linux/delayacct.h>
 #include <linux/init_task.h>
@@ -788,36 +789,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 	dequeue_task(rq, p, flags);
 }
 
-void sched_set_stop_task(int cpu, struct task_struct *stop)
-{
-	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
-	struct task_struct *old_stop = cpu_rq(cpu)->stop;
-
-	if (stop) {
-		/*
-		 * Make it appear like a SCHED_FIFO task, its something
-		 * userspace knows about and won't get confused about.
-		 *
-		 * Also, it will make PI more or less work without too
-		 * much confusion -- but then, stop work should not
-		 * rely on PI working anyway.
-		 */
-		sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
-
-		stop->sched_class = &stop_sched_class;
-	}
-
-	cpu_rq(cpu)->stop = stop;
-
-	if (old_stop) {
-		/*
-		 * Reset it back to a normal scheduling class so that
-		 * it can die in pieces.
-		 */
-		old_stop->sched_class = &rt_sched_class;
-	}
-}
-
 /*
  * __normal_prio - return the priority that is based on the static prio
  */
@@ -1588,6 +1559,36 @@ static void update_avg(u64 *avg, u64 sample)
 	*avg += diff >> 3;
 }
 
+void sched_set_stop_task(int cpu, struct task_struct *stop)
+{
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+	struct task_struct *old_stop = cpu_rq(cpu)->stop;
+
+	if (stop) {
+		/*
+		 * Make it appear like a SCHED_FIFO task, its something
+		 * userspace knows about and won't get confused about.
+		 *
+		 * Also, it will make PI more or less work without too
+		 * much confusion -- but then, stop work should not
+		 * rely on PI working anyway.
+		 */
+		sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
+
+		stop->sched_class = &stop_sched_class;
+	}
+
+	cpu_rq(cpu)->stop = stop;
+
+	if (old_stop) {
+		/*
+		 * Reset it back to a normal scheduling class so that
+		 * it can die in pieces.
+		 */
+		old_stop->sched_class = &rt_sched_class;
+	}
+}
+
 #else
 
 static inline int __set_cpus_allowed_ptr(struct task_struct *p,
@@ -1731,7 +1732,7 @@ void sched_ttwu_pending(void)
 {
 	struct rq *rq = this_rq();
 	struct llist_node *llist = llist_del_all(&rq->wake_list);
-	struct task_struct *p;
+	struct task_struct *p, *t;
 	struct rq_flags rf;
 
 	if (!llist)
@@ -1740,17 +1741,8 @@ void sched_ttwu_pending(void)
 	rq_lock_irqsave(rq, &rf);
 	update_rq_clock(rq);
 
-	while (llist) {
-		int wake_flags = 0;
-
-		p = llist_entry(llist, struct task_struct, wake_entry);
-		llist = llist_next(llist);
-
-		if (p->sched_remote_wakeup)
-			wake_flags = WF_MIGRATED;
-
-		ttwu_do_activate(rq, p, wake_flags, &rf);
-	}
+	llist_for_each_entry_safe(p, t, llist, wake_entry)
+		ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf);
 
 	rq_unlock_irqrestore(rq, &rf);
 }
@@ -2148,23 +2140,6 @@ int wake_up_state(struct task_struct *p, unsigned int state)
 }
 
 /*
- * This function clears the sched_dl_entity static params.
- */
-void __dl_clear_params(struct task_struct *p)
-{
-	struct sched_dl_entity *dl_se = &p->dl;
-
-	dl_se->dl_runtime = 0;
-	dl_se->dl_deadline = 0;
-	dl_se->dl_period = 0;
-	dl_se->flags = 0;
-	dl_se->dl_bw = 0;
-
-	dl_se->dl_throttled = 0;
-	dl_se->dl_yielded = 0;
-}
-
-/*
  * Perform scheduler related setup for a newly forked process p.
  * p is forked by current.
  *
@@ -2193,6 +2168,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 	RB_CLEAR_NODE(&p->dl.rb_node);
 	init_dl_task_timer(&p->dl);
+	init_dl_inactive_task_timer(&p->dl);
 	__dl_clear_params(p);
 
 	INIT_LIST_HEAD(&p->rt.run_list);
@@ -2430,7 +2406,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 unsigned long to_ratio(u64 period, u64 runtime)
 {
 	if (runtime == RUNTIME_INF)
-		return 1ULL << 20;
+		return BW_UNIT;
 
 	/*
 	 * Doing this here saves a lot of checks in all
@@ -2440,93 +2416,9 @@ unsigned long to_ratio(u64 period, u64 runtime)
 	if (period == 0)
 		return 0;
 
-	return div64_u64(runtime << 20, period);
+	return div64_u64(runtime << BW_SHIFT, period);
 }
 
-#ifdef CONFIG_SMP
-inline struct dl_bw *dl_bw_of(int i)
-{
-	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
-			 "sched RCU must be held");
-	return &cpu_rq(i)->rd->dl_bw;
-}
-
-static inline int dl_bw_cpus(int i)
-{
-	struct root_domain *rd = cpu_rq(i)->rd;
-	int cpus = 0;
-
-	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
-			 "sched RCU must be held");
-	for_each_cpu_and(i, rd->span, cpu_active_mask)
-		cpus++;
-
-	return cpus;
-}
-#else
-inline struct dl_bw *dl_bw_of(int i)
-{
-	return &cpu_rq(i)->dl.dl_bw;
-}
-
-static inline int dl_bw_cpus(int i)
-{
-	return 1;
-}
-#endif
-
-/*
- * We must be sure that accepting a new task (or allowing changing the
- * parameters of an existing one) is consistent with the bandwidth
- * constraints. If yes, this function also accordingly updates the currently
- * allocated bandwidth to reflect the new situation.
- *
- * This function is called while holding p's rq->lock.
- *
- * XXX we should delay bw change until the task's 0-lag point, see
- * __setparam_dl().
- */
-static int dl_overflow(struct task_struct *p, int policy,
-		       const struct sched_attr *attr)
-{
-
-	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
-	u64 period = attr->sched_period ?: attr->sched_deadline;
-	u64 runtime = attr->sched_runtime;
-	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
-	int cpus, err = -1;
-
-	/* !deadline task may carry old deadline bandwidth */
-	if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
-		return 0;
-
-	/*
-	 * Either if a task, enters, leave, or stays -deadline but changes
-	 * its parameters, we may need to update accordingly the total
-	 * allocated bandwidth of the container.
-	 */
-	raw_spin_lock(&dl_b->lock);
-	cpus = dl_bw_cpus(task_cpu(p));
-	if (dl_policy(policy) && !task_has_dl_policy(p) &&
-	    !__dl_overflow(dl_b, cpus, 0, new_bw)) {
-		__dl_add(dl_b, new_bw);
-		err = 0;
-	} else if (dl_policy(policy) && task_has_dl_policy(p) &&
-		   !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
-		__dl_clear(dl_b, p->dl.dl_bw);
-		__dl_add(dl_b, new_bw);
-		err = 0;
-	} else if (!dl_policy(policy) && task_has_dl_policy(p)) {
-		__dl_clear(dl_b, p->dl.dl_bw);
-		err = 0;
-	}
-	raw_spin_unlock(&dl_b->lock);
-
-	return err;
-}
-
-extern void init_dl_bw(struct dl_bw *dl_b);
-
 /*
  * wake_up_new_task - wake up a newly created task for the first time.
  *
@@ -3382,7 +3274,7 @@ static void __sched notrace __schedule(bool preempt)
 		hrtick_clear(rq);
 
 	local_irq_disable();
-	rcu_note_context_switch();
+	rcu_note_context_switch(preempt);
 
 	/*
 	 * Make sure that signal_pending_state()->signal_pending() below
@@ -3502,6 +3394,31 @@ asmlinkage __visible void __sched schedule(void)
 }
 EXPORT_SYMBOL(schedule);
 
+/*
+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted
+ * state (have scheduled out non-voluntarily) by making sure that all
+ * tasks have either left the run queue or have gone into user space.
+ * As idle tasks do not do either, they must not ever be preempted
+ * (schedule out non-voluntarily).
+ *
+ * schedule_idle() is similar to schedule_preempt_disable() except that it
+ * never enables preemption because it does not call sched_submit_work().
+ */
+void __sched schedule_idle(void)
+{
+	/*
+	 * As this skips calling sched_submit_work(), which the idle task does
+	 * regardless because that function is a nop when the task is in a
+	 * TASK_RUNNING state, make sure this isn't used someplace that the
+	 * current task can be in any other state. Note, idle is always in the
+	 * TASK_RUNNING state.
+	 */
+	WARN_ON_ONCE(current->state);
+	do {
+		__schedule(false);
+	} while (need_resched());
+}
+
 #ifdef CONFIG_CONTEXT_TRACKING
 asmlinkage __visible void __sched schedule_user(void)
 {
@@ -3662,7 +3579,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
 	exception_exit(prev_state);
 }
 
-int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
 			  void *key)
 {
 	return try_to_wake_up(curr->private, mode, wake_flags);
@@ -3984,46 +3901,6 @@ static struct task_struct *find_process_by_pid(pid_t pid)
 }
 
 /*
- * This function initializes the sched_dl_entity of a newly becoming
- * SCHED_DEADLINE task.
- *
- * Only the static values are considered here, the actual runtime and the
- * absolute deadline will be properly calculated when the task is enqueued
- * for the first time with its new policy.
- */
-static void
-__setparam_dl(struct task_struct *p, const struct sched_attr *attr)
-{
-	struct sched_dl_entity *dl_se = &p->dl;
-
-	dl_se->dl_runtime = attr->sched_runtime;
-	dl_se->dl_deadline = attr->sched_deadline;
-	dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
-	dl_se->flags = attr->sched_flags;
-	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
-
-	/*
-	 * Changing the parameters of a task is 'tricky' and we're not doing
-	 * the correct thing -- also see task_dead_dl() and switched_from_dl().
-	 *
-	 * What we SHOULD do is delay the bandwidth release until the 0-lag
-	 * point. This would include retaining the task_struct until that time
-	 * and change dl_overflow() to not immediately decrement the current
-	 * amount.
-	 *
-	 * Instead we retain the current runtime/deadline and let the new
-	 * parameters take effect after the current reservation period lapses.
-	 * This is safe (albeit pessimistic) because the 0-lag point is always
-	 * before the current scheduling deadline.
-	 *
-	 * We can still have temporary overloads because we do not delay the
-	 * change in bandwidth until that time; so admission control is
-	 * not on the safe side. It does however guarantee tasks will never
-	 * consume more than promised.
-	 */
-}
-
-/*
  * sched_setparam() passes in -1 for its policy, to let the functions
  * it calls know not to change it.
  */
@@ -4076,59 +3953,6 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
 		p->sched_class = &fair_sched_class;
 }
 
-static void
-__getparam_dl(struct task_struct *p, struct sched_attr *attr)
-{
-	struct sched_dl_entity *dl_se = &p->dl;
-
-	attr->sched_priority = p->rt_priority;
-	attr->sched_runtime = dl_se->dl_runtime;
-	attr->sched_deadline = dl_se->dl_deadline;
-	attr->sched_period = dl_se->dl_period;
-	attr->sched_flags = dl_se->flags;
-}
-
-/*
- * This function validates the new parameters of a -deadline task.
- * We ask for the deadline not being zero, and greater or equal
- * than the runtime, as well as the period of being zero or
- * greater than deadline. Furthermore, we have to be sure that
- * user parameters are above the internal resolution of 1us (we
- * check sched_runtime only since it is always the smaller one) and
- * below 2^63 ns (we have to check both sched_deadline and
- * sched_period, as the latter can be zero).
- */
-static bool
-__checkparam_dl(const struct sched_attr *attr)
-{
-	/* deadline != 0 */
-	if (attr->sched_deadline == 0)
-		return false;
-
-	/*
-	 * Since we truncate DL_SCALE bits, make sure we're at least
-	 * that big.
-	 */
-	if (attr->sched_runtime < (1ULL << DL_SCALE))
-		return false;
-
-	/*
-	 * Since we use the MSB for wrap-around and sign issues, make
-	 * sure it's not set (mind that period can be equal to zero).
-	 */
-	if (attr->sched_deadline & (1ULL << 63) ||
-	    attr->sched_period & (1ULL << 63))
-		return false;
-
-	/* runtime <= deadline <= period (if period != 0) */
-	if ((attr->sched_period != 0 &&
-	     attr->sched_period < attr->sched_deadline) ||
-	    attr->sched_deadline < attr->sched_runtime)
-		return false;
-
-	return true;
-}
-
 /*
  * Check the target process has a UID that matches the current process's:
  */
@@ -4145,19 +3969,6 @@ static bool check_same_owner(struct task_struct *p)
 	return match;
 }
 
-static bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
-{
-	struct sched_dl_entity *dl_se = &p->dl;
-
-	if (dl_se->dl_runtime != attr->sched_runtime ||
-		dl_se->dl_deadline != attr->sched_deadline ||
-		dl_se->dl_period != attr->sched_period ||
-		dl_se->flags != attr->sched_flags)
-		return true;
-
-	return false;
-}
-
 static int __sched_setscheduler(struct task_struct *p,
 				const struct sched_attr *attr,
 				bool user, bool pi)
@@ -4172,8 +3983,8 @@ static int __sched_setscheduler(struct task_struct *p,
 	int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
 	struct rq *rq;
 
-	/* May grab non-irq protected spin_locks: */
-	BUG_ON(in_interrupt());
+	/* The pi code expects interrupts enabled */
+	BUG_ON(pi && in_interrupt());
 recheck:
 	/* Double check policy once rq lock held: */
 	if (policy < 0) {
@@ -4186,7 +3997,8 @@ recheck:
 			return -EINVAL;
 	}
 
-	if (attr->sched_flags & ~(SCHED_FLAG_RESET_ON_FORK))
+	if (attr->sched_flags &
+		~(SCHED_FLAG_RESET_ON_FORK | SCHED_FLAG_RECLAIM))
 		return -EINVAL;
 
 	/*
@@ -4337,7 +4149,7 @@ change:
 	 * of a SCHED_DEADLINE task) we need to check if enough bandwidth
 	 * is available.
 	 */
-	if ((dl_policy(policy) || dl_task(p)) && dl_overflow(p, policy, attr)) {
+	if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) {
 		task_rq_unlock(rq, p, &rf);
 		return -EBUSY;
 	}
@@ -5438,26 +5250,17 @@ void init_idle(struct task_struct *idle, int cpu)
 #endif
 }
 
+#ifdef CONFIG_SMP
+
 int cpuset_cpumask_can_shrink(const struct cpumask *cur,
 			      const struct cpumask *trial)
 {
-	int ret = 1, trial_cpus;
-	struct dl_bw *cur_dl_b;
-	unsigned long flags;
+	int ret = 1;
 
 	if (!cpumask_weight(cur))
 		return ret;
 
-	rcu_read_lock_sched();
-	cur_dl_b = dl_bw_of(cpumask_any(cur));
-	trial_cpus = cpumask_weight(trial);
-
-	raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
-	if (cur_dl_b->bw != -1 &&
-	    cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
-		ret = 0;
-	raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
-	rcu_read_unlock_sched();
+	ret = dl_cpuset_cpumask_can_shrink(cur, trial);
 
 	return ret;
 }
@@ -5481,43 +5284,14 @@ int task_can_attach(struct task_struct *p,
 		goto out;
 	}
 
-#ifdef CONFIG_SMP
 	if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
-					      cs_cpus_allowed)) {
-		unsigned int dest_cpu = cpumask_any_and(cpu_active_mask,
-							cs_cpus_allowed);
-		struct dl_bw *dl_b;
-		bool overflow;
-		int cpus;
-		unsigned long flags;
-
-		rcu_read_lock_sched();
-		dl_b = dl_bw_of(dest_cpu);
-		raw_spin_lock_irqsave(&dl_b->lock, flags);
-		cpus = dl_bw_cpus(dest_cpu);
-		overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
-		if (overflow)
-			ret = -EBUSY;
-		else {
-			/*
-			 * We reserve space for this task in the destination
-			 * root_domain, as we can't fail after this point.
-			 * We will free resources in the source root_domain
-			 * later on (see set_cpus_allowed_dl()).
-			 */
-			__dl_add(dl_b, p->dl.dl_bw);
-		}
-		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-		rcu_read_unlock_sched();
+					      cs_cpus_allowed))
+		ret = dl_task_can_attach(p, cs_cpus_allowed);
 
-	}
-#endif
 out:
 	return ret;
 }
 
-#ifdef CONFIG_SMP
-
 bool sched_smp_initialized __read_mostly;
 
 #ifdef CONFIG_NUMA_BALANCING
@@ -5580,7 +5354,7 @@ void idle_task_exit(void)
 	BUG_ON(cpu_online(smp_processor_id()));
 
 	if (mm != &init_mm) {
-		switch_mm_irqs_off(mm, &init_mm, current);
+		switch_mm(mm, &init_mm, current);
 		finish_arch_post_lock_switch();
 	}
 	mmdrop(mm);
@@ -5780,23 +5554,8 @@ static void cpuset_cpu_active(void)
 
 static int cpuset_cpu_inactive(unsigned int cpu)
 {
-	unsigned long flags;
-	struct dl_bw *dl_b;
-	bool overflow;
-	int cpus;
-
 	if (!cpuhp_tasks_frozen) {
-		rcu_read_lock_sched();
-		dl_b = dl_bw_of(cpu);
-
-		raw_spin_lock_irqsave(&dl_b->lock, flags);
-		cpus = dl_bw_cpus(cpu);
-		overflow = __dl_overflow(dl_b, cpus, 0, 0);
-		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
-		rcu_read_unlock_sched();
-
-		if (overflow)
+		if (dl_cpu_busy(cpu))
 			return -EBUSY;
 		cpuset_update_active_cpus();
 	} else {
@@ -5849,15 +5608,9 @@ int sched_cpu_deactivate(unsigned int cpu)
 	 * users of this state to go away such that all new such users will
 	 * observe it.
 	 *
-	 * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
-	 * not imply sync_sched(), so wait for both.
-	 *
 	 * Do sync before park smpboot threads to take care the rcu boost case.
 	 */
-	if (IS_ENABLED(CONFIG_PREEMPT))
-		synchronize_rcu_mult(call_rcu, call_rcu_sched);
-	else
-		synchronize_rcu();
+	synchronize_rcu_mult(call_rcu, call_rcu_sched);
 
 	if (!sched_smp_initialized)
 		return 0;
@@ -5933,7 +5686,6 @@ void __init sched_init_smp(void)
 	cpumask_var_t non_isolated_cpus;
 
 	alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
-	alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
 
 	sched_init_numa();
 
@@ -5943,7 +5695,7 @@ void __init sched_init_smp(void)
 	 * happen.
 	 */
 	mutex_lock(&sched_domains_mutex);
-	init_sched_domains(cpu_active_mask);
+	sched_init_domains(cpu_active_mask);
 	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
 	if (cpumask_empty(non_isolated_cpus))
 		cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
@@ -5959,7 +5711,6 @@ void __init sched_init_smp(void)
 	init_sched_dl_class();
 
 	sched_init_smt();
-	sched_clock_init_late();
 
 	sched_smp_initialized = true;
 }
@@ -5975,7 +5726,6 @@ early_initcall(migration_init);
 void __init sched_init_smp(void)
 {
 	sched_init_granularity();
-	sched_clock_init_late();
 }
 #endif /* CONFIG_SMP */
 
@@ -6001,28 +5751,13 @@ static struct kmem_cache *task_group_cache __read_mostly;
 DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
 DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
 
-#define WAIT_TABLE_BITS 8
-#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
-static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
-
-wait_queue_head_t *bit_waitqueue(void *word, int bit)
-{
-	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
-	unsigned long val = (unsigned long)word << shift | bit;
-
-	return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
-}
-EXPORT_SYMBOL(bit_waitqueue);
-
 void __init sched_init(void)
 {
 	int i, j;
 	unsigned long alloc_size = 0, ptr;
 
 	sched_clock_init();
-
-	for (i = 0; i < WAIT_TABLE_SIZE; i++)
-		init_waitqueue_head(bit_wait_table + i);
+	wait_bit_init();
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	alloc_size += 2 * nr_cpu_ids * sizeof(void **);
@@ -6174,7 +5909,6 @@ void __init sched_init(void)
 	calc_load_update = jiffies + LOAD_FREQ;
 
 #ifdef CONFIG_SMP
-	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
 	/* May be allocated at isolcpus cmdline parse time */
 	if (cpu_isolated_map == NULL)
 		zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
@@ -6226,8 +5960,10 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 
 	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
 	     !is_idle_task(current)) ||
-	    system_state != SYSTEM_RUNNING || oops_in_progress)
+	    system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
+	    oops_in_progress)
 		return;
+
 	if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
 		return;
 	prev_jiffy = jiffies;
@@ -6482,385 +6218,6 @@ void sched_move_task(struct task_struct *tsk)
 
 	task_rq_unlock(rq, tsk, &rf);
 }
-#endif /* CONFIG_CGROUP_SCHED */
-
-#ifdef CONFIG_RT_GROUP_SCHED
-/*
- * Ensure that the real time constraints are schedulable.
- */
-static DEFINE_MUTEX(rt_constraints_mutex);
-
-/* Must be called with tasklist_lock held */
-static inline int tg_has_rt_tasks(struct task_group *tg)
-{
-	struct task_struct *g, *p;
-
-	/*
-	 * Autogroups do not have RT tasks; see autogroup_create().
-	 */
-	if (task_group_is_autogroup(tg))
-		return 0;
-
-	for_each_process_thread(g, p) {
-		if (rt_task(p) && task_group(p) == tg)
-			return 1;
-	}
-
-	return 0;
-}
-
-struct rt_schedulable_data {
-	struct task_group *tg;
-	u64 rt_period;
-	u64 rt_runtime;
-};
-
-static int tg_rt_schedulable(struct task_group *tg, void *data)
-{
-	struct rt_schedulable_data *d = data;
-	struct task_group *child;
-	unsigned long total, sum = 0;
-	u64 period, runtime;
-
-	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
-	runtime = tg->rt_bandwidth.rt_runtime;
-
-	if (tg == d->tg) {
-		period = d->rt_period;
-		runtime = d->rt_runtime;
-	}
-
-	/*
-	 * Cannot have more runtime than the period.
-	 */
-	if (runtime > period && runtime != RUNTIME_INF)
-		return -EINVAL;
-
-	/*
-	 * Ensure we don't starve existing RT tasks.
-	 */
-	if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
-		return -EBUSY;
-
-	total = to_ratio(period, runtime);
-
-	/*
-	 * Nobody can have more than the global setting allows.
-	 */
-	if (total > to_ratio(global_rt_period(), global_rt_runtime()))
-		return -EINVAL;
-
-	/*
-	 * The sum of our children's runtime should not exceed our own.
-	 */
-	list_for_each_entry_rcu(child, &tg->children, siblings) {
-		period = ktime_to_ns(child->rt_bandwidth.rt_period);
-		runtime = child->rt_bandwidth.rt_runtime;
-
-		if (child == d->tg) {
-			period = d->rt_period;
-			runtime = d->rt_runtime;
-		}
-
-		sum += to_ratio(period, runtime);
-	}
-
-	if (sum > total)
-		return -EINVAL;
-
-	return 0;
-}
-
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
-{
-	int ret;
-
-	struct rt_schedulable_data data = {
-		.tg = tg,
-		.rt_period = period,
-		.rt_runtime = runtime,
-	};
-
-	rcu_read_lock();
-	ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
-	rcu_read_unlock();
-
-	return ret;
-}
-
-static int tg_set_rt_bandwidth(struct task_group *tg,
-		u64 rt_period, u64 rt_runtime)
-{
-	int i, err = 0;
-
-	/*
-	 * Disallowing the root group RT runtime is BAD, it would disallow the
-	 * kernel creating (and or operating) RT threads.
-	 */
-	if (tg == &root_task_group && rt_runtime == 0)
-		return -EINVAL;
-
-	/* No period doesn't make any sense. */
-	if (rt_period == 0)
-		return -EINVAL;
-
-	mutex_lock(&rt_constraints_mutex);
-	read_lock(&tasklist_lock);
-	err = __rt_schedulable(tg, rt_period, rt_runtime);
-	if (err)
-		goto unlock;
-
-	raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
-	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
-	tg->rt_bandwidth.rt_runtime = rt_runtime;
-
-	for_each_possible_cpu(i) {
-		struct rt_rq *rt_rq = tg->rt_rq[i];
-
-		raw_spin_lock(&rt_rq->rt_runtime_lock);
-		rt_rq->rt_runtime = rt_runtime;
-		raw_spin_unlock(&rt_rq->rt_runtime_lock);
-	}
-	raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
-unlock:
-	read_unlock(&tasklist_lock);
-	mutex_unlock(&rt_constraints_mutex);
-
-	return err;
-}
-
-static int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
-{
-	u64 rt_runtime, rt_period;
-
-	rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
-	rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
-	if (rt_runtime_us < 0)
-		rt_runtime = RUNTIME_INF;
-
-	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
-}
-
-static long sched_group_rt_runtime(struct task_group *tg)
-{
-	u64 rt_runtime_us;
-
-	if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
-		return -1;
-
-	rt_runtime_us = tg->rt_bandwidth.rt_runtime;
-	do_div(rt_runtime_us, NSEC_PER_USEC);
-	return rt_runtime_us;
-}
-
-static int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
-{
-	u64 rt_runtime, rt_period;
-
-	rt_period = rt_period_us * NSEC_PER_USEC;
-	rt_runtime = tg->rt_bandwidth.rt_runtime;
-
-	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
-}
-
-static long sched_group_rt_period(struct task_group *tg)
-{
-	u64 rt_period_us;
-
-	rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
-	do_div(rt_period_us, NSEC_PER_USEC);
-	return rt_period_us;
-}
-#endif /* CONFIG_RT_GROUP_SCHED */
-
-#ifdef CONFIG_RT_GROUP_SCHED
-static int sched_rt_global_constraints(void)
-{
-	int ret = 0;
-
-	mutex_lock(&rt_constraints_mutex);
-	read_lock(&tasklist_lock);
-	ret = __rt_schedulable(NULL, 0, 0);
-	read_unlock(&tasklist_lock);
-	mutex_unlock(&rt_constraints_mutex);
-
-	return ret;
-}
-
-static int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
-{
-	/* Don't accept realtime tasks when there is no way for them to run */
-	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
-		return 0;
-
-	return 1;
-}
-
-#else /* !CONFIG_RT_GROUP_SCHED */
-static int sched_rt_global_constraints(void)
-{
-	unsigned long flags;
-	int i;
-
-	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
-	for_each_possible_cpu(i) {
-		struct rt_rq *rt_rq = &cpu_rq(i)->rt;
-
-		raw_spin_lock(&rt_rq->rt_runtime_lock);
-		rt_rq->rt_runtime = global_rt_runtime();
-		raw_spin_unlock(&rt_rq->rt_runtime_lock);
-	}
-	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
-
-	return 0;
-}
-#endif /* CONFIG_RT_GROUP_SCHED */
-
-static int sched_dl_global_validate(void)
-{
-	u64 runtime = global_rt_runtime();
-	u64 period = global_rt_period();
-	u64 new_bw = to_ratio(period, runtime);
-	struct dl_bw *dl_b;
-	int cpu, ret = 0;
-	unsigned long flags;
-
-	/*
-	 * Here we want to check the bandwidth not being set to some
-	 * value smaller than the currently allocated bandwidth in
-	 * any of the root_domains.
-	 *
-	 * FIXME: Cycling on all the CPUs is overdoing, but simpler than
-	 * cycling on root_domains... Discussion on different/better
-	 * solutions is welcome!
-	 */
-	for_each_possible_cpu(cpu) {
-		rcu_read_lock_sched();
-		dl_b = dl_bw_of(cpu);
-
-		raw_spin_lock_irqsave(&dl_b->lock, flags);
-		if (new_bw < dl_b->total_bw)
-			ret = -EBUSY;
-		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
-		rcu_read_unlock_sched();
-
-		if (ret)
-			break;
-	}
-
-	return ret;
-}
-
-static void sched_dl_do_global(void)
-{
-	u64 new_bw = -1;
-	struct dl_bw *dl_b;
-	int cpu;
-	unsigned long flags;
-
-	def_dl_bandwidth.dl_period = global_rt_period();
-	def_dl_bandwidth.dl_runtime = global_rt_runtime();
-
-	if (global_rt_runtime() != RUNTIME_INF)
-		new_bw = to_ratio(global_rt_period(), global_rt_runtime());
-
-	/*
-	 * FIXME: As above...
-	 */
-	for_each_possible_cpu(cpu) {
-		rcu_read_lock_sched();
-		dl_b = dl_bw_of(cpu);
-
-		raw_spin_lock_irqsave(&dl_b->lock, flags);
-		dl_b->bw = new_bw;
-		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
-		rcu_read_unlock_sched();
-	}
-}
-
-static int sched_rt_global_validate(void)
-{
-	if (sysctl_sched_rt_period <= 0)
-		return -EINVAL;
-
-	if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
-		(sysctl_sched_rt_runtime > sysctl_sched_rt_period))
-		return -EINVAL;
-
-	return 0;
-}
-
-static void sched_rt_do_global(void)
-{
-	def_rt_bandwidth.rt_runtime = global_rt_runtime();
-	def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
-}
-
-int sched_rt_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
-{
-	int old_period, old_runtime;
-	static DEFINE_MUTEX(mutex);
-	int ret;
-
-	mutex_lock(&mutex);
-	old_period = sysctl_sched_rt_period;
-	old_runtime = sysctl_sched_rt_runtime;
-
-	ret = proc_dointvec(table, write, buffer, lenp, ppos);
-
-	if (!ret && write) {
-		ret = sched_rt_global_validate();
-		if (ret)
-			goto undo;
-
-		ret = sched_dl_global_validate();
-		if (ret)
-			goto undo;
-
-		ret = sched_rt_global_constraints();
-		if (ret)
-			goto undo;
-
-		sched_rt_do_global();
-		sched_dl_do_global();
-	}
-	if (0) {
-undo:
-		sysctl_sched_rt_period = old_period;
-		sysctl_sched_rt_runtime = old_runtime;
-	}
-	mutex_unlock(&mutex);
-
-	return ret;
-}
-
-int sched_rr_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
-{
-	int ret;
-	static DEFINE_MUTEX(mutex);
-
-	mutex_lock(&mutex);
-	ret = proc_dointvec(table, write, buffer, lenp, ppos);
-	/*
-	 * Make sure that internally we keep jiffies.
-	 * Also, writing zero resets the timeslice to default:
-	 */
-	if (!ret && write) {
-		sched_rr_timeslice =
-			sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
-			msecs_to_jiffies(sysctl_sched_rr_timeslice);
-	}
-	mutex_unlock(&mutex);
-	return ret;
-}
-
-#ifdef CONFIG_CGROUP_SCHED
 
 static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
 {
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 76877a62b5fa..076a2e31951c 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -101,9 +101,6 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
 	if (sg_policy->next_freq == next_freq)
 		return;
 
-	if (sg_policy->next_freq > next_freq)
-		next_freq = (sg_policy->next_freq + next_freq) >> 1;
-
 	sg_policy->next_freq = next_freq;
 	sg_policy->last_freq_update_time = time;
 
@@ -245,11 +242,10 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	sugov_update_commit(sg_policy, time, next_f);
 }
 
-static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
+static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 {
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
-	u64 last_freq_update_time = sg_policy->last_freq_update_time;
 	unsigned long util = 0, max = 1;
 	unsigned int j;
 
@@ -265,7 +261,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
 		 * enough, don't take the CPU into account as it probably is
 		 * idle now (and clear iowait_boost for it).
 		 */
-		delta_ns = last_freq_update_time - j_sg_cpu->last_update;
+		delta_ns = time - j_sg_cpu->last_update;
 		if (delta_ns > TICK_NSEC) {
 			j_sg_cpu->iowait_boost = 0;
 			continue;
@@ -309,7 +305,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
 		if (flags & SCHED_CPUFREQ_RT_DL)
 			next_f = sg_policy->policy->cpuinfo.max_freq;
 		else
-			next_f = sugov_next_freq_shared(sg_cpu);
+			next_f = sugov_next_freq_shared(sg_cpu, time);
 
 		sugov_update_commit(sg_policy, time, next_f);
 	}
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index aea3135c5d90..67c70e287647 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -615,19 +615,13 @@ static void cputime_adjust(struct task_cputime *curr,
 	 * userspace. Once a task gets some ticks, the monotonicy code at
 	 * 'update' will ensure things converge to the observed ratio.
 	 */
-	if (stime == 0) {
-		utime = rtime;
-		goto update;
+	if (stime != 0) {
+		if (utime == 0)
+			stime = rtime;
+		else
+			stime = scale_stime(stime, rtime, stime + utime);
 	}
 
-	if (utime == 0) {
-		stime = rtime;
-		goto update;
-	}
-
-	stime = scale_stime(stime, rtime, stime + utime);
-
-update:
 	/*
 	 * Make sure stime doesn't go backwards; this preserves monotonicity
 	 * for utime because rtime is monotonic.
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a2ce59015642..a84299f44b5d 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -17,6 +17,7 @@
 #include "sched.h"
 
 #include <linux/slab.h>
+#include <uapi/linux/sched/types.h>
 
 struct dl_bandwidth def_dl_bandwidth;
 
@@ -43,6 +44,254 @@ static inline int on_dl_rq(struct sched_dl_entity *dl_se)
 	return !RB_EMPTY_NODE(&dl_se->rb_node);
 }
 
+#ifdef CONFIG_SMP
+static inline struct dl_bw *dl_bw_of(int i)
+{
+	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+			 "sched RCU must be held");
+	return &cpu_rq(i)->rd->dl_bw;
+}
+
+static inline int dl_bw_cpus(int i)
+{
+	struct root_domain *rd = cpu_rq(i)->rd;
+	int cpus = 0;
+
+	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+			 "sched RCU must be held");
+	for_each_cpu_and(i, rd->span, cpu_active_mask)
+		cpus++;
+
+	return cpus;
+}
+#else
+static inline struct dl_bw *dl_bw_of(int i)
+{
+	return &cpu_rq(i)->dl.dl_bw;
+}
+
+static inline int dl_bw_cpus(int i)
+{
+	return 1;
+}
+#endif
+
+static inline
+void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
+{
+	u64 old = dl_rq->running_bw;
+
+	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+	dl_rq->running_bw += dl_bw;
+	SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
+	SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
+}
+
+static inline
+void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
+{
+	u64 old = dl_rq->running_bw;
+
+	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+	dl_rq->running_bw -= dl_bw;
+	SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
+	if (dl_rq->running_bw > old)
+		dl_rq->running_bw = 0;
+}
+
+static inline
+void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
+{
+	u64 old = dl_rq->this_bw;
+
+	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+	dl_rq->this_bw += dl_bw;
+	SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
+}
+
+static inline
+void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
+{
+	u64 old = dl_rq->this_bw;
+
+	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+	dl_rq->this_bw -= dl_bw;
+	SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
+	if (dl_rq->this_bw > old)
+		dl_rq->this_bw = 0;
+	SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
+}
+
+void dl_change_utilization(struct task_struct *p, u64 new_bw)
+{
+	struct rq *rq;
+
+	if (task_on_rq_queued(p))
+		return;
+
+	rq = task_rq(p);
+	if (p->dl.dl_non_contending) {
+		sub_running_bw(p->dl.dl_bw, &rq->dl);
+		p->dl.dl_non_contending = 0;
+		/*
+		 * If the timer handler is currently running and the
+		 * timer cannot be cancelled, inactive_task_timer()
+		 * will see that dl_not_contending is not set, and
+		 * will not touch the rq's active utilization,
+		 * so we are still safe.
+		 */
+		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
+			put_task_struct(p);
+	}
+	sub_rq_bw(p->dl.dl_bw, &rq->dl);
+	add_rq_bw(new_bw, &rq->dl);
+}
+
+/*
+ * The utilization of a task cannot be immediately removed from
+ * the rq active utilization (running_bw) when the task blocks.
+ * Instead, we have to wait for the so called "0-lag time".
+ *
+ * If a task blocks before the "0-lag time", a timer (the inactive
+ * timer) is armed, and running_bw is decreased when the timer
+ * fires.
+ *
+ * If the task wakes up again before the inactive timer fires,
+ * the timer is cancelled, whereas if the task wakes up after the
+ * inactive timer fired (and running_bw has been decreased) the
+ * task's utilization has to be added to running_bw again.
+ * A flag in the deadline scheduling entity (dl_non_contending)
+ * is used to avoid race conditions between the inactive timer handler
+ * and task wakeups.
+ *
+ * The following diagram shows how running_bw is updated. A task is
+ * "ACTIVE" when its utilization contributes to running_bw; an
+ * "ACTIVE contending" task is in the TASK_RUNNING state, while an
+ * "ACTIVE non contending" task is a blocked task for which the "0-lag time"
+ * has not passed yet. An "INACTIVE" task is a task for which the "0-lag"
+ * time already passed, which does not contribute to running_bw anymore.
+ *                              +------------------+
+ *             wakeup           |    ACTIVE        |
+ *          +------------------>+   contending     |
+ *          | add_running_bw    |                  |
+ *          |                   +----+------+------+
+ *          |                        |      ^
+ *          |                dequeue |      |
+ * +--------+-------+                |      |
+ * |                |   t >= 0-lag   |      | wakeup
+ * |    INACTIVE    |<---------------+      |
+ * |                | sub_running_bw |      |
+ * +--------+-------+                |      |
+ *          ^                        |      |
+ *          |              t < 0-lag |      |
+ *          |                        |      |
+ *          |                        V      |
+ *          |                   +----+------+------+
+ *          | sub_running_bw    |    ACTIVE        |
+ *          +-------------------+                  |
+ *            inactive timer    |  non contending  |
+ *            fired             +------------------+
+ *
+ * The task_non_contending() function is invoked when a task
+ * blocks, and checks if the 0-lag time already passed or
+ * not (in the first case, it directly updates running_bw;
+ * in the second case, it arms the inactive timer).
+ *
+ * The task_contending() function is invoked when a task wakes
+ * up, and checks if the task is still in the "ACTIVE non contending"
+ * state or not (in the second case, it updates running_bw).
+ */
+static void task_non_contending(struct task_struct *p)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+	struct hrtimer *timer = &dl_se->inactive_timer;
+	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+	struct rq *rq = rq_of_dl_rq(dl_rq);
+	s64 zerolag_time;
+
+	/*
+	 * If this is a non-deadline task that has been boosted,
+	 * do nothing
+	 */
+	if (dl_se->dl_runtime == 0)
+		return;
+
+	WARN_ON(hrtimer_active(&dl_se->inactive_timer));
+	WARN_ON(dl_se->dl_non_contending);
+
+	zerolag_time = dl_se->deadline -
+		 div64_long((dl_se->runtime * dl_se->dl_period),
+			dl_se->dl_runtime);
+
+	/*
+	 * Using relative times instead of the absolute "0-lag time"
+	 * allows to simplify the code
+	 */
+	zerolag_time -= rq_clock(rq);
+
+	/*
+	 * If the "0-lag time" already passed, decrease the active
+	 * utilization now, instead of starting a timer
+	 */
+	if (zerolag_time < 0) {
+		if (dl_task(p))
+			sub_running_bw(dl_se->dl_bw, dl_rq);
+		if (!dl_task(p) || p->state == TASK_DEAD) {
+			struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+
+			if (p->state == TASK_DEAD)
+				sub_rq_bw(p->dl.dl_bw, &rq->dl);
+			raw_spin_lock(&dl_b->lock);
+			__dl_clear(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
+			__dl_clear_params(p);
+			raw_spin_unlock(&dl_b->lock);
+		}
+
+		return;
+	}
+
+	dl_se->dl_non_contending = 1;
+	get_task_struct(p);
+	hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL);
+}
+
+static void task_contending(struct sched_dl_entity *dl_se, int flags)
+{
+	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+
+	/*
+	 * If this is a non-deadline task that has been boosted,
+	 * do nothing
+	 */
+	if (dl_se->dl_runtime == 0)
+		return;
+
+	if (flags & ENQUEUE_MIGRATED)
+		add_rq_bw(dl_se->dl_bw, dl_rq);
+
+	if (dl_se->dl_non_contending) {
+		dl_se->dl_non_contending = 0;
+		/*
+		 * If the timer handler is currently running and the
+		 * timer cannot be cancelled, inactive_task_timer()
+		 * will see that dl_not_contending is not set, and
+		 * will not touch the rq's active utilization,
+		 * so we are still safe.
+		 */
+		if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
+			put_task_struct(dl_task_of(dl_se));
+	} else {
+		/*
+		 * Since "dl_non_contending" is not set, the
+		 * task's utilization has already been removed from
+		 * active utilization (either when the task blocked,
+		 * when the "inactive timer" fired).
+		 * So, add it back.
+		 */
+		add_running_bw(dl_se->dl_bw, dl_rq);
+	}
+}
+
 static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
 {
 	struct sched_dl_entity *dl_se = &p->dl;
@@ -83,6 +332,10 @@ void init_dl_rq(struct dl_rq *dl_rq)
 #else
 	init_dl_bw(&dl_rq->dl_bw);
 #endif
+
+	dl_rq->running_bw = 0;
+	dl_rq->this_bw = 0;
+	init_dl_rq_bw_ratio(dl_rq);
 }
 
 #ifdef CONFIG_SMP
@@ -484,13 +737,84 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
 }
 
 /*
- * When a -deadline entity is queued back on the runqueue, its runtime and
- * deadline might need updating.
+ * Revised wakeup rule [1]: For self-suspending tasks, rather then
+ * re-initializing task's runtime and deadline, the revised wakeup
+ * rule adjusts the task's runtime to avoid the task to overrun its
+ * density.
+ *
+ * Reasoning: a task may overrun the density if:
+ *    runtime / (deadline - t) > dl_runtime / dl_deadline
+ *
+ * Therefore, runtime can be adjusted to:
+ *     runtime = (dl_runtime / dl_deadline) * (deadline - t)
+ *
+ * In such way that runtime will be equal to the maximum density
+ * the task can use without breaking any rule.
+ *
+ * [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant
+ * bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24.
+ */
+static void
+update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq)
+{
+	u64 laxity = dl_se->deadline - rq_clock(rq);
+
+	/*
+	 * If the task has deadline < period, and the deadline is in the past,
+	 * it should already be throttled before this check.
+	 *
+	 * See update_dl_entity() comments for further details.
+	 */
+	WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq)));
+
+	dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT;
+}
+
+/*
+ * Regarding the deadline, a task with implicit deadline has a relative
+ * deadline == relative period. A task with constrained deadline has a
+ * relative deadline <= relative period.
+ *
+ * We support constrained deadline tasks. However, there are some restrictions
+ * applied only for tasks which do not have an implicit deadline. See
+ * update_dl_entity() to know more about such restrictions.
+ *
+ * The dl_is_implicit() returns true if the task has an implicit deadline.
+ */
+static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
+{
+	return dl_se->dl_deadline == dl_se->dl_period;
+}
+
+/*
+ * When a deadline entity is placed in the runqueue, its runtime and deadline
+ * might need to be updated. This is done by a CBS wake up rule. There are two
+ * different rules: 1) the original CBS; and 2) the Revisited CBS.
+ *
+ * When the task is starting a new period, the Original CBS is used. In this
+ * case, the runtime is replenished and a new absolute deadline is set.
+ *
+ * When a task is queued before the begin of the next period, using the
+ * remaining runtime and deadline could make the entity to overflow, see
+ * dl_entity_overflow() to find more about runtime overflow. When such case
+ * is detected, the runtime and deadline need to be updated.
+ *
+ * If the task has an implicit deadline, i.e., deadline == period, the Original
+ * CBS is applied. the runtime is replenished and a new absolute deadline is
+ * set, as in the previous cases.
+ *
+ * However, the Original CBS does not work properly for tasks with
+ * deadline < period, which are said to have a constrained deadline. By
+ * applying the Original CBS, a constrained deadline task would be able to run
+ * runtime/deadline in a period. With deadline < period, the task would
+ * overrun the runtime/period allowed bandwidth, breaking the admission test.
  *
- * The policy here is that we update the deadline of the entity only if:
- *  - the current deadline is in the past,
- *  - using the remaining runtime with the current deadline would make
- *    the entity exceed its bandwidth.
+ * In order to prevent this misbehave, the Revisited CBS is used for
+ * constrained deadline tasks when a runtime overflow is detected. In the
+ * Revisited CBS, rather than replenishing & setting a new absolute deadline,
+ * the remaining runtime of the task is reduced to avoid runtime overflow.
+ * Please refer to the comments update_dl_revised_wakeup() function to find
+ * more about the Revised CBS rule.
  */
 static void update_dl_entity(struct sched_dl_entity *dl_se,
 			     struct sched_dl_entity *pi_se)
@@ -500,6 +824,14 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
 
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
 	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
+
+		if (unlikely(!dl_is_implicit(dl_se) &&
+			     !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
+			     !dl_se->dl_boosted)){
+			update_dl_revised_wakeup(dl_se, rq);
+			return;
+		}
+
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 		dl_se->runtime = pi_se->dl_runtime;
 	}
@@ -593,10 +925,8 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 	 * The task might have changed its scheduling policy to something
 	 * different than SCHED_DEADLINE (through switched_from_dl()).
 	 */
-	if (!dl_task(p)) {
-		__dl_clear_params(p);
+	if (!dl_task(p))
 		goto unlock;
-	}
 
 	/*
 	 * The task might have been boosted by someone else and might be in the
@@ -723,6 +1053,8 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
 		if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
 			return;
 		dl_se->dl_throttled = 1;
+		if (dl_se->runtime > 0)
+			dl_se->runtime = 0;
 	}
 }
 
@@ -735,6 +1067,47 @@ int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
 extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
 
 /*
+ * This function implements the GRUB accounting rule:
+ * according to the GRUB reclaiming algorithm, the runtime is
+ * not decreased as "dq = -dt", but as
+ * "dq = -max{u / Umax, (1 - Uinact - Uextra)} dt",
+ * where u is the utilization of the task, Umax is the maximum reclaimable
+ * utilization, Uinact is the (per-runqueue) inactive utilization, computed
+ * as the difference between the "total runqueue utilization" and the
+ * runqueue active utilization, and Uextra is the (per runqueue) extra
+ * reclaimable utilization.
+ * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations
+ * multiplied by 2^BW_SHIFT, the result has to be shifted right by
+ * BW_SHIFT.
+ * Since rq->dl.bw_ratio contains 1 / Umax multipled by 2^RATIO_SHIFT,
+ * dl_bw is multiped by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
+ * Since delta is a 64 bit variable, to have an overflow its value
+ * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
+ * So, overflow is not an issue here.
+ */
+u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
+{
+	u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
+	u64 u_act;
+	u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT;
+
+	/*
+	 * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)},
+	 * we compare u_inact + rq->dl.extra_bw with
+	 * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because
+	 * u_inact + rq->dl.extra_bw can be larger than
+	 * 1 * (so, 1 - u_inact - rq->dl.extra_bw would be negative
+	 * leading to wrong results)
+	 */
+	if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min)
+		u_act = u_act_min;
+	else
+		u_act = BW_UNIT - u_inact - rq->dl.extra_bw;
+
+	return (delta * u_act) >> BW_SHIFT;
+}
+
+/*
  * Update the current task's runtime statistics (provided it is still
  * a -deadline task and has not been removed from the dl_rq).
  */
@@ -776,6 +1149,8 @@ static void update_curr_dl(struct rq *rq)
 
 	sched_rt_avg_update(rq, delta_exec);
 
+	if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM))
+		delta_exec = grub_reclaim(delta_exec, rq, &curr->dl);
 	dl_se->runtime -= delta_exec;
 
 throttle:
@@ -815,6 +1190,56 @@ throttle:
 	}
 }
 
+static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
+{
+	struct sched_dl_entity *dl_se = container_of(timer,
+						     struct sched_dl_entity,
+						     inactive_timer);
+	struct task_struct *p = dl_task_of(dl_se);
+	struct rq_flags rf;
+	struct rq *rq;
+
+	rq = task_rq_lock(p, &rf);
+
+	if (!dl_task(p) || p->state == TASK_DEAD) {
+		struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+
+		if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
+			sub_running_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
+			sub_rq_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
+			dl_se->dl_non_contending = 0;
+		}
+
+		raw_spin_lock(&dl_b->lock);
+		__dl_clear(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
+		raw_spin_unlock(&dl_b->lock);
+		__dl_clear_params(p);
+
+		goto unlock;
+	}
+	if (dl_se->dl_non_contending == 0)
+		goto unlock;
+
+	sched_clock_tick();
+	update_rq_clock(rq);
+
+	sub_running_bw(dl_se->dl_bw, &rq->dl);
+	dl_se->dl_non_contending = 0;
+unlock:
+	task_rq_unlock(rq, p, &rf);
+	put_task_struct(p);
+
+	return HRTIMER_NORESTART;
+}
+
+void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
+{
+	struct hrtimer *timer = &dl_se->inactive_timer;
+
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	timer->function = inactive_task_timer;
+}
+
 #ifdef CONFIG_SMP
 
 static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
@@ -946,10 +1371,12 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
 	 * parameters of the task might need updating. Otherwise,
 	 * we want a replenishment of its runtime.
 	 */
-	if (flags & ENQUEUE_WAKEUP)
+	if (flags & ENQUEUE_WAKEUP) {
+		task_contending(dl_se, flags);
 		update_dl_entity(dl_se, pi_se);
-	else if (flags & ENQUEUE_REPLENISH)
+	} else if (flags & ENQUEUE_REPLENISH) {
 		replenish_dl_entity(dl_se, pi_se);
+	}
 
 	__enqueue_dl_entity(dl_se);
 }
@@ -959,11 +1386,6 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
 	__dequeue_dl_entity(dl_se);
 }
 
-static inline bool dl_is_constrained(struct sched_dl_entity *dl_se)
-{
-	return dl_se->dl_deadline < dl_se->dl_period;
-}
-
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct task_struct *pi_task = rt_mutex_get_top_task(p);
@@ -995,17 +1417,32 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	 * If that is the case, the task will be throttled and
 	 * the replenishment timer will be set to the next period.
 	 */
-	if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
+	if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
 		dl_check_constrained_dl(&p->dl);
 
+	if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
+		add_rq_bw(p->dl.dl_bw, &rq->dl);
+		add_running_bw(p->dl.dl_bw, &rq->dl);
+	}
+
 	/*
-	 * If p is throttled, we do nothing. In fact, if it exhausted
+	 * If p is throttled, we do not enqueue it. In fact, if it exhausted
 	 * its budget it needs a replenishment and, since it now is on
 	 * its rq, the bandwidth timer callback (which clearly has not
 	 * run yet) will take care of this.
+	 * However, the active utilization does not depend on the fact
+	 * that the task is on the runqueue or not (but depends on the
+	 * task's state - in GRUB parlance, "inactive" vs "active contending").
+	 * In other words, even if a task is throttled its utilization must
+	 * be counted in the active utilization; hence, we need to call
+	 * add_running_bw().
 	 */
-	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH))
+	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
+		if (flags & ENQUEUE_WAKEUP)
+			task_contending(&p->dl, flags);
+
 		return;
+	}
 
 	enqueue_dl_entity(&p->dl, pi_se, flags);
 
@@ -1023,6 +1460,23 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
 	update_curr_dl(rq);
 	__dequeue_task_dl(rq, p, flags);
+
+	if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
+		sub_running_bw(p->dl.dl_bw, &rq->dl);
+		sub_rq_bw(p->dl.dl_bw, &rq->dl);
+	}
+
+	/*
+	 * This check allows to start the inactive timer (or to immediately
+	 * decrease the active utilization, if needed) in two cases:
+	 * when the task blocks and when it is terminating
+	 * (p->state == TASK_DEAD). We can handle the two cases in the same
+	 * way, because from GRUB's point of view the same thing is happening
+	 * (the task moves from "active contending" to "active non contending"
+	 * or "inactive")
+	 */
+	if (flags & DEQUEUE_SLEEP)
+		task_non_contending(p);
 }
 
 /*
@@ -1100,6 +1554,37 @@ out:
 	return cpu;
 }
 
+static void migrate_task_rq_dl(struct task_struct *p)
+{
+	struct rq *rq;
+
+	if (p->state != TASK_WAKING)
+		return;
+
+	rq = task_rq(p);
+	/*
+	 * Since p->state == TASK_WAKING, set_task_cpu() has been called
+	 * from try_to_wake_up(). Hence, p->pi_lock is locked, but
+	 * rq->lock is not... So, lock it
+	 */
+	raw_spin_lock(&rq->lock);
+	if (p->dl.dl_non_contending) {
+		sub_running_bw(p->dl.dl_bw, &rq->dl);
+		p->dl.dl_non_contending = 0;
+		/*
+		 * If the timer handler is currently running and the
+		 * timer cannot be cancelled, inactive_task_timer()
+		 * will see that dl_not_contending is not set, and
+		 * will not touch the rq's active utilization,
+		 * so we are still safe.
+		 */
+		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
+			put_task_struct(p);
+	}
+	sub_rq_bw(p->dl.dl_bw, &rq->dl);
+	raw_spin_unlock(&rq->lock);
+}
+
 static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 {
 	/*
@@ -1255,19 +1740,6 @@ static void task_fork_dl(struct task_struct *p)
 	 */
 }
 
-static void task_dead_dl(struct task_struct *p)
-{
-	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
-
-	/*
-	 * Since we are TASK_DEAD we won't slip out of the domain!
-	 */
-	raw_spin_lock_irq(&dl_b->lock);
-	/* XXX we should retain the bw until 0-lag */
-	dl_b->total_bw -= p->dl.dl_bw;
-	raw_spin_unlock_irq(&dl_b->lock);
-}
-
 static void set_curr_task_dl(struct rq *rq)
 {
 	struct task_struct *p = rq->curr;
@@ -1533,7 +2005,7 @@ retry:
 		 * then possible that next_task has migrated.
 		 */
 		task = pick_next_pushable_dl_task(rq);
-		if (task_cpu(next_task) == rq->cpu && task == next_task) {
+		if (task == next_task) {
 			/*
 			 * The task is still there. We don't try
 			 * again, some other cpu will pull it when ready.
@@ -1551,7 +2023,11 @@ retry:
 	}
 
 	deactivate_task(rq, next_task, 0);
+	sub_running_bw(next_task->dl.dl_bw, &rq->dl);
+	sub_rq_bw(next_task->dl.dl_bw, &rq->dl);
 	set_task_cpu(next_task, later_rq->cpu);
+	add_rq_bw(next_task->dl.dl_bw, &later_rq->dl);
+	add_running_bw(next_task->dl.dl_bw, &later_rq->dl);
 	activate_task(later_rq, next_task, 0);
 	ret = 1;
 
@@ -1639,7 +2115,11 @@ static void pull_dl_task(struct rq *this_rq)
 			resched = true;
 
 			deactivate_task(src_rq, p, 0);
+			sub_running_bw(p->dl.dl_bw, &src_rq->dl);
+			sub_rq_bw(p->dl.dl_bw, &src_rq->dl);
 			set_task_cpu(p, this_cpu);
+			add_rq_bw(p->dl.dl_bw, &this_rq->dl);
+			add_running_bw(p->dl.dl_bw, &this_rq->dl);
 			activate_task(this_rq, p, 0);
 			dmin = p->dl.deadline;
 
@@ -1695,7 +2175,7 @@ static void set_cpus_allowed_dl(struct task_struct *p,
 		 * until we complete the update.
 		 */
 		raw_spin_lock(&src_dl_b->lock);
-		__dl_clear(src_dl_b, p->dl.dl_bw);
+		__dl_clear(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
 		raw_spin_unlock(&src_dl_b->lock);
 	}
 
@@ -1737,13 +2217,26 @@ void __init init_sched_dl_class(void)
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
 {
 	/*
-	 * Start the deadline timer; if we switch back to dl before this we'll
-	 * continue consuming our current CBS slice. If we stay outside of
-	 * SCHED_DEADLINE until the deadline passes, the timer will reset the
-	 * task.
+	 * task_non_contending() can start the "inactive timer" (if the 0-lag
+	 * time is in the future). If the task switches back to dl before
+	 * the "inactive timer" fires, it can continue to consume its current
+	 * runtime using its current deadline. If it stays outside of
+	 * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
+	 * will reset the task parameters.
 	 */
-	if (!start_dl_timer(p))
-		__dl_clear_params(p);
+	if (task_on_rq_queued(p) && p->dl.dl_runtime)
+		task_non_contending(p);
+
+	if (!task_on_rq_queued(p))
+		sub_rq_bw(p->dl.dl_bw, &rq->dl);
+
+	/*
+	 * We cannot use inactive_task_timer() to invoke sub_running_bw()
+	 * at the 0-lag time, because the task could have been migrated
+	 * while SCHED_OTHER in the meanwhile.
+	 */
+	if (p->dl.dl_non_contending)
+		p->dl.dl_non_contending = 0;
 
 	/*
 	 * Since this might be the only -deadline task on the rq,
@@ -1762,11 +2255,15 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
  */
 static void switched_to_dl(struct rq *rq, struct task_struct *p)
 {
+	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
+		put_task_struct(p);
 
 	/* If p is not queued we will update its parameters at next wakeup. */
-	if (!task_on_rq_queued(p))
-		return;
+	if (!task_on_rq_queued(p)) {
+		add_rq_bw(p->dl.dl_bw, &rq->dl);
 
+		return;
+	}
 	/*
 	 * If p is boosted we already updated its params in
 	 * rt_mutex_setprio()->enqueue_task(..., ENQUEUE_REPLENISH),
@@ -1836,6 +2333,7 @@ const struct sched_class dl_sched_class = {
 
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_dl,
+	.migrate_task_rq	= migrate_task_rq_dl,
 	.set_cpus_allowed       = set_cpus_allowed_dl,
 	.rq_online              = rq_online_dl,
 	.rq_offline             = rq_offline_dl,
@@ -1845,7 +2343,6 @@ const struct sched_class dl_sched_class = {
 	.set_curr_task		= set_curr_task_dl,
 	.task_tick		= task_tick_dl,
 	.task_fork              = task_fork_dl,
-	.task_dead		= task_dead_dl,
 
 	.prio_changed           = prio_changed_dl,
 	.switched_from		= switched_from_dl,
@@ -1854,6 +2351,317 @@ const struct sched_class dl_sched_class = {
 	.update_curr		= update_curr_dl,
 };
 
+int sched_dl_global_validate(void)
+{
+	u64 runtime = global_rt_runtime();
+	u64 period = global_rt_period();
+	u64 new_bw = to_ratio(period, runtime);
+	struct dl_bw *dl_b;
+	int cpu, ret = 0;
+	unsigned long flags;
+
+	/*
+	 * Here we want to check the bandwidth not being set to some
+	 * value smaller than the currently allocated bandwidth in
+	 * any of the root_domains.
+	 *
+	 * FIXME: Cycling on all the CPUs is overdoing, but simpler than
+	 * cycling on root_domains... Discussion on different/better
+	 * solutions is welcome!
+	 */
+	for_each_possible_cpu(cpu) {
+		rcu_read_lock_sched();
+		dl_b = dl_bw_of(cpu);
+
+		raw_spin_lock_irqsave(&dl_b->lock, flags);
+		if (new_bw < dl_b->total_bw)
+			ret = -EBUSY;
+		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+
+		rcu_read_unlock_sched();
+
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
+{
+	if (global_rt_runtime() == RUNTIME_INF) {
+		dl_rq->bw_ratio = 1 << RATIO_SHIFT;
+		dl_rq->extra_bw = 1 << BW_SHIFT;
+	} else {
+		dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
+			  global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
+		dl_rq->extra_bw = to_ratio(global_rt_period(),
+						    global_rt_runtime());
+	}
+}
+
+void sched_dl_do_global(void)
+{
+	u64 new_bw = -1;
+	struct dl_bw *dl_b;
+	int cpu;
+	unsigned long flags;
+
+	def_dl_bandwidth.dl_period = global_rt_period();
+	def_dl_bandwidth.dl_runtime = global_rt_runtime();
+
+	if (global_rt_runtime() != RUNTIME_INF)
+		new_bw = to_ratio(global_rt_period(), global_rt_runtime());
+
+	/*
+	 * FIXME: As above...
+	 */
+	for_each_possible_cpu(cpu) {
+		rcu_read_lock_sched();
+		dl_b = dl_bw_of(cpu);
+
+		raw_spin_lock_irqsave(&dl_b->lock, flags);
+		dl_b->bw = new_bw;
+		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+
+		rcu_read_unlock_sched();
+		init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
+	}
+}
+
+/*
+ * We must be sure that accepting a new task (or allowing changing the
+ * parameters of an existing one) is consistent with the bandwidth
+ * constraints. If yes, this function also accordingly updates the currently
+ * allocated bandwidth to reflect the new situation.
+ *
+ * This function is called while holding p's rq->lock.
+ */
+int sched_dl_overflow(struct task_struct *p, int policy,
+		      const struct sched_attr *attr)
+{
+	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+	u64 period = attr->sched_period ?: attr->sched_deadline;
+	u64 runtime = attr->sched_runtime;
+	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
+	int cpus, err = -1;
+
+	/* !deadline task may carry old deadline bandwidth */
+	if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
+		return 0;
+
+	/*
+	 * Either if a task, enters, leave, or stays -deadline but changes
+	 * its parameters, we may need to update accordingly the total
+	 * allocated bandwidth of the container.
+	 */
+	raw_spin_lock(&dl_b->lock);
+	cpus = dl_bw_cpus(task_cpu(p));
+	if (dl_policy(policy) && !task_has_dl_policy(p) &&
+	    !__dl_overflow(dl_b, cpus, 0, new_bw)) {
+		if (hrtimer_active(&p->dl.inactive_timer))
+			__dl_clear(dl_b, p->dl.dl_bw, cpus);
+		__dl_add(dl_b, new_bw, cpus);
+		err = 0;
+	} else if (dl_policy(policy) && task_has_dl_policy(p) &&
+		   !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
+		/*
+		 * XXX this is slightly incorrect: when the task
+		 * utilization decreases, we should delay the total
+		 * utilization change until the task's 0-lag point.
+		 * But this would require to set the task's "inactive
+		 * timer" when the task is not inactive.
+		 */
+		__dl_clear(dl_b, p->dl.dl_bw, cpus);
+		__dl_add(dl_b, new_bw, cpus);
+		dl_change_utilization(p, new_bw);
+		err = 0;
+	} else if (!dl_policy(policy) && task_has_dl_policy(p)) {
+		/*
+		 * Do not decrease the total deadline utilization here,
+		 * switched_from_dl() will take care to do it at the correct
+		 * (0-lag) time.
+		 */
+		err = 0;
+	}
+	raw_spin_unlock(&dl_b->lock);
+
+	return err;
+}
+
+/*
+ * This function initializes the sched_dl_entity of a newly becoming
+ * SCHED_DEADLINE task.
+ *
+ * Only the static values are considered here, the actual runtime and the
+ * absolute deadline will be properly calculated when the task is enqueued
+ * for the first time with its new policy.
+ */
+void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+
+	dl_se->dl_runtime = attr->sched_runtime;
+	dl_se->dl_deadline = attr->sched_deadline;
+	dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
+	dl_se->flags = attr->sched_flags;
+	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+	dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
+}
+
+void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+
+	attr->sched_priority = p->rt_priority;
+	attr->sched_runtime = dl_se->dl_runtime;
+	attr->sched_deadline = dl_se->dl_deadline;
+	attr->sched_period = dl_se->dl_period;
+	attr->sched_flags = dl_se->flags;
+}
+
+/*
+ * This function validates the new parameters of a -deadline task.
+ * We ask for the deadline not being zero, and greater or equal
+ * than the runtime, as well as the period of being zero or
+ * greater than deadline. Furthermore, we have to be sure that
+ * user parameters are above the internal resolution of 1us (we
+ * check sched_runtime only since it is always the smaller one) and
+ * below 2^63 ns (we have to check both sched_deadline and
+ * sched_period, as the latter can be zero).
+ */
+bool __checkparam_dl(const struct sched_attr *attr)
+{
+	/* deadline != 0 */
+	if (attr->sched_deadline == 0)
+		return false;
+
+	/*
+	 * Since we truncate DL_SCALE bits, make sure we're at least
+	 * that big.
+	 */
+	if (attr->sched_runtime < (1ULL << DL_SCALE))
+		return false;
+
+	/*
+	 * Since we use the MSB for wrap-around and sign issues, make
+	 * sure it's not set (mind that period can be equal to zero).
+	 */
+	if (attr->sched_deadline & (1ULL << 63) ||
+	    attr->sched_period & (1ULL << 63))
+		return false;
+
+	/* runtime <= deadline <= period (if period != 0) */
+	if ((attr->sched_period != 0 &&
+	     attr->sched_period < attr->sched_deadline) ||
+	    attr->sched_deadline < attr->sched_runtime)
+		return false;
+
+	return true;
+}
+
+/*
+ * This function clears the sched_dl_entity static params.
+ */
+void __dl_clear_params(struct task_struct *p)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+
+	dl_se->dl_runtime = 0;
+	dl_se->dl_deadline = 0;
+	dl_se->dl_period = 0;
+	dl_se->flags = 0;
+	dl_se->dl_bw = 0;
+	dl_se->dl_density = 0;
+
+	dl_se->dl_throttled = 0;
+	dl_se->dl_yielded = 0;
+	dl_se->dl_non_contending = 0;
+}
+
+bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+
+	if (dl_se->dl_runtime != attr->sched_runtime ||
+	    dl_se->dl_deadline != attr->sched_deadline ||
+	    dl_se->dl_period != attr->sched_period ||
+	    dl_se->flags != attr->sched_flags)
+		return true;
+
+	return false;
+}
+
+#ifdef CONFIG_SMP
+int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
+{
+	unsigned int dest_cpu = cpumask_any_and(cpu_active_mask,
+							cs_cpus_allowed);
+	struct dl_bw *dl_b;
+	bool overflow;
+	int cpus, ret;
+	unsigned long flags;
+
+	rcu_read_lock_sched();
+	dl_b = dl_bw_of(dest_cpu);
+	raw_spin_lock_irqsave(&dl_b->lock, flags);
+	cpus = dl_bw_cpus(dest_cpu);
+	overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
+	if (overflow)
+		ret = -EBUSY;
+	else {
+		/*
+		 * We reserve space for this task in the destination
+		 * root_domain, as we can't fail after this point.
+		 * We will free resources in the source root_domain
+		 * later on (see set_cpus_allowed_dl()).
+		 */
+		__dl_add(dl_b, p->dl.dl_bw, cpus);
+		ret = 0;
+	}
+	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+	rcu_read_unlock_sched();
+	return ret;
+}
+
+int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
+				 const struct cpumask *trial)
+{
+	int ret = 1, trial_cpus;
+	struct dl_bw *cur_dl_b;
+	unsigned long flags;
+
+	rcu_read_lock_sched();
+	cur_dl_b = dl_bw_of(cpumask_any(cur));
+	trial_cpus = cpumask_weight(trial);
+
+	raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
+	if (cur_dl_b->bw != -1 &&
+	    cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
+		ret = 0;
+	raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
+	rcu_read_unlock_sched();
+	return ret;
+}
+
+bool dl_cpu_busy(unsigned int cpu)
+{
+	unsigned long flags;
+	struct dl_bw *dl_b;
+	bool overflow;
+	int cpus;
+
+	rcu_read_lock_sched();
+	dl_b = dl_bw_of(cpu);
+	raw_spin_lock_irqsave(&dl_b->lock, flags);
+	cpus = dl_bw_cpus(cpu);
+	overflow = __dl_overflow(dl_b, cpus, 0, 0);
+	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+	rcu_read_unlock_sched();
+	return overflow;
+}
+#endif
+
 #ifdef CONFIG_SCHED_DEBUG
 extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 38f019324f1a..4fa66de52bd6 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -552,15 +552,21 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 
 #define P(x) \
 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
+#define PU(x) \
+	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
 #define PN(x) \
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
 
-	P(rt_nr_running);
+	PU(rt_nr_running);
+#ifdef CONFIG_SMP
+	PU(rt_nr_migratory);
+#endif
 	P(rt_throttled);
 	PN(rt_time);
 	PN(rt_runtime);
 
 #undef PN
+#undef PU
 #undef P
 }
 
@@ -569,14 +575,21 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
 	struct dl_bw *dl_bw;
 
 	SEQ_printf(m, "\ndl_rq[%d]:\n", cpu);
-	SEQ_printf(m, "  .%-30s: %ld\n", "dl_nr_running", dl_rq->dl_nr_running);
+
+#define PU(x) \
+	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
+
+	PU(dl_nr_running);
 #ifdef CONFIG_SMP
+	PU(dl_nr_migratory);
 	dl_bw = &cpu_rq(cpu)->rd->dl_bw;
 #else
 	dl_bw = &dl_rq->dl_bw;
 #endif
 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
+
+#undef PU
 }
 
 extern __read_mostly int sched_clock_running;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d71109321841..008c514dc241 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -369,8 +369,9 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 }
 
 /* Iterate thr' all leaf cfs_rq's on a runqueue */
-#define for_each_leaf_cfs_rq(rq, cfs_rq) \
-	list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
+#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)			\
+	list_for_each_entry_safe(cfs_rq, pos, &rq->leaf_cfs_rq_list,	\
+				 leaf_cfs_rq_list)
 
 /* Do the two (enqueued) entities belong to the same group ? */
 static inline struct cfs_rq *
@@ -463,8 +464,8 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
 }
 
-#define for_each_leaf_cfs_rq(rq, cfs_rq) \
-		for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
+#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)	\
+		for (cfs_rq = &rq->cfs, pos = NULL; cfs_rq; cfs_rq = pos)
 
 static inline struct sched_entity *parent_entity(struct sched_entity *se)
 {
@@ -1381,7 +1382,6 @@ static unsigned long weighted_cpuload(const int cpu);
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
 static unsigned long capacity_of(int cpu);
-static long effective_load(struct task_group *tg, int cpu, long wl, long wg);
 
 /* Cached statistics for all CPUs within a node */
 struct numa_stats {
@@ -2469,7 +2469,8 @@ void task_numa_work(struct callback_head *work)
 		return;
 
 
-	down_read(&mm->mmap_sem);
+	if (!down_read_trylock(&mm->mmap_sem))
+		return;
 	vma = find_vma(mm, start);
 	if (!vma) {
 		reset_ptenuma_scan(p);
@@ -2584,6 +2585,60 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
 		}
 	}
 }
+
+/*
+ * Can a task be moved from prev_cpu to this_cpu without causing a load
+ * imbalance that would trigger the load balancer?
+ */
+static inline bool numa_wake_affine(struct sched_domain *sd,
+				    struct task_struct *p, int this_cpu,
+				    int prev_cpu, int sync)
+{
+	struct numa_stats prev_load, this_load;
+	s64 this_eff_load, prev_eff_load;
+
+	update_numa_stats(&prev_load, cpu_to_node(prev_cpu));
+	update_numa_stats(&this_load, cpu_to_node(this_cpu));
+
+	/*
+	 * If sync wakeup then subtract the (maximum possible)
+	 * effect of the currently running task from the load
+	 * of the current CPU:
+	 */
+	if (sync) {
+		unsigned long current_load = task_h_load(current);
+
+		if (this_load.load > current_load)
+			this_load.load -= current_load;
+		else
+			this_load.load = 0;
+	}
+
+	/*
+	 * In low-load situations, where this_cpu's node is idle due to the
+	 * sync cause above having dropped this_load.load to 0, move the task.
+	 * Moving to an idle socket will not create a bad imbalance.
+	 *
+	 * Otherwise check if the nodes are near enough in load to allow this
+	 * task to be woken on this_cpu's node.
+	 */
+	if (this_load.load > 0) {
+		unsigned long task_load = task_h_load(p);
+
+		this_eff_load = 100;
+		this_eff_load *= prev_load.compute_capacity;
+
+		prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
+		prev_eff_load *= this_load.compute_capacity;
+
+		this_eff_load *= this_load.load + task_load;
+		prev_eff_load *= prev_load.load - task_load;
+
+		return this_eff_load <= prev_eff_load;
+	}
+
+	return true;
+}
 #else
 static void task_tick_numa(struct rq *rq, struct task_struct *curr)
 {
@@ -2596,6 +2651,15 @@ static inline void account_numa_enqueue(struct rq *rq, struct task_struct *p)
 static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p)
 {
 }
+
+#ifdef CONFIG_SMP
+static inline bool numa_wake_affine(struct sched_domain *sd,
+				    struct task_struct *p, int this_cpu,
+				    int prev_cpu, int sync)
+{
+	return true;
+}
+#endif /* !SMP */
 #endif /* CONFIG_NUMA_BALANCING */
 
 static void
@@ -2916,12 +2980,12 @@ ___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 	/*
 	 * Step 2: update *_avg.
 	 */
-	sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
+	sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX - 1024 + sa->period_contrib);
 	if (cfs_rq) {
 		cfs_rq->runnable_load_avg =
-			div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
+			div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX - 1024 + sa->period_contrib);
 	}
-	sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
+	sa->util_avg = sa->util_sum / (LOAD_AVG_MAX - 1024 + sa->period_contrib);
 
 	return 1;
 }
@@ -2982,8 +3046,7 @@ __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
  * differential update where we store the last value we propagated. This in
  * turn allows skipping updates if the differential is 'small'.
  *
- * Updating tg's load_avg is necessary before update_cfs_share() (which is
- * done) and effective_load() (which is not done because it is too costly).
+ * Updating tg's load_avg is necessary before update_cfs_share().
  */
 static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
 {
@@ -3563,7 +3626,7 @@ static inline void check_schedstat_required(void)
 			trace_sched_stat_runtime_enabled())  {
 		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
 			     "stat_blocked and stat_runtime require the "
-			     "kernel parameter schedstats=enabled or "
+			     "kernel parameter schedstats=enable or "
 			     "kernel.sched_schedstats=1\n");
 	}
 #endif
@@ -4642,24 +4705,43 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	hrtimer_cancel(&cfs_b->slack_timer);
 }
 
+/*
+ * Both these cpu hotplug callbacks race against unregister_fair_sched_group()
+ *
+ * The race is harmless, since modifying bandwidth settings of unhooked group
+ * bits doesn't do much.
+ */
+
+/* cpu online calback */
 static void __maybe_unused update_runtime_enabled(struct rq *rq)
 {
-	struct cfs_rq *cfs_rq;
+	struct task_group *tg;
 
-	for_each_leaf_cfs_rq(rq, cfs_rq) {
-		struct cfs_bandwidth *cfs_b = &cfs_rq->tg->cfs_bandwidth;
+	lockdep_assert_held(&rq->lock);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tg, &task_groups, list) {
+		struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+		struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
 
 		raw_spin_lock(&cfs_b->lock);
 		cfs_rq->runtime_enabled = cfs_b->quota != RUNTIME_INF;
 		raw_spin_unlock(&cfs_b->lock);
 	}
+	rcu_read_unlock();
 }
 
+/* cpu offline callback */
 static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 {
-	struct cfs_rq *cfs_rq;
+	struct task_group *tg;
+
+	lockdep_assert_held(&rq->lock);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tg, &task_groups, list) {
+		struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
 
-	for_each_leaf_cfs_rq(rq, cfs_rq) {
 		if (!cfs_rq->runtime_enabled)
 			continue;
 
@@ -4677,6 +4759,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 		if (cfs_rq_throttled(cfs_rq))
 			unthrottle_cfs_rq(cfs_rq);
 	}
+	rcu_read_unlock();
 }
 
 #else /* CONFIG_CFS_BANDWIDTH */
@@ -5215,126 +5298,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 	return 0;
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-/*
- * effective_load() calculates the load change as seen from the root_task_group
- *
- * Adding load to a group doesn't make a group heavier, but can cause movement
- * of group shares between cpus. Assuming the shares were perfectly aligned one
- * can calculate the shift in shares.
- *
- * Calculate the effective load difference if @wl is added (subtracted) to @tg
- * on this @cpu and results in a total addition (subtraction) of @wg to the
- * total group weight.
- *
- * Given a runqueue weight distribution (rw_i) we can compute a shares
- * distribution (s_i) using:
- *
- *   s_i = rw_i / \Sum rw_j						(1)
- *
- * Suppose we have 4 CPUs and our @tg is a direct child of the root group and
- * has 7 equal weight tasks, distributed as below (rw_i), with the resulting
- * shares distribution (s_i):
- *
- *   rw_i = {   2,   4,   1,   0 }
- *   s_i  = { 2/7, 4/7, 1/7,   0 }
- *
- * As per wake_affine() we're interested in the load of two CPUs (the CPU the
- * task used to run on and the CPU the waker is running on), we need to
- * compute the effect of waking a task on either CPU and, in case of a sync
- * wakeup, compute the effect of the current task going to sleep.
- *
- * So for a change of @wl to the local @cpu with an overall group weight change
- * of @wl we can compute the new shares distribution (s'_i) using:
- *
- *   s'_i = (rw_i + @wl) / (@wg + \Sum rw_j)				(2)
- *
- * Suppose we're interested in CPUs 0 and 1, and want to compute the load
- * differences in waking a task to CPU 0. The additional task changes the
- * weight and shares distributions like:
- *
- *   rw'_i = {   3,   4,   1,   0 }
- *   s'_i  = { 3/8, 4/8, 1/8,   0 }
- *
- * We can then compute the difference in effective weight by using:
- *
- *   dw_i = S * (s'_i - s_i)						(3)
- *
- * Where 'S' is the group weight as seen by its parent.
- *
- * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7)
- * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 -
- * 4/7) times the weight of the group.
- */
-static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
-{
-	struct sched_entity *se = tg->se[cpu];
-
-	if (!tg->parent)	/* the trivial, non-cgroup case */
-		return wl;
-
-	for_each_sched_entity(se) {
-		struct cfs_rq *cfs_rq = se->my_q;
-		long W, w = cfs_rq_load_avg(cfs_rq);
-
-		tg = cfs_rq->tg;
-
-		/*
-		 * W = @wg + \Sum rw_j
-		 */
-		W = wg + atomic_long_read(&tg->load_avg);
-
-		/* Ensure \Sum rw_j >= rw_i */
-		W -= cfs_rq->tg_load_avg_contrib;
-		W += w;
-
-		/*
-		 * w = rw_i + @wl
-		 */
-		w += wl;
-
-		/*
-		 * wl = S * s'_i; see (2)
-		 */
-		if (W > 0 && w < W)
-			wl = (w * (long)scale_load_down(tg->shares)) / W;
-		else
-			wl = scale_load_down(tg->shares);
-
-		/*
-		 * Per the above, wl is the new se->load.weight value; since
-		 * those are clipped to [MIN_SHARES, ...) do so now. See
-		 * calc_cfs_shares().
-		 */
-		if (wl < MIN_SHARES)
-			wl = MIN_SHARES;
-
-		/*
-		 * wl = dw_i = S * (s'_i - s_i); see (3)
-		 */
-		wl -= se->avg.load_avg;
-
-		/*
-		 * Recursively apply this logic to all parent groups to compute
-		 * the final effective load change on the root group. Since
-		 * only the @tg group gets extra weight, all parent groups can
-		 * only redistribute existing shares. @wl is the shift in shares
-		 * resulting from this level per the above.
-		 */
-		wg = 0;
-	}
-
-	return wl;
-}
-#else
-
-static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
-{
-	return wl;
-}
-
-#endif
-
 static void record_wakee(struct task_struct *p)
 {
 	/*
@@ -5385,67 +5348,25 @@ static int wake_wide(struct task_struct *p)
 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 		       int prev_cpu, int sync)
 {
-	s64 this_load, load;
-	s64 this_eff_load, prev_eff_load;
-	int idx, this_cpu;
-	struct task_group *tg;
-	unsigned long weight;
-	int balanced;
-
-	idx	  = sd->wake_idx;
-	this_cpu  = smp_processor_id();
-	load	  = source_load(prev_cpu, idx);
-	this_load = target_load(this_cpu, idx);
-
-	/*
-	 * If sync wakeup then subtract the (maximum possible)
-	 * effect of the currently running task from the load
-	 * of the current CPU:
-	 */
-	if (sync) {
-		tg = task_group(current);
-		weight = current->se.avg.load_avg;
-
-		this_load += effective_load(tg, this_cpu, -weight, -weight);
-		load += effective_load(tg, prev_cpu, 0, -weight);
-	}
-
-	tg = task_group(p);
-	weight = p->se.avg.load_avg;
+	int this_cpu = smp_processor_id();
+	bool affine = false;
 
 	/*
-	 * In low-load situations, where prev_cpu is idle and this_cpu is idle
-	 * due to the sync cause above having dropped this_load to 0, we'll
-	 * always have an imbalance, but there's really nothing you can do
-	 * about that, so that's good too.
-	 *
-	 * Otherwise check if either cpus are near enough in load to allow this
-	 * task to be woken on this_cpu.
+	 * Common case: CPUs are in the same socket, and select_idle_sibling()
+	 * will do its thing regardless of what we return:
 	 */
-	this_eff_load = 100;
-	this_eff_load *= capacity_of(prev_cpu);
-
-	prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
-	prev_eff_load *= capacity_of(this_cpu);
-
-	if (this_load > 0) {
-		this_eff_load *= this_load +
-			effective_load(tg, this_cpu, weight, weight);
-
-		prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
-	}
-
-	balanced = this_eff_load <= prev_eff_load;
+	if (cpus_share_cache(prev_cpu, this_cpu))
+		affine = true;
+	else
+		affine = numa_wake_affine(sd, p, this_cpu, prev_cpu, sync);
 
 	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
+	if (affine) {
+		schedstat_inc(sd->ttwu_move_affine);
+		schedstat_inc(p->se.statistics.nr_wakeups_affine);
+	}
 
-	if (!balanced)
-		return 0;
-
-	schedstat_inc(sd->ttwu_move_affine);
-	schedstat_inc(p->se.statistics.nr_wakeups_affine);
-
-	return 1;
+	return affine;
 }
 
 static inline int task_util(struct task_struct *p);
@@ -5484,12 +5405,12 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 		int i;
 
 		/* Skip over this group if it has no CPUs allowed */
-		if (!cpumask_intersects(sched_group_cpus(group),
+		if (!cpumask_intersects(sched_group_span(group),
 					&p->cpus_allowed))
 			continue;
 
 		local_group = cpumask_test_cpu(this_cpu,
-					       sched_group_cpus(group));
+					       sched_group_span(group));
 
 		/*
 		 * Tally up the load of all CPUs in the group and find
@@ -5499,7 +5420,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 		runnable_load = 0;
 		max_spare_cap = 0;
 
-		for_each_cpu(i, sched_group_cpus(group)) {
+		for_each_cpu(i, sched_group_span(group)) {
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
 				load = source_load(i, load_idx);
@@ -5602,10 +5523,10 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 
 	/* Check if we have any choice: */
 	if (group->group_weight == 1)
-		return cpumask_first(sched_group_cpus(group));
+		return cpumask_first(sched_group_span(group));
 
 	/* Traverse only the allowed CPUs */
-	for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
+	for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
 		if (idle_cpu(i)) {
 			struct rq *rq = cpu_rq(i);
 			struct cpuidle_state *idle = idle_get_state(rq);
@@ -5640,43 +5561,6 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 	return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
 }
 
-/*
- * Implement a for_each_cpu() variant that starts the scan at a given cpu
- * (@start), and wraps around.
- *
- * This is used to scan for idle CPUs; such that not all CPUs looking for an
- * idle CPU find the same CPU. The down-side is that tasks tend to cycle
- * through the LLC domain.
- *
- * Especially tbench is found sensitive to this.
- */
-
-static int cpumask_next_wrap(int n, const struct cpumask *mask, int start, int *wrapped)
-{
-	int next;
-
-again:
-	next = find_next_bit(cpumask_bits(mask), nr_cpumask_bits, n+1);
-
-	if (*wrapped) {
-		if (next >= start)
-			return nr_cpumask_bits;
-	} else {
-		if (next >= nr_cpumask_bits) {
-			*wrapped = 1;
-			n = -1;
-			goto again;
-		}
-	}
-
-	return next;
-}
-
-#define for_each_cpu_wrap(cpu, mask, start, wrap)				\
-	for ((wrap) = 0, (cpu) = (start)-1;					\
-		(cpu) = cpumask_next_wrap((cpu), (mask), (start), &(wrap)),	\
-		(cpu) < nr_cpumask_bits; )
-
 #ifdef CONFIG_SCHED_SMT
 
 static inline void set_idle_cores(int cpu, int val)
@@ -5736,7 +5620,7 @@ unlock:
 static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-	int core, cpu, wrap;
+	int core, cpu;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
@@ -5746,7 +5630,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
 
 	cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
 
-	for_each_cpu_wrap(core, cpus, target, wrap) {
+	for_each_cpu_wrap(core, cpus, target) {
 		bool idle = true;
 
 		for_each_cpu(cpu, cpu_smt_mask(core)) {
@@ -5809,27 +5693,38 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	struct sched_domain *this_sd;
-	u64 avg_cost, avg_idle = this_rq()->avg_idle;
+	u64 avg_cost, avg_idle;
 	u64 time, cost;
 	s64 delta;
-	int cpu, wrap;
+	int cpu, nr = INT_MAX;
 
 	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
 	if (!this_sd)
 		return -1;
 
-	avg_cost = this_sd->avg_scan_cost;
-
 	/*
 	 * Due to large variance we need a large fuzz factor; hackbench in
 	 * particularly is sensitive here.
 	 */
-	if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost)
+	avg_idle = this_rq()->avg_idle / 512;
+	avg_cost = this_sd->avg_scan_cost + 1;
+
+	if (sched_feat(SIS_AVG_CPU) && avg_idle < avg_cost)
 		return -1;
 
+	if (sched_feat(SIS_PROP)) {
+		u64 span_avg = sd->span_weight * avg_idle;
+		if (span_avg > 4*avg_cost)
+			nr = div_u64(span_avg, avg_cost);
+		else
+			nr = 4;
+	}
+
 	time = local_clock();
 
-	for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) {
+	for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
+		if (!--nr)
+			return -1;
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
 		if (idle_cpu(cpu))
@@ -6011,11 +5906,15 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 
 	if (affine_sd) {
 		sd = NULL; /* Prefer wake_affine over balance flags */
-		if (cpu != prev_cpu && wake_affine(affine_sd, p, prev_cpu, sync))
+		if (cpu == prev_cpu)
+			goto pick_cpu;
+
+		if (wake_affine(affine_sd, p, prev_cpu, sync))
 			new_cpu = cpu;
 	}
 
 	if (!sd) {
+ pick_cpu:
 		if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
 			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
@@ -6168,8 +6067,11 @@ static void set_last_buddy(struct sched_entity *se)
 	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
 		return;
 
-	for_each_sched_entity(se)
+	for_each_sched_entity(se) {
+		if (SCHED_WARN_ON(!se->on_rq))
+			return;
 		cfs_rq_of(se)->last = se;
+	}
 }
 
 static void set_next_buddy(struct sched_entity *se)
@@ -6177,8 +6079,11 @@ static void set_next_buddy(struct sched_entity *se)
 	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
 		return;
 
-	for_each_sched_entity(se)
+	for_each_sched_entity(se) {
+		if (SCHED_WARN_ON(!se->on_rq))
+			return;
 		cfs_rq_of(se)->next = se;
+	}
 }
 
 static void set_skip_buddy(struct sched_entity *se)
@@ -6686,6 +6591,10 @@ static int migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 	if (dst_nid == p->numa_preferred_nid)
 		return 0;
 
+	/* Leaving a core idle is often worse than degrading locality. */
+	if (env->idle != CPU_NOT_IDLE)
+		return -1;
+
 	if (numa_group) {
 		src_faults = group_faults(p, src_nid);
 		dst_faults = group_faults(p, dst_nid);
@@ -6970,10 +6879,28 @@ static void attach_tasks(struct lb_env *env)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+	if (cfs_rq->load.weight)
+		return false;
+
+	if (cfs_rq->avg.load_sum)
+		return false;
+
+	if (cfs_rq->avg.util_sum)
+		return false;
+
+	if (cfs_rq->runnable_load_sum)
+		return false;
+
+	return true;
+}
+
 static void update_blocked_averages(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
-	struct cfs_rq *cfs_rq;
+	struct cfs_rq *cfs_rq, *pos;
 	struct rq_flags rf;
 
 	rq_lock_irqsave(rq, &rf);
@@ -6983,7 +6910,7 @@ static void update_blocked_averages(int cpu)
 	 * Iterates the task_group tree in a bottom up fashion, see
 	 * list_add_leaf_cfs_rq() for details.
 	 */
-	for_each_leaf_cfs_rq(rq, cfs_rq) {
+	for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
 		struct sched_entity *se;
 
 		/* throttled entities do not contribute to load */
@@ -6997,6 +6924,13 @@ static void update_blocked_averages(int cpu)
 		se = cfs_rq->tg->se[cpu];
 		if (se && !skip_blocked_update(se))
 			update_load_avg(se, 0);
+
+		/*
+		 * There can be a lot of idle CPU cgroups.  Don't let fully
+		 * decayed cfs_rqs linger on the list.
+		 */
+		if (cfs_rq_is_decayed(cfs_rq))
+			list_del_leaf_cfs_rq(cfs_rq);
 	}
 	rq_unlock_irqrestore(rq, &rf);
 }
@@ -7229,7 +7163,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 		 * span the current group.
 		 */
 
-		for_each_cpu(cpu, sched_group_cpus(sdg)) {
+		for_each_cpu(cpu, sched_group_span(sdg)) {
 			struct sched_group_capacity *sgc;
 			struct rq *rq = cpu_rq(cpu);
 
@@ -7408,7 +7342,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 
 	memset(sgs, 0, sizeof(*sgs));
 
-	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
+	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
 		struct rq *rq = cpu_rq(i);
 
 		/* Bias balancing toward cpus of our domain */
@@ -7572,7 +7506,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		struct sg_lb_stats *sgs = &tmp_sgs;
 		int local_group;
 
-		local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
+		local_group = cpumask_test_cpu(env->dst_cpu, sched_group_span(sg));
 		if (local_group) {
 			sds->local = sg;
 			sgs = local;
@@ -7927,7 +7861,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 	unsigned long busiest_load = 0, busiest_capacity = 1;
 	int i;
 
-	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
+	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
 		unsigned long capacity, wl;
 		enum fbq_type rt;
 
@@ -8033,7 +7967,6 @@ static int active_load_balance_cpu_stop(void *data);
 static int should_we_balance(struct lb_env *env)
 {
 	struct sched_group *sg = env->sd->groups;
-	struct cpumask *sg_cpus, *sg_mask;
 	int cpu, balance_cpu = -1;
 
 	/*
@@ -8043,11 +7976,9 @@ static int should_we_balance(struct lb_env *env)
 	if (env->idle == CPU_NEWLY_IDLE)
 		return 1;
 
-	sg_cpus = sched_group_cpus(sg);
-	sg_mask = sched_group_mask(sg);
 	/* Try to find first idle cpu */
-	for_each_cpu_and(cpu, sg_cpus, env->cpus) {
-		if (!cpumask_test_cpu(cpu, sg_mask) || !idle_cpu(cpu))
+	for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) {
+		if (!idle_cpu(cpu))
 			continue;
 
 		balance_cpu = cpu;
@@ -8083,7 +8014,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		.sd		= sd,
 		.dst_cpu	= this_cpu,
 		.dst_rq		= this_rq,
-		.dst_grpmask    = sched_group_cpus(sd->groups),
+		.dst_grpmask    = sched_group_span(sd->groups),
 		.idle		= idle,
 		.loop_break	= sched_nr_migrate_break,
 		.cpus		= cpus,
@@ -8659,6 +8590,10 @@ void nohz_balance_enter_idle(int cpu)
 	if (!cpu_active(cpu))
 		return;
 
+	/* Spare idle load balancing on CPUs that don't want to be disturbed: */
+	if (!is_housekeeping_cpu(cpu))
+		return;
+
 	if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
 		return;
 
@@ -9523,10 +9458,10 @@ const struct sched_class fair_sched_class = {
 #ifdef CONFIG_SCHED_DEBUG
 void print_cfs_stats(struct seq_file *m, int cpu)
 {
-	struct cfs_rq *cfs_rq;
+	struct cfs_rq *cfs_rq, *pos;
 
 	rcu_read_lock();
-	for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
+	for_each_leaf_cfs_rq_safe(cpu_rq(cpu), cfs_rq, pos)
 		print_cfs_rq(m, cpu, cfs_rq);
 	rcu_read_unlock();
 }
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 11192e0cb122..d3fb15555291 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -55,6 +55,7 @@ SCHED_FEAT(TTWU_QUEUE, true)
  * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
  */
 SCHED_FEAT(SIS_AVG_CPU, false)
+SCHED_FEAT(SIS_PROP, true)
 
 /*
  * Issue a WARN when we do multiple update_rq_clock() calls
@@ -76,7 +77,6 @@ SCHED_FEAT(WARN_DOUBLE_CLOCK, false)
 SCHED_FEAT(RT_PUSH_IPI, true)
 #endif
 
-SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
 SCHED_FEAT(ATTACH_AGE_LOAD, true)
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 2a25a9ec2c6e..6c23e30c0e5c 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -219,6 +219,7 @@ static void do_idle(void)
 	 */
 
 	__current_set_polling();
+	quiet_vmstat();
 	tick_nohz_idle_enter();
 
 	while (!need_resched()) {
@@ -265,7 +266,7 @@ static void do_idle(void)
 	smp_mb__after_atomic();
 
 	sched_ttwu_pending();
-	schedule_preempt_disabled();
+	schedule_idle();
 
 	if (unlikely(klp_patch_pending(current)))
 		klp_update_patch_state(current);
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index f15fb2bdbc0d..f14716a3522f 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -117,7 +117,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
  * load-average relies on per-cpu sampling from the tick, it is affected by
  * NO_HZ.
  *
- * The basic idea is to fold the nr_active delta into a global idle-delta upon
+ * The basic idea is to fold the nr_active delta into a global NO_HZ-delta upon
  * entering NO_HZ state such that we can include this as an 'extra' cpu delta
  * when we read the global state.
  *
@@ -126,7 +126,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
  *  - When we go NO_HZ idle during the window, we can negate our sample
  *    contribution, causing under-accounting.
  *
- *    We avoid this by keeping two idle-delta counters and flipping them
+ *    We avoid this by keeping two NO_HZ-delta counters and flipping them
  *    when the window starts, thus separating old and new NO_HZ load.
  *
  *    The only trick is the slight shift in index flip for read vs write.
@@ -137,22 +137,22 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
  *    r:0 0 1           1 0           0 1           1 0
  *    w:0 1 1           0 0           1 1           0 0
  *
- *    This ensures we'll fold the old idle contribution in this window while
+ *    This ensures we'll fold the old NO_HZ contribution in this window while
  *    accumlating the new one.
  *
- *  - When we wake up from NO_HZ idle during the window, we push up our
+ *  - When we wake up from NO_HZ during the window, we push up our
  *    contribution, since we effectively move our sample point to a known
  *    busy state.
  *
  *    This is solved by pushing the window forward, and thus skipping the
- *    sample, for this cpu (effectively using the idle-delta for this cpu which
+ *    sample, for this cpu (effectively using the NO_HZ-delta for this cpu which
  *    was in effect at the time the window opened). This also solves the issue
- *    of having to deal with a cpu having been in NOHZ idle for multiple
- *    LOAD_FREQ intervals.
+ *    of having to deal with a cpu having been in NO_HZ for multiple LOAD_FREQ
+ *    intervals.
  *
  * When making the ILB scale, we should try to pull this in as well.
  */
-static atomic_long_t calc_load_idle[2];
+static atomic_long_t calc_load_nohz[2];
 static int calc_load_idx;
 
 static inline int calc_load_write_idx(void)
@@ -167,7 +167,7 @@ static inline int calc_load_write_idx(void)
 
 	/*
 	 * If the folding window started, make sure we start writing in the
-	 * next idle-delta.
+	 * next NO_HZ-delta.
 	 */
 	if (!time_before(jiffies, READ_ONCE(calc_load_update)))
 		idx++;
@@ -180,24 +180,24 @@ static inline int calc_load_read_idx(void)
 	return calc_load_idx & 1;
 }
 
-void calc_load_enter_idle(void)
+void calc_load_nohz_start(void)
 {
 	struct rq *this_rq = this_rq();
 	long delta;
 
 	/*
-	 * We're going into NOHZ mode, if there's any pending delta, fold it
-	 * into the pending idle delta.
+	 * We're going into NO_HZ mode, if there's any pending delta, fold it
+	 * into the pending NO_HZ delta.
 	 */
 	delta = calc_load_fold_active(this_rq, 0);
 	if (delta) {
 		int idx = calc_load_write_idx();
 
-		atomic_long_add(delta, &calc_load_idle[idx]);
+		atomic_long_add(delta, &calc_load_nohz[idx]);
 	}
 }
 
-void calc_load_exit_idle(void)
+void calc_load_nohz_stop(void)
 {
 	struct rq *this_rq = this_rq();
 
@@ -217,13 +217,13 @@ void calc_load_exit_idle(void)
 		this_rq->calc_load_update += LOAD_FREQ;
 }
 
-static long calc_load_fold_idle(void)
+static long calc_load_nohz_fold(void)
 {
 	int idx = calc_load_read_idx();
 	long delta = 0;
 
-	if (atomic_long_read(&calc_load_idle[idx]))
-		delta = atomic_long_xchg(&calc_load_idle[idx], 0);
+	if (atomic_long_read(&calc_load_nohz[idx]))
+		delta = atomic_long_xchg(&calc_load_nohz[idx], 0);
 
 	return delta;
 }
@@ -299,9 +299,9 @@ calc_load_n(unsigned long load, unsigned long exp,
 
 /*
  * NO_HZ can leave us missing all per-cpu ticks calling
- * calc_load_account_active(), but since an idle CPU folds its delta into
- * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
- * in the pending idle delta if our idle period crossed a load cycle boundary.
+ * calc_load_fold_active(), but since a NO_HZ CPU folds its delta into
+ * calc_load_nohz per calc_load_nohz_start(), all we need to do is fold
+ * in the pending NO_HZ delta if our NO_HZ period crossed a load cycle boundary.
  *
  * Once we've updated the global active value, we need to apply the exponential
  * weights adjusted to the number of cycles missed.
@@ -330,7 +330,7 @@ static void calc_global_nohz(void)
 	}
 
 	/*
-	 * Flip the idle index...
+	 * Flip the NO_HZ index...
 	 *
 	 * Make sure we first write the new time then flip the index, so that
 	 * calc_load_write_idx() will see the new time when it reads the new
@@ -341,7 +341,7 @@ static void calc_global_nohz(void)
 }
 #else /* !CONFIG_NO_HZ_COMMON */
 
-static inline long calc_load_fold_idle(void) { return 0; }
+static inline long calc_load_nohz_fold(void) { return 0; }
 static inline void calc_global_nohz(void) { }
 
 #endif /* CONFIG_NO_HZ_COMMON */
@@ -362,9 +362,9 @@ void calc_global_load(unsigned long ticks)
 		return;
 
 	/*
-	 * Fold the 'old' idle-delta to include all NO_HZ cpus.
+	 * Fold the 'old' NO_HZ-delta to include all NO_HZ cpus.
 	 */
-	delta = calc_load_fold_idle();
+	delta = calc_load_nohz_fold();
 	if (delta)
 		atomic_long_add(delta, &calc_load_tasks);
 
@@ -378,7 +378,8 @@ void calc_global_load(unsigned long ticks)
 	WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ);
 
 	/*
-	 * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
+	 * In case we went to NO_HZ for multiple LOAD_FREQ intervals
+	 * catch up in bulk.
 	 */
 	calc_global_nohz();
 }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 979b7341008a..45caf937ef90 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -840,6 +840,17 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 		int enqueue = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
 		struct rq *rq = rq_of_rt_rq(rt_rq);
+		int skip;
+
+		/*
+		 * When span == cpu_online_mask, taking each rq->lock
+		 * can be time-consuming. Try to avoid it when possible.
+		 */
+		raw_spin_lock(&rt_rq->rt_runtime_lock);
+		skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
+		raw_spin_unlock(&rt_rq->rt_runtime_lock);
+		if (skip)
+			continue;
 
 		raw_spin_lock(&rq->lock);
 		if (rt_rq->rt_time) {
@@ -1819,7 +1830,7 @@ retry:
 		 * pushing.
 		 */
 		task = pick_next_pushable_task(rq);
-		if (task_cpu(next_task) == rq->cpu && task == next_task) {
+		if (task == next_task) {
 			/*
 			 * The task hasn't migrated, and is still the next
 			 * eligible task, but we failed to find a run-queue
@@ -2438,6 +2449,316 @@ const struct sched_class rt_sched_class = {
 	.update_curr		= update_curr_rt,
 };
 
+#ifdef CONFIG_RT_GROUP_SCHED
+/*
+ * Ensure that the real time constraints are schedulable.
+ */
+static DEFINE_MUTEX(rt_constraints_mutex);
+
+/* Must be called with tasklist_lock held */
+static inline int tg_has_rt_tasks(struct task_group *tg)
+{
+	struct task_struct *g, *p;
+
+	/*
+	 * Autogroups do not have RT tasks; see autogroup_create().
+	 */
+	if (task_group_is_autogroup(tg))
+		return 0;
+
+	for_each_process_thread(g, p) {
+		if (rt_task(p) && task_group(p) == tg)
+			return 1;
+	}
+
+	return 0;
+}
+
+struct rt_schedulable_data {
+	struct task_group *tg;
+	u64 rt_period;
+	u64 rt_runtime;
+};
+
+static int tg_rt_schedulable(struct task_group *tg, void *data)
+{
+	struct rt_schedulable_data *d = data;
+	struct task_group *child;
+	unsigned long total, sum = 0;
+	u64 period, runtime;
+
+	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
+	runtime = tg->rt_bandwidth.rt_runtime;
+
+	if (tg == d->tg) {
+		period = d->rt_period;
+		runtime = d->rt_runtime;
+	}
+
+	/*
+	 * Cannot have more runtime than the period.
+	 */
+	if (runtime > period && runtime != RUNTIME_INF)
+		return -EINVAL;
+
+	/*
+	 * Ensure we don't starve existing RT tasks.
+	 */
+	if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
+		return -EBUSY;
+
+	total = to_ratio(period, runtime);
+
+	/*
+	 * Nobody can have more than the global setting allows.
+	 */
+	if (total > to_ratio(global_rt_period(), global_rt_runtime()))
+		return -EINVAL;
+
+	/*
+	 * The sum of our children's runtime should not exceed our own.
+	 */
+	list_for_each_entry_rcu(child, &tg->children, siblings) {
+		period = ktime_to_ns(child->rt_bandwidth.rt_period);
+		runtime = child->rt_bandwidth.rt_runtime;
+
+		if (child == d->tg) {
+			period = d->rt_period;
+			runtime = d->rt_runtime;
+		}
+
+		sum += to_ratio(period, runtime);
+	}
+
+	if (sum > total)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
+{
+	int ret;
+
+	struct rt_schedulable_data data = {
+		.tg = tg,
+		.rt_period = period,
+		.rt_runtime = runtime,
+	};
+
+	rcu_read_lock();
+	ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static int tg_set_rt_bandwidth(struct task_group *tg,
+		u64 rt_period, u64 rt_runtime)
+{
+	int i, err = 0;
+
+	/*
+	 * Disallowing the root group RT runtime is BAD, it would disallow the
+	 * kernel creating (and or operating) RT threads.
+	 */
+	if (tg == &root_task_group && rt_runtime == 0)
+		return -EINVAL;
+
+	/* No period doesn't make any sense. */
+	if (rt_period == 0)
+		return -EINVAL;
+
+	mutex_lock(&rt_constraints_mutex);
+	read_lock(&tasklist_lock);
+	err = __rt_schedulable(tg, rt_period, rt_runtime);
+	if (err)
+		goto unlock;
+
+	raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
+	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
+	tg->rt_bandwidth.rt_runtime = rt_runtime;
+
+	for_each_possible_cpu(i) {
+		struct rt_rq *rt_rq = tg->rt_rq[i];
+
+		raw_spin_lock(&rt_rq->rt_runtime_lock);
+		rt_rq->rt_runtime = rt_runtime;
+		raw_spin_unlock(&rt_rq->rt_runtime_lock);
+	}
+	raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
+unlock:
+	read_unlock(&tasklist_lock);
+	mutex_unlock(&rt_constraints_mutex);
+
+	return err;
+}
+
+int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
+{
+	u64 rt_runtime, rt_period;
+
+	rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
+	rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
+	if (rt_runtime_us < 0)
+		rt_runtime = RUNTIME_INF;
+
+	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
+}
+
+long sched_group_rt_runtime(struct task_group *tg)
+{
+	u64 rt_runtime_us;
+
+	if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
+		return -1;
+
+	rt_runtime_us = tg->rt_bandwidth.rt_runtime;
+	do_div(rt_runtime_us, NSEC_PER_USEC);
+	return rt_runtime_us;
+}
+
+int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
+{
+	u64 rt_runtime, rt_period;
+
+	rt_period = rt_period_us * NSEC_PER_USEC;
+	rt_runtime = tg->rt_bandwidth.rt_runtime;
+
+	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
+}
+
+long sched_group_rt_period(struct task_group *tg)
+{
+	u64 rt_period_us;
+
+	rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
+	do_div(rt_period_us, NSEC_PER_USEC);
+	return rt_period_us;
+}
+
+static int sched_rt_global_constraints(void)
+{
+	int ret = 0;
+
+	mutex_lock(&rt_constraints_mutex);
+	read_lock(&tasklist_lock);
+	ret = __rt_schedulable(NULL, 0, 0);
+	read_unlock(&tasklist_lock);
+	mutex_unlock(&rt_constraints_mutex);
+
+	return ret;
+}
+
+int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
+{
+	/* Don't accept realtime tasks when there is no way for them to run */
+	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
+		return 0;
+
+	return 1;
+}
+
+#else /* !CONFIG_RT_GROUP_SCHED */
+static int sched_rt_global_constraints(void)
+{
+	unsigned long flags;
+	int i;
+
+	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
+	for_each_possible_cpu(i) {
+		struct rt_rq *rt_rq = &cpu_rq(i)->rt;
+
+		raw_spin_lock(&rt_rq->rt_runtime_lock);
+		rt_rq->rt_runtime = global_rt_runtime();
+		raw_spin_unlock(&rt_rq->rt_runtime_lock);
+	}
+	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
+
+	return 0;
+}
+#endif /* CONFIG_RT_GROUP_SCHED */
+
+static int sched_rt_global_validate(void)
+{
+	if (sysctl_sched_rt_period <= 0)
+		return -EINVAL;
+
+	if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
+		(sysctl_sched_rt_runtime > sysctl_sched_rt_period))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void sched_rt_do_global(void)
+{
+	def_rt_bandwidth.rt_runtime = global_rt_runtime();
+	def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
+}
+
+int sched_rt_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int old_period, old_runtime;
+	static DEFINE_MUTEX(mutex);
+	int ret;
+
+	mutex_lock(&mutex);
+	old_period = sysctl_sched_rt_period;
+	old_runtime = sysctl_sched_rt_runtime;
+
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+	if (!ret && write) {
+		ret = sched_rt_global_validate();
+		if (ret)
+			goto undo;
+
+		ret = sched_dl_global_validate();
+		if (ret)
+			goto undo;
+
+		ret = sched_rt_global_constraints();
+		if (ret)
+			goto undo;
+
+		sched_rt_do_global();
+		sched_dl_do_global();
+	}
+	if (0) {
+undo:
+		sysctl_sched_rt_period = old_period;
+		sysctl_sched_rt_runtime = old_runtime;
+	}
+	mutex_unlock(&mutex);
+
+	return ret;
+}
+
+int sched_rr_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+	static DEFINE_MUTEX(mutex);
+
+	mutex_lock(&mutex);
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	/*
+	 * Make sure that internally we keep jiffies.
+	 * Also, writing zero resets the timeslice to default:
+	 */
+	if (!ret && write) {
+		sched_rr_timeslice =
+			sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
+			msecs_to_jiffies(sysctl_sched_rr_timeslice);
+	}
+	mutex_unlock(&mutex);
+	return ret;
+}
+
 #ifdef CONFIG_SCHED_DEBUG
 extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7808ab050599..eeef1a3086d1 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -39,9 +39,9 @@
 #include "cpuacct.h"
 
 #ifdef CONFIG_SCHED_DEBUG
-#define SCHED_WARN_ON(x)	WARN_ONCE(x, #x)
+# define SCHED_WARN_ON(x)	WARN_ONCE(x, #x)
 #else
-#define SCHED_WARN_ON(x)	((void)(x))
+# define SCHED_WARN_ON(x)	({ (void)(x), 0; })
 #endif
 
 struct rq;
@@ -218,23 +218,25 @@ static inline int dl_bandwidth_enabled(void)
 	return sysctl_sched_rt_runtime >= 0;
 }
 
-extern struct dl_bw *dl_bw_of(int i);
-
 struct dl_bw {
 	raw_spinlock_t lock;
 	u64 bw, total_bw;
 };
 
+static inline void __dl_update(struct dl_bw *dl_b, s64 bw);
+
 static inline
-void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw)
+void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
 {
 	dl_b->total_bw -= tsk_bw;
+	__dl_update(dl_b, (s32)tsk_bw / cpus);
 }
 
 static inline
-void __dl_add(struct dl_bw *dl_b, u64 tsk_bw)
+void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
 {
 	dl_b->total_bw += tsk_bw;
+	__dl_update(dl_b, -((s32)tsk_bw / cpus));
 }
 
 static inline
@@ -244,7 +246,22 @@ bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
 	       dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
 }
 
+void dl_change_utilization(struct task_struct *p, u64 new_bw);
 extern void init_dl_bw(struct dl_bw *dl_b);
+extern int sched_dl_global_validate(void);
+extern void sched_dl_do_global(void);
+extern int sched_dl_overflow(struct task_struct *p, int policy,
+			     const struct sched_attr *attr);
+extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
+extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
+extern bool __checkparam_dl(const struct sched_attr *attr);
+extern void __dl_clear_params(struct task_struct *p);
+extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
+extern int dl_task_can_attach(struct task_struct *p,
+			      const struct cpumask *cs_cpus_allowed);
+extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
+					const struct cpumask *trial);
+extern bool dl_cpu_busy(unsigned int cpu);
 
 #ifdef CONFIG_CGROUP_SCHED
 
@@ -366,6 +383,11 @@ extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent
 extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
 		struct sched_rt_entity *rt_se, int cpu,
 		struct sched_rt_entity *parent);
+extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us);
+extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us);
+extern long sched_group_rt_runtime(struct task_group *tg);
+extern long sched_group_rt_period(struct task_group *tg);
+extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
 
 extern struct task_group *sched_create_group(struct task_group *parent);
 extern void sched_online_group(struct task_group *tg,
@@ -558,6 +580,30 @@ struct dl_rq {
 #else
 	struct dl_bw dl_bw;
 #endif
+	/*
+	 * "Active utilization" for this runqueue: increased when a
+	 * task wakes up (becomes TASK_RUNNING) and decreased when a
+	 * task blocks
+	 */
+	u64 running_bw;
+
+	/*
+	 * Utilization of the tasks "assigned" to this runqueue (including
+	 * the tasks that are in runqueue and the tasks that executed on this
+	 * CPU and blocked). Increased when a task moves to this runqueue, and
+	 * decreased when the task moves away (migrates, changes scheduling
+	 * policy, or terminates).
+	 * This is needed to compute the "inactive utilization" for the
+	 * runqueue (inactive utilization = this_bw - running_bw).
+	 */
+	u64 this_bw;
+	u64 extra_bw;
+
+	/*
+	 * Inverse of the fraction of CPU utilization that can be reclaimed
+	 * by the GRUB algorithm.
+	 */
+	u64 bw_ratio;
 };
 
 #ifdef CONFIG_SMP
@@ -606,11 +652,9 @@ struct root_domain {
 
 extern struct root_domain def_root_domain;
 extern struct mutex sched_domains_mutex;
-extern cpumask_var_t fallback_doms;
-extern cpumask_var_t sched_domains_tmpmask;
 
 extern void init_defrootdomain(void);
-extern int init_sched_domains(const struct cpumask *cpu_map);
+extern int sched_init_domains(const struct cpumask *cpu_map);
 extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
 
 #endif /* CONFIG_SMP */
@@ -1025,7 +1069,11 @@ struct sched_group_capacity {
 	unsigned long next_update;
 	int imbalance; /* XXX unrelated to capacity but shared group state */
 
-	unsigned long cpumask[0]; /* iteration mask */
+#ifdef CONFIG_SCHED_DEBUG
+	int id;
+#endif
+
+	unsigned long cpumask[0]; /* balance mask */
 };
 
 struct sched_group {
@@ -1046,16 +1094,15 @@ struct sched_group {
 	unsigned long cpumask[0];
 };
 
-static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
+static inline struct cpumask *sched_group_span(struct sched_group *sg)
 {
 	return to_cpumask(sg->cpumask);
 }
 
 /*
- * cpumask masking which cpus in the group are allowed to iterate up the domain
- * tree.
+ * See build_balance_mask().
  */
-static inline struct cpumask *sched_group_mask(struct sched_group *sg)
+static inline struct cpumask *group_balance_mask(struct sched_group *sg)
 {
 	return to_cpumask(sg->sgc->cpumask);
 }
@@ -1066,7 +1113,7 @@ static inline struct cpumask *sched_group_mask(struct sched_group *sg)
  */
 static inline unsigned int group_first_cpu(struct sched_group *group)
 {
-	return cpumask_first(sched_group_cpus(group));
+	return cpumask_first(sched_group_span(group));
 }
 
 extern int group_balance_cpu(struct sched_group *sg);
@@ -1422,7 +1469,11 @@ static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
 	curr->sched_class->set_curr_task(rq);
 }
 
+#ifdef CONFIG_SMP
 #define sched_class_highest (&stop_sched_class)
+#else
+#define sched_class_highest (&dl_sched_class)
+#endif
 #define for_each_class(class) \
    for (class = sched_class_highest; class; class = class->next)
 
@@ -1467,6 +1518,8 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq)
 }
 #endif
 
+extern void schedule_idle(void);
+
 extern void sysrq_sched_debug_show(void);
 extern void sched_init_granularity(void);
 extern void update_max_interval(void);
@@ -1484,7 +1537,12 @@ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime
 extern struct dl_bandwidth def_dl_bandwidth;
 extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
 extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
+extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
+extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
 
+#define BW_SHIFT	20
+#define BW_UNIT		(1 << BW_SHIFT)
+#define RATIO_SHIFT	8
 unsigned long to_ratio(u64 period, u64 runtime);
 
 extern void init_entity_runnable_average(struct sched_entity *se);
@@ -1926,6 +1984,33 @@ extern void nohz_balance_exit_idle(unsigned int cpu);
 static inline void nohz_balance_exit_idle(unsigned int cpu) { }
 #endif
 
+
+#ifdef CONFIG_SMP
+static inline
+void __dl_update(struct dl_bw *dl_b, s64 bw)
+{
+	struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
+	int i;
+
+	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+			 "sched RCU must be held");
+	for_each_cpu_and(i, rd->span, cpu_active_mask) {
+		struct rq *rq = cpu_rq(i);
+
+		rq->dl.extra_bw += bw;
+	}
+}
+#else
+static inline
+void __dl_update(struct dl_bw *dl_b, s64 bw)
+{
+	struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
+
+	dl->extra_bw += bw;
+}
+#endif
+
+
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 struct irqtime {
 	u64			total;
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 1b0b4fb12837..79895aec281e 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -10,6 +10,7 @@ DEFINE_MUTEX(sched_domains_mutex);
 
 /* Protected by sched_domains_mutex: */
 cpumask_var_t sched_domains_tmpmask;
+cpumask_var_t sched_domains_tmpmask2;
 
 #ifdef CONFIG_SCHED_DEBUG
 
@@ -35,7 +36,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 
 	cpumask_clear(groupmask);
 
-	printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
+	printk(KERN_DEBUG "%*s domain-%d: ", level, "", level);
 
 	if (!(sd->flags & SD_LOAD_BALANCE)) {
 		printk("does not load-balance\n");
@@ -45,14 +46,14 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		return -1;
 	}
 
-	printk(KERN_CONT "span %*pbl level %s\n",
+	printk(KERN_CONT "span=%*pbl level=%s\n",
 	       cpumask_pr_args(sched_domain_span(sd)), sd->name);
 
 	if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 		printk(KERN_ERR "ERROR: domain->span does not contain "
 				"CPU%d\n", cpu);
 	}
-	if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) {
+	if (!cpumask_test_cpu(cpu, sched_group_span(group))) {
 		printk(KERN_ERR "ERROR: domain->groups does not contain"
 				" CPU%d\n", cpu);
 	}
@@ -65,29 +66,47 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 			break;
 		}
 
-		if (!cpumask_weight(sched_group_cpus(group))) {
+		if (!cpumask_weight(sched_group_span(group))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: empty group\n");
 			break;
 		}
 
 		if (!(sd->flags & SD_OVERLAP) &&
-		    cpumask_intersects(groupmask, sched_group_cpus(group))) {
+		    cpumask_intersects(groupmask, sched_group_span(group))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: repeated CPUs\n");
 			break;
 		}
 
-		cpumask_or(groupmask, groupmask, sched_group_cpus(group));
+		cpumask_or(groupmask, groupmask, sched_group_span(group));
 
-		printk(KERN_CONT " %*pbl",
-		       cpumask_pr_args(sched_group_cpus(group)));
-		if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
-			printk(KERN_CONT " (cpu_capacity = %lu)",
-				group->sgc->capacity);
+		printk(KERN_CONT " %d:{ span=%*pbl",
+				group->sgc->id,
+				cpumask_pr_args(sched_group_span(group)));
+
+		if ((sd->flags & SD_OVERLAP) &&
+		    !cpumask_equal(group_balance_mask(group), sched_group_span(group))) {
+			printk(KERN_CONT " mask=%*pbl",
+				cpumask_pr_args(group_balance_mask(group)));
+		}
+
+		if (group->sgc->capacity != SCHED_CAPACITY_SCALE)
+			printk(KERN_CONT " cap=%lu", group->sgc->capacity);
+
+		if (group == sd->groups && sd->child &&
+		    !cpumask_equal(sched_domain_span(sd->child),
+				   sched_group_span(group))) {
+			printk(KERN_ERR "ERROR: domain->groups does not match domain->child\n");
 		}
 
+		printk(KERN_CONT " }");
+
 		group = group->next;
+
+		if (group != sd->groups)
+			printk(KERN_CONT ",");
+
 	} while (group != sd->groups);
 	printk(KERN_CONT "\n");
 
@@ -113,7 +132,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 		return;
 	}
 
-	printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
+	printk(KERN_DEBUG "CPU%d attaching sched-domain(s):\n", cpu);
 
 	for (;;) {
 		if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask))
@@ -477,46 +496,214 @@ enum s_alloc {
 };
 
 /*
- * Build an iteration mask that can exclude certain CPUs from the upwards
- * domain traversal.
+ * Return the canonical balance CPU for this group, this is the first CPU
+ * of this group that's also in the balance mask.
  *
- * Asymmetric node setups can result in situations where the domain tree is of
- * unequal depth, make sure to skip domains that already cover the entire
- * range.
+ * The balance mask are all those CPUs that could actually end up at this
+ * group. See build_balance_mask().
  *
- * In that case build_sched_domains() will have terminated the iteration early
- * and our sibling sd spans will be empty. Domains should always include the
- * CPU they're built on, so check that.
+ * Also see should_we_balance().
  */
-static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
+int group_balance_cpu(struct sched_group *sg)
 {
-	const struct cpumask *span = sched_domain_span(sd);
+	return cpumask_first(group_balance_mask(sg));
+}
+
+
+/*
+ * NUMA topology (first read the regular topology blurb below)
+ *
+ * Given a node-distance table, for example:
+ *
+ *   node   0   1   2   3
+ *     0:  10  20  30  20
+ *     1:  20  10  20  30
+ *     2:  30  20  10  20
+ *     3:  20  30  20  10
+ *
+ * which represents a 4 node ring topology like:
+ *
+ *   0 ----- 1
+ *   |       |
+ *   |       |
+ *   |       |
+ *   3 ----- 2
+ *
+ * We want to construct domains and groups to represent this. The way we go
+ * about doing this is to build the domains on 'hops'. For each NUMA level we
+ * construct the mask of all nodes reachable in @level hops.
+ *
+ * For the above NUMA topology that gives 3 levels:
+ *
+ * NUMA-2	0-3		0-3		0-3		0-3
+ *  groups:	{0-1,3},{1-3}	{0-2},{0,2-3}	{1-3},{0-1,3}	{0,2-3},{0-2}
+ *
+ * NUMA-1	0-1,3		0-2		1-3		0,2-3
+ *  groups:	{0},{1},{3}	{0},{1},{2}	{1},{2},{3}	{0},{2},{3}
+ *
+ * NUMA-0	0		1		2		3
+ *
+ *
+ * As can be seen; things don't nicely line up as with the regular topology.
+ * When we iterate a domain in child domain chunks some nodes can be
+ * represented multiple times -- hence the "overlap" naming for this part of
+ * the topology.
+ *
+ * In order to minimize this overlap, we only build enough groups to cover the
+ * domain. For instance Node-0 NUMA-2 would only get groups: 0-1,3 and 1-3.
+ *
+ * Because:
+ *
+ *  - the first group of each domain is its child domain; this
+ *    gets us the first 0-1,3
+ *  - the only uncovered node is 2, who's child domain is 1-3.
+ *
+ * However, because of the overlap, computing a unique CPU for each group is
+ * more complicated. Consider for instance the groups of NODE-1 NUMA-2, both
+ * groups include the CPUs of Node-0, while those CPUs would not in fact ever
+ * end up at those groups (they would end up in group: 0-1,3).
+ *
+ * To correct this we have to introduce the group balance mask. This mask
+ * will contain those CPUs in the group that can reach this group given the
+ * (child) domain tree.
+ *
+ * With this we can once again compute balance_cpu and sched_group_capacity
+ * relations.
+ *
+ * XXX include words on how balance_cpu is unique and therefore can be
+ * used for sched_group_capacity links.
+ *
+ *
+ * Another 'interesting' topology is:
+ *
+ *   node   0   1   2   3
+ *     0:  10  20  20  30
+ *     1:  20  10  20  20
+ *     2:  20  20  10  20
+ *     3:  30  20  20  10
+ *
+ * Which looks a little like:
+ *
+ *   0 ----- 1
+ *   |     / |
+ *   |   /   |
+ *   | /     |
+ *   2 ----- 3
+ *
+ * This topology is asymmetric, nodes 1,2 are fully connected, but nodes 0,3
+ * are not.
+ *
+ * This leads to a few particularly weird cases where the sched_domain's are
+ * not of the same number for each cpu. Consider:
+ *
+ * NUMA-2	0-3						0-3
+ *  groups:	{0-2},{1-3}					{1-3},{0-2}
+ *
+ * NUMA-1	0-2		0-3		0-3		1-3
+ *
+ * NUMA-0	0		1		2		3
+ *
+ */
+
+
+/*
+ * Build the balance mask; it contains only those CPUs that can arrive at this
+ * group and should be considered to continue balancing.
+ *
+ * We do this during the group creation pass, therefore the group information
+ * isn't complete yet, however since each group represents a (child) domain we
+ * can fully construct this using the sched_domain bits (which are already
+ * complete).
+ */
+static void
+build_balance_mask(struct sched_domain *sd, struct sched_group *sg, struct cpumask *mask)
+{
+	const struct cpumask *sg_span = sched_group_span(sg);
 	struct sd_data *sdd = sd->private;
 	struct sched_domain *sibling;
 	int i;
 
-	for_each_cpu(i, span) {
+	cpumask_clear(mask);
+
+	for_each_cpu(i, sg_span) {
 		sibling = *per_cpu_ptr(sdd->sd, i);
-		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+
+		/*
+		 * Can happen in the asymmetric case, where these siblings are
+		 * unused. The mask will not be empty because those CPUs that
+		 * do have the top domain _should_ span the domain.
+		 */
+		if (!sibling->child)
 			continue;
 
-		cpumask_set_cpu(i, sched_group_mask(sg));
+		/* If we would not end up here, we can't continue from here */
+		if (!cpumask_equal(sg_span, sched_domain_span(sibling->child)))
+			continue;
+
+		cpumask_set_cpu(i, mask);
 	}
+
+	/* We must not have empty masks here */
+	WARN_ON_ONCE(cpumask_empty(mask));
 }
 
 /*
- * Return the canonical balance CPU for this group, this is the first CPU
- * of this group that's also in the iteration mask.
+ * XXX: This creates per-node group entries; since the load-balancer will
+ * immediately access remote memory to construct this group's load-balance
+ * statistics having the groups node local is of dubious benefit.
  */
-int group_balance_cpu(struct sched_group *sg)
+static struct sched_group *
+build_group_from_child_sched_domain(struct sched_domain *sd, int cpu)
 {
-	return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
+	struct sched_group *sg;
+	struct cpumask *sg_span;
+
+	sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+			GFP_KERNEL, cpu_to_node(cpu));
+
+	if (!sg)
+		return NULL;
+
+	sg_span = sched_group_span(sg);
+	if (sd->child)
+		cpumask_copy(sg_span, sched_domain_span(sd->child));
+	else
+		cpumask_copy(sg_span, sched_domain_span(sd));
+
+	return sg;
+}
+
+static void init_overlap_sched_group(struct sched_domain *sd,
+				     struct sched_group *sg)
+{
+	struct cpumask *mask = sched_domains_tmpmask2;
+	struct sd_data *sdd = sd->private;
+	struct cpumask *sg_span;
+	int cpu;
+
+	build_balance_mask(sd, sg, mask);
+	cpu = cpumask_first_and(sched_group_span(sg), mask);
+
+	sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
+	if (atomic_inc_return(&sg->sgc->ref) == 1)
+		cpumask_copy(group_balance_mask(sg), mask);
+	else
+		WARN_ON_ONCE(!cpumask_equal(group_balance_mask(sg), mask));
+
+	/*
+	 * Initialize sgc->capacity such that even if we mess up the
+	 * domains and no possible iteration will get us here, we won't
+	 * die on a /0 trap.
+	 */
+	sg_span = sched_group_span(sg);
+	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
+	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
 }
 
 static int
 build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 {
-	struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg;
+	struct sched_group *first = NULL, *last = NULL, *sg;
 	const struct cpumask *span = sched_domain_span(sd);
 	struct cpumask *covered = sched_domains_tmpmask;
 	struct sd_data *sdd = sd->private;
@@ -525,7 +712,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 
 	cpumask_clear(covered);
 
-	for_each_cpu(i, span) {
+	for_each_cpu_wrap(i, span, cpu) {
 		struct cpumask *sg_span;
 
 		if (cpumask_test_cpu(i, covered))
@@ -533,44 +720,27 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 
 		sibling = *per_cpu_ptr(sdd->sd, i);
 
-		/* See the comment near build_group_mask(). */
+		/*
+		 * Asymmetric node setups can result in situations where the
+		 * domain tree is of unequal depth, make sure to skip domains
+		 * that already cover the entire range.
+		 *
+		 * In that case build_sched_domains() will have terminated the
+		 * iteration early and our sibling sd spans will be empty.
+		 * Domains should always include the CPU they're built on, so
+		 * check that.
+		 */
 		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
 			continue;
 
-		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
-				GFP_KERNEL, cpu_to_node(cpu));
-
+		sg = build_group_from_child_sched_domain(sibling, cpu);
 		if (!sg)
 			goto fail;
 
-		sg_span = sched_group_cpus(sg);
-		if (sibling->child)
-			cpumask_copy(sg_span, sched_domain_span(sibling->child));
-		else
-			cpumask_set_cpu(i, sg_span);
-
+		sg_span = sched_group_span(sg);
 		cpumask_or(covered, covered, sg_span);
 
-		sg->sgc = *per_cpu_ptr(sdd->sgc, i);
-		if (atomic_inc_return(&sg->sgc->ref) == 1)
-			build_group_mask(sd, sg);
-
-		/*
-		 * Initialize sgc->capacity such that even if we mess up the
-		 * domains and no possible iteration will get us here, we won't
-		 * die on a /0 trap.
-		 */
-		sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
-		sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
-
-		/*
-		 * Make sure the first group of this domain contains the
-		 * canonical balance CPU. Otherwise the sched_domain iteration
-		 * breaks. See update_sg_lb_stats().
-		 */
-		if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
-		    group_balance_cpu(sg) == cpu)
-			groups = sg;
+		init_overlap_sched_group(sd, sg);
 
 		if (!first)
 			first = sg;
@@ -579,7 +749,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		last = sg;
 		last->next = first;
 	}
-	sd->groups = groups;
+	sd->groups = first;
 
 	return 0;
 
@@ -589,23 +759,106 @@ fail:
 	return -ENOMEM;
 }
 
-static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
+
+/*
+ * Package topology (also see the load-balance blurb in fair.c)
+ *
+ * The scheduler builds a tree structure to represent a number of important
+ * topology features. By default (default_topology[]) these include:
+ *
+ *  - Simultaneous multithreading (SMT)
+ *  - Multi-Core Cache (MC)
+ *  - Package (DIE)
+ *
+ * Where the last one more or less denotes everything up to a NUMA node.
+ *
+ * The tree consists of 3 primary data structures:
+ *
+ *	sched_domain -> sched_group -> sched_group_capacity
+ *	    ^ ^             ^ ^
+ *          `-'             `-'
+ *
+ * The sched_domains are per-cpu and have a two way link (parent & child) and
+ * denote the ever growing mask of CPUs belonging to that level of topology.
+ *
+ * Each sched_domain has a circular (double) linked list of sched_group's, each
+ * denoting the domains of the level below (or individual CPUs in case of the
+ * first domain level). The sched_group linked by a sched_domain includes the
+ * CPU of that sched_domain [*].
+ *
+ * Take for instance a 2 threaded, 2 core, 2 cache cluster part:
+ *
+ * CPU   0   1   2   3   4   5   6   7
+ *
+ * DIE  [                             ]
+ * MC   [             ] [             ]
+ * SMT  [     ] [     ] [     ] [     ]
+ *
+ *  - or -
+ *
+ * DIE  0-7 0-7 0-7 0-7 0-7 0-7 0-7 0-7
+ * MC	0-3 0-3 0-3 0-3 4-7 4-7 4-7 4-7
+ * SMT  0-1 0-1 2-3 2-3 4-5 4-5 6-7 6-7
+ *
+ * CPU   0   1   2   3   4   5   6   7
+ *
+ * One way to think about it is: sched_domain moves you up and down among these
+ * topology levels, while sched_group moves you sideways through it, at child
+ * domain granularity.
+ *
+ * sched_group_capacity ensures each unique sched_group has shared storage.
+ *
+ * There are two related construction problems, both require a CPU that
+ * uniquely identify each group (for a given domain):
+ *
+ *  - The first is the balance_cpu (see should_we_balance() and the
+ *    load-balance blub in fair.c); for each group we only want 1 CPU to
+ *    continue balancing at a higher domain.
+ *
+ *  - The second is the sched_group_capacity; we want all identical groups
+ *    to share a single sched_group_capacity.
+ *
+ * Since these topologies are exclusive by construction. That is, its
+ * impossible for an SMT thread to belong to multiple cores, and cores to
+ * be part of multiple caches. There is a very clear and unique location
+ * for each CPU in the hierarchy.
+ *
+ * Therefore computing a unique CPU for each group is trivial (the iteration
+ * mask is redundant and set all 1s; all CPUs in a group will end up at _that_
+ * group), we can simply pick the first CPU in each group.
+ *
+ *
+ * [*] in other words, the first group of each domain is its child domain.
+ */
+
+static struct sched_group *get_group(int cpu, struct sd_data *sdd)
 {
 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
 	struct sched_domain *child = sd->child;
+	struct sched_group *sg;
 
 	if (child)
 		cpu = cpumask_first(sched_domain_span(child));
 
-	if (sg) {
-		*sg = *per_cpu_ptr(sdd->sg, cpu);
-		(*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu);
+	sg = *per_cpu_ptr(sdd->sg, cpu);
+	sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
+
+	/* For claim_allocations: */
+	atomic_inc(&sg->ref);
+	atomic_inc(&sg->sgc->ref);
 
-		/* For claim_allocations: */
-		atomic_set(&(*sg)->sgc->ref, 1);
+	if (child) {
+		cpumask_copy(sched_group_span(sg), sched_domain_span(child));
+		cpumask_copy(group_balance_mask(sg), sched_group_span(sg));
+	} else {
+		cpumask_set_cpu(cpu, sched_group_span(sg));
+		cpumask_set_cpu(cpu, group_balance_mask(sg));
 	}
 
-	return cpu;
+	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
+	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+
+	return sg;
 }
 
 /*
@@ -624,34 +877,20 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 	struct cpumask *covered;
 	int i;
 
-	get_group(cpu, sdd, &sd->groups);
-	atomic_inc(&sd->groups->ref);
-
-	if (cpu != cpumask_first(span))
-		return 0;
-
 	lockdep_assert_held(&sched_domains_mutex);
 	covered = sched_domains_tmpmask;
 
 	cpumask_clear(covered);
 
-	for_each_cpu(i, span) {
+	for_each_cpu_wrap(i, span, cpu) {
 		struct sched_group *sg;
-		int group, j;
 
 		if (cpumask_test_cpu(i, covered))
 			continue;
 
-		group = get_group(i, sdd, &sg);
-		cpumask_setall(sched_group_mask(sg));
+		sg = get_group(i, sdd);
 
-		for_each_cpu(j, span) {
-			if (get_group(j, sdd, NULL) != group)
-				continue;
-
-			cpumask_set_cpu(j, covered);
-			cpumask_set_cpu(j, sched_group_cpus(sg));
-		}
+		cpumask_or(covered, covered, sched_group_span(sg));
 
 		if (!first)
 			first = sg;
@@ -660,6 +899,7 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 		last = sg;
 	}
 	last->next = first;
+	sd->groups = first;
 
 	return 0;
 }
@@ -683,12 +923,12 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
 	do {
 		int cpu, max_cpu = -1;
 
-		sg->group_weight = cpumask_weight(sched_group_cpus(sg));
+		sg->group_weight = cpumask_weight(sched_group_span(sg));
 
 		if (!(sd->flags & SD_ASYM_PACKING))
 			goto next;
 
-		for_each_cpu(cpu, sched_group_cpus(sg)) {
+		for_each_cpu(cpu, sched_group_span(sg)) {
 			if (max_cpu < 0)
 				max_cpu = cpu;
 			else if (sched_asym_prefer(cpu, max_cpu))
@@ -1308,6 +1548,10 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 			if (!sgc)
 				return -ENOMEM;
 
+#ifdef CONFIG_SCHED_DEBUG
+			sgc->id = j;
+#endif
+
 			*per_cpu_ptr(sdd->sgc, j) = sgc;
 		}
 	}
@@ -1407,7 +1651,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 			sd = build_sched_domain(tl, cpu_map, attr, sd, i);
 			if (tl == sched_domain_topology)
 				*per_cpu_ptr(d.sd, i) = sd;
-			if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
+			if (tl->flags & SDTL_OVERLAP)
 				sd->flags |= SD_OVERLAP;
 			if (cpumask_equal(cpu_map, sched_domain_span(sd)))
 				break;
@@ -1478,7 +1722,7 @@ static struct sched_domain_attr		*dattr_cur;
  * cpumask) fails, then fallback to a single sched domain,
  * as determined by the single cpumask fallback_doms.
  */
-cpumask_var_t				fallback_doms;
+static cpumask_var_t			fallback_doms;
 
 /*
  * arch_update_cpu_topology lets virtualized architectures update the
@@ -1520,10 +1764,14 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
  * For now this just excludes isolated CPUs, but could be used to
  * exclude other special cases in the future.
  */
-int init_sched_domains(const struct cpumask *cpu_map)
+int sched_init_domains(const struct cpumask *cpu_map)
 {
 	int err;
 
+	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_KERNEL);
+	zalloc_cpumask_var(&sched_domains_tmpmask2, GFP_KERNEL);
+	zalloc_cpumask_var(&fallback_doms, GFP_KERNEL);
+
 	arch_update_cpu_topology();
 	ndoms_cur = 1;
 	doms_cur = alloc_sched_domains(ndoms_cur);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index b8c84c6dee64..17f11c6b0a9f 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -12,44 +12,44 @@
 #include <linux/hash.h>
 #include <linux/kthread.h>
 
-void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
+void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
 {
-	spin_lock_init(&q->lock);
-	lockdep_set_class_and_name(&q->lock, key, name);
-	INIT_LIST_HEAD(&q->task_list);
+	spin_lock_init(&wq_head->lock);
+	lockdep_set_class_and_name(&wq_head->lock, key, name);
+	INIT_LIST_HEAD(&wq_head->head);
 }
 
 EXPORT_SYMBOL(__init_waitqueue_head);
 
-void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
-	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
+	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__add_wait_queue_entry_tail(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(add_wait_queue);
 
-void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue_tail(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__add_wait_queue_entry_tail(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(add_wait_queue_exclusive);
 
-void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&q->lock, flags);
-	__remove_wait_queue(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__remove_wait_queue(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(remove_wait_queue);
 
@@ -63,12 +63,12 @@ EXPORT_SYMBOL(remove_wait_queue);
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, int wake_flags, void *key)
 {
-	wait_queue_t *curr, *next;
+	wait_queue_entry_t *curr, *next;
 
-	list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
+	list_for_each_entry_safe(curr, next, &wq_head->head, entry) {
 		unsigned flags = curr->flags;
 
 		if (curr->func(curr, mode, wake_flags, key) &&
@@ -79,7 +79,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 
 /**
  * __wake_up - wake up threads blocked on a waitqueue.
- * @q: the waitqueue
+ * @wq_head: the waitqueue
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: is directly passed to the wakeup function
@@ -87,35 +87,35 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
  * It may be assumed that this function implies a write memory barrier before
  * changing the task state if and only if any tasks are woken up.
  */
-void __wake_up(wait_queue_head_t *q, unsigned int mode,
+void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, void *key)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&q->lock, flags);
-	__wake_up_common(q, mode, nr_exclusive, 0, key);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__wake_up_common(wq_head, mode, nr_exclusive, 0, key);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(__wake_up);
 
 /*
  * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
  */
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
+void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
 {
-	__wake_up_common(q, mode, nr, 0, NULL);
+	__wake_up_common(wq_head, mode, nr, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked);
 
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
+void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
 {
-	__wake_up_common(q, mode, 1, 0, key);
+	__wake_up_common(wq_head, mode, 1, 0, key);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 
 /**
  * __wake_up_sync_key - wake up threads blocked on a waitqueue.
- * @q: the waitqueue
+ * @wq_head: the waitqueue
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: opaque value to be passed to wakeup targets
@@ -130,30 +130,30 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key);
  * It may be assumed that this function implies a write memory barrier before
  * changing the task state if and only if any tasks are woken up.
  */
-void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
+void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, void *key)
 {
 	unsigned long flags;
 	int wake_flags = 1; /* XXX WF_SYNC */
 
-	if (unlikely(!q))
+	if (unlikely(!wq_head))
 		return;
 
 	if (unlikely(nr_exclusive != 1))
 		wake_flags = 0;
 
-	spin_lock_irqsave(&q->lock, flags);
-	__wake_up_common(q, mode, nr_exclusive, wake_flags, key);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync_key);
 
 /*
  * __wake_up_sync - see __wake_up_sync_key()
  */
-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive)
 {
-	__wake_up_sync_key(q, mode, nr_exclusive, NULL);
+	__wake_up_sync_key(wq_head, mode, nr_exclusive, NULL);
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
 
@@ -170,48 +170,48 @@ EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
  * loads to move into the critical region).
  */
 void
-prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
+prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 
-	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue(q, wait);
+	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	if (list_empty(&wq_entry->entry))
+		__add_wait_queue(wq_head, wq_entry);
 	set_current_state(state);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait);
 
 void
-prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue_tail(q, wait);
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	if (list_empty(&wq_entry->entry))
+		__add_wait_queue_entry_tail(wq_head, wq_entry);
 	set_current_state(state);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
 
-void init_wait_entry(wait_queue_t *wait, int flags)
+void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
 {
-	wait->flags = flags;
-	wait->private = current;
-	wait->func = autoremove_wake_function;
-	INIT_LIST_HEAD(&wait->task_list);
+	wq_entry->flags = flags;
+	wq_entry->private = current;
+	wq_entry->func = autoremove_wake_function;
+	INIT_LIST_HEAD(&wq_entry->entry);
 }
 EXPORT_SYMBOL(init_wait_entry);
 
-long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
+long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 	long ret = 0;
 
-	spin_lock_irqsave(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
 	if (unlikely(signal_pending_state(state, current))) {
 		/*
 		 * Exclusive waiter must not fail if it was selected by wakeup,
@@ -219,24 +219,24 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
 		 *
 		 * The caller will recheck the condition and return success if
 		 * we were already woken up, we can not miss the event because
-		 * wakeup locks/unlocks the same q->lock.
+		 * wakeup locks/unlocks the same wq_head->lock.
 		 *
 		 * But we need to ensure that set-condition + wakeup after that
 		 * can't see us, it should wake up another exclusive waiter if
 		 * we fail.
 		 */
-		list_del_init(&wait->task_list);
+		list_del_init(&wq_entry->entry);
 		ret = -ERESTARTSYS;
 	} else {
-		if (list_empty(&wait->task_list)) {
-			if (wait->flags & WQ_FLAG_EXCLUSIVE)
-				__add_wait_queue_tail(q, wait);
+		if (list_empty(&wq_entry->entry)) {
+			if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
+				__add_wait_queue_entry_tail(wq_head, wq_entry);
 			else
-				__add_wait_queue(q, wait);
+				__add_wait_queue(wq_head, wq_entry);
 		}
 		set_current_state(state);
 	}
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 
 	return ret;
 }
@@ -249,10 +249,10 @@ EXPORT_SYMBOL(prepare_to_wait_event);
  * condition in the caller before they add the wait
  * entry to the wake queue.
  */
-int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait)
+int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
 {
-	if (likely(list_empty(&wait->task_list)))
-		__add_wait_queue_tail(wq, wait);
+	if (likely(list_empty(&wait->entry)))
+		__add_wait_queue_entry_tail(wq, wait);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	if (signal_pending(current))
@@ -265,10 +265,10 @@ int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait)
 }
 EXPORT_SYMBOL(do_wait_intr);
 
-int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_t *wait)
+int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
 {
-	if (likely(list_empty(&wait->task_list)))
-		__add_wait_queue_tail(wq, wait);
+	if (likely(list_empty(&wait->entry)))
+		__add_wait_queue_entry_tail(wq, wait);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	if (signal_pending(current))
@@ -283,14 +283,14 @@ EXPORT_SYMBOL(do_wait_intr_irq);
 
 /**
  * finish_wait - clean up after waiting in a queue
- * @q: waitqueue waited on
- * @wait: wait descriptor
+ * @wq_head: waitqueue waited on
+ * @wq_entry: wait descriptor
  *
  * Sets current thread back to running state and removes
  * the wait descriptor from the given waitqueue if still
  * queued.
  */
-void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
@@ -308,20 +308,20 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
 	 *    have _one_ other CPU that looks at or modifies
 	 *    the list).
 	 */
-	if (!list_empty_careful(&wait->task_list)) {
-		spin_lock_irqsave(&q->lock, flags);
-		list_del_init(&wait->task_list);
-		spin_unlock_irqrestore(&q->lock, flags);
+	if (!list_empty_careful(&wq_entry->entry)) {
+		spin_lock_irqsave(&wq_head->lock, flags);
+		list_del_init(&wq_entry->entry);
+		spin_unlock_irqrestore(&wq_head->lock, flags);
 	}
 }
 EXPORT_SYMBOL(finish_wait);
 
-int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
 {
-	int ret = default_wake_function(wait, mode, sync, key);
+	int ret = default_wake_function(wq_entry, mode, sync, key);
 
 	if (ret)
-		list_del_init(&wait->task_list);
+		list_del_init(&wq_entry->entry);
 	return ret;
 }
 EXPORT_SYMBOL(autoremove_wake_function);
@@ -334,24 +334,24 @@ static inline bool is_kthread_should_stop(void)
 /*
  * DEFINE_WAIT_FUNC(wait, woken_wake_func);
  *
- * add_wait_queue(&wq, &wait);
+ * add_wait_queue(&wq_head, &wait);
  * for (;;) {
  *     if (condition)
  *         break;
  *
  *     p->state = mode;				condition = true;
  *     smp_mb(); // A				smp_wmb(); // C
- *     if (!wait->flags & WQ_FLAG_WOKEN)	wait->flags |= WQ_FLAG_WOKEN;
+ *     if (!wq_entry->flags & WQ_FLAG_WOKEN)	wq_entry->flags |= WQ_FLAG_WOKEN;
  *         schedule()				try_to_wake_up();
  *     p->state = TASK_RUNNING;		    ~~~~~~~~~~~~~~~~~~
- *     wait->flags &= ~WQ_FLAG_WOKEN;		condition = true;
+ *     wq_entry->flags &= ~WQ_FLAG_WOKEN;		condition = true;
  *     smp_mb() // B				smp_wmb(); // C
- *						wait->flags |= WQ_FLAG_WOKEN;
+ *						wq_entry->flags |= WQ_FLAG_WOKEN;
  * }
- * remove_wait_queue(&wq, &wait);
+ * remove_wait_queue(&wq_head, &wait);
  *
  */
-long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
+long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout)
 {
 	set_current_state(mode); /* A */
 	/*
@@ -359,7 +359,7 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
 	 * woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must
 	 * also observe all state before the wakeup.
 	 */
-	if (!(wait->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
+	if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
 		timeout = schedule_timeout(timeout);
 	__set_current_state(TASK_RUNNING);
 
@@ -369,13 +369,13 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
 	 * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
 	 * an event.
 	 */
-	smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
+	smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */
 
 	return timeout;
 }
 EXPORT_SYMBOL(wait_woken);
 
-int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
 {
 	/*
 	 * Although this function is called under waitqueue lock, LOCK
@@ -385,267 +385,8 @@ int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
 	 * and is paired with smp_store_mb() in wait_woken().
 	 */
 	smp_wmb(); /* C */
-	wait->flags |= WQ_FLAG_WOKEN;
+	wq_entry->flags |= WQ_FLAG_WOKEN;
 
-	return default_wake_function(wait, mode, sync, key);
+	return default_wake_function(wq_entry, mode, sync, key);
 }
 EXPORT_SYMBOL(woken_wake_function);
-
-int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
-{
-	struct wait_bit_key *key = arg;
-	struct wait_bit_queue *wait_bit
-		= container_of(wait, struct wait_bit_queue, wait);
-
-	if (wait_bit->key.flags != key->flags ||
-			wait_bit->key.bit_nr != key->bit_nr ||
-			test_bit(key->bit_nr, key->flags))
-		return 0;
-	else
-		return autoremove_wake_function(wait, mode, sync, key);
-}
-EXPORT_SYMBOL(wake_bit_function);
-
-/*
- * To allow interruptible waiting and asynchronous (i.e. nonblocking)
- * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
- * permitted return codes. Nonzero return codes halt waiting and return.
- */
-int __sched
-__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
-	      wait_bit_action_f *action, unsigned mode)
-{
-	int ret = 0;
-
-	do {
-		prepare_to_wait(wq, &q->wait, mode);
-		if (test_bit(q->key.bit_nr, q->key.flags))
-			ret = (*action)(&q->key, mode);
-	} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
-	finish_wait(wq, &q->wait);
-	return ret;
-}
-EXPORT_SYMBOL(__wait_on_bit);
-
-int __sched out_of_line_wait_on_bit(void *word, int bit,
-				    wait_bit_action_f *action, unsigned mode)
-{
-	wait_queue_head_t *wq = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wait, word, bit);
-
-	return __wait_on_bit(wq, &wait, action, mode);
-}
-EXPORT_SYMBOL(out_of_line_wait_on_bit);
-
-int __sched out_of_line_wait_on_bit_timeout(
-	void *word, int bit, wait_bit_action_f *action,
-	unsigned mode, unsigned long timeout)
-{
-	wait_queue_head_t *wq = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wait, word, bit);
-
-	wait.key.timeout = jiffies + timeout;
-	return __wait_on_bit(wq, &wait, action, mode);
-}
-EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
-
-int __sched
-__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
-			wait_bit_action_f *action, unsigned mode)
-{
-	int ret = 0;
-
-	for (;;) {
-		prepare_to_wait_exclusive(wq, &q->wait, mode);
-		if (test_bit(q->key.bit_nr, q->key.flags)) {
-			ret = action(&q->key, mode);
-			/*
-			 * See the comment in prepare_to_wait_event().
-			 * finish_wait() does not necessarily takes wq->lock,
-			 * but test_and_set_bit() implies mb() which pairs with
-			 * smp_mb__after_atomic() before wake_up_page().
-			 */
-			if (ret)
-				finish_wait(wq, &q->wait);
-		}
-		if (!test_and_set_bit(q->key.bit_nr, q->key.flags)) {
-			if (!ret)
-				finish_wait(wq, &q->wait);
-			return 0;
-		} else if (ret) {
-			return ret;
-		}
-	}
-}
-EXPORT_SYMBOL(__wait_on_bit_lock);
-
-int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
-					 wait_bit_action_f *action, unsigned mode)
-{
-	wait_queue_head_t *wq = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wait, word, bit);
-
-	return __wait_on_bit_lock(wq, &wait, action, mode);
-}
-EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
-
-void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
-{
-	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
-	if (waitqueue_active(wq))
-		__wake_up(wq, TASK_NORMAL, 1, &key);
-}
-EXPORT_SYMBOL(__wake_up_bit);
-
-/**
- * wake_up_bit - wake up a waiter on a bit
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that wakes up waiters
- * on a bit. For instance, if one were to have waiters on a bitflag,
- * one would call wake_up_bit() after clearing the bit.
- *
- * In order for this to function properly, as it uses waitqueue_active()
- * internally, some kind of memory barrier must be done prior to calling
- * this. Typically, this will be smp_mb__after_atomic(), but in some
- * cases where bitflags are manipulated non-atomically under a lock, one
- * may need to use a less regular barrier, such fs/inode.c's smp_mb(),
- * because spin_unlock() does not guarantee a memory barrier.
- */
-void wake_up_bit(void *word, int bit)
-{
-	__wake_up_bit(bit_waitqueue(word, bit), word, bit);
-}
-EXPORT_SYMBOL(wake_up_bit);
-
-/*
- * Manipulate the atomic_t address to produce a better bit waitqueue table hash
- * index (we're keying off bit -1, but that would produce a horrible hash
- * value).
- */
-static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
-{
-	if (BITS_PER_LONG == 64) {
-		unsigned long q = (unsigned long)p;
-		return bit_waitqueue((void *)(q & ~1), q & 1);
-	}
-	return bit_waitqueue(p, 0);
-}
-
-static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync,
-				  void *arg)
-{
-	struct wait_bit_key *key = arg;
-	struct wait_bit_queue *wait_bit
-		= container_of(wait, struct wait_bit_queue, wait);
-	atomic_t *val = key->flags;
-
-	if (wait_bit->key.flags != key->flags ||
-	    wait_bit->key.bit_nr != key->bit_nr ||
-	    atomic_read(val) != 0)
-		return 0;
-	return autoremove_wake_function(wait, mode, sync, key);
-}
-
-/*
- * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
- * the actions of __wait_on_atomic_t() are permitted return codes.  Nonzero
- * return codes halt waiting and return.
- */
-static __sched
-int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
-		       int (*action)(atomic_t *), unsigned mode)
-{
-	atomic_t *val;
-	int ret = 0;
-
-	do {
-		prepare_to_wait(wq, &q->wait, mode);
-		val = q->key.flags;
-		if (atomic_read(val) == 0)
-			break;
-		ret = (*action)(val);
-	} while (!ret && atomic_read(val) != 0);
-	finish_wait(wq, &q->wait);
-	return ret;
-}
-
-#define DEFINE_WAIT_ATOMIC_T(name, p)					\
-	struct wait_bit_queue name = {					\
-		.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),		\
-		.wait	= {						\
-			.private	= current,			\
-			.func		= wake_atomic_t_function,	\
-			.task_list	=				\
-				LIST_HEAD_INIT((name).wait.task_list),	\
-		},							\
-	}
-
-__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
-					 unsigned mode)
-{
-	wait_queue_head_t *wq = atomic_t_waitqueue(p);
-	DEFINE_WAIT_ATOMIC_T(wait, p);
-
-	return __wait_on_atomic_t(wq, &wait, action, mode);
-}
-EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
-
-/**
- * wake_up_atomic_t - Wake up a waiter on a atomic_t
- * @p: The atomic_t being waited on, a kernel virtual address
- *
- * Wake up anyone waiting for the atomic_t to go to zero.
- *
- * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
- * check is done by the waiter's wake function, not the by the waker itself).
- */
-void wake_up_atomic_t(atomic_t *p)
-{
-	__wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
-}
-EXPORT_SYMBOL(wake_up_atomic_t);
-
-__sched int bit_wait(struct wait_bit_key *word, int mode)
-{
-	schedule();
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL(bit_wait);
-
-__sched int bit_wait_io(struct wait_bit_key *word, int mode)
-{
-	io_schedule();
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL(bit_wait_io);
-
-__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
-{
-	unsigned long now = READ_ONCE(jiffies);
-	if (time_after_eq(now, word->timeout))
-		return -EAGAIN;
-	schedule_timeout(word->timeout - now);
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bit_wait_timeout);
-
-__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
-{
-	unsigned long now = READ_ONCE(jiffies);
-	if (time_after_eq(now, word->timeout))
-		return -EAGAIN;
-	io_schedule_timeout(word->timeout - now);
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
new file mode 100644
index 000000000000..f8159698aa4d
--- /dev/null
+++ b/kernel/sched/wait_bit.c
@@ -0,0 +1,286 @@
+/*
+ * The implementation of the wait_bit*() and related waiting APIs:
+ */
+#include <linux/wait_bit.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/debug.h>
+#include <linux/hash.h>
+
+#define WAIT_TABLE_BITS 8
+#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
+
+static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
+
+wait_queue_head_t *bit_waitqueue(void *word, int bit)
+{
+	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
+	unsigned long val = (unsigned long)word << shift | bit;
+
+	return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
+}
+EXPORT_SYMBOL(bit_waitqueue);
+
+int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg)
+{
+	struct wait_bit_key *key = arg;
+	struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry);
+
+	if (wait_bit->key.flags != key->flags ||
+			wait_bit->key.bit_nr != key->bit_nr ||
+			test_bit(key->bit_nr, key->flags))
+		return 0;
+	else
+		return autoremove_wake_function(wq_entry, mode, sync, key);
+}
+EXPORT_SYMBOL(wake_bit_function);
+
+/*
+ * To allow interruptible waiting and asynchronous (i.e. nonblocking)
+ * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
+ * permitted return codes. Nonzero return codes halt waiting and return.
+ */
+int __sched
+__wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
+	      wait_bit_action_f *action, unsigned mode)
+{
+	int ret = 0;
+
+	do {
+		prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
+		if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
+			ret = (*action)(&wbq_entry->key, mode);
+	} while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
+	finish_wait(wq_head, &wbq_entry->wq_entry);
+	return ret;
+}
+EXPORT_SYMBOL(__wait_on_bit);
+
+int __sched out_of_line_wait_on_bit(void *word, int bit,
+				    wait_bit_action_f *action, unsigned mode)
+{
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
+
+	return __wait_on_bit(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_bit);
+
+int __sched out_of_line_wait_on_bit_timeout(
+	void *word, int bit, wait_bit_action_f *action,
+	unsigned mode, unsigned long timeout)
+{
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
+
+	wq_entry.key.timeout = jiffies + timeout;
+	return __wait_on_bit(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
+
+int __sched
+__wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
+			wait_bit_action_f *action, unsigned mode)
+{
+	int ret = 0;
+
+	for (;;) {
+		prepare_to_wait_exclusive(wq_head, &wbq_entry->wq_entry, mode);
+		if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) {
+			ret = action(&wbq_entry->key, mode);
+			/*
+			 * See the comment in prepare_to_wait_event().
+			 * finish_wait() does not necessarily takes wwq_head->lock,
+			 * but test_and_set_bit() implies mb() which pairs with
+			 * smp_mb__after_atomic() before wake_up_page().
+			 */
+			if (ret)
+				finish_wait(wq_head, &wbq_entry->wq_entry);
+		}
+		if (!test_and_set_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) {
+			if (!ret)
+				finish_wait(wq_head, &wbq_entry->wq_entry);
+			return 0;
+		} else if (ret) {
+			return ret;
+		}
+	}
+}
+EXPORT_SYMBOL(__wait_on_bit_lock);
+
+int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
+					 wait_bit_action_f *action, unsigned mode)
+{
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
+
+	return __wait_on_bit_lock(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
+
+void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit)
+{
+	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
+	if (waitqueue_active(wq_head))
+		__wake_up(wq_head, TASK_NORMAL, 1, &key);
+}
+EXPORT_SYMBOL(__wake_up_bit);
+
+/**
+ * wake_up_bit - wake up a waiter on a bit
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ *
+ * There is a standard hashed waitqueue table for generic use. This
+ * is the part of the hashtable's accessor API that wakes up waiters
+ * on a bit. For instance, if one were to have waiters on a bitflag,
+ * one would call wake_up_bit() after clearing the bit.
+ *
+ * In order for this to function properly, as it uses waitqueue_active()
+ * internally, some kind of memory barrier must be done prior to calling
+ * this. Typically, this will be smp_mb__after_atomic(), but in some
+ * cases where bitflags are manipulated non-atomically under a lock, one
+ * may need to use a less regular barrier, such fs/inode.c's smp_mb(),
+ * because spin_unlock() does not guarantee a memory barrier.
+ */
+void wake_up_bit(void *word, int bit)
+{
+	__wake_up_bit(bit_waitqueue(word, bit), word, bit);
+}
+EXPORT_SYMBOL(wake_up_bit);
+
+/*
+ * Manipulate the atomic_t address to produce a better bit waitqueue table hash
+ * index (we're keying off bit -1, but that would produce a horrible hash
+ * value).
+ */
+static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
+{
+	if (BITS_PER_LONG == 64) {
+		unsigned long q = (unsigned long)p;
+		return bit_waitqueue((void *)(q & ~1), q & 1);
+	}
+	return bit_waitqueue(p, 0);
+}
+
+static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync,
+				  void *arg)
+{
+	struct wait_bit_key *key = arg;
+	struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry);
+	atomic_t *val = key->flags;
+
+	if (wait_bit->key.flags != key->flags ||
+	    wait_bit->key.bit_nr != key->bit_nr ||
+	    atomic_read(val) != 0)
+		return 0;
+	return autoremove_wake_function(wq_entry, mode, sync, key);
+}
+
+/*
+ * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
+ * the actions of __wait_on_atomic_t() are permitted return codes.  Nonzero
+ * return codes halt waiting and return.
+ */
+static __sched
+int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
+		       int (*action)(atomic_t *), unsigned mode)
+{
+	atomic_t *val;
+	int ret = 0;
+
+	do {
+		prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
+		val = wbq_entry->key.flags;
+		if (atomic_read(val) == 0)
+			break;
+		ret = (*action)(val);
+	} while (!ret && atomic_read(val) != 0);
+	finish_wait(wq_head, &wbq_entry->wq_entry);
+	return ret;
+}
+
+#define DEFINE_WAIT_ATOMIC_T(name, p)					\
+	struct wait_bit_queue_entry name = {				\
+		.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),		\
+		.wq_entry = {						\
+			.private	= current,			\
+			.func		= wake_atomic_t_function,	\
+			.entry		=				\
+				LIST_HEAD_INIT((name).wq_entry.entry),	\
+		},							\
+	}
+
+__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
+					 unsigned mode)
+{
+	struct wait_queue_head *wq_head = atomic_t_waitqueue(p);
+	DEFINE_WAIT_ATOMIC_T(wq_entry, p);
+
+	return __wait_on_atomic_t(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
+
+/**
+ * wake_up_atomic_t - Wake up a waiter on a atomic_t
+ * @p: The atomic_t being waited on, a kernel virtual address
+ *
+ * Wake up anyone waiting for the atomic_t to go to zero.
+ *
+ * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
+ * check is done by the waiter's wake function, not the by the waker itself).
+ */
+void wake_up_atomic_t(atomic_t *p)
+{
+	__wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
+}
+EXPORT_SYMBOL(wake_up_atomic_t);
+
+__sched int bit_wait(struct wait_bit_key *word, int mode)
+{
+	schedule();
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL(bit_wait);
+
+__sched int bit_wait_io(struct wait_bit_key *word, int mode)
+{
+	io_schedule();
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL(bit_wait_io);
+
+__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
+{
+	unsigned long now = READ_ONCE(jiffies);
+	if (time_after_eq(now, word->timeout))
+		return -EAGAIN;
+	schedule_timeout(word->timeout - now);
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bit_wait_timeout);
+
+__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
+{
+	unsigned long now = READ_ONCE(jiffies);
+	if (time_after_eq(now, word->timeout))
+		return -EAGAIN;
+	io_schedule_timeout(word->timeout - now);
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
+
+void __init wait_bit_init(void)
+{
+	int i;
+
+	for (i = 0; i < WAIT_TABLE_SIZE; i++)
+		init_waitqueue_head(bit_wait_table + i);
+}
diff --git a/kernel/signal.c b/kernel/signal.c
index a8c54f384553..9b28954e058f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -510,7 +510,8 @@ int unhandled_signal(struct task_struct *tsk, int sig)
 	return !tsk->ptrace;
 }
 
-static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+static void collect_signal(int sig, struct sigpending *list, siginfo_t *info,
+			   bool *resched_timer)
 {
 	struct sigqueue *q, *first = NULL;
 
@@ -532,6 +533,12 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
 still_pending:
 		list_del_init(&first->list);
 		copy_siginfo(info, &first->info);
+
+		*resched_timer =
+			(first->flags & SIGQUEUE_PREALLOC) &&
+			(info->si_code == SI_TIMER) &&
+			(info->si_sys_private);
+
 		__sigqueue_free(first);
 	} else {
 		/*
@@ -548,12 +555,12 @@ still_pending:
 }
 
 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
-			siginfo_t *info)
+			siginfo_t *info, bool *resched_timer)
 {
 	int sig = next_signal(pending, mask);
 
 	if (sig)
-		collect_signal(sig, pending, info);
+		collect_signal(sig, pending, info, resched_timer);
 	return sig;
 }
 
@@ -565,15 +572,16 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
  */
 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 {
+	bool resched_timer = false;
 	int signr;
 
 	/* We only dequeue private signals from ourselves, we don't let
 	 * signalfd steal them
 	 */
-	signr = __dequeue_signal(&tsk->pending, mask, info);
+	signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer);
 	if (!signr) {
 		signr = __dequeue_signal(&tsk->signal->shared_pending,
-					 mask, info);
+					 mask, info, &resched_timer);
 #ifdef CONFIG_POSIX_TIMERS
 		/*
 		 * itimer signal ?
@@ -621,7 +629,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 		current->jobctl |= JOBCTL_STOP_DEQUEUED;
 	}
 #ifdef CONFIG_POSIX_TIMERS
-	if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
+	if (resched_timer) {
 		/*
 		 * Release the siglock to ensure proper locking order
 		 * of timer locks outside of siglocks.  Note, we leave
@@ -1237,7 +1245,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
 		}
 		/*
 		 * This sighand can be already freed and even reused, but
-		 * we rely on SLAB_DESTROY_BY_RCU and sighand_ctor() which
+		 * we rely on SLAB_TYPESAFE_BY_RCU and sighand_ctor() which
 		 * initializes ->siglock: this slab can't go away, it has
 		 * the same object type, ->siglock can't be reinitialized.
 		 *
@@ -2092,7 +2100,6 @@ static void do_jobctl_trap(void)
 
 static int ptrace_signal(int signr, siginfo_t *info)
 {
-	ptrace_signal_deliver();
 	/*
 	 * We do not check sig_kernel_stop(signr) but set this marker
 	 * unconditionally because we do not know whether debugger will
diff --git a/kernel/smp.c b/kernel/smp.c
index a817769b53c0..3061483cb3ad 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -30,6 +30,7 @@ enum {
 struct call_function_data {
 	struct call_single_data	__percpu *csd;
 	cpumask_var_t		cpumask;
+	cpumask_var_t		cpumask_ipi;
 };
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
@@ -45,9 +46,15 @@ int smpcfd_prepare_cpu(unsigned int cpu)
 	if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
 				     cpu_to_node(cpu)))
 		return -ENOMEM;
+	if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
+				     cpu_to_node(cpu))) {
+		free_cpumask_var(cfd->cpumask);
+		return -ENOMEM;
+	}
 	cfd->csd = alloc_percpu(struct call_single_data);
 	if (!cfd->csd) {
 		free_cpumask_var(cfd->cpumask);
+		free_cpumask_var(cfd->cpumask_ipi);
 		return -ENOMEM;
 	}
 
@@ -59,6 +66,7 @@ int smpcfd_dead_cpu(unsigned int cpu)
 	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
 
 	free_cpumask_var(cfd->cpumask);
+	free_cpumask_var(cfd->cpumask_ipi);
 	free_percpu(cfd->csd);
 	return 0;
 }
@@ -428,12 +436,13 @@ void smp_call_function_many(const struct cpumask *mask,
 	cfd = this_cpu_ptr(&cfd_data);
 
 	cpumask_and(cfd->cpumask, mask, cpu_online_mask);
-	cpumask_clear_cpu(this_cpu, cfd->cpumask);
+	__cpumask_clear_cpu(this_cpu, cfd->cpumask);
 
 	/* Some callers race with other cpus changing the passed mask */
 	if (unlikely(!cpumask_weight(cfd->cpumask)))
 		return;
 
+	cpumask_clear(cfd->cpumask_ipi);
 	for_each_cpu(cpu, cfd->cpumask) {
 		struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
 
@@ -442,11 +451,12 @@ void smp_call_function_many(const struct cpumask *mask,
 			csd->flags |= CSD_FLAG_SYNCHRONOUS;
 		csd->func = func;
 		csd->info = info;
-		llist_add(&csd->llist, &per_cpu(call_single_queue, cpu));
+		if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
+			__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
 	}
 
 	/* Send a message to all CPUs in the map */
-	arch_send_call_function_ipi_mask(cfd->cpumask);
+	arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
 
 	if (wait) {
 		for_each_cpu(cpu, cfd->cpumask) {
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index ece4b177052b..939a158eab11 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1119,7 +1119,7 @@ static ssize_t bin_uuid(struct file *file,
 	/* Only supports reads */
 	if (oldval && oldlen) {
 		char buf[UUID_STRING_LEN + 1];
-		uuid_be uuid;
+		uuid_t uuid;
 
 		result = kernel_read(file, 0, buf, sizeof(buf) - 1);
 		if (result < 0)
@@ -1128,7 +1128,7 @@ static ssize_t bin_uuid(struct file *file,
 		buf[result] = '\0';
 
 		result = -EIO;
-		if (uuid_be_to_bin(buf, &uuid))
+		if (uuid_parse(buf, &uuid))
 			goto out;
 
 		if (oldlen > 16)
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 4008d9f95dd7..ac09bc29eb08 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -126,56 +126,6 @@ config NO_HZ_FULL_ALL
 	 Note the boot CPU will still be kept outside the range to
 	 handle the timekeeping duty.
 
-config NO_HZ_FULL_SYSIDLE
-	bool "Detect full-system idle state for full dynticks system"
-	depends on NO_HZ_FULL
-	default n
-	help
-	 At least one CPU must keep the scheduling-clock tick running for
-	 timekeeping purposes whenever there is a non-idle CPU, where
-	 "non-idle" also includes dynticks CPUs as long as they are
-	 running non-idle tasks.  Because the underlying adaptive-tick
-	 support cannot distinguish between all CPUs being idle and
-	 all CPUs each running a single task in dynticks mode, the
-	 underlying support simply ensures that there is always a CPU
-	 handling the scheduling-clock tick, whether or not all CPUs
-	 are idle.  This Kconfig option enables scalable detection of
-	 the all-CPUs-idle state, thus allowing the scheduling-clock
-	 tick to be disabled when all CPUs are idle.  Note that scalable
-	 detection of the all-CPUs-idle state means that larger systems
-	 will be slower to declare the all-CPUs-idle state.
-
-	 Say Y if you would like to help debug all-CPUs-idle detection.
-
-	 Say N if you are unsure.
-
-config NO_HZ_FULL_SYSIDLE_SMALL
-	int "Number of CPUs above which large-system approach is used"
-	depends on NO_HZ_FULL_SYSIDLE
-	range 1 NR_CPUS
-	default 8
-	help
-	 The full-system idle detection mechanism takes a lazy approach
-	 on large systems, as is required to attain decent scalability.
-	 However, on smaller systems, scalability is not anywhere near as
-	 large a concern as is energy efficiency.  The sysidle subsystem
-	 therefore uses a fast but non-scalable algorithm for small
-	 systems and a lazier but scalable algorithm for large systems.
-	 This Kconfig parameter defines the number of CPUs in the largest
-	 system that will be considered to be "small".
-
-	 The default value will be fine in most cases.	Battery-powered
-	 systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger
-	 numbers of CPUs, and (3) are suffering from battery-lifetime
-	 problems due to long sysidle latencies might wish to experiment
-	 with larger values for this Kconfig parameter.  On the other
-	 hand, they might be even better served by disabling NO_HZ_FULL
-	 entirely, given that NO_HZ_FULL is intended for HPC and
-	 real-time workloads that at present do not tend to be run on
-	 battery-powered systems.
-
-	 Take the default if you are unsure.
-
 config NO_HZ
 	bool "Old Idle dynticks config"
 	depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 5cb5b0008d97..ee2f4202d82a 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -387,7 +387,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start)
 {
 	struct alarm_base *base = &alarm_bases[alarm->type];
 
-	start = ktime_add(start, base->gettime());
+	start = ktime_add_safe(start, base->gettime());
 	alarm_start(alarm, start);
 }
 EXPORT_SYMBOL_GPL(alarm_start_relative);
@@ -475,7 +475,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
 		overrun++;
 	}
 
-	alarm->node.expires = ktime_add(alarm->node.expires, interval);
+	alarm->node.expires = ktime_add_safe(alarm->node.expires, interval);
 	return overrun;
 }
 EXPORT_SYMBOL_GPL(alarm_forward);
@@ -660,13 +660,21 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
 
 	/* start the timer */
 	timr->it.alarm.interval = timespec64_to_ktime(new_setting->it_interval);
+
+	/*
+	 * Rate limit to the tick as a hot fix to prevent DOS. Will be
+	 * mopped up later.
+	 */
+	if (timr->it.alarm.interval < TICK_NSEC)
+		timr->it.alarm.interval = TICK_NSEC;
+
 	exp = timespec64_to_ktime(new_setting->it_value);
 	/* Convert (if necessary) to absolute time */
 	if (flags != TIMER_ABSTIME) {
 		ktime_t now;
 
 		now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
-		exp = ktime_add(now, exp);
+		exp = ktime_add_safe(now, exp);
 	}
 
 	alarm_start(&timr->it.alarm.alarmtimer, exp);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 93621ae718d3..03918a19cf2d 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -233,6 +233,9 @@ static void clocksource_watchdog(unsigned long data)
 			continue;
 		}
 
+		if (cs == curr_clocksource && cs->tick_stable)
+			cs->tick_stable(cs);
+
 		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
 		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
 		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 1370f067fb51..d2a1e6dd0291 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -825,8 +825,10 @@ static void check_thread_timers(struct task_struct *tsk,
 			 * At the hard limit, we just die.
 			 * No need to calculate anything else now.
 			 */
-			pr_info("CPU Watchdog Timeout (hard): %s[%d]\n",
-				tsk->comm, task_pid_nr(tsk));
+			if (print_fatal_signals) {
+				pr_info("CPU Watchdog Timeout (hard): %s[%d]\n",
+					tsk->comm, task_pid_nr(tsk));
+			}
 			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
 			return;
 		}
@@ -838,8 +840,10 @@ static void check_thread_timers(struct task_struct *tsk,
 				soft += USEC_PER_SEC;
 				sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
 			}
-			pr_info("RT Watchdog Timeout (soft): %s[%d]\n",
-				tsk->comm, task_pid_nr(tsk));
+			if (print_fatal_signals) {
+				pr_info("RT Watchdog Timeout (soft): %s[%d]\n",
+					tsk->comm, task_pid_nr(tsk));
+			}
 			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 		}
 	}
@@ -936,8 +940,10 @@ static void check_process_timers(struct task_struct *tsk,
 			 * At the hard limit, we just die.
 			 * No need to calculate anything else now.
 			 */
-			pr_info("RT Watchdog Timeout (hard): %s[%d]\n",
-				tsk->comm, task_pid_nr(tsk));
+			if (print_fatal_signals) {
+				pr_info("RT Watchdog Timeout (hard): %s[%d]\n",
+					tsk->comm, task_pid_nr(tsk));
+			}
 			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
 			return;
 		}
@@ -945,8 +951,10 @@ static void check_process_timers(struct task_struct *tsk,
 			/*
 			 * At the soft limit, send a SIGXCPU every second.
 			 */
-			pr_info("CPU Watchdog Timeout (soft): %s[%d]\n",
-				tsk->comm, task_pid_nr(tsk));
+			if (print_fatal_signals) {
+				pr_info("CPU Watchdog Timeout (soft): %s[%d]\n",
+					tsk->comm, task_pid_nr(tsk));
+			}
 			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 			if (soft < hard) {
 				soft++;
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 987e496bb51a..b398c2ea69b2 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -37,9 +37,11 @@ static int tick_broadcast_forced;
 static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
 
 #ifdef CONFIG_TICK_ONESHOT
+static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 static void tick_broadcast_clear_oneshot(int cpu);
 static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 #else
+static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_clear_oneshot(int cpu) { }
 static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
 #endif
@@ -867,7 +869,7 @@ static void tick_broadcast_init_next_event(struct cpumask *mask,
 /**
  * tick_broadcast_setup_oneshot - setup the broadcast device
  */
-void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 {
 	int cpu = smp_processor_id();
 
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index f738251000fe..be0ac01f2e12 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -126,7 +126,6 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 
 /* Functions related to oneshot broadcasting */
 #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
-extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 extern void tick_broadcast_switch_to_oneshot(void);
 extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
 extern int tick_broadcast_oneshot_active(void);
@@ -134,7 +133,6 @@ extern void tick_check_oneshot_broadcast_this_cpu(void);
 bool tick_broadcast_oneshot_available(void);
 extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
 #else /* !(BROADCAST && ONESHOT): */
-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
 static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 64c97fc130c4..c7a899c5ce64 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -150,6 +150,12 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 		touch_softlockup_watchdog_sched();
 		if (is_idle_task(current))
 			ts->idle_jiffies++;
+		/*
+		 * In case the current tick fired too early past its expected
+		 * expiration, make sure we don't bypass the next clock reprogramming
+		 * to the same deadline.
+		 */
+		ts->next_tick = 0;
 	}
 #endif
 	update_process_times(user_mode(regs));
@@ -554,7 +560,7 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
 	update_ts_time_stats(smp_processor_id(), ts, now, NULL);
 	ts->idle_active = 0;
 
-	sched_clock_idle_wakeup_event(0);
+	sched_clock_idle_wakeup_event();
 }
 
 static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
@@ -660,6 +666,12 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
 	else
 		tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+
+	/*
+	 * Reset to make sure next tick stop doesn't get fooled by past
+	 * cached clock deadline.
+	 */
+	ts->next_tick = 0;
 }
 
 static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
@@ -701,8 +713,6 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	 */
 	delta = next_tick - basemono;
 	if (delta <= (u64)TICK_NSEC) {
-		tick = 0;
-
 		/*
 		 * Tell the timer code that the base is not idle, i.e. undo
 		 * the effect of get_next_timer_interrupt():
@@ -712,23 +722,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		 * We've not stopped the tick yet, and there's a timer in the
 		 * next period, so no point in stopping it either, bail.
 		 */
-		if (!ts->tick_stopped)
-			goto out;
-
-		/*
-		 * If, OTOH, we did stop it, but there's a pending (expired)
-		 * timer reprogram the timer hardware to fire now.
-		 *
-		 * We will not restart the tick proper, just prod the timer
-		 * hardware into firing an interrupt to process the pending
-		 * timers. Just like tick_irq_exit() will not restart the tick
-		 * for 'normal' interrupts.
-		 *
-		 * Only once we exit the idle loop will we re-enable the tick,
-		 * see tick_nohz_idle_exit().
-		 */
-		if (delta == 0) {
-			tick_nohz_restart(ts, now);
+		if (!ts->tick_stopped) {
+			tick = 0;
 			goto out;
 		}
 	}
@@ -771,8 +766,16 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	tick = expires;
 
 	/* Skip reprogram of event if its not changed */
-	if (ts->tick_stopped && (expires == dev->next_event))
-		goto out;
+	if (ts->tick_stopped && (expires == ts->next_tick)) {
+		/* Sanity check: make sure clockevent is actually programmed */
+		if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
+			goto out;
+
+		WARN_ON_ONCE(1);
+		printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
+			    basemono, ts->next_tick, dev->next_event,
+			    hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer));
+	}
 
 	/*
 	 * nohz_stop_sched_tick can be called several times before
@@ -782,8 +785,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	 * the scheduler tick in nohz_restart_sched_tick.
 	 */
 	if (!ts->tick_stopped) {
-		nohz_balance_enter_idle(cpu);
-		calc_load_enter_idle();
+		calc_load_nohz_start();
 		cpu_load_update_nohz_start();
 
 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
@@ -791,6 +793,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		trace_tick_stop(1, TICK_DEP_MASK_NONE);
 	}
 
+	ts->next_tick = tick;
+
 	/*
 	 * If the expiration time == KTIME_MAX, then we simply stop
 	 * the tick timer.
@@ -801,12 +805,17 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		goto out;
 	}
 
+	hrtimer_set_expires(&ts->sched_timer, tick);
+
 	if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
-		hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
 	else
 		tick_program_event(tick, 1);
 out:
-	/* Update the estimated sleep length */
+	/*
+	 * Update the estimated sleep length until the next timer
+	 * (not only the tick).
+	 */
 	ts->sleep_length = ktime_sub(dev->next_event, now);
 	return tick;
 }
@@ -823,7 +832,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 	 */
 	timer_clear_idle();
 
-	calc_load_exit_idle();
+	calc_load_nohz_stop();
 	touch_softlockup_watchdog_sched();
 	/*
 	 * Cancel the scheduled timer and restore the tick
@@ -864,6 +873,11 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 	if (unlikely(!cpu_online(cpu))) {
 		if (cpu == tick_do_timer_cpu)
 			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+		/*
+		 * Make sure the CPU doesn't get fooled by obsolete tick
+		 * deadline if it comes back online later.
+		 */
+		ts->next_tick = 0;
 		return false;
 	}
 
@@ -923,8 +937,10 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts)
 			ts->idle_expires = expires;
 		}
 
-		if (!was_stopped && ts->tick_stopped)
+		if (!was_stopped && ts->tick_stopped) {
 			ts->idle_jiffies = ts->last_jiffies;
+			nohz_balance_enter_idle(cpu);
+		}
 	}
 }
 
@@ -1172,6 +1188,8 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
 	 */
 	if (regs)
 		tick_sched_handle(ts, regs);
+	else
+		ts->next_tick = 0;
 
 	/* No need to reprogram if we are in idle or full dynticks mode */
 	if (unlikely(ts->tick_stopped))
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index bf38226e5c17..075444e3d48e 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -27,6 +27,7 @@ enum tick_nohz_mode {
  *			timer is modified for nohz sleeps. This is necessary
  *			to resume the tick timer operation in the timeline
  *			when the CPU returns from nohz sleep.
+ * @next_tick:		Next tick to be fired when in dynticks mode.
  * @tick_stopped:	Indicator that the idle tick has been stopped
  * @idle_jiffies:	jiffies at the entry to idle for idle time accounting
  * @idle_calls:		Total number of idle calls
@@ -44,6 +45,7 @@ struct tick_sched {
 	unsigned long			check_clocks;
 	enum tick_nohz_mode		nohz_mode;
 	ktime_t				last_tick;
+	ktime_t				next_tick;
 	int				inidle;
 	int				tick_stopped;
 	unsigned long			idle_jiffies;
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 6574bba44b55..49c73c6ed648 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -230,20 +230,6 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
 	return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
 }
 
-/**
- * current_fs_time - Return FS time
- * @sb: Superblock.
- *
- * Return the current time truncated to the time granularity supported by
- * the fs.
- */
-struct timespec current_fs_time(struct super_block *sb)
-{
-	struct timespec now = current_kernel_time();
-	return timespec_trunc(now, sb->s_time_gran);
-}
-EXPORT_SYMBOL(current_fs_time);
-
 /*
  * Convert jiffies to milliseconds and back.
  *
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 9652bc57fd09..b602c48cb841 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -118,6 +118,26 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 	tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }
 
+/*
+ * tk_clock_read - atomic clocksource read() helper
+ *
+ * This helper is necessary to use in the read paths because, while the
+ * seqlock ensures we don't return a bad value while structures are updated,
+ * it doesn't protect from potential crashes. There is the possibility that
+ * the tkr's clocksource may change between the read reference, and the
+ * clock reference passed to the read function.  This can cause crashes if
+ * the wrong clocksource is passed to the wrong read function.
+ * This isn't necessary to use when holding the timekeeper_lock or doing
+ * a read of the fast-timekeeper tkrs (which is protected by its own locking
+ * and update logic).
+ */
+static inline u64 tk_clock_read(struct tk_read_base *tkr)
+{
+	struct clocksource *clock = READ_ONCE(tkr->clock);
+
+	return clock->read(clock);
+}
+
 #ifdef CONFIG_DEBUG_TIMEKEEPING
 #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
 
@@ -175,7 +195,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 	 */
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		now = tkr->read(tkr->clock);
+		now = tk_clock_read(tkr);
 		last = tkr->cycle_last;
 		mask = tkr->mask;
 		max = tkr->clock->max_cycles;
@@ -209,7 +229,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 	u64 cycle_now, delta;
 
 	/* read clocksource */
-	cycle_now = tkr->read(tkr->clock);
+	cycle_now = tk_clock_read(tkr);
 
 	/* calculate the delta since the last update_wall_time */
 	delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -238,12 +258,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	++tk->cs_was_changed_seq;
 	old_clock = tk->tkr_mono.clock;
 	tk->tkr_mono.clock = clock;
-	tk->tkr_mono.read = clock->read;
 	tk->tkr_mono.mask = clock->mask;
-	tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+	tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
 
 	tk->tkr_raw.clock = clock;
-	tk->tkr_raw.read = clock->read;
 	tk->tkr_raw.mask = clock->mask;
 	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
 
@@ -262,7 +280,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	/* Go back from cycles -> shifted ns */
 	tk->xtime_interval = interval * clock->mult;
 	tk->xtime_remainder = ntpinterval - tk->xtime_interval;
-	tk->raw_interval = (interval * clock->mult) >> clock->shift;
+	tk->raw_interval = interval * clock->mult;
 
 	 /* if changing clocks, convert xtime_nsec shift units */
 	if (old_clock) {
@@ -404,7 +422,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 
 		now += timekeeping_delta_to_ns(tkr,
 				clocksource_delta(
-					tkr->read(tkr->clock),
+					tk_clock_read(tkr),
 					tkr->cycle_last,
 					tkr->mask));
 	} while (read_seqcount_retry(&tkf->seq, seq));
@@ -461,6 +479,10 @@ static u64 dummy_clock_read(struct clocksource *cs)
 	return cycles_at_suspend;
 }
 
+static struct clocksource dummy_clock = {
+	.read = dummy_clock_read,
+};
+
 /**
  * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
  * @tk: Timekeeper to snapshot.
@@ -477,13 +499,13 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
 	struct tk_read_base *tkr = &tk->tkr_mono;
 
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-	cycles_at_suspend = tkr->read(tkr->clock);
-	tkr_dummy.read = dummy_clock_read;
+	cycles_at_suspend = tk_clock_read(tkr);
+	tkr_dummy.clock = &dummy_clock;
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
 
 	tkr = &tk->tkr_raw;
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-	tkr_dummy.read = dummy_clock_read;
+	tkr_dummy.clock = &dummy_clock;
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
 }
 
@@ -649,11 +671,10 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
  */
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
-	struct clocksource *clock = tk->tkr_mono.clock;
 	u64 cycle_now, delta;
 	u64 nsec;
 
-	cycle_now = tk->tkr_mono.read(clock);
+	cycle_now = tk_clock_read(&tk->tkr_mono);
 	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 	tk->tkr_mono.cycle_last = cycle_now;
 	tk->tkr_raw.cycle_last  = cycle_now;
@@ -929,8 +950,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-
-		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		now = tk_clock_read(&tk->tkr_mono);
 		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
 		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
 		base_real = ktime_add(tk->tkr_mono.base,
@@ -1108,7 +1128,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
 		 * Check whether the system counter value provided by the
 		 * device driver is on the current timekeeping interval.
 		 */
-		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		now = tk_clock_read(&tk->tkr_mono);
 		interval_start = tk->tkr_mono.cycle_last;
 		if (!cycle_between(interval_start, cycles, now)) {
 			clock_was_set_seq = tk->clock_was_set_seq;
@@ -1629,7 +1649,7 @@ void timekeeping_resume(void)
 	 * The less preferred source will only be tried if there is no better
 	 * usable source. The rtc part is handled separately in rtc core code.
 	 */
-	cycle_now = tk->tkr_mono.read(clock);
+	cycle_now = tk_clock_read(&tk->tkr_mono);
 	if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
 		cycle_now > tk->tkr_mono.cycle_last) {
 		u64 nsec, cyc_delta;
@@ -1976,7 +1996,7 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
 				    u32 shift, unsigned int *clock_set)
 {
 	u64 interval = tk->cycle_interval << shift;
-	u64 raw_nsecs;
+	u64 snsec_per_sec;
 
 	/* If the offset is smaller than a shifted interval, do nothing */
 	if (offset < interval)
@@ -1991,14 +2011,15 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
 	*clock_set |= accumulate_nsecs_to_secs(tk);
 
 	/* Accumulate raw time */
-	raw_nsecs = (u64)tk->raw_interval << shift;
-	raw_nsecs += tk->raw_time.tv_nsec;
-	if (raw_nsecs >= NSEC_PER_SEC) {
-		u64 raw_secs = raw_nsecs;
-		raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
-		tk->raw_time.tv_sec += raw_secs;
+	tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
+	tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
+	snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+	while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
+		tk->tkr_raw.xtime_nsec -= snsec_per_sec;
+		tk->raw_time.tv_sec++;
 	}
-	tk->raw_time.tv_nsec = raw_nsecs;
+	tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
+	tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
 
 	/* Accumulate error between NTP and clock interval */
 	tk->ntp_error += tk->ntp_tick << shift;
@@ -2030,7 +2051,7 @@ void update_wall_time(void)
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
 	offset = real_tk->cycle_interval;
 #else
-	offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+	offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
 				   tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 #endif
 
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index bd8ae8d5ae9c..bc364f86100a 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -867,7 +867,7 @@ static void blk_add_trace_split(void *ignore,
 
 		__blk_add_trace(bt, bio->bi_iter.bi_sector,
 				bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
-				BLK_TA_SPLIT, bio->bi_error, sizeof(rpdu),
+				BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
 				&rpdu);
 	}
 }
@@ -900,7 +900,7 @@ static void blk_add_trace_bio_remap(void *ignore,
 	r.sector_from = cpu_to_be64(from);
 
 	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
-			bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_error,
+			bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
 			sizeof(r), &r);
 }
 
@@ -1662,14 +1662,14 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 		goto out;
 
 	if (attr == &dev_attr_act_mask) {
-		if (sscanf(buf, "%llx", &value) != 1) {
+		if (kstrtoull(buf, 0, &value)) {
 			/* Assume it is a list of trace category names */
 			ret = blk_trace_str2mask(buf);
 			if (ret < 0)
 				goto out;
 			value = ret;
 		}
-	} else if (sscanf(buf, "%llu", &value) != 1)
+	} else if (kstrtoull(buf, 0, &value))
 		goto out;
 
 	ret = -ENXIO;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 39dca4e86a94..b308be30dfb9 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4144,9 +4144,9 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
 	int i, ret = -ENODEV;
 	int size;
 
-	if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
+	if (!glob || !strlen(glob) || !strcmp(glob, "*"))
 		func_g.search = NULL;
-	else if (glob) {
+	else {
 		int not;
 
 		func_g.type = filter_parse_regex(glob, strlen(glob),
@@ -4256,6 +4256,14 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
 	return ret;
 }
 
+void clear_ftrace_function_probes(struct trace_array *tr)
+{
+	struct ftrace_func_probe *probe, *n;
+
+	list_for_each_entry_safe(probe, n, &tr->func_probes, list)
+		unregister_ftrace_function_probe_func(NULL, tr, probe->probe_ops);
+}
+
 static LIST_HEAD(ftrace_commands);
 static DEFINE_MUTEX(ftrace_cmd_mutex);
 
@@ -4329,9 +4337,6 @@ static int ftrace_process_regex(struct ftrace_iterator *iter,
 
 	command = strsep(&next, ":");
 
-	if (WARN_ON_ONCE(!tr))
-		return -EINVAL;
-
 	mutex_lock(&ftrace_cmd_mutex);
 	list_for_each_entry(p, &ftrace_commands, list) {
 		if (strcmp(p->name, command) == 0) {
@@ -5055,7 +5060,7 @@ ftrace_graph_release(struct inode *inode, struct file *file)
 	}
 
  out:
-	kfree(fgd->new_hash);
+	free_ftrace_hash(fgd->new_hash);
 	kfree(fgd);
 
 	return ret;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4ad4420b33d6..091e801145c9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1558,7 +1558,7 @@ static __init int init_trace_selftests(void)
 
 	return 0;
 }
-early_initcall(init_trace_selftests);
+core_initcall(init_trace_selftests);
 #else
 static inline int run_tracer_selftest(struct tracer *type)
 {
@@ -2568,7 +2568,36 @@ static inline void ftrace_trace_stack(struct trace_array *tr,
 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
 		   int pc)
 {
-	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
+	struct ring_buffer *buffer = tr->trace_buffer.buffer;
+
+	if (rcu_is_watching()) {
+		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
+		return;
+	}
+
+	/*
+	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
+	 * but if the above rcu_is_watching() failed, then the NMI
+	 * triggered someplace critical, and rcu_irq_enter() should
+	 * not be called from NMI.
+	 */
+	if (unlikely(in_nmi()))
+		return;
+
+	/*
+	 * It is possible that a function is being traced in a
+	 * location that RCU is not watching. A call to
+	 * rcu_irq_enter() will make sure that it is, but there's
+	 * a few internal rcu functions that could be traced
+	 * where that wont work either. In those cases, we just
+	 * do nothing.
+	 */
+	if (unlikely(rcu_irq_enter_disabled()))
+		return;
+
+	rcu_irq_enter_irqson();
+	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
+	rcu_irq_exit_irqson();
 }
 
 /**
@@ -3311,13 +3340,14 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
 		return;
 
-	if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
+	if (cpumask_available(iter->started) &&
+	    cpumask_test_cpu(iter->cpu, iter->started))
 		return;
 
 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
 		return;
 
-	if (iter->started)
+	if (cpumask_available(iter->started))
 		cpumask_set_cpu(iter->cpu, iter->started);
 
 	/* Don't print started cpu buffer for the first entry of the trace */
@@ -6851,6 +6881,9 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
 	char *number;
 	int ret;
 
+	if (!tr)
+		return -ENODEV;
+
 	/* hash funcs only work with set_ftrace_filter */
 	if (!enable)
 		return -EINVAL;
@@ -7549,6 +7582,7 @@ static int instance_rmdir(const char *name)
 	}
 
 	tracing_set_nop(tr);
+	clear_ftrace_function_probes(tr);
 	event_trace_del_tracer(tr);
 	ftrace_clear_pids(tr);
 	ftrace_destroy_function_files(tr);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 291a1bca5748..39fd77330aab 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -980,6 +980,7 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 extern int
 unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
 				      struct ftrace_probe_ops *ops);
+extern void clear_ftrace_function_probes(struct trace_array *tr);
 
 int register_ftrace_command(struct ftrace_func_command *cmd);
 int unregister_ftrace_command(struct ftrace_func_command *cmd);
@@ -998,6 +999,10 @@ static inline __init int unregister_ftrace_command(char *cmd_name)
 {
 	return -EINVAL;
 }
+static inline void clear_ftrace_function_probes(struct trace_array *tr)
+{
+}
+
 /*
  * The ops parameter passed in is usually undefined.
  * This must be a macro.
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index a3bddbfd0874..a0910c0cdf2e 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -654,6 +654,9 @@ ftrace_trace_onoff_callback(struct trace_array *tr, struct ftrace_hash *hash,
 {
 	struct ftrace_probe_ops *ops;
 
+	if (!tr)
+		return -ENODEV;
+
 	/* we register both traceon and traceoff to this callback */
 	if (strcmp(cmd, "traceon") == 0)
 		ops = param ? &traceon_count_probe_ops : &traceon_probe_ops;
@@ -670,6 +673,9 @@ ftrace_stacktrace_callback(struct trace_array *tr, struct ftrace_hash *hash,
 {
 	struct ftrace_probe_ops *ops;
 
+	if (!tr)
+		return -ENODEV;
+
 	ops = param ? &stacktrace_count_probe_ops : &stacktrace_probe_ops;
 
 	return ftrace_trace_probe_callback(tr, ops, hash, glob, cmd,
@@ -682,6 +688,9 @@ ftrace_dump_callback(struct trace_array *tr, struct ftrace_hash *hash,
 {
 	struct ftrace_probe_ops *ops;
 
+	if (!tr)
+		return -ENODEV;
+
 	ops = &dump_probe_ops;
 
 	/* Only dump once. */
@@ -695,6 +704,9 @@ ftrace_cpudump_callback(struct trace_array *tr, struct ftrace_hash *hash,
 {
 	struct ftrace_probe_ops *ops;
 
+	if (!tr)
+		return -ENODEV;
+
 	ops = &cpudump_probe_ops;
 
 	/* Only dump once. */
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 8485f6738a87..b53c8d369163 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -707,20 +707,16 @@ static int create_trace_kprobe(int argc, char **argv)
 		pr_info("Probe point is not specified.\n");
 		return -EINVAL;
 	}
-	if (isdigit(argv[1][0])) {
-		/* an address specified */
-		ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
-		if (ret) {
-			pr_info("Failed to parse address.\n");
-			return ret;
-		}
-	} else {
+
+	/* try to parse an address. if that fails, try to read the
+	 * input as a symbol. */
+	if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
 		/* a symbol specified */
 		symbol = argv[1];
 		/* TODO: support .init module functions */
 		ret = traceprobe_split_symbol_offset(symbol, &offset);
 		if (ret) {
-			pr_info("Failed to parse symbol.\n");
+			pr_info("Failed to parse either an address or a symbol.\n");
 			return ret;
 		}
 		if (offset && is_return &&
@@ -1535,6 +1531,11 @@ static __init int kprobe_trace_self_tests_init(void)
 
 end:
 	release_all_trace_kprobes();
+	/*
+	 * Wait for the optimizer work to finish. Otherwise it might fiddle
+	 * with probes in already freed __init text.
+	 */
+	wait_for_kprobe_optimizer();
 	if (warn)
 		pr_cont("NG: Some tests are failed. Please check them.\n");
 	else
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 76aa04d4c925..b4a751e8f9d6 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -409,7 +409,9 @@ static const struct file_operations stack_trace_fops = {
 static int
 stack_trace_filter_open(struct inode *inode, struct file *file)
 {
-	return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
+	struct ftrace_ops *ops = inode->i_private;
+
+	return ftrace_regex_open(ops, FTRACE_ITER_FILTER,
 				 inode, file);
 }
 
@@ -476,7 +478,7 @@ static __init int stack_trace_init(void)
 			NULL, &stack_trace_fops);
 
 	trace_create_file("stack_trace_filter", 0444, d_tracer,
-			NULL, &stack_trace_filter_fops);
+			  &trace_ops, &stack_trace_filter_fops);
 
 	if (stack_trace_filter_buf[0])
 		ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c74bf39ef764..a86688fabc55 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2864,11 +2864,11 @@ bool flush_work(struct work_struct *work)
 EXPORT_SYMBOL_GPL(flush_work);
 
 struct cwt_wait {
-	wait_queue_t		wait;
+	wait_queue_entry_t		wait;
 	struct work_struct	*work;
 };
 
-static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e4587ebe52c7..9c5d40a50930 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1052,6 +1052,7 @@ config DEBUG_LOCK_ALLOC
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
 	select DEBUG_SPINLOCK
 	select DEBUG_MUTEXES
+	select DEBUG_RT_MUTEXES if RT_MUTEXES
 	select LOCKDEP
 	help
 	 This feature will check whether any held lock (spinlock, rwlock,
@@ -1067,6 +1068,7 @@ config PROVE_LOCKING
 	select LOCKDEP
 	select DEBUG_SPINLOCK
 	select DEBUG_MUTEXES
+	select DEBUG_RT_MUTEXES if RT_MUTEXES
 	select DEBUG_LOCK_ALLOC
 	select TRACE_IRQFLAGS
 	default n
@@ -1121,6 +1123,7 @@ config LOCK_STAT
 	select LOCKDEP
 	select DEBUG_SPINLOCK
 	select DEBUG_MUTEXES
+	select DEBUG_RT_MUTEXES if RT_MUTEXES
 	select DEBUG_LOCK_ALLOC
 	default n
 	help
@@ -1301,189 +1304,7 @@ config DEBUG_CREDENTIALS
 
 	  If unsure, say N.
 
-menu "RCU Debugging"
-
-config PROVE_RCU
-	def_bool PROVE_LOCKING
-
-config PROVE_RCU_REPEATEDLY
-	bool "RCU debugging: don't disable PROVE_RCU on first splat"
-	depends on PROVE_RCU
-	default n
-	help
-	 By itself, PROVE_RCU will disable checking upon issuing the
-	 first warning (or "splat").  This feature prevents such
-	 disabling, allowing multiple RCU-lockdep warnings to be printed
-	 on a single reboot.
-
-	 Say Y to allow multiple RCU-lockdep warnings per boot.
-
-	 Say N if you are unsure.
-
-config SPARSE_RCU_POINTER
-	bool "RCU debugging: sparse-based checks for pointer usage"
-	default n
-	help
-	 This feature enables the __rcu sparse annotation for
-	 RCU-protected pointers.  This annotation will cause sparse
-	 to flag any non-RCU used of annotated pointers.  This can be
-	 helpful when debugging RCU usage.  Please note that this feature
-	 is not intended to enforce code cleanliness; it is instead merely
-	 a debugging aid.
-
-	 Say Y to make sparse flag questionable use of RCU-protected pointers
-
-	 Say N if you are unsure.
-
-config TORTURE_TEST
-	tristate
-	default n
-
-config RCU_PERF_TEST
-	tristate "performance tests for RCU"
-	depends on DEBUG_KERNEL
-	select TORTURE_TEST
-	select SRCU
-	select TASKS_RCU
-	default n
-	help
-	  This option provides a kernel module that runs performance
-	  tests on the RCU infrastructure.  The kernel module may be built
-	  after the fact on the running kernel to be tested, if desired.
-
-	  Say Y here if you want RCU performance tests to be built into
-	  the kernel.
-	  Say M if you want the RCU performance tests to build as a module.
-	  Say N if you are unsure.
-
-config RCU_TORTURE_TEST
-	tristate "torture tests for RCU"
-	depends on DEBUG_KERNEL
-	select TORTURE_TEST
-	select SRCU
-	select TASKS_RCU
-	default n
-	help
-	  This option provides a kernel module that runs torture tests
-	  on the RCU infrastructure.  The kernel module may be built
-	  after the fact on the running kernel to be tested, if desired.
-
-	  Say Y here if you want RCU torture tests to be built into
-	  the kernel.
-	  Say M if you want the RCU torture tests to build as a module.
-	  Say N if you are unsure.
-
-config RCU_TORTURE_TEST_SLOW_PREINIT
-	bool "Slow down RCU grace-period pre-initialization to expose races"
-	depends on RCU_TORTURE_TEST
-	help
-	  This option delays grace-period pre-initialization (the
-	  propagation of CPU-hotplug changes up the rcu_node combining
-	  tree) for a few jiffies between initializing each pair of
-	  consecutive rcu_node structures.  This helps to expose races
-	  involving grace-period pre-initialization, in other words, it
-	  makes your kernel less stable.  It can also greatly increase
-	  grace-period latency, especially on systems with large numbers
-	  of CPUs.  This is useful when torture-testing RCU, but in
-	  almost no other circumstance.
-
-	  Say Y here if you want your system to crash and hang more often.
-	  Say N if you want a sane system.
-
-config RCU_TORTURE_TEST_SLOW_PREINIT_DELAY
-	int "How much to slow down RCU grace-period pre-initialization"
-	range 0 5
-	default 3
-	depends on RCU_TORTURE_TEST_SLOW_PREINIT
-	help
-	  This option specifies the number of jiffies to wait between
-	  each rcu_node structure pre-initialization step.
-
-config RCU_TORTURE_TEST_SLOW_INIT
-	bool "Slow down RCU grace-period initialization to expose races"
-	depends on RCU_TORTURE_TEST
-	help
-	  This option delays grace-period initialization for a few
-	  jiffies between initializing each pair of consecutive
-	  rcu_node structures.	This helps to expose races involving
-	  grace-period initialization, in other words, it makes your
-	  kernel less stable.  It can also greatly increase grace-period
-	  latency, especially on systems with large numbers of CPUs.
-	  This is useful when torture-testing RCU, but in almost no
-	  other circumstance.
-
-	  Say Y here if you want your system to crash and hang more often.
-	  Say N if you want a sane system.
-
-config RCU_TORTURE_TEST_SLOW_INIT_DELAY
-	int "How much to slow down RCU grace-period initialization"
-	range 0 5
-	default 3
-	depends on RCU_TORTURE_TEST_SLOW_INIT
-	help
-	  This option specifies the number of jiffies to wait between
-	  each rcu_node structure initialization.
-
-config RCU_TORTURE_TEST_SLOW_CLEANUP
-	bool "Slow down RCU grace-period cleanup to expose races"
-	depends on RCU_TORTURE_TEST
-	help
-	  This option delays grace-period cleanup for a few jiffies
-	  between cleaning up each pair of consecutive rcu_node
-	  structures.  This helps to expose races involving grace-period
-	  cleanup, in other words, it makes your kernel less stable.
-	  It can also greatly increase grace-period latency, especially
-	  on systems with large numbers of CPUs.  This is useful when
-	  torture-testing RCU, but in almost no other circumstance.
-
-	  Say Y here if you want your system to crash and hang more often.
-	  Say N if you want a sane system.
-
-config RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY
-	int "How much to slow down RCU grace-period cleanup"
-	range 0 5
-	default 3
-	depends on RCU_TORTURE_TEST_SLOW_CLEANUP
-	help
-	  This option specifies the number of jiffies to wait between
-	  each rcu_node structure cleanup operation.
-
-config RCU_CPU_STALL_TIMEOUT
-	int "RCU CPU stall timeout in seconds"
-	depends on RCU_STALL_COMMON
-	range 3 300
-	default 21
-	help
-	  If a given RCU grace period extends more than the specified
-	  number of seconds, a CPU stall warning is printed.  If the
-	  RCU grace period persists, additional CPU stall warnings are
-	  printed at more widely spaced intervals.
-
-config RCU_TRACE
-	bool "Enable tracing for RCU"
-	depends on DEBUG_KERNEL
-	default y if TREE_RCU
-	select TRACE_CLOCK
-	help
-	  This option provides tracing in RCU which presents stats
-	  in debugfs for debugging RCU implementation.  It also enables
-	  additional tracepoints for ftrace-style event tracing.
-
-	  Say Y here if you want to enable RCU tracing
-	  Say N if you are unsure.
-
-config RCU_EQS_DEBUG
-	bool "Provide debugging asserts for adding NO_HZ support to an arch"
-	depends on DEBUG_KERNEL
-	help
-	  This option provides consistency checks in RCU's handling of
-	  NO_HZ.  These checks have proven quite helpful in detecting
-	  bugs in arch-specific NO_HZ code.
-
-	  Say N here if you need ultimate kernel/user switch latencies
-	  Say Y if you are unsure
-
-endmenu # "RCU Debugging"
+source "kernel/rcu/Kconfig.debug"
 
 config DEBUG_WQ_FORCE_RR_CPU
 	bool "Force round-robin CPU selection for unbound work items"
diff --git a/lib/Makefile b/lib/Makefile
index 0166fbc0fa81..07fbe6a75692 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -25,9 +25,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 earlycpio.o seq_buf.o siphash.o \
 	 nmi_backtrace.o nodemask.o win_minmax.o
 
-CFLAGS_radix-tree.o += -DCONFIG_SPARSE_RCU_POINTER
-CFLAGS_idr.o += -DCONFIG_SPARSE_RCU_POINTER
-
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o
diff --git a/lib/cmdline.c b/lib/cmdline.c
index 3c6432df7e63..4c0888c4a68d 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -23,14 +23,14 @@
  *	the values[M, M+1, ..., N] into the ints array in get_options.
  */
 
-static int get_range(char **str, int *pint)
+static int get_range(char **str, int *pint, int n)
 {
 	int x, inc_counter, upper_range;
 
 	(*str)++;
 	upper_range = simple_strtol((*str), NULL, 0);
 	inc_counter = upper_range - *pint;
-	for (x = *pint; x < upper_range; x++)
+	for (x = *pint; n && x < upper_range; x++, n--)
 		*pint++ = x;
 	return inc_counter;
 }
@@ -97,7 +97,7 @@ char *get_options(const char *str, int nints, int *ints)
 			break;
 		if (res == 3) {
 			int range_nums;
-			range_nums = get_range((char **)&str, ints + i);
+			range_nums = get_range((char **)&str, ints + i, nints - i);
 			if (range_nums < 0)
 				break;
 			/*
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 81dedaab36cc..4731a0895760 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -43,6 +43,38 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
 }
 EXPORT_SYMBOL(cpumask_any_but);
 
+/**
+ * cpumask_next_wrap - helper to implement for_each_cpu_wrap
+ * @n: the cpu prior to the place to search
+ * @mask: the cpumask pointer
+ * @start: the start point of the iteration
+ * @wrap: assume @n crossing @start terminates the iteration
+ *
+ * Returns >= nr_cpu_ids on completion
+ *
+ * Note: the @wrap argument is required for the start condition when
+ * we cannot assume @start is set in @mask.
+ */
+int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)
+{
+	int next;
+
+again:
+	next = cpumask_next(n, mask);
+
+	if (wrap && n < start && next >= start) {
+		return nr_cpumask_bits;
+
+	} else if (next >= nr_cpumask_bits) {
+		wrap = true;
+		n = -1;
+		goto again;
+	}
+
+	return next;
+}
+EXPORT_SYMBOL(cpumask_next_wrap);
+
 /* These are not inline because of header tangles. */
 #ifdef CONFIG_CPUMASK_OFFSTACK
 /**
diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c
index 74a54b7f2562..9f79547d1b97 100644
--- a/lib/libcrc32c.c
+++ b/lib/libcrc32c.c
@@ -43,7 +43,7 @@ static struct crypto_shash *tfm;
 u32 crc32c(u32 crc, const void *address, unsigned int length)
 {
 	SHASH_DESC_ON_STACK(shash, tfm);
-	u32 *ctx = (u32 *)shash_desc_ctx(shash);
+	u32 ret, *ctx = (u32 *)shash_desc_ctx(shash);
 	int err;
 
 	shash->tfm = tfm;
@@ -53,7 +53,9 @@ u32 crc32c(u32 crc, const void *address, unsigned int length)
 	err = crypto_shash_update(shash, address, length);
 	BUG_ON(err);
 
-	return *ctx;
+	ret = *ctx;
+	barrier_data(ctx);
+	return ret;
 }
 
 EXPORT_SYMBOL(crc32c);
diff --git a/lib/locking-selftest-rtmutex.h b/lib/locking-selftest-rtmutex.h
new file mode 100644
index 000000000000..e3cb83989d16
--- /dev/null
+++ b/lib/locking-selftest-rtmutex.h
@@ -0,0 +1,11 @@
+#undef LOCK
+#define LOCK		RTL
+
+#undef UNLOCK
+#define UNLOCK		RTU
+
+#undef RLOCK
+#undef WLOCK
+
+#undef INIT
+#define INIT		RTI
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index f3a217ea0388..6f2b135dc5e8 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -21,6 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
 #include <linux/irqflags.h>
+#include <linux/rtmutex.h>
 
 /*
  * Change this to 1 if you want to see the failure printouts:
@@ -46,6 +47,7 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose);
 #define LOCKTYPE_MUTEX	0x4
 #define LOCKTYPE_RWSEM	0x8
 #define LOCKTYPE_WW	0x10
+#define LOCKTYPE_RTMUTEX 0x20
 
 static struct ww_acquire_ctx t, t2;
 static struct ww_mutex o, o2, o3;
@@ -74,6 +76,15 @@ static DECLARE_RWSEM(rwsem_B);
 static DECLARE_RWSEM(rwsem_C);
 static DECLARE_RWSEM(rwsem_D);
 
+#ifdef CONFIG_RT_MUTEXES
+
+static DEFINE_RT_MUTEX(rtmutex_A);
+static DEFINE_RT_MUTEX(rtmutex_B);
+static DEFINE_RT_MUTEX(rtmutex_C);
+static DEFINE_RT_MUTEX(rtmutex_D);
+
+#endif
+
 /*
  * Locks that we initialize dynamically as well so that
  * e.g. X1 and X2 becomes two instances of the same class,
@@ -108,6 +119,17 @@ static DECLARE_RWSEM(rwsem_Y2);
 static DECLARE_RWSEM(rwsem_Z1);
 static DECLARE_RWSEM(rwsem_Z2);
 
+#ifdef CONFIG_RT_MUTEXES
+
+static DEFINE_RT_MUTEX(rtmutex_X1);
+static DEFINE_RT_MUTEX(rtmutex_X2);
+static DEFINE_RT_MUTEX(rtmutex_Y1);
+static DEFINE_RT_MUTEX(rtmutex_Y2);
+static DEFINE_RT_MUTEX(rtmutex_Z1);
+static DEFINE_RT_MUTEX(rtmutex_Z2);
+
+#endif
+
 /*
  * non-inlined runtime initializers, to let separate locks share
  * the same lock-class:
@@ -129,6 +151,17 @@ INIT_CLASS_FUNC(Z)
 
 static void init_shared_classes(void)
 {
+#ifdef CONFIG_RT_MUTEXES
+	static struct lock_class_key rt_X, rt_Y, rt_Z;
+
+	__rt_mutex_init(&rtmutex_X1, __func__, &rt_X);
+	__rt_mutex_init(&rtmutex_X2, __func__, &rt_X);
+	__rt_mutex_init(&rtmutex_Y1, __func__, &rt_Y);
+	__rt_mutex_init(&rtmutex_Y2, __func__, &rt_Y);
+	__rt_mutex_init(&rtmutex_Z1, __func__, &rt_Z);
+	__rt_mutex_init(&rtmutex_Z2, __func__, &rt_Z);
+#endif
+
 	init_class_X(&lock_X1, &rwlock_X1, &mutex_X1, &rwsem_X1);
 	init_class_X(&lock_X2, &rwlock_X2, &mutex_X2, &rwsem_X2);
 
@@ -193,6 +226,10 @@ static void init_shared_classes(void)
 #define MU(x)			mutex_unlock(&mutex_##x)
 #define MI(x)			mutex_init(&mutex_##x)
 
+#define RTL(x)			rt_mutex_lock(&rtmutex_##x)
+#define RTU(x)			rt_mutex_unlock(&rtmutex_##x)
+#define RTI(x)			rt_mutex_init(&rtmutex_##x)
+
 #define WSL(x)			down_write(&rwsem_##x)
 #define WSU(x)			up_write(&rwsem_##x)
 
@@ -264,6 +301,11 @@ GENERATE_TESTCASE(AA_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(AA_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(AA_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -345,6 +387,11 @@ GENERATE_TESTCASE(ABBA_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(ABBA_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(ABBA_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -373,6 +420,11 @@ GENERATE_TESTCASE(ABBCCA_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(ABBCCA_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(ABBCCA_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -401,6 +453,11 @@ GENERATE_TESTCASE(ABCABC_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(ABCABC_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(ABCABC_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -430,6 +487,11 @@ GENERATE_TESTCASE(ABBCCDDA_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(ABBCCDDA_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(ABBCCDDA_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -458,6 +520,11 @@ GENERATE_TESTCASE(ABCDBDDA_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(ABCDBDDA_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(ABCDBDDA_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -486,6 +553,11 @@ GENERATE_TESTCASE(ABCDBCDA_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(ABCDBCDA_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(ABCDBCDA_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -513,33 +585,10 @@ GENERATE_TESTCASE(double_unlock_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(double_unlock_rsem)
 
-#undef E
-
-/*
- * Bad unlock ordering:
- */
-#define E()					\
-						\
-	LOCK(A);				\
-	LOCK(B);				\
-	UNLOCK(A); /* fail */			\
-	UNLOCK(B);
-
-/*
- * 6 testcases:
- */
-#include "locking-selftest-spin.h"
-GENERATE_TESTCASE(bad_unlock_order_spin)
-#include "locking-selftest-wlock.h"
-GENERATE_TESTCASE(bad_unlock_order_wlock)
-#include "locking-selftest-rlock.h"
-GENERATE_TESTCASE(bad_unlock_order_rlock)
-#include "locking-selftest-mutex.h"
-GENERATE_TESTCASE(bad_unlock_order_mutex)
-#include "locking-selftest-wsem.h"
-GENERATE_TESTCASE(bad_unlock_order_wsem)
-#include "locking-selftest-rsem.h"
-GENERATE_TESTCASE(bad_unlock_order_rsem)
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(double_unlock_rtmutex);
+#endif
 
 #undef E
 
@@ -567,6 +616,11 @@ GENERATE_TESTCASE(init_held_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(init_held_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(init_held_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -916,6 +970,9 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
 # define I_MUTEX(x)	lockdep_reset_lock(&mutex_##x.dep_map)
 # define I_RWSEM(x)	lockdep_reset_lock(&rwsem_##x.dep_map)
 # define I_WW(x)	lockdep_reset_lock(&x.dep_map)
+#ifdef CONFIG_RT_MUTEXES
+# define I_RTMUTEX(x)	lockdep_reset_lock(&rtmutex_##x.dep_map)
+#endif
 #else
 # define I_SPINLOCK(x)
 # define I_RWLOCK(x)
@@ -924,12 +981,23 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
 # define I_WW(x)
 #endif
 
+#ifndef I_RTMUTEX
+# define I_RTMUTEX(x)
+#endif
+
+#ifdef CONFIG_RT_MUTEXES
+#define I2_RTMUTEX(x)	rt_mutex_init(&rtmutex_##x)
+#else
+#define I2_RTMUTEX(x)
+#endif
+
 #define I1(x)					\
 	do {					\
 		I_SPINLOCK(x);			\
 		I_RWLOCK(x);			\
 		I_MUTEX(x);			\
 		I_RWSEM(x);			\
+		I_RTMUTEX(x);			\
 	} while (0)
 
 #define I2(x)					\
@@ -938,6 +1006,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
 		rwlock_init(&rwlock_##x);	\
 		mutex_init(&mutex_##x);		\
 		init_rwsem(&rwsem_##x);		\
+		I2_RTMUTEX(x);			\
 	} while (0)
 
 static void reset_locks(void)
@@ -1013,6 +1082,12 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
 	reset_locks();
 }
 
+#ifdef CONFIG_RT_MUTEXES
+#define dotest_rt(fn, e, m)	dotest((fn), (e), (m))
+#else
+#define dotest_rt(fn, e, m)
+#endif
+
 static inline void print_testname(const char *testname)
 {
 	printk("%33s:", testname);
@@ -1050,6 +1125,7 @@ static inline void print_testname(const char *testname)
 	dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);		\
 	dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);		\
 	dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);		\
+	dotest_rt(name##_rtmutex, FAILURE, LOCKTYPE_RTMUTEX);	\
 	pr_cont("\n");
 
 #define DO_TESTCASE_6_SUCCESS(desc, name)			\
@@ -1060,6 +1136,7 @@ static inline void print_testname(const char *testname)
 	dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX);		\
 	dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM);		\
 	dotest(name##_rsem, SUCCESS, LOCKTYPE_RWSEM);		\
+	dotest_rt(name##_rtmutex, SUCCESS, LOCKTYPE_RTMUTEX);	\
 	pr_cont("\n");
 
 /*
@@ -1073,6 +1150,7 @@ static inline void print_testname(const char *testname)
 	dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);		\
 	dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);		\
 	dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);		\
+	dotest_rt(name##_rtmutex, FAILURE, LOCKTYPE_RTMUTEX);	\
 	pr_cont("\n");
 
 #define DO_TESTCASE_2I(desc, name, nr)				\
@@ -1825,7 +1903,6 @@ void locking_selftest(void)
 	DO_TESTCASE_6R("A-B-C-D-B-C-D-A deadlock", ABCDBCDA);
 	DO_TESTCASE_6("double unlock", double_unlock);
 	DO_TESTCASE_6("initialize held", init_held);
-	DO_TESTCASE_6_SUCCESS("bad unlock order", bad_unlock_order);
 
 	printk("  --------------------------------------------------------------------------\n");
 	print_testname("recursive read-lock");
diff --git a/lib/refcount.c b/lib/refcount.c
index 9f906783987e..5d0582a9480c 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -37,6 +37,8 @@
 #include <linux/refcount.h>
 #include <linux/bug.h>
 
+#ifdef CONFIG_REFCOUNT_FULL
+
 /**
  * refcount_add_not_zero - add a value to a refcount unless it is 0
  * @i: the value to add to the refcount
@@ -225,6 +227,7 @@ void refcount_dec(refcount_t *r)
 	WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
 }
 EXPORT_SYMBOL(refcount_dec);
+#endif /* CONFIG_REFCOUNT_FULL */
 
 /**
  * refcount_dec_if_one - decrement a refcount if it is 1
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index c6cf82242d65..be7b4dd6b68d 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -751,3 +751,38 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
 	return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
 }
 EXPORT_SYMBOL(sg_pcopy_to_buffer);
+
+/**
+ * sg_zero_buffer - Zero-out a part of a SG list
+ * @sgl:		 The SG list
+ * @nents:		 Number of SG entries
+ * @buflen:		 The number of bytes to zero out
+ * @skip:		 Number of bytes to skip before zeroing
+ *
+ * Returns the number of bytes zeroed.
+ **/
+size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
+		       size_t buflen, off_t skip)
+{
+	unsigned int offset = 0;
+	struct sg_mapping_iter miter;
+	unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;
+
+	sg_miter_start(&miter, sgl, nents, sg_flags);
+
+	if (!sg_miter_skip(&miter, skip))
+		return false;
+
+	while (offset < buflen && sg_miter_next(&miter)) {
+		unsigned int len;
+
+		len = min(miter.length, buflen - offset);
+		memset(miter.addr, 0, len);
+
+		offset += len;
+	}
+
+	sg_miter_stop(&miter);
+	return offset;
+}
+EXPORT_SYMBOL(sg_zero_buffer);
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 690d75b132fa..2fb007be0212 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -28,7 +28,7 @@ notrace static unsigned int check_preemption_disabled(const char *what1,
 	/*
 	 * It is valid to assume CPU-locality during early bootup:
 	 */
-	if (system_state != SYSTEM_RUNNING)
+	if (system_state < SYSTEM_SCHEDULING)
 		goto out;
 
 	/*
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 889bc31785be..be88cbaadde3 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -4504,6 +4504,44 @@ static struct bpf_test tests[] = {
 		{ },
 		{ { 0, 1 } },
 	},
+	{
+		"JMP_JSGE_K: Signed jump: value walk 1",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, 0),
+			BPF_LD_IMM64(R1, -3),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 6),
+			BPF_ALU64_IMM(BPF_ADD, R1, 1),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 4),
+			BPF_ALU64_IMM(BPF_ADD, R1, 1),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 2),
+			BPF_ALU64_IMM(BPF_ADD, R1, 1),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 1),
+			BPF_EXIT_INSN(),		/* bad exit */
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),	/* good exit */
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } },
+	},
+	{
+		"JMP_JSGE_K: Signed jump: value walk 2",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, 0),
+			BPF_LD_IMM64(R1, -3),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 4),
+			BPF_ALU64_IMM(BPF_ADD, R1, 2),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 2),
+			BPF_ALU64_IMM(BPF_ADD, R1, 2),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 1),
+			BPF_EXIT_INSN(),		/* bad exit */
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),	/* good exit */
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } },
+	},
 	/* BPF_JMP | BPF_JGT | BPF_K */
 	{
 		"JMP_JGT_K: if (3 > 2) return 1",
diff --git a/lib/test_uuid.c b/lib/test_uuid.c
index 547d3127a3cf..478c049630b5 100644
--- a/lib/test_uuid.c
+++ b/lib/test_uuid.c
@@ -11,25 +11,25 @@
 
 struct test_uuid_data {
 	const char *uuid;
-	uuid_le le;
-	uuid_be be;
+	guid_t le;
+	uuid_t be;
 };
 
 static const struct test_uuid_data test_uuid_test_data[] = {
 	{
 		.uuid = "c33f4995-3701-450e-9fbf-206a2e98e576",
-		.le = UUID_LE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
-		.be = UUID_BE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
+		.le = GUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
+		.be = UUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
 	},
 	{
 		.uuid = "64b4371c-77c1-48f9-8221-29f054fc023b",
-		.le = UUID_LE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
-		.be = UUID_BE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
+		.le = GUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
+		.be = UUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
 	},
 	{
 		.uuid = "0cb4ddff-a545-4401-9d06-688af53e7f84",
-		.le = UUID_LE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
-		.be = UUID_BE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
+		.le = GUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
+		.be = UUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
 	},
 };
 
@@ -61,28 +61,28 @@ static void __init test_uuid_failed(const char *prefix, bool wrong, bool be,
 
 static void __init test_uuid_test(const struct test_uuid_data *data)
 {
-	uuid_le le;
-	uuid_be be;
+	guid_t le;
+	uuid_t be;
 	char buf[48];
 
 	/* LE */
 	total_tests++;
-	if (uuid_le_to_bin(data->uuid, &le))
+	if (guid_parse(data->uuid, &le))
 		test_uuid_failed("conversion", false, false, data->uuid, NULL);
 
 	total_tests++;
-	if (uuid_le_cmp(data->le, le)) {
+	if (!guid_equal(&data->le, &le)) {
 		sprintf(buf, "%pUl", &le);
 		test_uuid_failed("cmp", false, false, data->uuid, buf);
 	}
 
 	/* BE */
 	total_tests++;
-	if (uuid_be_to_bin(data->uuid, &be))
+	if (uuid_parse(data->uuid, &be))
 		test_uuid_failed("conversion", false, true, data->uuid, NULL);
 
 	total_tests++;
-	if (uuid_be_cmp(data->be, be)) {
+	if (uuid_equal(&data->be, &be)) {
 		sprintf(buf, "%pUb", &be);
 		test_uuid_failed("cmp", false, true, data->uuid, buf);
 	}
@@ -90,17 +90,17 @@ static void __init test_uuid_test(const struct test_uuid_data *data)
 
 static void __init test_uuid_wrong(const char *data)
 {
-	uuid_le le;
-	uuid_be be;
+	guid_t le;
+	uuid_t be;
 
 	/* LE */
 	total_tests++;
-	if (!uuid_le_to_bin(data, &le))
+	if (!guid_parse(data, &le))
 		test_uuid_failed("negative", true, false, data, NULL);
 
 	/* BE */
 	total_tests++;
-	if (!uuid_be_to_bin(data, &be))
+	if (!uuid_parse(data, &be))
 		test_uuid_failed("negative", true, true, data, NULL);
 }
 
diff --git a/lib/uuid.c b/lib/uuid.c
index 37687af77ff8..680b9fb9ba09 100644
--- a/lib/uuid.c
+++ b/lib/uuid.c
@@ -21,10 +21,13 @@
 #include <linux/uuid.h>
 #include <linux/random.h>
 
-const u8 uuid_le_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
-EXPORT_SYMBOL(uuid_le_index);
-const u8 uuid_be_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
-EXPORT_SYMBOL(uuid_be_index);
+const guid_t guid_null;
+EXPORT_SYMBOL(guid_null);
+const uuid_t uuid_null;
+EXPORT_SYMBOL(uuid_null);
+
+const u8 guid_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
+const u8 uuid_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
 
 /***************************************************************
  * Random UUID interface
@@ -53,21 +56,21 @@ static void __uuid_gen_common(__u8 b[16])
 	b[8] = (b[8] & 0x3F) | 0x80;
 }
 
-void uuid_le_gen(uuid_le *lu)
+void guid_gen(guid_t *lu)
 {
 	__uuid_gen_common(lu->b);
 	/* version 4 : random generation */
 	lu->b[7] = (lu->b[7] & 0x0F) | 0x40;
 }
-EXPORT_SYMBOL_GPL(uuid_le_gen);
+EXPORT_SYMBOL_GPL(guid_gen);
 
-void uuid_be_gen(uuid_be *bu)
+void uuid_gen(uuid_t *bu)
 {
 	__uuid_gen_common(bu->b);
 	/* version 4 : random generation */
 	bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
 }
-EXPORT_SYMBOL_GPL(uuid_be_gen);
+EXPORT_SYMBOL_GPL(uuid_gen);
 
 /**
   * uuid_is_valid - checks if UUID string valid
@@ -97,7 +100,7 @@ bool uuid_is_valid(const char *uuid)
 }
 EXPORT_SYMBOL(uuid_is_valid);
 
-static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16])
+static int __uuid_parse(const char *uuid, __u8 b[16], const u8 ei[16])
 {
 	static const u8 si[16] = {0,2,4,6,9,11,14,16,19,21,24,26,28,30,32,34};
 	unsigned int i;
@@ -115,14 +118,14 @@ static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16])
 	return 0;
 }
 
-int uuid_le_to_bin(const char *uuid, uuid_le *u)
+int guid_parse(const char *uuid, guid_t *u)
 {
-	return __uuid_to_bin(uuid, u->b, uuid_le_index);
+	return __uuid_parse(uuid, u->b, guid_index);
 }
-EXPORT_SYMBOL(uuid_le_to_bin);
+EXPORT_SYMBOL(guid_parse);
 
-int uuid_be_to_bin(const char *uuid, uuid_be *u)
+int uuid_parse(const char *uuid, uuid_t *u)
 {
-	return __uuid_to_bin(uuid, u->b, uuid_be_index);
+	return __uuid_parse(uuid, u->b, uuid_index);
 }
-EXPORT_SYMBOL(uuid_be_to_bin);
+EXPORT_SYMBOL(uuid_parse);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 2d41de3f98a1..9f37d6208e99 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1308,14 +1308,14 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
 	char uuid[UUID_STRING_LEN + 1];
 	char *p = uuid;
 	int i;
-	const u8 *index = uuid_be_index;
+	const u8 *index = uuid_index;
 	bool uc = false;
 
 	switch (*(++fmt)) {
 	case 'L':
 		uc = true;		/* fall-through */
 	case 'l':
-		index = uuid_le_index;
+		index = guid_index;
 		break;
 	case 'B':
 		uc = true;
diff --git a/mm/Kconfig b/mm/Kconfig
index beb7a455915d..398b46064544 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -137,7 +137,7 @@ config HAVE_MEMBLOCK_NODE_MAP
 config HAVE_MEMBLOCK_PHYS_MAP
 	bool
 
-config HAVE_GENERIC_RCU_GUP
+config HAVE_GENERIC_GUP
 	bool
 
 config ARCH_DISCARD_MEMBLOCK
diff --git a/mm/cleancache.c b/mm/cleancache.c
index ba5d8f3e6d68..f7b9fdc79d97 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -130,7 +130,7 @@ void __cleancache_init_shared_fs(struct super_block *sb)
 	int pool_id = CLEANCACHE_NO_BACKEND_SHARED;
 
 	if (cleancache_ops) {
-		pool_id = cleancache_ops->init_shared_fs(sb->s_uuid, PAGE_SIZE);
+		pool_id = cleancache_ops->init_shared_fs(&sb->s_uuid, PAGE_SIZE);
 		if (pool_id < 0)
 			pool_id = CLEANCACHE_NO_POOL;
 	}
diff --git a/mm/filemap.c b/mm/filemap.c
index 6f1be573a5e6..aea58e983a73 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -376,6 +376,38 @@ int filemap_flush(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_flush);
 
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping:           address space within which to check
+ * @start_byte:        offset in bytes where the range starts
+ * @end_byte:          offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback.
+ */
+bool filemap_range_has_page(struct address_space *mapping,
+			   loff_t start_byte, loff_t end_byte)
+{
+	pgoff_t index = start_byte >> PAGE_SHIFT;
+	pgoff_t end = end_byte >> PAGE_SHIFT;
+	struct pagevec pvec;
+	bool ret;
+
+	if (end_byte < start_byte)
+		return false;
+
+	if (mapping->nrpages == 0)
+		return false;
+
+	pagevec_init(&pvec, 0);
+	if (!pagevec_lookup(&pvec, mapping, index, 1))
+		return false;
+	ret = (pvec.pages[0]->index <= end);
+	pagevec_release(&pvec);
+	return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
 static int __filemap_fdatawait_range(struct address_space *mapping,
 				     loff_t start_byte, loff_t end_byte)
 {
@@ -768,10 +800,10 @@ struct wait_page_key {
 struct wait_page_queue {
 	struct page *page;
 	int bit_nr;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 };
 
-static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
+static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
 {
 	struct wait_page_key *key = arg;
 	struct wait_page_queue *wait_page
@@ -834,7 +866,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 		struct page *page, int bit_nr, int state, bool lock)
 {
 	struct wait_page_queue wait_page;
-	wait_queue_t *wait = &wait_page.wait;
+	wait_queue_entry_t *wait = &wait_page.wait;
 	int ret = 0;
 
 	init_wait(wait);
@@ -845,9 +877,9 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 	for (;;) {
 		spin_lock_irq(&q->lock);
 
-		if (likely(list_empty(&wait->task_list))) {
+		if (likely(list_empty(&wait->entry))) {
 			if (lock)
-				__add_wait_queue_tail_exclusive(q, wait);
+				__add_wait_queue_entry_tail_exclusive(q, wait);
 			else
 				__add_wait_queue(q, wait);
 			SetPageWaiters(page);
@@ -907,7 +939,7 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
  *
  * Add an arbitrary @waiter to the wait queue for the nominated @page.
  */
-void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
+void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
 {
 	wait_queue_head_t *q = page_waitqueue(page);
 	unsigned long flags;
@@ -2038,10 +2070,17 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 		loff_t size;
 
 		size = i_size_read(inode);
-		retval = filemap_write_and_wait_range(mapping, iocb->ki_pos,
-					iocb->ki_pos + count - 1);
-		if (retval < 0)
-			goto out;
+		if (iocb->ki_flags & IOCB_NOWAIT) {
+			if (filemap_range_has_page(mapping, iocb->ki_pos,
+						   iocb->ki_pos + count - 1))
+				return -EAGAIN;
+		} else {
+			retval = filemap_write_and_wait_range(mapping,
+						iocb->ki_pos,
+					        iocb->ki_pos + count - 1);
+			if (retval < 0)
+				goto out;
+		}
 
 		file_accessed(file);
 
@@ -2642,6 +2681,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 
 	pos = iocb->ki_pos;
 
+	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+		return -EINVAL;
+
 	if (limit != RLIM_INFINITY) {
 		if (iocb->ki_pos >= limit) {
 			send_sig(SIGXFSZ, current, 0);
@@ -2710,9 +2752,17 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
 	write_len = iov_iter_count(from);
 	end = (pos + write_len - 1) >> PAGE_SHIFT;
 
-	written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
-	if (written)
-		goto out;
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		/* If there are pages to writeback, return */
+		if (filemap_range_has_page(inode->i_mapping, pos,
+					   pos + iov_iter_count(from)))
+			return -EAGAIN;
+	} else {
+		written = filemap_write_and_wait_range(mapping, pos,
+							pos + write_len - 1);
+		if (written)
+			goto out;
+	}
 
 	/*
 	 * After a write we want buffered reads to be sure to go to disk to get
diff --git a/mm/gup.c b/mm/gup.c
index d9e6fddcc51f..3ab78dc3db7d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -387,11 +387,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 	/* mlock all present pages, but do not fault in new pages */
 	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
 		return -ENOENT;
-	/* For mm_populate(), just skip the stack guard page. */
-	if ((*flags & FOLL_POPULATE) &&
-			(stack_guard_page_start(vma, address) ||
-			 stack_guard_page_end(vma, address + PAGE_SIZE)))
-		return -ENOENT;
 	if (*flags & FOLL_WRITE)
 		fault_flags |= FAULT_FLAG_WRITE;
 	if (*flags & FOLL_REMOTE)
@@ -407,12 +402,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 
 	ret = handle_mm_fault(vma, address, fault_flags);
 	if (ret & VM_FAULT_ERROR) {
-		if (ret & VM_FAULT_OOM)
-			return -ENOMEM;
-		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
-			return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
-		if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
-			return -EFAULT;
+		int err = vm_fault_to_errno(ret, *flags);
+
+		if (err)
+			return err;
 		BUG();
 	}
 
@@ -723,12 +716,10 @@ retry:
 	ret = handle_mm_fault(vma, address, fault_flags);
 	major |= ret & VM_FAULT_MAJOR;
 	if (ret & VM_FAULT_ERROR) {
-		if (ret & VM_FAULT_OOM)
-			return -ENOMEM;
-		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
-			return -EHWPOISON;
-		if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
-			return -EFAULT;
+		int err = vm_fault_to_errno(ret, 0);
+
+		if (err)
+			return err;
 		BUG();
 	}
 
@@ -1155,7 +1146,7 @@ struct page *get_dump_page(unsigned long addr)
 #endif /* CONFIG_ELF_CORE */
 
 /*
- * Generic RCU Fast GUP
+ * Generic Fast GUP
  *
  * get_user_pages_fast attempts to pin user pages by walking the page
  * tables directly and avoids taking locks. Thus the walker needs to be
@@ -1176,8 +1167,8 @@ struct page *get_dump_page(unsigned long addr)
  * Before activating this code, please be aware that the following assumptions
  * are currently made:
  *
- *  *) HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table is used to free
- *      pages containing page tables.
+ *  *) Either HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used to
+ *  free pages containing page tables or TLB flushing requires IPI broadcast.
  *
  *  *) ptes can be read atomically by the architecture.
  *
@@ -1187,7 +1178,7 @@ struct page *get_dump_page(unsigned long addr)
  *
  * This code is based heavily on the PowerPC implementation by Nick Piggin.
  */
-#ifdef CONFIG_HAVE_GENERIC_RCU_GUP
+#ifdef CONFIG_HAVE_GENERIC_GUP
 
 #ifndef gup_get_pte
 /*
@@ -1677,4 +1668,4 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	return ret;
 }
 
-#endif /* CONFIG_HAVE_GENERIC_RCU_GUP */
+#endif /* CONFIG_HAVE_GENERIC_GUP */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a84909cf20d3..88c6167f194d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1426,8 +1426,11 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 	 */
 	if (unlikely(pmd_trans_migrating(*vmf->pmd))) {
 		page = pmd_page(*vmf->pmd);
+		if (!get_page_unless_zero(page))
+			goto out_unlock;
 		spin_unlock(vmf->ptl);
 		wait_on_page_locked(page);
+		put_page(page);
 		goto out;
 	}
 
@@ -1459,9 +1462,12 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 
 	/* Migration could have started since the pmd_trans_migrating check */
 	if (!page_locked) {
+		page_nid = -1;
+		if (!get_page_unless_zero(page))
+			goto out_unlock;
 		spin_unlock(vmf->ptl);
 		wait_on_page_locked(page);
-		page_nid = -1;
+		put_page(page);
 		goto out;
 	}
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e5828875f7bb..3eedb187e549 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4170,6 +4170,11 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			}
 			ret = hugetlb_fault(mm, vma, vaddr, fault_flags);
 			if (ret & VM_FAULT_ERROR) {
+				int err = vm_fault_to_errno(ret, flags);
+
+				if (err)
+					return err;
+
 				remainder = 0;
 				break;
 			}
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index b10da59cf765..c81549d5c833 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -413,7 +413,7 @@ void kasan_cache_create(struct kmem_cache *cache, size_t *size,
 	*size += sizeof(struct kasan_alloc_meta);
 
 	/* Add free meta. */
-	if (cache->flags & SLAB_DESTROY_BY_RCU || cache->ctor ||
+	if (cache->flags & SLAB_TYPESAFE_BY_RCU || cache->ctor ||
 	    cache->object_size < sizeof(struct kasan_free_meta)) {
 		cache->kasan_info.free_meta_offset = *size;
 		*size += sizeof(struct kasan_free_meta);
@@ -561,7 +561,7 @@ static void kasan_poison_slab_free(struct kmem_cache *cache, void *object)
 	unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
 
 	/* RCU slabs could be legally used after free within the RCU period */
-	if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
+	if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
 		return;
 
 	kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);
@@ -572,7 +572,7 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object)
 	s8 shadow_byte;
 
 	/* RCU slabs could be legally used after free within the RCU period */
-	if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
+	if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
 		return false;
 
 	shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object));
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 7cb9c88bb4a3..df4ebdb2b10a 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -612,7 +612,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 				      spinlock_t *ptl)
 {
 	pte_t *_pte;
-	for (_pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++) {
+	for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
+				_pte++, page++, address += PAGE_SIZE) {
 		pte_t pteval = *_pte;
 		struct page *src_page;
 
@@ -651,9 +652,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 			spin_unlock(ptl);
 			free_page_and_swap_cache(src_page);
 		}
-
-		address += PAGE_SIZE;
-		page++;
 	}
 }
 
@@ -907,8 +905,10 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 				return false;
 			}
 			/* check if the pmd is still valid */
-			if (mm_find_pmd(mm, address) != pmd)
+			if (mm_find_pmd(mm, address) != pmd) {
+				trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
 				return false;
+			}
 		}
 		if (ret & VM_FAULT_ERROR) {
 			trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c
index 5bf191756a4a..2d5959c5f7c5 100644
--- a/mm/kmemcheck.c
+++ b/mm/kmemcheck.c
@@ -95,7 +95,7 @@ void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
 void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size)
 {
 	/* TODO: RCU freeing is unsupported for now; hide false positives. */
-	if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU))
+	if (!s->ctor && !(s->flags & SLAB_TYPESAFE_BY_RCU))
 		kmemcheck_mark_freed(object, size);
 }
 
diff --git a/mm/ksm.c b/mm/ksm.c
index d9fc0e456128..216184af0e19 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1028,8 +1028,7 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
 		goto out;
 
 	if (PageTransCompound(page)) {
-		err = split_huge_page(page);
-		if (err)
+		if (split_huge_page(page))
 			goto out_unlock;
 	}
 
diff --git a/mm/memblock.c b/mm/memblock.c
index b049c9b2dba8..7b8a5db76a2f 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1739,6 +1739,29 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
 	}
 }
 
+extern unsigned long __init_memblock
+memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr)
+{
+	struct memblock_region *rgn;
+	unsigned long size = 0;
+	int idx;
+
+	for_each_memblock_type((&memblock.reserved), rgn) {
+		phys_addr_t start, end;
+
+		if (rgn->base + rgn->size < start_addr)
+			continue;
+		if (rgn->base > end_addr)
+			continue;
+
+		start = rgn->base;
+		end = start + rgn->size;
+		size += end - start;
+	}
+
+	return size;
+}
+
 void __init_memblock __memblock_dump_all(void)
 {
 	pr_info("MEMBLOCK configuration:\n");
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ff73899af61a..d75b38b66ef6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -170,7 +170,7 @@ struct mem_cgroup_event {
 	 */
 	poll_table pt;
 	wait_queue_head_t *wqh;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct work_struct remove;
 };
 
@@ -1479,10 +1479,10 @@ static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
 
 struct oom_wait_info {
 	struct mem_cgroup *memcg;
-	wait_queue_t	wait;
+	wait_queue_entry_t	wait;
 };
 
-static int memcg_oom_wake_function(wait_queue_t *wait,
+static int memcg_oom_wake_function(wait_queue_entry_t *wait,
 	unsigned mode, int sync, void *arg)
 {
 	struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg;
@@ -1570,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
 	owait.wait.flags = 0;
 	owait.wait.func = memcg_oom_wake_function;
 	owait.wait.private = current;
-	INIT_LIST_HEAD(&owait.wait.task_list);
+	INIT_LIST_HEAD(&owait.wait.entry);
 
 	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
 	mem_cgroup_mark_under_oom(memcg);
@@ -3725,7 +3725,7 @@ static void memcg_event_remove(struct work_struct *work)
  *
  * Called with wqh->lock held and interrupts disabled.
  */
-static int memcg_event_wake(wait_queue_t *wait, unsigned mode,
+static int memcg_event_wake(wait_queue_entry_t *wait, unsigned mode,
 			    int sync, void *key)
 {
 	struct mem_cgroup_event *event =
@@ -5528,7 +5528,7 @@ static void uncharge_list(struct list_head *page_list)
 		next = page->lru.next;
 
 		VM_BUG_ON_PAGE(PageLRU(page), page);
-		VM_BUG_ON_PAGE(page_count(page), page);
+		VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
 
 		if (!page->mem_cgroup)
 			continue;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 73066b80d14a..ecc183fd94f3 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -539,6 +539,13 @@ static int delete_from_lru_cache(struct page *p)
 		 */
 		ClearPageActive(p);
 		ClearPageUnevictable(p);
+
+		/*
+		 * Poisoned page might never drop its ref count to 0 so we have
+		 * to uncharge it manually from its memcg.
+		 */
+		mem_cgroup_uncharge(p);
+
 		/*
 		 * drop the page count elevated by isolate_lru_page()
 		 */
@@ -1177,7 +1184,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	 * page_remove_rmap() in try_to_unmap_one(). So to determine page status
 	 * correctly, we save a copy of the page flags at this time.
 	 */
-	page_flags = p->flags;
+	if (PageHuge(p))
+		page_flags = hpage->flags;
+	else
+		page_flags = p->flags;
 
 	/*
 	 * unpoison always clear PG_hwpoison inside page lock
@@ -1588,12 +1598,8 @@ static int soft_offline_huge_page(struct page *page, int flags)
 	if (ret) {
 		pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n",
 			pfn, ret, page->flags, &page->flags);
-		/*
-		 * We know that soft_offline_huge_page() tries to migrate
-		 * only one hugepage pointed to by hpage, so we need not
-		 * run through the pagelist here.
-		 */
-		putback_active_hugepage(hpage);
+		if (!list_empty(&pagelist))
+			putback_movable_pages(&pagelist);
 		if (ret > 0)
 			ret = -EIO;
 	} else {
diff --git a/mm/memory.c b/mm/memory.c
index 6ff5d729ded0..bb11c474857e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2855,40 +2855,6 @@ out_release:
 }
 
 /*
- * This is like a special single-page "expand_{down|up}wards()",
- * except we must first make sure that 'address{-|+}PAGE_SIZE'
- * doesn't hit another vma.
- */
-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
-{
-	address &= PAGE_MASK;
-	if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
-		struct vm_area_struct *prev = vma->vm_prev;
-
-		/*
-		 * Is there a mapping abutting this one below?
-		 *
-		 * That's only ok if it's the same stack mapping
-		 * that has gotten split..
-		 */
-		if (prev && prev->vm_end == address)
-			return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
-
-		return expand_downwards(vma, address - PAGE_SIZE);
-	}
-	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
-		struct vm_area_struct *next = vma->vm_next;
-
-		/* As VM_GROWSDOWN but s/below/above/ */
-		if (next && next->vm_start == address + PAGE_SIZE)
-			return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
-
-		return expand_upwards(vma, address + PAGE_SIZE);
-	}
-	return 0;
-}
-
-/*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
@@ -2904,10 +2870,6 @@ static int do_anonymous_page(struct vm_fault *vmf)
 	if (vma->vm_flags & VM_SHARED)
 		return VM_FAULT_SIGBUS;
 
-	/* Check if we need to add a guard page to the stack */
-	if (check_stack_guard_page(vma, vmf->address) < 0)
-		return VM_FAULT_SIGSEGV;
-
 	/*
 	 * Use pte_alloc() instead of pte_alloc_map().  We can't run
 	 * pte_offset_map() on pmds where a huge pmd might be created
@@ -3029,6 +2991,17 @@ static int __do_fault(struct vm_fault *vmf)
 	return ret;
 }
 
+/*
+ * The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
+ * If we check pmd_trans_unstable() first we will trip the bad_pmd() check
+ * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
+ * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
+ */
+static int pmd_devmap_trans_unstable(pmd_t *pmd)
+{
+	return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
+}
+
 static int pte_alloc_one_map(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -3052,18 +3025,27 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
 map_pte:
 	/*
 	 * If a huge pmd materialized under us just retry later.  Use
-	 * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
-	 * didn't become pmd_trans_huge under us and then back to pmd_none, as
-	 * a result of MADV_DONTNEED running immediately after a huge pmd fault
-	 * in a different thread of this mm, in turn leading to a misleading
-	 * pmd_trans_huge() retval.  All we have to ensure is that it is a
-	 * regular pmd that we can walk with pte_offset_map() and we can do that
-	 * through an atomic read in C, which is what pmd_trans_unstable()
-	 * provides.
+	 * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of
+	 * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge
+	 * under us and then back to pmd_none, as a result of MADV_DONTNEED
+	 * running immediately after a huge pmd fault in a different thread of
+	 * this mm, in turn leading to a misleading pmd_trans_huge() retval.
+	 * All we have to ensure is that it is a regular pmd that we can walk
+	 * with pte_offset_map() and we can do that through an atomic read in
+	 * C, which is what pmd_trans_unstable() provides.
 	 */
-	if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
+	if (pmd_devmap_trans_unstable(vmf->pmd))
 		return VM_FAULT_NOPAGE;
 
+	/*
+	 * At this point we know that our vmf->pmd points to a page of ptes
+	 * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge()
+	 * for the duration of the fault.  If a racing MADV_DONTNEED runs and
+	 * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still
+	 * be valid and we will re-check to make sure the vmf->pte isn't
+	 * pte_none() under vmf->ptl protection when we return to
+	 * alloc_set_pte().
+	 */
 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
 			&vmf->ptl);
 	return 0;
@@ -3690,7 +3672,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
 		vmf->pte = NULL;
 	} else {
 		/* See comment in pte_alloc_one_map() */
-		if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
+		if (pmd_devmap_trans_unstable(vmf->pmd))
 			return 0;
 		/*
 		 * A regular pmd is established and it can't morph into a huge
diff --git a/mm/mempool.c b/mm/mempool.c
index 47a659dedd44..1c0294858527 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -312,7 +312,7 @@ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
 {
 	void *element;
 	unsigned long flags;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	gfp_t gfp_temp;
 
 	VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
diff --git a/mm/mlock.c b/mm/mlock.c
index c483c5c20b4b..b562b5523a65 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -284,7 +284,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 {
 	int i;
 	int nr = pagevec_count(pvec);
-	int delta_munlocked;
+	int delta_munlocked = -nr;
 	struct pagevec pvec_putback;
 	int pgrescued = 0;
 
@@ -304,6 +304,8 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 				continue;
 			else
 				__munlock_isolation_failed(page);
+		} else {
+			delta_munlocked++;
 		}
 
 		/*
@@ -315,7 +317,6 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 		pagevec_add(&pvec_putback, pvec->pages[i]);
 		pvec->pages[i] = NULL;
 	}
-	delta_munlocked = -nr + pagevec_count(&pvec_putback);
 	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
 	spin_unlock_irq(zone_lru_lock(zone));
 
diff --git a/mm/mmap.c b/mm/mmap.c
index f82741e199c0..a5e3dcd75e79 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -183,6 +183,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	unsigned long retval;
 	unsigned long newbrk, oldbrk;
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *next;
 	unsigned long min_brk;
 	bool populate;
 	LIST_HEAD(uf);
@@ -229,7 +230,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	}
 
 	/* Check against existing mmap mappings. */
-	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+	next = find_vma(mm, oldbrk);
+	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
 		goto out;
 
 	/* Ok, looks good - let it rip. */
@@ -253,10 +255,22 @@ out:
 
 static long vma_compute_subtree_gap(struct vm_area_struct *vma)
 {
-	unsigned long max, subtree_gap;
-	max = vma->vm_start;
-	if (vma->vm_prev)
-		max -= vma->vm_prev->vm_end;
+	unsigned long max, prev_end, subtree_gap;
+
+	/*
+	 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
+	 * allow two stack_guard_gaps between them here, and when choosing
+	 * an unmapped area; whereas when expanding we only require one.
+	 * That's a little inconsistent, but keeps the code here simpler.
+	 */
+	max = vm_start_gap(vma);
+	if (vma->vm_prev) {
+		prev_end = vm_end_gap(vma->vm_prev);
+		if (max > prev_end)
+			max -= prev_end;
+		else
+			max = 0;
+	}
 	if (vma->vm_rb.rb_left) {
 		subtree_gap = rb_entry(vma->vm_rb.rb_left,
 				struct vm_area_struct, vm_rb)->rb_subtree_gap;
@@ -352,7 +366,7 @@ static void validate_mm(struct mm_struct *mm)
 			anon_vma_unlock_read(anon_vma);
 		}
 
-		highest_address = vma->vm_end;
+		highest_address = vm_end_gap(vma);
 		vma = vma->vm_next;
 		i++;
 	}
@@ -541,7 +555,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (vma->vm_next)
 		vma_gap_update(vma->vm_next);
 	else
-		mm->highest_vm_end = vma->vm_end;
+		mm->highest_vm_end = vm_end_gap(vma);
 
 	/*
 	 * vma->vm_prev wasn't known when we followed the rbtree to find the
@@ -856,7 +870,7 @@ again:
 			vma_gap_update(vma);
 		if (end_changed) {
 			if (!next)
-				mm->highest_vm_end = end;
+				mm->highest_vm_end = vm_end_gap(vma);
 			else if (!adjust_next)
 				vma_gap_update(next);
 		}
@@ -941,7 +955,7 @@ again:
 			 * mm->highest_vm_end doesn't need any update
 			 * in remove_next == 1 case.
 			 */
-			VM_WARN_ON(mm->highest_vm_end != end);
+			VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
 		}
 	}
 	if (insert && file)
@@ -1787,7 +1801,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 
 	while (true) {
 		/* Visit left subtree if it looks promising */
-		gap_end = vma->vm_start;
+		gap_end = vm_start_gap(vma);
 		if (gap_end >= low_limit && vma->vm_rb.rb_left) {
 			struct vm_area_struct *left =
 				rb_entry(vma->vm_rb.rb_left,
@@ -1798,12 +1812,13 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 			}
 		}
 
-		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
 check_current:
 		/* Check if current node has a suitable gap */
 		if (gap_start > high_limit)
 			return -ENOMEM;
-		if (gap_end >= low_limit && gap_end - gap_start >= length)
+		if (gap_end >= low_limit &&
+		    gap_end > gap_start && gap_end - gap_start >= length)
 			goto found;
 
 		/* Visit right subtree if it looks promising */
@@ -1825,8 +1840,8 @@ check_current:
 			vma = rb_entry(rb_parent(prev),
 				       struct vm_area_struct, vm_rb);
 			if (prev == vma->vm_rb.rb_left) {
-				gap_start = vma->vm_prev->vm_end;
-				gap_end = vma->vm_start;
+				gap_start = vm_end_gap(vma->vm_prev);
+				gap_end = vm_start_gap(vma);
 				goto check_current;
 			}
 		}
@@ -1890,7 +1905,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 
 	while (true) {
 		/* Visit right subtree if it looks promising */
-		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
 		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
 			struct vm_area_struct *right =
 				rb_entry(vma->vm_rb.rb_right,
@@ -1903,10 +1918,11 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 
 check_current:
 		/* Check if current node has a suitable gap */
-		gap_end = vma->vm_start;
+		gap_end = vm_start_gap(vma);
 		if (gap_end < low_limit)
 			return -ENOMEM;
-		if (gap_start <= high_limit && gap_end - gap_start >= length)
+		if (gap_start <= high_limit &&
+		    gap_end > gap_start && gap_end - gap_start >= length)
 			goto found;
 
 		/* Visit left subtree if it looks promising */
@@ -1929,7 +1945,7 @@ check_current:
 				       struct vm_area_struct, vm_rb);
 			if (prev == vma->vm_rb.rb_right) {
 				gap_start = vma->vm_prev ?
-					vma->vm_prev->vm_end : 0;
+					vm_end_gap(vma->vm_prev) : 0;
 				goto check_current;
 			}
 		}
@@ -1967,7 +1983,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct vm_unmapped_area_info info;
 
 	if (len > TASK_SIZE - mmap_min_addr)
@@ -1978,9 +1994,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			return addr;
 	}
 
@@ -2003,7 +2020,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			  const unsigned long len, const unsigned long pgoff,
 			  const unsigned long flags)
 {
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
@@ -2018,9 +2035,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	/* requesting a specific address */
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)) &&
+				(!prev || addr >= vm_end_gap(prev)))
 			return addr;
 	}
 
@@ -2155,21 +2173,19 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
  * update accounting. This is shared with both the
  * grow-up and grow-down cases.
  */
-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
+static int acct_stack_growth(struct vm_area_struct *vma,
+			     unsigned long size, unsigned long grow)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct rlimit *rlim = current->signal->rlim;
-	unsigned long new_start, actual_size;
+	unsigned long new_start;
 
 	/* address space limit tests */
 	if (!may_expand_vm(mm, vma->vm_flags, grow))
 		return -ENOMEM;
 
 	/* Stack limit test */
-	actual_size = size;
-	if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
-		actual_size -= PAGE_SIZE;
-	if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+	if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
 		return -ENOMEM;
 
 	/* mlock limit tests */
@@ -2207,16 +2223,32 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *next;
+	unsigned long gap_addr;
 	int error = 0;
 
 	if (!(vma->vm_flags & VM_GROWSUP))
 		return -EFAULT;
 
-	/* Guard against wrapping around to address 0. */
-	if (address < PAGE_ALIGN(address+4))
-		address = PAGE_ALIGN(address+4);
-	else
+	/* Guard against exceeding limits of the address space. */
+	address &= PAGE_MASK;
+	if (address >= TASK_SIZE)
 		return -ENOMEM;
+	address += PAGE_SIZE;
+
+	/* Enforce stack_guard_gap */
+	gap_addr = address + stack_guard_gap;
+
+	/* Guard against overflow */
+	if (gap_addr < address || gap_addr > TASK_SIZE)
+		gap_addr = TASK_SIZE;
+
+	next = vma->vm_next;
+	if (next && next->vm_start < gap_addr) {
+		if (!(next->vm_flags & VM_GROWSUP))
+			return -ENOMEM;
+		/* Check that both stack segments have the same anon_vma? */
+	}
 
 	/* We must make sure the anon_vma is allocated. */
 	if (unlikely(anon_vma_prepare(vma)))
@@ -2261,7 +2293,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 				if (vma->vm_next)
 					vma_gap_update(vma->vm_next);
 				else
-					mm->highest_vm_end = address;
+					mm->highest_vm_end = vm_end_gap(vma);
 				spin_unlock(&mm->page_table_lock);
 
 				perf_event_mmap(vma);
@@ -2282,6 +2314,8 @@ int expand_downwards(struct vm_area_struct *vma,
 				   unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *prev;
+	unsigned long gap_addr;
 	int error;
 
 	address &= PAGE_MASK;
@@ -2289,6 +2323,17 @@ int expand_downwards(struct vm_area_struct *vma,
 	if (error)
 		return error;
 
+	/* Enforce stack_guard_gap */
+	gap_addr = address - stack_guard_gap;
+	if (gap_addr > address)
+		return -ENOMEM;
+	prev = vma->vm_prev;
+	if (prev && prev->vm_end > gap_addr) {
+		if (!(prev->vm_flags & VM_GROWSDOWN))
+			return -ENOMEM;
+		/* Check that both stack segments have the same anon_vma? */
+	}
+
 	/* We must make sure the anon_vma is allocated. */
 	if (unlikely(anon_vma_prepare(vma)))
 		return -ENOMEM;
@@ -2343,28 +2388,25 @@ int expand_downwards(struct vm_area_struct *vma,
 	return error;
 }
 
-/*
- * Note how expand_stack() refuses to expand the stack all the way to
- * abut the next virtual mapping, *unless* that mapping itself is also
- * a stack mapping. We want to leave room for a guard page, after all
- * (the guard page itself is not added here, that is done by the
- * actual page faulting logic)
- *
- * This matches the behavior of the guard page logic (see mm/memory.c:
- * check_stack_guard_page()), which only allows the guard page to be
- * removed under these circumstances.
- */
+/* enforced gap between the expanding stack and other mappings. */
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
+static int __init cmdline_parse_stack_guard_gap(char *p)
+{
+	unsigned long val;
+	char *endptr;
+
+	val = simple_strtoul(p, &endptr, 10);
+	if (!*endptr)
+		stack_guard_gap = val << PAGE_SHIFT;
+
+	return 0;
+}
+__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
+
 #ifdef CONFIG_STACK_GROWSUP
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-	struct vm_area_struct *next;
-
-	address &= PAGE_MASK;
-	next = vma->vm_next;
-	if (next && next->vm_start == address + PAGE_SIZE) {
-		if (!(next->vm_flags & VM_GROWSUP))
-			return -ENOMEM;
-	}
 	return expand_upwards(vma, address);
 }
 
@@ -2386,14 +2428,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 #else
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-	struct vm_area_struct *prev;
-
-	address &= PAGE_MASK;
-	prev = vma->vm_prev;
-	if (prev && prev->vm_end == address) {
-		if (!(prev->vm_flags & VM_GROWSDOWN))
-			return -ENOMEM;
-	}
 	return expand_downwards(vma, address);
 }
 
@@ -2491,7 +2525,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 		vma->vm_prev = prev;
 		vma_gap_update(vma);
 	} else
-		mm->highest_vm_end = prev ? prev->vm_end : 0;
+		mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
 	tail_vma->vm_next = NULL;
 
 	/* Kill the cache */
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index a7652acd2ab9..54ca54562928 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -21,7 +21,7 @@
 #include <linux/slab.h>
 
 /* global SRCU for all MMs */
-static struct srcu_struct srcu;
+DEFINE_STATIC_SRCU(srcu);
 
 /*
  * This function allows mmu_notifier::release callback to delay a call to
@@ -252,12 +252,6 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
 
 	BUG_ON(atomic_read(&mm->mm_users) <= 0);
 
-	/*
-	 * Verify that mmu_notifier_init() already run and the global srcu is
-	 * initialized.
-	 */
-	BUG_ON(!srcu.per_cpu_ref);
-
 	ret = -ENOMEM;
 	mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
 	if (unlikely(!mmu_notifier_mm))
@@ -406,9 +400,3 @@ void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
 	mmdrop(mm);
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
-
-static int __init mmu_notifier_init(void)
-{
-	return init_srcu_struct(&srcu);
-}
-subsys_initcall(mmu_notifier_init);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f9e450c6b6e4..2302f250d6b1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -292,6 +292,26 @@ int page_group_by_mobility_disabled __read_mostly;
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 static inline void reset_deferred_meminit(pg_data_t *pgdat)
 {
+	unsigned long max_initialise;
+	unsigned long reserved_lowmem;
+
+	/*
+	 * Initialise at least 2G of a node but also take into account that
+	 * two large system hashes that can take up 1GB for 0.25TB/node.
+	 */
+	max_initialise = max(2UL << (30 - PAGE_SHIFT),
+		(pgdat->node_spanned_pages >> 8));
+
+	/*
+	 * Compensate the all the memblock reservations (e.g. crash kernel)
+	 * from the initial estimation to make sure we will initialize enough
+	 * memory to boot.
+	 */
+	reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn,
+			pgdat->node_start_pfn + max_initialise);
+	max_initialise += reserved_lowmem;
+
+	pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages);
 	pgdat->first_deferred_pfn = ULONG_MAX;
 }
 
@@ -314,20 +334,11 @@ static inline bool update_defer_init(pg_data_t *pgdat,
 				unsigned long pfn, unsigned long zone_end,
 				unsigned long *nr_initialised)
 {
-	unsigned long max_initialise;
-
 	/* Always populate low zones for address-contrained allocations */
 	if (zone_end < pgdat_end_pfn(pgdat))
 		return true;
-	/*
-	 * Initialise at least 2G of a node but also take into account that
-	 * two large system hashes that can take up 1GB for 0.25TB/node.
-	 */
-	max_initialise = max(2UL << (30 - PAGE_SHIFT),
-		(pgdat->node_spanned_pages >> 8));
-
 	(*nr_initialised)++;
-	if ((*nr_initialised > max_initialise) &&
+	if ((*nr_initialised > pgdat->static_init_size) &&
 	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
 		pgdat->first_deferred_pfn = pfn;
 		return false;
@@ -3870,7 +3881,9 @@ retry:
 		goto got_pg;
 
 	/* Avoid allocations with no watermarks from looping endlessly */
-	if (test_thread_flag(TIF_MEMDIE))
+	if (test_thread_flag(TIF_MEMDIE) &&
+	    (alloc_flags == ALLOC_NO_WATERMARKS ||
+	     (gfp_mask & __GFP_NOMEMALLOC)))
 		goto nopage;
 
 	/* Retry as long as the OOM killer is making progress */
@@ -6136,7 +6149,6 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 	/* pg_data_t should be reset to zero when it's allocated */
 	WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx);
 
-	reset_deferred_meminit(pgdat);
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
 	pgdat->per_cpu_nodestats = NULL;
@@ -6158,6 +6170,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 		(unsigned long)pgdat->node_mem_map);
 #endif
 
+	reset_deferred_meminit(pgdat);
 	free_area_init_core(pgdat);
 }
 
diff --git a/mm/page_io.c b/mm/page_io.c
index 23f6d0d3470f..2da71e627812 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -45,7 +45,7 @@ void end_swap_bio_write(struct bio *bio)
 {
 	struct page *page = bio->bi_io_vec[0].bv_page;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		SetPageError(page);
 		/*
 		 * We failed to write the page out to swap-space.
@@ -118,7 +118,7 @@ static void end_swap_bio_read(struct bio *bio)
 {
 	struct page *page = bio->bi_io_vec[0].bv_page;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		SetPageError(page);
 		ClearPageUptodate(page);
 		pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
diff --git a/mm/rmap.c b/mm/rmap.c
index 3ff241f714eb..130c238fe384 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -430,7 +430,7 @@ static void anon_vma_ctor(void *data)
 void __init anon_vma_init(void)
 {
 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
-			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
+			0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
 			anon_vma_ctor);
 	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
 			SLAB_PANIC|SLAB_ACCOUNT);
@@ -481,7 +481,7 @@ struct anon_vma *page_get_anon_vma(struct page *page)
 	 * If this page is still mapped, then its anon_vma cannot have been
 	 * freed.  But if it has been unmapped, we have no security against the
 	 * anon_vma structure being freed and reused (for another anon_vma:
-	 * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero()
+	 * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
 	 * above cannot corrupt).
 	 */
 	if (!page_mapped(page)) {
@@ -579,25 +579,13 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
 void try_to_unmap_flush(void)
 {
 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
-	int cpu;
 
 	if (!tlb_ubc->flush_required)
 		return;
 
-	cpu = get_cpu();
-
-	if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask)) {
-		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-		local_flush_tlb();
-		trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
-	}
-
-	if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids)
-		flush_tlb_others(&tlb_ubc->cpumask, NULL, 0, TLB_FLUSH_ALL);
-	cpumask_clear(&tlb_ubc->cpumask);
+	arch_tlbbatch_flush(&tlb_ubc->arch);
 	tlb_ubc->flush_required = false;
 	tlb_ubc->writable = false;
-	put_cpu();
 }
 
 /* Flush iff there are potentially writable TLB entries that can race with IO */
@@ -613,7 +601,7 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
 
-	cpumask_or(&tlb_ubc->cpumask, &tlb_ubc->cpumask, mm_cpumask(mm));
+	arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
 	tlb_ubc->flush_required = true;
 
 	/*
diff --git a/mm/shmem.c b/mm/shmem.c
index e67d6ba4e98e..9100c4952698 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -75,6 +75,7 @@ static struct vfsmount *shm_mnt;
 #include <uapi/linux/memfd.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/rmap.h>
+#include <linux/uuid.h>
 
 #include <linux/uaccess.h>
 #include <asm/pgtable.h>
@@ -1902,10 +1903,10 @@ unlock:
  * entry unconditionally - even if something else had already woken the
  * target.
  */
-static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	int ret = default_wake_function(wait, mode, sync, key);
-	list_del_init(&wait->task_list);
+	list_del_init(&wait->entry);
 	return ret;
 }
 
@@ -2840,7 +2841,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		spin_lock(&inode->i_lock);
 		inode->i_private = NULL;
 		wake_up_all(&shmem_falloc_waitq);
-		WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.task_list));
+		WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head));
 		spin_unlock(&inode->i_lock);
 		error = 0;
 		goto out;
@@ -3761,6 +3762,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
 #ifdef CONFIG_TMPFS_POSIX_ACL
 	sb->s_flags |= MS_POSIXACL;
 #endif
+	uuid_gen(&sb->s_uuid);
 
 	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
 	if (!inode)
diff --git a/mm/slab.c b/mm/slab.c
index 1880d482a0cb..2a31ee3c5814 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1728,7 +1728,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
 
 	freelist = page->freelist;
 	slab_destroy_debugcheck(cachep, page);
-	if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
+	if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU))
 		call_rcu(&page->rcu_head, kmem_rcu_free);
 	else
 		kmem_freepages(cachep, page);
@@ -1924,7 +1924,7 @@ static bool set_objfreelist_slab_cache(struct kmem_cache *cachep,
 
 	cachep->num = 0;
 
-	if (cachep->ctor || flags & SLAB_DESTROY_BY_RCU)
+	if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU)
 		return false;
 
 	left = calculate_slab_order(cachep, size,
@@ -2030,7 +2030,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 	if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
 						2 * sizeof(unsigned long long)))
 		flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
-	if (!(flags & SLAB_DESTROY_BY_RCU))
+	if (!(flags & SLAB_TYPESAFE_BY_RCU))
 		flags |= SLAB_POISON;
 #endif
 #endif
diff --git a/mm/slab.h b/mm/slab.h
index 65e7c3fcac72..9cfcf099709c 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -126,7 +126,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size,
 
 /* Legal flag mask for kmem_cache_create(), for various configurations */
 #define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS )
+			 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
 
 #if defined(CONFIG_DEBUG_SLAB)
 #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
@@ -415,7 +415,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
 	 * back there or track user information then we can
 	 * only use the space before that information.
 	 */
-	if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
+	if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER))
 		return s->inuse;
 	/*
 	 * Else we can use all the padding etc for the allocation
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 09d0e849b07f..01a0fe2eb332 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -39,7 +39,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
  * Set of flags that will prevent slab merging
  */
 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
-		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
+		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
 		SLAB_FAILSLAB | SLAB_KASAN)
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
@@ -500,7 +500,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
 	struct kmem_cache *s, *s2;
 
 	/*
-	 * On destruction, SLAB_DESTROY_BY_RCU kmem_caches are put on the
+	 * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
 	 * @slab_caches_to_rcu_destroy list.  The slab pages are freed
 	 * through RCU and and the associated kmem_cache are dereferenced
 	 * while freeing the pages, so the kmem_caches should be freed only
@@ -537,7 +537,7 @@ static int shutdown_cache(struct kmem_cache *s)
 	memcg_unlink_cache(s);
 	list_del(&s->list);
 
-	if (s->flags & SLAB_DESTROY_BY_RCU) {
+	if (s->flags & SLAB_TYPESAFE_BY_RCU) {
 		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
 		schedule_work(&slab_caches_to_rcu_destroy_work);
 	} else {
diff --git a/mm/slob.c b/mm/slob.c
index eac04d4357ec..1bae78d71096 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -126,7 +126,7 @@ static inline void clear_slob_page_free(struct page *sp)
 
 /*
  * struct slob_rcu is inserted at the tail of allocated slob blocks, which
- * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
+ * were created with a SLAB_TYPESAFE_BY_RCU slab. slob_rcu is used to free
  * the block using call_rcu.
  */
 struct slob_rcu {
@@ -524,7 +524,7 @@ EXPORT_SYMBOL(ksize);
 
 int __kmem_cache_create(struct kmem_cache *c, unsigned long flags)
 {
-	if (flags & SLAB_DESTROY_BY_RCU) {
+	if (flags & SLAB_TYPESAFE_BY_RCU) {
 		/* leave room for rcu footer at the end of object */
 		c->size += sizeof(struct slob_rcu);
 	}
@@ -598,7 +598,7 @@ static void kmem_rcu_free(struct rcu_head *head)
 void kmem_cache_free(struct kmem_cache *c, void *b)
 {
 	kmemleak_free_recursive(b, c->flags);
-	if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
+	if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) {
 		struct slob_rcu *slob_rcu;
 		slob_rcu = b + (c->size - sizeof(struct slob_rcu));
 		slob_rcu->size = c->size;
diff --git a/mm/slub.c b/mm/slub.c
index 7f4bc7027ed5..8addc535bcdc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1687,7 +1687,7 @@ static void rcu_free_slab(struct rcu_head *h)
 
 static void free_slab(struct kmem_cache *s, struct page *page)
 {
-	if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
+	if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
 		struct rcu_head *head;
 
 		if (need_reserve_slab_rcu) {
@@ -2963,7 +2963,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
 	 * slab_free_freelist_hook() could have put the items into quarantine.
 	 * If so, no need to free them.
 	 */
-	if (s->flags & SLAB_KASAN && !(s->flags & SLAB_DESTROY_BY_RCU))
+	if (s->flags & SLAB_KASAN && !(s->flags & SLAB_TYPESAFE_BY_RCU))
 		return;
 	do_slab_free(s, page, head, tail, cnt, addr);
 }
@@ -3433,7 +3433,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	 * the slab may touch the object after free or before allocation
 	 * then we should never poison the object itself.
 	 */
-	if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
+	if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
 			!s->ctor)
 		s->flags |= __OBJECT_POISON;
 	else
@@ -3455,7 +3455,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	 */
 	s->inuse = size;
 
-	if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
+	if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
 		s->ctor)) {
 		/*
 		 * Relocate free pointer after the object if it is not
@@ -3537,7 +3537,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
 	s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
 	s->reserved = 0;
 
-	if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
+	if (need_reserve_slab_rcu && (s->flags & SLAB_TYPESAFE_BY_RCU))
 		s->reserved = sizeof(struct rcu_head);
 
 	if (!calculate_sizes(s, -1))
@@ -5042,7 +5042,7 @@ SLAB_ATTR_RO(cache_dma);
 
 static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
 {
-	return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
+	return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
 }
 SLAB_ATTR_RO(destroy_by_rcu);
 
@@ -5512,6 +5512,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
 		char mbuf[64];
 		char *buf;
 		struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
+		ssize_t len;
 
 		if (!attr || !attr->store || !attr->show)
 			continue;
@@ -5536,8 +5537,9 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
 			buf = buffer;
 		}
 
-		attr->show(root_cache, buf);
-		attr->store(s, buf, strlen(buf));
+		len = attr->show(root_cache, buf);
+		if (len > 0)
+			attr->store(s, buf, len);
 	}
 
 	if (buffer)
@@ -5623,6 +5625,28 @@ static char *create_unique_id(struct kmem_cache *s)
 	return name;
 }
 
+static void sysfs_slab_remove_workfn(struct work_struct *work)
+{
+	struct kmem_cache *s =
+		container_of(work, struct kmem_cache, kobj_remove_work);
+
+	if (!s->kobj.state_in_sysfs)
+		/*
+		 * For a memcg cache, this may be called during
+		 * deactivation and again on shutdown.  Remove only once.
+		 * A cache is never shut down before deactivation is
+		 * complete, so no need to worry about synchronization.
+		 */
+		return;
+
+#ifdef CONFIG_MEMCG
+	kset_unregister(s->memcg_kset);
+#endif
+	kobject_uevent(&s->kobj, KOBJ_REMOVE);
+	kobject_del(&s->kobj);
+	kobject_put(&s->kobj);
+}
+
 static int sysfs_slab_add(struct kmem_cache *s)
 {
 	int err;
@@ -5630,6 +5654,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
 	struct kset *kset = cache_kset(s);
 	int unmergeable = slab_unmergeable(s);
 
+	INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
+
 	if (!kset) {
 		kobject_init(&s->kobj, &slab_ktype);
 		return 0;
@@ -5693,20 +5719,8 @@ static void sysfs_slab_remove(struct kmem_cache *s)
 		 */
 		return;
 
-	if (!s->kobj.state_in_sysfs)
-		/*
-		 * For a memcg cache, this may be called during
-		 * deactivation and again on shutdown.  Remove only once.
-		 * A cache is never shut down before deactivation is
-		 * complete, so no need to worry about synchronization.
-		 */
-		return;
-
-#ifdef CONFIG_MEMCG
-	kset_unregister(s->memcg_kset);
-#endif
-	kobject_uevent(&s->kobj, KOBJ_REMOVE);
-	kobject_del(&s->kobj);
+	kobject_get(&s->kobj);
+	schedule_work(&s->kobj_remove_work);
 }
 
 void sysfs_slab_release(struct kmem_cache *s)
diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index ac6318a064d3..3405b4ee1757 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -48,6 +48,9 @@ static int swap_cgroup_prepare(int type)
 		if (!page)
 			goto not_enough_page;
 		ctrl->map[idx] = page;
+
+		if (!(idx % SWAP_CLUSTER_MAX))
+			cond_resched();
 	}
 	return 0;
 not_enough_page:
diff --git a/mm/truncate.c b/mm/truncate.c
index 83a059e8cd1d..6479ed2afc53 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -67,17 +67,14 @@ static void truncate_exceptional_entry(struct address_space *mapping,
 
 /*
  * Invalidate exceptional entry if easily possible. This handles exceptional
- * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
- * clean entries.
+ * entries for invalidate_inode_pages().
  */
 static int invalidate_exceptional_entry(struct address_space *mapping,
 					pgoff_t index, void *entry)
 {
-	/* Handled by shmem itself */
-	if (shmem_mapping(mapping))
+	/* Handled by shmem itself, or for DAX we do nothing. */
+	if (shmem_mapping(mapping) || dax_mapping(mapping))
 		return 1;
-	if (dax_mapping(mapping))
-		return dax_invalidate_mapping_entry(mapping, index);
 	clear_shadow_entry(mapping, index, entry);
 	return 1;
 }
@@ -689,7 +686,17 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 		cond_resched();
 		index++;
 	}
-
+	/*
+	 * For DAX we invalidate page tables after invalidating radix tree.  We
+	 * could invalidate page tables while invalidating each entry however
+	 * that would be expensive. And doing range unmapping before doesn't
+	 * work as we have no cheap way to find whether radix tree entry didn't
+	 * get remapped later.
+	 */
+	if (dax_mapping(mapping)) {
+		unmap_mapping_range(mapping, (loff_t)start << PAGE_SHIFT,
+				    (loff_t)(end - start + 1) << PAGE_SHIFT, 0);
+	}
 out:
 	cleancache_invalidate_inode(mapping);
 	return ret;
diff --git a/mm/util.c b/mm/util.c
index 718154debc87..26be6407abd7 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -357,8 +357,11 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
 	WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL);
 
 	/*
-	 * Make sure that larger requests are not too disruptive - no OOM
-	 * killer and no allocation failure warnings as we have a fallback
+	 * We want to attempt a large physically contiguous block first because
+	 * it is less likely to fragment multiple larger blocks and therefore
+	 * contribute to a long term fragmentation less than vmalloc fallback.
+	 * However make sure that larger requests are not too disruptive - no
+	 * OOM killer and no allocation failure warnings as we have a fallback.
 	 */
 	if (size > PAGE_SIZE) {
 		kmalloc_flags |= __GFP_NOWARN;
@@ -382,7 +385,8 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
 	if (ret || size <= PAGE_SIZE)
 		return ret;
 
-	return __vmalloc_node_flags(size, node, flags);
+	return __vmalloc_node_flags_caller(size, node, flags,
+			__builtin_return_address(0));
 }
 EXPORT_SYMBOL(kvmalloc_node);
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1dda6d8a200a..ecc97f74ab18 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -287,10 +287,21 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
 	if (p4d_none(*p4d))
 		return NULL;
 	pud = pud_offset(p4d, addr);
-	if (pud_none(*pud))
+
+	/*
+	 * Don't dereference bad PUD or PMD (below) entries. This will also
+	 * identify huge mappings, which we may encounter on architectures
+	 * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
+	 * identified as vmalloc addresses by is_vmalloc_addr(), but are
+	 * not [unambiguously] associated with a struct page, so there is
+	 * no correct value to return for them.
+	 */
+	WARN_ON_ONCE(pud_bad(*pud));
+	if (pud_none(*pud) || pud_bad(*pud))
 		return NULL;
 	pmd = pmd_offset(pud, addr);
-	if (pmd_none(*pmd))
+	WARN_ON_ONCE(pmd_bad(*pmd));
+	if (pmd_none(*pmd) || pmd_bad(*pmd))
 		return NULL;
 
 	ptep = pte_offset_map(pmd, addr);
@@ -521,7 +532,7 @@ overflow:
 		}
 	}
 
-	if (printk_ratelimit())
+	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
 		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
 			size);
 	kfree(va);
@@ -1649,6 +1660,9 @@ void *vmap(struct page **pages, unsigned int count,
 }
 EXPORT_SYMBOL(vmap);
 
+static void *__vmalloc_node(unsigned long size, unsigned long align,
+			    gfp_t gfp_mask, pgprot_t prot,
+			    int node, const void *caller);
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 pgprot_t prot, int node)
 {
@@ -1791,7 +1805,7 @@ fail:
  *	with mm people.
  *
  */
-void *__vmalloc_node(unsigned long size, unsigned long align,
+static void *__vmalloc_node(unsigned long size, unsigned long align,
 			    gfp_t gfp_mask, pgprot_t prot,
 			    int node, const void *caller)
 {
@@ -1806,6 +1820,20 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
 }
 EXPORT_SYMBOL(__vmalloc);
 
+static inline void *__vmalloc_node_flags(unsigned long size,
+					int node, gfp_t flags)
+{
+	return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
+					node, __builtin_return_address(0));
+}
+
+
+void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags,
+				  void *caller)
+{
+	return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller);
+}
+
 /**
  *	vmalloc  -  allocate virtually contiguous memory
  *	@size:		allocation size
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 6063581f705c..ce0618bfa8d0 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -115,9 +115,9 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
 	unsigned long pressure = 0;
 
 	/*
-	 * reclaimed can be greater than scanned in cases
-	 * like THP, where the scanned is 1 and reclaimed
-	 * could be 512
+	 * reclaimed can be greater than scanned for things such as reclaimed
+	 * slab pages. shrink_node() just adds reclaimed pages without a
+	 * related increment to scanned pages.
 	 */
 	if (reclaimed >= scanned)
 		goto out;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2f45c0520f43..c3c1c6ac62da 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1449,7 +1449,7 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
  *
  * Appropriate locks must be held before calling this function.
  *
- * @nr_to_scan:	The number of pages to look through on the list.
+ * @nr_to_scan:	The number of eligible pages to look through on the list.
  * @lruvec:	The LRU vector to pull pages from.
  * @dst:	The temp list to put pages on to.
  * @nr_scanned:	The number of pages that were scanned.
@@ -1469,11 +1469,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
 	unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
 	unsigned long skipped = 0;
-	unsigned long scan, nr_pages;
+	unsigned long scan, total_scan, nr_pages;
 	LIST_HEAD(pages_skipped);
 
-	for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&
-					!list_empty(src); scan++) {
+	scan = 0;
+	for (total_scan = 0;
+	     scan < nr_to_scan && nr_taken < nr_to_scan && !list_empty(src);
+	     total_scan++) {
 		struct page *page;
 
 		page = lru_to_page(src);
@@ -1487,6 +1489,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			continue;
 		}
 
+		/*
+		 * Do not count skipped pages because that makes the function
+		 * return with no isolated pages if the LRU mostly contains
+		 * ineligible pages.  This causes the VM to not reclaim any
+		 * pages, triggering a premature OOM.
+		 */
+		scan++;
 		switch (__isolate_lru_page(page, mode)) {
 		case 0:
 			nr_pages = hpage_nr_pages(page);
@@ -1524,9 +1533,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			skipped += nr_skipped[zid];
 		}
 	}
-	*nr_scanned = scan;
+	*nr_scanned = total_scan;
 	trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
-				    scan, skipped, nr_taken, mode, lru);
+				    total_scan, skipped, nr_taken, mode, lru);
 	update_lru_sizes(lruvec, lru, nr_zone_taken);
 	return nr_taken;
 }
@@ -3643,7 +3652,7 @@ int kswapd_run(int nid)
 	pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
 	if (IS_ERR(pgdat->kswapd)) {
 		/* failure at boot is fatal */
-		BUG_ON(system_state == SYSTEM_BOOTING);
+		BUG_ON(system_state < SYSTEM_RUNNING);
 		pr_err("Failed to start kswapd on node %d\n", nid);
 		ret = PTR_ERR(pgdat->kswapd);
 		pgdat->kswapd = NULL;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f5fa1bd1eb16..76f73670200a 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1359,8 +1359,6 @@ static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
 			return zone == compare;
 	}
 
-	/* The zone must be somewhere! */
-	WARN_ON_ONCE(1);
 	return false;
 }
 
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 467069b73ce1..9649579b5b9f 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -277,7 +277,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	return 0;
 
 out_free_newdev:
-	free_netdev(new_dev);
+	if (new_dev->reg_state == NETREG_UNINITIALIZED)
+		free_netdev(new_dev);
 	return err;
 }
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 953b6728bd00..abc5f400fc71 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -813,7 +813,6 @@ static void vlan_dev_free(struct net_device *dev)
 
 	free_percpu(vlan->vlan_pcpu_stats);
 	vlan->vlan_pcpu_stats = NULL;
-	free_netdev(dev);
 }
 
 void vlan_setup(struct net_device *dev)
@@ -826,7 +825,8 @@ void vlan_setup(struct net_device *dev)
 	netif_keep_dst(dev);
 
 	dev->netdev_ops		= &vlan_netdev_ops;
-	dev->destructor		= vlan_dev_free;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= vlan_dev_free;
 	dev->ethtool_ops	= &vlan_ethtool_ops;
 
 	dev->min_mtu		= 0;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 7bc2208b6cc4..dca3cdd1a014 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -95,7 +95,7 @@ enum {
 
 struct p9_poll_wait {
 	struct p9_conn *conn;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	wait_queue_head_t *wait_addr;
 };
 
@@ -522,7 +522,7 @@ error:
 	clear_bit(Wworksched, &m->wsched);
 }
 
-static int p9_pollwake(wait_queue_t *wait, unsigned int mode, int sync, void *key)
+static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
 {
 	struct p9_poll_wait *pwait =
 		container_of(wait, struct p9_poll_wait, wait);
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 71e85643b3f9..6ad3e043c617 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -454,8 +454,8 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
 			goto error_xenbus;
 	}
 	priv->tag = xenbus_read(xbt, dev->nodename, "tag", NULL);
-	if (!priv->tag) {
-		ret = -EINVAL;
+	if (IS_ERR(priv->tag)) {
+		ret = PTR_ERR(priv->tag);
 		goto error_xenbus;
 	}
 	ret = xenbus_transaction_end(xbt, 0);
@@ -525,7 +525,7 @@ static struct xenbus_driver xen_9pfs_front_driver = {
 	.otherend_changed = xen_9pfs_front_changed,
 };
 
-int p9_trans_xen_init(void)
+static int p9_trans_xen_init(void)
 {
 	if (!xen_domain())
 		return -ENODEV;
@@ -537,7 +537,7 @@ int p9_trans_xen_init(void)
 }
 module_init(p9_trans_xen_init);
 
-void p9_trans_xen_exit(void)
+static void p9_trans_xen_exit(void)
 {
 	v9fs_unregister_trans(&p9_xen_trans);
 	return xenbus_unregister_driver(&xen_9pfs_front_driver);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 013e970eff39..000ca2f113ab 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1064,8 +1064,9 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
 
 		skb_new->protocol = eth_type_trans(skb_new, soft_iface);
 
-		soft_iface->stats.rx_packets++;
-		soft_iface->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size;
+		batadv_inc_counter(bat_priv, BATADV_CNT_RX);
+		batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
+				   skb->len + ETH_HLEN + hdr_size);
 
 		netif_rx(skb_new);
 		batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index e1ebe14ee2a6..ae9f4d37d34f 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -987,7 +987,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
 				batadv_dbg(BATADV_DBG_BLA, bat_priv,
 					   "recv_unicast_packet(): Dropped unicast pkt received from another backbone gw %pM.\n",
 					   orig_addr_gw);
-				return NET_RX_DROP;
+				goto free_skb;
 			}
 		}
 
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index b25789abf7b9..10f7edfb176e 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1034,8 +1034,6 @@ static void batadv_softif_free(struct net_device *dev)
 	 * netdev and its private data (bat_priv)
 	 */
 	rcu_barrier();
-
-	free_netdev(dev);
 }
 
 /**
@@ -1047,7 +1045,8 @@ static void batadv_softif_init_early(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops = &batadv_netdev_ops;
-	dev->destructor = batadv_softif_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = batadv_softif_free;
 	dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL;
 	dev->priv_flags |= IFF_NO_QUEUE;
 
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 608959989f8e..ab3b654b05cc 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -598,7 +598,7 @@ static void netdev_setup(struct net_device *dev)
 
 	dev->netdev_ops		= &netdev_ops;
 	dev->header_ops		= &header_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 static struct device_type bt_type = {
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index fbf251fef70f..5c4808b3da2d 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -484,7 +484,7 @@ static int bnep_session(void *arg)
 	struct net_device *dev = s->dev;
 	struct sock *sk = s->sock->sk;
 	struct sk_buff *skb;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	BT_DBG("");
 
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 9e59b6654126..14f7c8135c31 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -280,7 +280,7 @@ static int cmtp_session(void *arg)
 	struct cmtp_session *session = arg;
 	struct sock *sk = session->sock->sk;
 	struct sk_buff *skb;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	BT_DBG("session %p", session);
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 0bec4588c3c8..fc31161e98f2 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -1244,7 +1244,7 @@ static void hidp_session_run(struct hidp_session *session)
 static int hidp_session_thread(void *arg)
 {
 	struct hidp_session *session = arg;
-	wait_queue_t ctrl_wait, intr_wait;
+	wait_queue_entry_t ctrl_wait, intr_wait;
 
 	BT_DBG("session %p", session);
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 430b53e7d941..f0f3447e8aa4 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -379,7 +379,7 @@ void br_dev_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops = &br_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->ethtool_ops = &br_ethtool_ops;
 	SET_NETDEV_DEVTYPE(dev, &br_type);
 	dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index c5ce7745b230..32bd3ead9ba1 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -595,7 +595,7 @@ static int br_afspec(struct net_bridge *br,
 		err = 0;
 		switch (nla_type(attr)) {
 		case IFLA_BRIDGE_VLAN_TUNNEL_INFO:
-			if (!(p->flags & BR_VLAN_TUNNEL))
+			if (!p || !(p->flags & BR_VLAN_TUNNEL))
 				return -EINVAL;
 			err = br_parse_vlan_tunnel_info(attr, &tinfo_curr);
 			if (err)
@@ -835,6 +835,13 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
 			return -EPROTONOSUPPORT;
 		}
 	}
+
+	if (data[IFLA_BR_VLAN_DEFAULT_PVID]) {
+		__u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]);
+
+		if (defpvid >= VLAN_VID_MASK)
+			return -EINVAL;
+	}
 #endif
 
 	return 0;
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 08341d2aa9c9..6f12a5271219 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -179,6 +179,8 @@ static void br_stp_start(struct net_bridge *br)
 		br_debug(br, "using kernel STP\n");
 
 		/* To start timers on any ports left in blocking */
+		if (br->dev->flags & IFF_UP)
+			mod_timer(&br->hello_timer, jiffies + br->hello_time);
 		br_port_state_selection(br);
 	}
 
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index c98b3e5c140a..60b6fe277a8b 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -40,7 +40,7 @@ static void br_hello_timer_expired(unsigned long arg)
 	if (br->dev->flags & IFF_UP) {
 		br_config_bpdu_generation(br);
 
-		if (br->stp_enabled != BR_USER_STP)
+		if (br->stp_enabled == BR_KERNEL_STP)
 			mod_timer(&br->hello_timer,
 				  round_jiffies(jiffies + br->hello_time));
 	}
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 5929309beaa1..db85230e49c3 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -68,6 +68,9 @@ static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
 	if (e->ethproto != htons(ETH_P_ARP) ||
 	    e->invflags & EBT_IPROTO)
 		return -EINVAL;
+	if (ebt_invalid_target(info->target))
+		return -EINVAL;
+
 	return 0;
 }
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 9ec0c9f908fa..9c6e619f452b 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1373,7 +1373,8 @@ static inline int ebt_obj_to_user(char __user *um, const char *_name,
 	strlcpy(name, _name, sizeof(name));
 	if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) ||
 	    put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) ||
-	    xt_data_to_user(um + entrysize, data, usersize, datasize))
+	    xt_data_to_user(um + entrysize, data, usersize, datasize,
+			    XT_ALIGN(datasize)))
 		return -EFAULT;
 
 	return 0;
@@ -1658,7 +1659,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr,
 		if (match->compat_to_user(cm->data, m->data))
 			return -EFAULT;
 	} else {
-		if (xt_data_to_user(cm->data, m->data, match->usersize, msize))
+		if (xt_data_to_user(cm->data, m->data, match->usersize, msize,
+				    COMPAT_XT_ALIGN(msize)))
 			return -EFAULT;
 	}
 
@@ -1687,7 +1689,8 @@ static int compat_target_to_user(struct ebt_entry_target *t,
 		if (target->compat_to_user(cm->data, t->data))
 			return -EFAULT;
 	} else {
-		if (xt_data_to_user(cm->data, t->data, target->usersize, tsize))
+		if (xt_data_to_user(cm->data, t->data, target->usersize, tsize,
+				    COMPAT_XT_ALIGN(tsize)))
 			return -EFAULT;
 	}
 
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index adcad344c843..21f18ea2fce4 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -754,6 +754,10 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 
 	lock_sock(sk);
 
+	err = -EINVAL;
+	if (addr_len < offsetofend(struct sockaddr, sa_family))
+		goto out;
+
 	err = -EAFNOSUPPORT;
 	if (uaddr->sa_family != AF_CAIF)
 		goto out;
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 59ce1fcc220c..71b6ab240dea 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -81,11 +81,7 @@ static struct cfpkt *cfpkt_create_pfx(u16 len, u16 pfx)
 {
 	struct sk_buff *skb;
 
-	if (likely(in_interrupt()))
-		skb = alloc_skb(len + pfx, GFP_ATOMIC);
-	else
-		skb = alloc_skb(len + pfx, GFP_KERNEL);
-
+	skb = alloc_skb(len + pfx, GFP_ATOMIC);
 	if (unlikely(skb == NULL))
 		return NULL;
 
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 1816fc9f1ee7..fe3c53efb949 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -392,14 +392,14 @@ static void chnl_net_destructor(struct net_device *dev)
 {
 	struct chnl_net *priv = netdev_priv(dev);
 	caif_free_client(&priv->chnl);
-	free_netdev(dev);
 }
 
 static void ipcaif_net_setup(struct net_device *dev)
 {
 	struct chnl_net *priv;
 	dev->netdev_ops = &netdev_ops;
-	dev->destructor = chnl_net_destructor;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = chnl_net_destructor;
 	dev->flags |= IFF_NOARP;
 	dev->flags |= IFF_POINTOPOINT;
 	dev->mtu = GPRS_PDP_MTU;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index b6406fe33c76..88edac0f3e36 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -872,8 +872,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
 
 static int can_pernet_init(struct net *net)
 {
-	net->can.can_rcvlists_lock =
-		__SPIN_LOCK_UNLOCKED(net->can.can_rcvlists_lock);
+	spin_lock_init(&net->can.can_rcvlists_lock);
 	net->can.can_rx_alldev_list =
 		kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL);
 
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 2034fb926670..8757fb87dab8 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -151,7 +151,7 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	struct timespec validity;
 	void *tp, *tpend;
 	void **ptp;
-	struct ceph_crypto_key new_session_key;
+	struct ceph_crypto_key new_session_key = { 0 };
 	struct ceph_buffer *new_ticket_blob;
 	unsigned long new_expires, new_renew_after;
 	u64 new_secret_id;
@@ -215,6 +215,9 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	dout(" ticket blob is %d bytes\n", dlen);
 	ceph_decode_need(ptp, tpend, 1 + sizeof(u64), bad);
 	blob_struct_v = ceph_decode_8(ptp);
+	if (blob_struct_v != 1)
+		goto bad;
+
 	new_secret_id = ceph_decode_64(ptp);
 	ret = ceph_decode_buffer(&new_ticket_blob, ptp, tpend);
 	if (ret)
@@ -234,13 +237,13 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	     type, ceph_entity_type_name(type), th->secret_id,
 	     (int)th->ticket_blob->vec.iov_len);
 	xi->have_keys |= th->service;
-
-out:
-	return ret;
+	return 0;
 
 bad:
 	ret = -EINVAL;
-	goto out;
+out:
+	ceph_crypto_key_destroy(&new_session_key);
+	return ret;
 }
 
 static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 4eb773ccce11..47e94b560ba0 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -45,18 +45,16 @@ bool libceph_compatible(void *data)
 }
 EXPORT_SYMBOL(libceph_compatible);
 
-/*
- * find filename portion of a path (/foo/bar/baz -> baz)
- */
-const char *ceph_file_part(const char *s, int len)
+static int param_get_supported_features(char *buffer,
+					const struct kernel_param *kp)
 {
-	const char *e = s + len;
-
-	while (e != s && *(e-1) != '/')
-		e--;
-	return e;
+	return sprintf(buffer, "0x%llx", CEPH_FEATURES_SUPPORTED_DEFAULT);
 }
-EXPORT_SYMBOL(ceph_file_part);
+static const struct kernel_param_ops param_ops_supported_features = {
+	.get = param_get_supported_features,
+};
+module_param_cb(supported_features, &param_ops_supported_features, NULL,
+		S_IRUGO);
 
 const char *ceph_msg_type_name(int type)
 {
@@ -596,9 +594,7 @@ EXPORT_SYMBOL(ceph_client_gid);
 /*
  * create a fresh client instance
  */
-struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
-				       u64 supported_features,
-				       u64 required_features)
+struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
 {
 	struct ceph_client *client;
 	struct ceph_entity_addr *myaddr = NULL;
@@ -615,14 +611,12 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
 	init_waitqueue_head(&client->auth_wq);
 	client->auth_err = 0;
 
-	if (!ceph_test_opt(client, NOMSGAUTH))
-		required_features |= CEPH_FEATURE_MSG_AUTH;
-
 	client->extra_mon_dispatch = NULL;
-	client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT |
-		supported_features;
-	client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT |
-		required_features;
+	client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+	client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT;
+
+	if (!ceph_test_opt(client, NOMSGAUTH))
+		client->required_features |= CEPH_FEATURE_MSG_AUTH;
 
 	/* msgr */
 	if (ceph_test_opt(client, MYIP))
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index b9233b990399..08ada893f01e 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -179,6 +179,57 @@ int ceph_cls_break_lock(struct ceph_osd_client *osdc,
 }
 EXPORT_SYMBOL(ceph_cls_break_lock);
 
+int ceph_cls_set_cookie(struct ceph_osd_client *osdc,
+			struct ceph_object_id *oid,
+			struct ceph_object_locator *oloc,
+			char *lock_name, u8 type, char *old_cookie,
+			char *tag, char *new_cookie)
+{
+	int cookie_op_buf_size;
+	int name_len = strlen(lock_name);
+	int old_cookie_len = strlen(old_cookie);
+	int tag_len = strlen(tag);
+	int new_cookie_len = strlen(new_cookie);
+	void *p, *end;
+	struct page *cookie_op_page;
+	int ret;
+
+	cookie_op_buf_size = name_len + sizeof(__le32) +
+			     old_cookie_len + sizeof(__le32) +
+			     tag_len + sizeof(__le32) +
+			     new_cookie_len + sizeof(__le32) +
+			     sizeof(u8) + CEPH_ENCODING_START_BLK_LEN;
+	if (cookie_op_buf_size > PAGE_SIZE)
+		return -E2BIG;
+
+	cookie_op_page = alloc_page(GFP_NOIO);
+	if (!cookie_op_page)
+		return -ENOMEM;
+
+	p = page_address(cookie_op_page);
+	end = p + cookie_op_buf_size;
+
+	/* encode cls_lock_set_cookie_op struct */
+	ceph_start_encoding(&p, 1, 1,
+			    cookie_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
+	ceph_encode_string(&p, end, lock_name, name_len);
+	ceph_encode_8(&p, type);
+	ceph_encode_string(&p, end, old_cookie, old_cookie_len);
+	ceph_encode_string(&p, end, tag, tag_len);
+	ceph_encode_string(&p, end, new_cookie, new_cookie_len);
+
+	dout("%s lock_name %s type %d old_cookie %s tag %s new_cookie %s\n",
+	     __func__, lock_name, type, old_cookie, tag, new_cookie);
+	ret = ceph_osdc_call(osdc, oid, oloc, "lock", "set_cookie",
+			     CEPH_OSD_FLAG_WRITE, cookie_op_page,
+			     cookie_op_buf_size, NULL, NULL);
+
+	dout("%s: status %d\n", __func__, ret);
+	__free_page(cookie_op_page);
+	return ret;
+}
+EXPORT_SYMBOL(ceph_cls_set_cookie);
+
 void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers)
 {
 	int i;
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index c62b2b029a6e..71ba13927b3d 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -62,7 +62,8 @@ static int osdmap_show(struct seq_file *s, void *p)
 		return 0;
 
 	down_read(&osdc->lock);
-	seq_printf(s, "epoch %d flags 0x%x\n", map->epoch, map->flags);
+	seq_printf(s, "epoch %u barrier %u flags 0x%x\n", map->epoch,
+			osdc->epoch_barrier, map->flags);
 
 	for (n = rb_first(&map->pg_pools); n; n = rb_next(n)) {
 		struct ceph_pg_pool_info *pi =
@@ -177,9 +178,7 @@ static void dump_request(struct seq_file *s, struct ceph_osd_request *req)
 	seq_printf(s, "%llu\t", req->r_tid);
 	dump_target(s, &req->r_t);
 
-	seq_printf(s, "\t%d\t%u'%llu", req->r_attempts,
-		   le32_to_cpu(req->r_replay_version.epoch),
-		   le64_to_cpu(req->r_replay_version.version));
+	seq_printf(s, "\t%d", req->r_attempts);
 
 	for (i = 0; i < req->r_num_ops; i++) {
 		struct ceph_osd_req_op *op = &req->r_ops[i];
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 5766a6c896c4..588a91930051 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1174,8 +1174,8 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
  * Returns true if the result moves the cursor on to the next piece
  * of the data item.
  */
-static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
-				size_t bytes)
+static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
+				  size_t bytes)
 {
 	bool new_piece;
 
@@ -1207,8 +1207,6 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
 		new_piece = true;
 	}
 	cursor->need_crc = new_piece;
-
-	return new_piece;
 }
 
 static size_t sizeof_footer(struct ceph_connection *con)
@@ -1577,7 +1575,6 @@ static int write_partial_message_data(struct ceph_connection *con)
 		size_t page_offset;
 		size_t length;
 		bool last_piece;
-		bool need_crc;
 		int ret;
 
 		page = ceph_msg_data_next(cursor, &page_offset, &length,
@@ -1592,7 +1589,7 @@ static int write_partial_message_data(struct ceph_connection *con)
 		}
 		if (do_datacrc && cursor->need_crc)
 			crc = ceph_crc32c_page(crc, page, page_offset, length);
-		need_crc = ceph_msg_data_advance(cursor, (size_t)ret);
+		ceph_msg_data_advance(cursor, (size_t)ret);
 	}
 
 	dout("%s %p msg %p done\n", __func__, con, msg);
@@ -2231,10 +2228,18 @@ static void process_ack(struct ceph_connection *con)
 	struct ceph_msg *m;
 	u64 ack = le64_to_cpu(con->in_temp_ack);
 	u64 seq;
+	bool reconnect = (con->in_tag == CEPH_MSGR_TAG_SEQ);
+	struct list_head *list = reconnect ? &con->out_queue : &con->out_sent;
 
-	while (!list_empty(&con->out_sent)) {
-		m = list_first_entry(&con->out_sent, struct ceph_msg,
-				     list_head);
+	/*
+	 * In the reconnect case, con_fault() has requeued messages
+	 * in out_sent. We should cleanup old messages according to
+	 * the reconnect seq.
+	 */
+	while (!list_empty(list)) {
+		m = list_first_entry(list, struct ceph_msg, list_head);
+		if (reconnect && m->needs_out_seq)
+			break;
 		seq = le64_to_cpu(m->hdr.seq);
 		if (seq > ack)
 			break;
@@ -2243,6 +2248,7 @@ static void process_ack(struct ceph_connection *con)
 		m->ack_stamp = jiffies;
 		ceph_msg_remove(m);
 	}
+
 	prepare_read_tag(con);
 }
 
@@ -2299,7 +2305,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
 
 		if (do_datacrc)
 			crc = ceph_crc32c_page(crc, page, page_offset, ret);
-		(void) ceph_msg_data_advance(cursor, (size_t)ret);
+		ceph_msg_data_advance(cursor, (size_t)ret);
 	}
 	if (do_datacrc)
 		con->in_data_crc = crc;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 29a0ef351c5e..250f11f78609 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -43,15 +43,13 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
 	int i, err = -EINVAL;
 	struct ceph_fsid fsid;
 	u32 epoch, num_mon;
-	u16 version;
 	u32 len;
 
 	ceph_decode_32_safe(&p, end, len, bad);
 	ceph_decode_need(&p, end, len, bad);
 
 	dout("monmap_decode %p %p len %d\n", p, end, (int)(end-p));
-
-	ceph_decode_16_safe(&p, end, version, bad);
+	p += sizeof(u16);  /* skip version */
 
 	ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad);
 	ceph_decode_copy(&p, &fsid, sizeof(fsid));
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 242d7c0d92f8..924f07c36ddb 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -961,6 +961,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 				       truncate_size, truncate_seq);
 	}
 
+	req->r_abort_on_full = true;
 	req->r_flags = flags;
 	req->r_base_oloc.pool = layout->pool_id;
 	req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns);
@@ -1005,7 +1006,7 @@ static bool osd_registered(struct ceph_osd *osd)
  */
 static void osd_init(struct ceph_osd *osd)
 {
-	atomic_set(&osd->o_ref, 1);
+	refcount_set(&osd->o_ref, 1);
 	RB_CLEAR_NODE(&osd->o_node);
 	osd->o_requests = RB_ROOT;
 	osd->o_linger_requests = RB_ROOT;
@@ -1050,9 +1051,9 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
 
 static struct ceph_osd *get_osd(struct ceph_osd *osd)
 {
-	if (atomic_inc_not_zero(&osd->o_ref)) {
-		dout("get_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref)-1,
-		     atomic_read(&osd->o_ref));
+	if (refcount_inc_not_zero(&osd->o_ref)) {
+		dout("get_osd %p %d -> %d\n", osd, refcount_read(&osd->o_ref)-1,
+		     refcount_read(&osd->o_ref));
 		return osd;
 	} else {
 		dout("get_osd %p FAIL\n", osd);
@@ -1062,9 +1063,9 @@ static struct ceph_osd *get_osd(struct ceph_osd *osd)
 
 static void put_osd(struct ceph_osd *osd)
 {
-	dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
-	     atomic_read(&osd->o_ref) - 1);
-	if (atomic_dec_and_test(&osd->o_ref)) {
+	dout("put_osd %p %d -> %d\n", osd, refcount_read(&osd->o_ref),
+	     refcount_read(&osd->o_ref) - 1);
+	if (refcount_dec_and_test(&osd->o_ref)) {
 		osd_cleanup(osd);
 		kfree(osd);
 	}
@@ -1297,8 +1298,9 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc,
 		       __pool_full(pi);
 
 	WARN_ON(pi->id != t->base_oloc.pool);
-	return (t->flags & CEPH_OSD_FLAG_READ && pauserd) ||
-	       (t->flags & CEPH_OSD_FLAG_WRITE && pausewr);
+	return ((t->flags & CEPH_OSD_FLAG_READ) && pauserd) ||
+	       ((t->flags & CEPH_OSD_FLAG_WRITE) && pausewr) ||
+	       (osdc->osdmap->epoch < osdc->epoch_barrier);
 }
 
 enum calc_target_result {
@@ -1503,9 +1505,10 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
 	ceph_encode_32(&p, req->r_flags);
 	ceph_encode_timespec(p, &req->r_mtime);
 	p += sizeof(struct ceph_timespec);
-	/* aka reassert_version */
-	memcpy(p, &req->r_replay_version, sizeof(req->r_replay_version));
-	p += sizeof(req->r_replay_version);
+
+	/* reassert_version */
+	memset(p, 0, sizeof(struct ceph_eversion));
+	p += sizeof(struct ceph_eversion);
 
 	/* oloc */
 	ceph_start_encoding(&p, 5, 4,
@@ -1626,6 +1629,7 @@ static void maybe_request_map(struct ceph_osd_client *osdc)
 		ceph_monc_renew_subs(&osdc->client->monc);
 }
 
+static void complete_request(struct ceph_osd_request *req, int err);
 static void send_map_check(struct ceph_osd_request *req);
 
 static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
@@ -1635,6 +1639,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
 	enum calc_target_result ct_res;
 	bool need_send = false;
 	bool promoted = false;
+	bool need_abort = false;
 
 	WARN_ON(req->r_tid);
 	dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
@@ -1650,8 +1655,13 @@ again:
 		goto promote;
 	}
 
-	if ((req->r_flags & CEPH_OSD_FLAG_WRITE) &&
-	    ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) {
+	if (osdc->osdmap->epoch < osdc->epoch_barrier) {
+		dout("req %p epoch %u barrier %u\n", req, osdc->osdmap->epoch,
+		     osdc->epoch_barrier);
+		req->r_t.paused = true;
+		maybe_request_map(osdc);
+	} else if ((req->r_flags & CEPH_OSD_FLAG_WRITE) &&
+		   ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) {
 		dout("req %p pausewr\n", req);
 		req->r_t.paused = true;
 		maybe_request_map(osdc);
@@ -1669,6 +1679,8 @@ again:
 		pr_warn_ratelimited("FULL or reached pool quota\n");
 		req->r_t.paused = true;
 		maybe_request_map(osdc);
+		if (req->r_abort_on_full)
+			need_abort = true;
 	} else if (!osd_homeless(osd)) {
 		need_send = true;
 	} else {
@@ -1685,6 +1697,8 @@ again:
 	link_request(osd, req);
 	if (need_send)
 		send_request(req);
+	else if (need_abort)
+		complete_request(req, -ENOSPC);
 	mutex_unlock(&osd->lock);
 
 	if (ct_res == CALC_TARGET_POOL_DNE)
@@ -1799,6 +1813,97 @@ static void abort_request(struct ceph_osd_request *req, int err)
 	complete_request(req, err);
 }
 
+static void update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
+{
+	if (likely(eb > osdc->epoch_barrier)) {
+		dout("updating epoch_barrier from %u to %u\n",
+				osdc->epoch_barrier, eb);
+		osdc->epoch_barrier = eb;
+		/* Request map if we're not to the barrier yet */
+		if (eb > osdc->osdmap->epoch)
+			maybe_request_map(osdc);
+	}
+}
+
+void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
+{
+	down_read(&osdc->lock);
+	if (unlikely(eb > osdc->epoch_barrier)) {
+		up_read(&osdc->lock);
+		down_write(&osdc->lock);
+		update_epoch_barrier(osdc, eb);
+		up_write(&osdc->lock);
+	} else {
+		up_read(&osdc->lock);
+	}
+}
+EXPORT_SYMBOL(ceph_osdc_update_epoch_barrier);
+
+/*
+ * Drop all pending requests that are stalled waiting on a full condition to
+ * clear, and complete them with ENOSPC as the return code. Set the
+ * osdc->epoch_barrier to the latest map epoch that we've seen if any were
+ * cancelled.
+ */
+static void ceph_osdc_abort_on_full(struct ceph_osd_client *osdc)
+{
+	struct rb_node *n;
+	bool victims = false;
+
+	dout("enter abort_on_full\n");
+
+	if (!ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && !have_pool_full(osdc))
+		goto out;
+
+	/* Scan list and see if there is anything to abort */
+	for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
+		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+		struct rb_node *m;
+
+		m = rb_first(&osd->o_requests);
+		while (m) {
+			struct ceph_osd_request *req = rb_entry(m,
+					struct ceph_osd_request, r_node);
+			m = rb_next(m);
+
+			if (req->r_abort_on_full) {
+				victims = true;
+				break;
+			}
+		}
+		if (victims)
+			break;
+	}
+
+	if (!victims)
+		goto out;
+
+	/*
+	 * Update the barrier to current epoch if it's behind that point,
+	 * since we know we have some calls to be aborted in the tree.
+	 */
+	update_epoch_barrier(osdc, osdc->osdmap->epoch);
+
+	for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
+		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+		struct rb_node *m;
+
+		m = rb_first(&osd->o_requests);
+		while (m) {
+			struct ceph_osd_request *req = rb_entry(m,
+					struct ceph_osd_request, r_node);
+			m = rb_next(m);
+
+			if (req->r_abort_on_full &&
+			    (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
+			     pool_full(osdc, req->r_t.target_oloc.pool)))
+				abort_request(req, -ENOSPC);
+		}
+	}
+out:
+	dout("return abort_on_full barrier=%u\n", osdc->epoch_barrier);
+}
+
 static void check_pool_dne(struct ceph_osd_request *req)
 {
 	struct ceph_osd_client *osdc = req->r_osdc;
@@ -3252,11 +3357,13 @@ done:
 	pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) ||
 		  ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
 		  have_pool_full(osdc);
-	if (was_pauserd || was_pausewr || pauserd || pausewr)
+	if (was_pauserd || was_pausewr || pauserd || pausewr ||
+	    osdc->osdmap->epoch < osdc->epoch_barrier)
 		maybe_request_map(osdc);
 
 	kick_requests(osdc, &need_resend, &need_resend_linger);
 
+	ceph_osdc_abort_on_full(osdc);
 	ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP,
 			  osdc->osdmap->epoch);
 	up_write(&osdc->lock);
@@ -4126,7 +4233,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
 		close_osd(osd);
 	}
 	up_write(&osdc->lock);
-	WARN_ON(atomic_read(&osdc->homeless_osd.o_ref) != 1);
+	WARN_ON(refcount_read(&osdc->homeless_osd.o_ref) != 1);
 	osd_cleanup(&osdc->homeless_osd);
 
 	WARN_ON(!list_empty(&osdc->osd_lru));
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index ffe9e904d4d1..55e3a477f92d 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -317,6 +317,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 		u32 yes;
 		struct crush_rule *r;
 
+		err = -EINVAL;
 		ceph_decode_32_safe(p, end, yes, bad);
 		if (!yes) {
 			dout("crush_decode NO rule %d off %x %p to %p\n",
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 6864007e64fc..ce09f73be759 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -16,7 +16,7 @@ static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
 
 void ceph_pagelist_release(struct ceph_pagelist *pl)
 {
-	if (!atomic_dec_and_test(&pl->refcnt))
+	if (!refcount_dec_and_test(&pl->refcnt))
 		return;
 	ceph_pagelist_unmap_tail(pl);
 	while (!list_empty(&pl->head)) {
diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c
index 705414e78ae0..e14a5d038656 100644
--- a/net/ceph/snapshot.c
+++ b/net/ceph/snapshot.c
@@ -49,7 +49,7 @@ struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
 	if (!snapc)
 		return NULL;
 
-	atomic_set(&snapc->nref, 1);
+	refcount_set(&snapc->nref, 1);
 	snapc->num_snaps = snap_count;
 
 	return snapc;
@@ -59,7 +59,7 @@ EXPORT_SYMBOL(ceph_create_snap_context);
 struct ceph_snap_context *ceph_get_snap_context(struct ceph_snap_context *sc)
 {
 	if (sc)
-		atomic_inc(&sc->nref);
+		refcount_inc(&sc->nref);
 	return sc;
 }
 EXPORT_SYMBOL(ceph_get_snap_context);
@@ -68,7 +68,7 @@ void ceph_put_snap_context(struct ceph_snap_context *sc)
 {
 	if (!sc)
 		return;
-	if (atomic_dec_and_test(&sc->nref)) {
+	if (refcount_dec_and_test(&sc->nref)) {
 		/*printk(" deleting snap_context %p\n", sc);*/
 		kfree(sc);
 	}
diff --git a/net/core/datagram.c b/net/core/datagram.c
index db1866f2ffcf..34678828e2bb 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -68,7 +68,7 @@ static inline int connection_based(struct sock *sk)
 	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
 }
 
-static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
+static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
 				  void *key)
 {
 	unsigned long bits = (unsigned long)key;
diff --git a/net/core/dev.c b/net/core/dev.c
index 96cf83da0d66..416137c64bf8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1253,8 +1253,9 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 	if (!new_ifalias)
 		return -ENOMEM;
 	dev->ifalias = new_ifalias;
+	memcpy(dev->ifalias, alias, len);
+	dev->ifalias[len] = 0;
 
-	strlcpy(dev->ifalias, alias, len+1);
 	return len;
 }
 
@@ -4766,6 +4767,13 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
 }
 EXPORT_SYMBOL(gro_find_complete_by_type);
 
+static void napi_skb_free_stolen_head(struct sk_buff *skb)
+{
+	skb_dst_drop(skb);
+	secpath_reset(skb);
+	kmem_cache_free(skbuff_head_cache, skb);
+}
+
 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
 	switch (ret) {
@@ -4779,13 +4787,10 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 		break;
 
 	case GRO_MERGED_FREE:
-		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
-			skb_dst_drop(skb);
-			secpath_reset(skb);
-			kmem_cache_free(skbuff_head_cache, skb);
-		} else {
+		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+			napi_skb_free_stolen_head(skb);
+		else
 			__kfree_skb(skb);
-		}
 		break;
 
 	case GRO_HELD:
@@ -4857,10 +4862,16 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
 		break;
 
 	case GRO_DROP:
-	case GRO_MERGED_FREE:
 		napi_reuse_skb(napi, skb);
 		break;
 
+	case GRO_MERGED_FREE:
+		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+			napi_skb_free_stolen_head(skb);
+		else
+			napi_reuse_skb(napi, skb);
+		break;
+
 	case GRO_MERGED:
 	case GRO_CONSUMED:
 		break;
@@ -4948,6 +4959,19 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(__skb_gro_checksum_complete);
 
+static void net_rps_send_ipi(struct softnet_data *remsd)
+{
+#ifdef CONFIG_RPS
+	while (remsd) {
+		struct softnet_data *next = remsd->rps_ipi_next;
+
+		if (cpu_online(remsd->cpu))
+			smp_call_function_single_async(remsd->cpu, &remsd->csd);
+		remsd = next;
+	}
+#endif
+}
+
 /*
  * net_rps_action_and_irq_enable sends any pending IPI's for rps.
  * Note: called with local irq disabled, but exits with local irq enabled.
@@ -4963,14 +4987,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
 		local_irq_enable();
 
 		/* Send pending IPI's to kick RPS processing on remote cpus. */
-		while (remsd) {
-			struct softnet_data *next = remsd->rps_ipi_next;
-
-			if (cpu_online(remsd->cpu))
-				smp_call_function_single_async(remsd->cpu,
-							   &remsd->csd);
-			remsd = next;
-		}
+		net_rps_send_ipi(remsd);
 	} else
 #endif
 		local_irq_enable();
@@ -5199,8 +5216,6 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
 	if (rc == BUSY_POLL_BUDGET)
 		__napi_schedule(napi);
 	local_bh_enable();
-	if (local_softirq_pending())
-		do_softirq();
 }
 
 void napi_busy_loop(unsigned int napi_id,
@@ -6852,6 +6867,32 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
+bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op)
+{
+	struct netdev_xdp xdp;
+
+	memset(&xdp, 0, sizeof(xdp));
+	xdp.command = XDP_QUERY_PROG;
+
+	/* Query must always succeed. */
+	WARN_ON(xdp_op(dev, &xdp) < 0);
+	return xdp.prog_attached;
+}
+
+static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
+			   struct netlink_ext_ack *extack,
+			   struct bpf_prog *prog)
+{
+	struct netdev_xdp xdp;
+
+	memset(&xdp, 0, sizeof(xdp));
+	xdp.command = XDP_SETUP_PROG;
+	xdp.extack = extack;
+	xdp.prog = prog;
+
+	return xdp_op(dev, &xdp);
+}
+
 /**
  *	dev_change_xdp_fd - set or clear a bpf program for a device rx path
  *	@dev: device
@@ -6864,41 +6905,34 @@ EXPORT_SYMBOL(dev_change_proto_down);
 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, u32 flags)
 {
-	int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp);
 	const struct net_device_ops *ops = dev->netdev_ops;
 	struct bpf_prog *prog = NULL;
-	struct netdev_xdp xdp;
+	xdp_op_t xdp_op, xdp_chk;
 	int err;
 
 	ASSERT_RTNL();
 
-	xdp_op = ops->ndo_xdp;
+	xdp_op = xdp_chk = ops->ndo_xdp;
+	if (!xdp_op && (flags & XDP_FLAGS_DRV_MODE))
+		return -EOPNOTSUPP;
 	if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
 		xdp_op = generic_xdp_install;
+	if (xdp_op == xdp_chk)
+		xdp_chk = generic_xdp_install;
 
 	if (fd >= 0) {
-		if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
-			memset(&xdp, 0, sizeof(xdp));
-			xdp.command = XDP_QUERY_PROG;
-
-			err = xdp_op(dev, &xdp);
-			if (err < 0)
-				return err;
-			if (xdp.prog_attached)
-				return -EBUSY;
-		}
+		if (xdp_chk && __dev_xdp_attached(dev, xdp_chk))
+			return -EEXIST;
+		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
+		    __dev_xdp_attached(dev, xdp_op))
+			return -EBUSY;
 
 		prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
 		if (IS_ERR(prog))
 			return PTR_ERR(prog);
 	}
 
-	memset(&xdp, 0, sizeof(xdp));
-	xdp.command = XDP_SETUP_PROG;
-	xdp.extack = extack;
-	xdp.prog = prog;
-
-	err = xdp_op(dev, &xdp);
+	err = dev_xdp_install(dev, xdp_op, extack, prog);
 	if (err < 0 && prog)
 		bpf_prog_put(prog);
 
@@ -7482,6 +7516,8 @@ out:
 err_uninit:
 	if (dev->netdev_ops->ndo_uninit)
 		dev->netdev_ops->ndo_uninit(dev);
+	if (dev->priv_destructor)
+		dev->priv_destructor(dev);
 	goto out;
 }
 EXPORT_SYMBOL(register_netdevice);
@@ -7689,8 +7725,10 @@ void netdev_run_todo(void)
 		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
 		WARN_ON(dev->dn_ptr);
 
-		if (dev->destructor)
-			dev->destructor(dev);
+		if (dev->priv_destructor)
+			dev->priv_destructor(dev);
+		if (dev->needs_free_netdev)
+			free_netdev(dev);
 
 		/* Report a network device has been unregistered */
 		rtnl_lock();
@@ -7755,9 +7793,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 	} else {
 		netdev_stats_to_stats64(storage, &dev->stats);
 	}
-	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
-	storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
-	storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
+	storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
+	storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
+	storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
 	return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
@@ -8173,7 +8211,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
 	struct sk_buff **list_skb;
 	struct sk_buff *skb;
 	unsigned int cpu;
-	struct softnet_data *sd, *oldsd;
+	struct softnet_data *sd, *oldsd, *remsd = NULL;
 
 	local_irq_disable();
 	cpu = smp_processor_id();
@@ -8214,6 +8252,13 @@ static int dev_cpu_dead(unsigned int oldcpu)
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_enable();
 
+#ifdef CONFIG_RPS
+	remsd = oldsd->rps_ipi_list;
+	oldsd->rps_ipi_list = NULL;
+#endif
+	/* send out pending IPI's on offline CPU */
+	net_rps_send_ipi(remsd);
+
 	/* Process offline CPU's input_pkt_queue */
 	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
 		netif_rx_ni(skb);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d293506..27fad31784a8 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -410,6 +410,22 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	if (cmd == SIOCGIFNAME)
 		return dev_ifname(net, (struct ifreq __user *)arg);
 
+	/*
+	 * Take care of Wireless Extensions. Unfortunately struct iwreq
+	 * isn't a proper subset of struct ifreq (it's 8 byte shorter)
+	 * so we need to treat it specially, otherwise applications may
+	 * fault if the struct they're passing happens to land at the
+	 * end of a mapped page.
+	 */
+	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+		struct iwreq iwr;
+
+		if (copy_from_user(&iwr, arg, sizeof(iwr)))
+			return -EFAULT;
+
+		return wext_handle_ioctl(net, &iwr, cmd, arg);
+	}
+
 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 		return -EFAULT;
 
@@ -559,9 +575,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 				ret = -EFAULT;
 			return ret;
 		}
-		/* Take care of Wireless Extensions */
-		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
-			return wext_handle_ioctl(net, &ifr, cmd, arg);
 		return -ENOTTY;
 	}
 }
diff --git a/net/core/devlink.c b/net/core/devlink.c
index b0b87a292e7c..a0adfc31a3fe 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1680,8 +1680,10 @@ start_again:
 
 	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
 			  &devlink_nl_family, NLM_F_MULTI, cmd);
-	if (!hdr)
+	if (!hdr) {
+		nlmsg_free(skb);
 		return -EMSGSIZE;
+	}
 
 	if (devlink_nl_put_handle(skb, devlink))
 		goto nla_put_failure;
@@ -2098,8 +2100,10 @@ start_again:
 
 	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
 			  &devlink_nl_family, NLM_F_MULTI, cmd);
-	if (!hdr)
+	if (!hdr) {
+		nlmsg_free(skb);
 		return -EMSGSIZE;
+	}
 
 	if (devlink_nl_put_handle(skb, devlink))
 		goto nla_put_failure;
diff --git a/net/core/dst.c b/net/core/dst.c
index 960e503b5a52..13ba4a090c41 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dst_discard_out);
 
-const u32 dst_default_metrics[RTAX_MAX + 1] = {
+const struct dst_metrics dst_default_metrics = {
 	/* This initializer is needed to force linker to place this variable
 	 * into const section. Otherwise it might end into bss section.
 	 * We really want to avoid false sharing on this variable, and catch
 	 * any writes on it.
 	 */
-	[RTAX_MAX] = 0xdeadbeef,
+	.refcnt = ATOMIC_INIT(1),
 };
 
 void dst_init(struct dst_entry *dst, struct dst_ops *ops,
@@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
 	if (dev)
 		dev_hold(dev);
 	dst->ops = ops;
-	dst_init_metrics(dst, dst_default_metrics, true);
+	dst_init_metrics(dst, dst_default_metrics.metrics, true);
 	dst->expires = 0UL;
 	dst->path = dst;
 	dst->from = NULL;
@@ -314,25 +314,30 @@ EXPORT_SYMBOL(dst_release);
 
 u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
 {
-	u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
+	struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC);
 
 	if (p) {
-		u32 *old_p = __DST_METRICS_PTR(old);
+		struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
 		unsigned long prev, new;
 
-		memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+		atomic_set(&p->refcnt, 1);
+		memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));
 
 		new = (unsigned long) p;
 		prev = cmpxchg(&dst->_metrics, old, new);
 
 		if (prev != old) {
 			kfree(p);
-			p = __DST_METRICS_PTR(prev);
+			p = (struct dst_metrics *)__DST_METRICS_PTR(prev);
 			if (prev & DST_METRICS_READ_ONLY)
 				p = NULL;
+		} else if (prev & DST_METRICS_REFCOUNTED) {
+			if (atomic_dec_and_test(&old_p->refcnt))
+				kfree(old_p);
 		}
 	}
-	return p;
+	BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0);
+	return (u32 *)p;
 }
 EXPORT_SYMBOL(dst_cow_metrics_generic);
 
@@ -341,7 +346,7 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
 {
 	unsigned long prev, new;
 
-	new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY;
+	new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY;
 	prev = cmpxchg(&dst->_metrics, old, new);
 	if (prev == old)
 		kfree(__DST_METRICS_PTR(old));
@@ -464,6 +469,20 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
 		spin_lock_bh(&dst_garbage.lock);
 		dst = dst_garbage.list;
 		dst_garbage.list = NULL;
+		/* The code in dst_ifdown places a hold on the loopback device.
+		 * If the gc entry processing is set to expire after a lengthy
+		 * interval, this hold can cause netdev_wait_allrefs() to hang
+		 * out and wait for a long time -- until the the loopback
+		 * interface is released.  If we're really unlucky, it'll emit
+		 * pr_emerg messages to console too.  Reset the interval here,
+		 * so dst cleanups occur in a more timely fashion.
+		 */
+		if (dst_garbage.timer_inc > DST_GC_INC) {
+			dst_garbage.timer_inc = DST_GC_INC;
+			dst_garbage.timer_expires = DST_GC_MIN;
+			mod_delayed_work(system_wq, &dst_gc_work,
+					 dst_garbage.timer_expires);
+		}
 		spin_unlock_bh(&dst_garbage.lock);
 
 		if (last)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index f21c4d3aeae0..3bba291c6c32 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -568,7 +568,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct net *net = sock_net(skb->sk);
 	struct fib_rule_hdr *frh = nlmsg_data(nlh);
 	struct fib_rules_ops *ops = NULL;
-	struct fib_rule *rule, *tmp;
+	struct fib_rule *rule, *r;
 	struct nlattr *tb[FRA_MAX+1];
 	struct fib_kuid_range range;
 	int err = -EINVAL;
@@ -668,16 +668,23 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 		/*
 		 * Check if this rule is a target to any of them. If so,
+		 * adjust to the next one with the same preference or
 		 * disable them. As this operation is eventually very
-		 * expensive, it is only performed if goto rules have
-		 * actually been added.
+		 * expensive, it is only performed if goto rules, except
+		 * current if it is goto rule, have actually been added.
 		 */
 		if (ops->nr_goto_rules > 0) {
-			list_for_each_entry(tmp, &ops->rules_list, list) {
-				if (rtnl_dereference(tmp->ctarget) == rule) {
-					RCU_INIT_POINTER(tmp->ctarget, NULL);
+			struct fib_rule *n;
+
+			n = list_next_entry(rule, list);
+			if (&n->list == &ops->rules_list || n->pref != rule->pref)
+				n = NULL;
+			list_for_each_entry(r, &ops->rules_list, list) {
+				if (rtnl_dereference(r->ctarget) != rule)
+					continue;
+				rcu_assign_pointer(r->ctarget, n);
+				if (!n)
 					ops->unresolved_rules++;
-				}
 			}
 		}
 
diff --git a/net/core/filter.c b/net/core/filter.c
index a253a6197e6b..a6bb95fa87b2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2281,6 +2281,7 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_skb_change_head ||
 	    func == bpf_skb_change_tail ||
 	    func == bpf_skb_pull_data ||
+	    func == bpf_clone_redirect ||
 	    func == bpf_l3_csum_replace ||
 	    func == bpf_l4_csum_replace ||
 	    func == bpf_xdp_adjust_head)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 58b0bcc125b5..d274f81fcc2c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1132,10 +1132,6 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 		lladdr = neigh->ha;
 	}
 
-	if (new & NUD_CONNECTED)
-		neigh->confirmed = jiffies;
-	neigh->updated = jiffies;
-
 	/* If entry was valid and address is not changed,
 	   do not change entry state, if new one is STALE.
 	 */
@@ -1157,6 +1153,16 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 		}
 	}
 
+	/* Update timestamps only once we know we will make a change to the
+	 * neighbour entry. Otherwise we risk to move the locktime window with
+	 * noop updates and ignore relevant ARP updates.
+	 */
+	if (new != old || lladdr != neigh->ha) {
+		if (new & NUD_CONNECTED)
+			neigh->confirmed = jiffies;
+		neigh->updated = jiffies;
+	}
+
 	if (new != old) {
 		neigh_del_timer(neigh);
 		if (new & NUD_PROBE)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1934efd4a9d4..26bbfababff2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -315,6 +315,25 @@ out_undo:
 	goto out;
 }
 
+static int __net_init net_defaults_init_net(struct net *net)
+{
+	net->core.sysctl_somaxconn = SOMAXCONN;
+	return 0;
+}
+
+static struct pernet_operations net_defaults_ops = {
+	.init = net_defaults_init_net,
+};
+
+static __init int net_defaults_init(void)
+{
+	if (register_pernet_subsys(&net_defaults_ops))
+		panic("Cannot initialize net default settings");
+
+	return 0;
+}
+
+core_initcall(net_defaults_init);
 
 #ifdef CONFIG_NET_NS
 static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bcb0f610ee42..467a2f4510a7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -899,8 +899,7 @@ static size_t rtnl_port_size(const struct net_device *dev,
 static size_t rtnl_xdp_size(void)
 {
 	size_t xdp_size = nla_total_size(0) +	/* nest IFLA_XDP */
-			  nla_total_size(1) +	/* XDP_ATTACHED */
-			  nla_total_size(4);	/* XDP_FLAGS */
+			  nla_total_size(1);	/* XDP_ATTACHED */
 
 	return xdp_size;
 }
@@ -932,6 +931,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
 	       + nla_total_size(4) /* IFLA_LINK_NETNSID */
+	       + nla_total_size(4) /* IFLA_GROUP */
 	       + nla_total_size(ext_filter_mask
 			        & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
 	       + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
@@ -1125,6 +1125,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
 	struct ifla_vf_mac vf_mac;
 	struct ifla_vf_info ivi;
 
+	memset(&ivi, 0, sizeof(ivi));
+
 	/* Not all SR-IOV capable drivers support the
 	 * spoofcheck and "RSS query enable" query.  Preset to
 	 * -1 so the user space tool can detect that the driver
@@ -1133,7 +1135,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
 	ivi.spoofchk = -1;
 	ivi.rss_query_en = -1;
 	ivi.trusted = -1;
-	memset(ivi.mac, 0, sizeof(ivi.mac));
 	/* The default value for VF link state is "auto"
 	 * IFLA_VF_LINK_STATE_AUTO which equals zero
 	 */
@@ -1247,37 +1248,34 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
+static u8 rtnl_xdp_attached_mode(struct net_device *dev)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	ASSERT_RTNL();
+
+	if (rcu_access_pointer(dev->xdp_prog))
+		return XDP_ATTACHED_SKB;
+	if (ops->ndo_xdp && __dev_xdp_attached(dev, ops->ndo_xdp))
+		return XDP_ATTACHED_DRV;
+
+	return XDP_ATTACHED_NONE;
+}
+
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 {
 	struct nlattr *xdp;
-	u32 xdp_flags = 0;
-	u8 val = 0;
 	int err;
 
 	xdp = nla_nest_start(skb, IFLA_XDP);
 	if (!xdp)
 		return -EMSGSIZE;
-	if (rcu_access_pointer(dev->xdp_prog)) {
-		xdp_flags = XDP_FLAGS_SKB_MODE;
-		val = 1;
-	} else if (dev->netdev_ops->ndo_xdp) {
-		struct netdev_xdp xdp_op = {};
-
-		xdp_op.command = XDP_QUERY_PROG;
-		err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
-		if (err)
-			goto err_cancel;
-		val = xdp_op.prog_attached;
-	}
-	err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val);
+
+	err = nla_put_u8(skb, IFLA_XDP_ATTACHED,
+			 rtnl_xdp_attached_mode(dev));
 	if (err)
 		goto err_cancel;
 
-	if (xdp_flags) {
-		err = nla_put_u32(skb, IFLA_XDP_FLAGS, xdp_flags);
-		if (err)
-			goto err_cancel;
-	}
 	nla_nest_end(skb, xdp);
 	return 0;
 
@@ -1471,6 +1469,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINK_NETNSID]	= { .type = NLA_S32 },
 	[IFLA_PROTO_DOWN]	= { .type = NLA_U8 },
 	[IFLA_XDP]		= { .type = NLA_NESTED },
+	[IFLA_GROUP]		= { .type = NLA_U32 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1631,13 +1630,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 					       cb->nlh->nlmsg_seq, 0,
 					       flags,
 					       ext_filter_mask);
-			/* If we ran out of room on the first message,
-			 * we're in trouble
-			 */
-			WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
 
-			if (err < 0)
-				goto out;
+			if (err < 0) {
+				if (likely(skb->len))
+					goto out;
+
+				goto out_err;
+			}
 
 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
@@ -1645,10 +1644,12 @@ cont:
 		}
 	}
 out:
+	err = skb->len;
+out_err:
 	cb->args[1] = idx;
 	cb->args[0] = h;
 
-	return skb->len;
+	return err;
 }
 
 int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
@@ -2199,6 +2200,11 @@ static int do_setlink(const struct sk_buff *skb,
 				err = -EINVAL;
 				goto errout;
 			}
+			if ((xdp_flags & XDP_FLAGS_SKB_MODE) &&
+			    (xdp_flags & XDP_FLAGS_DRV_MODE)) {
+				err = -EINVAL;
+				goto errout;
+			}
 		}
 
 		if (xdp[IFLA_XDP_FD]) {
@@ -3228,8 +3234,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	int err = 0;
 	int fidx = 0;
 
-	if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
-			IFLA_MAX, ifla_policy, NULL) == 0) {
+	err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
+			  IFLA_MAX, ifla_policy, NULL);
+	if (err < 0) {
+		return -EINVAL;
+	} else if (err == 0) {
 		if (tb[IFLA_MASTER])
 			br_idx = nla_get_u32(tb[IFLA_MASTER]);
 	}
@@ -3452,8 +3461,12 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
 				err = br_dev->netdev_ops->ndo_bridge_getlink(
 						skb, portid, seq, dev,
 						filter_mask, NLM_F_MULTI);
-				if (err < 0 && err != -EOPNOTSUPP)
-					break;
+				if (err < 0 && err != -EOPNOTSUPP) {
+					if (likely(skb->len))
+						break;
+
+					goto out_err;
+				}
 			}
 			idx++;
 		}
@@ -3464,16 +3477,22 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
 							      seq, dev,
 							      filter_mask,
 							      NLM_F_MULTI);
-				if (err < 0 && err != -EOPNOTSUPP)
-					break;
+				if (err < 0 && err != -EOPNOTSUPP) {
+					if (likely(skb->len))
+						break;
+
+					goto out_err;
+				}
 			}
 			idx++;
 		}
 	}
+	err = skb->len;
+out_err:
 	rcu_read_unlock();
 	cb->args[0] = idx;
 
-	return skb->len;
+	return err;
 }
 
 static inline size_t bridge_nlmsg_size(void)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 346d3e85dfbc..b1be7c01efe2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3754,8 +3754,11 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
 
 	spin_lock_irqsave(&q->lock, flags);
 	skb = __skb_dequeue(q);
-	if (skb && (skb_next = skb_peek(q)))
+	if (skb && (skb_next = skb_peek(q))) {
 		icmp_next = is_icmp_err_skb(skb_next);
+		if (icmp_next)
+			sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin;
+	}
 	spin_unlock_irqrestore(&q->lock, flags);
 
 	if (is_icmp_err_skb(skb) && !icmp_next)
diff --git a/net/core/sock.c b/net/core/sock.c
index 79c6aee6af9b..727f924b7f91 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -139,10 +139,7 @@
 
 #include <trace/events/sock.h>
 
-#ifdef CONFIG_INET
 #include <net/tcp.h>
-#endif
-
 #include <net/busy_poll.h>
 
 static DEFINE_MUTEX(proto_list_mutex);
@@ -1803,28 +1800,24 @@ EXPORT_SYMBOL(skb_set_owner_w);
  * delay queue. We want to allow the owner socket to send more
  * packets, as if they were already TX completed by a typical driver.
  * But we also want to keep skb->sk set because some packet schedulers
- * rely on it (sch_fq for example). So we set skb->truesize to a small
- * amount (1) and decrease sk_wmem_alloc accordingly.
+ * rely on it (sch_fq for example).
  */
 void skb_orphan_partial(struct sk_buff *skb)
 {
-	/* If this skb is a TCP pure ACK or already went here,
-	 * we have nothing to do. 2 is already a very small truesize.
-	 */
-	if (skb->truesize <= 2)
+	if (skb_is_tcp_pure_ack(skb))
 		return;
 
-	/* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
-	 * so we do not completely orphan skb, but transfert all
-	 * accounted bytes but one, to avoid unexpected reorders.
-	 */
 	if (skb->destructor == sock_wfree
 #ifdef CONFIG_INET
 	    || skb->destructor == tcp_wfree
 #endif
 		) {
-		atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
-		skb->truesize = 1;
+		struct sock *sk = skb->sk;
+
+		if (atomic_inc_not_zero(&sk->sk_refcnt)) {
+			atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+			skb->destructor = sock_efree;
+		}
 	} else {
 		skb_orphan(skb);
 	}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index ea23254b2457..b7cd9aafe99e 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -479,8 +479,6 @@ static __net_init int sysctl_core_net_init(struct net *net)
 {
 	struct ctl_table *tbl;
 
-	net->core.sysctl_somaxconn = SOMAXCONN;
-
 	tbl = netns_core_table;
 	if (!net_eq(net, &init_net)) {
 		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index b99168b0fabf..f75482bdee9a 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -951,7 +951,7 @@ static struct proto dccp_v4_prot = {
 	.orphan_count		= &dccp_orphan_count,
 	.max_header		= MAX_DCCP_HEADER,
 	.obj_size		= sizeof(struct dccp_sock),
-	.slab_flags		= SLAB_DESTROY_BY_RCU,
+	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
 	.rsk_prot		= &dccp_request_sock_ops,
 	.twsk_prot		= &dccp_timewait_sock_ops,
 	.h.hashinfo		= &dccp_hashinfo,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d9b6a4e403e7..992621172220 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -426,6 +426,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
 		newsk->sk_backlog_rcv = dccp_v4_do_rcv;
 		newnp->pktoptions  = NULL;
 		newnp->opt	   = NULL;
+		newnp->ipv6_mc_list = NULL;
+		newnp->ipv6_ac_list = NULL;
+		newnp->ipv6_fl_list = NULL;
 		newnp->mcast_oif   = inet6_iif(skb);
 		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
 
@@ -490,6 +493,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
 	/* Clone RX bits */
 	newnp->rxopt.all = np->rxopt.all;
 
+	newnp->ipv6_mc_list = NULL;
+	newnp->ipv6_ac_list = NULL;
+	newnp->ipv6_fl_list = NULL;
 	newnp->pktoptions = NULL;
 	newnp->opt	  = NULL;
 	newnp->mcast_oif  = inet6_iif(skb);
@@ -1014,7 +1020,7 @@ static struct proto dccp_v6_prot = {
 	.orphan_count	   = &dccp_orphan_count,
 	.max_header	   = MAX_DCCP_HEADER,
 	.obj_size	   = sizeof(struct dccp6_sock),
-	.slab_flags	   = SLAB_DESTROY_BY_RCU,
+	.slab_flags	   = SLAB_TYPESAFE_BY_RCU,
 	.rsk_prot	   = &dccp6_request_sock_ops,
 	.twsk_prot	   = &dccp6_timewait_sock_ops,
 	.h.hashinfo	   = &dccp_hashinfo,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 4b9518a0d248..6f95612b4d32 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -188,12 +188,6 @@ static inline void dnrt_free(struct dn_route *rt)
 	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
-static inline void dnrt_drop(struct dn_route *rt)
-{
-	dst_release(&rt->dst);
-	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
-}
-
 static void dn_dst_check_expire(unsigned long dummy)
 {
 	int i;
@@ -248,7 +242,7 @@ static int dn_dst_gc(struct dst_ops *ops)
 			}
 			*rtp = rt->dst.dn_next;
 			rt->dst.dn_next = NULL;
-			dnrt_drop(rt);
+			dnrt_free(rt);
 			break;
 		}
 		spin_unlock_bh(&dn_rt_hash_table[i].lock);
@@ -350,7 +344,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
 			dst_use(&rth->dst, now);
 			spin_unlock_bh(&dn_rt_hash_table[hash].lock);
 
-			dnrt_drop(rt);
+			dst_free(&rt->dst);
 			*rp = rth;
 			return 0;
 		}
@@ -380,7 +374,7 @@ static void dn_run_flush(unsigned long dummy)
 		for(; rt; rt = next) {
 			next = rcu_dereference_raw(rt->dst.dn_next);
 			RCU_INIT_POINTER(rt->dst.dn_next, NULL);
-			dst_free((struct dst_entry *)rt);
+			dnrt_free(rt);
 		}
 
 nothing_to_declare:
@@ -1187,7 +1181,7 @@ make_route:
 	if (dev_out->flags & IFF_LOOPBACK)
 		flags |= RTCF_LOCAL;
 
-	rt = dst_alloc(&dn_dst_ops, dev_out, 1, DST_OBSOLETE_NONE, DST_HOST);
+	rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, DST_HOST);
 	if (rt == NULL)
 		goto e_nobufs;
 
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 1ed81ac6dd1a..aa8ffecc46a4 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -102,7 +102,9 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
 {
 	struct nlmsghdr *nlh = nlmsg_hdr(skb);
 
-	if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+	if (skb->len < sizeof(*nlh) ||
+	    nlh->nlmsg_len < sizeof(*nlh) ||
+	    skb->len < nlh->nlmsg_len)
 		return;
 
 	if (!netlink_capable(skb, CAP_NET_ADMIN))
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 26130ae438da..90038d45a547 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -223,6 +223,53 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+int dsa_switch_suspend(struct dsa_switch *ds)
+{
+	int i, ret = 0;
+
+	/* Suspend slave network devices */
+	for (i = 0; i < ds->num_ports; i++) {
+		if (!dsa_is_port_initialized(ds, i))
+			continue;
+
+		ret = dsa_slave_suspend(ds->ports[i].netdev);
+		if (ret)
+			return ret;
+	}
+
+	if (ds->ops->suspend)
+		ret = ds->ops->suspend(ds);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dsa_switch_suspend);
+
+int dsa_switch_resume(struct dsa_switch *ds)
+{
+	int i, ret = 0;
+
+	if (ds->ops->resume)
+		ret = ds->ops->resume(ds);
+
+	if (ret)
+		return ret;
+
+	/* Resume slave network devices */
+	for (i = 0; i < ds->num_ports; i++) {
+		if (!dsa_is_port_initialized(ds, i))
+			continue;
+
+		ret = dsa_slave_resume(ds->ports[i].netdev);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dsa_switch_resume);
+#endif
+
 static struct packet_type dsa_pack_type __read_mostly = {
 	.type	= cpu_to_be16(ETH_P_XDSA),
 	.func	= dsa_switch_rcv,
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 033b3bfb63dc..7796580e99ee 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -484,8 +484,10 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst)
 		dsa_ds_unapply(dst, ds);
 	}
 
-	if (dst->cpu_switch)
+	if (dst->cpu_switch) {
 		dsa_cpu_port_ethtool_restore(dst->cpu_switch);
+		dst->cpu_switch = NULL;
+	}
 
 	pr_info("DSA: tree %d unapplied\n", dst->tree);
 	dst->applied = false;
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index ad345c8b0b06..7281098df04e 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -289,53 +289,6 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
 	dsa_switch_unregister_notifier(ds);
 }
 
-#ifdef CONFIG_PM_SLEEP
-int dsa_switch_suspend(struct dsa_switch *ds)
-{
-	int i, ret = 0;
-
-	/* Suspend slave network devices */
-	for (i = 0; i < ds->num_ports; i++) {
-		if (!dsa_is_port_initialized(ds, i))
-			continue;
-
-		ret = dsa_slave_suspend(ds->ports[i].netdev);
-		if (ret)
-			return ret;
-	}
-
-	if (ds->ops->suspend)
-		ret = ds->ops->suspend(ds);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(dsa_switch_suspend);
-
-int dsa_switch_resume(struct dsa_switch *ds)
-{
-	int i, ret = 0;
-
-	if (ds->ops->resume)
-		ret = ds->ops->resume(ds);
-
-	if (ret)
-		return ret;
-
-	/* Resume slave network devices */
-	for (i = 0; i < ds->num_ports; i++) {
-		if (!dsa_is_port_initialized(ds, i))
-			continue;
-
-		ret = dsa_slave_resume(ds->ports[i].netdev);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(dsa_switch_resume);
-#endif
-
 /* platform driver init and cleanup *****************************************/
 static int dev_is_class(struct device *dev, void *class)
 {
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index c73160fb11e7..0a0a392dc2bd 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -378,7 +378,6 @@ static void hsr_dev_destroy(struct net_device *hsr_dev)
 	del_timer_sync(&hsr->announce_timer);
 
 	synchronize_rcu();
-	free_netdev(hsr_dev);
 }
 
 static const struct net_device_ops hsr_device_ops = {
@@ -404,7 +403,8 @@ void hsr_dev_setup(struct net_device *dev)
 	SET_NETDEV_DEVTYPE(dev, &hsr_type);
 	dev->priv_flags |= IFF_NO_QUEUE;
 
-	dev->destructor = hsr_dev_destroy;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = hsr_dev_destroy;
 
 	dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
 			   NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 4ebe2aa3e7d3..04b5450c5a55 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -324,8 +324,7 @@ static int hsr_fill_frame_info(struct hsr_frame_info *frame,
 	unsigned long irqflags;
 
 	frame->is_supervision = is_supervision_frame(port->hsr, skb);
-	frame->node_src = hsr_get_node(&port->hsr->node_db, skb,
-				       frame->is_supervision);
+	frame->node_src = hsr_get_node(port, skb, frame->is_supervision);
 	if (frame->node_src == NULL)
 		return -1; /* Unknown node and !is_supervision, or no mem */
 
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 7ea925816f79..284a9b820df8 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -158,9 +158,10 @@ struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
 
 /* Get the hsr_node from which 'skb' was sent.
  */
-struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb,
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
 			      bool is_sup)
 {
+	struct list_head *node_db = &port->hsr->node_db;
 	struct hsr_node *node;
 	struct ethhdr *ethhdr;
 	u16 seq_out;
@@ -186,7 +187,11 @@ struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb,
 		 */
 		seq_out = hsr_get_skb_sequence_nr(skb) - 1;
 	} else {
-		WARN_ONCE(1, "%s: Non-HSR frame\n", __func__);
+		/* this is called also for frames from master port and
+		 * so warn only for non master ports
+		 */
+		if (port->type != HSR_PT_MASTER)
+			WARN_ONCE(1, "%s: Non-HSR frame\n", __func__);
 		seq_out = HSR_SEQNR_START;
 	}
 
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 438b40f98f5a..4e04f0e868e9 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -18,7 +18,7 @@ struct hsr_node;
 
 struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
 			      u16 seq_out);
-struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb,
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
 			      bool is_sup);
 void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
 			  struct hsr_port *port);
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index d7efbf0dad20..0a866f332290 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -107,7 +107,7 @@ static void lowpan_setup(struct net_device *ldev)
 
 	ldev->netdev_ops	= &lowpan_netdev_ops;
 	ldev->header_ops	= &lowpan_header_ops;
-	ldev->destructor	= free_netdev;
+	ldev->needs_free_netdev	= true;
 	ldev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f3dad1661343..58925b6597de 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1043,7 +1043,7 @@ static struct inet_protosw inetsw_array[] =
 		.type =       SOCK_DGRAM,
 		.protocol =   IPPROTO_ICMP,
 		.prot =       &ping_prot,
-		.ops =        &inet_dgram_ops,
+		.ops =        &inet_sockraw_ops,
 		.flags =      INET_PROTOSW_REUSE,
        },
 
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 0937b34c27ca..e9f3386a528b 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -641,6 +641,32 @@ void arp_xmit(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(arp_xmit);
 
+static bool arp_is_garp(struct net *net, struct net_device *dev,
+			int *addr_type, __be16 ar_op,
+			__be32 sip, __be32 tip,
+			unsigned char *sha, unsigned char *tha)
+{
+	bool is_garp = tip == sip;
+
+	/* Gratuitous ARP _replies_ also require target hwaddr to be
+	 * the same as source.
+	 */
+	if (is_garp && ar_op == htons(ARPOP_REPLY))
+		is_garp =
+			/* IPv4 over IEEE 1394 doesn't provide target
+			 * hardware address field in its ARP payload.
+			 */
+			tha &&
+			!memcmp(tha, sha, dev->addr_len);
+
+	if (is_garp) {
+		*addr_type = inet_addr_type_dev_table(net, dev, sip);
+		if (*addr_type != RTN_UNICAST)
+			is_garp = false;
+	}
+	return is_garp;
+}
+
 /*
  *	Process an arp request.
  */
@@ -653,6 +679,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
 	unsigned char *arp_ptr;
 	struct rtable *rt;
 	unsigned char *sha;
+	unsigned char *tha = NULL;
 	__be32 sip, tip;
 	u16 dev_type = dev->type;
 	int addr_type;
@@ -724,6 +751,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
 		break;
 #endif
 	default:
+		tha = arp_ptr;
 		arp_ptr += dev->addr_len;
 	}
 	memcpy(&tip, arp_ptr, 4);
@@ -835,19 +863,25 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 	n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
 
-	if (IN_DEV_ARP_ACCEPT(in_dev)) {
-		unsigned int addr_type = inet_addr_type_dev_table(net, dev, sip);
+	addr_type = -1;
+	if (n || IN_DEV_ARP_ACCEPT(in_dev)) {
+		is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op,
+				      sip, tip, sha, tha);
+	}
 
+	if (IN_DEV_ARP_ACCEPT(in_dev)) {
 		/* Unsolicited ARP is not accepted by default.
 		   It is possible, that this option should be enabled for some
 		   devices (strip is candidate)
 		 */
-		is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip &&
-			  addr_type == RTN_UNICAST;
-
 		if (!n &&
-		    ((arp->ar_op == htons(ARPOP_REPLY)  &&
-				addr_type == RTN_UNICAST) || is_garp))
+		    (is_garp ||
+		     (arp->ar_op == htons(ARPOP_REPLY) &&
+		      (addr_type == RTN_UNICAST ||
+		       (addr_type < 0 &&
+			/* postpone calculation to as late as possible */
+			inet_addr_type_dev_table(net, dev, sip) ==
+				RTN_UNICAST)))))
 			n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
 	}
 
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 65cc02bd82bc..93322f895eab 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -248,6 +248,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 	u8 *tail;
 	u8 *vaddr;
 	int nfrags;
+	int esph_offset;
 	struct page *page;
 	struct sk_buff *trailer;
 	int tailen = esp->tailen;
@@ -313,11 +314,13 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 	}
 
 cow:
+	esph_offset = (unsigned char *)esp->esph - skb_transport_header(skb);
+
 	nfrags = skb_cow_data(skb, tailen, &trailer);
 	if (nfrags < 0)
 		goto out;
 	tail = skb_tail_pointer(trailer);
-	esp->esph = ip_esp_hdr(skb);
+	esp->esph = (struct ip_esp_hdr *)(skb_transport_header(skb) + esph_offset);
 
 skip_cow:
 	esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 39bd1edee676..83e3ed258467 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -763,7 +763,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	unsigned int e = 0, s_e;
 	struct fib_table *tb;
 	struct hlist_head *head;
-	int dumped = 0;
+	int dumped = 0, err;
 
 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
@@ -783,20 +783,27 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 			if (dumped)
 				memset(&cb->args[2], 0, sizeof(cb->args) -
 						 2 * sizeof(cb->args[0]));
-			if (fib_table_dump(tb, skb, cb) < 0)
-				goto out;
+			err = fib_table_dump(tb, skb, cb);
+			if (err < 0) {
+				if (likely(skb->len))
+					goto out;
+
+				goto out_err;
+			}
 			dumped = 1;
 next:
 			e++;
 		}
 	}
 out:
+	err = skb->len;
+out_err:
 	rcu_read_unlock();
 
 	cb->args[1] = e;
 	cb->args[0] = h;
 
-	return skb->len;
+	return err;
 }
 
 /* Prepare and feed intra-kernel routing request.
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index da449ddb8cc1..ad9ad4aab5da 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -203,6 +203,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
 static void free_fib_info_rcu(struct rcu_head *head)
 {
 	struct fib_info *fi = container_of(head, struct fib_info, rcu);
+	struct dst_metrics *m;
 
 	change_nexthops(fi) {
 		if (nexthop_nh->nh_dev)
@@ -213,8 +214,9 @@ static void free_fib_info_rcu(struct rcu_head *head)
 		rt_fibinfo_free(&nexthop_nh->nh_rth_input);
 	} endfor_nexthops(fi);
 
-	if (fi->fib_metrics != (u32 *) dst_default_metrics)
-		kfree(fi->fib_metrics);
+	m = fi->fib_metrics;
+	if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt))
+		kfree(m);
 	kfree(fi);
 }
 
@@ -971,11 +973,11 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
 			val = 255;
 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
 			return -EINVAL;
-		fi->fib_metrics[type - 1] = val;
+		fi->fib_metrics->metrics[type - 1] = val;
 	}
 
 	if (ecn_ca)
-		fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
+		fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
 
 	return 0;
 }
@@ -1033,11 +1035,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		goto failure;
 	fib_info_cnt++;
 	if (cfg->fc_mx) {
-		fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+		fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
 		if (!fi->fib_metrics)
 			goto failure;
+		atomic_set(&fi->fib_metrics->refcnt, 1);
 	} else
-		fi->fib_metrics = (u32 *) dst_default_metrics;
+		fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
 
 	fi->fib_net = net;
 	fi->fib_protocol = cfg->fc_protocol;
@@ -1238,7 +1241,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 	if (fi->fib_priority &&
 	    nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
 		goto nla_put_failure;
-	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
+	if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
 		goto nla_put_failure;
 
 	if (fi->fib_prefsrc &&
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1201409ba1dc..51182ff2b441 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1983,6 +1983,8 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
 
 	/* rcu_read_lock is hold by caller */
 	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+		int err;
+
 		if (i < s_i) {
 			i++;
 			continue;
@@ -1993,17 +1995,14 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
 			continue;
 		}
 
-		if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
-				  cb->nlh->nlmsg_seq,
-				  RTM_NEWROUTE,
-				  tb->tb_id,
-				  fa->fa_type,
-				  xkey,
-				  KEYLENGTH - fa->fa_slen,
-				  fa->fa_tos,
-				  fa->fa_info, NLM_F_MULTI) < 0) {
+		err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
+				    cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+				    tb->tb_id, fa->fa_type,
+				    xkey, KEYLENGTH - fa->fa_slen,
+				    fa->fa_tos, fa->fa_info, NLM_F_MULTI);
+		if (err < 0) {
 			cb->args[4] = i;
-			return -1;
+			return err;
 		}
 		i++;
 	}
@@ -2025,10 +2024,13 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
 	t_key key = cb->args[3];
 
 	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
-		if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
+		int err;
+
+		err = fn_trie_dump_leaf(l, tb, skb, cb);
+		if (err < 0) {
 			cb->args[3] = key;
 			cb->args[2] = count;
-			return -1;
+			return err;
 		}
 
 		++count;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 43318b5f5647..9144fa7df2ad 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -657,8 +657,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	/* Needed by both icmp_global_allow and icmp_xmit_lock */
 	local_bh_disable();
 
-	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
-	if (!icmpv4_global_allow(net, type, code))
+	/* Check global sysctl_icmp_msgs_per_sec ratelimit, unless
+	 * incoming dev is loopback.  If outgoing dev change to not be
+	 * loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
+	 */
+	if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
+	      !icmpv4_global_allow(net, type, code))
 		goto out_bh_enable;
 
 	sk = icmp_xmit_lock(net);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 44fd86de2823..ec9a396fa466 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1112,6 +1112,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 	pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
 	if (!pmc)
 		return;
+	spin_lock_init(&pmc->lock);
 	spin_lock_bh(&im->lock);
 	pmc->interface = im->interface;
 	in_dev_hold(in_dev);
@@ -2071,21 +2072,26 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 
 static void ip_mc_clear_src(struct ip_mc_list *pmc)
 {
-	struct ip_sf_list *psf, *nextpsf;
+	struct ip_sf_list *psf, *nextpsf, *tomb, *sources;
 
-	for (psf = pmc->tomb; psf; psf = nextpsf) {
+	spin_lock_bh(&pmc->lock);
+	tomb = pmc->tomb;
+	pmc->tomb = NULL;
+	sources = pmc->sources;
+	pmc->sources = NULL;
+	pmc->sfmode = MCAST_EXCLUDE;
+	pmc->sfcount[MCAST_INCLUDE] = 0;
+	pmc->sfcount[MCAST_EXCLUDE] = 1;
+	spin_unlock_bh(&pmc->lock);
+
+	for (psf = tomb; psf; psf = nextpsf) {
 		nextpsf = psf->sf_next;
 		kfree(psf);
 	}
-	pmc->tomb = NULL;
-	for (psf = pmc->sources; psf; psf = nextpsf) {
+	for (psf = sources; psf; psf = nextpsf) {
 		nextpsf = psf->sf_next;
 		kfree(psf);
 	}
-	pmc->sources = NULL;
-	pmc->sfmode = MCAST_EXCLUDE;
-	pmc->sfcount[MCAST_INCLUDE] = 0;
-	pmc->sfcount[MCAST_EXCLUDE] = 1;
 }
 
 /* Join a multicast group
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7a3fd25e8913..532b36e9ce2a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -964,7 +964,8 @@ static int __ip_append_data(struct sock *sk,
 		csummode = CHECKSUM_PARTIAL;
 
 	cork->length += length;
-	if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
+	if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
+	     (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
 	    (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index b878ecbc0608..129d1a3616f8 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -446,6 +446,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 	return 0;
 
 drop:
+	if (tun_dst)
+		dst_release((struct dst_entry *)tun_dst);
 	kfree_skb(skb);
 	return 0;
 }
@@ -967,7 +969,6 @@ static void ip_tunnel_dev_free(struct net_device *dev)
 	gro_cells_destroy(&tunnel->gro_cells);
 	dst_cache_destroy(&tunnel->dst_cache);
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
@@ -1155,7 +1156,8 @@ int ip_tunnel_init(struct net_device *dev)
 	struct iphdr *iph = &tunnel->parms.iph;
 	int err;
 
-	dev->destructor	= ip_tunnel_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ip_tunnel_dev_free;
 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 	if (!dev->tstats)
 		return -ENOMEM;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3a02d52ed50e..8ae425cad818 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -101,8 +101,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id);
 static void ipmr_free_table(struct mr_table *mrt);
 
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
-			  struct sk_buff *skb, struct mfc_cache *cache,
-			  int local);
+			  struct net_device *dev, struct sk_buff *skb,
+			  struct mfc_cache *cache, int local);
 static int ipmr_cache_report(struct mr_table *mrt,
 			     struct sk_buff *pkt, vifi_t vifi, int assert);
 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
@@ -501,7 +501,7 @@ static void reg_vif_setup(struct net_device *dev)
 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
 	dev->flags		= IFF_NOARP;
 	dev->netdev_ops		= &reg_vif_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
@@ -988,7 +988,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
 
 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 		} else {
-			ip_mr_forward(net, mrt, skb, c, 0);
+			ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
 		}
 	}
 }
@@ -1073,7 +1073,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 
 /* Queue a packet for resolution. It gets locked cache entry! */
 static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
-				 struct sk_buff *skb)
+				 struct sk_buff *skb, struct net_device *dev)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct mfc_cache *c;
@@ -1130,6 +1130,10 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 		kfree_skb(skb);
 		err = -ENOBUFS;
 	} else {
+		if (dev) {
+			skb->dev = dev;
+			skb->skb_iif = dev->ifindex;
+		}
 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
 		err = 0;
 	}
@@ -1828,10 +1832,10 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
 
 /* "local" means that we should preserve one skb (for local delivery) */
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
-			  struct sk_buff *skb, struct mfc_cache *cache,
-			  int local)
+			  struct net_device *dev, struct sk_buff *skb,
+			  struct mfc_cache *cache, int local)
 {
-	int true_vifi = ipmr_find_vif(mrt, skb->dev);
+	int true_vifi = ipmr_find_vif(mrt, dev);
 	int psend = -1;
 	int vif, ct;
 
@@ -1853,13 +1857,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 	}
 
 	/* Wrong interface: drop packet and (maybe) send PIM assert. */
-	if (mrt->vif_table[vif].dev != skb->dev) {
-		struct net_device *mdev;
-
-		mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev);
-		if (mdev == skb->dev)
-			goto forward;
-
+	if (mrt->vif_table[vif].dev != dev) {
 		if (rt_is_output_route(skb_rtable(skb))) {
 			/* It is our own packet, looped back.
 			 * Very complicated situation...
@@ -1980,6 +1978,20 @@ int ip_mr_input(struct sk_buff *skb)
 	struct net *net = dev_net(skb->dev);
 	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
 	struct mr_table *mrt;
+	struct net_device *dev;
+
+	/* skb->dev passed in is the loX master dev for vrfs.
+	 * As there are no vifs associated with loopback devices,
+	 * get the proper interface that does have a vif associated with it.
+	 */
+	dev = skb->dev;
+	if (netif_is_l3_master(skb->dev)) {
+		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
+		if (!dev) {
+			kfree_skb(skb);
+			return -ENODEV;
+		}
+	}
 
 	/* Packet is looped back after forward, it should not be
 	 * forwarded second time, but still can be delivered locally.
@@ -2017,7 +2029,7 @@ int ip_mr_input(struct sk_buff *skb)
 	/* already under rcu_read_lock() */
 	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
 	if (!cache) {
-		int vif = ipmr_find_vif(mrt, skb->dev);
+		int vif = ipmr_find_vif(mrt, dev);
 
 		if (vif >= 0)
 			cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
@@ -2037,9 +2049,9 @@ int ip_mr_input(struct sk_buff *skb)
 		}
 
 		read_lock(&mrt_lock);
-		vif = ipmr_find_vif(mrt, skb->dev);
+		vif = ipmr_find_vif(mrt, dev);
 		if (vif >= 0) {
-			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
+			int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev);
 			read_unlock(&mrt_lock);
 
 			return err2;
@@ -2050,7 +2062,7 @@ int ip_mr_input(struct sk_buff *skb)
 	}
 
 	read_lock(&mrt_lock);
-	ip_mr_forward(net, mrt, skb, cache, local);
+	ip_mr_forward(net, mrt, dev, skb, cache, local);
 	read_unlock(&mrt_lock);
 
 	if (local)
@@ -2224,7 +2236,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
 		iph->saddr = saddr;
 		iph->daddr = daddr;
 		iph->version = 0;
-		err = ipmr_cache_unresolved(mrt, vif, skb2);
+		err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
 		read_unlock(&mrt_lock);
 		rcu_read_unlock();
 		return err;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 655d9eebe43e..6883b3d4ba8f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1385,8 +1385,12 @@ static void rt_add_uncached_list(struct rtable *rt)
 
 static void ipv4_dst_destroy(struct dst_entry *dst)
 {
+	struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
 	struct rtable *rt = (struct rtable *) dst;
 
+	if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
+		kfree(p);
+
 	if (!list_empty(&rt->rt_uncached)) {
 		struct uncached_list *ul = rt->rt_uncached_list;
 
@@ -1438,7 +1442,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 			rt->rt_gateway = nh->nh_gw;
 			rt->rt_uses_gateway = 1;
 		}
-		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+		dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
+		if (fi->fib_metrics != &dst_default_metrics) {
+			rt->dst._metrics |= DST_METRICS_REFCOUNTED;
+			atomic_inc(&fi->fib_metrics->refcnt);
+		}
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		rt->dst.tclassid = nh->nh_tclassid;
 #endif
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1e4c76d2b827..40aca7803cf2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1084,9 +1084,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
+	struct sockaddr *uaddr = msg->msg_name;
 	int err, flags;
 
-	if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
+	if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+	    (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
+	     uaddr->sa_family == AF_UNSPEC))
 		return -EOPNOTSUPP;
 	if (tp->fastopen_req)
 		return -EALREADY; /* Another Fast Open is in progress */
@@ -1108,7 +1111,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 		}
 	}
 	flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
-	err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
+	err = __inet_stream_connect(sk->sk_socket, uaddr,
 				    msg->msg_namelen, flags, 1);
 	/* fastopen_req could already be freed in __inet_stream_connect
 	 * if the connection times out or gets rst
@@ -2320,9 +2323,15 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);
+	/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
+	 * issue in __tcp_select_window()
+	 */
+	icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
 	tcp_init_send_head(sk);
 	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
 	__sk_dst_reset(sk);
+	dst_release(sk->sk_rx_dst);
+	sk->sk_rx_dst = NULL;
 	tcp_saved_syn_free(tp);
 
 	/* Clean up fastopen related fields */
@@ -2374,9 +2383,10 @@ static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int l
 	return 0;
 }
 
-static int tcp_repair_options_est(struct tcp_sock *tp,
+static int tcp_repair_options_est(struct sock *sk,
 		struct tcp_repair_opt __user *optbuf, unsigned int len)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_repair_opt opt;
 
 	while (len >= sizeof(opt)) {
@@ -2389,6 +2399,7 @@ static int tcp_repair_options_est(struct tcp_sock *tp,
 		switch (opt.opt_code) {
 		case TCPOPT_MSS:
 			tp->rx_opt.mss_clamp = opt.opt_val;
+			tcp_mtup_init(sk);
 			break;
 		case TCPOPT_WINDOW:
 			{
@@ -2548,7 +2559,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		if (!tp->repair)
 			err = -EINVAL;
 		else if (sk->sk_state == TCP_ESTABLISHED)
-			err = tcp_repair_options_est(tp,
+			err = tcp_repair_options_est(sk,
 					(struct tcp_repair_opt __user *)optval,
 					optlen);
 		else
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 6e3c512054a6..324c9bcc5456 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -180,6 +180,7 @@ void tcp_init_congestion_control(struct sock *sk)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
+	tcp_sk(sk)->prior_ssthresh = 0;
 	if (icsk->icsk_ca_ops->init)
 		icsk->icsk_ca_ops->init(sk);
 	if (tcp_ca_needs_ecn(sk))
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5a3ad09e2786..174d4376baa5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1179,13 +1179,14 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 		 */
 		if (pkt_len > mss) {
 			unsigned int new_len = (pkt_len / mss) * mss;
-			if (!in_sack && new_len < pkt_len) {
+			if (!in_sack && new_len < pkt_len)
 				new_len += mss;
-				if (new_len >= skb->len)
-					return 0;
-			}
 			pkt_len = new_len;
 		}
+
+		if (pkt_len >= skb->len && !in_sack)
+			return 0;
+
 		err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
 		if (err < 0)
 			return err;
@@ -3189,7 +3190,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			int delta;
 
 			/* Non-retransmitted hole got filled? That's reordering */
-			if (reord < prior_fackets)
+			if (reord < prior_fackets && reord <= tp->fackets_out)
 				tcp_update_reordering(sk, tp->fackets_out - reord, 0);
 
 			delta = tcp_is_fack(tp) ? pkts_acked :
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3a51582bef55..5ab2aac5ca19 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2395,7 +2395,7 @@ struct proto tcp_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp_sock),
-	.slab_flags		= SLAB_DESTROY_BY_RCU,
+	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
 	.twsk_prot		= &tcp_timewait_sock_ops,
 	.rsk_prot		= &tcp_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ea6e4cff9faf..1d6219bf2d6b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1612,7 +1612,7 @@ static void udp_v4_rehash(struct sock *sk)
 	udp_lib_rehash(sk, new_hash);
 }
 
-int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int rc;
 
@@ -1657,7 +1657,7 @@ EXPORT_SYMBOL(udp_encap_enable);
  * Note that in the success and error cases, the skb is assumed to
  * have either been requeued or freed.
  */
-int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct udp_sock *up = udp_sk(sk);
 	int is_udplite = IS_UDPLITE(sk);
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index feb50a16398d..a8cf8c6fb60c 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -25,7 +25,6 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 		int flags, int *addr_len);
 int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
 		 int flags);
-int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 void udp_destroy_sock(struct sock *sk);
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8d297a79b568..1d2dbace42ff 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -332,9 +332,9 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev,
 static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
 				   unsigned long delay)
 {
-	if (!delayed_work_pending(&ifp->dad_work))
-		in6_ifa_hold(ifp);
-	mod_delayed_work(addrconf_wq, &ifp->dad_work, delay);
+	in6_ifa_hold(ifp);
+	if (mod_delayed_work(addrconf_wq, &ifp->dad_work, delay))
+		in6_ifa_put(ifp);
 }
 
 static int snmp6_alloc_dev(struct inet6_dev *idev)
@@ -1022,7 +1022,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	INIT_HLIST_NODE(&ifa->addr_lst);
 	ifa->scope = scope;
 	ifa->prefix_len = pfxlen;
-	ifa->flags = flags | IFA_F_TENTATIVE;
+	ifa->flags = flags;
+	/* No need to add the TENTATIVE flag for addresses with NODAD */
+	if (!(flags & IFA_F_NODAD))
+		ifa->flags |= IFA_F_TENTATIVE;
 	ifa->valid_lft = valid_lft;
 	ifa->prefered_lft = prefered_lft;
 	ifa->cstamp = ifa->tstamp = jiffies;
@@ -3366,6 +3369,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct netdev_notifier_changeupper_info *info;
 	struct inet6_dev *idev = __in6_dev_get(dev);
+	struct net *net = dev_net(dev);
 	int run_pending = 0;
 	int err;
 
@@ -3381,7 +3385,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 	case NETDEV_CHANGEMTU:
 		/* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */
 		if (dev->mtu < IPV6_MIN_MTU) {
-			addrconf_ifdown(dev, 1);
+			addrconf_ifdown(dev, dev != net->loopback_dev);
 			break;
 		}
 
@@ -3497,7 +3501,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			 * IPV6_MIN_MTU stop IPv6 on this interface.
 			 */
 			if (dev->mtu < IPV6_MIN_MTU)
-				addrconf_ifdown(dev, 1);
+				addrconf_ifdown(dev, dev != net->loopback_dev);
 		}
 		break;
 
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 37ac9de713c6..8d772fea1dde 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -1319,7 +1319,7 @@ static int calipso_skbuff_setattr(struct sk_buff *skb,
 	struct ipv6hdr *ip6_hdr;
 	struct ipv6_opt_hdr *hop;
 	unsigned char buf[CALIPSO_MAX_BUFFER];
-	int len_delta, new_end, pad;
+	int len_delta, new_end, pad, payload;
 	unsigned int start, end;
 
 	ip6_hdr = ipv6_hdr(skb);
@@ -1346,6 +1346,8 @@ static int calipso_skbuff_setattr(struct sk_buff *skb,
 	if (ret_val < 0)
 		return ret_val;
 
+	ip6_hdr = ipv6_hdr(skb); /* Reset as skb_cow() may have moved it */
+
 	if (len_delta) {
 		if (len_delta > 0)
 			skb_push(skb, len_delta);
@@ -1355,6 +1357,8 @@ static int calipso_skbuff_setattr(struct sk_buff *skb,
 			sizeof(*ip6_hdr) + start);
 		skb_reset_network_header(skb);
 		ip6_hdr = ipv6_hdr(skb);
+		payload = ntohs(ip6_hdr->payload_len);
+		ip6_hdr->payload_len = htons(payload + len_delta);
 	}
 
 	hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index e011122ebd43..5c786f5ab961 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -250,8 +250,14 @@ ipv4_connected:
 	 */
 
 	err = ip6_datagram_dst_update(sk, true);
-	if (err)
+	if (err) {
+		/* Reset daddr and dport so that udp_v6_early_demux()
+		 * fails to find this socket
+		 */
+		memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
+		inet->inet_dport = 0;
 		goto out;
+	}
 
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index d950d43ba255..f02f131f6435 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -30,6 +30,25 @@
 #include <net/ipv6.h>
 #include <linux/icmpv6.h>
 
+static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
+{
+	int off = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr;
+
+	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+		return offsetof(struct ipv6hdr, nexthdr);
+
+	while (off < nhlen) {
+		exthdr = (void *)ipv6_hdr + off;
+		if (exthdr->nexthdr == NEXTHDR_ESP)
+			return off;
+
+		off += ipv6_optlen(exthdr);
+	}
+
+	return 0;
+}
+
 static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
 					 struct sk_buff *skb)
 {
@@ -38,6 +57,7 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
 	struct xfrm_state *x;
 	__be32 seq;
 	__be32 spi;
+	int nhoff;
 	int err;
 
 	skb_pull(skb, offset);
@@ -72,6 +92,11 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
 
 	xo->flags |= XFRM_GRO;
 
+	nhoff = esp6_nexthdr_esp_offset(ipv6_hdr(skb), offset);
+	if (!nhoff)
+		goto out;
+
+	IP6CB(skb)->nhoff = nhoff;
 	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
 	XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index eea23b57c6a5..ec849d88a662 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -32,7 +32,6 @@ struct fib6_rule {
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags, pol_lookup_t lookup)
 {
-	struct rt6_info *rt;
 	struct fib_lookup_arg arg = {
 		.lookup_ptr = lookup,
 		.flags = FIB_LOOKUP_NOREF,
@@ -44,21 +43,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 	fib_rules_lookup(net->ipv6.fib6_rules_ops,
 			 flowi6_to_flowi(fl6), flags, &arg);
 
-	rt = arg.result;
+	if (arg.result)
+		return arg.result;
 
-	if (!rt) {
-		dst_hold(&net->ipv6.ip6_null_entry->dst);
-		return &net->ipv6.ip6_null_entry->dst;
-	}
-
-	if (rt->rt6i_flags & RTF_REJECT &&
-	    rt->dst.error == -EAGAIN) {
-		ip6_rt_put(rt);
-		rt = net->ipv6.ip6_null_entry;
-		dst_hold(&rt->dst);
-	}
-
-	return &rt->dst;
+	dst_hold(&net->ipv6.ip6_null_entry->dst);
+	return &net->ipv6.ip6_null_entry->dst;
 }
 
 static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
@@ -121,7 +110,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 			flp6->saddr = saddr;
 		}
 		err = rt->dst.error;
-		goto out;
+		if (err != -EAGAIN)
+			goto out;
 	}
 again:
 	ip6_rt_put(rt);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 230b5aac9f03..8d7b113958b1 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -491,7 +491,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	local_bh_disable();
 
 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
-	if (!icmpv6_global_allow(type))
+	if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
 		goto out_bh_enable;
 
 	mip6_addr_swap(skb);
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 2fd5ca151dcf..77f7f8c7d93d 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -62,6 +62,7 @@ static inline u32 ila_locator_hash(struct ila_locator loc)
 {
 	u32 *v = (u32 *)loc.v32;
 
+	__ila_hash_secret_init();
 	return jhash_2words(v[0], v[1], hashrnd);
 }
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index d4bf2c68a545..e6b78ba0e636 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -289,8 +289,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 	struct rt6_info *rt;
 
 	rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
-	if (rt->rt6i_flags & RTF_REJECT &&
-	    rt->dst.error == -EAGAIN) {
+	if (rt->dst.error == -EAGAIN) {
 		ip6_rt_put(rt);
 		rt = net->ipv6.ip6_null_entry;
 		dst_hold(&rt->dst);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 8d128ba79b66..64eea3962733 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -537,11 +537,10 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
 
 	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
 
-	dsfield = ipv4_get_dsfield(iph);
-
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-		fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
-					  & IPV6_TCLASS_MASK;
+		dsfield = ipv4_get_dsfield(iph);
+	else
+		dsfield = ip6_tclass(t->parms.flowinfo);
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
 		fl6.flowi6_mark = skb->mark;
 	else
@@ -598,9 +597,11 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
 
 	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
 
-	dsfield = ipv6_get_dsfield(ipv6h);
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+		dsfield = ipv6_get_dsfield(ipv6h);
+	else
+		dsfield = ip6_tclass(t->parms.flowinfo);
+
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
 		fl6.flowlabel |= ip6_flowlabel(ipv6h);
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
@@ -990,13 +991,13 @@ static void ip6gre_dev_free(struct net_device *dev)
 
 	dst_cache_destroy(&t->dst_cache);
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 static void ip6gre_tunnel_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &ip6gre_netdev_ops;
-	dev->destructor = ip6gre_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ip6gre_dev_free;
 
 	dev->type = ARPHRD_IP6GRE;
 
@@ -1147,7 +1148,7 @@ static int __net_init ip6gre_init_net(struct net *net)
 	return 0;
 
 err_reg_dev:
-	ip6gre_dev_free(ign->fb_tunnel_dev);
+	free_netdev(ign->fb_tunnel_dev);
 err_alloc_dev:
 	return err;
 }
@@ -1299,7 +1300,8 @@ static void ip6gre_tap_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops = &ip6gre_tap_netdev_ops;
-	dev->destructor = ip6gre_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ip6gre_dev_free;
 
 	dev->features |= NETIF_F_NETNS_LOCAL;
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 93e58a5e1837..cdb3728faca7 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -63,7 +63,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 	const struct net_offload *ops;
 	int proto;
 	struct frag_hdr *fptr;
-	unsigned int unfrag_ip6hlen;
 	unsigned int payload_len;
 	u8 *prevhdr;
 	int offset = 0;
@@ -116,8 +115,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		skb->network_header = (u8 *)ipv6h - skb->head;
 
 		if (udpfrag) {
-			unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
-			fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);
+			int err = ip6_find_1stfragopt(skb, &prevhdr);
+			if (err < 0) {
+				kfree_skb_list(segs);
+				return ERR_PTR(err);
+			}
+			fptr = (struct frag_hdr *)((u8 *)ipv6h + err);
 			fptr->frag_off = htons(offset);
 			if (skb->next)
 				fptr->frag_off |= htons(IP6_MF);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 58f6288e9ba5..1699acb2fa2c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -597,7 +597,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 	int ptr, offset = 0, err = 0;
 	u8 *prevhdr, nexthdr = 0;
 
-	hlen = ip6_find_1stfragopt(skb, &prevhdr);
+	err = ip6_find_1stfragopt(skb, &prevhdr);
+	if (err < 0)
+		goto fail;
+	hlen = err;
 	nexthdr = *prevhdr;
 
 	mtu = ip6_skb_dst_mtu(skb);
@@ -1387,7 +1390,7 @@ emsgsize:
 	 */
 
 	cork->length += length;
-	if ((((length + fragheaderlen) > mtu) ||
+	if ((((length + (skb ? skb->len : headersize)) > mtu) ||
 	     (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
@@ -1463,6 +1466,11 @@ alloc_new_skb:
 			 */
 			alloclen += sizeof(struct frag_hdr);
 
+			copy = datalen - transhdrlen - fraggap;
+			if (copy < 0) {
+				err = -EINVAL;
+				goto error;
+			}
 			if (transhdrlen) {
 				skb = sock_alloc_send_skb(sk,
 						alloclen + hh_len,
@@ -1512,13 +1520,9 @@ alloc_new_skb:
 				data += fraggap;
 				pskb_trim_unique(skb_prev, maxfraglen);
 			}
-			copy = datalen - transhdrlen - fraggap;
-
-			if (copy < 0) {
-				err = -EINVAL;
-				kfree_skb(skb);
-				goto error;
-			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
+			if (copy > 0 &&
+			    getfrag(from, data + transhdrlen, offset,
+				    copy, fraggap, skb) < 0) {
 				err = -EFAULT;
 				kfree_skb(skb);
 				goto error;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6eb2ae507500..8c6c3c8e7eef 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -254,7 +254,6 @@ static void ip6_dev_free(struct net_device *dev)
 	gro_cells_destroy(&t->gro_cells);
 	dst_cache_destroy(&t->dst_cache);
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 static int ip6_tnl_create2(struct net_device *dev)
@@ -322,7 +321,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 	return t;
 
 failed_free:
-	ip6_dev_free(dev);
+	free_netdev(dev);
 failed:
 	return ERR_PTR(err);
 }
@@ -859,6 +858,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
 	return 0;
 
 drop:
+	if (tun_dst)
+		dst_release((struct dst_entry *)tun_dst);
 	kfree_skb(skb);
 	return 0;
 }
@@ -1095,6 +1096,9 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 
 	if (!dst) {
 route_lookup:
+		/* add dsfield to flowlabel for route lookup */
+		fl6->flowlabel = ip6_make_flowinfo(dsfield, fl6->flowlabel);
+
 		dst = ip6_route_output(net, NULL, fl6);
 
 		if (dst->error)
@@ -1196,7 +1200,7 @@ route_lookup:
 	skb_push(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	ipv6h = ipv6_hdr(skb);
-	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
+	ip6_flow_hdr(ipv6h, dsfield,
 		     ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
 	ipv6h->hop_limit = hop_limit;
 	ipv6h->nexthdr = proto;
@@ -1231,8 +1235,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (tproto != IPPROTO_IPIP && tproto != 0)
 		return -1;
 
-	dsfield = ipv4_get_dsfield(iph);
-
 	if (t->parms.collect_md) {
 		struct ip_tunnel_info *tun_info;
 		const struct ip_tunnel_key *key;
@@ -1246,6 +1248,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		fl6.flowi6_proto = IPPROTO_IPIP;
 		fl6.daddr = key->u.ipv6.dst;
 		fl6.flowlabel = key->label;
+		dsfield =  key->tos;
 	} else {
 		if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 			encap_limit = t->parms.encap_limit;
@@ -1254,8 +1257,9 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		fl6.flowi6_proto = IPPROTO_IPIP;
 
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-			fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
-					 & IPV6_TCLASS_MASK;
+			dsfield = ipv4_get_dsfield(iph);
+		else
+			dsfield = ip6_tclass(t->parms.flowinfo);
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
 			fl6.flowi6_mark = skb->mark;
 		else
@@ -1267,6 +1271,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
+	dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
+
 	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
 
 	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
@@ -1300,8 +1306,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	    ip6_tnl_addr_conflict(t, ipv6h))
 		return -1;
 
-	dsfield = ipv6_get_dsfield(ipv6h);
-
 	if (t->parms.collect_md) {
 		struct ip_tunnel_info *tun_info;
 		const struct ip_tunnel_key *key;
@@ -1315,6 +1319,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		fl6.flowi6_proto = IPPROTO_IPV6;
 		fl6.daddr = key->u.ipv6.dst;
 		fl6.flowlabel = key->label;
+		dsfield = key->tos;
 	} else {
 		offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
 		/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
@@ -1337,7 +1342,9 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		fl6.flowi6_proto = IPPROTO_IPV6;
 
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-			fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK);
+			dsfield = ipv6_get_dsfield(ipv6h);
+		else
+			dsfield = ip6_tclass(t->parms.flowinfo);
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
 			fl6.flowlabel |= ip6_flowlabel(ipv6h);
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
@@ -1351,6 +1358,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
+	dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
+
 	skb_set_inner_ipproto(skb, IPPROTO_IPV6);
 
 	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
@@ -1769,7 +1778,8 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
 static void ip6_tnl_dev_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &ip6_tnl_netdev_ops;
-	dev->destructor = ip6_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ip6_dev_free;
 
 	dev->type = ARPHRD_TUNNEL6;
 	dev->flags |= IFF_NOARP;
@@ -2216,7 +2226,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
 	return 0;
 
 err_register:
-	ip6_dev_free(ip6n->fb_tnl_dev);
+	free_netdev(ip6n->fb_tnl_dev);
 err_alloc_dev:
 	return err;
 }
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index d67ef56454b2..837ea1eefe7f 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -180,7 +180,6 @@ vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t)
 static void vti6_dev_free(struct net_device *dev)
 {
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 static int vti6_tnl_create2(struct net_device *dev)
@@ -235,7 +234,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p
 	return t;
 
 failed_free:
-	vti6_dev_free(dev);
+	free_netdev(dev);
 failed:
 	return NULL;
 }
@@ -842,7 +841,8 @@ static const struct net_device_ops vti6_netdev_ops = {
 static void vti6_dev_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &vti6_netdev_ops;
-	dev->destructor = vti6_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = vti6_dev_free;
 
 	dev->type = ARPHRD_TUNNEL6;
 	dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
@@ -1100,7 +1100,7 @@ static int __net_init vti6_init_net(struct net *net)
 	return 0;
 
 err_register:
-	vti6_dev_free(ip6n->fb_tnl_dev);
+	free_netdev(ip6n->fb_tnl_dev);
 err_alloc_dev:
 	return err;
 }
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 374997d26488..2ecb39b943b5 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -733,7 +733,7 @@ static void reg_vif_setup(struct net_device *dev)
 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
 	dev->flags		= IFF_NOARP;
 	dev->netdev_ops		= &reg_vif_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index cd4252346a32..e9065b8d3af8 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -79,14 +79,13 @@ EXPORT_SYMBOL(ipv6_select_ident);
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 {
 	u16 offset = sizeof(struct ipv6hdr);
-	struct ipv6_opt_hdr *exthdr =
-				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 	unsigned int packet_len = skb_tail_pointer(skb) -
 		skb_network_header(skb);
 	int found_rhdr = 0;
 	*nexthdr = &ipv6_hdr(skb)->nexthdr;
 
-	while (offset + 1 <= packet_len) {
+	while (offset <= packet_len) {
+		struct ipv6_opt_hdr *exthdr;
 
 		switch (**nexthdr) {
 
@@ -107,13 +106,16 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 			return offset;
 		}
 
-		offset += ipv6_optlen(exthdr);
-		*nexthdr = &exthdr->nexthdr;
+		if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
+			return -EINVAL;
+
 		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
 						 offset);
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
 	}
 
-	return offset;
+	return -EINVAL;
 }
 EXPORT_SYMBOL(ip6_find_1stfragopt);
 
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 9b522fa90e6d..ac826dd338ff 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -192,7 +192,7 @@ static struct inet_protosw pingv6_protosw = {
 	.type =      SOCK_DGRAM,
 	.protocol =  IPPROTO_ICMPV6,
 	.prot =      &pingv6_prot,
-	.ops =       &inet6_dgram_ops,
+	.ops =       &inet6_sockraw_ops,
 	.flags =     INET_PROTOSW_REUSE,
 };
 
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index cc8e3ae9ca73..e88bcb8ff0fd 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -219,7 +219,7 @@ static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
 	u64 buff64[SNMP_MIB_MAX];
 	int i;
 
-	memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
+	memset(buff64, 0, sizeof(u64) * SNMP_MIB_MAX);
 
 	snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff);
 	for (i = 0; itemlist[i].name; i++)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 1f992d9e261d..60be012fe708 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1338,7 +1338,7 @@ void raw6_proc_exit(void)
 #endif	/* CONFIG_PROC_FS */
 
 /* Same as inet6_dgram_ops, sans udp_poll.  */
-static const struct proto_ops inet6_sockraw_ops = {
+const struct proto_ops inet6_sockraw_ops = {
 	.family		   = PF_INET6,
 	.owner		   = THIS_MODULE,
 	.release	   = inet6_release,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dc61b0b5e64e..322bd62e688b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2804,6 +2804,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg)
 	if ((rt->dst.dev == dev || !dev) &&
 	    rt != adn->net->ipv6.ip6_null_entry &&
 	    (rt->rt6i_nsiblings == 0 ||
+	     (dev && netdev_unregistering(dev)) ||
 	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
 		return -1;
 
@@ -3721,7 +3722,11 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
 #endif
-	 } else if (event == NETDEV_UNREGISTER) {
+	 } else if (event == NETDEV_UNREGISTER &&
+		    dev->reg_state != NETREG_UNREGISTERED) {
+		/* NETDEV_UNREGISTER could be fired for multiple times by
+		 * netdev_wait_allrefs(). Make sure we only call this once.
+		 */
 		in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 		in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 61e5902f0687..f8ad15891cd7 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -265,7 +265,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
 	return nt;
 
 failed_free:
-	ipip6_dev_free(dev);
+	free_netdev(dev);
 failed:
 	return NULL;
 }
@@ -305,7 +305,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
 	 * we try harder to allocate.
 	 */
 	kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
-		kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
+		kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
 		NULL;
 
 	rcu_read_lock();
@@ -1336,7 +1336,6 @@ static void ipip6_dev_free(struct net_device *dev)
 
 	dst_cache_destroy(&tunnel->dst_cache);
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 #define SIT_FEATURES (NETIF_F_SG	   | \
@@ -1351,7 +1350,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)
 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 
 	dev->netdev_ops		= &ipip6_netdev_ops;
-	dev->destructor		= ipip6_dev_free;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= ipip6_dev_free;
 
 	dev->type		= ARPHRD_SIT;
 	dev->hard_header_len	= LL_MAX_HEADER + t_hlen;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index aeb9497b5bb7..4f4310a36a04 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1062,6 +1062,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
 #endif
 
+		newnp->ipv6_mc_list = NULL;
 		newnp->ipv6_ac_list = NULL;
 		newnp->ipv6_fl_list = NULL;
 		newnp->pktoptions  = NULL;
@@ -1131,6 +1132,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 	   First: no IPv4 options.
 	 */
 	newinet->inet_opt = NULL;
+	newnp->ipv6_mc_list = NULL;
 	newnp->ipv6_ac_list = NULL;
 	newnp->ipv6_fl_list = NULL;
 
@@ -1917,7 +1919,7 @@ struct proto tcpv6_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp6_sock),
-	.slab_flags		= SLAB_DESTROY_BY_RCU,
+	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
 	.twsk_prot		= &tcp6_timewait_sock_ops,
 	.rsk_prot		= &tcp6_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 04862abfe4ec..75703fda23e7 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -526,7 +526,7 @@ out:
 	return;
 }
 
-int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int rc;
 
@@ -569,7 +569,7 @@ void udpv6_encap_enable(void)
 }
 EXPORT_SYMBOL(udpv6_encap_enable);
 
-int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct udp_sock *up = udp_sk(sk);
 	int is_udplite = IS_UDPLITE(sk);
@@ -879,7 +879,8 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
 	struct sock *sk;
 
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
-		if (INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+		if (sk->sk_state == TCP_ESTABLISHED &&
+		    INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
 			return sk;
 		/* Only check first socket in chain */
 		break;
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index e78bdc76dcc3..f180b3d85e31 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -26,7 +26,6 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
 int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
 int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 		  int flags, int *addr_len);
-int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 void udpv6_destroy_sock(struct sock *sk);
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index ac858c480f2f..a2267f80febb 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -29,6 +29,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 	u8 frag_hdr_sz = sizeof(struct frag_hdr);
 	__wsum csum;
 	int tnl_hlen;
+	int err;
 
 	mss = skb_shinfo(skb)->gso_size;
 	if (unlikely(skb->len <= mss))
@@ -90,7 +91,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		/* Find the unfragmentable header and shift it left by frag_hdr_sz
 		 * bytes to insert fragment header.
 		 */
-		unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+		err = ip6_find_1stfragopt(skb, &prevhdr);
+		if (err < 0)
+			return ERR_PTR(err);
+		unfrag_ip6hlen = err;
 		nexthdr = *prevhdr;
 		*prevhdr = NEXTHDR_FRAGMENT;
 		unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 08a807b29298..3ef5d913e7a3 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -43,8 +43,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 		return 1;
 #endif
 
-	ipv6_hdr(skb)->payload_len = htons(skb->len);
 	__skb_push(skb, skb->data - skb_network_header(skb));
+	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 
 	if (xo && (xo->flags & XFRM_GRO)) {
 		skb_mac_header_rebuild(skb);
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 0e015906f9ca..07d36573f50b 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -47,6 +47,8 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 	iph = ipv6_hdr(skb);
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+	if (hdr_len < 0)
+		return hdr_len;
 	skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
 	skb_set_network_header(skb, -x->props.header_len);
 	skb->transport_header = skb->network_header + hdr_len;
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 7a92c0f31912..9ad07a91708e 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -30,6 +30,8 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 	skb_set_inner_transport_header(skb, skb_transport_offset(skb));
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+	if (hdr_len < 0)
+		return hdr_len;
 	skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
 	skb_set_network_header(skb, -x->props.header_len);
 	skb->transport_header = skb->network_header + hdr_len;
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 74d09f91709e..3be852808a9d 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -65,7 +65,7 @@ static void irlan_eth_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops		= &irlan_eth_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->min_mtu		= 0;
 	dev->max_mtu		= ETH_MAX_MTU;
 
diff --git a/net/key/af_key.c b/net/key/af_key.c
index c1950bb14735..b1432b668033 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1157,6 +1157,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 			goto out;
 	}
 
+	err = -ENOBUFS;
 	key = ext_hdrs[SADB_EXT_KEY_AUTH - 1];
 	if (sa->sadb_sa_auth) {
 		int keysize = 0;
@@ -1168,8 +1169,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 		if (key)
 			keysize = (key->sadb_key_bits + 7) / 8;
 		x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL);
-		if (!x->aalg)
+		if (!x->aalg) {
+			err = -ENOMEM;
 			goto out;
+		}
 		strcpy(x->aalg->alg_name, a->name);
 		x->aalg->alg_key_len = 0;
 		if (key) {
@@ -1188,8 +1191,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 				goto out;
 			}
 			x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL);
-			if (!x->calg)
+			if (!x->calg) {
+				err = -ENOMEM;
 				goto out;
+			}
 			strcpy(x->calg->alg_name, a->name);
 			x->props.calgo = sa->sadb_sa_encrypt;
 		} else {
@@ -1203,8 +1208,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 			if (key)
 				keysize = (key->sadb_key_bits + 7) / 8;
 			x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL);
-			if (!x->ealg)
+			if (!x->ealg) {
+				err = -ENOMEM;
 				goto out;
+			}
 			strcpy(x->ealg->alg_name, a->name);
 			x->ealg->alg_key_len = 0;
 			if (key) {
@@ -1249,8 +1256,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 		struct xfrm_encap_tmpl *natt;
 
 		x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
-		if (!x->encap)
+		if (!x->encap) {
+			err = -ENOMEM;
 			goto out;
+		}
 
 		natt = x->encap;
 		n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1];
@@ -2755,6 +2764,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
 	int err, err2;
 
 	err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
+	if (!err)
+		xfrm_garbage_collect(net);
 	err2 = unicast_flush_resp(sk, hdr);
 	if (err || err2) {
 		if (err == -ESRCH) /* empty table - old silent behavior */
@@ -3285,7 +3296,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
 		p += pol->sadb_x_policy_len*8;
 		sec_ctx = (struct sadb_x_sec_ctx *)p;
 		if (len < pol->sadb_x_policy_len*8 +
-		    sec_ctx->sadb_x_sec_len) {
+		    sec_ctx->sadb_x_sec_len*8) {
 			*dir = -EINVAL;
 			goto out;
 		}
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8b21af7321b9..4de2ec94b08c 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -114,12 +114,13 @@ static void l2tp_eth_get_stats64(struct net_device *dev,
 {
 	struct l2tp_eth *priv = netdev_priv(dev);
 
-	stats->tx_bytes   = atomic_long_read(&priv->tx_bytes);
-	stats->tx_packets = atomic_long_read(&priv->tx_packets);
-	stats->tx_dropped = atomic_long_read(&priv->tx_dropped);
-	stats->rx_bytes   = atomic_long_read(&priv->rx_bytes);
-	stats->rx_packets = atomic_long_read(&priv->rx_packets);
-	stats->rx_errors  = atomic_long_read(&priv->rx_errors);
+	stats->tx_bytes   = (unsigned long) atomic_long_read(&priv->tx_bytes);
+	stats->tx_packets = (unsigned long) atomic_long_read(&priv->tx_packets);
+	stats->tx_dropped = (unsigned long) atomic_long_read(&priv->tx_dropped);
+	stats->rx_bytes   = (unsigned long) atomic_long_read(&priv->rx_bytes);
+	stats->rx_packets = (unsigned long) atomic_long_read(&priv->rx_packets);
+	stats->rx_errors  = (unsigned long) atomic_long_read(&priv->rx_errors);
+
 }
 
 static const struct net_device_ops l2tp_eth_netdev_ops = {
@@ -141,7 +142,7 @@ static void l2tp_eth_dev_setup(struct net_device *dev)
 	dev->priv_flags		&= ~IFF_TX_SKB_SHARING;
 	dev->features		|= NETIF_F_LLTX;
 	dev->netdev_ops		= &l2tp_eth_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index cb4fff785cbf..c38d16f22d2a 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -142,7 +142,7 @@ static struct proto llc_proto = {
 	.name	  = "LLC",
 	.owner	  = THIS_MODULE,
 	.obj_size = sizeof(struct llc_sock),
-	.slab_flags = SLAB_DESTROY_BY_RCU,
+	.slab_flags = SLAB_TYPESAFE_BY_RCU,
 };
 
 /**
@@ -311,6 +311,8 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
 	int rc = -EINVAL;
 
 	dprintk("%s: binding %02X\n", __func__, addr->sllc_sap);
+
+	lock_sock(sk);
 	if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr)))
 		goto out;
 	rc = -EAFNOSUPPORT;
@@ -382,6 +384,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
 out_put:
 	llc_sap_put(sap);
 out:
+	release_sock(sk);
 	return rc;
 }
 
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 8bc5a1bd2d45..9b02c13d258b 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -506,7 +506,7 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap,
 again:
 	sk_nulls_for_each_rcu(rc, node, laddr_hb) {
 		if (llc_estab_match(sap, daddr, laddr, rc)) {
-			/* Extra checks required by SLAB_DESTROY_BY_RCU */
+			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
 			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
 				goto again;
 			if (unlikely(llc_sk(rc)->sap != sap ||
@@ -565,7 +565,7 @@ static struct sock *__llc_lookup_listener(struct llc_sap *sap,
 again:
 	sk_nulls_for_each_rcu(rc, node, laddr_hb) {
 		if (llc_listener_match(sap, laddr, rc)) {
-			/* Extra checks required by SLAB_DESTROY_BY_RCU */
+			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
 			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
 				goto again;
 			if (unlikely(llc_sk(rc)->sap != sap ||
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 5404d0d195cc..63b6ab056370 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -328,7 +328,7 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap,
 again:
 	sk_nulls_for_each_rcu(rc, node, laddr_hb) {
 		if (llc_dgram_match(sap, laddr, rc)) {
-			/* Extra checks required by SLAB_DESTROY_BY_RCU */
+			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
 			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
 				goto again;
 			if (unlikely(llc_sk(rc)->sap != sap ||
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 60e2a62f7bef..cf2392b2ac71 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -7,7 +7,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
- * Copyright(c) 2015 Intel Deutschland GmbH
+ * Copyright(c) 2015-2017 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -741,46 +741,43 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
 	ieee80211_agg_start_txq(sta, tid, true);
 }
 
-void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
+void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
+			      struct tid_ampdu_tx *tid_tx)
 {
-	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct ieee80211_local *local = sdata->local;
-	struct sta_info *sta;
-	struct tid_ampdu_tx *tid_tx;
 
-	trace_api_start_tx_ba_cb(sdata, ra, tid);
+	if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)))
+		return;
+
+	if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state))
+		ieee80211_agg_tx_operational(local, sta, tid);
+}
+
+static struct tid_ampdu_tx *
+ieee80211_lookup_tid_tx(struct ieee80211_sub_if_data *sdata,
+			const u8 *ra, u16 tid, struct sta_info **sta)
+{
+	struct tid_ampdu_tx *tid_tx;
 
 	if (tid >= IEEE80211_NUM_TIDS) {
 		ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n",
 		       tid, IEEE80211_NUM_TIDS);
-		return;
+		return NULL;
 	}
 
-	mutex_lock(&local->sta_mtx);
-	sta = sta_info_get_bss(sdata, ra);
-	if (!sta) {
-		mutex_unlock(&local->sta_mtx);
+	*sta = sta_info_get_bss(sdata, ra);
+	if (!*sta) {
 		ht_dbg(sdata, "Could not find station: %pM\n", ra);
-		return;
+		return NULL;
 	}
 
-	mutex_lock(&sta->ampdu_mlme.mtx);
-	tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
+	tid_tx = rcu_dereference((*sta)->ampdu_mlme.tid_tx[tid]);
 
-	if (WARN_ON(!tid_tx)) {
+	if (WARN_ON(!tid_tx))
 		ht_dbg(sdata, "addBA was not requested!\n");
-		goto unlock;
-	}
 
-	if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)))
-		goto unlock;
-
-	if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state))
-		ieee80211_agg_tx_operational(local, sta, tid);
-
- unlock:
-	mutex_unlock(&sta->ampdu_mlme.mtx);
-	mutex_unlock(&local->sta_mtx);
+	return tid_tx;
 }
 
 void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
@@ -788,19 +785,20 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
 {
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_ra_tid *ra_tid;
-	struct sk_buff *skb = dev_alloc_skb(0);
+	struct sta_info *sta;
+	struct tid_ampdu_tx *tid_tx;
 
-	if (unlikely(!skb))
-		return;
+	trace_api_start_tx_ba_cb(sdata, ra, tid);
 
-	ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
-	memcpy(&ra_tid->ra, ra, ETH_ALEN);
-	ra_tid->tid = tid;
+	rcu_read_lock();
+	tid_tx = ieee80211_lookup_tid_tx(sdata, ra, tid, &sta);
+	if (!tid_tx)
+		goto out;
 
-	skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_START;
-	skb_queue_tail(&sdata->skb_queue, skb);
-	ieee80211_queue_work(&local->hw, &sdata->work);
+	set_bit(HT_AGG_STATE_START_CB, &tid_tx->state);
+	ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+ out:
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe);
 
@@ -860,37 +858,18 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
 }
 EXPORT_SYMBOL(ieee80211_stop_tx_ba_session);
 
-void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
+void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid,
+			     struct tid_ampdu_tx *tid_tx)
 {
-	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
-	struct ieee80211_local *local = sdata->local;
-	struct sta_info *sta;
-	struct tid_ampdu_tx *tid_tx;
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	bool send_delba = false;
 
-	trace_api_stop_tx_ba_cb(sdata, ra, tid);
-
-	if (tid >= IEEE80211_NUM_TIDS) {
-		ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n",
-		       tid, IEEE80211_NUM_TIDS);
-		return;
-	}
-
-	ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", ra, tid);
-
-	mutex_lock(&local->sta_mtx);
-
-	sta = sta_info_get_bss(sdata, ra);
-	if (!sta) {
-		ht_dbg(sdata, "Could not find station: %pM\n", ra);
-		goto unlock;
-	}
+	ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n",
+	       sta->sta.addr, tid);
 
-	mutex_lock(&sta->ampdu_mlme.mtx);
 	spin_lock_bh(&sta->lock);
-	tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
 
-	if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
+	if (!test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
 		ht_dbg(sdata,
 		       "unexpected callback to A-MPDU stop for %pM tid %d\n",
 		       sta->sta.addr, tid);
@@ -906,12 +885,8 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
 	spin_unlock_bh(&sta->lock);
 
 	if (send_delba)
-		ieee80211_send_delba(sdata, ra, tid,
+		ieee80211_send_delba(sdata, sta->sta.addr, tid,
 			WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
-
-	mutex_unlock(&sta->ampdu_mlme.mtx);
- unlock:
-	mutex_unlock(&local->sta_mtx);
 }
 
 void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
@@ -919,19 +894,20 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
 {
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_ra_tid *ra_tid;
-	struct sk_buff *skb = dev_alloc_skb(0);
+	struct sta_info *sta;
+	struct tid_ampdu_tx *tid_tx;
 
-	if (unlikely(!skb))
-		return;
+	trace_api_stop_tx_ba_cb(sdata, ra, tid);
 
-	ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
-	memcpy(&ra_tid->ra, ra, ETH_ALEN);
-	ra_tid->tid = tid;
+	rcu_read_lock();
+	tid_tx = ieee80211_lookup_tid_tx(sdata, ra, tid, &sta);
+	if (!tid_tx)
+		goto out;
 
-	skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_STOP;
-	skb_queue_tail(&sdata->skb_queue, skb);
-	ieee80211_queue_work(&local->hw, &sdata->work);
+	set_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state);
+	ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+ out:
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe);
 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 6c2e6060cd54..4a388fe8c2d1 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -902,6 +902,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
 	default:
 		return -EINVAL;
 	}
+	sdata->u.ap.req_smps = sdata->smps_mode;
+
 	sdata->needed_rx_chains = sdata->local->rx_chains;
 
 	sdata->vif.bss_conf.beacon_int = params->beacon_interval;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index f4a528773563..6ca5442b1e03 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -7,6 +7,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
+ * Copyright 2017	Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -289,8 +290,6 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
 {
 	int i;
 
-	cancel_work_sync(&sta->ampdu_mlme.work);
-
 	for (i = 0; i <  IEEE80211_NUM_TIDS; i++) {
 		__ieee80211_stop_tx_ba_session(sta, i, reason);
 		__ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
@@ -298,6 +297,9 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
 					       reason != AGG_STOP_DESTROY_STA &&
 					       reason != AGG_STOP_PEER_REQUEST);
 	}
+
+	/* stopping might queue the work again - so cancel only afterwards */
+	cancel_work_sync(&sta->ampdu_mlme.work);
 }
 
 void ieee80211_ba_session_work(struct work_struct *work)
@@ -352,10 +354,16 @@ void ieee80211_ba_session_work(struct work_struct *work)
 		spin_unlock_bh(&sta->lock);
 
 		tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
-		if (tid_tx && test_and_clear_bit(HT_AGG_STATE_WANT_STOP,
-						 &tid_tx->state))
+		if (!tid_tx)
+			continue;
+
+		if (test_and_clear_bit(HT_AGG_STATE_START_CB, &tid_tx->state))
+			ieee80211_start_tx_ba_cb(sta, tid, tid_tx);
+		if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state))
 			___ieee80211_stop_tx_ba_session(sta, tid,
 							AGG_STOP_LOCAL_REQUEST);
+		if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state))
+			ieee80211_stop_tx_ba_cb(sta, tid, tid_tx);
 	}
 	mutex_unlock(&sta->ampdu_mlme.mtx);
 }
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f8f6c148f554..5e002f62c235 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1036,8 +1036,6 @@ struct ieee80211_rx_agg {
 
 enum sdata_queue_type {
 	IEEE80211_SDATA_QUEUE_TYPE_FRAME	= 0,
-	IEEE80211_SDATA_QUEUE_AGG_START		= 1,
-	IEEE80211_SDATA_QUEUE_AGG_STOP		= 2,
 	IEEE80211_SDATA_QUEUE_RX_AGG_START	= 3,
 	IEEE80211_SDATA_QUEUE_RX_AGG_STOP	= 4,
 };
@@ -1427,12 +1425,6 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata)
 	return local->hw.wiphy->bands[band];
 }
 
-/* this struct represents 802.11n's RA/TID combination */
-struct ieee80211_ra_tid {
-	u8 ra[ETH_ALEN];
-	u16 tid;
-};
-
 /* this struct holds the value parsing from channel switch IE  */
 struct ieee80211_csa_ie {
 	struct cfg80211_chan_def chandef;
@@ -1539,7 +1531,7 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status)
 		return true;
 	/* can't handle non-legacy preamble yet */
 	if (status->flag & RX_FLAG_MACTIME_PLCP_START &&
-	    status->encoding != RX_ENC_LEGACY)
+	    status->encoding == RX_ENC_LEGACY)
 		return true;
 	return false;
 }
@@ -1794,8 +1786,10 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 				   enum ieee80211_agg_stop_reason reason);
 int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 				    enum ieee80211_agg_stop_reason reason);
-void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid);
-void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid);
+void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
+			      struct tid_ampdu_tx *tid_tx);
+void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid,
+			     struct tid_ampdu_tx *tid_tx);
 void ieee80211_ba_session_work(struct work_struct *work);
 void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid);
 void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 3bd5b81f5d81..f5f50150ba1c 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1213,7 +1213,6 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
 static void ieee80211_if_free(struct net_device *dev)
 {
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 static void ieee80211_if_setup(struct net_device *dev)
@@ -1221,7 +1220,8 @@ static void ieee80211_if_setup(struct net_device *dev)
 	ether_setup(dev);
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->netdev_ops = &ieee80211_dataif_ops;
-	dev->destructor = ieee80211_if_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ieee80211_if_free;
 }
 
 static void ieee80211_if_setup_no_queue(struct net_device *dev)
@@ -1237,7 +1237,6 @@ static void ieee80211_iface_work(struct work_struct *work)
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct sta_info *sta;
-	struct ieee80211_ra_tid *ra_tid;
 	struct ieee80211_rx_agg *rx_agg;
 
 	if (!ieee80211_sdata_running(sdata))
@@ -1253,15 +1252,7 @@ static void ieee80211_iface_work(struct work_struct *work)
 	while ((skb = skb_dequeue(&sdata->skb_queue))) {
 		struct ieee80211_mgmt *mgmt = (void *)skb->data;
 
-		if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_START) {
-			ra_tid = (void *)&skb->cb;
-			ieee80211_start_tx_ba_cb(&sdata->vif, ra_tid->ra,
-						 ra_tid->tid);
-		} else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_STOP) {
-			ra_tid = (void *)&skb->cb;
-			ieee80211_stop_tx_ba_cb(&sdata->vif, ra_tid->ra,
-						ra_tid->tid);
-		} else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_START) {
+		if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_START) {
 			rx_agg = (void *)&skb->cb;
 			mutex_lock(&local->sta_mtx);
 			sta = sta_info_get_bss(sdata, rx_agg->addr);
@@ -1825,6 +1816,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 		ret = dev_alloc_name(ndev, ndev->name);
 		if (ret < 0) {
 			ieee80211_if_free(ndev);
+			free_netdev(ndev);
 			return ret;
 		}
 
@@ -1914,7 +1906,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 
 		ret = register_netdevice(ndev);
 		if (ret) {
-			ieee80211_if_free(ndev);
+			free_netdev(ndev);
 			return ret;
 		}
 	}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0ea9712bd99e..cc8e6ea1b27e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -601,7 +601,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_chanctx_conf *chanctx_conf;
 	struct ieee80211_channel *chan;
-	u32 rate_flags, rates = 0;
+	u32 rates = 0;
 
 	sdata_assert_lock(sdata);
 
@@ -612,7 +612,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		return;
 	}
 	chan = chanctx_conf->def.chan;
-	rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
 	rcu_read_unlock();
 	sband = local->hw.wiphy->bands[chan->band];
 	shift = ieee80211_vif_get_shift(&sdata->vif);
@@ -636,9 +635,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		 */
 		rates_len = 0;
 		for (i = 0; i < sband->n_bitrates; i++) {
-			if ((rate_flags & sband->bitrates[i].flags)
-			    != rate_flags)
-				continue;
 			rates |= BIT(i);
 			rates_len++;
 		}
@@ -2818,7 +2814,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
 				u32 *rates, u32 *basic_rates,
 				bool *have_higher_than_11mbit,
 				int *min_rate, int *min_rate_index,
-				int shift, u32 rate_flags)
+				int shift)
 {
 	int i, j;
 
@@ -2846,8 +2842,6 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
 			int brate;
 
 			br = &sband->bitrates[j];
-			if ((rate_flags & br->flags) != rate_flags)
-				continue;
 
 			brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5);
 			if (brate == rate) {
@@ -4398,40 +4392,32 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
 			return -ENOMEM;
 	}
 
-	if (new_sta || override) {
-		err = ieee80211_prep_channel(sdata, cbss);
-		if (err) {
-			if (new_sta)
-				sta_info_free(local, new_sta);
-			return -EINVAL;
-		}
-	}
-
+	/*
+	 * Set up the information for the new channel before setting the
+	 * new channel. We can't - completely race-free - change the basic
+	 * rates bitmap and the channel (sband) that it refers to, but if
+	 * we set it up before we at least avoid calling into the driver's
+	 * bss_info_changed() method with invalid information (since we do
+	 * call that from changing the channel - only for IDLE and perhaps
+	 * some others, but ...).
+	 *
+	 * So to avoid that, just set up all the new information before the
+	 * channel, but tell the driver to apply it only afterwards, since
+	 * it might need the new channel for that.
+	 */
 	if (new_sta) {
 		u32 rates = 0, basic_rates = 0;
 		bool have_higher_than_11mbit;
 		int min_rate = INT_MAX, min_rate_index = -1;
-		struct ieee80211_chanctx_conf *chanctx_conf;
 		const struct cfg80211_bss_ies *ies;
 		int shift = ieee80211_vif_get_shift(&sdata->vif);
-		u32 rate_flags;
-
-		rcu_read_lock();
-		chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
-		if (WARN_ON(!chanctx_conf)) {
-			rcu_read_unlock();
-			sta_info_free(local, new_sta);
-			return -EINVAL;
-		}
-		rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
-		rcu_read_unlock();
 
 		ieee80211_get_rates(sband, bss->supp_rates,
 				    bss->supp_rates_len,
 				    &rates, &basic_rates,
 				    &have_higher_than_11mbit,
 				    &min_rate, &min_rate_index,
-				    shift, rate_flags);
+				    shift);
 
 		/*
 		 * This used to be a workaround for basic rates missing
@@ -4489,8 +4475,22 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
 			sdata->vif.bss_conf.sync_dtim_count = 0;
 		}
 		rcu_read_unlock();
+	}
 
-		/* tell driver about BSSID, basic rates and timing */
+	if (new_sta || override) {
+		err = ieee80211_prep_channel(sdata, cbss);
+		if (err) {
+			if (new_sta)
+				sta_info_free(local, new_sta);
+			return -EINVAL;
+		}
+	}
+
+	if (new_sta) {
+		/*
+		 * tell driver about BSSID, basic rates and timing
+		 * this was set up above, before setting the channel
+		 */
 		ieee80211_bss_info_change_notify(sdata,
 			BSS_CHANGED_BSSID | BSS_CHANGED_BASIC_RATES |
 			BSS_CHANGED_BEACON_INT);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 35f4c7d7a500..3674fe3d67dc 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1613,12 +1613,16 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	 */
 	if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) &&
 	    !ieee80211_has_morefrags(hdr->frame_control) &&
+	    !ieee80211_is_back_req(hdr->frame_control) &&
 	    !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
 	    (rx->sdata->vif.type == NL80211_IFTYPE_AP ||
 	     rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
-	    /* PM bit is only checked in frames where it isn't reserved,
+	    /*
+	     * PM bit is only checked in frames where it isn't reserved,
 	     * in AP mode it's reserved in non-bufferable management frames
 	     * (cf. IEEE 802.11-2012 8.2.4.1.7 Power Management field)
+	     * BAR frames should be ignored as specified in
+	     * IEEE 802.11-2012 10.2.1.2.
 	     */
 	    (!ieee80211_is_mgmt(hdr->frame_control) ||
 	     ieee80211_is_bufferable_mmpdu(hdr->frame_control))) {
@@ -2492,7 +2496,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 		if (is_multicast_ether_addr(hdr->addr1)) {
 			mpp_addr = hdr->addr3;
 			proxied_addr = mesh_hdr->eaddr1;
-		} else if (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6) {
+		} else if ((mesh_hdr->flags & MESH_FLAGS_AE) ==
+			    MESH_FLAGS_AE_A5_A6) {
 			/* has_a4 already checked in ieee80211_rx_mesh_check */
 			mpp_addr = hdr->addr4;
 			proxied_addr = mesh_hdr->eaddr2;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 7cdf7a835bb0..403e3cc58b57 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2155,7 +2155,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 			struct ieee80211_sta_rx_stats *cpurxs;
 
 			cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
-			sinfo->rx_packets += cpurxs->dropped;
+			sinfo->rx_dropped_misc += cpurxs->dropped;
 		}
 	}
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5609cacb20d5..ea0747d6a6da 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -116,6 +116,8 @@ enum ieee80211_sta_info_flags {
 #define HT_AGG_STATE_STOPPING		3
 #define HT_AGG_STATE_WANT_START		4
 #define HT_AGG_STATE_WANT_STOP		5
+#define HT_AGG_STATE_START_CB		6
+#define HT_AGG_STATE_STOP_CB		7
 
 enum ieee80211_agg_stop_reason {
 	AGG_STOP_DECLINED,
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index c1ef22df865f..cc19614ff4e6 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -17,6 +17,7 @@
 #include <asm/unaligned.h>
 #include <net/mac80211.h>
 #include <crypto/aes.h>
+#include <crypto/algapi.h>
 
 #include "ieee80211_i.h"
 #include "michael.h"
@@ -153,7 +154,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 	data_len = skb->len - hdrlen - MICHAEL_MIC_LEN;
 	key = &rx->key->conf.key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY];
 	michael_mic(key, hdr, data, data_len, mic);
-	if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0)
+	if (crypto_memneq(mic, data + data_len, MICHAEL_MIC_LEN))
 		goto mic_fail;
 
 	/* remove Michael MIC from payload */
@@ -1048,7 +1049,7 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
 		bip_aad(skb, aad);
 		ieee80211_aes_cmac(key->u.aes_cmac.tfm, aad,
 				   skb->data + 24, skb->len - 24, mic);
-		if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+		if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
 			key->u.aes_cmac.icverrors++;
 			return RX_DROP_UNUSABLE;
 		}
@@ -1098,7 +1099,7 @@ ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx)
 		bip_aad(skb, aad);
 		ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad,
 				       skb->data + 24, skb->len - 24, mic);
-		if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+		if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
 			key->u.aes_cmac.icverrors++;
 			return RX_DROP_UNUSABLE;
 		}
@@ -1202,7 +1203,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx)
 		if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce,
 				       skb->data + 24, skb->len - 24,
 				       mic) < 0 ||
-		    memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+		    crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
 			key->u.aes_gmac.icverrors++;
 			return RX_DROP_UNUSABLE;
 		}
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index 06019dba4b10..bd88a9b80773 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -526,8 +526,6 @@ static void mac802154_wpan_free(struct net_device *dev)
 	struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
 
 	mac802154_llsec_destroy(&sdata->sec);
-
-	free_netdev(dev);
 }
 
 static void ieee802154_if_setup(struct net_device *dev)
@@ -593,7 +591,8 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
 					sdata->dev->dev_addr);
 
 		sdata->dev->header_ops = &mac802154_header_ops;
-		sdata->dev->destructor = mac802154_wpan_free;
+		sdata->dev->needs_free_netdev = true;
+		sdata->dev->priv_destructor = mac802154_wpan_free;
 		sdata->dev->netdev_ops = &mac802154_wpan_ops;
 		sdata->dev->ml_priv = &mac802154_mlme_wpan;
 		wpan_dev->promiscuous_mode = false;
@@ -608,7 +607,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
 
 		break;
 	case NL802154_IFTYPE_MONITOR:
-		sdata->dev->destructor = free_netdev;
+		sdata->dev->needs_free_netdev = true;
 		sdata->dev->netdev_ops = &mac802154_monitor_ops;
 		wpan_dev->promiscuous_mode = true;
 		break;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 257ec66009da..7b05fd1497ce 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1418,7 +1418,7 @@ static void mpls_ifup(struct net_device *dev, unsigned int flags)
 				continue;
 			alive++;
 			nh_flags &= ~flags;
-			WRITE_ONCE(nh->nh_flags, flags);
+			WRITE_ONCE(nh->nh_flags, nh_flags);
 		} endfor_nexthops(rt);
 
 		WRITE_ONCE(rt->rt_nhn_alive, alive);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index d2d7bdf1d510..ad99c1ceea6f 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -849,10 +849,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 {
 	unsigned int verdict = NF_DROP;
 
-	if (IP_VS_FWD_METHOD(cp) != 0) {
-		pr_err("shouldn't reach here, because the box is on the "
-		       "half connection in the tun/dr module.\n");
-	}
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		goto ignore_cp;
 
 	/* Ensure the checksum is correct */
 	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
@@ -886,6 +884,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 		ip_vs_notrack(skb);
 	else
 		ip_vs_update_conntrack(skb, cp, 0);
+
+ignore_cp:
 	verdict = NF_ACCEPT;
 
 out:
@@ -1385,8 +1385,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
 	 */
 	cp = pp->conn_out_get(ipvs, af, skb, &iph);
 
-	if (likely(cp))
+	if (likely(cp)) {
+		if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+			goto ignore_cp;
 		return handle_response(af, skb, pd, cp, &iph, hooknum);
+	}
 
 	/* Check for real-server-started requests */
 	if (atomic_read(&ipvs->conn_out_counter)) {
@@ -1444,9 +1447,15 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
 			}
 		}
 	}
+
+out:
 	IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
 		      "ip_vs_out: packet continues traversal as normal");
 	return NF_ACCEPT;
+
+ignore_cp:
+	__ip_vs_conn_put(cp);
+	goto out;
 }
 
 /*
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3c8f1ed2f555..e847dbaa0c6b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -911,7 +911,7 @@ static unsigned int early_drop_list(struct net *net,
 			continue;
 
 		/* kill only if still in same netns -- might have moved due to
-		 * SLAB_DESTROY_BY_RCU rules.
+		 * SLAB_TYPESAFE_BY_RCU rules.
 		 *
 		 * We steal the timer reference.  If that fails timer has
 		 * already fired or someone else deleted it. Just drop ref
@@ -1114,7 +1114,7 @@ __nf_conntrack_alloc(struct net *net,
 
 	/*
 	 * Do not use kmem_cache_zalloc(), as this cache uses
-	 * SLAB_DESTROY_BY_RCU.
+	 * SLAB_TYPESAFE_BY_RCU.
 	 */
 	ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
 	if (ct == NULL)
@@ -1159,7 +1159,7 @@ void nf_conntrack_free(struct nf_conn *ct)
 	struct net *net = nf_ct_net(ct);
 
 	/* A freed object has refcnt == 0, that's
-	 * the golden rule for SLAB_DESTROY_BY_RCU
+	 * the golden rule for SLAB_TYPESAFE_BY_RCU
 	 */
 	NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
 
@@ -1929,7 +1929,7 @@ int nf_conntrack_init_start(void)
 	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
 						sizeof(struct nf_conn),
 						NFCT_INFOMASK + 1,
-						SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
+						SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
 	if (!nf_conntrack_cachep)
 		goto err_cachep;
 
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 3a60efa7799b..7f6100ca63be 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -174,6 +174,10 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
 #endif
 	if (h != NULL && !try_module_get(h->me))
 		h = NULL;
+	if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) {
+		module_put(h->me);
+		h = NULL;
+	}
 
 	rcu_read_unlock();
 
@@ -181,6 +185,13 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get);
 
+void nf_conntrack_helper_put(struct nf_conntrack_helper *helper)
+{
+	refcount_dec(&helper->refcnt);
+	module_put(helper->me);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_put);
+
 struct nf_conn_help *
 nf_ct_helper_ext_add(struct nf_conn *ct,
 		     struct nf_conntrack_helper *helper, gfp_t gfp)
@@ -417,6 +428,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 			}
 		}
 	}
+	refcount_set(&me->refcnt, 1);
 	hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
 	nf_ct_helper_count++;
 out:
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dcf561b5c97a..a8be9b72e6cd 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -45,6 +45,8 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_conntrack_timestamp.h>
 #include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_l4proto.h>
@@ -888,8 +890,13 @@ restart:
 	}
 out:
 	local_bh_enable();
-	if (last)
+	if (last) {
+		/* nf ct hash resize happened, now clear the leftover. */
+		if ((struct nf_conn *)cb->args[1] == last)
+			cb->args[1] = 0;
+
 		nf_ct_put(last);
+	}
 
 	while (i) {
 		i--;
@@ -1007,9 +1014,8 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
 
 static int
 ctnetlink_parse_tuple(const struct nlattr * const cda[],
-		      struct nf_conntrack_tuple *tuple,
-		      enum ctattr_type type, u_int8_t l3num,
-		      struct nf_conntrack_zone *zone)
+		      struct nf_conntrack_tuple *tuple, u32 type,
+		      u_int8_t l3num, struct nf_conntrack_zone *zone)
 {
 	struct nlattr *tb[CTA_TUPLE_MAX+1];
 	int err;
@@ -1828,6 +1834,8 @@ ctnetlink_create_conntrack(struct net *net,
 	nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
 	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
 	nf_ct_labels_ext_add(ct);
+	nfct_seqadj_ext_add(ct);
+	nfct_synproxy_ext_add(ct);
 
 	/* we must add conntrack extensions before confirmation. */
 	ct->status |= IPS_CONFIRMED;
@@ -2447,7 +2455,7 @@ static struct nfnl_ct_hook ctnetlink_glue_hook = {
 
 static int ctnetlink_exp_dump_tuple(struct sk_buff *skb,
 				    const struct nf_conntrack_tuple *tuple,
-				    enum ctattr_expect type)
+				    u32 type)
 {
 	struct nlattr *nest_parms;
 
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 13875d599a85..1c5b14a6cab3 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -512,16 +512,19 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
 		      u8 pf, unsigned int hooknum)
 {
 	const struct sctphdr *sh;
-	struct sctphdr _sctph;
 	const char *logmsg;
 
-	sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
-	if (!sh) {
+	if (skb->len < dataoff + sizeof(struct sctphdr)) {
 		logmsg = "nf_ct_sctp: short packet ";
 		goto out_invalid;
 	}
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    skb->ip_summed == CHECKSUM_NONE) {
+		if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) {
+			logmsg = "nf_ct_sctp: failed to read header ";
+			goto out_invalid;
+		}
+		sh = (const struct sctphdr *)(skb->data + dataoff);
 		if (sh->checksum != sctp_compute_cksum(skb, dataoff)) {
 			logmsg = "nf_ct_sctp: bad CRC ";
 			goto out_invalid;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index b48d6b5aae8a..6c72922d20ca 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -409,6 +409,10 @@ nf_nat_setup_info(struct nf_conn *ct,
 {
 	struct nf_conntrack_tuple curr_tuple, new_tuple;
 
+	/* Can't setup nat info for confirmed ct. */
+	if (nf_ct_is_confirmed(ct))
+		return NF_ACCEPT;
+
 	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
 		     maniptype == NF_NAT_MANIP_DST);
 	BUG_ON(nf_nat_initialized(ct, maniptype));
@@ -562,7 +566,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
 	 * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
 	 * will delete entry from already-freed table.
 	 */
-	ct->status &= ~IPS_NAT_DONE_MASK;
+	clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
 	rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
 			nf_nat_bysource_params);
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 559225029740..da314be0c048 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3367,35 +3367,50 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
 	return nf_tables_fill_setelem(args->skb, set, elem);
 }
 
+struct nft_set_dump_ctx {
+	const struct nft_set	*set;
+	struct nft_ctx		ctx;
+};
+
 static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct nft_set_dump_ctx *dump_ctx = cb->data;
 	struct net *net = sock_net(skb->sk);
-	u8 genmask = nft_genmask_cur(net);
+	struct nft_af_info *afi;
+	struct nft_table *table;
 	struct nft_set *set;
 	struct nft_set_dump_args args;
-	struct nft_ctx ctx;
-	struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1];
+	bool set_found = false;
 	struct nfgenmsg *nfmsg;
 	struct nlmsghdr *nlh;
 	struct nlattr *nest;
 	u32 portid, seq;
-	int event, err;
+	int event;
 
-	err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla,
-			  NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy,
-			  NULL);
-	if (err < 0)
-		return err;
+	rcu_read_lock();
+	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+		if (afi != dump_ctx->ctx.afi)
+			continue;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh,
-					 (void *)nla, genmask);
-	if (err < 0)
-		return err;
+		list_for_each_entry_rcu(table, &afi->tables, list) {
+			if (table != dump_ctx->ctx.table)
+				continue;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
-				   genmask);
-	if (IS_ERR(set))
-		return PTR_ERR(set);
+			list_for_each_entry_rcu(set, &table->sets, list) {
+				if (set == dump_ctx->set) {
+					set_found = true;
+					break;
+				}
+			}
+			break;
+		}
+		break;
+	}
+
+	if (!set_found) {
+		rcu_read_unlock();
+		return -ENOENT;
+	}
 
 	event  = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM);
 	portid = NETLINK_CB(cb->skb).portid;
@@ -3407,11 +3422,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 		goto nla_put_failure;
 
 	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = ctx.afi->family;
+	nfmsg->nfgen_family = afi->family;
 	nfmsg->version      = NFNETLINK_V0;
-	nfmsg->res_id	    = htons(ctx.net->nft.base_seq & 0xffff);
+	nfmsg->res_id	    = htons(net->nft.base_seq & 0xffff);
 
-	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
+	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name))
 		goto nla_put_failure;
 	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
 		goto nla_put_failure;
@@ -3422,12 +3437,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 
 	args.cb			= cb;
 	args.skb		= skb;
-	args.iter.genmask	= nft_genmask_cur(ctx.net);
+	args.iter.genmask	= nft_genmask_cur(net);
 	args.iter.skip		= cb->args[0];
 	args.iter.count		= 0;
 	args.iter.err		= 0;
 	args.iter.fn		= nf_tables_dump_setelem;
-	set->ops->walk(&ctx, set, &args.iter);
+	set->ops->walk(&dump_ctx->ctx, set, &args.iter);
+	rcu_read_unlock();
 
 	nla_nest_end(skb, nest);
 	nlmsg_end(skb, nlh);
@@ -3441,9 +3457,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 
 nla_put_failure:
+	rcu_read_unlock();
 	return -ENOSPC;
 }
 
+static int nf_tables_dump_set_done(struct netlink_callback *cb)
+{
+	kfree(cb->data);
+	return 0;
+}
+
 static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
 				const struct nlattr * const nla[])
@@ -3465,7 +3488,18 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = nf_tables_dump_set,
+			.done = nf_tables_dump_set_done,
 		};
+		struct nft_set_dump_ctx *dump_ctx;
+
+		dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
+		if (!dump_ctx)
+			return -ENOMEM;
+
+		dump_ctx->set = set;
+		dump_ctx->ctx = ctx;
+
+		c.data = dump_ctx;
 		return netlink_dump_start(nlsk, skb, nlh, &c);
 	}
 	return -EOPNOTSUPP;
@@ -3593,9 +3627,9 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
 {
 	struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
 
-	nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
+	nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
-		nft_data_uninit(nft_set_ext_data(ext), set->dtype);
+		nft_data_release(nft_set_ext_data(ext), set->dtype);
 	if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
 		nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
@@ -3604,6 +3638,18 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
 }
 EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
 
+/* Only called from commit path, nft_set_elem_deactivate() already deals with
+ * the refcounting from the preparation phase.
+ */
+static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem)
+{
+	struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+		nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
+	kfree(elem);
+}
+
 static int nft_setelem_parse_flags(const struct nft_set *set,
 				   const struct nlattr *attr, u32 *flags)
 {
@@ -3815,9 +3861,9 @@ err4:
 	kfree(elem.priv);
 err3:
 	if (nla[NFTA_SET_ELEM_DATA] != NULL)
-		nft_data_uninit(&data, d2.type);
+		nft_data_release(&data, d2.type);
 err2:
-	nft_data_uninit(&elem.key.val, d1.type);
+	nft_data_release(&elem.key.val, d1.type);
 err1:
 	return err;
 }
@@ -3862,6 +3908,53 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 	return err;
 }
 
+/**
+ *	nft_data_hold - hold a nft_data item
+ *
+ *	@data: struct nft_data to release
+ *	@type: type of data
+ *
+ *	Hold a nft_data item. NFT_DATA_VALUE types can be silently discarded,
+ *	NFT_DATA_VERDICT bumps the reference to chains in case of NFT_JUMP and
+ *	NFT_GOTO verdicts. This function must be called on active data objects
+ *	from the second phase of the commit protocol.
+ */
+static void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
+{
+	if (type == NFT_DATA_VERDICT) {
+		switch (data->verdict.code) {
+		case NFT_JUMP:
+		case NFT_GOTO:
+			data->verdict.chain->use++;
+			break;
+		}
+	}
+}
+
+static void nft_set_elem_activate(const struct net *net,
+				  const struct nft_set *set,
+				  struct nft_set_elem *elem)
+{
+	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+		nft_data_hold(nft_set_ext_data(ext), set->dtype);
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+		(*nft_set_ext_obj(ext))->use++;
+}
+
+static void nft_set_elem_deactivate(const struct net *net,
+				    const struct nft_set *set,
+				    struct nft_set_elem *elem)
+{
+	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+		nft_data_release(nft_set_ext_data(ext), set->dtype);
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+		(*nft_set_ext_obj(ext))->use--;
+}
+
 static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 			   const struct nlattr *attr)
 {
@@ -3927,6 +4020,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 	kfree(elem.priv);
 	elem.priv = priv;
 
+	nft_set_elem_deactivate(ctx->net, set, &elem);
+
 	nft_trans_elem(trans) = elem;
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 	return 0;
@@ -3936,7 +4031,7 @@ err4:
 err3:
 	kfree(elem.priv);
 err2:
-	nft_data_uninit(&elem.key.val, desc.type);
+	nft_data_release(&elem.key.val, desc.type);
 err1:
 	return err;
 }
@@ -4743,8 +4838,8 @@ static void nf_tables_commit_release(struct nft_trans *trans)
 		nft_set_destroy(nft_trans_set(trans));
 		break;
 	case NFT_MSG_DELSETELEM:
-		nft_set_elem_destroy(nft_trans_elem_set(trans),
-				     nft_trans_elem(trans).priv, true);
+		nf_tables_set_elem_destroy(nft_trans_elem_set(trans),
+					   nft_trans_elem(trans).priv);
 		break;
 	case NFT_MSG_DELOBJ:
 		nft_obj_destroy(nft_trans_obj(trans));
@@ -4979,6 +5074,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
 		case NFT_MSG_DELSETELEM:
 			te = (struct nft_trans_elem *)trans->data;
 
+			nft_set_elem_activate(net, te->set, &te->elem);
 			te->set->ops->activate(net, te->set, &te->elem);
 			te->set->ndeact--;
 
@@ -5464,7 +5560,7 @@ int nft_data_init(const struct nft_ctx *ctx,
 EXPORT_SYMBOL_GPL(nft_data_init);
 
 /**
- *	nft_data_uninit - release a nft_data item
+ *	nft_data_release - release a nft_data item
  *
  *	@data: struct nft_data to release
  *	@type: type of data
@@ -5472,7 +5568,7 @@ EXPORT_SYMBOL_GPL(nft_data_init);
  *	Release a nft_data item. NFT_DATA_VALUE types can be silently discarded,
  *	all others need to be released by calling this function.
  */
-void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
+void nft_data_release(const struct nft_data *data, enum nft_data_types type)
 {
 	if (type < NFT_DATA_VERDICT)
 		return;
@@ -5483,7 +5579,7 @@ void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
 		WARN_ON(1);
 	}
 }
-EXPORT_SYMBOL_GPL(nft_data_uninit);
+EXPORT_SYMBOL_GPL(nft_data_release);
 
 int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
 		  enum nft_data_types type, unsigned int len)
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 950bf6eadc65..be678a323598 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -686,6 +686,7 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
 		tuple_set = true;
 	}
 
+	ret = -ENOENT;
 	list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
 		cur = &nlcth->helper;
 		j++;
@@ -699,16 +700,20 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
 		     tuple.dst.protonum != cur->tuple.dst.protonum))
 			continue;
 
-		found = true;
-		nf_conntrack_helper_unregister(cur);
-		kfree(cur->expect_policy);
+		if (refcount_dec_if_one(&cur->refcnt)) {
+			found = true;
+			nf_conntrack_helper_unregister(cur);
+			kfree(cur->expect_policy);
 
-		list_del(&nlcth->list);
-		kfree(nlcth);
+			list_del(&nlcth->list);
+			kfree(nlcth);
+		} else {
+			ret = -EBUSY;
+		}
 	}
 
 	/* Make sure we return success if we flush and there is no helpers */
-	return (found || j == 0) ? 0 : -ENOENT;
+	return (found || j == 0) ? 0 : ret;
 }
 
 static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = {
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 877d9acd91ef..fff8073e2a56 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -83,17 +83,26 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
 			    tb[NFTA_BITWISE_MASK]);
 	if (err < 0)
 		return err;
-	if (d1.len != priv->len)
-		return -EINVAL;
+	if (d1.len != priv->len) {
+		err = -EINVAL;
+		goto err1;
+	}
 
 	err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2,
 			    tb[NFTA_BITWISE_XOR]);
 	if (err < 0)
-		return err;
-	if (d2.len != priv->len)
-		return -EINVAL;
+		goto err1;
+	if (d2.len != priv->len) {
+		err = -EINVAL;
+		goto err2;
+	}
 
 	return 0;
+err2:
+	nft_data_release(&priv->xor, d2.type);
+err1:
+	nft_data_release(&priv->mask, d1.type);
+	return err;
 }
 
 static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 2b96effeadc1..c2945eb3397c 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -201,10 +201,18 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
 	if (err < 0)
 		return ERR_PTR(err);
 
+	if (desc.type != NFT_DATA_VALUE) {
+		err = -EINVAL;
+		goto err1;
+	}
+
 	if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ)
 		return &nft_cmp_fast_ops;
-	else
-		return &nft_cmp_ops;
+
+	return &nft_cmp_ops;
+err1:
+	nft_data_release(&data, desc.type);
+	return ERR_PTR(-EINVAL);
 }
 
 struct nft_expr_type nft_cmp_type __read_mostly = {
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index a34ceb38fc55..1678e9e75e8e 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -826,9 +826,9 @@ static void nft_ct_helper_obj_destroy(struct nft_object *obj)
 	struct nft_ct_helper_obj *priv = nft_obj_data(obj);
 
 	if (priv->helper4)
-		module_put(priv->helper4->me);
+		nf_conntrack_helper_put(priv->helper4);
 	if (priv->helper6)
-		module_put(priv->helper6->me);
+		nf_conntrack_helper_put(priv->helper6);
 }
 
 static void nft_ct_helper_obj_eval(struct nft_object *obj,
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 728baf88295a..4717d7796927 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -65,7 +65,7 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
 	return 0;
 
 err1:
-	nft_data_uninit(&priv->data, desc.type);
+	nft_data_release(&priv->data, desc.type);
 	return err;
 }
 
@@ -73,7 +73,8 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
 				  const struct nft_expr *expr)
 {
 	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
-	return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
+
+	return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg));
 }
 
 static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 9edc74eedc10..cedb96c3619f 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -102,9 +102,9 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
 	priv->len = desc_from.len;
 	return 0;
 err2:
-	nft_data_uninit(&priv->data_to, desc_to.type);
+	nft_data_release(&priv->data_to, desc_to.type);
 err1:
-	nft_data_uninit(&priv->data_from, desc_from.type);
+	nft_data_release(&priv->data_from, desc_from.type);
 	return err;
 }
 
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 8ec086b6b56b..3d3a6df4ce70 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -222,7 +222,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 	struct nft_set_elem elem;
 	int err;
 
-	err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+	err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC);
 	iter->err = err;
 	if (err)
 		return;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index e97e2fb53f0a..fbdbaa00dd5f 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -116,17 +116,17 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
 		else if (d > 0)
 			p = &parent->rb_right;
 		else {
-			if (nft_set_elem_active(&rbe->ext, genmask)) {
-				if (nft_rbtree_interval_end(rbe) &&
-				    !nft_rbtree_interval_end(new))
-					p = &parent->rb_left;
-				else if (!nft_rbtree_interval_end(rbe) &&
-					 nft_rbtree_interval_end(new))
-					p = &parent->rb_right;
-				else {
-					*ext = &rbe->ext;
-					return -EEXIST;
-				}
+			if (nft_rbtree_interval_end(rbe) &&
+			    !nft_rbtree_interval_end(new)) {
+				p = &parent->rb_left;
+			} else if (!nft_rbtree_interval_end(rbe) &&
+				   nft_rbtree_interval_end(new)) {
+				p = &parent->rb_right;
+			} else if (nft_set_elem_active(&rbe->ext, genmask)) {
+				*ext = &rbe->ext;
+				return -EEXIST;
+			} else {
+				p = &parent->rb_left;
 			}
 		}
 	}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8876b7da6884..1770c1d9b37f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -283,28 +283,30 @@ static int xt_obj_to_user(u16 __user *psize, u16 size,
 		       &U->u.user.revision, K->u.kernel.TYPE->revision)
 
 int xt_data_to_user(void __user *dst, const void *src,
-		    int usersize, int size)
+		    int usersize, int size, int aligned_size)
 {
 	usersize = usersize ? : size;
 	if (copy_to_user(dst, src, usersize))
 		return -EFAULT;
-	if (usersize != size && clear_user(dst + usersize, size - usersize))
+	if (usersize != aligned_size &&
+	    clear_user(dst + usersize, aligned_size - usersize))
 		return -EFAULT;
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xt_data_to_user);
 
-#define XT_DATA_TO_USER(U, K, TYPE, C_SIZE)				\
+#define XT_DATA_TO_USER(U, K, TYPE)					\
 	xt_data_to_user(U->data, K->data,				\
 			K->u.kernel.TYPE->usersize,			\
-			C_SIZE ? : K->u.kernel.TYPE->TYPE##size)
+			K->u.kernel.TYPE->TYPE##size,			\
+			XT_ALIGN(K->u.kernel.TYPE->TYPE##size))
 
 int xt_match_to_user(const struct xt_entry_match *m,
 		     struct xt_entry_match __user *u)
 {
 	return XT_OBJ_TO_USER(u, m, match, 0) ||
-	       XT_DATA_TO_USER(u, m, match, 0);
+	       XT_DATA_TO_USER(u, m, match);
 }
 EXPORT_SYMBOL_GPL(xt_match_to_user);
 
@@ -312,7 +314,7 @@ int xt_target_to_user(const struct xt_entry_target *t,
 		      struct xt_entry_target __user *u)
 {
 	return XT_OBJ_TO_USER(u, t, target, 0) ||
-	       XT_DATA_TO_USER(u, t, target, 0);
+	       XT_DATA_TO_USER(u, t, target);
 }
 EXPORT_SYMBOL_GPL(xt_target_to_user);
 
@@ -611,6 +613,12 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
 }
 EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
 
+#define COMPAT_XT_DATA_TO_USER(U, K, TYPE, C_SIZE)			\
+	xt_data_to_user(U->data, K->data,				\
+			K->u.kernel.TYPE->usersize,			\
+			C_SIZE,						\
+			COMPAT_XT_ALIGN(C_SIZE))
+
 int xt_compat_match_to_user(const struct xt_entry_match *m,
 			    void __user **dstptr, unsigned int *size)
 {
@@ -626,7 +634,7 @@ int xt_compat_match_to_user(const struct xt_entry_match *m,
 		if (match->compat_to_user((void __user *)cm->data, m->data))
 			return -EFAULT;
 	} else {
-		if (XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm)))
+		if (COMPAT_XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm)))
 			return -EFAULT;
 	}
 
@@ -972,7 +980,7 @@ int xt_compat_target_to_user(const struct xt_entry_target *t,
 		if (target->compat_to_user((void __user *)ct->data, t->data))
 			return -EFAULT;
 	} else {
-		if (XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct)))
+		if (COMPAT_XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct)))
 			return -EFAULT;
 	}
 
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index bb7ad82dcd56..623ef37de886 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -96,7 +96,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
 
 	help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL);
 	if (help == NULL) {
-		module_put(helper->me);
+		nf_conntrack_helper_put(helper);
 		return -ENOMEM;
 	}
 
@@ -263,7 +263,7 @@ out:
 err4:
 	help = nfct_help(ct);
 	if (help)
-		module_put(help->helper->me);
+		nf_conntrack_helper_put(help->helper);
 err3:
 	nf_ct_tmpl_free(ct);
 err2:
@@ -346,7 +346,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
 	if (ct) {
 		help = nfct_help(ct);
 		if (help)
-			module_put(help->helper->me);
+			nf_conntrack_helper_put(help->helper);
 
 		nf_ct_netns_put(par->net, par->family);
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index ee841f00a6ec..7586d446d7dc 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -62,6 +62,7 @@
 #include <asm/cacheflush.h>
 #include <linux/hash.h>
 #include <linux/genetlink.h>
+#include <linux/net_namespace.h>
 
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -1415,7 +1416,8 @@ static void do_one_broadcast(struct sock *sk,
 		goto out;
 	}
 	NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net);
-	NETLINK_CB(p->skb2).nsid_is_set = true;
+	if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED)
+		NETLINK_CB(p->skb2).nsid_is_set = true;
 	val = netlink_broadcast_deliver(sk, p->skb2);
 	if (val < 0) {
 		netlink_overrun(sk);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index bf602e33c40a..08679ebb3068 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1123,7 +1123,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
 
 	help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
 	if (!help) {
-		module_put(helper->me);
+		nf_conntrack_helper_put(helper);
 		return -ENOMEM;
 	}
 
@@ -1584,7 +1584,7 @@ void ovs_ct_free_action(const struct nlattr *a)
 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
 {
 	if (ct_info->helper)
-		module_put(ct_info->helper->me);
+		nf_conntrack_helper_put(ct_info->helper);
 	if (ct_info->ct)
 		nf_ct_tmpl_free(ct_info->ct);
 }
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 89193a634da4..04a3128adcf0 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -94,7 +94,6 @@ static void internal_dev_destructor(struct net_device *dev)
 	struct vport *vport = ovs_internal_dev_get_vport(dev);
 
 	ovs_vport_free(vport);
-	free_netdev(dev);
 }
 
 static void
@@ -156,7 +155,8 @@ static void do_setup(struct net_device *netdev)
 	netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
 			      IFF_PHONY_HEADROOM | IFF_NO_QUEUE;
-	netdev->destructor = internal_dev_destructor;
+	netdev->needs_free_netdev = true;
+	netdev->priv_destructor = internal_dev_destructor;
 	netdev->ethtool_ops = &internal_dev_ethtool_ops;
 	netdev->rtnl_link_ops = &internal_dev_link_ops;
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f4001763134d..e3eeed19cc7a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2658,13 +2658,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
 	}
 
-	sockc.tsflags = po->sk.sk_tsflags;
-	if (msg->msg_controllen) {
-		err = sock_cmsg_send(&po->sk, msg, &sockc);
-		if (unlikely(err))
-			goto out;
-	}
-
 	err = -ENXIO;
 	if (unlikely(dev == NULL))
 		goto out;
@@ -2672,6 +2665,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_put;
 
+	sockc.tsflags = po->sk.sk_tsflags;
+	if (msg->msg_controllen) {
+		err = sock_cmsg_send(&po->sk, msg, &sockc);
+		if (unlikely(err))
+			goto out_put;
+	}
+
 	if (po->sk.sk_socket->type == SOCK_RAW)
 		reserve = dev->hard_header_len;
 	size_max = po->tx_ring.frame_size
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index 21c28b51be94..2c9337946e30 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -236,7 +236,7 @@ static void gprs_setup(struct net_device *dev)
 	dev->tx_queue_len	= 10;
 
 	dev->netdev_ops		= &gprs_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 /*
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 0a4e28477ad9..54369225766e 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -217,7 +217,7 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ,
 				       unsigned int *_toklen)
 {
 	const __be32 *xdr = *_xdr;
-	unsigned int toklen = *_toklen, n_parts, loop, tmp;
+	unsigned int toklen = *_toklen, n_parts, loop, tmp, paddedlen;
 
 	/* there must be at least one name, and at least #names+1 length
 	 * words */
@@ -247,16 +247,16 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ,
 		toklen -= 4;
 		if (tmp <= 0 || tmp > AFSTOKEN_STRING_MAX)
 			return -EINVAL;
-		if (tmp > toklen)
+		paddedlen = (tmp + 3) & ~3;
+		if (paddedlen > toklen)
 			return -EINVAL;
 		princ->name_parts[loop] = kmalloc(tmp + 1, GFP_KERNEL);
 		if (!princ->name_parts[loop])
 			return -ENOMEM;
 		memcpy(princ->name_parts[loop], xdr, tmp);
 		princ->name_parts[loop][tmp] = 0;
-		tmp = (tmp + 3) & ~3;
-		toklen -= tmp;
-		xdr += tmp >> 2;
+		toklen -= paddedlen;
+		xdr += paddedlen >> 2;
 	}
 
 	if (toklen < 4)
@@ -265,16 +265,16 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ,
 	toklen -= 4;
 	if (tmp <= 0 || tmp > AFSTOKEN_K5_REALM_MAX)
 		return -EINVAL;
-	if (tmp > toklen)
+	paddedlen = (tmp + 3) & ~3;
+	if (paddedlen > toklen)
 		return -EINVAL;
 	princ->realm = kmalloc(tmp + 1, GFP_KERNEL);
 	if (!princ->realm)
 		return -ENOMEM;
 	memcpy(princ->realm, xdr, tmp);
 	princ->realm[tmp] = 0;
-	tmp = (tmp + 3) & ~3;
-	toklen -= tmp;
-	xdr += tmp >> 2;
+	toklen -= paddedlen;
+	xdr += paddedlen >> 2;
 
 	_debug("%s/...@%s", princ->name_parts[0], princ->realm);
 
@@ -293,7 +293,7 @@ static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td,
 					 unsigned int *_toklen)
 {
 	const __be32 *xdr = *_xdr;
-	unsigned int toklen = *_toklen, len;
+	unsigned int toklen = *_toklen, len, paddedlen;
 
 	/* there must be at least one tag and one length word */
 	if (toklen <= 8)
@@ -307,15 +307,17 @@ static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td,
 	toklen -= 8;
 	if (len > max_data_size)
 		return -EINVAL;
+	paddedlen = (len + 3) & ~3;
+	if (paddedlen > toklen)
+		return -EINVAL;
 	td->data_len = len;
 
 	if (len > 0) {
 		td->data = kmemdup(xdr, len, GFP_KERNEL);
 		if (!td->data)
 			return -ENOMEM;
-		len = (len + 3) & ~3;
-		toklen -= len;
-		xdr += len >> 2;
+		toklen -= paddedlen;
+		xdr += paddedlen >> 2;
 	}
 
 	_debug("tag %x len %x", td->tag, td->data_len);
@@ -387,7 +389,7 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen,
 				    const __be32 **_xdr, unsigned int *_toklen)
 {
 	const __be32 *xdr = *_xdr;
-	unsigned int toklen = *_toklen, len;
+	unsigned int toklen = *_toklen, len, paddedlen;
 
 	/* there must be at least one length word */
 	if (toklen <= 4)
@@ -399,6 +401,9 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen,
 	toklen -= 4;
 	if (len > AFSTOKEN_K5_TIX_MAX)
 		return -EINVAL;
+	paddedlen = (len + 3) & ~3;
+	if (paddedlen > toklen)
+		return -EINVAL;
 	*_tktlen = len;
 
 	_debug("ticket len %u", len);
@@ -407,9 +412,8 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen,
 		*_ticket = kmemdup(xdr, len, GFP_KERNEL);
 		if (!*_ticket)
 			return -ENOMEM;
-		len = (len + 3) & ~3;
-		toklen -= len;
-		xdr += len >> 2;
+		toklen -= paddedlen;
+		xdr += paddedlen >> 2;
 	}
 
 	*_xdr = xdr;
@@ -552,7 +556,7 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep)
 {
 	const __be32 *xdr = prep->data, *token;
 	const char *cp;
-	unsigned int len, tmp, loop, ntoken, toklen, sec_ix;
+	unsigned int len, paddedlen, loop, ntoken, toklen, sec_ix;
 	size_t datalen = prep->datalen;
 	int ret;
 
@@ -578,22 +582,21 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep)
 	if (len < 1 || len > AFSTOKEN_CELL_MAX)
 		goto not_xdr;
 	datalen -= 4;
-	tmp = (len + 3) & ~3;
-	if (tmp > datalen)
+	paddedlen = (len + 3) & ~3;
+	if (paddedlen > datalen)
 		goto not_xdr;
 
 	cp = (const char *) xdr;
 	for (loop = 0; loop < len; loop++)
 		if (!isprint(cp[loop]))
 			goto not_xdr;
-	if (len < tmp)
-		for (; loop < tmp; loop++)
-			if (cp[loop])
-				goto not_xdr;
+	for (; loop < paddedlen; loop++)
+		if (cp[loop])
+			goto not_xdr;
 	_debug("cellname: [%u/%u] '%*.*s'",
-	       len, tmp, len, len, (const char *) xdr);
-	datalen -= tmp;
-	xdr += tmp >> 2;
+	       len, paddedlen, len, len, (const char *) xdr);
+	datalen -= paddedlen;
+	xdr += paddedlen >> 2;
 
 	/* get the token count */
 	if (datalen < 12)
@@ -614,10 +617,11 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep)
 		sec_ix = ntohl(*xdr);
 		datalen -= 4;
 		_debug("token: [%x/%zx] %x", toklen, datalen, sec_ix);
-		if (toklen < 20 || toklen > datalen)
+		paddedlen = (toklen + 3) & ~3;
+		if (toklen < 20 || toklen > datalen || paddedlen > datalen)
 			goto not_xdr;
-		datalen -= (toklen + 3) & ~3;
-		xdr += (toklen + 3) >> 2;
+		datalen -= paddedlen;
+		xdr += paddedlen >> 2;
 
 	} while (--loop > 0);
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 164b5ac094be..7dc5892671c8 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -94,8 +94,10 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla,
 		k++;
 	}
 
-	if (n)
+	if (n) {
+		err = -EINVAL;
 		goto err_out;
+	}
 
 	return keys_ex;
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index f42008b29311..b062bc80c7cb 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -132,21 +132,21 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 		}
 	}
 
-	spin_lock_bh(&police->tcf_lock);
 	if (est) {
 		err = gen_replace_estimator(&police->tcf_bstats, NULL,
 					    &police->tcf_rate_est,
 					    &police->tcf_lock,
 					    NULL, est);
 		if (err)
-			goto failure_unlock;
+			goto failure;
 	} else if (tb[TCA_POLICE_AVRATE] &&
 		   (ret == ACT_P_CREATED ||
 		    !gen_estimator_active(&police->tcf_rate_est))) {
 		err = -EINVAL;
-		goto failure_unlock;
+		goto failure;
 	}
 
+	spin_lock_bh(&police->tcf_lock);
 	/* No failure allowed after this point */
 	police->tcfp_mtu = parm->mtu;
 	if (police->tcfp_mtu == 0) {
@@ -192,8 +192,6 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 
 	return ret;
 
-failure_unlock:
-	spin_unlock_bh(&police->tcf_lock);
 failure:
 	qdisc_put_rtab(P_tab);
 	qdisc_put_rtab(R_tab);
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index dee469fed967..51859b8edd7e 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -203,7 +203,6 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
 
 	*arg = (unsigned long) head;
 	rcu_assign_pointer(tp->root, new);
-	call_rcu(&head->rcu, mall_destroy_rcu);
 	return 0;
 
 err_replace_hw_filter:
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index bbe57d57b67f..cfdbfa18a95e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1019,7 +1019,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
 		return sch;
 	}
 	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
-	ops->destroy(sch);
+	if (ops->destroy)
+		ops->destroy(sch);
 err_out3:
 	dev_put(dev);
 	kfree((char *) sch - sch->padded);
@@ -1831,6 +1832,12 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
 	if (!qdisc_dev(root))
 		return 0;
 
+	if (tcm->tcm_parent) {
+		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
+		if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
+			return -1;
+		return 0;
+	}
 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
 			return -1;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index a9708da28eb5..95238284c422 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1176,7 +1176,9 @@ void sctp_assoc_update(struct sctp_association *asoc,
 
 		asoc->ctsn_ack_point = asoc->next_tsn - 1;
 		asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
-		if (!asoc->stream) {
+
+		if (sctp_state(asoc, COOKIE_WAIT)) {
+			sctp_stream_free(asoc->stream);
 			asoc->stream = new->stream;
 			new->stream = NULL;
 		}
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 8c589230794f..3dcd0ecf3d99 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -275,6 +275,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
 		if (sctp_sk(sk)->bind_hash)
 			sctp_put_port(sk);
 
+		sctp_sk(sk)->ep = NULL;
 		sock_put(sk);
 	}
 
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 0e06a278d2a9..ba9ad32fc447 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -473,15 +473,14 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
 			     struct sctp_association **app,
 			     struct sctp_transport **tpp)
 {
+	struct sctp_init_chunk *chunkhdr, _chunkhdr;
 	union sctp_addr saddr;
 	union sctp_addr daddr;
 	struct sctp_af *af;
 	struct sock *sk = NULL;
 	struct sctp_association *asoc;
 	struct sctp_transport *transport = NULL;
-	struct sctp_init_chunk *chunkhdr;
 	__u32 vtag = ntohl(sctphdr->vtag);
-	int len = skb->len - ((void *)sctphdr - (void *)skb->data);
 
 	*app = NULL; *tpp = NULL;
 
@@ -516,13 +515,16 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
 	 * discard the packet.
 	 */
 	if (vtag == 0) {
-		chunkhdr = (void *)sctphdr + sizeof(struct sctphdr);
-		if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t)
-			  + sizeof(__be32) ||
+		/* chunk header + first 4 octects of init header */
+		chunkhdr = skb_header_pointer(skb, skb_transport_offset(skb) +
+					      sizeof(struct sctphdr),
+					      sizeof(struct sctp_chunkhdr) +
+					      sizeof(__be32), &_chunkhdr);
+		if (!chunkhdr ||
 		    chunkhdr->chunk_hdr.type != SCTP_CID_INIT ||
-		    ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) {
+		    ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag)
 			goto out;
-		}
+
 	} else if (vtag != asoc->c.peer_vtag) {
 		goto out;
 	}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 961ee59f696a..f5b45b8b8b16 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -240,12 +240,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	struct sctp_bind_addr *bp;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sctp_sockaddr_entry *laddr;
-	union sctp_addr *baddr = NULL;
 	union sctp_addr *daddr = &t->ipaddr;
 	union sctp_addr dst_saddr;
 	struct in6_addr *final_p, final;
 	__u8 matchlen = 0;
-	__u8 bmatchlen;
 	sctp_scope_t scope;
 
 	memset(fl6, 0, sizeof(struct flowi6));
@@ -312,23 +310,37 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	 */
 	rcu_read_lock();
 	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
-		if (!laddr->valid)
+		struct dst_entry *bdst;
+		__u8 bmatchlen;
+
+		if (!laddr->valid ||
+		    laddr->state != SCTP_ADDR_SRC ||
+		    laddr->a.sa.sa_family != AF_INET6 ||
+		    scope > sctp_scope(&laddr->a))
 			continue;
-		if ((laddr->state == SCTP_ADDR_SRC) &&
-		    (laddr->a.sa.sa_family == AF_INET6) &&
-		    (scope <= sctp_scope(&laddr->a))) {
-			bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
-			if (!baddr || (matchlen < bmatchlen)) {
-				baddr = &laddr->a;
-				matchlen = bmatchlen;
-			}
-		}
-	}
-	if (baddr) {
-		fl6->saddr = baddr->v6.sin6_addr;
-		fl6->fl6_sport = baddr->v6.sin6_port;
+
+		fl6->saddr = laddr->a.v6.sin6_addr;
+		fl6->fl6_sport = laddr->a.v6.sin6_port;
 		final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
-		dst = ip6_dst_lookup_flow(sk, fl6, final_p);
+		bdst = ip6_dst_lookup_flow(sk, fl6, final_p);
+
+		if (!IS_ERR(bdst) &&
+		    ipv6_chk_addr(dev_net(bdst->dev),
+				  &laddr->a.v6.sin6_addr, bdst->dev, 1)) {
+			if (!IS_ERR_OR_NULL(dst))
+				dst_release(dst);
+			dst = bdst;
+			break;
+		}
+
+		bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
+		if (matchlen > bmatchlen)
+			continue;
+
+		if (!IS_ERR_OR_NULL(dst))
+			dst_release(dst);
+		dst = bdst;
+		matchlen = bmatchlen;
 	}
 	rcu_read_unlock();
 
@@ -665,6 +677,9 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
 	newnp = inet6_sk(newsk);
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+	newnp->ipv6_mc_list = NULL;
+	newnp->ipv6_ac_list = NULL;
+	newnp->ipv6_fl_list = NULL;
 
 	rcu_read_lock();
 	opt = rcu_dereference(np->opt);
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index 048954eee984..9a647214a91e 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -278,7 +278,6 @@ out:
 
 static int sctp_sock_dump(struct sock *sk, void *p)
 {
-	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
 	struct sctp_comm_param *commp = p;
 	struct sk_buff *skb = commp->skb;
 	struct netlink_callback *cb = commp->cb;
@@ -287,7 +286,9 @@ static int sctp_sock_dump(struct sock *sk, void *p)
 	int err = 0;
 
 	lock_sock(sk);
-	list_for_each_entry(assoc, &ep->asocs, asocs) {
+	if (!sctp_sk(sk)->ep)
+		goto release;
+	list_for_each_entry(assoc, &sctp_sk(sk)->ep->asocs, asocs) {
 		if (cb->args[4] < cb->args[1])
 			goto next;
 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 8a08f13469c4..92e332e17391 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2454,16 +2454,11 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
 	 * stream sequence number shall be set to 0.
 	 */
 
-	/* Allocate storage for the negotiated streams if it is not a temporary
-	 * association.
-	 */
-	if (!asoc->temp) {
-		if (sctp_stream_init(asoc, gfp))
-			goto clean_up;
+	if (sctp_stream_init(asoc, gfp))
+		goto clean_up;
 
-		if (sctp_assoc_set_id(asoc, gfp))
-			goto clean_up;
-	}
+	if (!asoc->temp && sctp_assoc_set_id(asoc, gfp))
+		goto clean_up;
 
 	/* ADDIP Section 4.1 ASCONF Chunk Procedures
 	 *
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 4f5e6cfc7f60..f863b5573e42 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2088,6 +2088,9 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
 		}
 	}
 
+	/* Set temp so that it won't be added into hashtable */
+	new_asoc->temp = 1;
+
 	/* Compare the tie_tag in cookie with the verification tag of
 	 * current association.
 	 */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f16c8d97b7f3..3a8318e518f1 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4622,13 +4622,13 @@ int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *),
 
 	for (head = sctp_ep_hashtable; hash < sctp_ep_hashsize;
 	     hash++, head++) {
-		read_lock(&head->lock);
+		read_lock_bh(&head->lock);
 		sctp_for_each_hentry(epb, &head->chain) {
 			err = cb(sctp_ep(epb), p);
 			if (err)
 				break;
 		}
-		read_unlock(&head->lock);
+		read_unlock_bh(&head->lock);
 	}
 
 	return err;
@@ -4666,9 +4666,8 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
 	if (err)
 		return err;
 
-	sctp_transport_get_idx(net, &hti, pos);
-	obj = sctp_transport_get_next(net, &hti);
-	for (; obj && !IS_ERR(obj); obj = sctp_transport_get_next(net, &hti)) {
+	obj = sctp_transport_get_idx(net, &hti, pos + 1);
+	for (; !IS_ERR_OR_NULL(obj); obj = sctp_transport_get_next(net, &hti)) {
 		struct sctp_transport *transport = obj;
 
 		if (!sctp_transport_hold(transport))
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index c717ef0896aa..33954852f3f8 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -8,6 +8,10 @@ config SMC
 	  The Linux implementation of the SMC-R solution is designed as
 	  a separate socket family SMC.
 
+	  Warning: SMC will expose all memory for remote reads and writes
+	  once a connection is established.  Don't enable this option except
+	  for tightly controlled lab environment.
+
 	  Select this option if you want to run SMC socket applications
 
 config SMC_DIAG
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5b6ee21368a6..6793d7348cc8 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -101,7 +101,7 @@ struct proto smc_proto = {
 	.unhash		= smc_unhash_sk,
 	.obj_size	= sizeof(struct smc_sock),
 	.h.smc_hash	= &smc_v4_hashinfo,
-	.slab_flags	= SLAB_DESTROY_BY_RCU,
+	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
 };
 EXPORT_SYMBOL_GPL(smc_proto);
 
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index e41f594a1e1d..03ec058d18df 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -204,7 +204,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
 	memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
 	hton24(cclc.qpn, link->roce_qp->qp_num);
 	cclc.rmb_rkey =
-		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+		htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
 	cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
 	cclc.rmbe_alert_token = htonl(conn->alert_token_local);
 	cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
@@ -256,7 +256,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
 	memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
 	hton24(aclc.qpn, link->roce_qp->qp_num);
 	aclc.rmb_rkey =
-		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+		htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
 	aclc.conn_idx = 1;			/* as long as 1 RMB = 1 RMBE */
 	aclc.rmbe_alert_token = htonl(conn->alert_token_local);
 	aclc.qp_mtu = link->path_mtu;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 65020e93ff21..3ac09a629ea1 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -613,19 +613,8 @@ int smc_rmb_create(struct smc_sock *smc)
 			rmb_desc = NULL;
 			continue; /* if mapping failed, try smaller one */
 		}
-		rc = smc_ib_get_memory_region(lgr->lnk[SMC_SINGLE_LINK].roce_pd,
-					      IB_ACCESS_REMOTE_WRITE |
-					      IB_ACCESS_LOCAL_WRITE,
-					     &rmb_desc->mr_rx[SMC_SINGLE_LINK]);
-		if (rc) {
-			smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
-					 tmp_bufsize, rmb_desc,
-					 DMA_FROM_DEVICE);
-			kfree(rmb_desc->cpu_addr);
-			kfree(rmb_desc);
-			rmb_desc = NULL;
-			continue;
-		}
+		rmb_desc->rkey[SMC_SINGLE_LINK] =
+			lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey;
 		rmb_desc->used = 1;
 		write_lock_bh(&lgr->rmbs_lock);
 		list_add(&rmb_desc->list,
@@ -668,6 +657,7 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn,
 
 	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
 		if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
+		    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
 		    test_bit(i, lgr->rtokens_used_mask)) {
 			conn->rtoken_idx = i;
 			return 0;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 27eb38056a27..b013cb43a327 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -93,7 +93,7 @@ struct smc_buf_desc {
 	u64			dma_addr[SMC_LINKS_PER_LGR_MAX];
 						/* mapped address of buffer */
 	void			*cpu_addr;	/* virtual address of buffer */
-	struct ib_mr		*mr_rx[SMC_LINKS_PER_LGR_MAX];
+	u32			rkey[SMC_LINKS_PER_LGR_MAX];
 						/* for rmb only:
 						 * rkey provided to peer
 						 */
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index cb69ab977cd7..b31715505a35 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -37,24 +37,6 @@ u8 local_systemid[SMC_SYSTEMID_LEN] = SMC_LOCAL_SYSTEMID_RESET;	/* unique system
 								 * identifier
 								 */
 
-int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
-			     struct ib_mr **mr)
-{
-	int rc;
-
-	if (*mr)
-		return 0; /* already done */
-
-	/* obtain unique key -
-	 * next invocation of get_dma_mr returns a different key!
-	 */
-	*mr = pd->device->get_dma_mr(pd, access_flags);
-	rc = PTR_ERR_OR_ZERO(*mr);
-	if (IS_ERR(*mr))
-		*mr = NULL;
-	return rc;
-}
-
 static int smc_ib_modify_qp_init(struct smc_link *lnk)
 {
 	struct ib_qp_attr qp_attr;
@@ -210,7 +192,8 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
 {
 	int rc;
 
-	lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0);
+	lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev,
+				   IB_PD_UNSAFE_GLOBAL_RKEY);
 	rc = PTR_ERR_OR_ZERO(lnk->roce_pd);
 	if (IS_ERR(lnk->roce_pd))
 		lnk->roce_pd = NULL;
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 7e1f0e24d177..b567152a526d 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -61,8 +61,6 @@ void smc_ib_dealloc_protection_domain(struct smc_link *lnk);
 int smc_ib_create_protection_domain(struct smc_link *lnk);
 void smc_ib_destroy_queue_pair(struct smc_link *lnk);
 int smc_ib_create_queue_pair(struct smc_link *lnk);
-int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
-			     struct ib_mr **mr);
 int smc_ib_ready_link(struct smc_link *lnk);
 int smc_ib_modify_qp_rts(struct smc_link *lnk);
 int smc_ib_modify_qp_reset(struct smc_link *lnk);
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 04ce2c0b660e..ac09ca803296 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -52,6 +52,7 @@ config SUNRPC_XPRT_RDMA
 	tristate "RPC-over-RDMA transport"
 	depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
 	default SUNRPC && INFINIBAND
+	select SG_POOL
 	help
 	  This option allows the NFS client and server to use RDMA
 	  transports (InfiniBand, iWARP, or RoCE).
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 52da3ce54bb5..b5cb921775a0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1042,8 +1042,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
 	struct rpc_task *task;
 
 	task = rpc_new_task(task_setup_data);
-	if (IS_ERR(task))
-		goto out;
 
 	rpc_task_set_client(task, task_setup_data->rpc_client);
 	rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
@@ -1053,7 +1051,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
 
 	atomic_inc(&task->tk_count);
 	rpc_execute(task);
-out:
 	return task;
 }
 EXPORT_SYMBOL_GPL(rpc_run_task);
@@ -1140,10 +1137,6 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 	 * Create an rpc_task to send the data
 	 */
 	task = rpc_new_task(&task_setup_data);
-	if (IS_ERR(task)) {
-		xprt_free_bc_request(req);
-		goto out;
-	}
 	task->tk_rqstp = req;
 
 	/*
@@ -1158,7 +1151,6 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 	WARN_ON_ONCE(atomic_read(&task->tk_count) != 2);
 	rpc_execute(task);
 
-out:
 	dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
 	return task;
 }
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5db68b371db2..0cc83839c13c 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -965,11 +965,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
 
 	if (task == NULL) {
 		task = rpc_alloc_task();
-		if (task == NULL) {
-			rpc_release_calldata(setup_data->callback_ops,
-					setup_data->callback_data);
-			return ERR_PTR(-ENOMEM);
-		}
 		flags = RPC_TASK_DYNAMIC;
 	}
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a08aeb56b8e4..bc0f5a0ecbdc 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -702,59 +702,32 @@ found_pool:
 	return task;
 }
 
-/*
- * Create or destroy enough new threads to make the number
- * of threads the given number.  If `pool' is non-NULL, applies
- * only to threads in that pool, otherwise round-robins between
- * all pools.  Caller must ensure that mutual exclusion between this and
- * server startup or shutdown.
- *
- * Destroying threads relies on the service threads filling in
- * rqstp->rq_task, which only the nfs ones do.  Assumes the serv
- * has been created using svc_create_pooled().
- *
- * Based on code that used to be in nfsd_svc() but tweaked
- * to be pool-aware.
- */
-int
-svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+/* create new threads */
+static int
+svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 {
 	struct svc_rqst	*rqstp;
 	struct task_struct *task;
 	struct svc_pool *chosen_pool;
-	int error = 0;
 	unsigned int state = serv->sv_nrthreads-1;
 	int node;
 
-	if (pool == NULL) {
-		/* The -1 assumes caller has done a svc_get() */
-		nrservs -= (serv->sv_nrthreads-1);
-	} else {
-		spin_lock_bh(&pool->sp_lock);
-		nrservs -= pool->sp_nrthreads;
-		spin_unlock_bh(&pool->sp_lock);
-	}
-
-	/* create new threads */
-	while (nrservs > 0) {
+	do {
 		nrservs--;
 		chosen_pool = choose_pool(serv, pool, &state);
 
 		node = svc_pool_map_get_node(chosen_pool->sp_id);
 		rqstp = svc_prepare_thread(serv, chosen_pool, node);
-		if (IS_ERR(rqstp)) {
-			error = PTR_ERR(rqstp);
-			break;
-		}
+		if (IS_ERR(rqstp))
+			return PTR_ERR(rqstp);
 
 		__module_get(serv->sv_ops->svo_module);
 		task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp,
 					      node, "%s", serv->sv_name);
 		if (IS_ERR(task)) {
-			error = PTR_ERR(task);
 			module_put(serv->sv_ops->svo_module);
 			svc_exit_thread(rqstp);
-			break;
+			return PTR_ERR(task);
 		}
 
 		rqstp->rq_task = task;
@@ -763,18 +736,103 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 
 		svc_sock_update_bufs(serv);
 		wake_up_process(task);
-	}
+	} while (nrservs > 0);
+
+	return 0;
+}
+
+
+/* destroy old threads */
+static int
+svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+	struct task_struct *task;
+	unsigned int state = serv->sv_nrthreads-1;
+
 	/* destroy old threads */
-	while (nrservs < 0 &&
-	       (task = choose_victim(serv, pool, &state)) != NULL) {
+	do {
+		task = choose_victim(serv, pool, &state);
+		if (task == NULL)
+			break;
 		send_sig(SIGINT, task, 1);
 		nrservs++;
+	} while (nrservs < 0);
+
+	return 0;
+}
+
+/*
+ * Create or destroy enough new threads to make the number
+ * of threads the given number.  If `pool' is non-NULL, applies
+ * only to threads in that pool, otherwise round-robins between
+ * all pools.  Caller must ensure that mutual exclusion between this and
+ * server startup or shutdown.
+ *
+ * Destroying threads relies on the service threads filling in
+ * rqstp->rq_task, which only the nfs ones do.  Assumes the serv
+ * has been created using svc_create_pooled().
+ *
+ * Based on code that used to be in nfsd_svc() but tweaked
+ * to be pool-aware.
+ */
+int
+svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+	if (pool == NULL) {
+		/* The -1 assumes caller has done a svc_get() */
+		nrservs -= (serv->sv_nrthreads-1);
+	} else {
+		spin_lock_bh(&pool->sp_lock);
+		nrservs -= pool->sp_nrthreads;
+		spin_unlock_bh(&pool->sp_lock);
 	}
 
-	return error;
+	if (nrservs > 0)
+		return svc_start_kthreads(serv, pool, nrservs);
+	if (nrservs < 0)
+		return svc_signal_kthreads(serv, pool, nrservs);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(svc_set_num_threads);
 
+/* destroy old threads */
+static int
+svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+	struct task_struct *task;
+	unsigned int state = serv->sv_nrthreads-1;
+
+	/* destroy old threads */
+	do {
+		task = choose_victim(serv, pool, &state);
+		if (task == NULL)
+			break;
+		kthread_stop(task);
+		nrservs++;
+	} while (nrservs < 0);
+	return 0;
+}
+
+int
+svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+	if (pool == NULL) {
+		/* The -1 assumes caller has done a svc_get() */
+		nrservs -= (serv->sv_nrthreads-1);
+	} else {
+		spin_lock_bh(&pool->sp_lock);
+		nrservs -= pool->sp_nrthreads;
+		spin_unlock_bh(&pool->sp_lock);
+	}
+
+	if (nrservs > 0)
+		return svc_start_kthreads(serv, pool, nrservs);
+	if (nrservs < 0)
+		return svc_stop_kthreads(serv, pool, nrservs);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(svc_set_num_threads_sync);
+
 /*
  * Called from a server thread as it's exiting. Caller must hold the "service
  * mutex" for the service.
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 1f7082144e01..e34f4ee7f2b6 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -807,7 +807,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 EXPORT_SYMBOL_GPL(xdr_init_decode);
 
 /**
- * xdr_init_decode - Initialize an xdr_stream for decoding data.
+ * xdr_init_decode_pages - Initialize an xdr_stream for decoding into pages
  * @xdr: pointer to xdr_stream struct
  * @buf: pointer to XDR buffer from which to decode data
  * @pages: list of pages to decode into
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index b530a2852ba8..3e63c5e97ebe 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -651,6 +651,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
 	xprt_wake_pending_tasks(xprt, -EAGAIN);
 	spin_unlock_bh(&xprt->transport_lock);
 }
+EXPORT_SYMBOL_GPL(xprt_force_disconnect);
 
 /**
  * xprt_conditional_disconnect - force a transport to disconnect
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index ef19fa42c50f..c1ae8142ab73 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -4,5 +4,5 @@ rpcrdma-y := transport.o rpc_rdma.o verbs.o \
 	fmr_ops.o frwr_ops.o \
 	svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
 	svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
-	module.o
+	svc_rdma_rw.o module.o
 rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 24fedd4b117e..03f6b5840764 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -119,11 +119,9 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
 
 	for (i = 0; i < (reqs << 1); i++) {
 		rqst = kzalloc(sizeof(*rqst), GFP_KERNEL);
-		if (!rqst) {
-			pr_err("RPC:       %s: Failed to create bc rpc_rqst\n",
-			       __func__);
+		if (!rqst)
 			goto out_free;
-		}
+
 		dprintk("RPC:       %s: new rqst %p\n", __func__, rqst);
 
 		rqst->rq_xprt = &r_xprt->rx_xprt;
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index a044be2d6ad7..694e9b13ecf0 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -494,7 +494,7 @@ rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 	}
 	sge->length = len;
 
-	ib_dma_sync_single_for_device(ia->ri_device, sge->addr,
+	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
 				      sge->length, DMA_TO_DEVICE);
 	req->rl_send_wr.num_sge++;
 	return true;
@@ -523,7 +523,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 	sge[sge_no].addr = rdmab_addr(rb);
 	sge[sge_no].length = xdr->head[0].iov_len;
 	sge[sge_no].lkey = rdmab_lkey(rb);
-	ib_dma_sync_single_for_device(device, sge[sge_no].addr,
+	ib_dma_sync_single_for_device(rdmab_device(rb), sge[sge_no].addr,
 				      sge[sge_no].length, DMA_TO_DEVICE);
 
 	/* If there is a Read chunk, the page list is being handled
@@ -781,9 +781,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	return 0;
 
 out_err:
-	pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
-	       PTR_ERR(iptr));
-	r_xprt->rx_stats.failed_marshal_count++;
+	if (PTR_ERR(iptr) != -ENOBUFS) {
+		pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
+		       PTR_ERR(iptr));
+		r_xprt->rx_stats.failed_marshal_count++;
+	}
 	return PTR_ERR(iptr);
 }
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index c846ca9f1eba..a4a8f6989ee7 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -58,9 +58,9 @@ unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
 unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
 static unsigned int min_max_requests = 4;
 static unsigned int max_max_requests = 16384;
-unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
-static unsigned int min_max_inline = 4096;
-static unsigned int max_max_inline = 65536;
+unsigned int svcrdma_max_req_size = RPCRDMA_DEF_INLINE_THRESH;
+static unsigned int min_max_inline = RPCRDMA_DEF_INLINE_THRESH;
+static unsigned int max_max_inline = RPCRDMA_MAX_INLINE_THRESH;
 
 atomic_t rdma_stat_recv;
 atomic_t rdma_stat_read;
@@ -247,8 +247,6 @@ int svc_rdma_init(void)
 	dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
 	dprintk("\tsvcrdma_ord      : %d\n", svcrdma_ord);
 	dprintk("\tmax_requests     : %u\n", svcrdma_max_requests);
-	dprintk("\tsq_depth         : %u\n",
-		svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
 	dprintk("\tmax_bc_requests  : %u\n", svcrdma_max_bc_requests);
 	dprintk("\tmax_inline       : %d\n", svcrdma_max_req_size);
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index ff1df40f0d26..c676ed0efb5a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -12,7 +12,17 @@
 
 #undef SVCRDMA_BACKCHANNEL_DEBUG
 
-int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
+/**
+ * svc_rdma_handle_bc_reply - Process incoming backchannel reply
+ * @xprt: controlling backchannel transport
+ * @rdma_resp: pointer to incoming transport header
+ * @rcvbuf: XDR buffer into which to decode the reply
+ *
+ * Returns:
+ *	%0 if @rcvbuf is filled in, xprt_complete_rqst called,
+ *	%-EAGAIN if server should call ->recvfrom again.
+ */
+int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
 			     struct xdr_buf *rcvbuf)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
@@ -27,13 +37,13 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
 
 	p = (__be32 *)src->iov_base;
 	len = src->iov_len;
-	xid = rmsgp->rm_xid;
+	xid = *rdma_resp;
 
 #ifdef SVCRDMA_BACKCHANNEL_DEBUG
 	pr_info("%s: xid=%08x, length=%zu\n",
 		__func__, be32_to_cpu(xid), len);
 	pr_info("%s: RPC/RDMA: %*ph\n",
-		__func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp);
+		__func__, (int)RPCRDMA_HDRLEN_MIN, rdma_resp);
 	pr_info("%s:      RPC: %*ph\n",
 		__func__, (int)len, p);
 #endif
@@ -53,7 +63,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
 		goto out_unlock;
 	memcpy(dst->iov_base, p, len);
 
-	credits = be32_to_cpu(rmsgp->rm_credit);
+	credits = be32_to_cpup(rdma_resp + 2);
 	if (credits == 0)
 		credits = 1;	/* don't deadlock */
 	else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
@@ -90,9 +100,9 @@ out_notfound:
  * Caller holds the connection's mutex and has already marshaled
  * the RPC/RDMA request.
  *
- * This is similar to svc_rdma_reply, but takes an rpc_rqst
- * instead, does not support chunks, and avoids blocking memory
- * allocation.
+ * This is similar to svc_rdma_send_reply_msg, but takes a struct
+ * rpc_rqst instead, does not support chunks, and avoids blocking
+ * memory allocation.
  *
  * XXX: There is still an opportunity to block in svc_rdma_send()
  * if there are no SQ entries to post the Send. This may occur if
@@ -101,59 +111,36 @@ out_notfound:
 static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
 			      struct rpc_rqst *rqst)
 {
-	struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
 	struct svc_rdma_op_ctxt *ctxt;
-	struct svc_rdma_req_map *vec;
-	struct ib_send_wr send_wr;
 	int ret;
 
-	vec = svc_rdma_get_req_map(rdma);
-	ret = svc_rdma_map_xdr(rdma, sndbuf, vec, false);
-	if (ret)
+	ctxt = svc_rdma_get_context(rdma);
+
+	/* rpcrdma_bc_send_request builds the transport header and
+	 * the backchannel RPC message in the same buffer. Thus only
+	 * one SGE is needed to send both.
+	 */
+	ret = svc_rdma_map_reply_hdr(rdma, ctxt, rqst->rq_buffer,
+				     rqst->rq_snd_buf.len);
+	if (ret < 0)
 		goto out_err;
 
 	ret = svc_rdma_repost_recv(rdma, GFP_NOIO);
 	if (ret)
 		goto out_err;
 
-	ctxt = svc_rdma_get_context(rdma);
-	ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
-	ctxt->count = 1;
-
-	ctxt->direction = DMA_TO_DEVICE;
-	ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
-	ctxt->sge[0].length = sndbuf->len;
-	ctxt->sge[0].addr =
-	    ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0,
-			    sndbuf->len, DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) {
-		ret = -EIO;
-		goto out_unmap;
-	}
-	svc_rdma_count_mappings(rdma, ctxt);
-
-	memset(&send_wr, 0, sizeof(send_wr));
-	ctxt->cqe.done = svc_rdma_wc_send;
-	send_wr.wr_cqe = &ctxt->cqe;
-	send_wr.sg_list = ctxt->sge;
-	send_wr.num_sge = 1;
-	send_wr.opcode = IB_WR_SEND;
-	send_wr.send_flags = IB_SEND_SIGNALED;
-
-	ret = svc_rdma_send(rdma, &send_wr);
-	if (ret) {
-		ret = -EIO;
+	ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
+	if (ret)
 		goto out_unmap;
-	}
 
 out_err:
-	svc_rdma_put_req_map(rdma, vec);
 	dprintk("svcrdma: %s returns %d\n", __func__, ret);
 	return ret;
 
 out_unmap:
 	svc_rdma_unmap_dma(ctxt);
 	svc_rdma_put_context(ctxt, 1);
+	ret = -EIO;
 	goto out_err;
 }
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index 1c4aabf0f657..bdcf7d85a3dc 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -166,92 +166,3 @@ out_inval:
 	dprintk("svcrdma: failed to parse transport header\n");
 	return -EINVAL;
 }
-
-int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
-			      struct rpcrdma_msg *rmsgp,
-			      enum rpcrdma_errcode err, __be32 *va)
-{
-	__be32 *startp = va;
-
-	*va++ = rmsgp->rm_xid;
-	*va++ = rmsgp->rm_vers;
-	*va++ = xprt->sc_fc_credits;
-	*va++ = rdma_error;
-	*va++ = cpu_to_be32(err);
-	if (err == ERR_VERS) {
-		*va++ = rpcrdma_version;
-		*va++ = rpcrdma_version;
-	}
-
-	return (int)((unsigned long)va - (unsigned long)startp);
-}
-
-/**
- * svc_rdma_xdr_get_reply_hdr_length - Get length of Reply transport header
- * @rdma_resp: buffer containing Reply transport header
- *
- * Returns length of transport header, in bytes.
- */
-unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp)
-{
-	unsigned int nsegs;
-	__be32 *p;
-
-	p = rdma_resp;
-
-	/* RPC-over-RDMA V1 replies never have a Read list. */
-	p += rpcrdma_fixed_maxsz + 1;
-
-	/* Skip Write list. */
-	while (*p++ != xdr_zero) {
-		nsegs = be32_to_cpup(p++);
-		p += nsegs * rpcrdma_segment_maxsz;
-	}
-
-	/* Skip Reply chunk. */
-	if (*p++ != xdr_zero) {
-		nsegs = be32_to_cpup(p++);
-		p += nsegs * rpcrdma_segment_maxsz;
-	}
-
-	return (unsigned long)p - (unsigned long)rdma_resp;
-}
-
-void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
-{
-	struct rpcrdma_write_array *ary;
-
-	/* no read-list */
-	rmsgp->rm_body.rm_chunks[0] = xdr_zero;
-
-	/* write-array discrim */
-	ary = (struct rpcrdma_write_array *)
-		&rmsgp->rm_body.rm_chunks[1];
-	ary->wc_discrim = xdr_one;
-	ary->wc_nchunks = cpu_to_be32(chunks);
-
-	/* write-list terminator */
-	ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
-
-	/* reply-array discriminator */
-	ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
-}
-
-void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
-				 int chunks)
-{
-	ary->wc_discrim = xdr_one;
-	ary->wc_nchunks = cpu_to_be32(chunks);
-}
-
-void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
-				     int chunk_no,
-				     __be32 rs_handle,
-				     __be64 rs_offset,
-				     u32 write_len)
-{
-	struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
-	seg->rs_handle = rs_handle;
-	seg->rs_offset = rs_offset;
-	seg->rs_length = cpu_to_be32(write_len);
-}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index f7b2daf72a86..27a99bf5b1a6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -558,33 +558,85 @@ static void rdma_read_complete(struct svc_rqst *rqstp,
 	rqstp->rq_arg.buflen = head->arg.buflen;
 }
 
+static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
+				__be32 *rdma_argp, int status)
+{
+	struct svc_rdma_op_ctxt *ctxt;
+	__be32 *p, *err_msgp;
+	unsigned int length;
+	struct page *page;
+	int ret;
+
+	ret = svc_rdma_repost_recv(xprt, GFP_KERNEL);
+	if (ret)
+		return;
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page)
+		return;
+	err_msgp = page_address(page);
+
+	p = err_msgp;
+	*p++ = *rdma_argp;
+	*p++ = *(rdma_argp + 1);
+	*p++ = xprt->sc_fc_credits;
+	*p++ = rdma_error;
+	if (status == -EPROTONOSUPPORT) {
+		*p++ = err_vers;
+		*p++ = rpcrdma_version;
+		*p++ = rpcrdma_version;
+	} else {
+		*p++ = err_chunk;
+	}
+	length = (unsigned long)p - (unsigned long)err_msgp;
+
+	/* Map transport header; no RPC message payload */
+	ctxt = svc_rdma_get_context(xprt);
+	ret = svc_rdma_map_reply_hdr(xprt, ctxt, err_msgp, length);
+	if (ret) {
+		dprintk("svcrdma: Error %d mapping send for protocol error\n",
+			ret);
+		return;
+	}
+
+	ret = svc_rdma_post_send_wr(xprt, ctxt, 1, 0);
+	if (ret) {
+		dprintk("svcrdma: Error %d posting send for protocol error\n",
+			ret);
+		svc_rdma_unmap_dma(ctxt);
+		svc_rdma_put_context(ctxt, 1);
+	}
+}
+
 /* By convention, backchannel calls arrive via rdma_msg type
  * messages, and never populate the chunk lists. This makes
  * the RPC/RDMA header small and fixed in size, so it is
  * straightforward to check the RPC header's direction field.
  */
-static bool
-svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp)
+static bool svc_rdma_is_backchannel_reply(struct svc_xprt *xprt,
+					  __be32 *rdma_resp)
 {
-	__be32 *p = (__be32 *)rmsgp;
+	__be32 *p;
 
 	if (!xprt->xpt_bc_xprt)
 		return false;
 
-	if (rmsgp->rm_type != rdma_msg)
+	p = rdma_resp + 3;
+	if (*p++ != rdma_msg)
 		return false;
-	if (rmsgp->rm_body.rm_chunks[0] != xdr_zero)
+
+	if (*p++ != xdr_zero)
 		return false;
-	if (rmsgp->rm_body.rm_chunks[1] != xdr_zero)
+	if (*p++ != xdr_zero)
 		return false;
-	if (rmsgp->rm_body.rm_chunks[2] != xdr_zero)
+	if (*p++ != xdr_zero)
 		return false;
 
-	/* sanity */
-	if (p[7] != rmsgp->rm_xid)
+	/* XID sanity */
+	if (*p++ != *rdma_resp)
 		return false;
 	/* call direction */
-	if (p[8] == cpu_to_be32(RPC_CALL))
+	if (*p == cpu_to_be32(RPC_CALL))
 		return false;
 
 	return true;
@@ -650,8 +702,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		goto out_drop;
 	rqstp->rq_xprt_hlen = ret;
 
-	if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
-		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
+	if (svc_rdma_is_backchannel_reply(xprt, &rmsgp->rm_xid)) {
+		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt,
+					       &rmsgp->rm_xid,
 					       &rqstp->rq_arg);
 		svc_rdma_put_context(ctxt, 0);
 		if (ret)
@@ -686,7 +739,7 @@ complete:
 	return ret;
 
 out_err:
-	svc_rdma_send_error(rdma_xprt, rmsgp, ret);
+	svc_rdma_send_error(rdma_xprt, &rmsgp->rm_xid, ret);
 	svc_rdma_put_context(ctxt, 0);
 	return 0;
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
new file mode 100644
index 000000000000..0cf620277693
--- /dev/null
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -0,0 +1,512 @@
+/*
+ * Copyright (c) 2016 Oracle.  All rights reserved.
+ *
+ * Use the core R/W API to move RPC-over-RDMA Read and Write chunks.
+ */
+
+#include <linux/sunrpc/rpc_rdma.h>
+#include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/debug.h>
+
+#include <rdma/rw.h>
+
+#define RPCDBG_FACILITY	RPCDBG_SVCXPRT
+
+/* Each R/W context contains state for one chain of RDMA Read or
+ * Write Work Requests.
+ *
+ * Each WR chain handles a single contiguous server-side buffer,
+ * because scatterlist entries after the first have to start on
+ * page alignment. xdr_buf iovecs cannot guarantee alignment.
+ *
+ * Each WR chain handles only one R_key. Each RPC-over-RDMA segment
+ * from a client may contain a unique R_key, so each WR chain moves
+ * up to one segment at a time.
+ *
+ * The scatterlist makes this data structure over 4KB in size. To
+ * make it less likely to fail, and to handle the allocation for
+ * smaller I/O requests without disabling bottom-halves, these
+ * contexts are created on demand, but cached and reused until the
+ * controlling svcxprt_rdma is destroyed.
+ */
+struct svc_rdma_rw_ctxt {
+	struct list_head	rw_list;
+	struct rdma_rw_ctx	rw_ctx;
+	int			rw_nents;
+	struct sg_table		rw_sg_table;
+	struct scatterlist	rw_first_sgl[0];
+};
+
+static inline struct svc_rdma_rw_ctxt *
+svc_rdma_next_ctxt(struct list_head *list)
+{
+	return list_first_entry_or_null(list, struct svc_rdma_rw_ctxt,
+					rw_list);
+}
+
+static struct svc_rdma_rw_ctxt *
+svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
+{
+	struct svc_rdma_rw_ctxt *ctxt;
+
+	spin_lock(&rdma->sc_rw_ctxt_lock);
+
+	ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts);
+	if (ctxt) {
+		list_del(&ctxt->rw_list);
+		spin_unlock(&rdma->sc_rw_ctxt_lock);
+	} else {
+		spin_unlock(&rdma->sc_rw_ctxt_lock);
+		ctxt = kmalloc(sizeof(*ctxt) +
+			       SG_CHUNK_SIZE * sizeof(struct scatterlist),
+			       GFP_KERNEL);
+		if (!ctxt)
+			goto out;
+		INIT_LIST_HEAD(&ctxt->rw_list);
+	}
+
+	ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl;
+	if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges,
+				   ctxt->rw_sg_table.sgl)) {
+		kfree(ctxt);
+		ctxt = NULL;
+	}
+out:
+	return ctxt;
+}
+
+static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
+				 struct svc_rdma_rw_ctxt *ctxt)
+{
+	sg_free_table_chained(&ctxt->rw_sg_table, true);
+
+	spin_lock(&rdma->sc_rw_ctxt_lock);
+	list_add(&ctxt->rw_list, &rdma->sc_rw_ctxts);
+	spin_unlock(&rdma->sc_rw_ctxt_lock);
+}
+
+/**
+ * svc_rdma_destroy_rw_ctxts - Free accumulated R/W contexts
+ * @rdma: transport about to be destroyed
+ *
+ */
+void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
+{
+	struct svc_rdma_rw_ctxt *ctxt;
+
+	while ((ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts)) != NULL) {
+		list_del(&ctxt->rw_list);
+		kfree(ctxt);
+	}
+}
+
+/* A chunk context tracks all I/O for moving one Read or Write
+ * chunk. This is a a set of rdma_rw's that handle data movement
+ * for all segments of one chunk.
+ *
+ * These are small, acquired with a single allocator call, and
+ * no more than one is needed per chunk. They are allocated on
+ * demand, and not cached.
+ */
+struct svc_rdma_chunk_ctxt {
+	struct ib_cqe		cc_cqe;
+	struct svcxprt_rdma	*cc_rdma;
+	struct list_head	cc_rwctxts;
+	int			cc_sqecount;
+	enum dma_data_direction cc_dir;
+};
+
+static void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
+			     struct svc_rdma_chunk_ctxt *cc,
+			     enum dma_data_direction dir)
+{
+	cc->cc_rdma = rdma;
+	svc_xprt_get(&rdma->sc_xprt);
+
+	INIT_LIST_HEAD(&cc->cc_rwctxts);
+	cc->cc_sqecount = 0;
+	cc->cc_dir = dir;
+}
+
+static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc)
+{
+	struct svcxprt_rdma *rdma = cc->cc_rdma;
+	struct svc_rdma_rw_ctxt *ctxt;
+
+	while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
+		list_del(&ctxt->rw_list);
+
+		rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
+				    rdma->sc_port_num, ctxt->rw_sg_table.sgl,
+				    ctxt->rw_nents, cc->cc_dir);
+		svc_rdma_put_rw_ctxt(rdma, ctxt);
+	}
+	svc_xprt_put(&rdma->sc_xprt);
+}
+
+/* State for sending a Write or Reply chunk.
+ *  - Tracks progress of writing one chunk over all its segments
+ *  - Stores arguments for the SGL constructor functions
+ */
+struct svc_rdma_write_info {
+	/* write state of this chunk */
+	unsigned int		wi_seg_off;
+	unsigned int		wi_seg_no;
+	unsigned int		wi_nsegs;
+	__be32			*wi_segs;
+
+	/* SGL constructor arguments */
+	struct xdr_buf		*wi_xdr;
+	unsigned char		*wi_base;
+	unsigned int		wi_next_off;
+
+	struct svc_rdma_chunk_ctxt	wi_cc;
+};
+
+static struct svc_rdma_write_info *
+svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, __be32 *chunk)
+{
+	struct svc_rdma_write_info *info;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return info;
+
+	info->wi_seg_off = 0;
+	info->wi_seg_no = 0;
+	info->wi_nsegs = be32_to_cpup(++chunk);
+	info->wi_segs = ++chunk;
+	svc_rdma_cc_init(rdma, &info->wi_cc, DMA_TO_DEVICE);
+	return info;
+}
+
+static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
+{
+	svc_rdma_cc_release(&info->wi_cc);
+	kfree(info);
+}
+
+/**
+ * svc_rdma_write_done - Write chunk completion
+ * @cq: controlling Completion Queue
+ * @wc: Work Completion
+ *
+ * Pages under I/O are freed by a subsequent Send completion.
+ */
+static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct svc_rdma_chunk_ctxt *cc =
+			container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
+	struct svcxprt_rdma *rdma = cc->cc_rdma;
+	struct svc_rdma_write_info *info =
+			container_of(cc, struct svc_rdma_write_info, wi_cc);
+
+	atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
+	wake_up(&rdma->sc_send_wait);
+
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			pr_err("svcrdma: write ctx: %s (%u/0x%x)\n",
+			       ib_wc_status_msg(wc->status),
+			       wc->status, wc->vendor_err);
+	}
+
+	svc_rdma_write_info_free(info);
+}
+
+/* This function sleeps when the transport's Send Queue is congested.
+ *
+ * Assumptions:
+ * - If ib_post_send() succeeds, only one completion is expected,
+ *   even if one or more WRs are flushed. This is true when posting
+ *   an rdma_rw_ctx or when posting a single signaled WR.
+ */
+static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
+{
+	struct svcxprt_rdma *rdma = cc->cc_rdma;
+	struct svc_xprt *xprt = &rdma->sc_xprt;
+	struct ib_send_wr *first_wr, *bad_wr;
+	struct list_head *tmp;
+	struct ib_cqe *cqe;
+	int ret;
+
+	first_wr = NULL;
+	cqe = &cc->cc_cqe;
+	list_for_each(tmp, &cc->cc_rwctxts) {
+		struct svc_rdma_rw_ctxt *ctxt;
+
+		ctxt = list_entry(tmp, struct svc_rdma_rw_ctxt, rw_list);
+		first_wr = rdma_rw_ctx_wrs(&ctxt->rw_ctx, rdma->sc_qp,
+					   rdma->sc_port_num, cqe, first_wr);
+		cqe = NULL;
+	}
+
+	do {
+		if (atomic_sub_return(cc->cc_sqecount,
+				      &rdma->sc_sq_avail) > 0) {
+			ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
+			if (ret)
+				break;
+			return 0;
+		}
+
+		atomic_inc(&rdma_stat_sq_starve);
+		atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
+		wait_event(rdma->sc_send_wait,
+			   atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount);
+	} while (1);
+
+	pr_err("svcrdma: ib_post_send failed (%d)\n", ret);
+	set_bit(XPT_CLOSE, &xprt->xpt_flags);
+
+	/* If even one was posted, there will be a completion. */
+	if (bad_wr != first_wr)
+		return 0;
+
+	atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
+	wake_up(&rdma->sc_send_wait);
+	return -ENOTCONN;
+}
+
+/* Build and DMA-map an SGL that covers one kvec in an xdr_buf
+ */
+static void svc_rdma_vec_to_sg(struct svc_rdma_write_info *info,
+			       unsigned int len,
+			       struct svc_rdma_rw_ctxt *ctxt)
+{
+	struct scatterlist *sg = ctxt->rw_sg_table.sgl;
+
+	sg_set_buf(&sg[0], info->wi_base, len);
+	info->wi_base += len;
+
+	ctxt->rw_nents = 1;
+}
+
+/* Build and DMA-map an SGL that covers part of an xdr_buf's pagelist.
+ */
+static void svc_rdma_pagelist_to_sg(struct svc_rdma_write_info *info,
+				    unsigned int remaining,
+				    struct svc_rdma_rw_ctxt *ctxt)
+{
+	unsigned int sge_no, sge_bytes, page_off, page_no;
+	struct xdr_buf *xdr = info->wi_xdr;
+	struct scatterlist *sg;
+	struct page **page;
+
+	page_off = (info->wi_next_off + xdr->page_base) & ~PAGE_MASK;
+	page_no = (info->wi_next_off + xdr->page_base) >> PAGE_SHIFT;
+	page = xdr->pages + page_no;
+	info->wi_next_off += remaining;
+	sg = ctxt->rw_sg_table.sgl;
+	sge_no = 0;
+	do {
+		sge_bytes = min_t(unsigned int, remaining,
+				  PAGE_SIZE - page_off);
+		sg_set_page(sg, *page, sge_bytes, page_off);
+
+		remaining -= sge_bytes;
+		sg = sg_next(sg);
+		page_off = 0;
+		sge_no++;
+		page++;
+	} while (remaining);
+
+	ctxt->rw_nents = sge_no;
+}
+
+/* Construct RDMA Write WRs to send a portion of an xdr_buf containing
+ * an RPC Reply.
+ */
+static int
+svc_rdma_build_writes(struct svc_rdma_write_info *info,
+		      void (*constructor)(struct svc_rdma_write_info *info,
+					  unsigned int len,
+					  struct svc_rdma_rw_ctxt *ctxt),
+		      unsigned int remaining)
+{
+	struct svc_rdma_chunk_ctxt *cc = &info->wi_cc;
+	struct svcxprt_rdma *rdma = cc->cc_rdma;
+	struct svc_rdma_rw_ctxt *ctxt;
+	__be32 *seg;
+	int ret;
+
+	cc->cc_cqe.done = svc_rdma_write_done;
+	seg = info->wi_segs + info->wi_seg_no * rpcrdma_segment_maxsz;
+	do {
+		unsigned int write_len;
+		u32 seg_length, seg_handle;
+		u64 seg_offset;
+
+		if (info->wi_seg_no >= info->wi_nsegs)
+			goto out_overflow;
+
+		seg_handle = be32_to_cpup(seg);
+		seg_length = be32_to_cpup(seg + 1);
+		xdr_decode_hyper(seg + 2, &seg_offset);
+		seg_offset += info->wi_seg_off;
+
+		write_len = min(remaining, seg_length - info->wi_seg_off);
+		ctxt = svc_rdma_get_rw_ctxt(rdma,
+					    (write_len >> PAGE_SHIFT) + 2);
+		if (!ctxt)
+			goto out_noctx;
+
+		constructor(info, write_len, ctxt);
+		ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp,
+				       rdma->sc_port_num, ctxt->rw_sg_table.sgl,
+				       ctxt->rw_nents, 0, seg_offset,
+				       seg_handle, DMA_TO_DEVICE);
+		if (ret < 0)
+			goto out_initerr;
+
+		list_add(&ctxt->rw_list, &cc->cc_rwctxts);
+		cc->cc_sqecount += ret;
+		if (write_len == seg_length - info->wi_seg_off) {
+			seg += 4;
+			info->wi_seg_no++;
+			info->wi_seg_off = 0;
+		} else {
+			info->wi_seg_off += write_len;
+		}
+		remaining -= write_len;
+	} while (remaining);
+
+	return 0;
+
+out_overflow:
+	dprintk("svcrdma: inadequate space in Write chunk (%u)\n",
+		info->wi_nsegs);
+	return -E2BIG;
+
+out_noctx:
+	dprintk("svcrdma: no R/W ctxs available\n");
+	return -ENOMEM;
+
+out_initerr:
+	svc_rdma_put_rw_ctxt(rdma, ctxt);
+	pr_err("svcrdma: failed to map pagelist (%d)\n", ret);
+	return -EIO;
+}
+
+/* Send one of an xdr_buf's kvecs by itself. To send a Reply
+ * chunk, the whole RPC Reply is written back to the client.
+ * This function writes either the head or tail of the xdr_buf
+ * containing the Reply.
+ */
+static int svc_rdma_send_xdr_kvec(struct svc_rdma_write_info *info,
+				  struct kvec *vec)
+{
+	info->wi_base = vec->iov_base;
+	return svc_rdma_build_writes(info, svc_rdma_vec_to_sg,
+				     vec->iov_len);
+}
+
+/* Send an xdr_buf's page list by itself. A Write chunk is
+ * just the page list. a Reply chunk is the head, page list,
+ * and tail. This function is shared between the two types
+ * of chunk.
+ */
+static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info,
+				      struct xdr_buf *xdr)
+{
+	info->wi_xdr = xdr;
+	info->wi_next_off = 0;
+	return svc_rdma_build_writes(info, svc_rdma_pagelist_to_sg,
+				     xdr->page_len);
+}
+
+/**
+ * svc_rdma_send_write_chunk - Write all segments in a Write chunk
+ * @rdma: controlling RDMA transport
+ * @wr_ch: Write chunk provided by client
+ * @xdr: xdr_buf containing the data payload
+ *
+ * Returns a non-negative number of bytes the chunk consumed, or
+ *	%-E2BIG if the payload was larger than the Write chunk,
+ *	%-ENOMEM if rdma_rw context pool was exhausted,
+ *	%-ENOTCONN if posting failed (connection is lost),
+ *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
+ */
+int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch,
+			      struct xdr_buf *xdr)
+{
+	struct svc_rdma_write_info *info;
+	int ret;
+
+	if (!xdr->page_len)
+		return 0;
+
+	info = svc_rdma_write_info_alloc(rdma, wr_ch);
+	if (!info)
+		return -ENOMEM;
+
+	ret = svc_rdma_send_xdr_pagelist(info, xdr);
+	if (ret < 0)
+		goto out_err;
+
+	ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
+	if (ret < 0)
+		goto out_err;
+	return xdr->page_len;
+
+out_err:
+	svc_rdma_write_info_free(info);
+	return ret;
+}
+
+/**
+ * svc_rdma_send_reply_chunk - Write all segments in the Reply chunk
+ * @rdma: controlling RDMA transport
+ * @rp_ch: Reply chunk provided by client
+ * @writelist: true if client provided a Write list
+ * @xdr: xdr_buf containing an RPC Reply
+ *
+ * Returns a non-negative number of bytes the chunk consumed, or
+ *	%-E2BIG if the payload was larger than the Reply chunk,
+ *	%-ENOMEM if rdma_rw context pool was exhausted,
+ *	%-ENOTCONN if posting failed (connection is lost),
+ *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
+ */
+int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch,
+			      bool writelist, struct xdr_buf *xdr)
+{
+	struct svc_rdma_write_info *info;
+	int consumed, ret;
+
+	info = svc_rdma_write_info_alloc(rdma, rp_ch);
+	if (!info)
+		return -ENOMEM;
+
+	ret = svc_rdma_send_xdr_kvec(info, &xdr->head[0]);
+	if (ret < 0)
+		goto out_err;
+	consumed = xdr->head[0].iov_len;
+
+	/* Send the page list in the Reply chunk only if the
+	 * client did not provide Write chunks.
+	 */
+	if (!writelist && xdr->page_len) {
+		ret = svc_rdma_send_xdr_pagelist(info, xdr);
+		if (ret < 0)
+			goto out_err;
+		consumed += xdr->page_len;
+	}
+
+	if (xdr->tail[0].iov_len) {
+		ret = svc_rdma_send_xdr_kvec(info, &xdr->tail[0]);
+		if (ret < 0)
+			goto out_err;
+		consumed += xdr->tail[0].iov_len;
+	}
+
+	ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
+	if (ret < 0)
+		goto out_err;
+	return consumed;
+
+out_err:
+	svc_rdma_write_info_free(info);
+	return ret;
+}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 515221b16d09..1736337f3a55 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2016 Oracle. All rights reserved.
  * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
  *
@@ -40,6 +41,63 @@
  * Author: Tom Tucker <tom@opengridcomputing.com>
  */
 
+/* Operation
+ *
+ * The main entry point is svc_rdma_sendto. This is called by the
+ * RPC server when an RPC Reply is ready to be transmitted to a client.
+ *
+ * The passed-in svc_rqst contains a struct xdr_buf which holds an
+ * XDR-encoded RPC Reply message. sendto must construct the RPC-over-RDMA
+ * transport header, post all Write WRs needed for this Reply, then post
+ * a Send WR conveying the transport header and the RPC message itself to
+ * the client.
+ *
+ * svc_rdma_sendto must fully transmit the Reply before returning, as
+ * the svc_rqst will be recycled as soon as sendto returns. Remaining
+ * resources referred to by the svc_rqst are also recycled at that time.
+ * Therefore any resources that must remain longer must be detached
+ * from the svc_rqst and released later.
+ *
+ * Page Management
+ *
+ * The I/O that performs Reply transmission is asynchronous, and may
+ * complete well after sendto returns. Thus pages under I/O must be
+ * removed from the svc_rqst before sendto returns.
+ *
+ * The logic here depends on Send Queue and completion ordering. Since
+ * the Send WR is always posted last, it will always complete last. Thus
+ * when it completes, it is guaranteed that all previous Write WRs have
+ * also completed.
+ *
+ * Write WRs are constructed and posted. Each Write segment gets its own
+ * svc_rdma_rw_ctxt, allowing the Write completion handler to find and
+ * DMA-unmap the pages under I/O for that Write segment. The Write
+ * completion handler does not release any pages.
+ *
+ * When the Send WR is constructed, it also gets its own svc_rdma_op_ctxt.
+ * The ownership of all of the Reply's pages are transferred into that
+ * ctxt, the Send WR is posted, and sendto returns.
+ *
+ * The svc_rdma_op_ctxt is presented when the Send WR completes. The
+ * Send completion handler finally releases the Reply's pages.
+ *
+ * This mechanism also assumes that completions on the transport's Send
+ * Completion Queue do not run in parallel. Otherwise a Write completion
+ * and Send completion running at the same time could release pages that
+ * are still DMA-mapped.
+ *
+ * Error Handling
+ *
+ * - If the Send WR is posted successfully, it will either complete
+ *   successfully, or get flushed. Either way, the Send completion
+ *   handler releases the Reply's pages.
+ * - If the Send WR cannot be not posted, the forward path releases
+ *   the Reply's pages.
+ *
+ * This handles the case, without the use of page reference counting,
+ * where two different Write segments send portions of the same page.
+ */
+
 #include <linux/sunrpc/debug.h>
 #include <linux/sunrpc/rpc_rdma.h>
 #include <linux/spinlock.h>
@@ -55,113 +113,141 @@ static u32 xdr_padsize(u32 len)
 	return (len & 3) ? (4 - (len & 3)) : 0;
 }
 
-int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
-		     struct xdr_buf *xdr,
-		     struct svc_rdma_req_map *vec,
-		     bool write_chunk_present)
+/* Returns length of transport header, in bytes.
+ */
+static unsigned int svc_rdma_reply_hdr_len(__be32 *rdma_resp)
 {
-	int sge_no;
-	u32 sge_bytes;
-	u32 page_bytes;
-	u32 page_off;
-	int page_no;
-
-	if (xdr->len !=
-	    (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
-		pr_err("svcrdma: %s: XDR buffer length error\n", __func__);
-		return -EIO;
-	}
+	unsigned int nsegs;
+	__be32 *p;
 
-	/* Skip the first sge, this is for the RPCRDMA header */
-	sge_no = 1;
+	p = rdma_resp;
+
+	/* RPC-over-RDMA V1 replies never have a Read list. */
+	p += rpcrdma_fixed_maxsz + 1;
 
-	/* Head SGE */
-	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
-	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
-	sge_no++;
-
-	/* pages SGE */
-	page_no = 0;
-	page_bytes = xdr->page_len;
-	page_off = xdr->page_base;
-	while (page_bytes) {
-		vec->sge[sge_no].iov_base =
-			page_address(xdr->pages[page_no]) + page_off;
-		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
-		page_bytes -= sge_bytes;
-		vec->sge[sge_no].iov_len = sge_bytes;
-
-		sge_no++;
-		page_no++;
-		page_off = 0; /* reset for next time through loop */
+	/* Skip Write list. */
+	while (*p++ != xdr_zero) {
+		nsegs = be32_to_cpup(p++);
+		p += nsegs * rpcrdma_segment_maxsz;
 	}
 
-	/* Tail SGE */
-	if (xdr->tail[0].iov_len) {
-		unsigned char *base = xdr->tail[0].iov_base;
-		size_t len = xdr->tail[0].iov_len;
-		u32 xdr_pad = xdr_padsize(xdr->page_len);
+	/* Skip Reply chunk. */
+	if (*p++ != xdr_zero) {
+		nsegs = be32_to_cpup(p++);
+		p += nsegs * rpcrdma_segment_maxsz;
+	}
 
-		if (write_chunk_present && xdr_pad) {
-			base += xdr_pad;
-			len -= xdr_pad;
-		}
+	return (unsigned long)p - (unsigned long)rdma_resp;
+}
 
-		if (len) {
-			vec->sge[sge_no].iov_base = base;
-			vec->sge[sge_no].iov_len = len;
-			sge_no++;
+/* One Write chunk is copied from Call transport header to Reply
+ * transport header. Each segment's length field is updated to
+ * reflect number of bytes consumed in the segment.
+ *
+ * Returns number of segments in this chunk.
+ */
+static unsigned int xdr_encode_write_chunk(__be32 *dst, __be32 *src,
+					   unsigned int remaining)
+{
+	unsigned int i, nsegs;
+	u32 seg_len;
+
+	/* Write list discriminator */
+	*dst++ = *src++;
+
+	/* number of segments in this chunk */
+	nsegs = be32_to_cpup(src);
+	*dst++ = *src++;
+
+	for (i = nsegs; i; i--) {
+		/* segment's RDMA handle */
+		*dst++ = *src++;
+
+		/* bytes returned in this segment */
+		seg_len = be32_to_cpu(*src);
+		if (remaining >= seg_len) {
+			/* entire segment was consumed */
+			*dst = *src;
+			remaining -= seg_len;
+		} else {
+			/* segment only partly filled */
+			*dst = cpu_to_be32(remaining);
+			remaining = 0;
 		}
-	}
+		dst++; src++;
 
-	dprintk("svcrdma: %s: sge_no %d page_no %d "
-		"page_base %u page_len %u head_len %zu tail_len %zu\n",
-		__func__, sge_no, page_no, xdr->page_base, xdr->page_len,
-		xdr->head[0].iov_len, xdr->tail[0].iov_len);
+		/* segment's RDMA offset */
+		*dst++ = *src++;
+		*dst++ = *src++;
+	}
 
-	vec->count = sge_no;
-	return 0;
+	return nsegs;
 }
 
-static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
-			      struct xdr_buf *xdr,
-			      u32 xdr_off, size_t len, int dir)
+/* The client provided a Write list in the Call message. Fill in
+ * the segments in the first Write chunk in the Reply's transport
+ * header with the number of bytes consumed in each segment.
+ * Remaining chunks are returned unused.
+ *
+ * Assumptions:
+ *  - Client has provided only one Write chunk
+ */
+static void svc_rdma_xdr_encode_write_list(__be32 *rdma_resp, __be32 *wr_ch,
+					   unsigned int consumed)
 {
-	struct page *page;
-	dma_addr_t dma_addr;
-	if (xdr_off < xdr->head[0].iov_len) {
-		/* This offset is in the head */
-		xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
-		page = virt_to_page(xdr->head[0].iov_base);
-	} else {
-		xdr_off -= xdr->head[0].iov_len;
-		if (xdr_off < xdr->page_len) {
-			/* This offset is in the page list */
-			xdr_off += xdr->page_base;
-			page = xdr->pages[xdr_off >> PAGE_SHIFT];
-			xdr_off &= ~PAGE_MASK;
-		} else {
-			/* This offset is in the tail */
-			xdr_off -= xdr->page_len;
-			xdr_off += (unsigned long)
-				xdr->tail[0].iov_base & ~PAGE_MASK;
-			page = virt_to_page(xdr->tail[0].iov_base);
-		}
+	unsigned int nsegs;
+	__be32 *p, *q;
+
+	/* RPC-over-RDMA V1 replies never have a Read list. */
+	p = rdma_resp + rpcrdma_fixed_maxsz + 1;
+
+	q = wr_ch;
+	while (*q != xdr_zero) {
+		nsegs = xdr_encode_write_chunk(p, q, consumed);
+		q += 2 + nsegs * rpcrdma_segment_maxsz;
+		p += 2 + nsegs * rpcrdma_segment_maxsz;
+		consumed = 0;
 	}
-	dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
-				   min_t(size_t, PAGE_SIZE, len), dir);
-	return dma_addr;
+
+	/* Terminate Write list */
+	*p++ = xdr_zero;
+
+	/* Reply chunk discriminator; may be replaced later */
+	*p = xdr_zero;
+}
+
+/* The client provided a Reply chunk in the Call message. Fill in
+ * the segments in the Reply chunk in the Reply message with the
+ * number of bytes consumed in each segment.
+ *
+ * Assumptions:
+ * - Reply can always fit in the provided Reply chunk
+ */
+static void svc_rdma_xdr_encode_reply_chunk(__be32 *rdma_resp, __be32 *rp_ch,
+					    unsigned int consumed)
+{
+	__be32 *p;
+
+	/* Find the Reply chunk in the Reply's xprt header.
+	 * RPC-over-RDMA V1 replies never have a Read list.
+	 */
+	p = rdma_resp + rpcrdma_fixed_maxsz + 1;
+
+	/* Skip past Write list */
+	while (*p++ != xdr_zero)
+		p += 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
+
+	xdr_encode_write_chunk(p, rp_ch, consumed);
 }
 
 /* Parse the RPC Call's transport header.
  */
-static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
-				      struct rpcrdma_write_array **write,
-				      struct rpcrdma_write_array **reply)
+static void svc_rdma_get_write_arrays(__be32 *rdma_argp,
+				      __be32 **write, __be32 **reply)
 {
 	__be32 *p;
 
-	p = (__be32 *)&rmsgp->rm_body.rm_chunks[0];
+	p = rdma_argp + rpcrdma_fixed_maxsz;
 
 	/* Read list */
 	while (*p++ != xdr_zero)
@@ -169,7 +255,7 @@ static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
 
 	/* Write list */
 	if (*p != xdr_zero) {
-		*write = (struct rpcrdma_write_array *)p;
+		*write = p;
 		while (*p++ != xdr_zero)
 			p += 1 + be32_to_cpu(*p) * 4;
 	} else {
@@ -179,7 +265,7 @@ static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
 
 	/* Reply chunk */
 	if (*p != xdr_zero)
-		*reply = (struct rpcrdma_write_array *)p;
+		*reply = p;
 	else
 		*reply = NULL;
 }
@@ -189,360 +275,321 @@ static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
  * Invalidate, and responder chooses one rkey to invalidate.
  *
  * Find a candidate rkey to invalidate when sending a reply.  Picks the
- * first rkey it finds in the chunks lists.
+ * first R_key it finds in the chunk lists.
  *
  * Returns zero if RPC's chunk lists are empty.
  */
-static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
-				 struct rpcrdma_write_array *wr_ary,
-				 struct rpcrdma_write_array *rp_ary)
+static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
+				 __be32 *wr_lst, __be32 *rp_ch)
 {
-	struct rpcrdma_read_chunk *rd_ary;
-	struct rpcrdma_segment *arg_ch;
+	__be32 *p;
 
-	rd_ary = (struct rpcrdma_read_chunk *)&rdma_argp->rm_body.rm_chunks[0];
-	if (rd_ary->rc_discrim != xdr_zero)
-		return be32_to_cpu(rd_ary->rc_target.rs_handle);
+	p = rdma_argp + rpcrdma_fixed_maxsz;
+	if (*p != xdr_zero)
+		p += 2;
+	else if (wr_lst && be32_to_cpup(wr_lst + 1))
+		p = wr_lst + 2;
+	else if (rp_ch && be32_to_cpup(rp_ch + 1))
+		p = rp_ch + 2;
+	else
+		return 0;
+	return be32_to_cpup(p);
+}
 
-	if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) {
-		arg_ch = &wr_ary->wc_array[0].wc_target;
-		return be32_to_cpu(arg_ch->rs_handle);
-	}
+/* ib_dma_map_page() is used here because svc_rdma_dma_unmap()
+ * is used during completion to DMA-unmap this memory, and
+ * it uses ib_dma_unmap_page() exclusively.
+ */
+static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
+				struct svc_rdma_op_ctxt *ctxt,
+				unsigned int sge_no,
+				unsigned char *base,
+				unsigned int len)
+{
+	unsigned long offset = (unsigned long)base & ~PAGE_MASK;
+	struct ib_device *dev = rdma->sc_cm_id->device;
+	dma_addr_t dma_addr;
 
-	if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) {
-		arg_ch = &rp_ary->wc_array[0].wc_target;
-		return be32_to_cpu(arg_ch->rs_handle);
-	}
+	dma_addr = ib_dma_map_page(dev, virt_to_page(base),
+				   offset, len, DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(dev, dma_addr))
+		return -EIO;
 
+	ctxt->sge[sge_no].addr = dma_addr;
+	ctxt->sge[sge_no].length = len;
+	ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
+	svc_rdma_count_mappings(rdma, ctxt);
 	return 0;
 }
 
-/* Assumptions:
- * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
- */
-static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
-		      u32 rmr, u64 to,
-		      u32 xdr_off, int write_len,
-		      struct svc_rdma_req_map *vec)
+static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
+				 struct svc_rdma_op_ctxt *ctxt,
+				 unsigned int sge_no,
+				 struct page *page,
+				 unsigned int offset,
+				 unsigned int len)
 {
-	struct ib_rdma_wr write_wr;
-	struct ib_sge *sge;
-	int xdr_sge_no;
-	int sge_no;
-	int sge_bytes;
-	int sge_off;
-	int bc;
-	struct svc_rdma_op_ctxt *ctxt;
+	struct ib_device *dev = rdma->sc_cm_id->device;
+	dma_addr_t dma_addr;
 
-	if (vec->count > RPCSVC_MAXPAGES) {
-		pr_err("svcrdma: Too many pages (%lu)\n", vec->count);
+	dma_addr = ib_dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(dev, dma_addr))
 		return -EIO;
-	}
 
-	dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
-		"write_len=%d, vec->sge=%p, vec->count=%lu\n",
-		rmr, (unsigned long long)to, xdr_off,
-		write_len, vec->sge, vec->count);
+	ctxt->sge[sge_no].addr = dma_addr;
+	ctxt->sge[sge_no].length = len;
+	ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
+	svc_rdma_count_mappings(rdma, ctxt);
+	return 0;
+}
 
-	ctxt = svc_rdma_get_context(xprt);
+/**
+ * svc_rdma_map_reply_hdr - DMA map the transport header buffer
+ * @rdma: controlling transport
+ * @ctxt: op_ctxt for the Send WR
+ * @rdma_resp: buffer containing transport header
+ * @len: length of transport header
+ *
+ * Returns:
+ *	%0 if the header is DMA mapped,
+ *	%-EIO if DMA mapping failed.
+ */
+int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma,
+			   struct svc_rdma_op_ctxt *ctxt,
+			   __be32 *rdma_resp,
+			   unsigned int len)
+{
 	ctxt->direction = DMA_TO_DEVICE;
-	sge = ctxt->sge;
-
-	/* Find the SGE associated with xdr_off */
-	for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
-	     xdr_sge_no++) {
-		if (vec->sge[xdr_sge_no].iov_len > bc)
-			break;
-		bc -= vec->sge[xdr_sge_no].iov_len;
-	}
-
-	sge_off = bc;
-	bc = write_len;
-	sge_no = 0;
-
-	/* Copy the remaining SGE */
-	while (bc != 0) {
-		sge_bytes = min_t(size_t,
-			  bc, vec->sge[xdr_sge_no].iov_len-sge_off);
-		sge[sge_no].length = sge_bytes;
-		sge[sge_no].addr =
-			dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
-				    sge_bytes, DMA_TO_DEVICE);
-		xdr_off += sge_bytes;
-		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-					 sge[sge_no].addr))
-			goto err;
-		svc_rdma_count_mappings(xprt, ctxt);
-		sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
-		ctxt->count++;
-		sge_off = 0;
-		sge_no++;
-		xdr_sge_no++;
-		if (xdr_sge_no > vec->count) {
-			pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no);
-			goto err;
-		}
-		bc -= sge_bytes;
-		if (sge_no == xprt->sc_max_sge)
-			break;
-	}
-
-	/* Prepare WRITE WR */
-	memset(&write_wr, 0, sizeof write_wr);
-	ctxt->cqe.done = svc_rdma_wc_write;
-	write_wr.wr.wr_cqe = &ctxt->cqe;
-	write_wr.wr.sg_list = &sge[0];
-	write_wr.wr.num_sge = sge_no;
-	write_wr.wr.opcode = IB_WR_RDMA_WRITE;
-	write_wr.wr.send_flags = IB_SEND_SIGNALED;
-	write_wr.rkey = rmr;
-	write_wr.remote_addr = to;
-
-	/* Post It */
-	atomic_inc(&rdma_stat_write);
-	if (svc_rdma_send(xprt, &write_wr.wr))
-		goto err;
-	return write_len - bc;
- err:
-	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_context(ctxt, 0);
-	return -EIO;
+	ctxt->pages[0] = virt_to_page(rdma_resp);
+	ctxt->count = 1;
+	return svc_rdma_dma_map_page(rdma, ctxt, 0, ctxt->pages[0], 0, len);
 }
 
-noinline
-static int send_write_chunks(struct svcxprt_rdma *xprt,
-			     struct rpcrdma_write_array *wr_ary,
-			     struct rpcrdma_msg *rdma_resp,
-			     struct svc_rqst *rqstp,
-			     struct svc_rdma_req_map *vec)
+/* Load the xdr_buf into the ctxt's sge array, and DMA map each
+ * element as it is added.
+ *
+ * Returns the number of sge elements loaded on success, or
+ * a negative errno on failure.
+ */
+static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
+				  struct svc_rdma_op_ctxt *ctxt,
+				  struct xdr_buf *xdr, __be32 *wr_lst)
 {
-	u32 xfer_len = rqstp->rq_res.page_len;
-	int write_len;
-	u32 xdr_off;
-	int chunk_off;
-	int chunk_no;
-	int nchunks;
-	struct rpcrdma_write_array *res_ary;
+	unsigned int len, sge_no, remaining, page_off;
+	struct page **ppages;
+	unsigned char *base;
+	u32 xdr_pad;
 	int ret;
 
-	res_ary = (struct rpcrdma_write_array *)
-		&rdma_resp->rm_body.rm_chunks[1];
-
-	/* Write chunks start at the pagelist */
-	nchunks = be32_to_cpu(wr_ary->wc_nchunks);
-	for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
-	     xfer_len && chunk_no < nchunks;
-	     chunk_no++) {
-		struct rpcrdma_segment *arg_ch;
-		u64 rs_offset;
-
-		arg_ch = &wr_ary->wc_array[chunk_no].wc_target;
-		write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));
-
-		/* Prepare the response chunk given the length actually
-		 * written */
-		xdr_decode_hyper((__be32 *)&arg_ch->rs_offset, &rs_offset);
-		svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
-						arg_ch->rs_handle,
-						arg_ch->rs_offset,
-						write_len);
-		chunk_off = 0;
-		while (write_len) {
-			ret = send_write(xprt, rqstp,
-					 be32_to_cpu(arg_ch->rs_handle),
-					 rs_offset + chunk_off,
-					 xdr_off,
-					 write_len,
-					 vec);
-			if (ret <= 0)
-				goto out_err;
-			chunk_off += ret;
-			xdr_off += ret;
-			xfer_len -= ret;
-			write_len -= ret;
+	sge_no = 1;
+
+	ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++,
+				   xdr->head[0].iov_base,
+				   xdr->head[0].iov_len);
+	if (ret < 0)
+		return ret;
+
+	/* If a Write chunk is present, the xdr_buf's page list
+	 * is not included inline. However the Upper Layer may
+	 * have added XDR padding in the tail buffer, and that
+	 * should not be included inline.
+	 */
+	if (wr_lst) {
+		base = xdr->tail[0].iov_base;
+		len = xdr->tail[0].iov_len;
+		xdr_pad = xdr_padsize(xdr->page_len);
+
+		if (len && xdr_pad) {
+			base += xdr_pad;
+			len -= xdr_pad;
 		}
+
+		goto tail;
+	}
+
+	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+	page_off = xdr->page_base & ~PAGE_MASK;
+	remaining = xdr->page_len;
+	while (remaining) {
+		len = min_t(u32, PAGE_SIZE - page_off, remaining);
+
+		ret = svc_rdma_dma_map_page(rdma, ctxt, sge_no++,
+					    *ppages++, page_off, len);
+		if (ret < 0)
+			return ret;
+
+		remaining -= len;
+		page_off = 0;
 	}
-	/* Update the req with the number of chunks actually used */
-	svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);
 
-	return rqstp->rq_res.page_len;
+	base = xdr->tail[0].iov_base;
+	len = xdr->tail[0].iov_len;
+tail:
+	if (len) {
+		ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++, base, len);
+		if (ret < 0)
+			return ret;
+	}
 
-out_err:
-	pr_err("svcrdma: failed to send write chunks, rc=%d\n", ret);
-	return -EIO;
+	return sge_no - 1;
 }
 
-noinline
-static int send_reply_chunks(struct svcxprt_rdma *xprt,
-			     struct rpcrdma_write_array *rp_ary,
-			     struct rpcrdma_msg *rdma_resp,
-			     struct svc_rqst *rqstp,
-			     struct svc_rdma_req_map *vec)
+/* The svc_rqst and all resources it owns are released as soon as
+ * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt
+ * so they are released by the Send completion handler.
+ */
+static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
+				   struct svc_rdma_op_ctxt *ctxt)
 {
-	u32 xfer_len = rqstp->rq_res.len;
-	int write_len;
-	u32 xdr_off;
-	int chunk_no;
-	int chunk_off;
-	int nchunks;
-	struct rpcrdma_segment *ch;
-	struct rpcrdma_write_array *res_ary;
-	int ret;
+	int i, pages = rqstp->rq_next_page - rqstp->rq_respages;
 
-	/* XXX: need to fix when reply lists occur with read-list and or
-	 * write-list */
-	res_ary = (struct rpcrdma_write_array *)
-		&rdma_resp->rm_body.rm_chunks[2];
-
-	/* xdr offset starts at RPC message */
-	nchunks = be32_to_cpu(rp_ary->wc_nchunks);
-	for (xdr_off = 0, chunk_no = 0;
-	     xfer_len && chunk_no < nchunks;
-	     chunk_no++) {
-		u64 rs_offset;
-		ch = &rp_ary->wc_array[chunk_no].wc_target;
-		write_len = min(xfer_len, be32_to_cpu(ch->rs_length));
-
-		/* Prepare the reply chunk given the length actually
-		 * written */
-		xdr_decode_hyper((__be32 *)&ch->rs_offset, &rs_offset);
-		svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
-						ch->rs_handle, ch->rs_offset,
-						write_len);
-		chunk_off = 0;
-		while (write_len) {
-			ret = send_write(xprt, rqstp,
-					 be32_to_cpu(ch->rs_handle),
-					 rs_offset + chunk_off,
-					 xdr_off,
-					 write_len,
-					 vec);
-			if (ret <= 0)
-				goto out_err;
-			chunk_off += ret;
-			xdr_off += ret;
-			xfer_len -= ret;
-			write_len -= ret;
-		}
+	ctxt->count += pages;
+	for (i = 0; i < pages; i++) {
+		ctxt->pages[i + 1] = rqstp->rq_respages[i];
+		rqstp->rq_respages[i] = NULL;
 	}
-	/* Update the req with the number of chunks actually used */
-	svc_rdma_xdr_encode_reply_array(res_ary, chunk_no);
+	rqstp->rq_next_page = rqstp->rq_respages + 1;
+}
 
-	return rqstp->rq_res.len;
+/**
+ * svc_rdma_post_send_wr - Set up and post one Send Work Request
+ * @rdma: controlling transport
+ * @ctxt: op_ctxt for transmitting the Send WR
+ * @num_sge: number of SGEs to send
+ * @inv_rkey: R_key argument to Send With Invalidate, or zero
+ *
+ * Returns:
+ *	%0 if the Send* was posted successfully,
+ *	%-ENOTCONN if the connection was lost or dropped,
+ *	%-EINVAL if there was a problem with the Send we built,
+ *	%-ENOMEM if ib_post_send failed.
+ */
+int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
+			  struct svc_rdma_op_ctxt *ctxt, int num_sge,
+			  u32 inv_rkey)
+{
+	struct ib_send_wr *send_wr = &ctxt->send_wr;
 
-out_err:
-	pr_err("svcrdma: failed to send reply chunks, rc=%d\n", ret);
-	return -EIO;
+	dprintk("svcrdma: posting Send WR with %u sge(s)\n", num_sge);
+
+	send_wr->next = NULL;
+	ctxt->cqe.done = svc_rdma_wc_send;
+	send_wr->wr_cqe = &ctxt->cqe;
+	send_wr->sg_list = ctxt->sge;
+	send_wr->num_sge = num_sge;
+	send_wr->send_flags = IB_SEND_SIGNALED;
+	if (inv_rkey) {
+		send_wr->opcode = IB_WR_SEND_WITH_INV;
+		send_wr->ex.invalidate_rkey = inv_rkey;
+	} else {
+		send_wr->opcode = IB_WR_SEND;
+	}
+
+	return svc_rdma_send(rdma, send_wr);
 }
 
-/* This function prepares the portion of the RPCRDMA message to be
- * sent in the RDMA_SEND. This function is called after data sent via
- * RDMA has already been transmitted. There are three cases:
- * - The RPCRDMA header, RPC header, and payload are all sent in a
- *   single RDMA_SEND. This is the "inline" case.
- * - The RPCRDMA header and some portion of the RPC header and data
- *   are sent via this RDMA_SEND and another portion of the data is
- *   sent via RDMA.
- * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC
- *   header and data are all transmitted via RDMA.
- * In all three cases, this function prepares the RPCRDMA header in
- * sge[0], the 'type' parameter indicates the type to place in the
- * RPCRDMA header, and the 'byte_count' field indicates how much of
- * the XDR to include in this RDMA_SEND. NB: The offset of the payload
- * to send is zero in the XDR.
+/* Prepare the portion of the RPC Reply that will be transmitted
+ * via RDMA Send. The RPC-over-RDMA transport header is prepared
+ * in sge[0], and the RPC xdr_buf is prepared in following sges.
+ *
+ * Depending on whether a Write list or Reply chunk is present,
+ * the server may send all, a portion of, or none of the xdr_buf.
+ * In the latter case, only the transport header (sge[0]) is
+ * transmitted.
+ *
+ * RDMA Send is the last step of transmitting an RPC reply. Pages
+ * involved in the earlier RDMA Writes are here transferred out
+ * of the rqstp and into the ctxt's page array. These pages are
+ * DMA unmapped by each Write completion, but the subsequent Send
+ * completion finally releases these pages.
+ *
+ * Assumptions:
+ * - The Reply's transport header will never be larger than a page.
  */
-static int send_reply(struct svcxprt_rdma *rdma,
-		      struct svc_rqst *rqstp,
-		      struct page *page,
-		      struct rpcrdma_msg *rdma_resp,
-		      struct svc_rdma_req_map *vec,
-		      int byte_count,
-		      u32 inv_rkey)
+static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
+				   __be32 *rdma_argp, __be32 *rdma_resp,
+				   struct svc_rqst *rqstp,
+				   __be32 *wr_lst, __be32 *rp_ch)
 {
 	struct svc_rdma_op_ctxt *ctxt;
-	struct ib_send_wr send_wr;
-	u32 xdr_off;
-	int sge_no;
-	int sge_bytes;
-	int page_no;
-	int pages;
-	int ret = -EIO;
-
-	/* Prepare the context */
+	u32 inv_rkey;
+	int ret;
+
+	dprintk("svcrdma: sending %s reply: head=%zu, pagelen=%u, tail=%zu\n",
+		(rp_ch ? "RDMA_NOMSG" : "RDMA_MSG"),
+		rqstp->rq_res.head[0].iov_len,
+		rqstp->rq_res.page_len,
+		rqstp->rq_res.tail[0].iov_len);
+
 	ctxt = svc_rdma_get_context(rdma);
-	ctxt->direction = DMA_TO_DEVICE;
-	ctxt->pages[0] = page;
-	ctxt->count = 1;
 
-	/* Prepare the SGE for the RPCRDMA Header */
-	ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
-	ctxt->sge[0].length =
-	    svc_rdma_xdr_get_reply_hdr_len((__be32 *)rdma_resp);
-	ctxt->sge[0].addr =
-	    ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
-			    ctxt->sge[0].length, DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
+	ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp,
+				     svc_rdma_reply_hdr_len(rdma_resp));
+	if (ret < 0)
 		goto err;
-	svc_rdma_count_mappings(rdma, ctxt);
-
-	ctxt->direction = DMA_TO_DEVICE;
 
-	/* Map the payload indicated by 'byte_count' */
-	xdr_off = 0;
-	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
-		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
-		byte_count -= sge_bytes;
-		ctxt->sge[sge_no].addr =
-			dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
-				    sge_bytes, DMA_TO_DEVICE);
-		xdr_off += sge_bytes;
-		if (ib_dma_mapping_error(rdma->sc_cm_id->device,
-					 ctxt->sge[sge_no].addr))
+	if (!rp_ch) {
+		ret = svc_rdma_map_reply_msg(rdma, ctxt,
+					     &rqstp->rq_res, wr_lst);
+		if (ret < 0)
 			goto err;
-		svc_rdma_count_mappings(rdma, ctxt);
-		ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
-		ctxt->sge[sge_no].length = sge_bytes;
 	}
-	if (byte_count != 0) {
-		pr_err("svcrdma: Could not map %d bytes\n", byte_count);
+
+	svc_rdma_save_io_pages(rqstp, ctxt);
+
+	inv_rkey = 0;
+	if (rdma->sc_snd_w_inv)
+		inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
+	ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, inv_rkey);
+	if (ret)
 		goto err;
-	}
 
-	/* Save all respages in the ctxt and remove them from the
-	 * respages array. They are our pages until the I/O
-	 * completes.
+	return 0;
+
+err:
+	pr_err("svcrdma: failed to post Send WR (%d)\n", ret);
+	svc_rdma_unmap_dma(ctxt);
+	svc_rdma_put_context(ctxt, 1);
+	return ret;
+}
+
+/* Given the client-provided Write and Reply chunks, the server was not
+ * able to form a complete reply. Return an RDMA_ERROR message so the
+ * client can retire this RPC transaction. As above, the Send completion
+ * routine releases payload pages that were part of a previous RDMA Write.
+ *
+ * Remote Invalidation is skipped for simplicity.
+ */
+static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
+				   __be32 *rdma_resp, struct svc_rqst *rqstp)
+{
+	struct svc_rdma_op_ctxt *ctxt;
+	__be32 *p;
+	int ret;
+
+	ctxt = svc_rdma_get_context(rdma);
+
+	/* Replace the original transport header with an
+	 * RDMA_ERROR response. XID etc are preserved.
 	 */
-	pages = rqstp->rq_next_page - rqstp->rq_respages;
-	for (page_no = 0; page_no < pages; page_no++) {
-		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
-		ctxt->count++;
-		rqstp->rq_respages[page_no] = NULL;
-	}
-	rqstp->rq_next_page = rqstp->rq_respages + 1;
+	p = rdma_resp + 3;
+	*p++ = rdma_error;
+	*p   = err_chunk;
 
-	if (sge_no > rdma->sc_max_sge) {
-		pr_err("svcrdma: Too many sges (%d)\n", sge_no);
+	ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp, 20);
+	if (ret < 0)
 		goto err;
-	}
-	memset(&send_wr, 0, sizeof send_wr);
-	ctxt->cqe.done = svc_rdma_wc_send;
-	send_wr.wr_cqe = &ctxt->cqe;
-	send_wr.sg_list = ctxt->sge;
-	send_wr.num_sge = sge_no;
-	if (inv_rkey) {
-		send_wr.opcode = IB_WR_SEND_WITH_INV;
-		send_wr.ex.invalidate_rkey = inv_rkey;
-	} else
-		send_wr.opcode = IB_WR_SEND;
-	send_wr.send_flags =  IB_SEND_SIGNALED;
 
-	ret = svc_rdma_send(rdma, &send_wr);
+	svc_rdma_save_io_pages(rqstp, ctxt);
+
+	ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, 0);
 	if (ret)
 		goto err;
 
 	return 0;
 
- err:
+err:
+	pr_err("svcrdma: failed to post Send WR (%d)\n", ret);
 	svc_rdma_unmap_dma(ctxt);
 	svc_rdma_put_context(ctxt, 1);
 	return ret;
@@ -552,39 +599,36 @@ void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
 {
 }
 
+/**
+ * svc_rdma_sendto - Transmit an RPC reply
+ * @rqstp: processed RPC request, reply XDR already in ::rq_res
+ *
+ * Any resources still associated with @rqstp are released upon return.
+ * If no reply message was possible, the connection is closed.
+ *
+ * Returns:
+ *	%0 if an RPC reply has been successfully posted,
+ *	%-ENOMEM if a resource shortage occurred (connection is lost),
+ *	%-ENOTCONN if posting failed (connection is lost).
+ */
 int svc_rdma_sendto(struct svc_rqst *rqstp)
 {
 	struct svc_xprt *xprt = rqstp->rq_xprt;
 	struct svcxprt_rdma *rdma =
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
-	struct rpcrdma_msg *rdma_argp;
-	struct rpcrdma_msg *rdma_resp;
-	struct rpcrdma_write_array *wr_ary, *rp_ary;
-	int ret;
-	int inline_bytes;
+	__be32 *p, *rdma_argp, *rdma_resp, *wr_lst, *rp_ch;
+	struct xdr_buf *xdr = &rqstp->rq_res;
 	struct page *res_page;
-	struct svc_rdma_req_map *vec;
-	u32 inv_rkey;
-	__be32 *p;
-
-	dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
+	int ret;
 
-	/* Get the RDMA request header. The receive logic always
-	 * places this at the start of page 0.
+	/* Find the call's chunk lists to decide how to send the reply.
+	 * Receive places the Call's xprt header at the start of page 0.
 	 */
 	rdma_argp = page_address(rqstp->rq_pages[0]);
-	svc_rdma_get_write_arrays(rdma_argp, &wr_ary, &rp_ary);
-
-	inv_rkey = 0;
-	if (rdma->sc_snd_w_inv)
-		inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary);
+	svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch);
 
-	/* Build an req vec for the XDR */
-	vec = svc_rdma_get_req_map(rdma);
-	ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
-	if (ret)
-		goto err0;
-	inline_bytes = rqstp->rq_res.len;
+	dprintk("svcrdma: preparing response for XID 0x%08x\n",
+		be32_to_cpup(rdma_argp));
 
 	/* Create the RDMA response header. xprt->xpt_mutex,
 	 * acquired in svc_send(), serializes RPC replies. The
@@ -598,115 +642,57 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 		goto err0;
 	rdma_resp = page_address(res_page);
 
-	p = &rdma_resp->rm_xid;
-	*p++ = rdma_argp->rm_xid;
-	*p++ = rdma_argp->rm_vers;
+	p = rdma_resp;
+	*p++ = *rdma_argp;
+	*p++ = *(rdma_argp + 1);
 	*p++ = rdma->sc_fc_credits;
-	*p++ = rp_ary ? rdma_nomsg : rdma_msg;
+	*p++ = rp_ch ? rdma_nomsg : rdma_msg;
 
 	/* Start with empty chunks */
 	*p++ = xdr_zero;
 	*p++ = xdr_zero;
 	*p   = xdr_zero;
 
-	/* Send any write-chunk data and build resp write-list */
-	if (wr_ary) {
-		ret = send_write_chunks(rdma, wr_ary, rdma_resp, rqstp, vec);
+	if (wr_lst) {
+		/* XXX: Presume the client sent only one Write chunk */
+		ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr);
 		if (ret < 0)
-			goto err1;
-		inline_bytes -= ret + xdr_padsize(ret);
+			goto err2;
+		svc_rdma_xdr_encode_write_list(rdma_resp, wr_lst, ret);
 	}
-
-	/* Send any reply-list data and update resp reply-list */
-	if (rp_ary) {
-		ret = send_reply_chunks(rdma, rp_ary, rdma_resp, rqstp, vec);
+	if (rp_ch) {
+		ret = svc_rdma_send_reply_chunk(rdma, rp_ch, wr_lst, xdr);
 		if (ret < 0)
-			goto err1;
-		inline_bytes -= ret;
+			goto err2;
+		svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret);
 	}
 
-	/* Post a fresh Receive buffer _before_ sending the reply */
 	ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
 	if (ret)
 		goto err1;
-
-	ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec,
-			 inline_bytes, inv_rkey);
+	ret = svc_rdma_send_reply_msg(rdma, rdma_argp, rdma_resp, rqstp,
+				      wr_lst, rp_ch);
 	if (ret < 0)
 		goto err0;
+	return 0;
 
-	svc_rdma_put_req_map(rdma, vec);
-	dprintk("svcrdma: send_reply returns %d\n", ret);
-	return ret;
+ err2:
+	if (ret != -E2BIG)
+		goto err1;
+
+	ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
+	if (ret)
+		goto err1;
+	ret = svc_rdma_send_error_msg(rdma, rdma_resp, rqstp);
+	if (ret < 0)
+		goto err0;
+	return 0;
 
  err1:
 	put_page(res_page);
  err0:
-	svc_rdma_put_req_map(rdma, vec);
 	pr_err("svcrdma: Could not send reply, err=%d. Closing transport.\n",
 	       ret);
-	set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+	set_bit(XPT_CLOSE, &xprt->xpt_flags);
 	return -ENOTCONN;
 }
-
-void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
-			 int status)
-{
-	struct ib_send_wr err_wr;
-	struct page *p;
-	struct svc_rdma_op_ctxt *ctxt;
-	enum rpcrdma_errcode err;
-	__be32 *va;
-	int length;
-	int ret;
-
-	ret = svc_rdma_repost_recv(xprt, GFP_KERNEL);
-	if (ret)
-		return;
-
-	p = alloc_page(GFP_KERNEL);
-	if (!p)
-		return;
-	va = page_address(p);
-
-	/* XDR encode an error reply */
-	err = ERR_CHUNK;
-	if (status == -EPROTONOSUPPORT)
-		err = ERR_VERS;
-	length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
-
-	ctxt = svc_rdma_get_context(xprt);
-	ctxt->direction = DMA_TO_DEVICE;
-	ctxt->count = 1;
-	ctxt->pages[0] = p;
-
-	/* Prepare SGE for local address */
-	ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
-	ctxt->sge[0].length = length;
-	ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
-					    p, 0, length, DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
-		dprintk("svcrdma: Error mapping buffer for protocol error\n");
-		svc_rdma_put_context(ctxt, 1);
-		return;
-	}
-	svc_rdma_count_mappings(xprt, ctxt);
-
-	/* Prepare SEND WR */
-	memset(&err_wr, 0, sizeof(err_wr));
-	ctxt->cqe.done = svc_rdma_wc_send;
-	err_wr.wr_cqe = &ctxt->cqe;
-	err_wr.sg_list = ctxt->sge;
-	err_wr.num_sge = 1;
-	err_wr.opcode = IB_WR_SEND;
-	err_wr.send_flags = IB_SEND_SIGNALED;
-
-	/* Post It */
-	ret = svc_rdma_send(xprt, &err_wr);
-	if (ret) {
-		dprintk("svcrdma: Error %d posting send for protocol error\n",
-			ret);
-		svc_rdma_unmap_dma(ctxt);
-		svc_rdma_put_context(ctxt, 1);
-	}
-}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index fc8f14c7bfec..a9d9cb1ba4c6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -272,85 +272,6 @@ static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
 	}
 }
 
-static struct svc_rdma_req_map *alloc_req_map(gfp_t flags)
-{
-	struct svc_rdma_req_map *map;
-
-	map = kmalloc(sizeof(*map), flags);
-	if (map)
-		INIT_LIST_HEAD(&map->free);
-	return map;
-}
-
-static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt)
-{
-	unsigned int i;
-
-	/* One for each receive buffer on this connection. */
-	i = xprt->sc_max_requests;
-
-	while (i--) {
-		struct svc_rdma_req_map *map;
-
-		map = alloc_req_map(GFP_KERNEL);
-		if (!map) {
-			dprintk("svcrdma: No memory for request map\n");
-			return false;
-		}
-		list_add(&map->free, &xprt->sc_maps);
-	}
-	return true;
-}
-
-struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt)
-{
-	struct svc_rdma_req_map *map = NULL;
-
-	spin_lock(&xprt->sc_map_lock);
-	if (list_empty(&xprt->sc_maps))
-		goto out_empty;
-
-	map = list_first_entry(&xprt->sc_maps,
-			       struct svc_rdma_req_map, free);
-	list_del_init(&map->free);
-	spin_unlock(&xprt->sc_map_lock);
-
-out:
-	map->count = 0;
-	return map;
-
-out_empty:
-	spin_unlock(&xprt->sc_map_lock);
-
-	/* Pre-allocation amount was incorrect */
-	map = alloc_req_map(GFP_NOIO);
-	if (map)
-		goto out;
-
-	WARN_ONCE(1, "svcrdma: empty request map list?\n");
-	return NULL;
-}
-
-void svc_rdma_put_req_map(struct svcxprt_rdma *xprt,
-			  struct svc_rdma_req_map *map)
-{
-	spin_lock(&xprt->sc_map_lock);
-	list_add(&map->free, &xprt->sc_maps);
-	spin_unlock(&xprt->sc_map_lock);
-}
-
-static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt)
-{
-	while (!list_empty(&xprt->sc_maps)) {
-		struct svc_rdma_req_map *map;
-
-		map = list_first_entry(&xprt->sc_maps,
-				       struct svc_rdma_req_map, free);
-		list_del(&map->free);
-		kfree(map);
-	}
-}
-
 /* QP event handler */
 static void qp_event_handler(struct ib_event *event, void *context)
 {
@@ -474,24 +395,6 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 }
 
 /**
- * svc_rdma_wc_write - Invoked by RDMA provider for each polled Write WC
- * @cq:        completion queue
- * @wc:        completed WR
- *
- */
-void svc_rdma_wc_write(struct ib_cq *cq, struct ib_wc *wc)
-{
-	struct ib_cqe *cqe = wc->wr_cqe;
-	struct svc_rdma_op_ctxt *ctxt;
-
-	svc_rdma_send_wc_common_put(cq, wc, "write");
-
-	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
-	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_context(ctxt, 0);
-}
-
-/**
  * svc_rdma_wc_reg - Invoked by RDMA provider for each polled FASTREG WC
  * @cq:        completion queue
  * @wc:        completed WR
@@ -561,14 +464,14 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
-	INIT_LIST_HEAD(&cma_xprt->sc_maps);
+	INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
 	init_waitqueue_head(&cma_xprt->sc_send_wait);
 
 	spin_lock_init(&cma_xprt->sc_lock);
 	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
 	spin_lock_init(&cma_xprt->sc_frmr_q_lock);
 	spin_lock_init(&cma_xprt->sc_ctxt_lock);
-	spin_lock_init(&cma_xprt->sc_map_lock);
+	spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
 
 	/*
 	 * Note that this implies that the underlying transport support
@@ -999,6 +902,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 		newxprt, newxprt->sc_cm_id);
 
 	dev = newxprt->sc_cm_id->device;
+	newxprt->sc_port_num = newxprt->sc_cm_id->port_num;
 
 	/* Qualify the transport resource defaults with the
 	 * capabilities of this particular device */
@@ -1014,13 +918,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 					    svcrdma_max_bc_requests);
 	newxprt->sc_rq_depth = newxprt->sc_max_requests +
 			       newxprt->sc_max_bc_requests;
-	newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
+	newxprt->sc_sq_depth = newxprt->sc_rq_depth;
 	atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
 
 	if (!svc_rdma_prealloc_ctxts(newxprt))
 		goto errout;
-	if (!svc_rdma_prealloc_maps(newxprt))
-		goto errout;
 
 	/*
 	 * Limit ORD based on client limit, local device limit, and
@@ -1050,6 +952,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	memset(&qp_attr, 0, sizeof qp_attr);
 	qp_attr.event_handler = qp_event_handler;
 	qp_attr.qp_context = &newxprt->sc_xprt;
+	qp_attr.port_num = newxprt->sc_cm_id->port_num;
+	qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests;
 	qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
 	qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
 	qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
@@ -1248,8 +1152,8 @@ static void __svc_rdma_free(struct work_struct *work)
 	}
 
 	rdma_dealloc_frmr_q(rdma);
+	svc_rdma_destroy_rw_ctxts(rdma);
 	svc_rdma_destroy_ctxts(rdma);
-	svc_rdma_destroy_maps(rdma);
 
 	/* Destroy the QP if present (not a listener) */
 	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index c717f5410776..62ecbccd9748 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -66,8 +66,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
 unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_inline_write_padding;
-static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
-		int xprt_rdma_pad_optimize = 0;
+unsigned int xprt_rdma_memreg_strategy		= RPCRDMA_FRMR;
+int xprt_rdma_pad_optimize;
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 
@@ -396,7 +396,7 @@ xprt_setup_rdma(struct xprt_create *args)
 
 	new_xprt = rpcx_to_rdmax(xprt);
 
-	rc = rpcrdma_ia_open(new_xprt, sap, xprt_rdma_memreg_strategy);
+	rc = rpcrdma_ia_open(new_xprt, sap);
 	if (rc)
 		goto out1;
 
@@ -457,19 +457,33 @@ out1:
 	return ERR_PTR(rc);
 }
 
-/*
- * Close a connection, during shutdown or timeout/reconnect
+/**
+ * xprt_rdma_close - Close down RDMA connection
+ * @xprt: generic transport to be closed
+ *
+ * Called during transport shutdown reconnect, or device
+ * removal. Caller holds the transport's write lock.
  */
 static void
 xprt_rdma_close(struct rpc_xprt *xprt)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+	dprintk("RPC:       %s: closing xprt %p\n", __func__, xprt);
 
-	dprintk("RPC:       %s: closing\n", __func__);
-	if (r_xprt->rx_ep.rep_connected > 0)
+	if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
+		xprt_clear_connected(xprt);
+		rpcrdma_ia_remove(ia);
+		return;
+	}
+	if (ep->rep_connected == -ENODEV)
+		return;
+	if (ep->rep_connected > 0)
 		xprt->reestablish_timeout = 0;
 	xprt_disconnect_done(xprt);
-	rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+	rpcrdma_ep_disconnect(ep, ia);
 }
 
 static void
@@ -484,6 +498,27 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
 	dprintk("RPC:       %s: %u\n", __func__, port);
 }
 
+/**
+ * xprt_rdma_timer - invoked when an RPC times out
+ * @xprt: controlling RPC transport
+ * @task: RPC task that timed out
+ *
+ * Invoked when the transport is still connected, but an RPC
+ * retransmit timeout occurs.
+ *
+ * Since RDMA connections don't have a keep-alive, forcibly
+ * disconnect and retry to connect. This drives full
+ * detection of the network path, and retransmissions of
+ * all pending RPCs.
+ */
+static void
+xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+	dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);
+
+	xprt_force_disconnect(xprt);
+}
+
 static void
 xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 {
@@ -659,6 +694,8 @@ xprt_rdma_free(struct rpc_task *task)
  * xprt_rdma_send_request - marshal and send an RPC request
  * @task: RPC task with an RPC message in rq_snd_buf
  *
+ * Caller holds the transport's write lock.
+ *
  * Return values:
  *        0:	The request has been sent
  * ENOTCONN:	Caller needs to invoke connect logic then call again
@@ -685,6 +722,9 @@ xprt_rdma_send_request(struct rpc_task *task)
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	int rc = 0;
 
+	if (!xprt_connected(xprt))
+		goto drop_connection;
+
 	/* On retransmit, remove any previously registered chunks */
 	if (unlikely(!list_empty(&req->rl_registered)))
 		r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
@@ -776,6 +816,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
 	.alloc_slot		= xprt_alloc_slot,
 	.release_request	= xprt_release_rqst_cong,       /* ditto */
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def, /* ditto */
+	.timer			= xprt_rdma_timer,
 	.rpcbind		= rpcb_getport_async,	/* sunrpc/rpcb_clnt.c */
 	.set_port		= xprt_rdma_set_port,
 	.connect		= xprt_rdma_connect,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3b332b395045..3dbce9ac4327 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -53,7 +53,7 @@
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/svc_rdma.h>
 #include <asm/bitops.h>
-#include <linux/module.h> /* try_module_get()/module_put() */
+
 #include <rdma/ib_cm.h>
 
 #include "xprt_rdma.h"
@@ -69,8 +69,11 @@
 /*
  * internal functions
  */
+static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
+static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
+static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
 
-static struct workqueue_struct *rpcrdma_receive_wq;
+static struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
 
 int
 rpcrdma_alloc_wq(void)
@@ -180,7 +183,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	rep->rr_wc_flags = wc->wc_flags;
 	rep->rr_inv_rkey = wc->ex.invalidate_rkey;
 
-	ib_dma_sync_single_for_cpu(rep->rr_device,
+	ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf),
 				   rdmab_addr(rep->rr_rdmabuf),
 				   rep->rr_len, DMA_FROM_DEVICE);
 
@@ -262,6 +265,21 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 			__func__, ep);
 		complete(&ia->ri_done);
 		break;
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+		pr_info("rpcrdma: removing device for %pIS:%u\n",
+			sap, rpc_get_port(sap));
+#endif
+		set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
+		ep->rep_connected = -ENODEV;
+		xprt_force_disconnect(&xprt->rx_xprt);
+		wait_for_completion(&ia->ri_remove_done);
+
+		ia->ri_id = NULL;
+		ia->ri_pd = NULL;
+		ia->ri_device = NULL;
+		/* Return 1 to ensure the core destroys the id. */
+		return 1;
 	case RDMA_CM_EVENT_ESTABLISHED:
 		connstate = 1;
 		ib_query_qp(ia->ri_id->qp, attr,
@@ -291,9 +309,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		goto connected;
 	case RDMA_CM_EVENT_DISCONNECTED:
 		connstate = -ECONNABORTED;
-		goto connected;
-	case RDMA_CM_EVENT_DEVICE_REMOVAL:
-		connstate = -ENODEV;
 connected:
 		dprintk("RPC:       %s: %sconnected\n",
 					__func__, connstate > 0 ? "" : "dis");
@@ -329,14 +344,6 @@ connected:
 	return 0;
 }
 
-static void rpcrdma_destroy_id(struct rdma_cm_id *id)
-{
-	if (id) {
-		module_put(id->device->owner);
-		rdma_destroy_id(id);
-	}
-}
-
 static struct rdma_cm_id *
 rpcrdma_create_id(struct rpcrdma_xprt *xprt,
 			struct rpcrdma_ia *ia, struct sockaddr *addr)
@@ -346,6 +353,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
 	int rc;
 
 	init_completion(&ia->ri_done);
+	init_completion(&ia->ri_remove_done);
 
 	id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
 			    IB_QPT_RC);
@@ -370,16 +378,6 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
 		goto out;
 	}
 
-	/* FIXME:
-	 * Until xprtrdma supports DEVICE_REMOVAL, the provider must
-	 * be pinned while there are active NFS/RDMA mounts to prevent
-	 * hangs and crashes at umount time.
-	 */
-	if (!ia->ri_async_rc && !try_module_get(id->device->owner)) {
-		dprintk("RPC:       %s: Failed to get device module\n",
-			__func__);
-		ia->ri_async_rc = -ENODEV;
-	}
 	rc = ia->ri_async_rc;
 	if (rc)
 		goto out;
@@ -389,21 +387,20 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
 	if (rc) {
 		dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
 			__func__, rc);
-		goto put;
+		goto out;
 	}
 	rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
 	if (rc < 0) {
 		dprintk("RPC:       %s: wait() exited: %i\n",
 			__func__, rc);
-		goto put;
+		goto out;
 	}
 	rc = ia->ri_async_rc;
 	if (rc)
-		goto put;
+		goto out;
 
 	return id;
-put:
-	module_put(id->device->owner);
+
 out:
 	rdma_destroy_id(id);
 	return ERR_PTR(rc);
@@ -413,13 +410,16 @@ out:
  * Exported functions.
  */
 
-/*
- * Open and initialize an Interface Adapter.
- *  o initializes fields of struct rpcrdma_ia, including
- *    interface and provider attributes and protection zone.
+/**
+ * rpcrdma_ia_open - Open and initialize an Interface Adapter.
+ * @xprt: controlling transport
+ * @addr: IP address of remote peer
+ *
+ * Returns 0 on success, negative errno if an appropriate
+ * Interface Adapter could not be found and opened.
  */
 int
-rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
+rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
 {
 	struct rpcrdma_ia *ia = &xprt->rx_ia;
 	int rc;
@@ -427,7 +427,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
 	if (IS_ERR(ia->ri_id)) {
 		rc = PTR_ERR(ia->ri_id);
-		goto out1;
+		goto out_err;
 	}
 	ia->ri_device = ia->ri_id->device;
 
@@ -435,10 +435,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 	if (IS_ERR(ia->ri_pd)) {
 		rc = PTR_ERR(ia->ri_pd);
 		pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
-		goto out2;
+		goto out_err;
 	}
 
-	switch (memreg) {
+	switch (xprt_rdma_memreg_strategy) {
 	case RPCRDMA_FRMR:
 		if (frwr_is_supported(ia)) {
 			ia->ri_ops = &rpcrdma_frwr_memreg_ops;
@@ -452,28 +452,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 		}
 		/*FALLTHROUGH*/
 	default:
-		pr_err("rpcrdma: Unsupported memory registration mode: %d\n",
-		       memreg);
+		pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
+		       ia->ri_device->name, xprt_rdma_memreg_strategy);
 		rc = -EINVAL;
-		goto out3;
+		goto out_err;
 	}
 
 	return 0;
 
-out3:
-	ib_dealloc_pd(ia->ri_pd);
-	ia->ri_pd = NULL;
-out2:
-	rpcrdma_destroy_id(ia->ri_id);
-	ia->ri_id = NULL;
-out1:
+out_err:
+	rpcrdma_ia_close(ia);
 	return rc;
 }
 
-/*
- * Clean up/close an IA.
- *   o if event handles and PD have been initialized, free them.
- *   o close the IA
+/**
+ * rpcrdma_ia_remove - Handle device driver unload
+ * @ia: interface adapter being removed
+ *
+ * Divest transport H/W resources associated with this adapter,
+ * but allow it to be restored later.
+ */
+void
+rpcrdma_ia_remove(struct rpcrdma_ia *ia)
+{
+	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
+						   rx_ia);
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct rpcrdma_req *req;
+	struct rpcrdma_rep *rep;
+
+	cancel_delayed_work_sync(&buf->rb_refresh_worker);
+
+	/* This is similar to rpcrdma_ep_destroy, but:
+	 * - Don't cancel the connect worker.
+	 * - Don't call rpcrdma_ep_disconnect, which waits
+	 *   for another conn upcall, which will deadlock.
+	 * - rdma_disconnect is unneeded, the underlying
+	 *   connection is already gone.
+	 */
+	if (ia->ri_id->qp) {
+		ib_drain_qp(ia->ri_id->qp);
+		rdma_destroy_qp(ia->ri_id);
+		ia->ri_id->qp = NULL;
+	}
+	ib_free_cq(ep->rep_attr.recv_cq);
+	ib_free_cq(ep->rep_attr.send_cq);
+
+	/* The ULP is responsible for ensuring all DMA
+	 * mappings and MRs are gone.
+	 */
+	list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
+		rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
+	list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
+		rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
+		rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
+		rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
+	}
+	rpcrdma_destroy_mrs(buf);
+
+	/* Allow waiters to continue */
+	complete(&ia->ri_remove_done);
+}
+
+/**
+ * rpcrdma_ia_close - Clean up/close an IA.
+ * @ia: interface adapter to close
+ *
  */
 void
 rpcrdma_ia_close(struct rpcrdma_ia *ia)
@@ -482,13 +527,15 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
 	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
 		if (ia->ri_id->qp)
 			rdma_destroy_qp(ia->ri_id);
-		rpcrdma_destroy_id(ia->ri_id);
-		ia->ri_id = NULL;
+		rdma_destroy_id(ia->ri_id);
 	}
+	ia->ri_id = NULL;
+	ia->ri_device = NULL;
 
 	/* If the pd is still busy, xprtrdma missed freeing a resource */
 	if (ia->ri_pd && !IS_ERR(ia->ri_pd))
 		ib_dealloc_pd(ia->ri_pd);
+	ia->ri_pd = NULL;
 }
 
 /*
@@ -646,6 +693,99 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 	ib_free_cq(ep->rep_attr.send_cq);
 }
 
+/* Re-establish a connection after a device removal event.
+ * Unlike a normal reconnection, a fresh PD and a new set
+ * of MRs and buffers is needed.
+ */
+static int
+rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
+			 struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+	struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
+	int rc, err;
+
+	pr_info("%s: r_xprt = %p\n", __func__, r_xprt);
+
+	rc = -EHOSTUNREACH;
+	if (rpcrdma_ia_open(r_xprt, sap))
+		goto out1;
+
+	rc = -ENOMEM;
+	err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data);
+	if (err) {
+		pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err);
+		goto out2;
+	}
+
+	rc = -ENETUNREACH;
+	err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+	if (err) {
+		pr_err("rpcrdma: rdma_create_qp returned %d\n", err);
+		goto out3;
+	}
+
+	rpcrdma_create_mrs(r_xprt);
+	return 0;
+
+out3:
+	rpcrdma_ep_destroy(ep, ia);
+out2:
+	rpcrdma_ia_close(ia);
+out1:
+	return rc;
+}
+
+static int
+rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
+		     struct rpcrdma_ia *ia)
+{
+	struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
+	struct rdma_cm_id *id, *old;
+	int err, rc;
+
+	dprintk("RPC:       %s: reconnecting...\n", __func__);
+
+	rpcrdma_ep_disconnect(ep, ia);
+
+	rc = -EHOSTUNREACH;
+	id = rpcrdma_create_id(r_xprt, ia, sap);
+	if (IS_ERR(id))
+		goto out;
+
+	/* As long as the new ID points to the same device as the
+	 * old ID, we can reuse the transport's existing PD and all
+	 * previously allocated MRs. Also, the same device means
+	 * the transport's previous DMA mappings are still valid.
+	 *
+	 * This is a sanity check only. There should be no way these
+	 * point to two different devices here.
+	 */
+	old = id;
+	rc = -ENETUNREACH;
+	if (ia->ri_device != id->device) {
+		pr_err("rpcrdma: can't reconnect on different device!\n");
+		goto out_destroy;
+	}
+
+	err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
+	if (err) {
+		dprintk("RPC:       %s: rdma_create_qp returned %d\n",
+			__func__, err);
+		goto out_destroy;
+	}
+
+	/* Atomically replace the transport's ID and QP. */
+	rc = 0;
+	old = ia->ri_id;
+	ia->ri_id = id;
+	rdma_destroy_qp(old);
+
+out_destroy:
+	rdma_destroy_id(old);
+out:
+	return rc;
+}
+
 /*
  * Connect unconnected endpoint.
  */
@@ -654,61 +794,30 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 {
 	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
 						   rx_ia);
-	struct rdma_cm_id *id, *old;
-	struct sockaddr *sap;
 	unsigned int extras;
-	int rc = 0;
+	int rc;
 
-	if (ep->rep_connected != 0) {
 retry:
-		dprintk("RPC:       %s: reconnecting...\n", __func__);
-
-		rpcrdma_ep_disconnect(ep, ia);
-
-		sap = (struct sockaddr *)&r_xprt->rx_data.addr;
-		id = rpcrdma_create_id(r_xprt, ia, sap);
-		if (IS_ERR(id)) {
-			rc = -EHOSTUNREACH;
-			goto out;
-		}
-		/* TEMP TEMP TEMP - fail if new device:
-		 * Deregister/remarshal *all* requests!
-		 * Close and recreate adapter, pd, etc!
-		 * Re-determine all attributes still sane!
-		 * More stuff I haven't thought of!
-		 * Rrrgh!
-		 */
-		if (ia->ri_device != id->device) {
-			printk("RPC:       %s: can't reconnect on "
-				"different device!\n", __func__);
-			rpcrdma_destroy_id(id);
-			rc = -ENETUNREACH;
-			goto out;
-		}
-		/* END TEMP */
-		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
-		if (rc) {
-			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
-				__func__, rc);
-			rpcrdma_destroy_id(id);
-			rc = -ENETUNREACH;
-			goto out;
-		}
-
-		old = ia->ri_id;
-		ia->ri_id = id;
-
-		rdma_destroy_qp(old);
-		rpcrdma_destroy_id(old);
-	} else {
+	switch (ep->rep_connected) {
+	case 0:
 		dprintk("RPC:       %s: connecting...\n", __func__);
 		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
 		if (rc) {
 			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
 				__func__, rc);
-			/* do not update ep->rep_connected */
-			return -ENETUNREACH;
+			rc = -ENETUNREACH;
+			goto out_noupdate;
 		}
+		break;
+	case -ENODEV:
+		rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia);
+		if (rc)
+			goto out_noupdate;
+		break;
+	default:
+		rc = rpcrdma_ep_reconnect(r_xprt, ep, ia);
+		if (rc)
+			goto out;
 	}
 
 	ep->rep_connected = 0;
@@ -736,6 +845,8 @@ retry:
 out:
 	if (rc)
 		ep->rep_connected = rc;
+
+out_noupdate:
 	return rc;
 }
 
@@ -878,7 +989,6 @@ struct rpcrdma_rep *
 rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
 {
 	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_rep *rep;
 	int rc;
 
@@ -894,7 +1004,6 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
 		goto out_free;
 	}
 
-	rep->rr_device = ia->ri_device;
 	rep->rr_cqe.done = rpcrdma_wc_receive;
 	rep->rr_rxprt = r_xprt;
 	INIT_WORK(&rep->rr_work, rpcrdma_reply_handler);
@@ -1037,6 +1146,7 @@ void
 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 {
 	cancel_delayed_work_sync(&buf->rb_recovery_worker);
+	cancel_delayed_work_sync(&buf->rb_refresh_worker);
 
 	while (!list_empty(&buf->rb_recv_bufs)) {
 		struct rpcrdma_rep *rep;
@@ -1081,7 +1191,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
 
 out_nomws:
 	dprintk("RPC:       %s: no MWs available\n", __func__);
-	schedule_delayed_work(&buf->rb_refresh_worker, 0);
+	if (r_xprt->rx_ep.rep_connected != -ENODEV)
+		schedule_delayed_work(&buf->rb_refresh_worker, 0);
 
 	/* Allow the reply handler and refresh worker to run */
 	cond_resched();
@@ -1231,17 +1342,19 @@ rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
 bool
 __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
 {
+	struct ib_device *device = ia->ri_device;
+
 	if (rb->rg_direction == DMA_NONE)
 		return false;
 
-	rb->rg_iov.addr = ib_dma_map_single(ia->ri_device,
+	rb->rg_iov.addr = ib_dma_map_single(device,
 					    (void *)rb->rg_base,
 					    rdmab_length(rb),
 					    rb->rg_direction);
-	if (ib_dma_mapping_error(ia->ri_device, rdmab_addr(rb)))
+	if (ib_dma_mapping_error(device, rdmab_addr(rb)))
 		return false;
 
-	rb->rg_device = ia->ri_device;
+	rb->rg_device = device;
 	rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
 	return true;
 }
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 171a35116de9..1d66acf1a723 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -69,6 +69,7 @@ struct rpcrdma_ia {
 	struct rdma_cm_id 	*ri_id;
 	struct ib_pd		*ri_pd;
 	struct completion	ri_done;
+	struct completion	ri_remove_done;
 	int			ri_async_rc;
 	unsigned int		ri_max_segs;
 	unsigned int		ri_max_frmr_depth;
@@ -78,10 +79,15 @@ struct rpcrdma_ia {
 	bool			ri_reminv_expected;
 	bool			ri_implicit_roundup;
 	enum ib_mr_type		ri_mrtype;
+	unsigned long		ri_flags;
 	struct ib_qp_attr	ri_qp_attr;
 	struct ib_qp_init_attr	ri_qp_init_attr;
 };
 
+enum {
+	RPCRDMA_IAF_REMOVING = 0,
+};
+
 /*
  * RDMA Endpoint -- one per transport instance
  */
@@ -164,6 +170,12 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
 	return (struct rpcrdma_msg *)rb->rg_base;
 }
 
+static inline struct ib_device *
+rdmab_device(struct rpcrdma_regbuf *rb)
+{
+	return rb->rg_device;
+}
+
 #define RPCRDMA_DEF_GFP		(GFP_NOIO | __GFP_NOWARN)
 
 /* To ensure a transport can always make forward progress,
@@ -209,7 +221,6 @@ struct rpcrdma_rep {
 	unsigned int		rr_len;
 	int			rr_wc_flags;
 	u32			rr_inv_rkey;
-	struct ib_device	*rr_device;
 	struct rpcrdma_xprt	*rr_rxprt;
 	struct work_struct	rr_work;
 	struct list_head	rr_list;
@@ -380,7 +391,6 @@ struct rpcrdma_buffer {
 	spinlock_t		rb_mwlock;	/* protect rb_mws list */
 	struct list_head	rb_mws;
 	struct list_head	rb_all;
-	char			*rb_pool;
 
 	spinlock_t		rb_lock;	/* protect buf lists */
 	int			rb_send_count, rb_recv_count;
@@ -497,10 +507,16 @@ struct rpcrdma_xprt {
  * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
 extern int xprt_rdma_pad_optimize;
 
+/* This setting controls the hunt for a supported memory
+ * registration strategy.
+ */
+extern unsigned int xprt_rdma_memreg_strategy;
+
 /*
  * Interface Adapter calls - xprtrdma/verbs.c
  */
-int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int);
+int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
+void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
 void rpcrdma_ia_close(struct rpcrdma_ia *);
 bool frwr_is_supported(struct rpcrdma_ia *);
 bool fmr_is_supported(struct rpcrdma_ia *);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 16aff8ddc16f..d5b54c020dec 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2432,7 +2432,12 @@ static void xs_tcp_setup_socket(struct work_struct *work)
 	case -ENETUNREACH:
 	case -EADDRINUSE:
 	case -ENOBUFS:
-		/* retry with existing socket, after a delay */
+		/*
+		 * xs_tcp_force_close() wakes tasks with -EIO.
+		 * We need to wake them first to ensure the
+		 * correct error code.
+		 */
+		xprt_wake_pending_tasks(xprt, status);
 		xs_tcp_force_close(xprt);
 		goto out;
 	}
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 312ef7de57d7..ab3087687a32 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -508,7 +508,7 @@ bool tipc_msg_reverse(u32 own_node,  struct sk_buff **skb, int err)
 	}
 
 	if (skb_cloned(_skb) &&
-	    pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_KERNEL))
+	    pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
 		goto exit;
 
 	/* Now reverse the concerned fields */
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 0d4f2f455a7c..1b92b72e812f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -362,25 +362,25 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout)
 	return 0;
 }
 
-#define tipc_wait_for_cond(sock_, timeout_, condition_)			\
-({								        \
-	int rc_ = 0;							\
-	int done_ = 0;							\
-									\
-	while (!(condition_) && !done_) {				\
-		struct sock *sk_ = sock->sk;				\
-		DEFINE_WAIT_FUNC(wait_, woken_wake_function);		\
-									\
-		rc_ = tipc_sk_sock_err(sock_, timeout_);		\
-		if (rc_)						\
-			break;						\
-		prepare_to_wait(sk_sleep(sk_), &wait_,			\
-				TASK_INTERRUPTIBLE);			\
-		done_ = sk_wait_event(sk_, timeout_,			\
-				      (condition_), &wait_);		\
-		remove_wait_queue(sk_sleep(sk_), &wait_);		\
-	}								\
-	rc_;								\
+#define tipc_wait_for_cond(sock_, timeo_, condition_)			       \
+({                                                                             \
+	struct sock *sk_;						       \
+	int rc_;							       \
+									       \
+	while ((rc_ = !(condition_))) {					       \
+		DEFINE_WAIT_FUNC(wait_, woken_wake_function);	               \
+		sk_ = (sock_)->sk;					       \
+		rc_ = tipc_sk_sock_err((sock_), timeo_);		       \
+		if (rc_)						       \
+			break;						       \
+		prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE);    \
+		release_sock(sk_);					       \
+		*(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
+		sched_annotate_sleep();				               \
+		lock_sock(sk_);						       \
+		remove_wait_queue(sk_sleep(sk_), &wait_);		       \
+	}								       \
+	rc_;								       \
 })
 
 /**
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 6a7fe7660551..c77ced0109b7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -343,7 +343,7 @@ found:
  * are still connected to it and there's no way to inform "a polling
  * implementation" that it should let go of a certain wait queue
  *
- * In order to propagate a wake up, a wait_queue_t of the client
+ * In order to propagate a wake up, a wait_queue_entry_t of the client
  * socket is enqueued on the peer_wait queue of the server socket
  * whose wake function does a wake_up on the ordinary client socket
  * wait queue. This connection is established whenever a write (or
@@ -352,7 +352,7 @@ found:
  * was relayed.
  */
 
-static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
+static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 				      void *key)
 {
 	struct unix_sock *u;
@@ -999,7 +999,8 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct path path = { };
 
 	err = -EINVAL;
-	if (sunaddr->sun_family != AF_UNIX)
+	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
+	    sunaddr->sun_family != AF_UNIX)
 		goto out;
 
 	if (addr_len == sizeof(short)) {
@@ -1110,6 +1111,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 	unsigned int hash;
 	int err;
 
+	err = -EINVAL;
+	if (alen < offsetofend(struct sockaddr, sa_family))
+		goto out;
+
 	if (addr->sa_family != AF_UNSPEC) {
 		err = unix_mkname(sunaddr, alen, &hash);
 		if (err < 0)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 6f7f6757ceef..dfc8c51e4d74 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1540,8 +1540,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 	long timeout;
 	int err;
 	struct vsock_transport_send_notify_data send_data;
-
-	DEFINE_WAIT(wait);
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
 	sk = sock->sk;
 	vsk = vsock_sk(sk);
@@ -1584,11 +1583,10 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 	if (err < 0)
 		goto out;
 
-
 	while (total_written < len) {
 		ssize_t written;
 
-		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+		add_wait_queue(sk_sleep(sk), &wait);
 		while (vsock_stream_has_space(vsk) == 0 &&
 		       sk->sk_err == 0 &&
 		       !(sk->sk_shutdown & SEND_SHUTDOWN) &&
@@ -1597,33 +1595,30 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 			/* Don't wait for non-blocking sockets. */
 			if (timeout == 0) {
 				err = -EAGAIN;
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			}
 
 			err = transport->notify_send_pre_block(vsk, &send_data);
 			if (err < 0) {
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			}
 
 			release_sock(sk);
-			timeout = schedule_timeout(timeout);
+			timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
 			lock_sock(sk);
 			if (signal_pending(current)) {
 				err = sock_intr_errno(timeout);
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			} else if (timeout == 0) {
 				err = -EAGAIN;
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			}
-
-			prepare_to_wait(sk_sleep(sk), &wait,
-					TASK_INTERRUPTIBLE);
 		}
-		finish_wait(sk_sleep(sk), &wait);
+		remove_wait_queue(sk_sleep(sk), &wait);
 
 		/* These checks occur both as part of and after the loop
 		 * conditional since we need to check before and after
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 9dffe0282ad4..403d86e80162 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -576,9 +576,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
 
 	vsock->vdev = vdev;
 
-	ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX,
-					    vsock->vqs, callbacks, names,
-					    NULL);
+	ret = virtio_find_vqs(vsock->vdev, VSOCK_VQ_MAX,
+			      vsock->vqs, callbacks, names,
+			      NULL);
 	if (ret < 0)
 		goto out;
 
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 14d5f0c8c45f..9f0901f3e42b 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -322,9 +322,9 @@ cfg80211_find_sched_scan_req(struct cfg80211_registered_device *rdev, u64 reqid)
 {
 	struct cfg80211_sched_scan_request *pos;
 
-	ASSERT_RTNL();
+	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
 
-	list_for_each_entry(pos, &rdev->sched_scan_req_list, list) {
+	list_for_each_entry_rcu(pos, &rdev->sched_scan_req_list, list) {
 		if (pos->reqid == reqid)
 			return pos;
 	}
@@ -398,13 +398,13 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid)
 	trace_cfg80211_sched_scan_results(wiphy, reqid);
 	/* ignore if we're not scanning */
 
-	rtnl_lock();
+	rcu_read_lock();
 	request = cfg80211_find_sched_scan_req(rdev, reqid);
 	if (request) {
 		request->report_results = true;
 		queue_work(cfg80211_wq, &rdev->sched_scan_res_wk);
 	}
-	rtnl_unlock();
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(cfg80211_sched_scan_results);
 
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 7198373e2920..4992f1025c9d 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -454,6 +454,8 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 	if (iftype == NL80211_IFTYPE_MESH_POINT)
 		skb_copy_bits(skb, hdrlen, &mesh_flags, 1);
 
+	mesh_flags &= MESH_FLAGS_AE;
+
 	switch (hdr->frame_control &
 		cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
 	case cpu_to_le16(IEEE80211_FCTL_TODS):
@@ -469,9 +471,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 			     iftype != NL80211_IFTYPE_STATION))
 			return -1;
 		if (iftype == NL80211_IFTYPE_MESH_POINT) {
-			if (mesh_flags & MESH_FLAGS_AE_A4)
+			if (mesh_flags == MESH_FLAGS_AE_A4)
 				return -1;
-			if (mesh_flags & MESH_FLAGS_AE_A5_A6) {
+			if (mesh_flags == MESH_FLAGS_AE_A5_A6) {
 				skb_copy_bits(skb, hdrlen +
 					offsetof(struct ieee80211s_hdr, eaddr1),
 					tmp.h_dest, 2 * ETH_ALEN);
@@ -487,9 +489,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 		     ether_addr_equal(tmp.h_source, addr)))
 			return -1;
 		if (iftype == NL80211_IFTYPE_MESH_POINT) {
-			if (mesh_flags & MESH_FLAGS_AE_A5_A6)
+			if (mesh_flags == MESH_FLAGS_AE_A5_A6)
 				return -1;
-			if (mesh_flags & MESH_FLAGS_AE_A4)
+			if (mesh_flags == MESH_FLAGS_AE_A4)
 				skb_copy_bits(skb, hdrlen +
 					offsetof(struct ieee80211s_hdr, eaddr1),
 					tmp.h_source, ETH_ALEN);
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 1a4db6790e20..6cdb054484d6 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -914,13 +914,12 @@ int call_commit_handler(struct net_device *dev)
  * Main IOCTl dispatcher.
  * Check the type of IOCTL and call the appropriate wrapper...
  */
-static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
+static int wireless_process_ioctl(struct net *net, struct iwreq *iwr,
 				  unsigned int cmd,
 				  struct iw_request_info *info,
 				  wext_ioctl_func standard,
 				  wext_ioctl_func private)
 {
-	struct iwreq *iwr = (struct iwreq *) ifr;
 	struct net_device *dev;
 	iw_handler	handler;
 
@@ -928,7 +927,7 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
 	 * The copy_to/from_user() of ifr is also dealt with in there */
 
 	/* Make sure the device exist */
-	if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL)
+	if ((dev = __dev_get_by_name(net, iwr->ifr_name)) == NULL)
 		return -ENODEV;
 
 	/* A bunch of special cases, then the generic case...
@@ -957,9 +956,6 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
 		else if (private)
 			return private(dev, iwr, cmd, info, handler);
 	}
-	/* Old driver API : call driver ioctl handler */
-	if (dev->netdev_ops->ndo_do_ioctl)
-		return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
 	return -EOPNOTSUPP;
 }
 
@@ -977,7 +973,7 @@ static int wext_permission_check(unsigned int cmd)
 }
 
 /* entry point from dev ioctl */
-static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr,
+static int wext_ioctl_dispatch(struct net *net, struct iwreq *iwr,
 			       unsigned int cmd, struct iw_request_info *info,
 			       wext_ioctl_func standard,
 			       wext_ioctl_func private)
@@ -987,9 +983,9 @@ static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr,
 	if (ret)
 		return ret;
 
-	dev_load(net, ifr->ifr_name);
+	dev_load(net, iwr->ifr_name);
 	rtnl_lock();
-	ret = wireless_process_ioctl(net, ifr, cmd, info, standard, private);
+	ret = wireless_process_ioctl(net, iwr, cmd, info, standard, private);
 	rtnl_unlock();
 
 	return ret;
@@ -1039,18 +1035,18 @@ static int ioctl_standard_call(struct net_device *	dev,
 }
 
 
-int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
 		      void __user *arg)
 {
 	struct iw_request_info info = { .cmd = cmd, .flags = 0 };
 	int ret;
 
-	ret = wext_ioctl_dispatch(net, ifr, cmd, &info,
+	ret = wext_ioctl_dispatch(net, iwr, cmd, &info,
 				  ioctl_standard_call,
 				  ioctl_private_call);
 	if (ret >= 0 &&
 	    IW_IS_GET(cmd) &&
-	    copy_to_user(arg, ifr, sizeof(struct iwreq)))
+	    copy_to_user(arg, iwr, sizeof(struct iwreq)))
 		return -EFAULT;
 
 	return ret;
@@ -1107,7 +1103,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
 	info.cmd = cmd;
 	info.flags = IW_REQUEST_FLAG_COMPAT;
 
-	ret = wext_ioctl_dispatch(net, (struct ifreq *) &iwr, cmd, &info,
+	ret = wext_ioctl_dispatch(net, &iwr, cmd, &info,
 				  compat_standard_call,
 				  compat_private_call);
 
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 8b911c29860e..5a1a98df3499 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1791,32 +1791,40 @@ void x25_kill_by_neigh(struct x25_neigh *nb)
 
 static int __init x25_init(void)
 {
-	int rc = proto_register(&x25_proto, 0);
+	int rc;
 
-	if (rc != 0)
+	rc = proto_register(&x25_proto, 0);
+	if (rc)
 		goto out;
 
 	rc = sock_register(&x25_family_ops);
-	if (rc != 0)
+	if (rc)
 		goto out_proto;
 
 	dev_add_pack(&x25_packet_type);
 
 	rc = register_netdevice_notifier(&x25_dev_notifier);
-	if (rc != 0)
+	if (rc)
 		goto out_sock;
 
-	pr_info("Linux Version 0.2\n");
+	rc = x25_register_sysctl();
+	if (rc)
+		goto out_dev;
 
-	x25_register_sysctl();
 	rc = x25_proc_init();
-	if (rc != 0)
-		goto out_dev;
+	if (rc)
+		goto out_sysctl;
+
+	pr_info("Linux Version 0.2\n");
+
 out:
 	return rc;
+out_sysctl:
+	x25_unregister_sysctl();
 out_dev:
 	unregister_netdevice_notifier(&x25_dev_notifier);
 out_sock:
+	dev_remove_pack(&x25_packet_type);
 	sock_unregister(AF_X25);
 out_proto:
 	proto_unregister(&x25_proto);
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index a06dfe143c67..ba078c85f0a1 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -73,9 +73,12 @@ static struct ctl_table x25_table[] = {
 	{ },
 };
 
-void __init x25_register_sysctl(void)
+int __init x25_register_sysctl(void)
 {
 	x25_table_header = register_net_sysctl(&init_net, "net/x25", x25_table);
+	if (!x25_table_header)
+		return -ENOMEM;
+	return 0;
 }
 
 void x25_unregister_sysctl(void)
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index abf81b329dc1..55b2ac300995 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -4,8 +4,7 @@
 
 obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
 		      xfrm_input.o xfrm_output.o \
-		      xfrm_sysctl.o xfrm_replay.o
-obj-$(CONFIG_XFRM_OFFLOAD) += xfrm_device.o
+		      xfrm_sysctl.o xfrm_replay.o xfrm_device.o
 obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
 obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 8ec8a3fcf8d4..5aba03685d7d 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -22,6 +22,7 @@
 #include <net/xfrm.h>
 #include <linux/notifier.h>
 
+#ifdef CONFIG_XFRM_OFFLOAD
 int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
 {
 	int err;
@@ -137,6 +138,7 @@ ok:
 	return true;
 }
 EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok);
+#endif
 
 int xfrm_dev_register(struct net_device *dev)
 {
@@ -170,7 +172,7 @@ static int xfrm_dev_feat_change(struct net_device *dev)
 
 static int xfrm_dev_down(struct net_device *dev)
 {
-	if (dev->hw_features & NETIF_F_HW_ESP)
+	if (dev->features & NETIF_F_HW_ESP)
 		xfrm_dev_state_flush(dev_net(dev), dev, true);
 
 	xfrm_garbage_collect(dev_net(dev));
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b00a1d5a7f52..643a18f72032 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1006,10 +1006,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 		err = -ESRCH;
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
-
-	if (cnt)
-		xfrm_garbage_collect(net);
-
 	return err;
 }
 EXPORT_SYMBOL(xfrm_policy_flush);
@@ -1797,43 +1793,6 @@ free_dst:
 	goto out;
 }
 
-#ifdef CONFIG_XFRM_SUB_POLICY
-static int xfrm_dst_alloc_copy(void **target, const void *src, int size)
-{
-	if (!*target) {
-		*target = kmalloc(size, GFP_ATOMIC);
-		if (!*target)
-			return -ENOMEM;
-	}
-
-	memcpy(*target, src, size);
-	return 0;
-}
-#endif
-
-static int xfrm_dst_update_parent(struct dst_entry *dst,
-				  const struct xfrm_selector *sel)
-{
-#ifdef CONFIG_XFRM_SUB_POLICY
-	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
-	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
-				   sel, sizeof(*sel));
-#else
-	return 0;
-#endif
-}
-
-static int xfrm_dst_update_origin(struct dst_entry *dst,
-				  const struct flowi *fl)
-{
-#ifdef CONFIG_XFRM_SUB_POLICY
-	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
-	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
-#else
-	return 0;
-#endif
-}
-
 static int xfrm_expand_policies(const struct flowi *fl, u16 family,
 				struct xfrm_policy **pols,
 				int *num_pols, int *num_xfrms)
@@ -1905,16 +1864,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 
 	xdst = (struct xfrm_dst *)dst;
 	xdst->num_xfrms = err;
-	if (num_pols > 1)
-		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
-	else
-		err = xfrm_dst_update_origin(dst, fl);
-	if (unlikely(err)) {
-		dst_free(dst);
-		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-		return ERR_PTR(err);
-	}
-
 	xdst->num_pols = num_pols;
 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
 	xdst->policy_genid = atomic_read(&pols[0]->genid);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index fc3c5aa38754..2e291bc5f1fc 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1383,6 +1383,8 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig)
 	x->curlft.add_time = orig->curlft.add_time;
 	x->km.state = orig->km.state;
 	x->km.seq = orig->km.seq;
+	x->replay = orig->replay;
+	x->preplay = orig->preplay;
 
 	return x;
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 38614df33ec8..86116e9aaf3d 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2027,6 +2027,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 			return 0;
 		return err;
 	}
+	xfrm_garbage_collect(net);
 
 	c.data.type = type;
 	c.event = nlh->nlmsg_type;
diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c
index b08ab4e88929..9d751e209f31 100644
--- a/samples/bpf/cookie_uid_helper_example.c
+++ b/samples/bpf/cookie_uid_helper_example.c
@@ -306,7 +306,9 @@ int main(int argc, char *argv[])
 	prog_attach_iptables(argv[2]);
 	if (cfg_test_traffic) {
 		if (signal(SIGINT, finish) == SIG_ERR)
-			error(1, errno, "register handler failed");
+			error(1, errno, "register SIGINT handler failed");
+		if (signal(SIGTERM, finish) == SIG_ERR)
+			error(1, errno, "register SIGTERM handler failed");
 		while (!test_finish) {
 			print_table();
 			printf("\n");
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
index 9cce2a66bd66..512f87a5fd20 100644
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c
@@ -100,6 +100,7 @@ int main(int argc, char **argv)
 	setrlimit(RLIMIT_MEMLOCK, &r);
 
 	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
 
 	if (load_kallsyms()) {
 		printf("failed to process /proc/kallsyms\n");
diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c
index be59d7dcbdde..4ed690b907ff 100644
--- a/samples/bpf/sampleip_user.c
+++ b/samples/bpf/sampleip_user.c
@@ -180,6 +180,7 @@ int main(int argc, char **argv)
 		return 1;
 	}
 	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
 
 	/* do sampling */
 	printf("Sampling at %d Hertz for %d seconds. Ctrl-C also ends.\n",
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index 0c5561d193a4..fa4336423da5 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -192,6 +192,7 @@ int main(int argc, char **argv)
 	setrlimit(RLIMIT_MEMLOCK, &r);
 
 	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
 
 	if (load_kallsyms()) {
 		printf("failed to process /proc/kallsyms\n");
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index 7fee0f1ba9a3..7321a3f253c9 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -127,6 +127,7 @@ int main(int ac, char **argv)
 	}
 
 	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
 
 	/* start 'ping' in the background to have some kfree_skb events */
 	f = popen("ping -c5 localhost", "r");
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index 378850c70eb8..2431c0321b71 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -62,13 +62,14 @@ static void usage(const char *prog)
 	fprintf(stderr,
 		"usage: %s [OPTS] IFINDEX\n\n"
 		"OPTS:\n"
-		"    -S    use skb-mode\n",
+		"    -S    use skb-mode\n"
+		"    -N    enforce native mode\n",
 		prog);
 }
 
 int main(int argc, char **argv)
 {
-	const char *optstr = "S";
+	const char *optstr = "SN";
 	char filename[256];
 	int opt;
 
@@ -77,6 +78,9 @@ int main(int argc, char **argv)
 		case 'S':
 			xdp_flags |= XDP_FLAGS_SKB_MODE;
 			break;
+		case 'N':
+			xdp_flags |= XDP_FLAGS_DRV_MODE;
+			break;
 		default:
 			usage(basename(argv[0]));
 			return 1;
@@ -102,6 +106,7 @@ int main(int argc, char **argv)
 	}
 
 	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
 
 	if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
 		printf("link set xdp fd failed\n");
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index 92b8bde9337c..715cd12eaca5 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -79,6 +79,8 @@ static void usage(const char *cmd)
 	printf("    -m <dest-MAC> Used in sending the IP Tunneled pkt\n");
 	printf("    -T <stop-after-X-seconds> Default: 0 (forever)\n");
 	printf("    -P <IP-Protocol> Default is TCP\n");
+	printf("    -S use skb-mode\n");
+	printf("    -N enforce native mode\n");
 	printf("    -h Display this help\n");
 }
 
@@ -138,7 +140,7 @@ int main(int argc, char **argv)
 {
 	unsigned char opt_flags[256] = {};
 	unsigned int kill_after_s = 0;
-	const char *optstr = "i:a:p:s:d:m:T:P:Sh";
+	const char *optstr = "i:a:p:s:d:m:T:P:SNh";
 	int min_port = 0, max_port = 0;
 	struct iptnl_info tnl = {};
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
@@ -206,6 +208,9 @@ int main(int argc, char **argv)
 		case 'S':
 			xdp_flags |= XDP_FLAGS_SKB_MODE;
 			break;
+		case 'N':
+			xdp_flags |= XDP_FLAGS_DRV_MODE;
+			break;
 		default:
 			usage(argv[0]);
 			return 1;
@@ -239,6 +244,7 @@ int main(int argc, char **argv)
 	}
 
 	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
 
 	while (min_port <= max_port) {
 		vip.dport = htons(min_port++);
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index afe3fd3af1e4..61f87a99bf0a 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -116,12 +116,12 @@ CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS))
 # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
 
 cc-option = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+	$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
 
 # cc-option-yn
 # Usage: flag := $(call cc-option-yn,-march=winchip-c6)
 cc-option-yn = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
+	$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
 
 # cc-option-align
 # Prefix align with either -falign or -malign
@@ -131,7 +131,7 @@ cc-option-align = $(subst -functions=0,,\
 # cc-disable-warning
 # Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
 cc-disable-warning = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+	$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
 
 # cc-name
 # Expands to either gcc or clang
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index d883116ebaa4..733e044fff8b 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -177,6 +177,14 @@ cmd_cc_symtypes_c =                                                         \
 $(obj)/%.symtypes : $(src)/%.c FORCE
 	$(call cmd,cc_symtypes_c)
 
+# LLVM assembly
+# Generate .ll files from .c
+quiet_cmd_cc_ll_c = CC $(quiet_modtag)  $@
+      cmd_cc_ll_c = $(CC) $(c_flags) -emit-llvm -S -o $@ $<
+
+$(obj)/%.ll: $(src)/%.c FORCE
+	$(call if_changed_dep,cc_ll_c)
+
 # C (.c) files
 # The C file is compiled and updated dependency information is generated.
 # (See cmd_cc_o_c + relevant part of rule_cc_o_c)
@@ -272,14 +280,14 @@ define rule_cc_o_c
 	$(call echo-cmd,checksrc) $(cmd_checksrc)			  \
 	$(call cmd_and_fixdep,cc_o_c)					  \
 	$(cmd_modversions_c)						  \
-	$(cmd_objtool)						          \
+	$(call echo-cmd,objtool) $(cmd_objtool)				  \
 	$(call echo-cmd,record_mcount) $(cmd_record_mcount)
 endef
 
 define rule_as_o_S
 	$(call cmd_and_fixdep,as_o_S)					  \
 	$(cmd_modversions_S)						  \
-	$(cmd_objtool)
+	$(call echo-cmd,objtool) $(cmd_objtool)
 endef
 
 # List module undefined symbols (or empty line if not enabled)
diff --git a/scripts/Makefile.dtbinst b/scripts/Makefile.dtbinst
index a1be75d0a5fd..34614a48b717 100644
--- a/scripts/Makefile.dtbinst
+++ b/scripts/Makefile.dtbinst
@@ -20,12 +20,6 @@ include include/config/auto.conf
 include scripts/Kbuild.include
 include $(src)/Makefile
 
-PHONY += __dtbs_install_prep
-__dtbs_install_prep:
-ifeq ("$(dtbinst-root)", "$(obj)")
-	$(Q)mkdir -p $(INSTALL_DTBS_PATH)
-endif
-
 dtbinst-files	:= $(dtb-y)
 dtbinst-dirs	:= $(dts-dirs)
 
@@ -35,8 +29,6 @@ quiet_cmd_dtb_install =	INSTALL $<
 
 install-dir = $(patsubst $(dtbinst-root)%,$(INSTALL_DTBS_PATH)%,$(obj))
 
-$(dtbinst-files) $(dtbinst-dirs): | __dtbs_install_prep
-
 $(dtbinst-files): %.dtb: $(obj)/%.dtb
 	$(call cmd,dtb_install,$(install-dir))
 
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index 7c321a603b07..fb3522fd8702 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -64,7 +64,6 @@ ifeq ($(cc-name),clang)
 KBUILD_CFLAGS += $(call cc-disable-warning, initializer-overrides)
 KBUILD_CFLAGS += $(call cc-disable-warning, unused-value)
 KBUILD_CFLAGS += $(call cc-disable-warning, format)
-KBUILD_CFLAGS += $(call cc-disable-warning, unknown-warning-option)
 KBUILD_CFLAGS += $(call cc-disable-warning, sign-compare)
 KBUILD_CFLAGS += $(call cc-disable-warning, format-zero-length)
 KBUILD_CFLAGS += $(call cc-disable-warning, uninitialized)
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index 1106d6ca3a38..c583a1e1bd3c 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -1,33 +1,64 @@
 # ==========================================================================
 # Installing headers
 #
-# header-y  - list files to be installed. They are preprocessed
-#             to remove __KERNEL__ section of the file
-# genhdr-y  - Same as header-y but in a generated/ directory
+# All headers under include/uapi, include/generated/uapi,
+# arch/<arch>/include/uapi and arch/<arch>/include/generated/uapi are
+# exported.
+# They are preprocessed to remove __KERNEL__ section of the file.
 #
 # ==========================================================================
 
+PHONY := __headers
+__headers:
+
+include scripts/Kbuild.include
+
+srcdir        := $(srctree)/$(obj)
+
+# When make is run under a fakechroot environment, the function
+# $(wildcard $(srcdir)/*/.) doesn't only return directories, but also regular
+# files. So, we are using a combination of sort/dir/wildcard which works
+# with fakechroot.
+subdirs       := $(patsubst $(srcdir)/%/,%,\
+		 $(filter-out $(srcdir)/,\
+		 $(sort $(dir $(wildcard $(srcdir)/*/)))))
+
+# caller may set destination dir (when installing to asm/)
+_dst          := $(if $(dst),$(dst),$(obj))
+
+# Recursion
+__headers: $(subdirs)
+
+.PHONY: $(subdirs)
+$(subdirs):
+	$(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(_dst)/$@
+
+# Skip header install/check for include/uapi and arch/$(hdr-arch)/include/uapi.
+# We have only sub-directories there.
+skip-inst := $(if $(filter %/uapi,$(obj)),1)
+
+ifeq ($(skip-inst),)
+
 # generated header directory
 gen := $(if $(gen),$(gen),$(subst include/,include/generated/,$(obj)))
 
+# Kbuild file is optional
 kbuild-file := $(srctree)/$(obj)/Kbuild
-include $(kbuild-file)
-
-# called may set destination dir (when installing to asm/)
-_dst := $(if $(destination-y),$(destination-y),$(if $(dst),$(dst),$(obj)))
+-include $(kbuild-file)
 
 old-kbuild-file := $(srctree)/$(subst uapi/,,$(obj))/Kbuild
 ifneq ($(wildcard $(old-kbuild-file)),)
 include $(old-kbuild-file)
 endif
 
-include scripts/Kbuild.include
-
 installdir    := $(INSTALL_HDR_PATH)/$(subst uapi/,,$(_dst))
 
-header-y      := $(sort $(header-y))
-subdirs       := $(patsubst %/,%,$(filter %/, $(header-y)))
-header-y      := $(filter-out %/, $(header-y))
+gendir        := $(objtree)/$(gen)
+header-files  := $(notdir $(wildcard $(srcdir)/*.h))
+header-files  += $(notdir $(wildcard $(srcdir)/*.agh))
+header-files  := $(filter-out $(no-export-headers), $(header-files))
+genhdr-files  := $(notdir $(wildcard $(gendir)/*.h))
+genhdr-files  := $(filter-out $(header-files), $(genhdr-files))
 
 # files used to track state of install/check
 install-file  := $(installdir)/.install
@@ -35,36 +66,20 @@ check-file    := $(installdir)/.check
 
 # generic-y list all files an architecture uses from asm-generic
 # Use this to build a list of headers which require a wrapper
-wrapper-files := $(filter $(header-y), $(generic-y))
-
-srcdir        := $(srctree)/$(obj)
-gendir        := $(objtree)/$(gen)
-
-oldsrcdir     := $(srctree)/$(subst /uapi,,$(obj))
+generic-files := $(notdir $(wildcard $(srctree)/include/uapi/asm-generic/*.h))
+wrapper-files := $(filter $(generic-files), $(generic-y))
+wrapper-files := $(filter-out $(header-files), $(wrapper-files))
 
 # all headers files for this dir
-header-y      := $(filter-out $(generic-y), $(header-y))
-all-files     := $(header-y) $(genhdr-y) $(wrapper-files)
+all-files     := $(header-files) $(genhdr-files) $(wrapper-files)
 output-files  := $(addprefix $(installdir)/, $(all-files))
 
-input-files1  := $(foreach hdr, $(header-y), \
-		   $(if $(wildcard $(srcdir)/$(hdr)), \
-			$(wildcard $(srcdir)/$(hdr))) \
-		   )
-input-files1-name := $(notdir $(input-files1))
-input-files2  := $(foreach hdr, $(header-y), \
-		   $(if  $(wildcard $(srcdir)/$(hdr)),, \
-			$(if $(wildcard $(oldsrcdir)/$(hdr)), \
-				$(wildcard $(oldsrcdir)/$(hdr)), \
-				$(error Missing UAPI file $(srcdir)/$(hdr))) \
-		   ))
-input-files2-name := $(notdir $(input-files2))
-input-files3  := $(foreach hdr, $(genhdr-y), \
-		   $(if	$(wildcard $(gendir)/$(hdr)), \
-			$(wildcard $(gendir)/$(hdr)), \
-			$(error Missing generated UAPI file $(gendir)/$(hdr)) \
-		   ))
-input-files3-name := $(notdir $(input-files3))
+ifneq ($(mandatory-y),)
+missing       := $(filter-out $(all-files),$(mandatory-y))
+ifneq ($(missing),)
+$(error Some mandatory headers ($(missing)) are missing in $(obj))
+endif
+endif
 
 # Work out what needs to be removed
 oldheaders    := $(patsubst $(installdir)/%,%,$(wildcard $(installdir)/*.h))
@@ -78,9 +93,8 @@ printdir = $(patsubst $(INSTALL_HDR_PATH)/%/,%,$(dir $@))
 quiet_cmd_install = INSTALL $(printdir) ($(words $(all-files))\
                             file$(if $(word 2, $(all-files)),s))
       cmd_install = \
-        $(CONFIG_SHELL) $< $(installdir) $(srcdir) $(input-files1-name); \
-        $(CONFIG_SHELL) $< $(installdir) $(oldsrcdir) $(input-files2-name); \
-        $(CONFIG_SHELL) $< $(installdir) $(gendir) $(input-files3-name); \
+        $(CONFIG_SHELL) $< $(installdir) $(srcdir) $(header-files); \
+        $(CONFIG_SHELL) $< $(installdir) $(gendir) $(genhdr-files); \
         for F in $(wrapper-files); do                                   \
                 echo "\#include <asm-generic/$$F>" > $(installdir)/$$F;    \
         done;                                                           \
@@ -98,21 +112,21 @@ quiet_cmd_check = CHECK   $(printdir) ($(words $(all-files)) files)
                   $(PERL) $< $(INSTALL_HDR_PATH)/include $(SRCARCH); \
 	          touch $@
 
-PHONY += __headersinst __headerscheck
-
 ifndef HDRCHECK
 # Rules for installing headers
-__headersinst: $(subdirs) $(install-file)
+__headers: $(install-file)
 	@:
 
 targets += $(install-file)
-$(install-file): scripts/headers_install.sh $(input-files1) $(input-files2) $(input-files3) FORCE
+$(install-file): scripts/headers_install.sh \
+		 $(addprefix $(srcdir)/,$(header-files)) \
+		 $(addprefix $(gendir)/,$(genhdr-files)) FORCE
 	$(if $(unwanted),$(call cmd,remove),)
 	$(if $(wildcard $(dir $@)),,$(shell mkdir -p $(dir $@)))
 	$(call if_changed,install)
 
 else
-__headerscheck: $(subdirs) $(check-file)
+__headers: $(check-file)
 	@:
 
 targets += $(check-file)
@@ -121,11 +135,6 @@ $(check-file): scripts/headers_check.pl $(output-files) FORCE
 
 endif
 
-# Recursion
-.PHONY: $(subdirs)
-$(subdirs):
-	$(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(_dst)/$@
-
 targets := $(wildcard $(sort $(targets)))
 cmd_files := $(wildcard \
              $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd))
@@ -134,6 +143,8 @@ ifneq ($(cmd_files),)
 	include $(cmd_files)
 endif
 
+endif # skip-inst
+
 .PHONY: $(PHONY)
 PHONY += FORCE
 FORCE: ;
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index c9f975ab9840..58c05e5d9870 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -175,7 +175,7 @@ ld_flags       = $(LDFLAGS) $(ldflags-y)
 
 dtc_cpp_flags  = -Wp,-MD,$(depfile).pre.tmp -nostdinc                    \
 		 -I$(srctree)/arch/$(SRCARCH)/boot/dts                   \
-		 -I$(srctree)/arch/$(SRCARCH)/boot/dts/include           \
+		 -I$(srctree)/scripts/dtc/include-prefixes               \
 		 -I$(srctree)/drivers/of/testcase-data                   \
 		 -undef -D__DTS__
 
@@ -420,3 +420,34 @@ quiet_cmd_xzmisc = XZMISC  $@
 cmd_xzmisc = (cat $(filter-out FORCE,$^) | \
 	xz --check=crc32 --lzma2=dict=1MiB) > $@ || \
 	(rm -f $@ ; false)
+
+# ASM offsets
+# ---------------------------------------------------------------------------
+
+# Default sed regexp - multiline due to syntax constraints
+#
+# Use [:space:] because LLVM's integrated assembler inserts <tab> around
+# the .ascii directive whereas GCC keeps the <space> as-is.
+define sed-offsets
+	's:^[[:space:]]*\.ascii[[:space:]]*"\(.*\)".*:\1:; \
+	/^->/{s:->#\(.*\):/* \1 */:; \
+	s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
+	s:->::; p;}'
+endef
+
+# Use filechk to avoid rebuilds when a header changes, but the resulting file
+# does not
+define filechk_offsets
+	(set -e; \
+	 echo "#ifndef $2"; \
+	 echo "#define $2"; \
+	 echo "/*"; \
+	 echo " * DO NOT MODIFY."; \
+	 echo " *"; \
+	 echo " * This file was generated by Kbuild"; \
+	 echo " */"; \
+	 echo ""; \
+	 sed -ne $(sed-offsets); \
+	 echo ""; \
+	 echo "#endif" )
+endef
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 4b9569fa931b..c7e4d73fe1ce 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -5533,23 +5533,6 @@ sub process {
 			}
 		}
 
-# Check for expedited grace periods that interrupt non-idle non-nohz
-# online CPUs.  These expedited can therefore degrade real-time response
-# if used carelessly, and should be avoided where not absolutely
-# needed.  It is always OK to use synchronize_rcu_expedited() and
-# synchronize_sched_expedited() at boot time (before real-time applications
-# start) and in error situations where real-time response is compromised in
-# any case.  Note that synchronize_srcu_expedited() does -not- interrupt
-# other CPUs, so don't warn on uses of synchronize_srcu_expedited().
-# Of course, nothing comes for free, and srcu_read_lock() and
-# srcu_read_unlock() do contain full memory barriers in payment for
-# synchronize_srcu_expedited() non-interruption properties.
-		if ($line =~ /\b(synchronize_rcu_expedited|synchronize_sched_expedited)\(/) {
-			WARN("EXPEDITED_RCU_GRACE_PERIOD",
-			     "expedited RCU grace periods should be avoided where they can degrade real-time response\n" . $herecurr);
-
-		}
-
 # check of hardware specific defines
 		if ($line =~ m@^.\s*\#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) {
 			CHK("ARCH_DEFINES",
diff --git a/scripts/dtc/checks.c b/scripts/dtc/checks.c
index 5adfc8f52b4f..4b72b530c84f 100644
--- a/scripts/dtc/checks.c
+++ b/scripts/dtc/checks.c
@@ -873,7 +873,7 @@ static void check_simple_bus_reg(struct check *c, struct dt_info *dti, struct no
 	while (size--)
 		reg = (reg << 32) | fdt32_to_cpu(*(cells++));
 
-	snprintf(unit_addr, sizeof(unit_addr), "%lx", reg);
+	snprintf(unit_addr, sizeof(unit_addr), "%zx", reg);
 	if (!streq(unitname, unit_addr))
 		FAIL(c, dti, "Node %s simple-bus unit address format error, expected \"%s\"",
 		     node->fullpath, unit_addr);
diff --git a/scripts/dtc/include-prefixes/arc b/scripts/dtc/include-prefixes/arc
new file mode 120000
index 000000000000..5d21b5a69a11
--- /dev/null
+++ b/scripts/dtc/include-prefixes/arc
@@ -0,0 +1 @@
+../../../arch/arc/boot/dts
+\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/arm b/scripts/dtc/include-prefixes/arm
new file mode 120000
index 000000000000..eb14d4515a57
--- /dev/null
+++ b/scripts/dtc/include-prefixes/arm
@@ -0,0 +1 @@
+../../../arch/arm/boot/dts
+\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/arm64 b/scripts/dtc/include-prefixes/arm64
new file mode 120000
index 000000000000..275c42c21d71
--- /dev/null
+++ b/scripts/dtc/include-prefixes/arm64
@@ -0,0 +1 @@
+../../../arch/arm64/boot/dts
+\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/c6x b/scripts/dtc/include-prefixes/c6x
new file mode 120000
index 000000000000..49ded4cae2be
--- /dev/null
+++ b/scripts/dtc/include-prefixes/c6x
@@ -0,0 +1 @@
+../../../arch/c6x/boot/dts
+\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/cris b/scripts/dtc/include-prefixes/cris
new file mode 120000
index 000000000000..736d998ba506
--- /dev/null
+++ b/scripts/dtc/include-prefixes/cris
@@ -0,0 +1 @@
+../../../arch/cris/boot/dts
+\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/dt-bindings b/scripts/dtc/include-prefixes/dt-bindings
new file mode 120000
index 000000000000..04fdbb3af016
--- /dev/null
+++ b/scripts/dtc/include-prefixes/dt-bindings
@@ -0,0 +1 @@
+../../../include/dt-bindings
+\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/h8300 b/scripts/dtc/include-prefixes/h8300
new file mode 120000
index 000000000000..3bdaa332c54c
--- /dev/null
+++ b/scripts/dtc/include-prefixes/h8300
@@ -0,0 +1 @@
+../../../arch/h8300/boot/dts
+\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/metag b/scripts/dtc/include-prefixes/metag
new file mode 120000
index 000000000000..87a3c847db8f
--- /dev/null
+++ b/scripts/dtc/include-prefixes/metag
@@ -0,0 +1 @@
+../../../arch/metag/boot/dts
+\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/microblaze b/scripts/dtc/include-prefixes/microblaze
new file mode 120000
index 000000000000..d9830330a21d
--- /dev/null
+++ b/scripts/dtc/include-prefixes/microblaze
@@ -0,0 +1 @@
+../../../arch/microblaze/boot/dts
+\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/mips b/scripts/dtc/include-prefixes/mips
new file mode 120000
index 000000000000..ae8d4948dc8d
--- /dev/null
+++ b/scripts/dtc/include-prefixes/mips
@@ -0,0 +1 @@
+../../../arch/mips/boot/dts
+\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/nios2 b/scripts/dtc/include-prefixes/nios2
new file mode 120000
index 000000000000..51772336d13f
--- /dev/null
+++ b/scripts/dtc/include-prefixes/nios2
@@ -0,0 +1 @@
+../../../arch/nios2/boot/dts
+\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/openrisc b/scripts/dtc/include-prefixes/openrisc
new file mode 120000
index 000000000000..71c3bc75c560
--- /dev/null
+++ b/scripts/dtc/include-prefixes/openrisc
@@ -0,0 +1 @@
+../../../arch/openrisc/boot/dts
+\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/powerpc b/scripts/dtc/include-prefixes/powerpc
new file mode 120000
index 000000000000..7cd6ec16e899
--- /dev/null
+++ b/scripts/dtc/include-prefixes/powerpc
@@ -0,0 +1 @@
+../../../arch/powerpc/boot/dts
+\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/sh b/scripts/dtc/include-prefixes/sh
new file mode 120000
index 000000000000..67d37808c599
--- /dev/null
+++ b/scripts/dtc/include-prefixes/sh
@@ -0,0 +1 @@
+../../../arch/sh/boot/dts
+\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/xtensa b/scripts/dtc/include-prefixes/xtensa
new file mode 120000
index 000000000000..d1eaf6ec7a2b
--- /dev/null
+++ b/scripts/dtc/include-prefixes/xtensa
@@ -0,0 +1 @@
+../../../arch/xtensa/boot/dts
+\ No newline at end of file
diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py
index f9b92ece7834..5afd1098e33a 100644
--- a/scripts/gdb/linux/dmesg.py
+++ b/scripts/gdb/linux/dmesg.py
@@ -23,10 +23,11 @@ class LxDmesg(gdb.Command):
         super(LxDmesg, self).__init__("lx-dmesg", gdb.COMMAND_DATA)
 
     def invoke(self, arg, from_tty):
-        log_buf_addr = int(str(gdb.parse_and_eval("log_buf")).split()[0], 16)
-        log_first_idx = int(gdb.parse_and_eval("log_first_idx"))
-        log_next_idx = int(gdb.parse_and_eval("log_next_idx"))
-        log_buf_len = int(gdb.parse_and_eval("log_buf_len"))
+        log_buf_addr = int(str(gdb.parse_and_eval(
+            "'printk.c'::log_buf")).split()[0], 16)
+        log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx"))
+        log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx"))
+        log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len"))
 
         inf = gdb.inferiors()[0]
         start = log_buf_addr + log_first_idx
diff --git a/scripts/genksyms/genksyms.h b/scripts/genksyms/genksyms.h
index 3bffdcaaa274..b724a0290c75 100644
--- a/scripts/genksyms/genksyms.h
+++ b/scripts/genksyms/genksyms.h
@@ -75,7 +75,7 @@ struct string_list *copy_list_range(struct string_list *start,
 int yylex(void);
 int yyparse(void);
 
-void error_with_pos(const char *, ...);
+void error_with_pos(const char *, ...) __attribute__ ((format(printf, 1, 2)));
 
 /*----------------------------------------------------------------------*/
 #define xmalloc(size) ({ void *__ptr = malloc(size);		\
diff --git a/scripts/genksyms/parse.tab.c_shipped b/scripts/genksyms/parse.tab.c_shipped
index 69148d30ca3f..d02258bafe7b 100644
--- a/scripts/genksyms/parse.tab.c_shipped
+++ b/scripts/genksyms/parse.tab.c_shipped
@@ -440,16 +440,16 @@ union yyalloc
 /* YYFINAL -- State number of the termination state.  */
 #define YYFINAL  4
 /* YYLAST -- Last index in YYTABLE.  */
-#define YYLAST   524
+#define YYLAST   522
 
 /* YYNTOKENS -- Number of terminals.  */
 #define YYNTOKENS  55
 /* YYNNTS -- Number of nonterminals.  */
 #define YYNNTS  49
 /* YYNRULES -- Number of rules.  */
-#define YYNRULES  134
+#define YYNRULES  133
 /* YYNRULES -- Number of states.  */
-#define YYNSTATES  189
+#define YYNSTATES  187
 
 /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX.  */
 #define YYUNDEFTOK  2
@@ -506,13 +506,13 @@ static const yytype_uint16 yyprhs[] =
       97,   101,   105,   109,   112,   115,   118,   120,   122,   124,
      126,   128,   130,   132,   134,   136,   138,   140,   142,   145,
      146,   148,   150,   153,   155,   157,   159,   161,   164,   166,
-     168,   170,   175,   180,   183,   187,   191,   194,   196,   198,
-     200,   205,   210,   213,   217,   221,   224,   226,   230,   231,
-     233,   235,   239,   242,   245,   247,   248,   250,   252,   257,
-     262,   265,   269,   273,   277,   278,   280,   283,   287,   291,
-     292,   294,   296,   299,   303,   306,   307,   309,   311,   315,
-     318,   321,   323,   326,   327,   330,   334,   339,   341,   345,
-     347,   351,   354,   355,   357
+     168,   170,   175,   180,   183,   187,   190,   192,   194,   196,
+     201,   206,   209,   213,   217,   220,   222,   226,   227,   229,
+     231,   235,   238,   241,   243,   244,   246,   248,   253,   258,
+     261,   265,   269,   273,   274,   276,   279,   283,   287,   288,
+     290,   292,   295,   299,   302,   303,   305,   307,   311,   314,
+     317,   319,   322,   323,   326,   330,   335,   337,   341,   343,
+     347,   350,   351,   353
 };
 
 /* YYRHS -- A `-1'-separated list of the rules' RHS.  */
@@ -536,25 +536,24 @@ static const yytype_int8 yyrhs[] =
       74,    75,    -1,     8,    -1,    27,    -1,    32,    -1,    18,
       -1,    72,    76,    -1,    77,    -1,    39,    -1,    43,    -1,
       77,    49,    80,    50,    -1,    77,    49,     1,    50,    -1,
-      77,    35,    -1,    49,    76,    50,    -1,    49,     1,    50,
-      -1,    72,    78,    -1,    79,    -1,    39,    -1,    43,    -1,
-      79,    49,    80,    50,    -1,    79,    49,     1,    50,    -1,
-      79,    35,    -1,    49,    78,    50,    -1,    49,     1,    50,
-      -1,    81,    38,    -1,    81,    -1,    82,    48,    38,    -1,
-      -1,    82,    -1,    83,    -1,    82,    48,    83,    -1,    67,
-      84,    -1,    72,    84,    -1,    85,    -1,    -1,    39,    -1,
-      43,    -1,    85,    49,    80,    50,    -1,    85,    49,     1,
-      50,    -1,    85,    35,    -1,    49,    84,    50,    -1,    49,
-       1,    50,    -1,    66,    76,    34,    -1,    -1,    88,    -1,
-      52,    36,    -1,    53,    90,    47,    -1,    53,     1,    47,
-      -1,    -1,    91,    -1,    92,    -1,    91,    92,    -1,    66,
-      93,    46,    -1,     1,    46,    -1,    -1,    94,    -1,    95,
-      -1,    94,    48,    95,    -1,    78,    97,    -1,    39,    96,
-      -1,    96,    -1,    54,    36,    -1,    -1,    97,    32,    -1,
-      53,    99,    47,    -1,    53,    99,    48,    47,    -1,   100,
-      -1,    99,    48,   100,    -1,    39,    -1,    39,    52,    36,
-      -1,    31,    46,    -1,    -1,    31,    -1,    30,    49,    39,
-      50,    46,    -1
+      77,    35,    -1,    49,    76,    50,    -1,    72,    78,    -1,
+      79,    -1,    39,    -1,    43,    -1,    79,    49,    80,    50,
+      -1,    79,    49,     1,    50,    -1,    79,    35,    -1,    49,
+      78,    50,    -1,    49,     1,    50,    -1,    81,    38,    -1,
+      81,    -1,    82,    48,    38,    -1,    -1,    82,    -1,    83,
+      -1,    82,    48,    83,    -1,    67,    84,    -1,    72,    84,
+      -1,    85,    -1,    -1,    39,    -1,    43,    -1,    85,    49,
+      80,    50,    -1,    85,    49,     1,    50,    -1,    85,    35,
+      -1,    49,    84,    50,    -1,    49,     1,    50,    -1,    66,
+      76,    34,    -1,    -1,    88,    -1,    52,    36,    -1,    53,
+      90,    47,    -1,    53,     1,    47,    -1,    -1,    91,    -1,
+      92,    -1,    91,    92,    -1,    66,    93,    46,    -1,     1,
+      46,    -1,    -1,    94,    -1,    95,    -1,    94,    48,    95,
+      -1,    78,    97,    -1,    39,    96,    -1,    96,    -1,    54,
+      36,    -1,    -1,    97,    32,    -1,    53,    99,    47,    -1,
+      53,    99,    48,    47,    -1,   100,    -1,    99,    48,   100,
+      -1,    39,    -1,    39,    52,    36,    -1,    31,    46,    -1,
+      -1,    31,    -1,    30,    49,    39,    50,    46,    -1
 };
 
 /* YYRLINE[YYN] -- source line where rule number YYN was defined.  */
@@ -567,13 +566,13 @@ static const yytype_uint16 yyrline[] =
      238,   240,   242,   247,   250,   251,   255,   256,   257,   258,
      259,   260,   261,   262,   263,   264,   265,   266,   270,   275,
      276,   280,   281,   285,   285,   285,   286,   294,   295,   299,
-     308,   317,   319,   321,   323,   325,   332,   333,   337,   338,
-     339,   341,   343,   345,   347,   352,   353,   354,   358,   359,
-     363,   364,   369,   374,   376,   380,   381,   389,   393,   395,
-     397,   399,   401,   406,   415,   416,   421,   426,   427,   431,
-     432,   436,   437,   441,   443,   448,   449,   453,   454,   458,
-     459,   460,   464,   468,   469,   473,   474,   478,   479,   482,
-     487,   495,   499,   500,   504
+     308,   317,   319,   321,   323,   330,   331,   335,   336,   337,
+     339,   341,   343,   345,   350,   351,   352,   356,   357,   361,
+     362,   367,   372,   374,   378,   379,   387,   391,   393,   395,
+     397,   399,   404,   413,   414,   419,   424,   425,   429,   430,
+     434,   435,   439,   441,   446,   447,   451,   452,   456,   457,
+     458,   462,   466,   467,   471,   472,   476,   477,   480,   485,
+     493,   497,   498,   502
 };
 #endif
 
@@ -636,13 +635,13 @@ static const yytype_uint8 yyr1[] =
       70,    70,    70,    70,    70,    70,    71,    71,    71,    71,
       71,    71,    71,    71,    71,    71,    71,    71,    72,    73,
       73,    74,    74,    75,    75,    75,    75,    76,    76,    77,
-      77,    77,    77,    77,    77,    77,    78,    78,    79,    79,
-      79,    79,    79,    79,    79,    80,    80,    80,    81,    81,
-      82,    82,    83,    84,    84,    85,    85,    85,    85,    85,
-      85,    85,    85,    86,    87,    87,    88,    89,    89,    90,
-      90,    91,    91,    92,    92,    93,    93,    94,    94,    95,
-      95,    95,    96,    97,    97,    98,    98,    99,    99,   100,
-     100,   101,   102,   102,   103
+      77,    77,    77,    77,    77,    78,    78,    79,    79,    79,
+      79,    79,    79,    79,    80,    80,    80,    81,    81,    82,
+      82,    83,    84,    84,    85,    85,    85,    85,    85,    85,
+      85,    85,    86,    87,    87,    88,    89,    89,    90,    90,
+      91,    91,    92,    92,    93,    93,    94,    94,    95,    95,
+      95,    96,    97,    97,    98,    98,    99,    99,   100,   100,
+     101,   102,   102,   103
 };
 
 /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN.  */
@@ -655,13 +654,13 @@ static const yytype_uint8 yyr2[] =
        3,     3,     3,     2,     2,     2,     1,     1,     1,     1,
        1,     1,     1,     1,     1,     1,     1,     1,     2,     0,
        1,     1,     2,     1,     1,     1,     1,     2,     1,     1,
-       1,     4,     4,     2,     3,     3,     2,     1,     1,     1,
-       4,     4,     2,     3,     3,     2,     1,     3,     0,     1,
-       1,     3,     2,     2,     1,     0,     1,     1,     4,     4,
-       2,     3,     3,     3,     0,     1,     2,     3,     3,     0,
-       1,     1,     2,     3,     2,     0,     1,     1,     3,     2,
-       2,     1,     2,     0,     2,     3,     4,     1,     3,     1,
-       3,     2,     0,     1,     5
+       1,     4,     4,     2,     3,     2,     1,     1,     1,     4,
+       4,     2,     3,     3,     2,     1,     3,     0,     1,     1,
+       3,     2,     2,     1,     0,     1,     1,     4,     4,     2,
+       3,     3,     3,     0,     1,     2,     3,     3,     0,     1,
+       1,     2,     3,     2,     0,     1,     1,     3,     2,     2,
+       1,     2,     0,     2,     3,     4,     1,     3,     1,     3,
+       2,     0,     1,     5
 };
 
 /* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM.
@@ -675,189 +674,189 @@ static const yytype_uint8 yydefact[] =
        0,    56,     0,     0,    65,    36,    57,     5,    10,    17,
       23,    24,    26,    27,    33,    34,    11,    12,    13,    14,
       15,    39,     0,    43,     6,    37,     0,    44,    22,    38,
-      45,     0,     0,   131,    69,    70,     0,    59,     0,    18,
-      19,     0,   132,    68,    25,    42,   129,     0,   127,    22,
-      40,     0,   115,     0,     0,   111,     9,    17,    41,    95,
-       0,     0,     0,     0,    58,    60,    61,    16,     0,    67,
-     133,   103,   123,    73,     0,     0,   125,     0,     7,   114,
-     108,    78,    79,     0,     0,     0,   123,    77,     0,   116,
-     117,   121,   107,     0,   112,   132,    96,    57,     0,    95,
-      92,    94,    35,     0,    75,    74,    62,    20,   104,     0,
-       0,    86,    89,    90,   130,   126,   128,   120,     0,    78,
-       0,   122,    76,   119,    82,     0,   113,     0,     0,    97,
-       0,    93,   100,     0,   134,   124,     0,    21,   105,    72,
-      71,    85,     0,    84,    83,     0,     0,   118,   102,   101,
-       0,     0,   106,    87,    91,    81,    80,    99,    98
+      45,     0,     0,   130,    69,    70,     0,    59,     0,    18,
+      19,     0,   131,    68,    25,    42,   128,     0,   126,    22,
+      40,     0,   114,     0,     0,   110,     9,    17,    41,    94,
+       0,     0,     0,    58,    60,    61,    16,     0,    67,   132,
+     102,   122,    73,     0,     0,   124,     0,     7,   113,   107,
+      77,    78,     0,     0,     0,   122,    76,     0,   115,   116,
+     120,   106,     0,   111,   131,    95,    57,     0,    94,    91,
+      93,    35,     0,    74,    62,    20,   103,     0,     0,    85,
+      88,    89,   129,   125,   127,   119,     0,    77,     0,   121,
+      75,   118,    81,     0,   112,     0,     0,    96,     0,    92,
+      99,     0,   133,   123,     0,    21,   104,    72,    71,    84,
+       0,    83,    82,     0,     0,   117,   101,   100,     0,     0,
+     105,    86,    90,    80,    79,    98,    97
 };
 
 /* YYDEFGOTO[NTERM-NUM].  */
 static const yytype_int16 yydefgoto[] =
 {
       -1,     1,     2,     3,    37,    79,    58,    38,    68,    69,
-      70,    82,    40,    41,    42,    43,    44,    71,    94,    95,
-      45,   125,    73,   116,   117,   140,   141,   142,   143,   130,
-     131,    46,   167,   168,    57,    83,    84,    85,   118,   119,
-     120,   121,   138,    53,    77,    78,    47,   102,    48
+      70,    82,    40,    41,    42,    43,    44,    71,    93,    94,
+      45,   124,    73,   115,   116,   138,   139,   140,   141,   129,
+     130,    46,   165,   166,    57,    83,    84,    85,   117,   118,
+     119,   120,   136,    53,    77,    78,    47,   101,    48
 };
 
 /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
    STATE-NUM.  */
-#define YYPACT_NINF -111
+#define YYPACT_NINF -94
 static const yytype_int16 yypact[] =
 {
-    -111,    13,  -111,   210,  -111,  -111,    28,  -111,  -111,  -111,
-    -111,  -111,   -27,  -111,    44,  -111,  -111,  -111,  -111,  -111,
-    -111,  -111,  -111,  -111,   -24,  -111,   -20,  -111,  -111,  -111,
-      31,  -111,    32,    42,  -111,  -111,  -111,  -111,  -111,    34,
-     481,  -111,  -111,  -111,  -111,  -111,  -111,  -111,  -111,  -111,
-    -111,    51,    56,  -111,  -111,    52,   108,  -111,   481,    52,
-    -111,   481,    58,  -111,  -111,  -111,    19,     0,    54,    55,
-    -111,    34,    30,   -18,  -111,  -111,    68,   -25,  -111,   481,
-    -111,    45,    33,    59,   159,  -111,  -111,    34,  -111,   395,
-      57,    60,    81,    88,  -111,     0,  -111,  -111,    34,  -111,
-    -111,  -111,  -111,  -111,   257,    72,  -111,   -23,  -111,  -111,
-    -111,    85,  -111,    20,   106,    47,  -111,   -10,    97,    96,
-    -111,  -111,  -111,    99,  -111,   115,  -111,  -111,     5,    50,
-    -111,    11,  -111,   102,  -111,  -111,  -111,  -111,   -22,   100,
-     103,   111,   104,  -111,  -111,  -111,  -111,  -111,   113,  -111,
-     121,  -111,  -111,   124,  -111,   303,  -111,    33,   132,  -111,
-     139,  -111,  -111,   349,  -111,  -111,   122,  -111,  -111,  -111,
-    -111,  -111,   442,  -111,  -111,   140,   143,  -111,  -111,  -111,
-     144,   145,  -111,  -111,  -111,  -111,  -111,  -111,  -111
+     -94,    15,   -94,   208,   -94,   -94,    34,   -94,   -94,   -94,
+     -94,   -94,   -27,   -94,    -5,   -94,   -94,   -94,   -94,   -94,
+     -94,   -94,   -94,   -94,   -25,   -94,   -16,   -94,   -94,   -94,
+      -4,   -94,    19,   -24,   -94,   -94,   -94,   -94,   -94,    24,
+     479,   -94,   -94,   -94,   -94,   -94,   -94,   -94,   -94,   -94,
+     -94,    29,    48,   -94,   -94,    37,   106,   -94,   479,    37,
+     -94,   479,    54,   -94,   -94,   -94,    24,    -2,    49,    53,
+     -94,    24,   -14,   -11,   -94,   -94,    47,    38,   -94,   479,
+     -94,    51,    23,    55,   157,   -94,   -94,    24,   -94,   393,
+      56,    58,    68,   -94,    -2,   -94,   -94,    24,   -94,   -94,
+     -94,   -94,   -94,   255,    67,   -94,     5,   -94,   -94,   -94,
+      50,   -94,     7,    69,    40,   -94,    -8,    83,    88,   -94,
+     -94,   -94,    91,   -94,   109,   -94,   -94,     4,    45,   -94,
+      16,   -94,    95,   -94,   -94,   -94,   -23,    92,    93,   108,
+      96,   -94,   -94,   -94,   -94,   -94,    97,   -94,    98,   -94,
+     -94,   118,   -94,   301,   -94,    23,   101,   -94,   104,   -94,
+     -94,   347,   -94,   -94,   120,   -94,   -94,   -94,   -94,   -94,
+     440,   -94,   -94,   111,   119,   -94,   -94,   -94,   130,   137,
+     -94,   -94,   -94,   -94,   -94,   -94,   -94
 };
 
 /* YYPGOTO[NTERM-NUM].  */
 static const yytype_int16 yypgoto[] =
 {
-    -111,  -111,   160,  -111,  -111,  -111,  -111,   -51,  -111,  -111,
-      98,    -1,   -61,   -37,  -111,  -111,  -111,   -78,  -111,  -111,
-     -53,   -30,  -111,   -66,  -111,  -110,  -111,  -111,   -60,   -63,
-    -111,  -111,  -111,  -111,   -21,  -111,  -111,   116,  -111,  -111,
-      40,    90,    83,   152,  -111,   105,  -111,  -111,  -111
+     -94,   -94,   158,   -94,   -94,   -94,   -94,   -45,   -94,   -94,
+      94,    -1,   -61,   -29,   -94,   -94,   -94,   -79,   -94,   -94,
+     -63,    -7,   -94,   -93,   -94,   -92,   -94,   -94,   -60,   -57,
+     -94,   -94,   -94,   -94,   -19,   -94,   -94,   110,   -94,   -94,
+      33,    82,    78,   144,   -94,    99,   -94,   -94,   -94
 };
 
 /* YYTABLE[YYPACT[STATE-NUM]].  What to do in state STATE-NUM.  If
    positive, shift that token.  If negative, reduce the rule which
    number is the opposite.  If YYTABLE_NINF, syntax error.  */
-#define YYTABLE_NINF -111
+#define YYTABLE_NINF -110
 static const yytype_int16 yytable[] =
 {
-      89,    90,    39,    74,   115,    60,   158,    86,    10,    72,
-     165,   129,    51,     4,    96,    55,    76,   103,    20,    59,
-      92,   148,   106,   107,   145,   154,    52,    29,   108,    56,
-     166,   104,    34,    56,    80,   115,    93,   115,    88,   155,
-     -95,    99,   136,    89,   126,   176,   162,   150,   159,   152,
-     129,   129,    74,   181,   128,   -95,    67,    87,    64,   149,
-     163,   100,    65,   112,   101,   160,   161,    54,    66,   113,
-      67,    67,   111,    64,    49,    50,   112,    65,    87,   115,
-      61,    62,   113,    66,    67,    67,   149,   114,    63,   126,
-     112,   109,   110,   159,    89,    76,   113,    91,    67,   128,
-      97,    67,    89,    98,    52,    56,   122,   132,   144,    81,
-     133,    89,   184,     7,     8,     9,    10,    11,    12,    13,
-     105,    15,    16,    17,    18,    19,    20,    21,    22,    23,
-      24,   134,    26,    27,    28,    29,    30,    31,   135,   114,
-      34,    35,   151,   156,   157,   109,   100,   -22,   164,   171,
-     169,    36,   172,   170,   -22,  -109,   165,   -22,   182,   -22,
-     123,     5,   -22,   173,     7,     8,     9,    10,    11,    12,
-      13,   174,    15,    16,    17,    18,    19,    20,    21,    22,
-      23,    24,   178,    26,    27,    28,    29,    30,    31,   179,
-     185,    34,    35,   186,   187,   188,   137,   177,   -22,   153,
-     124,   147,    36,    75,     0,   -22,  -110,     0,   -22,     0,
-     -22,     6,   146,   -22,     0,     7,     8,     9,    10,    11,
-      12,    13,    14,    15,    16,    17,    18,    19,    20,    21,
-      22,    23,    24,    25,    26,    27,    28,    29,    30,    31,
-      32,    33,    34,    35,     0,     0,     0,     0,     0,   -22,
-       0,     0,     0,    36,     0,     0,   -22,     0,   139,   -22,
-       0,   -22,     7,     8,     9,    10,    11,    12,    13,     0,
+      89,    90,    39,   114,    95,   156,    10,    60,   146,   163,
+     128,    74,    51,    86,    55,     4,    20,    99,    54,   148,
+     100,   150,    63,    59,   102,    29,    52,   152,    56,   164,
+      34,   134,    72,   114,   107,   114,    80,    56,   103,   -94,
+      88,   153,    89,   125,    76,    61,   147,   157,   128,   128,
+     111,   160,   143,   127,   -94,    67,   112,    87,    67,    92,
+      74,   174,   110,    64,    98,   161,   111,    65,    62,   179,
+     158,   159,   112,    66,    67,    67,   114,   113,    87,   147,
+      49,    50,    52,   111,   125,   105,   106,    76,   157,   112,
+      56,    67,    89,    91,   127,    96,    67,   108,   109,   104,
+      89,    97,   121,   142,   113,   149,   131,    81,   132,    89,
+     182,     7,     8,     9,    10,    11,    12,    13,   133,    15,
+      16,    17,    18,    19,    20,    21,    22,    23,    24,   154,
+      26,    27,    28,    29,    30,    31,   155,   108,    34,    35,
+      99,   162,   167,   168,   170,   -22,   169,   171,   172,    36,
+     163,   176,   -22,  -108,   177,   -22,   180,   -22,   122,     5,
+     -22,   183,     7,     8,     9,    10,    11,    12,    13,   184,
+      15,    16,    17,    18,    19,    20,    21,    22,    23,    24,
+     185,    26,    27,    28,    29,    30,    31,   186,   175,    34,
+      35,   135,   145,   151,   123,    75,   -22,     0,     0,     0,
+      36,     0,     0,   -22,  -109,   144,   -22,     0,   -22,     6,
+       0,   -22,     0,     7,     8,     9,    10,    11,    12,    13,
+      14,    15,    16,    17,    18,    19,    20,    21,    22,    23,
+      24,    25,    26,    27,    28,    29,    30,    31,    32,    33,
+      34,    35,     0,     0,     0,     0,     0,   -22,     0,     0,
+       0,    36,     0,     0,   -22,     0,   137,   -22,     0,   -22,
+       7,     8,     9,    10,    11,    12,    13,     0,    15,    16,
+      17,    18,    19,    20,    21,    22,    23,    24,     0,    26,
+      27,    28,    29,    30,    31,     0,     0,    34,    35,     0,
+       0,     0,     0,   -87,     0,     0,     0,     0,    36,     0,
+       0,     0,   173,     0,     0,   -87,     7,     8,     9,    10,
+      11,    12,    13,     0,    15,    16,    17,    18,    19,    20,
+      21,    22,    23,    24,     0,    26,    27,    28,    29,    30,
+      31,     0,     0,    34,    35,     0,     0,     0,     0,   -87,
+       0,     0,     0,     0,    36,     0,     0,     0,   178,     0,
+       0,   -87,     7,     8,     9,    10,    11,    12,    13,     0,
       15,    16,    17,    18,    19,    20,    21,    22,    23,    24,
        0,    26,    27,    28,    29,    30,    31,     0,     0,    34,
-      35,     0,     0,     0,     0,   -88,     0,     0,     0,     0,
-      36,     0,     0,     0,   175,     0,     0,   -88,     7,     8,
+      35,     0,     0,     0,     0,   -87,     0,     0,     0,     0,
+      36,     0,     0,     0,     0,     0,     0,   -87,     7,     8,
        9,    10,    11,    12,    13,     0,    15,    16,    17,    18,
       19,    20,    21,    22,    23,    24,     0,    26,    27,    28,
       29,    30,    31,     0,     0,    34,    35,     0,     0,     0,
-       0,   -88,     0,     0,     0,     0,    36,     0,     0,     0,
-     180,     0,     0,   -88,     7,     8,     9,    10,    11,    12,
+       0,     0,   125,     0,     0,     0,   126,     0,     0,     0,
+       0,     0,   127,     0,    67,     7,     8,     9,    10,    11,
+      12,    13,     0,    15,    16,    17,    18,    19,    20,    21,
+      22,    23,    24,     0,    26,    27,    28,    29,    30,    31,
+       0,     0,    34,    35,     0,     0,     0,     0,   181,     0,
+       0,     0,     0,    36,     7,     8,     9,    10,    11,    12,
       13,     0,    15,    16,    17,    18,    19,    20,    21,    22,
       23,    24,     0,    26,    27,    28,    29,    30,    31,     0,
-       0,    34,    35,     0,     0,     0,     0,   -88,     0,     0,
-       0,     0,    36,     0,     0,     0,     0,     0,     0,   -88,
-       7,     8,     9,    10,    11,    12,    13,     0,    15,    16,
-      17,    18,    19,    20,    21,    22,    23,    24,     0,    26,
-      27,    28,    29,    30,    31,     0,     0,    34,    35,     0,
-       0,     0,     0,     0,   126,     0,     0,     0,   127,     0,
-       0,     0,     0,     0,   128,     0,    67,     7,     8,     9,
-      10,    11,    12,    13,     0,    15,    16,    17,    18,    19,
-      20,    21,    22,    23,    24,     0,    26,    27,    28,    29,
-      30,    31,     0,     0,    34,    35,     0,     0,     0,     0,
-     183,     0,     0,     0,     0,    36,     7,     8,     9,    10,
-      11,    12,    13,     0,    15,    16,    17,    18,    19,    20,
-      21,    22,    23,    24,     0,    26,    27,    28,    29,    30,
-      31,     0,     0,    34,    35,     0,     0,     0,     0,     0,
-       0,     0,     0,     0,    36
+       0,    34,    35,     0,     0,     0,     0,     0,     0,     0,
+       0,     0,    36
 };
 
 #define yypact_value_is_default(Yystate) \
-  (!!((Yystate) == (-111)))
+  (!!((Yystate) == (-94)))
 
 #define yytable_value_is_error(Yytable_value) \
   YYID (0)
 
 static const yytype_int16 yycheck[] =
 {
-      61,    61,     3,    40,    82,    26,     1,    58,     8,    39,
-      32,    89,    39,     0,    67,    39,    39,    35,    18,    39,
-       1,     1,    47,    48,    47,    35,    53,    27,    79,    53,
-      52,    49,    32,    53,    55,   113,    66,   115,    59,    49,
-      35,    71,    95,   104,    39,   155,    35,   113,    43,   115,
-     128,   129,    89,   163,    49,    50,    51,    58,    39,    39,
-      49,    31,    43,    43,    34,   128,   129,    23,    49,    49,
-      51,    51,    39,    39,    46,    47,    43,    43,    79,   157,
-      49,    49,    49,    49,    51,    51,    39,    54,    46,    39,
-      43,    46,    47,    43,   155,    39,    49,    39,    51,    49,
-      46,    51,   163,    48,    53,    53,    47,    50,    36,     1,
-      50,   172,   172,     5,     6,     7,     8,     9,    10,    11,
-      52,    13,    14,    15,    16,    17,    18,    19,    20,    21,
-      22,    50,    24,    25,    26,    27,    28,    29,    50,    54,
-      32,    33,    36,    46,    48,    46,    31,    39,    46,    38,
-      50,    43,    48,    50,    46,    47,    32,    49,    36,    51,
-       1,     1,    54,    50,     5,     6,     7,     8,     9,    10,
-      11,    50,    13,    14,    15,    16,    17,    18,    19,    20,
-      21,    22,    50,    24,    25,    26,    27,    28,    29,    50,
-      50,    32,    33,    50,    50,    50,    98,   157,    39,   116,
-      84,   111,    43,    51,    -1,    46,    47,    -1,    49,    -1,
-      51,     1,   107,    54,    -1,     5,     6,     7,     8,     9,
-      10,    11,    12,    13,    14,    15,    16,    17,    18,    19,
-      20,    21,    22,    23,    24,    25,    26,    27,    28,    29,
-      30,    31,    32,    33,    -1,    -1,    -1,    -1,    -1,    39,
-      -1,    -1,    -1,    43,    -1,    -1,    46,    -1,     1,    49,
-      -1,    51,     5,     6,     7,     8,     9,    10,    11,    -1,
+      61,    61,     3,    82,    67,     1,     8,    26,     1,    32,
+      89,    40,    39,    58,    39,     0,    18,    31,    23,   112,
+      34,   114,    46,    39,    35,    27,    53,    35,    53,    52,
+      32,    94,    39,   112,    79,   114,    55,    53,    49,    35,
+      59,    49,   103,    39,    39,    49,    39,    43,   127,   128,
+      43,    35,    47,    49,    50,    51,    49,    58,    51,    66,
+      89,   153,    39,    39,    71,    49,    43,    43,    49,   161,
+     127,   128,    49,    49,    51,    51,   155,    54,    79,    39,
+      46,    47,    53,    43,    39,    47,    48,    39,    43,    49,
+      53,    51,   153,    39,    49,    46,    51,    46,    47,    52,
+     161,    48,    47,    36,    54,    36,    50,     1,    50,   170,
+     170,     5,     6,     7,     8,     9,    10,    11,    50,    13,
+      14,    15,    16,    17,    18,    19,    20,    21,    22,    46,
+      24,    25,    26,    27,    28,    29,    48,    46,    32,    33,
+      31,    46,    50,    50,    48,    39,    38,    50,    50,    43,
+      32,    50,    46,    47,    50,    49,    36,    51,     1,     1,
+      54,    50,     5,     6,     7,     8,     9,    10,    11,    50,
+      13,    14,    15,    16,    17,    18,    19,    20,    21,    22,
+      50,    24,    25,    26,    27,    28,    29,    50,   155,    32,
+      33,    97,   110,   115,    84,    51,    39,    -1,    -1,    -1,
+      43,    -1,    -1,    46,    47,   106,    49,    -1,    51,     1,
+      -1,    54,    -1,     5,     6,     7,     8,     9,    10,    11,
+      12,    13,    14,    15,    16,    17,    18,    19,    20,    21,
+      22,    23,    24,    25,    26,    27,    28,    29,    30,    31,
+      32,    33,    -1,    -1,    -1,    -1,    -1,    39,    -1,    -1,
+      -1,    43,    -1,    -1,    46,    -1,     1,    49,    -1,    51,
+       5,     6,     7,     8,     9,    10,    11,    -1,    13,    14,
+      15,    16,    17,    18,    19,    20,    21,    22,    -1,    24,
+      25,    26,    27,    28,    29,    -1,    -1,    32,    33,    -1,
+      -1,    -1,    -1,    38,    -1,    -1,    -1,    -1,    43,    -1,
+      -1,    -1,     1,    -1,    -1,    50,     5,     6,     7,     8,
+       9,    10,    11,    -1,    13,    14,    15,    16,    17,    18,
+      19,    20,    21,    22,    -1,    24,    25,    26,    27,    28,
+      29,    -1,    -1,    32,    33,    -1,    -1,    -1,    -1,    38,
+      -1,    -1,    -1,    -1,    43,    -1,    -1,    -1,     1,    -1,
+      -1,    50,     5,     6,     7,     8,     9,    10,    11,    -1,
       13,    14,    15,    16,    17,    18,    19,    20,    21,    22,
       -1,    24,    25,    26,    27,    28,    29,    -1,    -1,    32,
       33,    -1,    -1,    -1,    -1,    38,    -1,    -1,    -1,    -1,
-      43,    -1,    -1,    -1,     1,    -1,    -1,    50,     5,     6,
+      43,    -1,    -1,    -1,    -1,    -1,    -1,    50,     5,     6,
        7,     8,     9,    10,    11,    -1,    13,    14,    15,    16,
       17,    18,    19,    20,    21,    22,    -1,    24,    25,    26,
       27,    28,    29,    -1,    -1,    32,    33,    -1,    -1,    -1,
-      -1,    38,    -1,    -1,    -1,    -1,    43,    -1,    -1,    -1,
-       1,    -1,    -1,    50,     5,     6,     7,     8,     9,    10,
+      -1,    -1,    39,    -1,    -1,    -1,    43,    -1,    -1,    -1,
+      -1,    -1,    49,    -1,    51,     5,     6,     7,     8,     9,
+      10,    11,    -1,    13,    14,    15,    16,    17,    18,    19,
+      20,    21,    22,    -1,    24,    25,    26,    27,    28,    29,
+      -1,    -1,    32,    33,    -1,    -1,    -1,    -1,    38,    -1,
+      -1,    -1,    -1,    43,     5,     6,     7,     8,     9,    10,
       11,    -1,    13,    14,    15,    16,    17,    18,    19,    20,
       21,    22,    -1,    24,    25,    26,    27,    28,    29,    -1,
-      -1,    32,    33,    -1,    -1,    -1,    -1,    38,    -1,    -1,
-      -1,    -1,    43,    -1,    -1,    -1,    -1,    -1,    -1,    50,
-       5,     6,     7,     8,     9,    10,    11,    -1,    13,    14,
-      15,    16,    17,    18,    19,    20,    21,    22,    -1,    24,
-      25,    26,    27,    28,    29,    -1,    -1,    32,    33,    -1,
-      -1,    -1,    -1,    -1,    39,    -1,    -1,    -1,    43,    -1,
-      -1,    -1,    -1,    -1,    49,    -1,    51,     5,     6,     7,
-       8,     9,    10,    11,    -1,    13,    14,    15,    16,    17,
-      18,    19,    20,    21,    22,    -1,    24,    25,    26,    27,
-      28,    29,    -1,    -1,    32,    33,    -1,    -1,    -1,    -1,
-      38,    -1,    -1,    -1,    -1,    43,     5,     6,     7,     8,
-       9,    10,    11,    -1,    13,    14,    15,    16,    17,    18,
-      19,    20,    21,    22,    -1,    24,    25,    26,    27,    28,
-      29,    -1,    -1,    32,    33,    -1,    -1,    -1,    -1,    -1,
-      -1,    -1,    -1,    -1,    43
+      -1,    32,    33,    -1,    -1,    -1,    -1,    -1,    -1,    -1,
+      -1,    -1,    43
 };
 
 /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
@@ -873,16 +872,16 @@ static const yytype_uint8 yystos[] =
       89,    49,    49,    46,    39,    43,    49,    51,    63,    64,
       65,    72,    76,    77,    68,    98,    39,    99,   100,    60,
       89,     1,    66,    90,    91,    92,    62,    66,    89,    67,
-      83,    39,     1,    76,    73,    74,    75,    46,    48,    76,
-      31,    34,   102,    35,    49,    52,    47,    48,    62,    46,
-      47,    39,    43,    49,    54,    72,    78,    79,    93,    94,
-      95,    96,    47,     1,    92,    76,    39,    43,    49,    72,
-      84,    85,    50,    50,    50,    50,    75,    65,    97,     1,
-      80,    81,    82,    83,    36,    47,   100,    96,     1,    39,
-      78,    36,    78,    97,    35,    49,    46,    48,     1,    43,
-      84,    84,    35,    49,    46,    32,    52,    87,    88,    50,
-      50,    38,    48,    50,    50,     1,    80,    95,    50,    50,
-       1,    80,    36,    38,    83,    50,    50,    50,    50
+      83,    39,    76,    73,    74,    75,    46,    48,    76,    31,
+      34,   102,    35,    49,    52,    47,    48,    62,    46,    47,
+      39,    43,    49,    54,    72,    78,    79,    93,    94,    95,
+      96,    47,     1,    92,    76,    39,    43,    49,    72,    84,
+      85,    50,    50,    50,    75,    65,    97,     1,    80,    81,
+      82,    83,    36,    47,   100,    96,     1,    39,    78,    36,
+      78,    97,    35,    49,    46,    48,     1,    43,    84,    84,
+      35,    49,    46,    32,    52,    87,    88,    50,    50,    38,
+      48,    50,    50,     1,    80,    95,    50,    50,     1,    80,
+      36,    38,    83,    50,    50,    50,    50
 };
 
 #define yyerrok		(yyerrstatus = 0)
@@ -1928,12 +1927,12 @@ yyreduce:
 
   case 75:
 
-    { (yyval) = (yyvsp[(3) - (3)]); }
+    { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 76:
+  case 79:
 
-    { (yyval) = (yyvsp[(2) - (2)]); }
+    { (yyval) = (yyvsp[(4) - (4)]); }
     break;
 
   case 80:
@@ -1943,12 +1942,12 @@ yyreduce:
 
   case 81:
 
-    { (yyval) = (yyvsp[(4) - (4)]); }
+    { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
   case 82:
 
-    { (yyval) = (yyvsp[(2) - (2)]); }
+    { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
   case 83:
@@ -1958,45 +1957,40 @@ yyreduce:
 
   case 84:
 
-    { (yyval) = (yyvsp[(3) - (3)]); }
-    break;
-
-  case 85:
-
     { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 87:
+  case 86:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 88:
+  case 87:
 
     { (yyval) = NULL; }
     break;
 
-  case 91:
+  case 90:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 92:
+  case 91:
 
     { (yyval) = (yyvsp[(2) - (2)]) ? (yyvsp[(2) - (2)]) : (yyvsp[(1) - (2)]); }
     break;
 
-  case 93:
+  case 92:
 
     { (yyval) = (yyvsp[(2) - (2)]) ? (yyvsp[(2) - (2)]) : (yyvsp[(1) - (2)]); }
     break;
 
-  case 95:
+  case 94:
 
     { (yyval) = NULL; }
     break;
 
-  case 96:
+  case 95:
 
     { /* For version 2 checksums, we don't want to remember
 		     private parameter names.  */
@@ -2005,39 +1999,39 @@ yyreduce:
 		}
     break;
 
-  case 97:
+  case 96:
 
     { remove_node((yyvsp[(1) - (1)]));
 		  (yyval) = (yyvsp[(1) - (1)]);
 		}
     break;
 
-  case 98:
+  case 97:
 
     { (yyval) = (yyvsp[(4) - (4)]); }
     break;
 
-  case 99:
+  case 98:
 
     { (yyval) = (yyvsp[(4) - (4)]); }
     break;
 
-  case 100:
+  case 99:
 
     { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 101:
+  case 100:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 102:
+  case 101:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 103:
+  case 102:
 
     { struct string_list *decl = *(yyvsp[(2) - (3)]);
 		  *(yyvsp[(2) - (3)]) = NULL;
@@ -2046,87 +2040,87 @@ yyreduce:
 		}
     break;
 
-  case 104:
+  case 103:
 
     { (yyval) = NULL; }
     break;
 
-  case 106:
+  case 105:
 
     { remove_list((yyvsp[(2) - (2)]), &(*(yyvsp[(1) - (2)]))->next); (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 107:
+  case 106:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 108:
+  case 107:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 109:
+  case 108:
 
     { (yyval) = NULL; }
     break;
 
-  case 112:
+  case 111:
 
     { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 113:
+  case 112:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 114:
+  case 113:
 
     { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 115:
+  case 114:
 
     { (yyval) = NULL; }
     break;
 
-  case 118:
+  case 117:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 119:
+  case 118:
 
     { (yyval) = (yyvsp[(2) - (2)]) ? (yyvsp[(2) - (2)]) : (yyvsp[(1) - (2)]); }
     break;
 
-  case 120:
+  case 119:
 
     { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 122:
+  case 121:
 
     { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 123:
+  case 122:
 
     { (yyval) = NULL; }
     break;
 
-  case 125:
+  case 124:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 126:
+  case 125:
 
     { (yyval) = (yyvsp[(4) - (4)]); }
     break;
 
-  case 129:
+  case 128:
 
     {
 			const char *name = strdup((*(yyvsp[(1) - (1)]))->string);
@@ -2134,7 +2128,7 @@ yyreduce:
 		}
     break;
 
-  case 130:
+  case 129:
 
     {
 			const char *name = strdup((*(yyvsp[(1) - (3)]))->string);
@@ -2143,17 +2137,17 @@ yyreduce:
 		}
     break;
 
-  case 131:
+  case 130:
 
     { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 132:
+  case 131:
 
     { (yyval) = NULL; }
     break;
 
-  case 134:
+  case 133:
 
     { export_symbol((*(yyvsp[(3) - (5)]))->string); (yyval) = (yyvsp[(5) - (5)]); }
     break;
diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index 4fba255e54ae..00a6d7e54971 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -322,8 +322,6 @@ direct_declarator:
 		{ $$ = $2; }
 	| '(' declarator ')'
 		{ $$ = $3; }
-	| '(' error ')'
-		{ $$ = $3; }
 	;
 
 /* Nested declarators differ from regular declarators in that they do
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 90a091b6ae4d..eb8144643b78 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -196,7 +196,7 @@ clean-files     += config.pot linux.pot
 
 # Check that we have the required ncurses stuff installed for lxdialog (menuconfig)
 PHONY += $(obj)/dochecklxdialog
-$(addprefix $(obj)/,$(lxdialog)): $(obj)/dochecklxdialog
+$(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/dochecklxdialog
 $(obj)/dochecklxdialog:
 	$(Q)$(CONFIG_SHELL) $(check-lxdialog) -check $(HOSTCC) $(HOST_EXTRACFLAGS) $(HOSTLOADLIBES_mconf)
 
diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c
index a9bc5334a478..003114779815 100644
--- a/scripts/kconfig/nconf.c
+++ b/scripts/kconfig/nconf.c
@@ -271,7 +271,7 @@ static struct mitem k_menu_items[MAX_MENU_ITEMS];
 static int items_num;
 static int global_exit;
 /* the currently selected button */
-const char *current_instructions = menu_instructions;
+static const char *current_instructions = menu_instructions;
 
 static char *dialog_input_result;
 static int dialog_input_result_len;
@@ -305,7 +305,7 @@ struct function_keys {
 };
 
 static const int function_keys_num = 9;
-struct function_keys function_keys[] = {
+static struct function_keys function_keys[] = {
 	{
 		.key_str = "F1",
 		.func = "Help",
@@ -508,7 +508,7 @@ static int get_mext_match(const char *match_str, match_f flag)
 	index = (index + items_num) % items_num;
 	while (true) {
 		char *str = k_menu_items[index].str;
-		if (strcasestr(str, match_str) != 0)
+		if (strcasestr(str, match_str) != NULL)
 			return index;
 		if (flag == FIND_NEXT_MATCH_UP ||
 		    flag == MATCH_TINKER_PATTERN_UP)
@@ -1067,7 +1067,7 @@ static int do_match(int key, struct match_state *state, int *ans)
 
 static void conf(struct menu *menu)
 {
-	struct menu *submenu = 0;
+	struct menu *submenu = NULL;
 	const char *prompt = menu_get_prompt(menu);
 	struct symbol *sym;
 	int res;
@@ -1234,7 +1234,7 @@ static void show_help(struct menu *menu)
 static void conf_choice(struct menu *menu)
 {
 	const char *prompt = _(menu_get_prompt(menu));
-	struct menu *child = 0;
+	struct menu *child = NULL;
 	struct symbol *active;
 	int selected_index = 0;
 	int last_top_row = 0;
@@ -1456,7 +1456,7 @@ static void conf_save(void)
 	}
 }
 
-void setup_windows(void)
+static void setup_windows(void)
 {
 	int lines, columns;
 
diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c
index 4b2f44c20caf..a64b1c31253e 100644
--- a/scripts/kconfig/nconf.gui.c
+++ b/scripts/kconfig/nconf.gui.c
@@ -129,7 +129,7 @@ static void no_colors_theme(void)
 	mkattrn(FUNCTION_TEXT, A_REVERSE);
 }
 
-void set_colors()
+void set_colors(void)
 {
 	start_color();
 	use_default_colors();
@@ -192,7 +192,7 @@ const char *get_line(const char *text, int line_no)
 	int lines = 0;
 
 	if (!text)
-		return 0;
+		return NULL;
 
 	for (i = 0; text[i] != '\0' && lines < line_no; i++)
 		if (text[i] == '\n')
diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index 19d9bcadc0cc..b497d9764dcf 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -7,32 +7,8 @@ modpost-objs	:= modpost.o file2alias.o sumversion.o
 
 devicetable-offsets-file := devicetable-offsets.h
 
-define sed-y
-	"/^->/{s:->#\(.*\):/* \1 */:; \
-	s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \
-	s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
-	s:->::; p;}"
-endef
-
-quiet_cmd_offsets = GEN     $@
-define cmd_offsets
-	(set -e; \
-	 echo "#ifndef __DEVICETABLE_OFFSETS_H__"; \
-	 echo "#define __DEVICETABLE_OFFSETS_H__"; \
-	 echo "/*"; \
-	 echo " * DO NOT MODIFY."; \
-	 echo " *"; \
-	 echo " * This file was generated by Kbuild"; \
-	 echo " *"; \
-	 echo " */"; \
-	 echo ""; \
-	 sed -ne $(sed-y) $<; \
-	 echo ""; \
-	 echo "#endif" ) > $@
-endef
-
-$(obj)/$(devicetable-offsets-file): $(obj)/devicetable-offsets.s
-	$(call if_changed,offsets)
+$(obj)/$(devicetable-offsets-file): $(obj)/devicetable-offsets.s FORCE
+	$(call filechk,offsets,__DEVICETABLE_OFFSETS_H__)
 
 targets += $(devicetable-offsets-file) devicetable-offsets.s
 
diff --git a/scripts/objdiff b/scripts/objdiff
index 62e51dae2138..4fb5d6796893 100755
--- a/scripts/objdiff
+++ b/scripts/objdiff
@@ -57,13 +57,15 @@ get_output_dir() {
 do_objdump() {
 	dir=$(get_output_dir $1)
 	base=${1##*/}
+	stripped=$dir/${base%.o}.stripped
 	dis=$dir/${base%.o}.dis
 
 	[ ! -d "$dir" ] && mkdir -p $dir
 
 	# remove addresses for a cleaner diff
 	# http://dummdida.tumblr.com/post/60924060451/binary-diff-between-libc-from-scientificlinux-and
-	$OBJDUMP -D $1 | sed "s/^[[:space:]]\+[0-9a-f]\+//" > $dis
+	$STRIP -g $1 -R __bug_table -R .note -R .comment -o $stripped
+	$OBJDUMP -D $stripped | sed -e "s/^[[:space:]]\+[0-9a-f]\+//" -e "s:^$stripped:$1:" > $dis
 }
 
 dorecord() {
@@ -73,6 +75,7 @@ dorecord() {
 
 	CMT="`git rev-parse --short HEAD`"
 
+	STRIP="${CROSS_COMPILE}strip"
 	OBJDUMP="${CROSS_COMPILE}objdump"
 
 	for d in $FILES; do
diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 676fc10c9514..aad67000e4dd 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -69,7 +69,7 @@ set_debarch() {
 		echo "" >&2
 		echo "** ** **  WARNING  ** ** **" >&2
 		echo "" >&2
-		echo "Your architecture doesn't have it's equivalent" >&2
+		echo "Your architecture doesn't have its equivalent" >&2
 		echo "Debian userspace architecture defined!" >&2
 		echo "Falling back to using your current userspace instead!" >&2
 		echo "Please add support for $UTS_MACHINE to ${0} ..." >&2
@@ -143,12 +143,7 @@ else
 	cp System.map "$tmpdir/boot/System.map-$version"
 	cp $KCONFIG_CONFIG "$tmpdir/boot/config-$version"
 fi
-# Not all arches include the boot path in KBUILD_IMAGE
-if [ -e $KBUILD_IMAGE ]; then
-	cp $KBUILD_IMAGE "$tmpdir/$installed_image_path"
-else
-	cp arch/$ARCH/boot/$KBUILD_IMAGE "$tmpdir/$installed_image_path"
-fi
+cp "$($MAKE -s image_name)" "$tmpdir/$installed_image_path"
 
 if grep -q "^CONFIG_OF=y" $KCONFIG_CONFIG ; then
 	# Only some architectures with OF support have this target
@@ -265,7 +260,7 @@ This is a packacked upstream version of the Linux kernel.
 The sources may be found at most Linux archive sites, including:
 https://www.kernel.org/pub/linux/kernel
 
-Copyright: 1991 - 2015 Linus Torvalds and others.
+Copyright: 1991 - 2017 Linus Torvalds and others.
 
 The git repository for mainline kernel development is at:
 git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
@@ -288,7 +283,6 @@ Section: kernel
 Priority: optional
 Maintainer: $maintainer
 Build-Depends: $build_depends
-Standards-Version: 3.8.4
 Homepage: http://www.kernel.org/
 EOF
 
@@ -296,7 +290,6 @@ if [ "$ARCH" = "um" ]; then
 	cat <<EOF >> debian/control
 
 Package: $packagename
-Provides: linux-image, linux-image-2.6, linux-modules-$version
 Architecture: any
 Description: User Mode Linux kernel, version $version
  User-mode Linux is a port of the Linux kernel to its own system call
@@ -313,7 +306,6 @@ else
 	cat <<EOF >> debian/control
 
 Package: $packagename
-Provides: linux-image, linux-image-2.6, linux-modules-$version
 Suggests: $fwpackagename
 Architecture: any
 Description: Linux kernel, version $version
@@ -346,7 +338,6 @@ rm -f "$objtree/debian/hdrsrcfiles" "$objtree/debian/hdrobjfiles"
 cat <<EOF >> debian/control
 
 Package: $kernel_headers_packagename
-Provides: linux-headers, linux-headers-2.6
 Architecture: any
 Description: Linux kernel headers for $KERNELRELEASE on \${kernel:debarch}
  This package provides kernel header files for $KERNELRELEASE on \${kernel:debarch}
@@ -404,7 +395,6 @@ if [ -n "$BUILD_DEBUG" ] ; then
 
 Package: $dbg_packagename
 Section: debug
-Provides: linux-debug, linux-debug-$version
 Architecture: any
 Description: Linux kernel debugging symbols for $version
  This package will come in handy if you need to debug the kernel. It provides
diff --git a/scripts/tags.sh b/scripts/tags.sh
index d661f2f3ef61..d23dcbf17457 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -106,6 +106,7 @@ all_compiled_sources()
 		case "$i" in
 			*.[cS])
 				j=${i/\.[cS]/\.o}
+				j="${j#$tree}"
 				if [ -e $j ]; then
 					echo $i
 				fi
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index d7f282d75cc1..1d32cd20009a 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -164,7 +164,7 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode,
 	hmac_misc.mode = inode->i_mode;
 	crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc));
 	if (evm_hmac_attrs & EVM_ATTR_FSUUID)
-		crypto_shash_update(desc, inode->i_sb->s_uuid,
+		crypto_shash_update(desc, &inode->i_sb->s_uuid.b[0],
 				    sizeof(inode->i_sb->s_uuid));
 	crypto_shash_final(desc, digest);
 }
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 3ab1067db624..6f885fab9d84 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -61,7 +61,7 @@ struct ima_rule_entry {
 	enum ima_hooks func;
 	int mask;
 	unsigned long fsmagic;
-	u8 fsuuid[16];
+	uuid_t fsuuid;
 	kuid_t uid;
 	kuid_t fowner;
 	bool (*uid_op)(kuid_t, kuid_t);    /* Handlers for operators       */
@@ -244,7 +244,7 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode,
 	    && rule->fsmagic != inode->i_sb->s_magic)
 		return false;
 	if ((rule->flags & IMA_FSUUID) &&
-	    memcmp(rule->fsuuid, inode->i_sb->s_uuid, sizeof(rule->fsuuid)))
+	    !uuid_equal(&rule->fsuuid, &inode->i_sb->s_uuid))
 		return false;
 	if ((rule->flags & IMA_UID) && !rule->uid_op(cred->uid, rule->uid))
 		return false;
@@ -711,14 +711,12 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 		case Opt_fsuuid:
 			ima_log_string(ab, "fsuuid", args[0].from);
 
-			if (memchr_inv(entry->fsuuid, 0x00,
-				       sizeof(entry->fsuuid))) {
+			if (uuid_is_null(&entry->fsuuid)) {
 				result = -EINVAL;
 				break;
 			}
 
-			result = blk_part_pack_uuid(args[0].from,
-						    entry->fsuuid);
+			result = uuid_parse(args[0].from, &entry->fsuuid);
 			if (!result)
 				entry->flags |= IMA_FSUUID;
 			break;
@@ -1087,7 +1085,7 @@ int ima_policy_show(struct seq_file *m, void *v)
 	}
 
 	if (entry->flags & IMA_FSUUID) {
-		seq_printf(m, "fsuuid=%pU", entry->fsuuid);
+		seq_printf(m, "fsuuid=%pU", &entry->fsuuid);
 		seq_puts(m, " ");
 	}
 
diff --git a/security/keys/Kconfig b/security/keys/Kconfig
index 6fd95f76bfae..a7a23b5541f8 100644
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -20,6 +20,10 @@ config KEYS
 
 	  If you are unsure as to whether this is required, answer N.
 
+config KEYS_COMPAT
+	def_bool y
+	depends on COMPAT && KEYS
+
 config PERSISTENT_KEYRINGS
 	bool "Enable register of persistent per-UID keyrings"
 	depends on KEYS
@@ -89,9 +93,9 @@ config ENCRYPTED_KEYS
 config KEY_DH_OPERATIONS
        bool "Diffie-Hellman operations on retained keys"
        depends on KEYS
-       select MPILIB
        select CRYPTO
        select CRYPTO_HASH
+       select CRYPTO_DH
        help
 	 This option provides support for calculating Diffie-Hellman
 	 public keys and shared secrets using values stored as keys
diff --git a/security/keys/dh.c b/security/keys/dh.c
index e603bd912e4c..4755d4b4f945 100644
--- a/security/keys/dh.c
+++ b/security/keys/dh.c
@@ -8,34 +8,17 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/mpi.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/scatterlist.h>
 #include <linux/crypto.h>
 #include <crypto/hash.h>
+#include <crypto/kpp.h>
+#include <crypto/dh.h>
 #include <keys/user-type.h>
 #include "internal.h"
 
-/*
- * Public key or shared secret generation function [RFC2631 sec 2.1.1]
- *
- * ya = g^xa mod p;
- * or
- * ZZ = yb^xa mod p;
- *
- * where xa is the local private key, ya is the local public key, g is
- * the generator, p is the prime, yb is the remote public key, and ZZ
- * is the shared secret.
- *
- * Both are the same calculation, so g or yb are the "base" and ya or
- * ZZ are the "result".
- */
-static int do_dh(MPI result, MPI base, MPI xa, MPI p)
-{
-	return mpi_powm(result, base, xa, p);
-}
-
-static ssize_t mpi_from_key(key_serial_t keyid, size_t maxlen, MPI *mpi)
+static ssize_t dh_data_from_key(key_serial_t keyid, void **data)
 {
 	struct key *key;
 	key_ref_t key_ref;
@@ -56,19 +39,17 @@ static ssize_t mpi_from_key(key_serial_t keyid, size_t maxlen, MPI *mpi)
 		status = key_validate(key);
 		if (status == 0) {
 			const struct user_key_payload *payload;
+			uint8_t *duplicate;
 
 			payload = user_key_payload_locked(key);
 
-			if (maxlen == 0) {
-				*mpi = NULL;
+			duplicate = kmemdup(payload->data, payload->datalen,
+					    GFP_KERNEL);
+			if (duplicate) {
+				*data = duplicate;
 				ret = payload->datalen;
-			} else if (payload->datalen <= maxlen) {
-				*mpi = mpi_read_raw_data(payload->data,
-							 payload->datalen);
-				if (*mpi)
-					ret = payload->datalen;
 			} else {
-				ret = -EINVAL;
+				ret = -ENOMEM;
 			}
 		}
 		up_read(&key->sem);
@@ -79,6 +60,29 @@ error:
 	return ret;
 }
 
+static void dh_free_data(struct dh *dh)
+{
+	kzfree(dh->key);
+	kzfree(dh->p);
+	kzfree(dh->g);
+}
+
+struct dh_completion {
+	struct completion completion;
+	int err;
+};
+
+static void dh_crypto_done(struct crypto_async_request *req, int err)
+{
+	struct dh_completion *compl = req->data;
+
+	if (err == -EINPROGRESS)
+		return;
+
+	compl->err = err;
+	complete(&compl->completion);
+}
+
 struct kdf_sdesc {
 	struct shash_desc shash;
 	char ctx[];
@@ -89,6 +93,7 @@ static int kdf_alloc(struct kdf_sdesc **sdesc_ret, char *hashname)
 	struct crypto_shash *tfm;
 	struct kdf_sdesc *sdesc;
 	int size;
+	int err;
 
 	/* allocate synchronous hash */
 	tfm = crypto_alloc_shash(hashname, 0, 0);
@@ -97,16 +102,25 @@ static int kdf_alloc(struct kdf_sdesc **sdesc_ret, char *hashname)
 		return PTR_ERR(tfm);
 	}
 
+	err = -EINVAL;
+	if (crypto_shash_digestsize(tfm) == 0)
+		goto out_free_tfm;
+
+	err = -ENOMEM;
 	size = sizeof(struct shash_desc) + crypto_shash_descsize(tfm);
 	sdesc = kmalloc(size, GFP_KERNEL);
 	if (!sdesc)
-		return -ENOMEM;
+		goto out_free_tfm;
 	sdesc->shash.tfm = tfm;
 	sdesc->shash.flags = 0x0;
 
 	*sdesc_ret = sdesc;
 
 	return 0;
+
+out_free_tfm:
+	crypto_free_shash(tfm);
+	return err;
 }
 
 static void kdf_dealloc(struct kdf_sdesc *sdesc)
@@ -120,14 +134,6 @@ static void kdf_dealloc(struct kdf_sdesc *sdesc)
 	kzfree(sdesc);
 }
 
-/* convert 32 bit integer into its string representation */
-static inline void crypto_kw_cpu_to_be32(u32 val, u8 *buf)
-{
-	__be32 *a = (__be32 *)buf;
-
-	*a = cpu_to_be32(val);
-}
-
 /*
  * Implementation of the KDF in counter mode according to SP800-108 section 5.1
  * as well as SP800-56A section 5.8.1 (Single-step KDF).
@@ -138,25 +144,39 @@ static inline void crypto_kw_cpu_to_be32(u32 val, u8 *buf)
  * 5.8.1.2).
  */
 static int kdf_ctr(struct kdf_sdesc *sdesc, const u8 *src, unsigned int slen,
-		   u8 *dst, unsigned int dlen)
+		   u8 *dst, unsigned int dlen, unsigned int zlen)
 {
 	struct shash_desc *desc = &sdesc->shash;
 	unsigned int h = crypto_shash_digestsize(desc->tfm);
 	int err = 0;
 	u8 *dst_orig = dst;
-	u32 i = 1;
-	u8 iteration[sizeof(u32)];
+	__be32 counter = cpu_to_be32(1);
 
 	while (dlen) {
 		err = crypto_shash_init(desc);
 		if (err)
 			goto err;
 
-		crypto_kw_cpu_to_be32(i, iteration);
-		err = crypto_shash_update(desc, iteration, sizeof(u32));
+		err = crypto_shash_update(desc, (u8 *)&counter, sizeof(__be32));
 		if (err)
 			goto err;
 
+		if (zlen && h) {
+			u8 tmpbuffer[h];
+			size_t chunk = min_t(size_t, zlen, h);
+			memset(tmpbuffer, 0, chunk);
+
+			do {
+				err = crypto_shash_update(desc, tmpbuffer,
+							  chunk);
+				if (err)
+					goto err;
+
+				zlen -= chunk;
+				chunk = min_t(size_t, zlen, h);
+			} while (zlen);
+		}
+
 		if (src && slen) {
 			err = crypto_shash_update(desc, src, slen);
 			if (err)
@@ -179,7 +199,7 @@ static int kdf_ctr(struct kdf_sdesc *sdesc, const u8 *src, unsigned int slen,
 
 			dlen -= h;
 			dst += h;
-			i++;
+			counter = cpu_to_be32(be32_to_cpu(counter) + 1);
 		}
 	}
 
@@ -192,7 +212,7 @@ err:
 
 static int keyctl_dh_compute_kdf(struct kdf_sdesc *sdesc,
 				 char __user *buffer, size_t buflen,
-				 uint8_t *kbuf, size_t kbuflen)
+				 uint8_t *kbuf, size_t kbuflen, size_t lzero)
 {
 	uint8_t *outbuf = NULL;
 	int ret;
@@ -203,7 +223,7 @@ static int keyctl_dh_compute_kdf(struct kdf_sdesc *sdesc,
 		goto err;
 	}
 
-	ret = kdf_ctr(sdesc, kbuf, kbuflen, outbuf, buflen);
+	ret = kdf_ctr(sdesc, kbuf, kbuflen, outbuf, buflen, lzero);
 	if (ret)
 		goto err;
 
@@ -221,21 +241,26 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params,
 			 struct keyctl_kdf_params *kdfcopy)
 {
 	long ret;
-	MPI base, private, prime, result;
-	unsigned nbytes;
+	ssize_t dlen;
+	int secretlen;
+	int outlen;
 	struct keyctl_dh_params pcopy;
-	uint8_t *kbuf;
-	ssize_t keylen;
-	size_t resultlen;
+	struct dh dh_inputs;
+	struct scatterlist outsg;
+	struct dh_completion compl;
+	struct crypto_kpp *tfm;
+	struct kpp_request *req;
+	uint8_t *secret;
+	uint8_t *outbuf;
 	struct kdf_sdesc *sdesc = NULL;
 
 	if (!params || (!buffer && buflen)) {
 		ret = -EINVAL;
-		goto out;
+		goto out1;
 	}
 	if (copy_from_user(&pcopy, params, sizeof(pcopy)) != 0) {
 		ret = -EFAULT;
-		goto out;
+		goto out1;
 	}
 
 	if (kdfcopy) {
@@ -244,104 +269,147 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params,
 		if (buflen > KEYCTL_KDF_MAX_OUTPUT_LEN ||
 		    kdfcopy->otherinfolen > KEYCTL_KDF_MAX_OI_LEN) {
 			ret = -EMSGSIZE;
-			goto out;
+			goto out1;
 		}
 
 		/* get KDF name string */
 		hashname = strndup_user(kdfcopy->hashname, CRYPTO_MAX_ALG_NAME);
 		if (IS_ERR(hashname)) {
 			ret = PTR_ERR(hashname);
-			goto out;
+			goto out1;
 		}
 
 		/* allocate KDF from the kernel crypto API */
 		ret = kdf_alloc(&sdesc, hashname);
 		kfree(hashname);
 		if (ret)
-			goto out;
+			goto out1;
 	}
 
-	/*
-	 * If the caller requests postprocessing with a KDF, allow an
-	 * arbitrary output buffer size since the KDF ensures proper truncation.
-	 */
-	keylen = mpi_from_key(pcopy.prime, kdfcopy ? SIZE_MAX : buflen, &prime);
-	if (keylen < 0 || !prime) {
-		/* buflen == 0 may be used to query the required buffer size,
-		 * which is the prime key length.
-		 */
-		ret = keylen;
-		goto out;
+	memset(&dh_inputs, 0, sizeof(dh_inputs));
+
+	dlen = dh_data_from_key(pcopy.prime, &dh_inputs.p);
+	if (dlen < 0) {
+		ret = dlen;
+		goto out1;
+	}
+	dh_inputs.p_size = dlen;
+
+	dlen = dh_data_from_key(pcopy.base, &dh_inputs.g);
+	if (dlen < 0) {
+		ret = dlen;
+		goto out2;
 	}
+	dh_inputs.g_size = dlen;
 
-	/* The result is never longer than the prime */
-	resultlen = keylen;
+	dlen = dh_data_from_key(pcopy.private, &dh_inputs.key);
+	if (dlen < 0) {
+		ret = dlen;
+		goto out2;
+	}
+	dh_inputs.key_size = dlen;
 
-	keylen = mpi_from_key(pcopy.base, SIZE_MAX, &base);
-	if (keylen < 0 || !base) {
-		ret = keylen;
-		goto error1;
+	secretlen = crypto_dh_key_len(&dh_inputs);
+	secret = kmalloc(secretlen, GFP_KERNEL);
+	if (!secret) {
+		ret = -ENOMEM;
+		goto out2;
 	}
+	ret = crypto_dh_encode_key(secret, secretlen, &dh_inputs);
+	if (ret)
+		goto out3;
 
-	keylen = mpi_from_key(pcopy.private, SIZE_MAX, &private);
-	if (keylen < 0 || !private) {
-		ret = keylen;
-		goto error2;
+	tfm = crypto_alloc_kpp("dh", CRYPTO_ALG_TYPE_KPP, 0);
+	if (IS_ERR(tfm)) {
+		ret = PTR_ERR(tfm);
+		goto out3;
+	}
+
+	ret = crypto_kpp_set_secret(tfm, secret, secretlen);
+	if (ret)
+		goto out4;
+
+	outlen = crypto_kpp_maxsize(tfm);
+
+	if (!kdfcopy) {
+		/*
+		 * When not using a KDF, buflen 0 is used to read the
+		 * required buffer length
+		 */
+		if (buflen == 0) {
+			ret = outlen;
+			goto out4;
+		} else if (outlen > buflen) {
+			ret = -EOVERFLOW;
+			goto out4;
+		}
 	}
 
-	result = mpi_alloc(0);
-	if (!result) {
+	outbuf = kzalloc(kdfcopy ? (outlen + kdfcopy->otherinfolen) : outlen,
+			 GFP_KERNEL);
+	if (!outbuf) {
 		ret = -ENOMEM;
-		goto error3;
+		goto out4;
 	}
 
-	/* allocate space for DH shared secret and SP800-56A otherinfo */
-	kbuf = kmalloc(kdfcopy ? (resultlen + kdfcopy->otherinfolen) : resultlen,
-		       GFP_KERNEL);
-	if (!kbuf) {
+	sg_init_one(&outsg, outbuf, outlen);
+
+	req = kpp_request_alloc(tfm, GFP_KERNEL);
+	if (!req) {
 		ret = -ENOMEM;
-		goto error4;
+		goto out5;
 	}
 
+	kpp_request_set_input(req, NULL, 0);
+	kpp_request_set_output(req, &outsg, outlen);
+	init_completion(&compl.completion);
+	kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+				 CRYPTO_TFM_REQ_MAY_SLEEP,
+				 dh_crypto_done, &compl);
+
 	/*
-	 * Concatenate SP800-56A otherinfo past DH shared secret -- the
-	 * input to the KDF is (DH shared secret || otherinfo)
+	 * For DH, generate_public_key and generate_shared_secret are
+	 * the same calculation
 	 */
-	if (kdfcopy && kdfcopy->otherinfo &&
-	    copy_from_user(kbuf + resultlen, kdfcopy->otherinfo,
-			   kdfcopy->otherinfolen) != 0) {
-		ret = -EFAULT;
-		goto error5;
+	ret = crypto_kpp_generate_public_key(req);
+	if (ret == -EINPROGRESS) {
+		wait_for_completion(&compl.completion);
+		ret = compl.err;
+		if (ret)
+			goto out6;
 	}
 
-	ret = do_dh(result, base, private, prime);
-	if (ret)
-		goto error5;
-
-	ret = mpi_read_buffer(result, kbuf, resultlen, &nbytes, NULL);
-	if (ret != 0)
-		goto error5;
-
 	if (kdfcopy) {
-		ret = keyctl_dh_compute_kdf(sdesc, buffer, buflen, kbuf,
-					    resultlen + kdfcopy->otherinfolen);
-	} else {
-		ret = nbytes;
-		if (copy_to_user(buffer, kbuf, nbytes) != 0)
+		/*
+		 * Concatenate SP800-56A otherinfo past DH shared secret -- the
+		 * input to the KDF is (DH shared secret || otherinfo)
+		 */
+		if (copy_from_user(outbuf + req->dst_len, kdfcopy->otherinfo,
+				   kdfcopy->otherinfolen) != 0) {
 			ret = -EFAULT;
+			goto out6;
+		}
+
+		ret = keyctl_dh_compute_kdf(sdesc, buffer, buflen, outbuf,
+					    req->dst_len + kdfcopy->otherinfolen,
+					    outlen - req->dst_len);
+	} else if (copy_to_user(buffer, outbuf, req->dst_len) == 0) {
+		ret = req->dst_len;
+	} else {
+		ret = -EFAULT;
 	}
 
-error5:
-	kzfree(kbuf);
-error4:
-	mpi_free(result);
-error3:
-	mpi_free(private);
-error2:
-	mpi_free(base);
-error1:
-	mpi_free(prime);
-out:
+out6:
+	kpp_request_free(req);
+out5:
+	kzfree(outbuf);
+out4:
+	crypto_free_kpp(tfm);
+out3:
+	kzfree(secret);
+out2:
+	dh_free_data(&dh_inputs);
+out1:
 	kdf_dealloc(sdesc);
 	return ret;
 }
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 0010955d7876..bb6324d1ccec 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -30,6 +30,7 @@
 #include <linux/scatterlist.h>
 #include <linux/ctype.h>
 #include <crypto/aes.h>
+#include <crypto/algapi.h>
 #include <crypto/hash.h>
 #include <crypto/sha.h>
 #include <crypto/skcipher.h>
@@ -54,13 +55,7 @@ static int blksize;
 #define MAX_DATA_SIZE 4096
 #define MIN_DATA_SIZE  20
 
-struct sdesc {
-	struct shash_desc shash;
-	char ctx[];
-};
-
-static struct crypto_shash *hashalg;
-static struct crypto_shash *hmacalg;
+static struct crypto_shash *hash_tfm;
 
 enum {
 	Opt_err = -1, Opt_new, Opt_load, Opt_update
@@ -141,23 +136,22 @@ static int valid_ecryptfs_desc(const char *ecryptfs_desc)
  */
 static int valid_master_desc(const char *new_desc, const char *orig_desc)
 {
-	if (!memcmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) {
-		if (strlen(new_desc) == KEY_TRUSTED_PREFIX_LEN)
-			goto out;
-		if (orig_desc)
-			if (memcmp(new_desc, orig_desc, KEY_TRUSTED_PREFIX_LEN))
-				goto out;
-	} else if (!memcmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) {
-		if (strlen(new_desc) == KEY_USER_PREFIX_LEN)
-			goto out;
-		if (orig_desc)
-			if (memcmp(new_desc, orig_desc, KEY_USER_PREFIX_LEN))
-				goto out;
-	} else
-		goto out;
+	int prefix_len;
+
+	if (!strncmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN))
+		prefix_len = KEY_TRUSTED_PREFIX_LEN;
+	else if (!strncmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN))
+		prefix_len = KEY_USER_PREFIX_LEN;
+	else
+		return -EINVAL;
+
+	if (!new_desc[prefix_len])
+		return -EINVAL;
+
+	if (orig_desc && strncmp(new_desc, orig_desc, prefix_len))
+		return -EINVAL;
+
 	return 0;
-out:
-	return -EINVAL;
 }
 
 /*
@@ -321,53 +315,38 @@ error:
 	return ukey;
 }
 
-static struct sdesc *alloc_sdesc(struct crypto_shash *alg)
-{
-	struct sdesc *sdesc;
-	int size;
-
-	size = sizeof(struct shash_desc) + crypto_shash_descsize(alg);
-	sdesc = kmalloc(size, GFP_KERNEL);
-	if (!sdesc)
-		return ERR_PTR(-ENOMEM);
-	sdesc->shash.tfm = alg;
-	sdesc->shash.flags = 0x0;
-	return sdesc;
-}
-
-static int calc_hmac(u8 *digest, const u8 *key, unsigned int keylen,
+static int calc_hash(struct crypto_shash *tfm, u8 *digest,
 		     const u8 *buf, unsigned int buflen)
 {
-	struct sdesc *sdesc;
-	int ret;
+	SHASH_DESC_ON_STACK(desc, tfm);
+	int err;
 
-	sdesc = alloc_sdesc(hmacalg);
-	if (IS_ERR(sdesc)) {
-		pr_info("encrypted_key: can't alloc %s\n", hmac_alg);
-		return PTR_ERR(sdesc);
-	}
+	desc->tfm = tfm;
+	desc->flags = 0;
 
-	ret = crypto_shash_setkey(hmacalg, key, keylen);
-	if (!ret)
-		ret = crypto_shash_digest(&sdesc->shash, buf, buflen, digest);
-	kfree(sdesc);
-	return ret;
+	err = crypto_shash_digest(desc, buf, buflen, digest);
+	shash_desc_zero(desc);
+	return err;
 }
 
-static int calc_hash(u8 *digest, const u8 *buf, unsigned int buflen)
+static int calc_hmac(u8 *digest, const u8 *key, unsigned int keylen,
+		     const u8 *buf, unsigned int buflen)
 {
-	struct sdesc *sdesc;
-	int ret;
+	struct crypto_shash *tfm;
+	int err;
 
-	sdesc = alloc_sdesc(hashalg);
-	if (IS_ERR(sdesc)) {
-		pr_info("encrypted_key: can't alloc %s\n", hash_alg);
-		return PTR_ERR(sdesc);
+	tfm = crypto_alloc_shash(hmac_alg, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm)) {
+		pr_err("encrypted_key: can't alloc %s transform: %ld\n",
+		       hmac_alg, PTR_ERR(tfm));
+		return PTR_ERR(tfm);
 	}
 
-	ret = crypto_shash_digest(&sdesc->shash, buf, buflen, digest);
-	kfree(sdesc);
-	return ret;
+	err = crypto_shash_setkey(tfm, key, keylen);
+	if (!err)
+		err = calc_hash(tfm, digest, buf, buflen);
+	crypto_free_shash(tfm);
+	return err;
 }
 
 enum derived_key_type { ENC_KEY, AUTH_KEY };
@@ -385,10 +364,9 @@ static int get_derived_key(u8 *derived_key, enum derived_key_type key_type,
 		derived_buf_len = HASH_SIZE;
 
 	derived_buf = kzalloc(derived_buf_len, GFP_KERNEL);
-	if (!derived_buf) {
-		pr_err("encrypted_key: out of memory\n");
+	if (!derived_buf)
 		return -ENOMEM;
-	}
+
 	if (key_type)
 		strcpy(derived_buf, "AUTH_KEY");
 	else
@@ -396,8 +374,8 @@ static int get_derived_key(u8 *derived_key, enum derived_key_type key_type,
 
 	memcpy(derived_buf + strlen(derived_buf) + 1, master_key,
 	       master_keylen);
-	ret = calc_hash(derived_key, derived_buf, derived_buf_len);
-	kfree(derived_buf);
+	ret = calc_hash(hash_tfm, derived_key, derived_buf, derived_buf_len);
+	kzfree(derived_buf);
 	return ret;
 }
 
@@ -480,12 +458,9 @@ static int derived_key_encrypt(struct encrypted_key_payload *epayload,
 	struct skcipher_request *req;
 	unsigned int encrypted_datalen;
 	u8 iv[AES_BLOCK_SIZE];
-	unsigned int padlen;
-	char pad[16];
 	int ret;
 
 	encrypted_datalen = roundup(epayload->decrypted_datalen, blksize);
-	padlen = encrypted_datalen - epayload->decrypted_datalen;
 
 	req = init_skcipher_req(derived_key, derived_keylen);
 	ret = PTR_ERR(req);
@@ -493,11 +468,10 @@ static int derived_key_encrypt(struct encrypted_key_payload *epayload,
 		goto out;
 	dump_decrypted_data(epayload);
 
-	memset(pad, 0, sizeof pad);
 	sg_init_table(sg_in, 2);
 	sg_set_buf(&sg_in[0], epayload->decrypted_data,
 		   epayload->decrypted_datalen);
-	sg_set_buf(&sg_in[1], pad, padlen);
+	sg_set_page(&sg_in[1], ZERO_PAGE(0), AES_BLOCK_SIZE, 0);
 
 	sg_init_table(sg_out, 1);
 	sg_set_buf(sg_out, epayload->encrypted_data, encrypted_datalen);
@@ -533,6 +507,7 @@ static int datablob_hmac_append(struct encrypted_key_payload *epayload,
 	if (!ret)
 		dump_hmac(NULL, digest, HASH_SIZE);
 out:
+	memzero_explicit(derived_key, sizeof(derived_key));
 	return ret;
 }
 
@@ -561,8 +536,8 @@ static int datablob_hmac_verify(struct encrypted_key_payload *epayload,
 	ret = calc_hmac(digest, derived_key, sizeof derived_key, p, len);
 	if (ret < 0)
 		goto out;
-	ret = memcmp(digest, epayload->format + epayload->datablob_len,
-		     sizeof digest);
+	ret = crypto_memneq(digest, epayload->format + epayload->datablob_len,
+			    sizeof(digest));
 	if (ret) {
 		ret = -EINVAL;
 		dump_hmac("datablob",
@@ -571,6 +546,7 @@ static int datablob_hmac_verify(struct encrypted_key_payload *epayload,
 		dump_hmac("calc", digest, HASH_SIZE);
 	}
 out:
+	memzero_explicit(derived_key, sizeof(derived_key));
 	return ret;
 }
 
@@ -584,9 +560,14 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload,
 	struct skcipher_request *req;
 	unsigned int encrypted_datalen;
 	u8 iv[AES_BLOCK_SIZE];
-	char pad[16];
+	u8 *pad;
 	int ret;
 
+	/* Throwaway buffer to hold the unused zero padding at the end */
+	pad = kmalloc(AES_BLOCK_SIZE, GFP_KERNEL);
+	if (!pad)
+		return -ENOMEM;
+
 	encrypted_datalen = roundup(epayload->decrypted_datalen, blksize);
 	req = init_skcipher_req(derived_key, derived_keylen);
 	ret = PTR_ERR(req);
@@ -594,13 +575,12 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload,
 		goto out;
 	dump_encrypted_data(epayload, encrypted_datalen);
 
-	memset(pad, 0, sizeof pad);
 	sg_init_table(sg_in, 1);
 	sg_init_table(sg_out, 2);
 	sg_set_buf(sg_in, epayload->encrypted_data, encrypted_datalen);
 	sg_set_buf(&sg_out[0], epayload->decrypted_data,
 		   epayload->decrypted_datalen);
-	sg_set_buf(&sg_out[1], pad, sizeof pad);
+	sg_set_buf(&sg_out[1], pad, AES_BLOCK_SIZE);
 
 	memcpy(iv, epayload->iv, sizeof(iv));
 	skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv);
@@ -612,6 +592,7 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload,
 		goto out;
 	dump_decrypted_data(epayload);
 out:
+	kfree(pad);
 	return ret;
 }
 
@@ -722,6 +703,7 @@ static int encrypted_key_decrypt(struct encrypted_key_payload *epayload,
 out:
 	up_read(&mkey->sem);
 	key_put(mkey);
+	memzero_explicit(derived_key, sizeof(derived_key));
 	return ret;
 }
 
@@ -828,13 +810,13 @@ static int encrypted_instantiate(struct key *key,
 	ret = encrypted_init(epayload, key->description, format, master_desc,
 			     decrypted_datalen, hex_encoded_iv);
 	if (ret < 0) {
-		kfree(epayload);
+		kzfree(epayload);
 		goto out;
 	}
 
 	rcu_assign_keypointer(key, epayload);
 out:
-	kfree(datablob);
+	kzfree(datablob);
 	return ret;
 }
 
@@ -843,8 +825,7 @@ static void encrypted_rcu_free(struct rcu_head *rcu)
 	struct encrypted_key_payload *epayload;
 
 	epayload = container_of(rcu, struct encrypted_key_payload, rcu);
-	memset(epayload->decrypted_data, 0, epayload->decrypted_datalen);
-	kfree(epayload);
+	kzfree(epayload);
 }
 
 /*
@@ -902,7 +883,7 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep)
 	rcu_assign_keypointer(key, new_epayload);
 	call_rcu(&epayload->rcu, encrypted_rcu_free);
 out:
-	kfree(buf);
+	kzfree(buf);
 	return ret;
 }
 
@@ -960,33 +941,26 @@ static long encrypted_read(const struct key *key, char __user *buffer,
 
 	up_read(&mkey->sem);
 	key_put(mkey);
+	memzero_explicit(derived_key, sizeof(derived_key));
 
 	if (copy_to_user(buffer, ascii_buf, asciiblob_len) != 0)
 		ret = -EFAULT;
-	kfree(ascii_buf);
+	kzfree(ascii_buf);
 
 	return asciiblob_len;
 out:
 	up_read(&mkey->sem);
 	key_put(mkey);
+	memzero_explicit(derived_key, sizeof(derived_key));
 	return ret;
 }
 
 /*
- * encrypted_destroy - before freeing the key, clear the decrypted data
- *
- * Before freeing the key, clear the memory containing the decrypted
- * key data.
+ * encrypted_destroy - clear and free the key's payload
  */
 static void encrypted_destroy(struct key *key)
 {
-	struct encrypted_key_payload *epayload = key->payload.data[0];
-
-	if (!epayload)
-		return;
-
-	memzero_explicit(epayload->decrypted_data, epayload->decrypted_datalen);
-	kfree(key->payload.data[0]);
+	kzfree(key->payload.data[0]);
 }
 
 struct key_type key_type_encrypted = {
@@ -999,47 +973,17 @@ struct key_type key_type_encrypted = {
 };
 EXPORT_SYMBOL_GPL(key_type_encrypted);
 
-static void encrypted_shash_release(void)
-{
-	if (hashalg)
-		crypto_free_shash(hashalg);
-	if (hmacalg)
-		crypto_free_shash(hmacalg);
-}
-
-static int __init encrypted_shash_alloc(void)
+static int __init init_encrypted(void)
 {
 	int ret;
 
-	hmacalg = crypto_alloc_shash(hmac_alg, 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(hmacalg)) {
-		pr_info("encrypted_key: could not allocate crypto %s\n",
-			hmac_alg);
-		return PTR_ERR(hmacalg);
-	}
-
-	hashalg = crypto_alloc_shash(hash_alg, 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(hashalg)) {
-		pr_info("encrypted_key: could not allocate crypto %s\n",
-			hash_alg);
-		ret = PTR_ERR(hashalg);
-		goto hashalg_fail;
+	hash_tfm = crypto_alloc_shash(hash_alg, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(hash_tfm)) {
+		pr_err("encrypted_key: can't allocate %s transform: %ld\n",
+		       hash_alg, PTR_ERR(hash_tfm));
+		return PTR_ERR(hash_tfm);
 	}
 
-	return 0;
-
-hashalg_fail:
-	crypto_free_shash(hmacalg);
-	return ret;
-}
-
-static int __init init_encrypted(void)
-{
-	int ret;
-
-	ret = encrypted_shash_alloc();
-	if (ret < 0)
-		return ret;
 	ret = aes_get_sizes();
 	if (ret < 0)
 		goto out;
@@ -1048,14 +992,14 @@ static int __init init_encrypted(void)
 		goto out;
 	return 0;
 out:
-	encrypted_shash_release();
+	crypto_free_shash(hash_tfm);
 	return ret;
 
 }
 
 static void __exit cleanup_encrypted(void)
 {
-	encrypted_shash_release();
+	crypto_free_shash(hash_tfm);
 	unregister_key_type(&key_type_encrypted);
 }
 
diff --git a/security/keys/gc.c b/security/keys/gc.c
index 595becc6d0d2..87cb260e4890 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -158,9 +158,7 @@ static noinline void key_gc_unused_keys(struct list_head *keys)
 
 		kfree(key->description);
 
-#ifdef KEY_DEBUGGING
-		key->magic = KEY_DEBUG_MAGIC_X;
-#endif
+		memzero_explicit(key, sizeof(*key));
 		kmem_cache_free(key_jar, key);
 	}
 }
diff --git a/security/keys/internal.h b/security/keys/internal.h
index c0f8682eba69..91bc6214ae57 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -13,6 +13,7 @@
 #define _INTERNAL_H
 
 #include <linux/sched.h>
+#include <linux/wait_bit.h>
 #include <linux/cred.h>
 #include <linux/key-type.h>
 #include <linux/task_work.h>
diff --git a/security/keys/key.c b/security/keys/key.c
index 455c04d80bbb..83da68d98b40 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -660,14 +660,11 @@ not_found:
 	goto error;
 
 found:
-	/* pretend it doesn't exist if it is awaiting deletion */
-	if (refcount_read(&key->usage) == 0)
-		goto not_found;
-
-	/* this races with key_put(), but that doesn't matter since key_put()
-	 * doesn't actually change the key
+	/* A key is allowed to be looked up only if someone still owns a
+	 * reference to it - otherwise it's awaiting the gc.
 	 */
-	__key_get(key);
+	if (!refcount_inc_not_zero(&key->usage))
+		goto not_found;
 
 error:
 	spin_unlock(&key_serial_lock);
@@ -966,12 +963,11 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen)
 	/* the key must be writable */
 	ret = key_permission(key_ref, KEY_NEED_WRITE);
 	if (ret < 0)
-		goto error;
+		return ret;
 
 	/* attempt to update it if supported */
-	ret = -EOPNOTSUPP;
 	if (!key->type->update)
-		goto error;
+		return -EOPNOTSUPP;
 
 	memset(&prep, 0, sizeof(prep));
 	prep.data = payload;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 447a7d5cee0f..ab0b337c84b4 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -99,7 +99,7 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type,
 	/* pull the payload in if one was supplied */
 	payload = NULL;
 
-	if (_payload) {
+	if (plen) {
 		ret = -ENOMEM;
 		payload = kvmalloc(plen, GFP_KERNEL);
 		if (!payload)
@@ -132,7 +132,10 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type,
 
 	key_ref_put(keyring_ref);
  error3:
-	kvfree(payload);
+	if (payload) {
+		memzero_explicit(payload, plen);
+		kvfree(payload);
+	}
  error2:
 	kfree(description);
  error:
@@ -324,7 +327,7 @@ long keyctl_update_key(key_serial_t id,
 
 	/* pull the payload in if one was supplied */
 	payload = NULL;
-	if (_payload) {
+	if (plen) {
 		ret = -ENOMEM;
 		payload = kmalloc(plen, GFP_KERNEL);
 		if (!payload)
@@ -347,7 +350,7 @@ long keyctl_update_key(key_serial_t id,
 
 	key_ref_put(key_ref);
 error2:
-	kfree(payload);
+	kzfree(payload);
 error:
 	return ret;
 }
@@ -1093,7 +1096,10 @@ long keyctl_instantiate_key_common(key_serial_t id,
 		keyctl_change_reqkey_auth(NULL);
 
 error2:
-	kvfree(payload);
+	if (payload) {
+		memzero_explicit(payload, plen);
+		kvfree(payload);
+	}
 error:
 	return ret;
 }
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 4d1678e4586f..de81793f9920 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -706,7 +706,7 @@ descend_to_keyring:
 	 * Non-keyrings avoid the leftmost branch of the root entirely (root
 	 * slots 1-15).
 	 */
-	ptr = ACCESS_ONCE(keyring->keys.root);
+	ptr = READ_ONCE(keyring->keys.root);
 	if (!ptr)
 		goto not_this_keyring;
 
@@ -720,7 +720,7 @@ descend_to_keyring:
 		if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0)
 			goto not_this_keyring;
 
-		ptr = ACCESS_ONCE(shortcut->next_node);
+		ptr = READ_ONCE(shortcut->next_node);
 		node = assoc_array_ptr_to_node(ptr);
 		goto begin_node;
 	}
@@ -740,7 +740,7 @@ descend_to_node:
 	if (assoc_array_ptr_is_shortcut(ptr)) {
 		shortcut = assoc_array_ptr_to_shortcut(ptr);
 		smp_read_barrier_depends();
-		ptr = ACCESS_ONCE(shortcut->next_node);
+		ptr = READ_ONCE(shortcut->next_node);
 		BUG_ON(!assoc_array_ptr_is_node(ptr));
 	}
 	node = assoc_array_ptr_to_node(ptr);
@@ -752,7 +752,7 @@ begin_node:
 ascend_to_node:
 	/* Go through the slots in a node */
 	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
-		ptr = ACCESS_ONCE(node->slots[slot]);
+		ptr = READ_ONCE(node->slots[slot]);
 
 		if (assoc_array_ptr_is_meta(ptr) && node->back_pointer)
 			goto descend_to_node;
@@ -790,13 +790,13 @@ ascend_to_node:
 	/* We've dealt with all the slots in the current node, so now we need
 	 * to ascend to the parent and continue processing there.
 	 */
-	ptr = ACCESS_ONCE(node->back_pointer);
+	ptr = READ_ONCE(node->back_pointer);
 	slot = node->parent_slot;
 
 	if (ptr && assoc_array_ptr_is_shortcut(ptr)) {
 		shortcut = assoc_array_ptr_to_shortcut(ptr);
 		smp_read_barrier_depends();
-		ptr = ACCESS_ONCE(shortcut->back_pointer);
+		ptr = READ_ONCE(shortcut->back_pointer);
 		slot = shortcut->parent_slot;
 	}
 	if (!ptr)
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 2217dfec7996..86bced9fdbdf 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -809,15 +809,14 @@ long join_session_keyring(const char *name)
 		ret = PTR_ERR(keyring);
 		goto error2;
 	} else if (keyring == new->session_keyring) {
-		key_put(keyring);
 		ret = 0;
-		goto error2;
+		goto error3;
 	}
 
 	/* we've got a keyring - now to install it */
 	ret = install_session_keyring_to_cred(new, keyring);
 	if (ret < 0)
-		goto error2;
+		goto error3;
 
 	commit_creds(new);
 	mutex_unlock(&key_session_mutex);
@@ -827,6 +826,8 @@ long join_session_keyring(const char *name)
 okay:
 	return ret;
 
+error3:
+	key_put(keyring);
 error2:
 	mutex_unlock(&key_session_mutex);
 error:
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 2ae31c5a87de..435e86e13879 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -70,7 +70,7 @@ static int TSS_sha1(const unsigned char *data, unsigned int datalen,
 	}
 
 	ret = crypto_shash_digest(&sdesc->shash, data, datalen, digest);
-	kfree(sdesc);
+	kzfree(sdesc);
 	return ret;
 }
 
@@ -114,7 +114,7 @@ static int TSS_rawhmac(unsigned char *digest, const unsigned char *key,
 	if (!ret)
 		ret = crypto_shash_final(&sdesc->shash, digest);
 out:
-	kfree(sdesc);
+	kzfree(sdesc);
 	return ret;
 }
 
@@ -165,7 +165,7 @@ static int TSS_authhmac(unsigned char *digest, const unsigned char *key,
 				  paramdigest, TPM_NONCE_SIZE, h1,
 				  TPM_NONCE_SIZE, h2, 1, &c, 0, 0);
 out:
-	kfree(sdesc);
+	kzfree(sdesc);
 	return ret;
 }
 
@@ -246,7 +246,7 @@ static int TSS_checkhmac1(unsigned char *buffer,
 	if (memcmp(testhmac, authdata, SHA1_DIGEST_SIZE))
 		ret = -EINVAL;
 out:
-	kfree(sdesc);
+	kzfree(sdesc);
 	return ret;
 }
 
@@ -347,7 +347,7 @@ static int TSS_checkhmac2(unsigned char *buffer,
 	if (memcmp(testhmac2, authdata2, SHA1_DIGEST_SIZE))
 		ret = -EINVAL;
 out:
-	kfree(sdesc);
+	kzfree(sdesc);
 	return ret;
 }
 
@@ -564,7 +564,7 @@ static int tpm_seal(struct tpm_buf *tb, uint16_t keytype,
 		*bloblen = storedsize;
 	}
 out:
-	kfree(td);
+	kzfree(td);
 	return ret;
 }
 
@@ -678,7 +678,7 @@ static int key_seal(struct trusted_key_payload *p,
 	if (ret < 0)
 		pr_info("trusted_key: srkseal failed (%d)\n", ret);
 
-	kfree(tb);
+	kzfree(tb);
 	return ret;
 }
 
@@ -703,7 +703,7 @@ static int key_unseal(struct trusted_key_payload *p,
 		/* pull migratable flag out of sealed key */
 		p->migratable = p->key[--p->key_len];
 
-	kfree(tb);
+	kzfree(tb);
 	return ret;
 }
 
@@ -1037,12 +1037,12 @@ static int trusted_instantiate(struct key *key,
 	if (!ret && options->pcrlock)
 		ret = pcrlock(options->pcrlock);
 out:
-	kfree(datablob);
-	kfree(options);
+	kzfree(datablob);
+	kzfree(options);
 	if (!ret)
 		rcu_assign_keypointer(key, payload);
 	else
-		kfree(payload);
+		kzfree(payload);
 	return ret;
 }
 
@@ -1051,8 +1051,7 @@ static void trusted_rcu_free(struct rcu_head *rcu)
 	struct trusted_key_payload *p;
 
 	p = container_of(rcu, struct trusted_key_payload, rcu);
-	memset(p->key, 0, p->key_len);
-	kfree(p);
+	kzfree(p);
 }
 
 /*
@@ -1094,13 +1093,13 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep)
 	ret = datablob_parse(datablob, new_p, new_o);
 	if (ret != Opt_update) {
 		ret = -EINVAL;
-		kfree(new_p);
+		kzfree(new_p);
 		goto out;
 	}
 
 	if (!new_o->keyhandle) {
 		ret = -EINVAL;
-		kfree(new_p);
+		kzfree(new_p);
 		goto out;
 	}
 
@@ -1114,22 +1113,22 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep)
 	ret = key_seal(new_p, new_o);
 	if (ret < 0) {
 		pr_info("trusted_key: key_seal failed (%d)\n", ret);
-		kfree(new_p);
+		kzfree(new_p);
 		goto out;
 	}
 	if (new_o->pcrlock) {
 		ret = pcrlock(new_o->pcrlock);
 		if (ret < 0) {
 			pr_info("trusted_key: pcrlock failed (%d)\n", ret);
-			kfree(new_p);
+			kzfree(new_p);
 			goto out;
 		}
 	}
 	rcu_assign_keypointer(key, new_p);
 	call_rcu(&p->rcu, trusted_rcu_free);
 out:
-	kfree(datablob);
-	kfree(new_o);
+	kzfree(datablob);
+	kzfree(new_o);
 	return ret;
 }
 
@@ -1158,24 +1157,19 @@ static long trusted_read(const struct key *key, char __user *buffer,
 	for (i = 0; i < p->blob_len; i++)
 		bufp = hex_byte_pack(bufp, p->blob[i]);
 	if ((copy_to_user(buffer, ascii_buf, 2 * p->blob_len)) != 0) {
-		kfree(ascii_buf);
+		kzfree(ascii_buf);
 		return -EFAULT;
 	}
-	kfree(ascii_buf);
+	kzfree(ascii_buf);
 	return 2 * p->blob_len;
 }
 
 /*
- * trusted_destroy - before freeing the key, clear the decrypted data
+ * trusted_destroy - clear and free the key's payload
  */
 static void trusted_destroy(struct key *key)
 {
-	struct trusted_key_payload *p = key->payload.data[0];
-
-	if (!p)
-		return;
-	memset(p->key, 0, p->key_len);
-	kfree(key->payload.data[0]);
+	kzfree(key->payload.data[0]);
 }
 
 struct key_type key_type_trusted = {
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index 26605134f17a..3d8c68eba516 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -86,10 +86,18 @@ EXPORT_SYMBOL_GPL(user_preparse);
  */
 void user_free_preparse(struct key_preparsed_payload *prep)
 {
-	kfree(prep->payload.data[0]);
+	kzfree(prep->payload.data[0]);
 }
 EXPORT_SYMBOL_GPL(user_free_preparse);
 
+static void user_free_payload_rcu(struct rcu_head *head)
+{
+	struct user_key_payload *payload;
+
+	payload = container_of(head, struct user_key_payload, rcu);
+	kzfree(payload);
+}
+
 /*
  * update a user defined key
  * - the key's semaphore is write-locked
@@ -112,7 +120,7 @@ int user_update(struct key *key, struct key_preparsed_payload *prep)
 	prep->payload.data[0] = NULL;
 
 	if (zap)
-		kfree_rcu(zap, rcu);
+		call_rcu(&zap->rcu, user_free_payload_rcu);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(user_update);
@@ -130,7 +138,7 @@ void user_revoke(struct key *key)
 
 	if (upayload) {
 		rcu_assign_keypointer(key, NULL);
-		kfree_rcu(upayload, rcu);
+		call_rcu(&upayload->rcu, user_free_payload_rcu);
 	}
 }
 
@@ -143,7 +151,7 @@ void user_destroy(struct key *key)
 {
 	struct user_key_payload *upayload = key->payload.data[0];
 
-	kfree(upayload);
+	kzfree(upayload);
 }
 
 EXPORT_SYMBOL_GPL(user_destroy);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index e67a526d1f30..819fd6858b49 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1106,10 +1106,8 @@ static int selinux_parse_opts_str(char *options,
 
 	opts->mnt_opts_flags = kcalloc(NUM_SEL_MNT_OPTS, sizeof(int),
 				       GFP_KERNEL);
-	if (!opts->mnt_opts_flags) {
-		kfree(opts->mnt_opts);
+	if (!opts->mnt_opts_flags)
 		goto out_err;
-	}
 
 	if (fscontext) {
 		opts->mnt_opts[num_mnt_opts] = fscontext;
@@ -1132,6 +1130,7 @@ static int selinux_parse_opts_str(char *options,
 	return 0;
 
 out_err:
+	security_free_mnt_opts(opts);
 	kfree(context);
 	kfree(defcontext);
 	kfree(fscontext);
diff --git a/sound/Kconfig b/sound/Kconfig
index ee2e69a9ecd1..6a215a8c0490 100644
--- a/sound/Kconfig
+++ b/sound/Kconfig
@@ -115,6 +115,7 @@ endif # SND
 menuconfig SOUND_PRIME
 	tristate "Open Sound System (DEPRECATED)"
 	select SOUND_OSS_CORE
+	depends on BROKEN
 	help
 	  Say 'Y' or 'M' to enable Open Sound System drivers.
 
diff --git a/sound/core/control.c b/sound/core/control.c
index c109b82eef4b..6362da17ac3f 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -1577,7 +1577,7 @@ static ssize_t snd_ctl_read(struct file *file, char __user *buffer,
 		struct snd_ctl_event ev;
 		struct snd_kctl_event *kev;
 		while (list_empty(&ctl->events)) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 			if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) {
 				err = -EAGAIN;
 				goto __end_lock;
diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c
index 9602a7e38d8a..a73baa1242be 100644
--- a/sound/core/hwdep.c
+++ b/sound/core/hwdep.c
@@ -85,7 +85,7 @@ static int snd_hwdep_open(struct inode *inode, struct file * file)
 	int major = imajor(inode);
 	struct snd_hwdep *hw;
 	int err;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (major == snd_major) {
 		hw = snd_lookup_minor_data(iminor(inode),
diff --git a/sound/core/init.c b/sound/core/init.c
index 6bda8436d765..d61d2b3cd521 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -989,7 +989,7 @@ EXPORT_SYMBOL(snd_card_file_remove);
  */
 int snd_power_wait(struct snd_card *card, unsigned int power_state)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int result = 0;
 
 	/* fastpath */
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 36baf962f9b0..cd8b7bef8d06 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -1554,7 +1554,7 @@ static int snd_pcm_oss_sync1(struct snd_pcm_substream *substream, size_t size)
 	ssize_t result = 0;
 	snd_pcm_state_t state;
 	long res;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	runtime = substream->runtime;
 	init_waitqueue_entry(&wait, current);
@@ -2387,7 +2387,7 @@ static int snd_pcm_oss_open(struct inode *inode, struct file *file)
 	struct snd_pcm_oss_file *pcm_oss_file;
 	struct snd_pcm_oss_setup setup[2];
 	int nonblock;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	err = nonseekable_open(inode, file);
 	if (err < 0)
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 5088d4b8db22..877176067072 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1904,7 +1904,7 @@ static int wait_for_avail(struct snd_pcm_substream *substream,
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	int is_playback = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int err = 0;
 	snd_pcm_uframes_t avail = 0;
 	long wait_time, tout;
@@ -2492,7 +2492,7 @@ static int pcm_chmap_ctl_get(struct snd_kcontrol *kcontrol,
 	struct snd_pcm_substream *substream;
 	const struct snd_pcm_chmap_elem *map;
 
-	if (snd_BUG_ON(!info->chmap))
+	if (!info->chmap)
 		return -EINVAL;
 	substream = snd_pcm_chmap_substream(info, idx);
 	if (!substream)
@@ -2524,7 +2524,7 @@ static int pcm_chmap_ctl_tlv(struct snd_kcontrol *kcontrol, int op_flag,
 	unsigned int __user *dst;
 	int c, count = 0;
 
-	if (snd_BUG_ON(!info->chmap))
+	if (!info->chmap)
 		return -EINVAL;
 	if (size < 8)
 		return -ENOMEM;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 13dec5ec93f2..faa2e2be6f2e 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -1652,7 +1652,7 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream,
 	struct snd_card *card;
 	struct snd_pcm_runtime *runtime;
 	struct snd_pcm_substream *s;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int result = 0;
 	int nonblock = 0;
 
@@ -2353,7 +2353,7 @@ static int snd_pcm_capture_open(struct inode *inode, struct file *file)
 static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream)
 {
 	int err;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (pcm == NULL) {
 		err = -ENODEV;
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index ab890336175f..32588ad05653 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -368,7 +368,7 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file)
 	int err;
 	struct snd_rawmidi *rmidi;
 	struct snd_rawmidi_file *rawmidi_file = NULL;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if ((file->f_flags & O_APPEND) && !(file->f_flags & O_NONBLOCK)) 
 		return -EINVAL;		/* invalid combination */
@@ -1002,7 +1002,7 @@ static ssize_t snd_rawmidi_read(struct file *file, char __user *buf, size_t coun
 	while (count > 0) {
 		spin_lock_irq(&runtime->lock);
 		while (!snd_rawmidi_ready(substream)) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 			if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) {
 				spin_unlock_irq(&runtime->lock);
 				return result > 0 ? result : -EAGAIN;
@@ -1306,7 +1306,7 @@ static ssize_t snd_rawmidi_write(struct file *file, const char __user *buf,
 	while (count > 0) {
 		spin_lock_irq(&runtime->lock);
 		while (!snd_rawmidi_ready_append(substream, count)) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 			if (file->f_flags & O_NONBLOCK) {
 				spin_unlock_irq(&runtime->lock);
 				return result > 0 ? result : -EAGAIN;
@@ -1338,7 +1338,7 @@ static ssize_t snd_rawmidi_write(struct file *file, const char __user *buf,
 	if (file->f_flags & O_DSYNC) {
 		spin_lock_irq(&runtime->lock);
 		while (runtime->avail != runtime->buffer_size) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 			unsigned int last_avail = runtime->avail;
 			init_waitqueue_entry(&wait, current);
 			add_wait_queue(&runtime->sleep, &wait);
diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c
index 01c4cfe30c9f..a8c2822e0198 100644
--- a/sound/core/seq/seq_fifo.c
+++ b/sound/core/seq/seq_fifo.c
@@ -179,7 +179,7 @@ int snd_seq_fifo_cell_out(struct snd_seq_fifo *f,
 {
 	struct snd_seq_event_cell *cell;
 	unsigned long flags;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (snd_BUG_ON(!f))
 		return -EINVAL;
diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c
index d4c61ec9be13..d6e9aacdc36b 100644
--- a/sound/core/seq/seq_memory.c
+++ b/sound/core/seq/seq_memory.c
@@ -227,7 +227,7 @@ static int snd_seq_cell_alloc(struct snd_seq_pool *pool,
 	struct snd_seq_event_cell *cell;
 	unsigned long flags;
 	int err = -EAGAIN;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (pool == NULL)
 		return -EINVAL;
diff --git a/sound/core/timer.c b/sound/core/timer.c
index 2f836ca09860..884c3066b028 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -1618,6 +1618,7 @@ static int snd_timer_user_tselect(struct file *file,
 	if (err < 0)
 		goto __err;
 
+	tu->qhead = tu->qtail = tu->qused = 0;
 	kfree(tu->queue);
 	tu->queue = NULL;
 	kfree(tu->tqueue);
@@ -1959,10 +1960,11 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 
 	tu = file->private_data;
 	unit = tu->tread ? sizeof(struct snd_timer_tread) : sizeof(struct snd_timer_read);
+	mutex_lock(&tu->ioctl_lock);
 	spin_lock_irq(&tu->qlock);
 	while ((long)count - result >= unit) {
 		while (!tu->qused) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 
 			if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) {
 				err = -EAGAIN;
@@ -1974,7 +1976,9 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 			add_wait_queue(&tu->qchange_sleep, &wait);
 
 			spin_unlock_irq(&tu->qlock);
+			mutex_unlock(&tu->ioctl_lock);
 			schedule();
+			mutex_lock(&tu->ioctl_lock);
 			spin_lock_irq(&tu->qlock);
 
 			remove_wait_queue(&tu->qchange_sleep, &wait);
@@ -1994,7 +1998,6 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 		tu->qused--;
 		spin_unlock_irq(&tu->qlock);
 
-		mutex_lock(&tu->ioctl_lock);
 		if (tu->tread) {
 			if (copy_to_user(buffer, &tu->tqueue[qhead],
 					 sizeof(struct snd_timer_tread)))
@@ -2004,7 +2007,6 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 					 sizeof(struct snd_timer_read)))
 				err = -EFAULT;
 		}
-		mutex_unlock(&tu->ioctl_lock);
 
 		spin_lock_irq(&tu->qlock);
 		if (err < 0)
@@ -2014,6 +2016,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 	}
  _error:
 	spin_unlock_irq(&tu->qlock);
+	mutex_unlock(&tu->ioctl_lock);
 	return result > 0 ? result : err;
 }
 
diff --git a/sound/drivers/mpu401/mpu401.c b/sound/drivers/mpu401/mpu401.c
index fed7e7e2177b..9b86e00d7d95 100644
--- a/sound/drivers/mpu401/mpu401.c
+++ b/sound/drivers/mpu401/mpu401.c
@@ -53,9 +53,9 @@ MODULE_PARM_DESC(enable, "Enable MPU-401 device.");
 module_param_array(pnp, bool, NULL, 0444);
 MODULE_PARM_DESC(pnp, "PnP detection for MPU-401 device.");
 #endif
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for MPU-401 device.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for MPU-401 device.");
 module_param_array(uart_enter, bool, NULL, 0444);
 MODULE_PARM_DESC(uart_enter, "Issue UART_ENTER command at open.");
diff --git a/sound/drivers/mtpav.c b/sound/drivers/mtpav.c
index 00b31f92c504..0f6392001e30 100644
--- a/sound/drivers/mtpav.c
+++ b/sound/drivers/mtpav.c
@@ -86,9 +86,9 @@ module_param(index, int, 0444);
 MODULE_PARM_DESC(index, "Index value for MotuMTPAV MIDI.");
 module_param(id, charp, 0444);
 MODULE_PARM_DESC(id, "ID string for MotuMTPAV MIDI.");
-module_param(port, long, 0444);
+module_param_hw(port, long, ioport, 0444);
 MODULE_PARM_DESC(port, "Parallel port # for MotuMTPAV MIDI.");
-module_param(irq, int, 0444);
+module_param_hw(irq, int, irq, 0444);
 MODULE_PARM_DESC(irq, "Parallel IRQ # for MotuMTPAV MIDI.");
 module_param(hwports, int, 0444);
 MODULE_PARM_DESC(hwports, "Hardware ports # for MotuMTPAV MIDI.");
diff --git a/sound/drivers/serial-u16550.c b/sound/drivers/serial-u16550.c
index 60d51ac4ccfe..88e66ea0306d 100644
--- a/sound/drivers/serial-u16550.c
+++ b/sound/drivers/serial-u16550.c
@@ -84,9 +84,9 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for Serial MIDI.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable UART16550A chip.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for UART16550A chip.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for UART16550A chip.");
 module_param_array(speed, int, NULL, 0444);
 MODULE_PARM_DESC(speed, "Speed in bauds.");
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index 9e6f54f8c45d..1e26854b3425 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -682,7 +682,9 @@ static void out_stream_callback(struct fw_iso_context *context, u32 tstamp,
 		cycle = increment_cycle_count(cycle, 1);
 		if (s->handle_packet(s, 0, cycle, i) < 0) {
 			s->packet_index = -1;
-			amdtp_stream_pcm_abort(s);
+			if (in_interrupt())
+				amdtp_stream_pcm_abort(s);
+			WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
 			return;
 		}
 	}
@@ -734,7 +736,9 @@ static void in_stream_callback(struct fw_iso_context *context, u32 tstamp,
 	/* Queueing error or detecting invalid payload. */
 	if (i < packets) {
 		s->packet_index = -1;
-		amdtp_stream_pcm_abort(s);
+		if (in_interrupt())
+			amdtp_stream_pcm_abort(s);
+		WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
 		return;
 	}
 
diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h
index 7e8831722821..ea1a91e99875 100644
--- a/sound/firewire/amdtp-stream.h
+++ b/sound/firewire/amdtp-stream.h
@@ -135,7 +135,7 @@ struct amdtp_stream {
 	/* For a PCM substream processing. */
 	struct snd_pcm_substream *pcm;
 	struct tasklet_struct period_tasklet;
-	unsigned int pcm_buffer_pointer;
+	snd_pcm_uframes_t pcm_buffer_pointer;
 	unsigned int pcm_period_pointer;
 
 	/* To wait for first packet. */
diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c
index ee08c389b4d6..978dc1801b3a 100644
--- a/sound/hda/hdac_controller.c
+++ b/sound/hda/hdac_controller.c
@@ -106,7 +106,11 @@ void snd_hdac_bus_stop_cmd_io(struct hdac_bus *bus)
 	/* disable ringbuffer DMAs */
 	snd_hdac_chip_writeb(bus, RIRBCTL, 0);
 	snd_hdac_chip_writeb(bus, CORBCTL, 0);
+	spin_unlock_irq(&bus->reg_lock);
+
 	hdac_wait_for_cmd_dmas(bus);
+
+	spin_lock_irq(&bus->reg_lock);
 	/* disable unsolicited responses */
 	snd_hdac_chip_updatel(bus, GCTL, AZX_GCTL_UNSOL, 0);
 	spin_unlock_irq(&bus->reg_lock);
diff --git a/sound/isa/ad1848/ad1848.c b/sound/isa/ad1848/ad1848.c
index a302d1f8d14f..e739b1c85c25 100644
--- a/sound/isa/ad1848/ad1848.c
+++ b/sound/isa/ad1848/ad1848.c
@@ -55,11 +55,11 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for " CRD_NAME " driver.");
 module_param_array(thinkpad, bool, NULL, 0444);
 MODULE_PARM_DESC(thinkpad, "Enable only for the onboard CS4248 of IBM Thinkpad 360/750/755 series.");
diff --git a/sound/isa/adlib.c b/sound/isa/adlib.c
index 8d3060fd7ad7..5fb619eca5c8 100644
--- a/sound/isa/adlib.c
+++ b/sound/isa/adlib.c
@@ -27,7 +27,7 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver.");
 
 static int snd_adlib_match(struct device *dev, unsigned int n)
diff --git a/sound/isa/cmi8328.c b/sound/isa/cmi8328.c
index 787475084f46..8e1756c3b9bb 100644
--- a/sound/isa/cmi8328.c
+++ b/sound/isa/cmi8328.c
@@ -51,18 +51,18 @@ MODULE_PARM_DESC(index, "Index value for CMI8328 soundcard.");
 module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for CMI8328 soundcard.");
 
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for CMI8328 driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for CMI8328 driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 for CMI8328 driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 for CMI8328 driver.");
 
-module_param_array(mpuport, long, NULL, 0444);
+module_param_hw_array(mpuport, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpuport, "MPU-401 port # for CMI8328 driver.");
-module_param_array(mpuirq, int, NULL, 0444);
+module_param_hw_array(mpuirq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpuirq, "IRQ # for CMI8328 MPU-401 port.");
 #ifdef SUPPORT_JOYSTICK
 module_param_array(gameport, bool, NULL, 0444);
diff --git a/sound/isa/cmi8330.c b/sound/isa/cmi8330.c
index dfedfd85f205..f64b29ab5cc7 100644
--- a/sound/isa/cmi8330.c
+++ b/sound/isa/cmi8330.c
@@ -95,27 +95,27 @@ module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard.");
 #endif
 
-module_param_array(sbport, long, NULL, 0444);
+module_param_hw_array(sbport, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(sbport, "Port # for CMI8330/CMI8329 SB driver.");
-module_param_array(sbirq, int, NULL, 0444);
+module_param_hw_array(sbirq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(sbirq, "IRQ # for CMI8330/CMI8329 SB driver.");
-module_param_array(sbdma8, int, NULL, 0444);
+module_param_hw_array(sbdma8, int, dma, NULL, 0444);
 MODULE_PARM_DESC(sbdma8, "DMA8 for CMI8330/CMI8329 SB driver.");
-module_param_array(sbdma16, int, NULL, 0444);
+module_param_hw_array(sbdma16, int, dma, NULL, 0444);
 MODULE_PARM_DESC(sbdma16, "DMA16 for CMI8330/CMI8329 SB driver.");
 
-module_param_array(wssport, long, NULL, 0444);
+module_param_hw_array(wssport, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(wssport, "Port # for CMI8330/CMI8329 WSS driver.");
-module_param_array(wssirq, int, NULL, 0444);
+module_param_hw_array(wssirq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(wssirq, "IRQ # for CMI8330/CMI8329 WSS driver.");
-module_param_array(wssdma, int, NULL, 0444);
+module_param_hw_array(wssdma, int, dma, NULL, 0444);
 MODULE_PARM_DESC(wssdma, "DMA for CMI8330/CMI8329 WSS driver.");
 
-module_param_array(fmport, long, NULL, 0444);
+module_param_hw_array(fmport, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fmport, "FM port # for CMI8330/CMI8329 driver.");
-module_param_array(mpuport, long, NULL, 0444);
+module_param_hw_array(mpuport, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpuport, "MPU-401 port # for CMI8330/CMI8329 driver.");
-module_param_array(mpuirq, int, NULL, 0444);
+module_param_hw_array(mpuirq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpuirq, "IRQ # for CMI8330/CMI8329 MPU-401 port.");
 #ifdef CONFIG_PNP
 static int isa_registered;
diff --git a/sound/isa/cs423x/cs4231.c b/sound/isa/cs423x/cs4231.c
index ef7448e9f813..e8edd9017a2f 100644
--- a/sound/isa/cs423x/cs4231.c
+++ b/sound/isa/cs423x/cs4231.c
@@ -55,17 +55,17 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " CRD_NAME " driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver.");
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " CRD_NAME " driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for " CRD_NAME " driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for " CRD_NAME " driver.");
 
 static int snd_cs4231_match(struct device *dev, unsigned int n)
diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c
index 9d7582c90a95..1f9a3b2be7a1 100644
--- a/sound/isa/cs423x/cs4236.c
+++ b/sound/isa/cs423x/cs4236.c
@@ -98,23 +98,23 @@ MODULE_PARM_DESC(enable, "Enable " IDENT " soundcard.");
 module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "ISA PnP detection for specified soundcard.");
 #endif
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " IDENT " driver.");
-module_param_array(cport, long, NULL, 0444);
+module_param_hw_array(cport, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(cport, "Control port # for " IDENT " driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " IDENT " driver.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port # for " IDENT " driver.");
-module_param_array(sb_port, long, NULL, 0444);
+module_param_hw_array(sb_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(sb_port, "SB port # for " IDENT " driver (optional).");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for " IDENT " driver.");
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " IDENT " driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for " IDENT " driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for " IDENT " driver.");
 
 #ifdef CONFIG_PNP
diff --git a/sound/isa/es1688/es1688.c b/sound/isa/es1688/es1688.c
index 1901c2bb6c3b..36320e7f2789 100644
--- a/sound/isa/es1688/es1688.c
+++ b/sound/isa/es1688/es1688.c
@@ -71,17 +71,17 @@ module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard.");
 #endif
 MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " CRD_NAME " driver.");
-module_param_array(irq, int, NULL, 0444);
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port # for ES1688 driver.");
 MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver.");
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " CRD_NAME " driver.");
-module_param_array(dma8, int, NULL, 0444);
+module_param_hw_array(dma8, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma8, "8-bit DMA # for " CRD_NAME " driver.");
 
 #ifdef CONFIG_PNP
diff --git a/sound/isa/es18xx.c b/sound/isa/es18xx.c
index 5094b62d8f77..0cabe2b8974f 100644
--- a/sound/isa/es18xx.c
+++ b/sound/isa/es18xx.c
@@ -1999,17 +1999,17 @@ MODULE_PARM_DESC(enable, "Enable ES18xx soundcard.");
 module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard.");
 #endif
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for ES18xx driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for ES18xx driver.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port # for ES18xx driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for ES18xx driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA 1 # for ES18xx driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA 2 # for ES18xx driver.");
 
 #ifdef CONFIG_PNP
diff --git a/sound/isa/galaxy/galaxy.c b/sound/isa/galaxy/galaxy.c
index 379abe2cbeb2..b9994cc9f5fb 100644
--- a/sound/isa/galaxy/galaxy.c
+++ b/sound/isa/galaxy/galaxy.c
@@ -53,21 +53,21 @@ static int mpu_irq[SNDRV_CARDS] = SNDRV_DEFAULT_IRQ;
 static int dma1[SNDRV_CARDS] = SNDRV_DEFAULT_DMA;
 static int dma2[SNDRV_CARDS] = SNDRV_DEFAULT_DMA;
 
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver.");
-module_param_array(wss_port, long, NULL, 0444);
+module_param_hw_array(wss_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(wss_port, "WSS port # for " CRD_NAME " driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " CRD_NAME " driver.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port # for " CRD_NAME " driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver.");
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " CRD_NAME " driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "Playback DMA # for " CRD_NAME " driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "Capture DMA # for " CRD_NAME " driver.");
 
 /*
diff --git a/sound/isa/gus/gusclassic.c b/sound/isa/gus/gusclassic.c
index c169be49ed71..92a997ab1229 100644
--- a/sound/isa/gus/gusclassic.c
+++ b/sound/isa/gus/gusclassic.c
@@ -58,13 +58,13 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for " CRD_NAME " driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for " CRD_NAME " driver.");
 module_param_array(joystick_dac, int, NULL, 0444);
 MODULE_PARM_DESC(joystick_dac, "Joystick DAC level 0.59V-4.52V or 0.389V-2.98V for " CRD_NAME " driver.");
diff --git a/sound/isa/gus/gusextreme.c b/sound/isa/gus/gusextreme.c
index 77ac2fd723b4..beb52c0f70ea 100644
--- a/sound/isa/gus/gusextreme.c
+++ b/sound/isa/gus/gusextreme.c
@@ -66,21 +66,21 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver.");
-module_param_array(gf1_port, long, NULL, 0444);
+module_param_hw_array(gf1_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(gf1_port, "GF1 port # for " CRD_NAME " driver (optional).");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " CRD_NAME " driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver.");
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " CRD_NAME " driver.");
-module_param_array(gf1_irq, int, NULL, 0444);
+module_param_hw_array(gf1_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(gf1_irq, "GF1 IRQ # for " CRD_NAME " driver.");
-module_param_array(dma8, int, NULL, 0444);
+module_param_hw_array(dma8, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma8, "8-bit DMA # for " CRD_NAME " driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "GF1 DMA # for " CRD_NAME " driver.");
 module_param_array(joystick_dac, int, NULL, 0444);
 MODULE_PARM_DESC(joystick_dac, "Joystick DAC level 0.59V-4.52V or 0.389V-2.98V for " CRD_NAME " driver.");
diff --git a/sound/isa/gus/gusmax.c b/sound/isa/gus/gusmax.c
index dd88c9d33492..63309a453140 100644
--- a/sound/isa/gus/gusmax.c
+++ b/sound/isa/gus/gusmax.c
@@ -56,13 +56,13 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for GUS MAX soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable GUS MAX soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for GUS MAX driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for GUS MAX driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for GUS MAX driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for GUS MAX driver.");
 module_param_array(joystick_dac, int, NULL, 0444);
 MODULE_PARM_DESC(joystick_dac, "Joystick DAC level 0.59V-4.52V or 0.389V-2.98V for GUS MAX driver.");
diff --git a/sound/isa/gus/interwave.c b/sound/isa/gus/interwave.c
index 70d0040484c8..0687b7ef3e53 100644
--- a/sound/isa/gus/interwave.c
+++ b/sound/isa/gus/interwave.c
@@ -92,17 +92,17 @@ MODULE_PARM_DESC(enable, "Enable InterWave soundcard.");
 module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "ISA PnP detection for specified soundcard.");
 #endif
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for InterWave driver.");
 #ifdef SNDRV_STB
-module_param_array(port_tc, long, NULL, 0444);
+module_param_hw_array(port_tc, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port_tc, "Tone control (TEA6330T - i2c bus) port # for InterWave driver.");
 #endif
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for InterWave driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for InterWave driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for InterWave driver.");
 module_param_array(joystick_dac, int, NULL, 0444);
 MODULE_PARM_DESC(joystick_dac, "Joystick DAC level 0.59V-4.52V or 0.389V-2.98V for InterWave driver.");
diff --git a/sound/isa/msnd/msnd_pinnacle.c b/sound/isa/msnd/msnd_pinnacle.c
index 4c072666115d..ad4897337df5 100644
--- a/sound/isa/msnd/msnd_pinnacle.c
+++ b/sound/isa/msnd/msnd_pinnacle.c
@@ -800,22 +800,22 @@ MODULE_LICENSE("GPL");
 MODULE_FIRMWARE(INITCODEFILE);
 MODULE_FIRMWARE(PERMCODEFILE);
 
-module_param_array(io, long, NULL, S_IRUGO);
+module_param_hw_array(io, long, ioport, NULL, S_IRUGO);
 MODULE_PARM_DESC(io, "IO port #");
-module_param_array(irq, int, NULL, S_IRUGO);
-module_param_array(mem, long, NULL, S_IRUGO);
+module_param_hw_array(irq, int, irq, NULL, S_IRUGO);
+module_param_hw_array(mem, long, iomem, NULL, S_IRUGO);
 module_param_array(write_ndelay, int, NULL, S_IRUGO);
 module_param(calibrate_signal, int, S_IRUGO);
 #ifndef MSND_CLASSIC
 module_param_array(digital, int, NULL, S_IRUGO);
-module_param_array(cfg, long, NULL, S_IRUGO);
+module_param_hw_array(cfg, long, ioport, NULL, S_IRUGO);
 module_param_array(reset, int, 0, S_IRUGO);
-module_param_array(mpu_io, long, NULL, S_IRUGO);
-module_param_array(mpu_irq, int, NULL, S_IRUGO);
-module_param_array(ide_io0, long, NULL, S_IRUGO);
-module_param_array(ide_io1, long, NULL, S_IRUGO);
-module_param_array(ide_irq, int, NULL, S_IRUGO);
-module_param_array(joystick_io, long, NULL, S_IRUGO);
+module_param_hw_array(mpu_io, long, ioport, NULL, S_IRUGO);
+module_param_hw_array(mpu_irq, int, irq, NULL, S_IRUGO);
+module_param_hw_array(ide_io0, long, ioport, NULL, S_IRUGO);
+module_param_hw_array(ide_io1, long, ioport, NULL, S_IRUGO);
+module_param_hw_array(ide_irq, int, irq, NULL, S_IRUGO);
+module_param_hw_array(joystick_io, long, ioport, NULL, S_IRUGO);
 #endif
 
 
diff --git a/sound/isa/opl3sa2.c b/sound/isa/opl3sa2.c
index ae133633a420..4098e3e0353d 100644
--- a/sound/isa/opl3sa2.c
+++ b/sound/isa/opl3sa2.c
@@ -69,21 +69,21 @@ MODULE_PARM_DESC(enable, "Enable OPL3-SA soundcard.");
 module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard.");
 #endif
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for OPL3-SA driver.");
-module_param_array(sb_port, long, NULL, 0444);
+module_param_hw_array(sb_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(sb_port, "SB port # for OPL3-SA driver.");
-module_param_array(wss_port, long, NULL, 0444);
+module_param_hw_array(wss_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(wss_port, "WSS port # for OPL3-SA driver.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port # for OPL3-SA driver.");
-module_param_array(midi_port, long, NULL, 0444);
+module_param_hw_array(midi_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(midi_port, "MIDI port # for OPL3-SA driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for OPL3-SA driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for OPL3-SA driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for OPL3-SA driver.");
 module_param_array(opl3sa3_ymode, int, NULL, 0444);
 MODULE_PARM_DESC(opl3sa3_ymode, "Speaker size selection for 3D Enhancement mode: Desktop/Large Notebook/Small Notebook/HiFi.");
diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c
index 3a9067db1a84..bcbff56f060d 100644
--- a/sound/isa/opti9xx/miro.c
+++ b/sound/isa/opti9xx/miro.c
@@ -69,19 +69,19 @@ module_param(index, int, 0444);
 MODULE_PARM_DESC(index, "Index value for miro soundcard.");
 module_param(id, charp, 0444);
 MODULE_PARM_DESC(id, "ID string for miro soundcard.");
-module_param(port, long, 0444);
+module_param_hw(port, long, ioport, 0444);
 MODULE_PARM_DESC(port, "WSS port # for miro driver.");
-module_param(mpu_port, long, 0444);
+module_param_hw(mpu_port, long, ioport, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for miro driver.");
-module_param(fm_port, long, 0444);
+module_param_hw(fm_port, long, ioport, 0444);
 MODULE_PARM_DESC(fm_port, "FM Port # for miro driver.");
-module_param(irq, int, 0444);
+module_param_hw(irq, int, irq, 0444);
 MODULE_PARM_DESC(irq, "WSS irq # for miro driver.");
-module_param(mpu_irq, int, 0444);
+module_param_hw(mpu_irq, int, irq, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 irq # for miro driver.");
-module_param(dma1, int, 0444);
+module_param_hw(dma1, int, dma, 0444);
 MODULE_PARM_DESC(dma1, "1st dma # for miro driver.");
-module_param(dma2, int, 0444);
+module_param_hw(dma2, int, dma, 0444);
 MODULE_PARM_DESC(dma2, "2nd dma # for miro driver.");
 module_param(wss, int, 0444);
 MODULE_PARM_DESC(wss, "wss mode");
diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c
index 0a5266003786..ceddb392b1e3 100644
--- a/sound/isa/opti9xx/opti92x-ad1848.c
+++ b/sound/isa/opti9xx/opti92x-ad1848.c
@@ -88,20 +88,20 @@ MODULE_PARM_DESC(id, "ID string for opti9xx based soundcard.");
 module_param(isapnp, bool, 0444);
 MODULE_PARM_DESC(isapnp, "Enable ISA PnP detection for specified soundcard.");
 #endif
-module_param(port, long, 0444);
+module_param_hw(port, long, ioport, 0444);
 MODULE_PARM_DESC(port, "WSS port # for opti9xx driver.");
-module_param(mpu_port, long, 0444);
+module_param_hw(mpu_port, long, ioport, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for opti9xx driver.");
-module_param(fm_port, long, 0444);
+module_param_hw(fm_port, long, ioport, 0444);
 MODULE_PARM_DESC(fm_port, "FM port # for opti9xx driver.");
-module_param(irq, int, 0444);
+module_param_hw(irq, int, irq, 0444);
 MODULE_PARM_DESC(irq, "WSS irq # for opti9xx driver.");
-module_param(mpu_irq, int, 0444);
+module_param_hw(mpu_irq, int, irq, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 irq # for opti9xx driver.");
-module_param(dma1, int, 0444);
+module_param_hw(dma1, int, dma, 0444);
 MODULE_PARM_DESC(dma1, "1st dma # for opti9xx driver.");
 #if defined(CS4231) || defined(OPTi93X)
-module_param(dma2, int, 0444);
+module_param_hw(dma2, int, dma, 0444);
 MODULE_PARM_DESC(dma2, "2nd dma # for opti9xx driver.");
 #endif	/* CS4231 || OPTi93X */
 
diff --git a/sound/isa/sb/jazz16.c b/sound/isa/sb/jazz16.c
index 4d909971eedb..bfa0055e1fd6 100644
--- a/sound/isa/sb/jazz16.c
+++ b/sound/isa/sb/jazz16.c
@@ -50,17 +50,17 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for Media Vision Jazz16 based soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable Media Vision Jazz16 based soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for jazz16 driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for jazz16 driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for jazz16 driver.");
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for jazz16 driver.");
-module_param_array(dma8, int, NULL, 0444);
+module_param_hw_array(dma8, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma8, "DMA8 # for jazz16 driver.");
-module_param_array(dma16, int, NULL, 0444);
+module_param_hw_array(dma16, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma16, "DMA16 # for jazz16 driver.");
 
 #define SB_JAZZ16_WAKEUP	0xaf
diff --git a/sound/isa/sb/sb16.c b/sound/isa/sb/sb16.c
index 4a7d7c89808f..3b2e4f405ff2 100644
--- a/sound/isa/sb/sb16.c
+++ b/sound/isa/sb/sb16.c
@@ -99,21 +99,21 @@ MODULE_PARM_DESC(enable, "Enable SoundBlaster 16 soundcard.");
 module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard.");
 #endif
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for SB16 driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for SB16 driver.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port # for SB16 PnP driver.");
 #ifdef SNDRV_SBAWE_EMU8000
-module_param_array(awe_port, long, NULL, 0444);
+module_param_hw_array(awe_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(awe_port, "AWE port # for SB16 PnP driver.");
 #endif
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for SB16 driver.");
-module_param_array(dma8, int, NULL, 0444);
+module_param_hw_array(dma8, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma8, "8-bit DMA # for SB16 driver.");
-module_param_array(dma16, int, NULL, 0444);
+module_param_hw_array(dma16, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma16, "16-bit DMA # for SB16 driver.");
 module_param_array(mic_agc, int, NULL, 0444);
 MODULE_PARM_DESC(mic_agc, "Mic Auto-Gain-Control switch.");
diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c
index ad42d2364199..d77dcba276b5 100644
--- a/sound/isa/sb/sb8.c
+++ b/sound/isa/sb/sb8.c
@@ -47,11 +47,11 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for Sound Blaster soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable Sound Blaster soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for SB8 driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for SB8 driver.");
-module_param_array(dma8, int, NULL, 0444);
+module_param_hw_array(dma8, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma8, "8-bit DMA # for SB8 driver.");
 
 struct snd_sb8 {
diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c
index b61a6633d8f2..c09d9b914efe 100644
--- a/sound/isa/sc6000.c
+++ b/sound/isa/sc6000.c
@@ -64,17 +64,17 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for sc-6000 based soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable sc-6000 based soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for sc-6000 driver.");
-module_param_array(mss_port, long, NULL, 0444);
+module_param_hw_array(mss_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mss_port, "MSS Port # for sc-6000 driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for sc-6000 driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for sc-6000 driver.");
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for sc-6000 driver.");
-module_param_array(dma, int, NULL, 0444);
+module_param_hw_array(dma, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma, "DMA # for sc-6000 driver.");
 module_param_array(joystick, bool, NULL, 0444);
 MODULE_PARM_DESC(joystick, "Enable gameport.");
diff --git a/sound/isa/sscape.c b/sound/isa/sscape.c
index fdcfa29e2205..54f5758a1bb3 100644
--- a/sound/isa/sscape.c
+++ b/sound/isa/sscape.c
@@ -63,22 +63,22 @@ MODULE_PARM_DESC(index, "Index number for SoundScape soundcard");
 module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "Description for SoundScape card");
 
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for SoundScape driver.");
 
-module_param_array(wss_port, long, NULL, 0444);
+module_param_hw_array(wss_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(wss_port, "WSS Port # for SoundScape driver.");
 
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for SoundScape driver.");
 
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU401 IRQ # for SoundScape driver.");
 
-module_param_array(dma, int, NULL, 0444);
+module_param_hw_array(dma, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma, "DMA # for SoundScape driver.");
 
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for SoundScape driver.");
 
 module_param_array(joystick, bool, NULL, 0444);
diff --git a/sound/isa/wavefront/wavefront.c b/sound/isa/wavefront/wavefront.c
index a0987a57c8a9..da4e9a85f0af 100644
--- a/sound/isa/wavefront/wavefront.c
+++ b/sound/isa/wavefront/wavefront.c
@@ -63,23 +63,23 @@ MODULE_PARM_DESC(enable, "Enable WaveFront soundcard.");
 module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "ISA PnP detection for WaveFront soundcards.");
 #endif
-module_param_array(cs4232_pcm_port, long, NULL, 0444);
+module_param_hw_array(cs4232_pcm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(cs4232_pcm_port, "Port # for CS4232 PCM interface.");
-module_param_array(cs4232_pcm_irq, int, NULL, 0444);
+module_param_hw_array(cs4232_pcm_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(cs4232_pcm_irq, "IRQ # for CS4232 PCM interface.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for CS4232 PCM interface.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for CS4232 PCM interface.");
-module_param_array(cs4232_mpu_port, long, NULL, 0444);
+module_param_hw_array(cs4232_mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(cs4232_mpu_port, "port # for CS4232 MPU-401 interface.");
-module_param_array(cs4232_mpu_irq, int, NULL, 0444);
+module_param_hw_array(cs4232_mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(cs4232_mpu_irq, "IRQ # for CS4232 MPU-401 interface.");
-module_param_array(ics2115_irq, int, NULL, 0444);
+module_param_hw_array(ics2115_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(ics2115_irq, "IRQ # for ICS2115.");
-module_param_array(ics2115_port, long, NULL, 0444);
+module_param_hw_array(ics2115_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(ics2115_port, "Port # for ICS2115.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port #.");
 module_param_array(use_cs4232_midi, bool, NULL, 0444);
 MODULE_PARM_DESC(use_cs4232_midi, "Use CS4232 MPU-401 interface (inaccessibly located inside your computer)");
diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c
index 4dae9ff9ef5a..0b1e4b34b299 100644
--- a/sound/isa/wavefront/wavefront_synth.c
+++ b/sound/isa/wavefront/wavefront_synth.c
@@ -1782,7 +1782,7 @@ wavefront_should_cause_interrupt (snd_wavefront_t *dev,
 				  int val, int port, unsigned long timeout)
 
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	init_waitqueue_entry(&wait, current);
 	spin_lock_irq(&dev->irq_lock);
diff --git a/sound/oss/ad1848.c b/sound/oss/ad1848.c
index f6156d8169d0..2421f59cf279 100644
--- a/sound/oss/ad1848.c
+++ b/sound/oss/ad1848.c
@@ -2805,10 +2805,10 @@ static int __initdata dma = -1;
 static int __initdata dma2 = -1;
 static int __initdata type = 0;
 
-module_param(io, int, 0);		/* I/O for a raw AD1848 card */
-module_param(irq, int, 0);		/* IRQ to use */
-module_param(dma, int, 0);		/* First DMA channel */
-module_param(dma2, int, 0);		/* Second DMA channel */
+module_param_hw(io, int, ioport, 0);	/* I/O for a raw AD1848 card */
+module_param_hw(irq, int, irq, 0);	/* IRQ to use */
+module_param_hw(dma, int, dma, 0);	/* First DMA channel */
+module_param_hw(dma2, int, dma, 0);	/* Second DMA channel */
 module_param(type, int, 0);		/* Card type */
 module_param(deskpro_xl, bool, 0);	/* Special magic for Deskpro XL boxen */
 module_param(deskpro_m, bool, 0);	/* Special magic for Deskpro M box */
diff --git a/sound/oss/aedsp16.c b/sound/oss/aedsp16.c
index bb477d5c8528..f058ed6bdb69 100644
--- a/sound/oss/aedsp16.c
+++ b/sound/oss/aedsp16.c
@@ -1303,17 +1303,17 @@ static int __initdata mpu_irq = -1;
 static int __initdata mss_base = -1;
 static int __initdata mpu_base = -1;
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, "I/O base address (0x220 0x240)");
-module_param(irq, int, 0);
+module_param_hw(irq, int, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ line (5 7 9 10 11)");
-module_param(dma, int, 0);
+module_param_hw(dma, int, dma, 0);
 MODULE_PARM_DESC(dma, "dma line (0 1 3)");
-module_param(mpu_irq, int, 0);
+module_param_hw(mpu_irq, int, irq, 0);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ line (5 7 9 10 0)");
-module_param(mss_base, int, 0);
+module_param_hw(mss_base, int, ioport, 0);
 MODULE_PARM_DESC(mss_base, "MSS emulation I/O base address (0x530 0xE80)");
-module_param(mpu_base, int, 0);
+module_param_hw(mpu_base, int, ioport, 0);
 MODULE_PARM_DESC(mpu_base,"MPU-401 I/O base address (0x300 0x310 0x320 0x330)");
 MODULE_AUTHOR("Riccardo Facchetti <fizban@tin.it>");
 MODULE_DESCRIPTION("Audio Excel DSP 16 Driver Version " VERSION);
diff --git a/sound/oss/mpu401.c b/sound/oss/mpu401.c
index 862735005b43..20e8fa46f647 100644
--- a/sound/oss/mpu401.c
+++ b/sound/oss/mpu401.c
@@ -1748,8 +1748,8 @@ static struct address_info cfg;
 static int io = -1;
 static int irq = -1;
 
-module_param(irq, int, 0);
-module_param(io, int, 0);
+module_param_hw(irq, int, irq, 0);
+module_param_hw(io, int, ioport, 0);
 
 static int __init init_mpu401(void)
 {
diff --git a/sound/oss/msnd_pinnacle.c b/sound/oss/msnd_pinnacle.c
index f34ec01d2239..d2abc2cf3213 100644
--- a/sound/oss/msnd_pinnacle.c
+++ b/sound/oss/msnd_pinnacle.c
@@ -1727,22 +1727,22 @@ static int
 calibrate_signal __initdata =		CONFIG_MSND_CALSIGNAL;
 #endif /* MODULE */
 
-module_param				(io, int, 0);
-module_param				(irq, int, 0);
-module_param				(mem, int, 0);
+module_param_hw				(io, int, ioport, 0);
+module_param_hw				(irq, int, irq, 0);
+module_param_hw				(mem, int, iomem, 0);
 module_param				(write_ndelay, int, 0);
 module_param				(fifosize, int, 0);
 module_param				(calibrate_signal, int, 0);
 #ifndef MSND_CLASSIC
 module_param				(digital, bool, 0);
-module_param				(cfg, int, 0);
+module_param_hw				(cfg, int, ioport, 0);
 module_param				(reset, int, 0);
-module_param				(mpu_io, int, 0);
-module_param				(mpu_irq, int, 0);
-module_param				(ide_io0, int, 0);
-module_param				(ide_io1, int, 0);
-module_param				(ide_irq, int, 0);
-module_param				(joystick_io, int, 0);
+module_param_hw				(mpu_io, int, ioport, 0);
+module_param_hw				(mpu_irq, int, irq, 0);
+module_param_hw				(ide_io0, int, ioport, 0);
+module_param_hw				(ide_io1, int, ioport, 0);
+module_param_hw				(ide_irq, int, irq, 0);
+module_param_hw				(joystick_io, int, ioport, 0);
 #endif
 
 static int __init msnd_init(void)
diff --git a/sound/oss/opl3.c b/sound/oss/opl3.c
index b6d19adf8f41..f0f5b5be6314 100644
--- a/sound/oss/opl3.c
+++ b/sound/oss/opl3.c
@@ -1200,7 +1200,7 @@ static int me;
 
 static int io = -1;
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 
 static int __init init_opl3 (void)
 {
diff --git a/sound/oss/pas2_card.c b/sound/oss/pas2_card.c
index b07954a79536..769fca692d2a 100644
--- a/sound/oss/pas2_card.c
+++ b/sound/oss/pas2_card.c
@@ -383,15 +383,15 @@ static int __initdata sb_irq	= -1;
 static int __initdata sb_dma	= -1;
 static int __initdata sb_dma16	= -1;
 
-module_param(io, int, 0);
-module_param(irq, int, 0);
-module_param(dma, int, 0);
-module_param(dma16, int, 0);
-
-module_param(sb_io, int, 0);
-module_param(sb_irq, int, 0);
-module_param(sb_dma, int, 0);
-module_param(sb_dma16, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
+module_param_hw(dma, int, dma, 0);
+module_param_hw(dma16, int, dma, 0);
+
+module_param_hw(sb_io, int, ioport, 0);
+module_param_hw(sb_irq, int, irq, 0);
+module_param_hw(sb_dma, int, dma, 0);
+module_param_hw(sb_dma16, int, dma, 0);
 
 module_param(joystick, bool, 0);
 module_param(symphony, bool, 0);
diff --git a/sound/oss/pss.c b/sound/oss/pss.c
index 81314f9e2ccb..33c3a442e162 100644
--- a/sound/oss/pss.c
+++ b/sound/oss/pss.c
@@ -1139,19 +1139,19 @@ static bool pss_no_sound = 0;	/* Just configure non-sound components */
 static bool pss_keep_settings  = 1;	/* Keep hardware settings at module exit */
 static char *pss_firmware = "/etc/sound/pss_synth";
 
-module_param(pss_io, int, 0);
+module_param_hw(pss_io, int, ioport, 0);
 MODULE_PARM_DESC(pss_io, "Set i/o base of PSS card (probably 0x220 or 0x240)");
-module_param(mss_io, int, 0);
+module_param_hw(mss_io, int, ioport, 0);
 MODULE_PARM_DESC(mss_io, "Set WSS (audio) i/o base (0x530, 0x604, 0xE80, 0xF40, or other. Address must end in 0 or 4 and must be from 0x100 to 0xFF4)");
-module_param(mss_irq, int, 0);
+module_param_hw(mss_irq, int, irq, 0);
 MODULE_PARM_DESC(mss_irq, "Set WSS (audio) IRQ (3, 5, 7, 9, 10, 11, 12)");
-module_param(mss_dma, int, 0);
+module_param_hw(mss_dma, int, dma, 0);
 MODULE_PARM_DESC(mss_dma, "Set WSS (audio) DMA (0, 1, 3)");
-module_param(mpu_io, int, 0);
+module_param_hw(mpu_io, int, ioport, 0);
 MODULE_PARM_DESC(mpu_io, "Set MIDI i/o base (0x330 or other. Address must be on 4 location boundaries and must be from 0x100 to 0xFFC)");
-module_param(mpu_irq, int, 0);
+module_param_hw(mpu_irq, int, irq, 0);
 MODULE_PARM_DESC(mpu_irq, "Set MIDI IRQ (3, 5, 7, 9, 10, 11, 12)");
-module_param(pss_cdrom_port, int, 0);
+module_param_hw(pss_cdrom_port, int, ioport, 0);
 MODULE_PARM_DESC(pss_cdrom_port, "Set the PSS CDROM port i/o base (0x340 or other)");
 module_param(pss_enable_joystick, bool, 0);
 MODULE_PARM_DESC(pss_enable_joystick, "Enables the PSS joystick port (1 to enable, 0 to disable)");
diff --git a/sound/oss/sb_card.c b/sound/oss/sb_card.c
index fb5d7250de38..2a92cfe6cfe9 100644
--- a/sound/oss/sb_card.c
+++ b/sound/oss/sb_card.c
@@ -61,15 +61,15 @@ static int __initdata uart401	= 0;
 static int __initdata pnp       = 0;
 #endif
 
-module_param(io, int, 000);
+module_param_hw(io, int, ioport, 000);
 MODULE_PARM_DESC(io,       "Soundblaster i/o base address (0x220,0x240,0x260,0x280)");
-module_param(irq, int, 000);
+module_param_hw(irq, int, irq, 000);
 MODULE_PARM_DESC(irq,	   "IRQ (5,7,9,10)");
-module_param(dma, int, 000);
+module_param_hw(dma, int, dma, 000);
 MODULE_PARM_DESC(dma,	   "8-bit DMA channel (0,1,3)");
-module_param(dma16, int, 000);
+module_param_hw(dma16, int, dma, 000);
 MODULE_PARM_DESC(dma16,	   "16-bit DMA channel (5,6,7)");
-module_param(mpu_io, int, 000);
+module_param_hw(mpu_io, int, ioport, 000);
 MODULE_PARM_DESC(mpu_io,   "MPU base address");
 module_param(type, int, 000);
 MODULE_PARM_DESC(type,	   "You can set this to specific card type (doesn't " \
diff --git a/sound/oss/trix.c b/sound/oss/trix.c
index 3c494dc93b93..a57bc635d758 100644
--- a/sound/oss/trix.c
+++ b/sound/oss/trix.c
@@ -413,15 +413,15 @@ static int __initdata sb_irq	= -1;
 static int __initdata mpu_io	= -1;
 static int __initdata mpu_irq	= -1;
 
-module_param(io, int, 0);
-module_param(irq, int, 0);
-module_param(dma, int, 0);
-module_param(dma2, int, 0);
-module_param(sb_io, int, 0);
-module_param(sb_dma, int, 0);
-module_param(sb_irq, int, 0);
-module_param(mpu_io, int, 0);
-module_param(mpu_irq, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
+module_param_hw(dma, int, dma, 0);
+module_param_hw(dma2, int, dma, 0);
+module_param_hw(sb_io, int, ioport, 0);
+module_param_hw(sb_dma, int, dma, 0);
+module_param_hw(sb_irq, int, irq, 0);
+module_param_hw(mpu_io, int, ioport, 0);
+module_param_hw(mpu_irq, int, irq, 0);
 module_param(joystick, bool, 0);
 
 static int __init init_trix(void)
diff --git a/sound/oss/uart401.c b/sound/oss/uart401.c
index dae4d4344407..83dcc85b8688 100644
--- a/sound/oss/uart401.c
+++ b/sound/oss/uart401.c
@@ -429,8 +429,8 @@ static struct address_info cfg_mpu;
 static int io = -1;
 static int irq = -1;
 
-module_param(io, int, 0444);
-module_param(irq, int, 0444);
+module_param_hw(io, int, ioport, 0444);
+module_param_hw(irq, int, irq, 0444);
 
 
 static int __init init_uart401(void)
diff --git a/sound/oss/uart6850.c b/sound/oss/uart6850.c
index 1079133dd6ab..eda32d7eddbd 100644
--- a/sound/oss/uart6850.c
+++ b/sound/oss/uart6850.c
@@ -315,8 +315,8 @@ static struct address_info cfg_mpu;
 static int __initdata io = -1;
 static int __initdata irq = -1;
 
-module_param(io, int, 0);
-module_param(irq, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
 
 static int __init init_uart6850(void)
 {
diff --git a/sound/oss/waveartist.c b/sound/oss/waveartist.c
index 0b8d0de87273..4f0c3a232e41 100644
--- a/sound/oss/waveartist.c
+++ b/sound/oss/waveartist.c
@@ -2036,8 +2036,8 @@ __setup("waveartist=", setup_waveartist);
 #endif
 
 MODULE_DESCRIPTION("Rockwell WaveArtist RWA-010 sound driver");
-module_param(io, int, 0);		/* IO base */
-module_param(irq, int, 0);		/* IRQ */
-module_param(dma, int, 0);		/* DMA */
-module_param(dma2, int, 0);		/* DMA2 */
+module_param_hw(io, int, ioport, 0);		/* IO base */
+module_param_hw(irq, int, irq, 0);		/* IRQ */
+module_param_hw(dma, int, dma, 0);		/* DMA */
+module_param_hw(dma2, int, dma, 0);		/* DMA2 */
 MODULE_LICENSE("GPL");
diff --git a/sound/pci/als4000.c b/sound/pci/als4000.c
index 92bc06d01288..7844a75d8ed9 100644
--- a/sound/pci/als4000.c
+++ b/sound/pci/als4000.c
@@ -102,7 +102,7 @@ MODULE_PARM_DESC(id, "ID string for ALS4000 soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable ALS4000 soundcard.");
 #ifdef SUPPORT_JOYSTICK
-module_param_array(joystick_port, int, NULL, 0444);
+module_param_hw_array(joystick_port, int, ioport, NULL, 0444);
 MODULE_PARM_DESC(joystick_port, "Joystick port address for ALS4000 soundcard. (0 = disabled)");
 #endif
 
diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c
index 227c9d3802b8..745a0a3743b4 100644
--- a/sound/pci/cmipci.c
+++ b/sound/pci/cmipci.c
@@ -68,14 +68,14 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for C-Media PCI soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable C-Media PCI soundcard.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port.");
 module_param_array(soft_ac3, bool, NULL, 0444);
 MODULE_PARM_DESC(soft_ac3, "Software-conversion of raw SPDIF packets (model 033 only).");
 #ifdef SUPPORT_JOYSTICK
-module_param_array(joystick_port, int, NULL, 0444);
+module_param_hw_array(joystick_port, int, ioport, NULL, 0444);
 MODULE_PARM_DESC(joystick_port, "Joystick port address.");
 #endif
 
diff --git a/sound/pci/ens1370.c b/sound/pci/ens1370.c
index 5d10349d11ce..09a63ef41ef2 100644
--- a/sound/pci/ens1370.c
+++ b/sound/pci/ens1370.c
@@ -106,7 +106,7 @@ module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable Ensoniq AudioPCI soundcard.");
 #ifdef SUPPORT_JOYSTICK
 #ifdef CHIP1371
-module_param_array(joystick_port, int, NULL, 0444);
+module_param_hw_array(joystick_port, int, ioport, NULL, 0444);
 MODULE_PARM_DESC(joystick_port, "Joystick port address.");
 #else
 module_param_array(joystick, bool, NULL, 0444);
diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h
index d6fb2d5d01a7..60ce1cfc300f 100644
--- a/sound/pci/hda/hda_codec.h
+++ b/sound/pci/hda/hda_codec.h
@@ -295,6 +295,8 @@ struct hda_codec {
 
 #define list_for_each_codec(c, bus) \
 	list_for_each_entry(c, &(bus)->core.codec_list, core.list)
+#define list_for_each_codec_safe(c, n, bus)				\
+	list_for_each_entry_safe(c, n, &(bus)->core.codec_list, core.list)
 
 /* snd_hda_codec_read/write optional flags */
 #define HDA_RW_NO_RESPONSE_FALLBACK	(1 << 0)
diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
index 3715a5725613..1c60beb5b70a 100644
--- a/sound/pci/hda/hda_controller.c
+++ b/sound/pci/hda/hda_controller.c
@@ -1337,8 +1337,12 @@ EXPORT_SYMBOL_GPL(azx_probe_codecs);
 /* configure each codec instance */
 int azx_codec_configure(struct azx *chip)
 {
-	struct hda_codec *codec;
-	list_for_each_codec(codec, &chip->bus) {
+	struct hda_codec *codec, *next;
+
+	/* use _safe version here since snd_hda_codec_configure() deregisters
+	 * the device upon error and deletes itself from the bus list.
+	 */
+	list_for_each_codec_safe(codec, next, &chip->bus) {
 		snd_hda_codec_configure(codec);
 	}
 	return 0;
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 2842c82363c0..71545b56b4c8 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -3174,6 +3174,7 @@ static int check_dyn_adc_switch(struct hda_codec *codec)
 						spec->input_paths[i][nums]);
 					spec->input_paths[i][nums] =
 						spec->input_paths[i][n];
+					spec->input_paths[i][n] = 0;
 				}
 			}
 			nums++;
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 1770f085c2a6..01eb1dc7b5b3 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -370,10 +370,12 @@ enum {
 #define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71)
 #define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0)
 #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+#define IS_BXT_T(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x1a98)
 #define IS_GLK(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x3198)
-#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \
-			IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci)	|| \
-			IS_GLK(pci)
+#define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348)
+#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci) || \
+			  IS_BXT_T(pci) || IS_KBL(pci) || IS_KBL_LP(pci) || \
+			  IS_KBL_H(pci)	|| IS_GLK(pci) || IS_CFL(pci))
 
 static char *driver_short_names[] = {
 	[AZX_DRIVER_ICH] = "HDA Intel",
@@ -2378,6 +2380,9 @@ static const struct pci_device_id azx_ids[] = {
 	/* Kabylake-H */
 	{ PCI_DEVICE(0x8086, 0xa2f0),
 	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
+	/* Coffelake */
+	{ PCI_DEVICE(0x8086, 0xa348),
+	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE},
 	/* Broxton-P(Apollolake) */
 	{ PCI_DEVICE(0x8086, 0x5a98),
 	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index e8253737c47a..63bc894ddf5e 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -860,7 +860,9 @@ static const struct hda_fixup cxt_fixups[] = {
 			{ 0x16, 0x21011020 }, /* line-out */
 			{ 0x18, 0x2181103f }, /* line-in */
 			{ }
-		}
+		},
+		.chained = true,
+		.chain_id = CXT_FIXUP_MUTE_LED_GPIO,
 	},
 	[CXT_FIXUP_HP_SPECTRE] = {
 		.type = HDA_FIXUP_PINS,
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 58df440013c5..cbeebc0a9711 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2324,10 +2324,11 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_MBA11_VREF),
 
 	SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD),
-	SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD),
-	SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
 	SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
+	SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD),
+	SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
+	SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
 	SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
 	SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
 	SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
@@ -2342,6 +2343,7 @@ static const struct hda_model_fixup alc882_fixup_models[] = {
 	{.id = ALC883_FIXUP_ACER_EAPD, .name = "acer-aspire"},
 	{.id = ALC882_FIXUP_INV_DMIC, .name = "inv-dmic"},
 	{.id = ALC882_FIXUP_NO_PRIMARY_HP, .name = "no-primary-hp"},
+	{.id = ALC1220_FIXUP_GB_DUAL_CODECS, .name = "dual-codecs"},
 	{}
 };
 
@@ -5852,7 +5854,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
 	SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x10c0, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
+	SND_PCI_QUIRK(0x1043, 0x10d0, "ASUS X540LA/X540LJ", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+	SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1043, 0x1290, "ASUS X441SA", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC),
@@ -5860,13 +5866,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A),
 	SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
 	SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
+	SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
-	SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1043, 0x10d0, "ASUS X540LA/X540LJ", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1043, 0x1290, "ASUS X441SA", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2),
 	SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC),
@@ -6014,6 +6017,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
 	{.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"},
 	{.id = ALC292_FIXUP_TPT440, .name = "tpt440"},
 	{.id = ALC292_FIXUP_TPT460, .name = "tpt460"},
+	{.id = ALC233_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"},
 	{}
 };
 #define ALC225_STANDARD_PINS \
@@ -6465,8 +6469,11 @@ static int patch_alc269(struct hda_codec *codec)
 		break;
 	case 0x10ec0225:
 	case 0x10ec0295:
+		spec->codec_variant = ALC269_TYPE_ALC225;
+		break;
 	case 0x10ec0299:
 		spec->codec_variant = ALC269_TYPE_ALC225;
+		spec->gen.mixer_nid = 0; /* no loopback on ALC299 */
 		break;
 	case 0x10ec0234:
 	case 0x10ec0274:
@@ -7338,6 +7345,7 @@ static const struct hda_model_fixup alc662_fixup_models[] = {
 	{.id = ALC662_FIXUP_ASUS_MODE8, .name = "asus-mode8"},
 	{.id = ALC662_FIXUP_INV_DMIC, .name = "inv-dmic"},
 	{.id = ALC668_FIXUP_DELL_MIC_NO_PRESENCE, .name = "dell-headset-multi"},
+	{.id = ALC662_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"},
 	{}
 };
 
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index faa3d38bac0b..6cefdf6c0b75 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1559,6 +1559,8 @@ static const struct snd_pci_quirk stac9200_fixup_tbl[] = {
 		      "Dell Inspiron 1501", STAC_9200_DELL_M26),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01f6,
 		      "unknown Dell", STAC_9200_DELL_M26),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0201,
+		      "Dell Latitude D430", STAC_9200_DELL_M22),
 	/* Panasonic */
 	SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-74", STAC_9200_PANASONIC),
 	/* Gateway machines needs EAPD to be set on resume */
diff --git a/sound/pci/mixart/mixart_core.c b/sound/pci/mixart/mixart_core.c
index dccf3db48fe0..8bf2ce32d4a8 100644
--- a/sound/pci/mixart/mixart_core.c
+++ b/sound/pci/mixart/mixart_core.c
@@ -239,7 +239,7 @@ int snd_mixart_send_msg(struct mixart_mgr *mgr, struct mixart_msg *request, int
 	struct mixart_msg resp;
 	u32 msg_frame = 0; /* set to 0, so it's no notification to wait for, but the answer */
 	int err;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	long timeout;
 
 	init_waitqueue_entry(&wait, current);
@@ -284,7 +284,7 @@ int snd_mixart_send_msg_wait_notif(struct mixart_mgr *mgr,
 				   struct mixart_msg *request, u32 notif_event)
 {
 	int err;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	long timeout;
 
 	if (snd_BUG_ON(!notif_event))
diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c
index 19c9df6b0f3d..f067c76d77f8 100644
--- a/sound/pci/riptide/riptide.c
+++ b/sound/pci/riptide/riptide.c
@@ -137,12 +137,12 @@ MODULE_PARM_DESC(id, "ID string for Riptide soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable Riptide soundcard.");
 #ifdef SUPPORT_JOYSTICK
-module_param_array(joystick_port, int, NULL, 0444);
+module_param_hw_array(joystick_port, int, ioport, NULL, 0444);
 MODULE_PARM_DESC(joystick_port, "Joystick port # for Riptide soundcard.");
 #endif
-module_param_array(mpu_port, int, NULL, 0444);
+module_param_hw_array(mpu_port, int, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU401 port # for Riptide driver.");
-module_param_array(opl3_port, int, NULL, 0444);
+module_param_hw_array(opl3_port, int, ioport, NULL, 0444);
 MODULE_PARM_DESC(opl3_port, "OPL3 port # for Riptide driver.");
 
 /*
diff --git a/sound/pci/sonicvibes.c b/sound/pci/sonicvibes.c
index a6aa48c5b969..8e3d4ec39c35 100644
--- a/sound/pci/sonicvibes.c
+++ b/sound/pci/sonicvibes.c
@@ -66,7 +66,7 @@ module_param_array(reverb, bool, NULL, 0444);
 MODULE_PARM_DESC(reverb, "Enable reverb (SRAM is present) for S3 SonicVibes soundcard.");
 module_param_array(mge, bool, NULL, 0444);
 MODULE_PARM_DESC(mge, "MIC Gain Enable for S3 SonicVibes soundcard.");
-module_param(dmaio, uint, 0444);
+module_param_hw(dmaio, uint, ioport, 0444);
 MODULE_PARM_DESC(dmaio, "DDMA i/o base address for S3 SonicVibes soundcard.");
 
 /*
diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c
index d078e86414c2..b6c84d15b10b 100644
--- a/sound/pci/via82xx.c
+++ b/sound/pci/via82xx.c
@@ -92,7 +92,7 @@ module_param(index, int, 0444);
 MODULE_PARM_DESC(index, "Index value for VIA 82xx bridge.");
 module_param(id, charp, 0444);
 MODULE_PARM_DESC(id, "ID string for VIA 82xx bridge.");
-module_param(mpu_port, long, 0444);
+module_param_hw(mpu_port, long, ioport, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port. (VT82C686x only)");
 #ifdef SUPPORT_JOYSTICK
 module_param(joystick, bool, 0444);
diff --git a/sound/pci/ymfpci/ymfpci.c b/sound/pci/ymfpci/ymfpci.c
index 812e27a1bcbc..4faf3e1ed06a 100644
--- a/sound/pci/ymfpci/ymfpci.c
+++ b/sound/pci/ymfpci/ymfpci.c
@@ -55,12 +55,12 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for the Yamaha DS-1 PCI soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable Yamaha DS-1 soundcard.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 Port.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM OPL-3 Port.");
 #ifdef SUPPORT_JOYSTICK
-module_param_array(joystick_port, long, NULL, 0444);
+module_param_hw_array(joystick_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(joystick_port, "Joystick port address");
 #endif
 module_param_array(rear_switch, bool, NULL, 0444);
diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c
index fe4ba463b57c..1114166c685c 100644
--- a/sound/pci/ymfpci/ymfpci_main.c
+++ b/sound/pci/ymfpci/ymfpci_main.c
@@ -781,7 +781,7 @@ static snd_pcm_uframes_t snd_ymfpci_capture_pointer(struct snd_pcm_substream *su
 
 static void snd_ymfpci_irq_wait(struct snd_ymfpci *chip)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int loops = 4;
 
 	while (loops-- > 0) {
diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c
index 7ae46c2647d4..b7ef8c59b49a 100644
--- a/sound/soc/atmel/atmel-classd.c
+++ b/sound/soc/atmel/atmel-classd.c
@@ -301,6 +301,14 @@ static int atmel_classd_codec_probe(struct snd_soc_codec *codec)
 	return 0;
 }
 
+static int atmel_classd_codec_resume(struct snd_soc_codec *codec)
+{
+	struct snd_soc_card *card = snd_soc_codec_get_drvdata(codec);
+	struct atmel_classd *dd = snd_soc_card_get_drvdata(card);
+
+	return regcache_sync(dd->regmap);
+}
+
 static struct regmap *atmel_classd_codec_get_remap(struct device *dev)
 {
 	return dev_get_regmap(dev, NULL);
@@ -308,6 +316,7 @@ static struct regmap *atmel_classd_codec_get_remap(struct device *dev)
 
 static struct snd_soc_codec_driver soc_codec_dev_classd = {
 	.probe		= atmel_classd_codec_probe,
+	.resume		= atmel_classd_codec_resume,
 	.get_regmap	= atmel_classd_codec_get_remap,
 	.component_driver = {
 		.controls		= atmel_classd_snd_controls,
diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c
index 6dd7578f0bb8..024d83fa6a7f 100644
--- a/sound/soc/codecs/da7213.c
+++ b/sound/soc/codecs/da7213.c
@@ -772,7 +772,7 @@ static int da7213_dai_event(struct snd_soc_dapm_widget *w,
 				++i;
 				msleep(50);
 			}
-		} while ((i < DA7213_SRM_CHECK_RETRIES) & (!srm_lock));
+		} while ((i < DA7213_SRM_CHECK_RETRIES) && (!srm_lock));
 
 		if (!srm_lock)
 			dev_warn(codec->dev, "SRM failed to lock\n");
diff --git a/sound/soc/codecs/rt286.c b/sound/soc/codecs/rt286.c
index 9c365a7f758d..7899a2cdeb42 100644
--- a/sound/soc/codecs/rt286.c
+++ b/sound/soc/codecs/rt286.c
@@ -1108,6 +1108,13 @@ static const struct dmi_system_id force_combo_jack_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Kabylake Client platform")
 		}
 	},
+	{
+		.ident = "Thinkpad Helix 2nd",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad Helix 2nd")
+		}
+	},
 
 	{ }
 };
diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
index 2c9dedab5184..bc136d2bd7cd 100644
--- a/sound/soc/generic/simple-card.c
+++ b/sound/soc/generic/simple-card.c
@@ -202,7 +202,7 @@ static int asoc_simple_card_dai_init(struct snd_soc_pcm_runtime *rtd)
 	if (ret < 0)
 		return ret;
 
-	ret = asoc_simple_card_init_mic(rtd->card, &priv->hp_jack, PREFIX);
+	ret = asoc_simple_card_init_mic(rtd->card, &priv->mic_jack, PREFIX);
 	if (ret < 0)
 		return ret;
 
diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c
index e3f06672fd6d..e7d766d56c8e 100644
--- a/sound/soc/intel/skylake/skl-nhlt.c
+++ b/sound/soc/intel/skylake/skl-nhlt.c
@@ -21,8 +21,9 @@
 #include "skl.h"
 
 /* Unique identification for getting NHLT blobs */
-static u8 OSC_UUID[16] = {0x6E, 0x88, 0x9F, 0xA6, 0xEB, 0x6C, 0x94, 0x45,
-				0xA4, 0x1F, 0x7B, 0x5D, 0xCE, 0x24, 0xC5, 0x53};
+static guid_t osc_guid =
+	GUID_INIT(0xA69F886E, 0x6CEB, 0x4594,
+		  0xA4, 0x1F, 0x7B, 0x5D, 0xCE, 0x24, 0xC5, 0x53);
 
 struct nhlt_acpi_table *skl_nhlt_init(struct device *dev)
 {
@@ -37,7 +38,7 @@ struct nhlt_acpi_table *skl_nhlt_init(struct device *dev)
 		return NULL;
 	}
 
-	obj = acpi_evaluate_dsm(handle, OSC_UUID, 1, 1, NULL);
+	obj = acpi_evaluate_dsm(handle, &osc_guid, 1, 1, NULL);
 	if (obj && obj->type == ACPI_TYPE_BUFFER) {
 		nhlt_ptr = (struct nhlt_resource_desc  *)obj->buffer.pointer;
 		nhlt_table = (struct nhlt_acpi_table *)
diff --git a/sound/soc/intel/skylake/skl-sst-ipc.c b/sound/soc/intel/skylake/skl-sst-ipc.c
index 58c525096a7c..498b15345b1a 100644
--- a/sound/soc/intel/skylake/skl-sst-ipc.c
+++ b/sound/soc/intel/skylake/skl-sst-ipc.c
@@ -413,8 +413,11 @@ static void skl_ipc_process_reply(struct sst_generic_ipc *ipc,
 	u32 reply = header.primary & IPC_GLB_REPLY_STATUS_MASK;
 	u64 *ipc_header = (u64 *)(&header);
 	struct skl_sst *skl = container_of(ipc, struct skl_sst, ipc);
+	unsigned long flags;
 
+	spin_lock_irqsave(&ipc->dsp->spinlock, flags);
 	msg = skl_ipc_reply_get_msg(ipc, *ipc_header);
+	spin_unlock_irqrestore(&ipc->dsp->spinlock, flags);
 	if (msg == NULL) {
 		dev_dbg(ipc->dev, "ipc: rx list is empty\n");
 		return;
@@ -456,8 +459,10 @@ static void skl_ipc_process_reply(struct sst_generic_ipc *ipc,
 		}
 	}
 
+	spin_lock_irqsave(&ipc->dsp->spinlock, flags);
 	list_del(&msg->list);
 	sst_ipc_tx_msg_reply_complete(ipc, msg);
+	spin_unlock_irqrestore(&ipc->dsp->spinlock, flags);
 }
 
 irqreturn_t skl_dsp_irq_thread_handler(int irq, void *context)
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index 3a99712e44a8..64a0f8ed33e1 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -2502,7 +2502,7 @@ static int skl_tplg_get_manifest_tkn(struct device *dev,
 
 			if (ret < 0)
 				return ret;
-			tkn_count += ret;
+			tkn_count = ret;
 
 			tuple_size += tkn_count *
 				sizeof(struct snd_soc_tplg_vendor_string_elem);
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index 6df3b317a476..4c9b5781282b 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -410,7 +410,7 @@ static int skl_free(struct hdac_ext_bus *ebus)
 	struct skl *skl  = ebus_to_skl(ebus);
 	struct hdac_bus *bus = ebus_to_hbus(ebus);
 
-	skl->init_failed = 1; /* to be sure */
+	skl->init_done = 0; /* to be sure */
 
 	snd_hdac_ext_stop_streams(ebus);
 
@@ -428,8 +428,10 @@ static int skl_free(struct hdac_ext_bus *ebus)
 
 	snd_hdac_ext_bus_exit(ebus);
 
+	cancel_work_sync(&skl->probe_work);
 	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI))
 		snd_hdac_i915_exit(&ebus->bus);
+
 	return 0;
 }
 
@@ -566,6 +568,84 @@ static const struct hdac_bus_ops bus_core_ops = {
 	.get_response = snd_hdac_bus_get_response,
 };
 
+static int skl_i915_init(struct hdac_bus *bus)
+{
+	int err;
+
+	/*
+	 * The HDMI codec is in GPU so we need to ensure that it is powered
+	 * up and ready for probe
+	 */
+	err = snd_hdac_i915_init(bus);
+	if (err < 0)
+		return err;
+
+	err = snd_hdac_display_power(bus, true);
+	if (err < 0)
+		dev_err(bus->dev, "Cannot turn on display power on i915\n");
+
+	return err;
+}
+
+static void skl_probe_work(struct work_struct *work)
+{
+	struct skl *skl = container_of(work, struct skl, probe_work);
+	struct hdac_ext_bus *ebus = &skl->ebus;
+	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_ext_link *hlink = NULL;
+	int err;
+
+	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) {
+		err = skl_i915_init(bus);
+		if (err < 0)
+			return;
+	}
+
+	err = skl_init_chip(bus, true);
+	if (err < 0) {
+		dev_err(bus->dev, "Init chip failed with err: %d\n", err);
+		goto out_err;
+	}
+
+	/* codec detection */
+	if (!bus->codec_mask)
+		dev_info(bus->dev, "no hda codecs found!\n");
+
+	/* create codec instances */
+	err = skl_codec_create(ebus);
+	if (err < 0)
+		goto out_err;
+
+	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) {
+		err = snd_hdac_display_power(bus, false);
+		if (err < 0) {
+			dev_err(bus->dev, "Cannot turn off display power on i915\n");
+			return;
+		}
+	}
+
+	/* register platform dai and controls */
+	err = skl_platform_register(bus->dev);
+	if (err < 0)
+		return;
+	/*
+	 * we are done probing so decrement link counts
+	 */
+	list_for_each_entry(hlink, &ebus->hlink_list, list)
+		snd_hdac_ext_bus_link_put(ebus, hlink);
+
+	/* configure PM */
+	pm_runtime_put_noidle(bus->dev);
+	pm_runtime_allow(bus->dev);
+	skl->init_done = 1;
+
+	return;
+
+out_err:
+	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI))
+		err = snd_hdac_display_power(bus, false);
+}
+
 /*
  * constructor
  */
@@ -593,6 +673,7 @@ static int skl_create(struct pci_dev *pci,
 	snd_hdac_ext_bus_init(ebus, &pci->dev, &bus_core_ops, io_ops);
 	ebus->bus.use_posbuf = 1;
 	skl->pci = pci;
+	INIT_WORK(&skl->probe_work, skl_probe_work);
 
 	ebus->bus.bdl_pos_adj = 0;
 
@@ -601,27 +682,6 @@ static int skl_create(struct pci_dev *pci,
 	return 0;
 }
 
-static int skl_i915_init(struct hdac_bus *bus)
-{
-	int err;
-
-	/*
-	 * The HDMI codec is in GPU so we need to ensure that it is powered
-	 * up and ready for probe
-	 */
-	err = snd_hdac_i915_init(bus);
-	if (err < 0)
-		return err;
-
-	err = snd_hdac_display_power(bus, true);
-	if (err < 0) {
-		dev_err(bus->dev, "Cannot turn on display power on i915\n");
-		return err;
-	}
-
-	return err;
-}
-
 static int skl_first_init(struct hdac_ext_bus *ebus)
 {
 	struct skl *skl = ebus_to_skl(ebus);
@@ -684,20 +744,7 @@ static int skl_first_init(struct hdac_ext_bus *ebus)
 	/* initialize chip */
 	skl_init_pci(skl);
 
-	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) {
-		err = skl_i915_init(bus);
-		if (err < 0)
-			return err;
-	}
-
-	skl_init_chip(bus, true);
-
-	/* codec detection */
-	if (!bus->codec_mask) {
-		dev_info(bus->dev, "no hda codecs found!\n");
-	}
-
-	return 0;
+	return skl_init_chip(bus, true);
 }
 
 static int skl_probe(struct pci_dev *pci,
@@ -706,7 +753,6 @@ static int skl_probe(struct pci_dev *pci,
 	struct skl *skl;
 	struct hdac_ext_bus *ebus = NULL;
 	struct hdac_bus *bus = NULL;
-	struct hdac_ext_link *hlink = NULL;
 	int err;
 
 	/* we use ext core ops, so provide NULL for ops here */
@@ -729,7 +775,7 @@ static int skl_probe(struct pci_dev *pci,
 
 	if (skl->nhlt == NULL) {
 		err = -ENODEV;
-		goto out_display_power_off;
+		goto out_free;
 	}
 
 	err = skl_nhlt_create_sysfs(skl);
@@ -760,56 +806,24 @@ static int skl_probe(struct pci_dev *pci,
 	if (bus->mlcap)
 		snd_hdac_ext_bus_get_ml_capabilities(ebus);
 
+	snd_hdac_bus_stop_chip(bus);
+
 	/* create device for soc dmic */
 	err = skl_dmic_device_register(skl);
 	if (err < 0)
 		goto out_dsp_free;
 
-	/* register platform dai and controls */
-	err = skl_platform_register(bus->dev);
-	if (err < 0)
-		goto out_dmic_free;
-
-	/* create codec instances */
-	err = skl_codec_create(ebus);
-	if (err < 0)
-		goto out_unregister;
-
-	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) {
-		err = snd_hdac_display_power(bus, false);
-		if (err < 0) {
-			dev_err(bus->dev, "Cannot turn off display power on i915\n");
-			return err;
-		}
-	}
-
-	/*
-	 * we are done probling so decrement link counts
-	 */
-	list_for_each_entry(hlink, &ebus->hlink_list, list)
-		snd_hdac_ext_bus_link_put(ebus, hlink);
-
-	/* configure PM */
-	pm_runtime_put_noidle(bus->dev);
-	pm_runtime_allow(bus->dev);
+	schedule_work(&skl->probe_work);
 
 	return 0;
 
-out_unregister:
-	skl_platform_unregister(bus->dev);
-out_dmic_free:
-	skl_dmic_device_unregister(skl);
 out_dsp_free:
 	skl_free_dsp(skl);
 out_mach_free:
 	skl_machine_device_unregister(skl);
 out_nhlt_free:
 	skl_nhlt_free(skl->nhlt);
-out_display_power_off:
-	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI))
-		snd_hdac_display_power(bus, false);
 out_free:
-	skl->init_failed = 1;
 	skl_free(ebus);
 
 	return err;
@@ -828,7 +842,7 @@ static void skl_shutdown(struct pci_dev *pci)
 
 	skl = ebus_to_skl(ebus);
 
-	if (skl->init_failed)
+	if (!skl->init_done)
 		return;
 
 	snd_hdac_ext_stop_streams(ebus);
diff --git a/sound/soc/intel/skylake/skl.h b/sound/soc/intel/skylake/skl.h
index a454f6035f3e..2a630fcb7f08 100644
--- a/sound/soc/intel/skylake/skl.h
+++ b/sound/soc/intel/skylake/skl.h
@@ -46,7 +46,7 @@ struct skl {
 	struct hdac_ext_bus ebus;
 	struct pci_dev *pci;
 
-	unsigned int init_failed:1; /* delayed init failed */
+	unsigned int init_done:1; /* delayed init status */
 	struct platform_device *dmic_dev;
 	struct platform_device *i2s_dev;
 	struct snd_soc_platform *platform;
@@ -64,6 +64,8 @@ struct skl {
 	const struct firmware *tplg;
 
 	int supend_active;
+
+	struct work_struct probe_work;
 };
 
 #define skl_to_ebus(s)	(&(s)->ebus)
diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c
index 66203d107a11..d3b0dc145a56 100644
--- a/sound/soc/sh/rcar/adg.c
+++ b/sound/soc/sh/rcar/adg.c
@@ -507,7 +507,8 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv,
 				rbga = rbgx;
 				adg->rbga_rate_for_441khz = rate / div;
 				ckr |= brg_table[i] << 20;
-				if (req_441kHz_rate)
+				if (req_441kHz_rate &&
+				    !(adg_mode_flags(adg) & AUDIO_OUT_48))
 					parent_clk_name = __clk_get_name(clk);
 			}
 		}
@@ -522,7 +523,8 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv,
 				rbgb = rbgx;
 				adg->rbgb_rate_for_48khz = rate / div;
 				ckr |= brg_table[i] << 16;
-				if (req_48kHz_rate)
+				if (req_48kHz_rate &&
+				    (adg_mode_flags(adg) & AUDIO_OUT_48))
 					parent_clk_name = __clk_get_name(clk);
 			}
 		}
diff --git a/sound/soc/sh/rcar/cmd.c b/sound/soc/sh/rcar/cmd.c
index 7d92a24b7cfa..d879c010cf03 100644
--- a/sound/soc/sh/rcar/cmd.c
+++ b/sound/soc/sh/rcar/cmd.c
@@ -89,6 +89,7 @@ static int rsnd_cmd_init(struct rsnd_mod *mod,
 	dev_dbg(dev, "ctu/mix path = 0x%08x", data);
 
 	rsnd_mod_write(mod, CMD_ROUTE_SLCT, data);
+	rsnd_mod_write(mod, CMD_BUSIF_MODE, rsnd_get_busif_shift(io, mod) | 1);
 	rsnd_mod_write(mod, CMD_BUSIF_DALIGN, rsnd_get_dalign(mod, io));
 
 	rsnd_adg_set_cmd_timsel_gen2(mod, io);
diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c
index 1744015408c3..8c1f4e2e0c4f 100644
--- a/sound/soc/sh/rcar/core.c
+++ b/sound/soc/sh/rcar/core.c
@@ -343,6 +343,57 @@ u32 rsnd_get_dalign(struct rsnd_mod *mod, struct rsnd_dai_stream *io)
 		return 0x76543210;
 }
 
+u32 rsnd_get_busif_shift(struct rsnd_dai_stream *io, struct rsnd_mod *mod)
+{
+	enum rsnd_mod_type playback_mods[] = {
+		RSND_MOD_SRC,
+		RSND_MOD_CMD,
+		RSND_MOD_SSIU,
+	};
+	enum rsnd_mod_type capture_mods[] = {
+		RSND_MOD_CMD,
+		RSND_MOD_SRC,
+		RSND_MOD_SSIU,
+	};
+	struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
+	struct rsnd_mod *tmod = NULL;
+	enum rsnd_mod_type *mods =
+		rsnd_io_is_play(io) ?
+		playback_mods : capture_mods;
+	int i;
+
+	/*
+	 * This is needed for 24bit data
+	 * We need to shift 8bit
+	 *
+	 * Linux 24bit data is located as 0x00******
+	 * HW    24bit data is located as 0x******00
+	 *
+	 */
+	switch (runtime->sample_bits) {
+	case 16:
+		return 0;
+	case 32:
+		break;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(playback_mods); i++) {
+		tmod = rsnd_io_to_mod(io, mods[i]);
+		if (tmod)
+			break;
+	}
+
+	if (tmod != mod)
+		return 0;
+
+	if (rsnd_io_is_play(io))
+		return  (0 << 20) | /* shift to Left */
+			(8 << 16);  /* 8bit */
+	else
+		return  (1 << 20) | /* shift to Right */
+			(8 << 16);  /* 8bit */
+}
+
 /*
  *	rsnd_dai functions
  */
diff --git a/sound/soc/sh/rcar/gen.c b/sound/soc/sh/rcar/gen.c
index 63b6d3c28021..4b0980728e13 100644
--- a/sound/soc/sh/rcar/gen.c
+++ b/sound/soc/sh/rcar/gen.c
@@ -236,6 +236,7 @@ static int rsnd_gen2_probe(struct rsnd_priv *priv)
 		RSND_GEN_M_REG(SRC_ROUTE_MODE0,	0xc,	0x20),
 		RSND_GEN_M_REG(SRC_CTRL,	0x10,	0x20),
 		RSND_GEN_M_REG(SRC_INT_ENABLE0,	0x18,	0x20),
+		RSND_GEN_M_REG(CMD_BUSIF_MODE,	0x184,	0x20),
 		RSND_GEN_M_REG(CMD_BUSIF_DALIGN,0x188,	0x20),
 		RSND_GEN_M_REG(CMD_ROUTE_SLCT,	0x18c,	0x20),
 		RSND_GEN_M_REG(CMD_CTRL,	0x190,	0x20),
diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h
index dbf4163427e8..323af41ecfcb 100644
--- a/sound/soc/sh/rcar/rsnd.h
+++ b/sound/soc/sh/rcar/rsnd.h
@@ -73,6 +73,7 @@ enum rsnd_reg {
 	RSND_REG_SCU_SYS_INT_EN0,
 	RSND_REG_SCU_SYS_INT_EN1,
 	RSND_REG_CMD_CTRL,
+	RSND_REG_CMD_BUSIF_MODE,
 	RSND_REG_CMD_BUSIF_DALIGN,
 	RSND_REG_CMD_ROUTE_SLCT,
 	RSND_REG_CMDOUT_TIMSEL,
@@ -204,6 +205,7 @@ void rsnd_bset(struct rsnd_priv *priv, struct rsnd_mod *mod, enum rsnd_reg reg,
 		    u32 mask, u32 data);
 u32 rsnd_get_adinr_bit(struct rsnd_mod *mod, struct rsnd_dai_stream *io);
 u32 rsnd_get_dalign(struct rsnd_mod *mod, struct rsnd_dai_stream *io);
+u32 rsnd_get_busif_shift(struct rsnd_dai_stream *io, struct rsnd_mod *mod);
 
 /*
  *	R-Car DMA
diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c
index 20b5b2ec625e..76a477a3ccb5 100644
--- a/sound/soc/sh/rcar/src.c
+++ b/sound/soc/sh/rcar/src.c
@@ -190,11 +190,13 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io,
 	struct rsnd_priv *priv = rsnd_mod_to_priv(mod);
 	struct device *dev = rsnd_priv_to_dev(priv);
 	struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
+	int is_play = rsnd_io_is_play(io);
 	int use_src = 0;
 	u32 fin, fout;
 	u32 ifscr, fsrate, adinr;
 	u32 cr, route;
 	u32 bsdsr, bsisr;
+	u32 i_busif, o_busif, tmp;
 	uint ratio;
 
 	if (!runtime)
@@ -270,6 +272,11 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io,
 		break;
 	}
 
+	/* BUSIF_MODE */
+	tmp = rsnd_get_busif_shift(io, mod);
+	i_busif = ( is_play ? tmp : 0) | 1;
+	o_busif = (!is_play ? tmp : 0) | 1;
+
 	rsnd_mod_write(mod, SRC_ROUTE_MODE0, route);
 
 	rsnd_mod_write(mod, SRC_SRCIR, 1);	/* initialize */
@@ -281,8 +288,9 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io,
 	rsnd_mod_write(mod, SRC_BSISR, bsisr);
 	rsnd_mod_write(mod, SRC_SRCIR, 0);	/* cancel initialize */
 
-	rsnd_mod_write(mod, SRC_I_BUSIF_MODE, 1);
-	rsnd_mod_write(mod, SRC_O_BUSIF_MODE, 1);
+	rsnd_mod_write(mod, SRC_I_BUSIF_MODE, i_busif);
+	rsnd_mod_write(mod, SRC_O_BUSIF_MODE, o_busif);
+
 	rsnd_mod_write(mod, SRC_BUSIF_DALIGN, rsnd_get_dalign(mod, io));
 
 	rsnd_adg_set_src_timesel_gen2(mod, io, fin, fout);
diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c
index 135c5669f796..91e5c07911b4 100644
--- a/sound/soc/sh/rcar/ssi.c
+++ b/sound/soc/sh/rcar/ssi.c
@@ -302,7 +302,7 @@ static void rsnd_ssi_config_init(struct rsnd_mod *mod,
 	 * always use 32bit system word.
 	 * see also rsnd_ssi_master_clk_enable()
 	 */
-	cr_own = FORCE | SWL_32 | PDTA;
+	cr_own = FORCE | SWL_32;
 
 	if (rdai->bit_clk_inv)
 		cr_own |= SCKP;
@@ -550,6 +550,13 @@ static void __rsnd_ssi_interrupt(struct rsnd_mod *mod,
 		struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
 		u32 *buf = (u32 *)(runtime->dma_area +
 				   rsnd_dai_pointer_offset(io, 0));
+		int shift = 0;
+
+		switch (runtime->sample_bits) {
+		case 32:
+			shift = 8;
+			break;
+		}
 
 		/*
 		 * 8/16/32 data can be assesse to TDR/RDR register
@@ -557,9 +564,9 @@ static void __rsnd_ssi_interrupt(struct rsnd_mod *mod,
 		 * see rsnd_ssi_init()
 		 */
 		if (rsnd_io_is_play(io))
-			rsnd_mod_write(mod, SSITDR, *buf);
+			rsnd_mod_write(mod, SSITDR, (*buf) << shift);
 		else
-			*buf = rsnd_mod_read(mod, SSIRDR);
+			*buf = (rsnd_mod_read(mod, SSIRDR) >> shift);
 
 		elapsed = rsnd_dai_pointer_update(io, sizeof(*buf));
 	}
@@ -709,6 +716,11 @@ static int rsnd_ssi_dma_remove(struct rsnd_mod *mod,
 			       struct rsnd_priv *priv)
 {
 	struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
+	struct rsnd_mod *ssi_parent_mod = rsnd_io_to_mod_ssip(io);
+
+	/* Do nothing for SSI parent mod */
+	if (ssi_parent_mod == mod)
+		return 0;
 
 	/* PIO will request IRQ again */
 	free_irq(ssi->irq, mod);
diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c
index 14fafdaf1395..512d238b79e2 100644
--- a/sound/soc/sh/rcar/ssiu.c
+++ b/sound/soc/sh/rcar/ssiu.c
@@ -144,7 +144,8 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod,
 			       (rsnd_io_is_play(io) ?
 				rsnd_runtime_channel_after_ctu(io) :
 				rsnd_runtime_channel_original(io)));
-		rsnd_mod_write(mod, SSI_BUSIF_MODE,  1);
+		rsnd_mod_write(mod, SSI_BUSIF_MODE,
+			       rsnd_get_busif_shift(io, mod) | 1);
 		rsnd_mod_write(mod, SSI_BUSIF_DALIGN,
 			       rsnd_get_dalign(mod, io));
 	}
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index aae099c0e502..754e3ef8d7ae 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2286,6 +2286,9 @@ static int soc_cleanup_card_resources(struct snd_soc_card *card)
 	list_for_each_entry(rtd, &card->rtd_list, list)
 		flush_delayed_work(&rtd->delayed_work);
 
+	/* free the ALSA card at first; this syncs with pending operations */
+	snd_card_free(card->snd_card);
+
 	/* remove and free each DAI */
 	soc_remove_dai_links(card);
 	soc_remove_pcm_runtimes(card);
@@ -2300,9 +2303,7 @@ static int soc_cleanup_card_resources(struct snd_soc_card *card)
 	if (card->remove)
 		card->remove(card);
 
-	snd_card_free(card->snd_card);
 	return 0;
-
 }
 
 /* removes a socdev */
diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c
index dc48eedea92e..26ed23b18b77 100644
--- a/sound/usb/mixer_us16x08.c
+++ b/sound/usb/mixer_us16x08.c
@@ -698,16 +698,18 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol,
 	struct snd_usb_audio *chip = elem->head.mixer->chip;
 	struct snd_us16x08_meter_store *store = elem->private_data;
 	u8 meter_urb[64];
-	char tmp[sizeof(mix_init_msg2)] = {0};
 
 	switch (kcontrol->private_value) {
-	case 0:
-		snd_us16x08_send_urb(chip, (char *)mix_init_msg1,
-				     sizeof(mix_init_msg1));
+	case 0: {
+		char tmp[sizeof(mix_init_msg1)];
+
+		memcpy(tmp, mix_init_msg1, sizeof(mix_init_msg1));
+		snd_us16x08_send_urb(chip, tmp, 4);
 		snd_us16x08_recv_urb(chip, meter_urb,
 			sizeof(meter_urb));
 		kcontrol->private_value++;
 		break;
+	}
 	case 1:
 		snd_us16x08_recv_urb(chip, meter_urb,
 			sizeof(meter_urb));
@@ -718,15 +720,18 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol,
 			sizeof(meter_urb));
 		kcontrol->private_value++;
 		break;
-	case 3:
+	case 3: {
+		char tmp[sizeof(mix_init_msg2)];
+
 		memcpy(tmp, mix_init_msg2, sizeof(mix_init_msg2));
 		tmp[2] = snd_get_meter_comp_index(store);
-		snd_us16x08_send_urb(chip, tmp, sizeof(mix_init_msg2));
+		snd_us16x08_send_urb(chip, tmp, 10);
 		snd_us16x08_recv_urb(chip, meter_urb,
 			sizeof(meter_urb));
 		kcontrol->private_value = 0;
 		break;
 	}
+	}
 
 	for (set = 0; set < 6; set++)
 		get_meter_levels_from_urb(set, store, meter_urb);
@@ -1135,7 +1140,7 @@ static const struct snd_us16x08_control_params eq_controls[] = {
 		.control_id = SND_US16X08_ID_EQLOWMIDWIDTH,
 		.type = USB_MIXER_U8,
 		.num_channels = 16,
-		.name = "EQ MidQLow Q",
+		.name = "EQ MidLow Q",
 	},
 	{ /* EQ mid high gain */
 		.kcontrol_new = &snd_us16x08_eq_gain_ctl,
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 01eff6ce6401..d7b0b0a3a2db 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1364,7 +1364,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
 	/* Amanero Combo384 USB interface with native DSD support */
 	case USB_ID(0x16d0, 0x071a):
 		if (fp->altsetting == 2) {
-			switch (chip->dev->descriptor.bcdDevice) {
+			switch (le16_to_cpu(chip->dev->descriptor.bcdDevice)) {
 			case 0x199:
 				return SNDRV_PCM_FMTBIT_DSD_U32_LE;
 			case 0x19b:
diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c
index 664b7fe206d6..b11d3920b9a5 100644
--- a/sound/x86/intel_hdmi_audio.c
+++ b/sound/x86/intel_hdmi_audio.c
@@ -1809,10 +1809,6 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
 	pdata->notify_pending = false;
 	spin_unlock_irq(&pdata->lpe_audio_slock);
 
-	/* runtime PM isn't enabled as default, since it won't save much on
-	 * BYT/CHT devices; user who want the runtime PM should adjust the
-	 * power/ontrol and power/autosuspend_delay_ms sysfs entries instead
-	 */
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_runtime_mark_last_busy(&pdev->dev);
 	pm_runtime_set_active(&pdev->dev);
diff --git a/tools/Makefile b/tools/Makefile
index c8a90d01dd8e..221e1ce78b06 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -19,6 +19,7 @@ help:
 	@echo '  kvm_stat               - top-like utility for displaying kvm statistics'
 	@echo '  leds                   - LEDs  tools'
 	@echo '  lguest                 - a minimal 32-bit x86 hypervisor'
+	@echo '  liblockdep             - user-space wrapper for kernel locking-validator'
 	@echo '  net                    - misc networking tools'
 	@echo '  perf                   - Linux performance measurement and analysis tool'
 	@echo '  selftests              - various kernel selftests'
@@ -89,7 +90,7 @@ freefall: FORCE
 kvm_stat: FORCE
 	$(call descend,kvm/$@)
 
-all: acpi cgroup cpupower gpio hv firewire lguest \
+all: acpi cgroup cpupower gpio hv firewire lguest liblockdep \
 		perf selftests turbostat usb \
 		virtio vm net x86_energy_perf_policy \
 		tmon freefall objtool kvm_stat
@@ -103,6 +104,9 @@ cpupower_install:
 cgroup_install firewire_install gpio_install hv_install lguest_install perf_install usb_install virtio_install vm_install net_install objtool_install:
 	$(call descend,$(@:_install=),install)
 
+liblockdep_install:
+	$(call descend,lib/lockdep,install)
+
 selftests_install:
 	$(call descend,testing/$(@:_install=),install)
 
@@ -119,7 +123,7 @@ kvm_stat_install:
 	$(call descend,kvm/$(@:_install=),install)
 
 install: acpi_install cgroup_install cpupower_install gpio_install \
-		hv_install firewire_install lguest_install \
+		hv_install firewire_install lguest_install liblockdep_install \
 		perf_install selftests_install turbostat_install usb_install \
 		virtio_install vm_install net_install x86_energy_perf_policy_install \
 		tmon_install freefall_install objtool_install kvm_stat_install
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index 6ebd3e6a1fd1..5e3c673fa3f4 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -27,6 +27,8 @@
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
 
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
 #define KVM_REG_SIZE(id)						\
 	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
 
@@ -114,6 +116,8 @@ struct kvm_debug_exit_arch {
 };
 
 struct kvm_sync_regs {
+	/* Used with KVM_CAP_ARM_USER_IRQ */
+	__u64 device_irq_level;
 };
 
 struct kvm_arch_memory_slot {
@@ -192,13 +196,17 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS	8
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
 			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
 #define VGIC_LEVEL_INFO_LINE_LEVEL	0
 
-#define   KVM_DEV_ARM_VGIC_CTRL_INIT    0
+#define   KVM_DEV_ARM_VGIC_CTRL_INIT		0
+#define   KVM_DEV_ARM_ITS_SAVE_TABLES		1
+#define   KVM_DEV_ARM_ITS_RESTORE_TABLES	2
+#define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES	3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index c2860358ae3e..70eea2ecc663 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -39,6 +39,8 @@
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
 
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
 #define KVM_REG_SIZE(id)						\
 	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
 
@@ -143,6 +145,8 @@ struct kvm_debug_exit_arch {
 #define KVM_GUESTDBG_USE_HW		(1 << 17)
 
 struct kvm_sync_regs {
+	/* Used with KVM_CAP_ARM_USER_IRQ */
+	__u64 device_irq_level;
 };
 
 struct kvm_arch_memory_slot {
@@ -212,13 +216,17 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
 			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK	0x3ff
 #define VGIC_LEVEL_INFO_LINE_LEVEL	0
 
-#define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
+#define   KVM_DEV_ARM_VGIC_CTRL_INIT		0
+#define   KVM_DEV_ARM_ITS_SAVE_TABLES           1
+#define   KVM_DEV_ARM_ITS_RESTORE_TABLES        2
+#define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES	3
 
 /* Device Control API on vcpu fd */
 #define KVM_ARM_VCPU_PMU_V3_CTRL	0
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 4edbe4bb0e8b..07fbeb927834 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -29,6 +29,9 @@
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_GUEST_DEBUG
 
+/* Not always available, but if it is, this is the correct offset.  */
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
 struct kvm_regs {
 	__u64 pc;
 	__u64 cr;
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 7f4fd65e9208..3dd2a1d308dd 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -26,6 +26,8 @@
 #define KVM_DEV_FLIC_ADAPTER_REGISTER	6
 #define KVM_DEV_FLIC_ADAPTER_MODIFY	7
 #define KVM_DEV_FLIC_CLEAR_IO_IRQ	8
+#define KVM_DEV_FLIC_AISM		9
+#define KVM_DEV_FLIC_AIRQ_INJECT	10
 /*
  * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
  * as well as up  to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
@@ -41,7 +43,14 @@ struct kvm_s390_io_adapter {
 	__u8 isc;
 	__u8 maskable;
 	__u8 swap;
-	__u8 pad;
+	__u8 flags;
+};
+
+#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01
+
+struct kvm_s390_ais_req {
+	__u8 isc;
+	__u16 mode;
 };
 
 #define KVM_S390_IO_ADAPTER_MASK 1
@@ -110,6 +119,7 @@ struct kvm_s390_vm_cpu_machine {
 #define KVM_S390_VM_CPU_FEAT_CMMA	10
 #define KVM_S390_VM_CPU_FEAT_PFMFI	11
 #define KVM_S390_VM_CPU_FEAT_SIGPIF	12
+#define KVM_S390_VM_CPU_FEAT_KSS	13
 struct kvm_s390_vm_cpu_feat {
 	__u64 feat[16];
 };
@@ -198,6 +208,10 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_VRS    (1UL << 6)
 #define KVM_SYNC_RICCB  (1UL << 7)
 #define KVM_SYNC_FPRS   (1UL << 8)
+#define KVM_SYNC_GSCB   (1UL << 9)
+/* length and alignment of the sdnx as a power of two */
+#define SDNXC 8
+#define SDNXL (1UL << SDNXC)
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
 	__u64 prefix;	/* prefix register */
@@ -218,8 +232,16 @@ struct kvm_sync_regs {
 	};
 	__u8  reserved[512];	/* for future vector expansion */
 	__u32 fpc;		/* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
-	__u8 padding[52];	/* riccb needs to be 64byte aligned */
+	__u8 padding1[52];	/* riccb needs to be 64byte aligned */
 	__u8 riccb[64];		/* runtime instrumentation controls block */
+	__u8 padding2[192];	/* sdnx needs to be 256byte aligned */
+	union {
+		__u8 sdnx[SDNXL];  /* state description annex */
+		struct {
+			__u64 reserved1[2];
+			__u64 gscb[4];
+		};
+	};
 };
 
 #define KVM_REG_S390_TODPR	(KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 0fe00446f9ca..2701e5f8145b 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -202,6 +202,8 @@
 #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
+#define X86_FEATURE_MBA         ( 7*32+18) /* Memory Bandwidth Allocation */
+
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
 #define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 85599ad4d024..5dff775af7cd 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -36,6 +36,12 @@
 # define DISABLE_OSPKE		(1<<(X86_FEATURE_OSPKE & 31))
 #endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
 
+#ifdef CONFIG_X86_5LEVEL
+# define DISABLE_LA57	0
+#else
+# define DISABLE_LA57	(1<<(X86_FEATURE_LA57 & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -55,7 +61,7 @@
 #define DISABLED_MASK13	0
 #define DISABLED_MASK14	0
 #define DISABLED_MASK15	0
-#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE)
+#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57)
 #define DISABLED_MASK17	0
 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index fac9a5c0abe9..d91ba04dd007 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -53,6 +53,12 @@
 # define NEED_MOVBE	0
 #endif
 
+#ifdef CONFIG_X86_5LEVEL
+# define NEED_LA57	(1<<(X86_FEATURE_LA57 & 31))
+#else
+# define NEED_LA57	0
+#endif
+
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_PARAVIRT
 /* Paravirtualized systems may not have PSE or PGE available */
@@ -98,7 +104,7 @@
 #define REQUIRED_MASK13	0
 #define REQUIRED_MASK14	0
 #define REQUIRED_MASK15	0
-#define REQUIRED_MASK16	0
+#define REQUIRED_MASK16	(NEED_LA57)
 #define REQUIRED_MASK17	0
 #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 739c0c594022..c2824d02ba37 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -9,6 +9,9 @@
 #include <linux/types.h>
 #include <linux/ioctl.h>
 
+#define KVM_PIO_PAGE_OFFSET 1
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+
 #define DE_VECTOR 0
 #define DB_VECTOR 1
 #define BP_VECTOR 3
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index 14458658e988..690a2dcf4078 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -76,7 +76,11 @@
 #define EXIT_REASON_WBINVD              54
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
+#define EXIT_REASON_RDRAND              57
 #define EXIT_REASON_INVPCID             58
+#define EXIT_REASON_VMFUNC              59
+#define EXIT_REASON_ENCLS               60
+#define EXIT_REASON_RDSEED              61
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
@@ -90,6 +94,7 @@
 	{ EXIT_REASON_TASK_SWITCH,           "TASK_SWITCH" }, \
 	{ EXIT_REASON_CPUID,                 "CPUID" }, \
 	{ EXIT_REASON_HLT,                   "HLT" }, \
+	{ EXIT_REASON_INVD,                  "INVD" }, \
 	{ EXIT_REASON_INVLPG,                "INVLPG" }, \
 	{ EXIT_REASON_RDPMC,                 "RDPMC" }, \
 	{ EXIT_REASON_RDTSC,                 "RDTSC" }, \
@@ -108,6 +113,8 @@
 	{ EXIT_REASON_IO_INSTRUCTION,        "IO_INSTRUCTION" }, \
 	{ EXIT_REASON_MSR_READ,              "MSR_READ" }, \
 	{ EXIT_REASON_MSR_WRITE,             "MSR_WRITE" }, \
+	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
+	{ EXIT_REASON_MSR_LOAD_FAIL,         "MSR_LOAD_FAIL" }, \
 	{ EXIT_REASON_MWAIT_INSTRUCTION,     "MWAIT_INSTRUCTION" }, \
 	{ EXIT_REASON_MONITOR_TRAP_FLAG,     "MONITOR_TRAP_FLAG" }, \
 	{ EXIT_REASON_MONITOR_INSTRUCTION,   "MONITOR_INSTRUCTION" }, \
@@ -115,20 +122,24 @@
 	{ EXIT_REASON_MCE_DURING_VMENTRY,    "MCE_DURING_VMENTRY" }, \
 	{ EXIT_REASON_TPR_BELOW_THRESHOLD,   "TPR_BELOW_THRESHOLD" }, \
 	{ EXIT_REASON_APIC_ACCESS,           "APIC_ACCESS" }, \
-	{ EXIT_REASON_GDTR_IDTR,	     "GDTR_IDTR" }, \
-	{ EXIT_REASON_LDTR_TR,		     "LDTR_TR" }, \
+	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
+	{ EXIT_REASON_GDTR_IDTR,             "GDTR_IDTR" }, \
+	{ EXIT_REASON_LDTR_TR,               "LDTR_TR" }, \
 	{ EXIT_REASON_EPT_VIOLATION,         "EPT_VIOLATION" }, \
 	{ EXIT_REASON_EPT_MISCONFIG,         "EPT_MISCONFIG" }, \
 	{ EXIT_REASON_INVEPT,                "INVEPT" }, \
+	{ EXIT_REASON_RDTSCP,                "RDTSCP" }, \
 	{ EXIT_REASON_PREEMPTION_TIMER,      "PREEMPTION_TIMER" }, \
+	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_WBINVD,                "WBINVD" }, \
+	{ EXIT_REASON_XSETBV,                "XSETBV" }, \
 	{ EXIT_REASON_APIC_WRITE,            "APIC_WRITE" }, \
-	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
-	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
-	{ EXIT_REASON_MSR_LOAD_FAIL,         "MSR_LOAD_FAIL" }, \
-	{ EXIT_REASON_INVD,                  "INVD" }, \
-	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
+	{ EXIT_REASON_RDRAND,                "RDRAND" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
+	{ EXIT_REASON_VMFUNC,                "VMFUNC" }, \
+	{ EXIT_REASON_ENCLS,                 "ENCLS" }, \
+	{ EXIT_REASON_RDSEED,                "RDSEED" }, \
+	{ EXIT_REASON_PML_FULL,              "PML_FULL" }, \
 	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
 	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
 
diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c
index ebc6dceddb58..7598361ef1f1 100644
--- a/tools/build/feature/test-bpf.c
+++ b/tools/build/feature/test-bpf.c
@@ -29,6 +29,7 @@ int main(void)
 	attr.log_size = 0;
 	attr.log_level = 0;
 	attr.kern_version = 0;
+	attr.prog_flags = 0;
 
 	/*
 	 * Test existence of __NR_bpf and BPF_PROG_LOAD.
diff --git a/tools/build/feature/test-sched_getcpu.c b/tools/build/feature/test-sched_getcpu.c
index c4a148dd7104..9c6b4cbffb1c 100644
--- a/tools/build/feature/test-sched_getcpu.c
+++ b/tools/build/feature/test-sched_getcpu.c
@@ -1,4 +1,6 @@
+#ifndef _GNU_SOURCE
 #define _GNU_SOURCE
+#endif
 #include <sched.h>
 
 int main(void)
diff --git a/tools/include/asm/sections.h b/tools/include/asm/sections.h
new file mode 100644
index 000000000000..a80643d7a7f1
--- /dev/null
+++ b/tools/include/asm/sections.h
@@ -0,0 +1,4 @@
+#ifndef __TOOLS_INCLUDE_LINUX_ASM_SECTIONS_H
+#define __TOOLS_INCLUDE_LINUX_ASM_SECTIONS_H
+
+#endif /* __TOOLS_INCLUDE_LINUX_ASM_SECTIONS_H */
diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h
index 1aecad369af5..969db1981868 100644
--- a/tools/include/linux/bitops.h
+++ b/tools/include/linux/bitops.h
@@ -61,4 +61,14 @@ static inline unsigned fls_long(unsigned long l)
 	return fls64(l);
 }
 
+/**
+ * rol32 - rotate a 32-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u32 rol32(__u32 word, unsigned int shift)
+{
+	return (word << shift) | (word >> ((-shift) & 31));
+}
+
 #endif
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index 825d44f89a29..bd39b2090ad1 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -19,3 +19,13 @@
 
 /* &a[0] degrades to a pointer: a different type from an array */
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+
+#define  noinline	__attribute__((noinline))
+
+#define __packed	__attribute__((packed))
+
+#define __noreturn	__attribute__((noreturn))
+
+#define __aligned(x)	__attribute__((aligned(x)))
+#define __printf(a, b)	__attribute__((format(printf, a, b)))
+#define __scanf(a, b)	__attribute__((format(scanf, a, b)))
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 23299d7e7160..d7a5604c38d7 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -17,6 +17,10 @@
 # define __always_inline	inline __attribute__((always_inline))
 #endif
 
+#ifndef noinline
+#define noinline
+#endif
+
 /* Are two types/vars the same type (ignoring qualifiers)? */
 #ifndef __same_type
 # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
@@ -45,6 +49,10 @@
 # define __maybe_unused		__attribute__((unused))
 #endif
 
+#ifndef __used
+# define __used		__attribute__((__unused__))
+#endif
+
 #ifndef __packed
 # define __packed		__attribute__((__packed__))
 #endif
@@ -65,6 +73,14 @@
 # define unlikely(x)		__builtin_expect(!!(x), 0)
 #endif
 
+#ifndef __init
+# define __init
+#endif
+
+#ifndef noinline
+# define noinline
+#endif
+
 #define uninitialized_var(x) x = *(&(x))
 
 #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
diff --git a/tools/lib/lockdep/uinclude/linux/debug_locks.h b/tools/include/linux/debug_locks.h
index f38eb64df794..61cc7f501168 100644
--- a/tools/lib/lockdep/uinclude/linux/debug_locks.h
+++ b/tools/include/linux/debug_locks.h
@@ -3,8 +3,9 @@
 
 #include <stddef.h>
 #include <linux/compiler.h>
+#include <asm/bug.h>
 
-#define DEBUG_LOCKS_WARN_ON(x) (x)
+#define DEBUG_LOCKS_WARN_ON(x) WARN_ON(x)
 
 extern bool debug_locks;
 extern bool debug_locks_silent;
diff --git a/tools/include/linux/delay.h b/tools/include/linux/delay.h
new file mode 100644
index 000000000000..55aa4173af1f
--- /dev/null
+++ b/tools/include/linux/delay.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_DELAY_H
+#define _TOOLS_INCLUDE_LINUX_DELAY_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_DELAY_H */
diff --git a/tools/include/linux/err.h b/tools/include/linux/err.h
index bdc3dd8131d4..abf0478a8fb2 100644
--- a/tools/include/linux/err.h
+++ b/tools/include/linux/err.h
@@ -46,4 +46,9 @@ static inline bool __must_check IS_ERR(__force const void *ptr)
 	return IS_ERR_VALUE((unsigned long)ptr);
 }
 
+static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr)
+{
+	return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr);
+}
+
 #endif /* _LINUX_ERR_H */
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index 390d7c9685fd..4ce25d43e8e3 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -208,6 +208,16 @@
 		.off   = OFF,					\
 		.imm   = IMM })
 
+/* Unconditional jumps, goto pc + off16 */
+
+#define BPF_JMP_A(OFF)						\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_JA,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Function call */
 
 #define BPF_EMIT_CALL(FUNC)					\
diff --git a/tools/include/linux/ftrace.h b/tools/include/linux/ftrace.h
new file mode 100644
index 000000000000..949f541ce11e
--- /dev/null
+++ b/tools/include/linux/ftrace.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_FTRACE_H
+#define _TOOLS_INCLUDE_LINUX_FTRACE_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_FTRACE_H */
diff --git a/tools/include/linux/gfp.h b/tools/include/linux/gfp.h
new file mode 100644
index 000000000000..22030756fbc0
--- /dev/null
+++ b/tools/include/linux/gfp.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_GFP_H
+#define _TOOLS_INCLUDE_LINUX_GFP_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_GFP_H */
diff --git a/tools/lib/lockdep/uinclude/linux/hardirq.h b/tools/include/linux/hardirq.h
index c8f3f8f58729..c8f3f8f58729 100644
--- a/tools/lib/lockdep/uinclude/linux/hardirq.h
+++ b/tools/include/linux/hardirq.h
diff --git a/tools/include/linux/interrupt.h b/tools/include/linux/interrupt.h
new file mode 100644
index 000000000000..6be25bbdca9e
--- /dev/null
+++ b/tools/include/linux/interrupt.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_INTERRUPT_H
+#define _TOOLS_INCLUDE_LINUX_INTERRUPT_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_INTERRUPT_H */
diff --git a/tools/lib/lockdep/uinclude/linux/irqflags.h b/tools/include/linux/irqflags.h
index 6cc296f0fad0..df77669cfe1c 100644
--- a/tools/lib/lockdep/uinclude/linux/irqflags.h
+++ b/tools/include/linux/irqflags.h
@@ -17,19 +17,19 @@
 #define raw_local_irq_disable() do { } while (0)
 #define raw_local_irq_enable() do { } while (0)
 #define raw_local_irq_save(flags) ((flags) = 0)
-#define raw_local_irq_restore(flags) do { } while (0)
+#define raw_local_irq_restore(flags) ((void)(flags))
 #define raw_local_save_flags(flags) ((flags) = 0)
-#define raw_irqs_disabled_flags(flags) do { } while (0)
+#define raw_irqs_disabled_flags(flags) ((void)(flags))
 #define raw_irqs_disabled() 0
 #define raw_safe_halt()
 
 #define local_irq_enable() do { } while (0)
 #define local_irq_disable() do { } while (0)
 #define local_irq_save(flags) ((flags) = 0)
-#define local_irq_restore(flags) do { } while (0)
+#define local_irq_restore(flags) ((void)(flags))
 #define local_save_flags(flags)	((flags) = 0)
 #define irqs_disabled() (1)
-#define irqs_disabled_flags(flags) (0)
+#define irqs_disabled_flags(flags) ((void)(flags), 0)
 #define safe_halt() do { } while (0)
 
 #define trace_lock_release(x, y)
diff --git a/tools/include/linux/jhash.h b/tools/include/linux/jhash.h
new file mode 100644
index 000000000000..348c6f47e4cc
--- /dev/null
+++ b/tools/include/linux/jhash.h
@@ -0,0 +1,175 @@
+#ifndef _LINUX_JHASH_H
+#define _LINUX_JHASH_H
+
+/* jhash.h: Jenkins hash support.
+ *
+ * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
+ *
+ * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are my fault.
+ * Jozsef
+ */
+#include <linux/bitops.h>
+#include <linux/unaligned/packed_struct.h>
+
+/* Best hash sizes are of power of two */
+#define jhash_size(n)   ((u32)1<<(n))
+/* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */
+#define jhash_mask(n)   (jhash_size(n)-1)
+
+/* __jhash_mix -- mix 3 32-bit values reversibly. */
+#define __jhash_mix(a, b, c)			\
+{						\
+	a -= c;  a ^= rol32(c, 4);  c += b;	\
+	b -= a;  b ^= rol32(a, 6);  a += c;	\
+	c -= b;  c ^= rol32(b, 8);  b += a;	\
+	a -= c;  a ^= rol32(c, 16); c += b;	\
+	b -= a;  b ^= rol32(a, 19); a += c;	\
+	c -= b;  c ^= rol32(b, 4);  b += a;	\
+}
+
+/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
+#define __jhash_final(a, b, c)			\
+{						\
+	c ^= b; c -= rol32(b, 14);		\
+	a ^= c; a -= rol32(c, 11);		\
+	b ^= a; b -= rol32(a, 25);		\
+	c ^= b; c -= rol32(b, 16);		\
+	a ^= c; a -= rol32(c, 4);		\
+	b ^= a; b -= rol32(a, 14);		\
+	c ^= b; c -= rol32(b, 24);		\
+}
+
+/* An arbitrary initial parameter */
+#define JHASH_INITVAL		0xdeadbeef
+
+/* jhash - hash an arbitrary key
+ * @k: sequence of bytes as key
+ * @length: the length of the key
+ * @initval: the previous hash, or an arbitray value
+ *
+ * The generic version, hashes an arbitrary sequence of bytes.
+ * No alignment or length assumptions are made about the input key.
+ *
+ * Returns the hash value of the key. The result depends on endianness.
+ */
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+	u32 a, b, c;
+	const u8 *k = key;
+
+	/* Set up the internal state */
+	a = b = c = JHASH_INITVAL + length + initval;
+
+	/* All but the last block: affect some 32 bits of (a,b,c) */
+	while (length > 12) {
+		a += __get_unaligned_cpu32(k);
+		b += __get_unaligned_cpu32(k + 4);
+		c += __get_unaligned_cpu32(k + 8);
+		__jhash_mix(a, b, c);
+		length -= 12;
+		k += 12;
+	}
+	/* Last block: affect all 32 bits of (c) */
+	/* All the case statements fall through */
+	switch (length) {
+	case 12: c += (u32)k[11]<<24;
+	case 11: c += (u32)k[10]<<16;
+	case 10: c += (u32)k[9]<<8;
+	case 9:  c += k[8];
+	case 8:  b += (u32)k[7]<<24;
+	case 7:  b += (u32)k[6]<<16;
+	case 6:  b += (u32)k[5]<<8;
+	case 5:  b += k[4];
+	case 4:  a += (u32)k[3]<<24;
+	case 3:  a += (u32)k[2]<<16;
+	case 2:  a += (u32)k[1]<<8;
+	case 1:  a += k[0];
+		 __jhash_final(a, b, c);
+	case 0: /* Nothing left to add */
+		break;
+	}
+
+	return c;
+}
+
+/* jhash2 - hash an array of u32's
+ * @k: the key which must be an array of u32's
+ * @length: the number of u32's in the key
+ * @initval: the previous hash, or an arbitray value
+ *
+ * Returns the hash value of the key.
+ */
+static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
+{
+	u32 a, b, c;
+
+	/* Set up the internal state */
+	a = b = c = JHASH_INITVAL + (length<<2) + initval;
+
+	/* Handle most of the key */
+	while (length > 3) {
+		a += k[0];
+		b += k[1];
+		c += k[2];
+		__jhash_mix(a, b, c);
+		length -= 3;
+		k += 3;
+	}
+
+	/* Handle the last 3 u32's: all the case statements fall through */
+	switch (length) {
+	case 3: c += k[2];
+	case 2: b += k[1];
+	case 1: a += k[0];
+		__jhash_final(a, b, c);
+	case 0:	/* Nothing left to add */
+		break;
+	}
+
+	return c;
+}
+
+
+/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */
+static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+	a += initval;
+	b += initval;
+	c += initval;
+
+	__jhash_final(a, b, c);
+
+	return c;
+}
+
+static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+{
+	return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2));
+}
+
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+static inline u32 jhash_1word(u32 a, u32 initval)
+{
+	return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2));
+}
+
+#endif /* _LINUX_JHASH_H */
diff --git a/tools/lib/lockdep/uinclude/linux/kallsyms.h b/tools/include/linux/kallsyms.h
index b0f2dbdf1a15..582cc1e5f3a4 100644
--- a/tools/lib/lockdep/uinclude/linux/kallsyms.h
+++ b/tools/include/linux/kallsyms.h
@@ -3,6 +3,7 @@
 
 #include <linux/kernel.h>
 #include <stdio.h>
+#include <unistd.h>
 
 #define KSYM_NAME_LEN 128
 
@@ -24,7 +25,7 @@ static inline void print_ip_sym(unsigned long ip)
 
 	name = backtrace_symbols((void **)&ip, 1);
 
-	printf("%s\n", *name);
+	dprintf(STDOUT_FILENO, "%s\n", *name);
 
 	free(name);
 }
diff --git a/tools/lib/lockdep/uinclude/linux/kern_levels.h b/tools/include/linux/kern_levels.h
index 3b9bade28698..3b9bade28698 100644
--- a/tools/lib/lockdep/uinclude/linux/kern_levels.h
+++ b/tools/include/linux/kern_levels.h
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 73ccc48126bb..77d2e94ca5df 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -5,6 +5,8 @@
 #include <stddef.h>
 #include <assert.h>
 #include <linux/compiler.h>
+#include <endian.h>
+#include <byteswap.h>
 
 #ifndef UINT_MAX
 #define UINT_MAX	(~0U)
@@ -32,6 +34,7 @@
 	(type *)((char *)__mptr - offsetof(type, member)); })
 #endif
 
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
 #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
 
 #ifndef max
@@ -67,12 +70,33 @@
 #endif
 #endif
 
-/*
- * Both need more care to handle endianness
- * (Don't use bitmap_copy_le() for now)
- */
-#define cpu_to_le64(x)	(x)
-#define cpu_to_le32(x)	(x)
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define cpu_to_le16 bswap_16
+#define cpu_to_le32 bswap_32
+#define cpu_to_le64 bswap_64
+#define le16_to_cpu bswap_16
+#define le32_to_cpu bswap_32
+#define le64_to_cpu bswap_64
+#define cpu_to_be16
+#define cpu_to_be32
+#define cpu_to_be64
+#define be16_to_cpu
+#define be32_to_cpu
+#define be64_to_cpu
+#else
+#define cpu_to_le16
+#define cpu_to_le32
+#define cpu_to_le64
+#define le16_to_cpu
+#define le32_to_cpu
+#define le64_to_cpu
+#define cpu_to_be16 bswap_16
+#define cpu_to_be32 bswap_32
+#define cpu_to_be64 bswap_64
+#define be16_to_cpu bswap_16
+#define be32_to_cpu bswap_32
+#define be64_to_cpu bswap_64
+#endif
 
 int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
 int scnprintf(char * buf, size_t size, const char * fmt, ...);
@@ -89,4 +113,7 @@ int scnprintf(char * buf, size_t size, const char * fmt, ...);
 #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
 #define round_down(x, y) ((x) & ~__round_mask(x, y))
 
+#define current_gfp_context(k) 0
+#define synchronize_sched()
+
 #endif
diff --git a/tools/lib/lockdep/uinclude/linux/kmemcheck.h b/tools/include/linux/kmemcheck.h
index 94d598bc6abe..94d598bc6abe 100644
--- a/tools/lib/lockdep/uinclude/linux/kmemcheck.h
+++ b/tools/include/linux/kmemcheck.h
diff --git a/tools/include/linux/linkage.h b/tools/include/linux/linkage.h
new file mode 100644
index 000000000000..bc763d500262
--- /dev/null
+++ b/tools/include/linux/linkage.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_LINKAGE_H
+#define _TOOLS_INCLUDE_LINUX_LINKAGE_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_LINKAGE_H */
diff --git a/tools/lib/lockdep/uinclude/linux/lockdep.h b/tools/include/linux/lockdep.h
index c808c7d02d21..8da3e8effafa 100644
--- a/tools/lib/lockdep/uinclude/linux/lockdep.h
+++ b/tools/include/linux/lockdep.h
@@ -7,8 +7,15 @@
 #include <limits.h>
 #include <linux/utsname.h>
 #include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/kern_levels.h>
+#include <linux/err.h>
+#include <linux/rcu.h>
+#include <linux/list.h>
+#include <linux/hardirq.h>
+#include <unistd.h>
 
-#define MAX_LOCK_DEPTH 2000UL
+#define MAX_LOCK_DEPTH 63UL
 
 #define asmlinkage
 #define __visible
@@ -29,31 +36,32 @@ extern struct task_struct *__curr(void);
 
 #define current (__curr())
 
-#define debug_locks_off() 1
+static inline int debug_locks_off(void)
+{
+	return 1;
+}
+
 #define task_pid_nr(tsk) ((tsk)->pid)
 
 #define KSYM_NAME_LEN 128
-#define printk printf
+#define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__)
+#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#define pr_warn pr_err
 
 #define list_del_rcu list_del
 
 #define atomic_t unsigned long
 #define atomic_inc(x) ((*(x))++)
 
-static struct new_utsname *init_utsname(void)
-{
-	static struct new_utsname n = (struct new_utsname) {
-		.release = "liblockdep",
-		.version = LIBLOCKDEP_VERSION,
-	};
-
-	return &n;
-}
-
 #define print_tainted() ""
 #define static_obj(x) 1
 
 #define debug_show_all_locks()
 extern void debug_check_no_locks_held(void);
 
+static __used bool __is_kernel_percpu_address(unsigned long addr, void *can_addr)
+{
+	return false;
+}
+
 #endif
diff --git a/tools/lib/lockdep/uinclude/linux/module.h b/tools/include/linux/module.h
index 09c7a7be8ccc..07055db296f3 100644
--- a/tools/lib/lockdep/uinclude/linux/module.h
+++ b/tools/include/linux/module.h
@@ -3,4 +3,9 @@
 
 #define module_param(name, type, perm)
 
+static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
+{
+	return false;
+}
+
 #endif
diff --git a/tools/include/linux/mutex.h b/tools/include/linux/mutex.h
new file mode 100644
index 000000000000..a8180d25f2fc
--- /dev/null
+++ b/tools/include/linux/mutex.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_MUTEX_H
+#define _TOOLS_INCLUDE_LINUX_MUTEX_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_MUTEX_H */
diff --git a/tools/include/linux/proc_fs.h b/tools/include/linux/proc_fs.h
new file mode 100644
index 000000000000..8b3b03b64fda
--- /dev/null
+++ b/tools/include/linux/proc_fs.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_PROC_FS_H
+#define _TOOLS_INCLUDE_LINUX_PROC_FS_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_PROC_FS_H */
diff --git a/tools/lib/lockdep/uinclude/linux/rcu.h b/tools/include/linux/rcu.h
index 042ee8e463c9..5080649dad04 100644
--- a/tools/lib/lockdep/uinclude/linux/rcu.h
+++ b/tools/include/linux/rcu.h
@@ -18,4 +18,7 @@ static inline bool rcu_is_watching(void)
 	return false;
 }
 
+#define rcu_assign_pointer(p, v) ((p) = (v))
+#define RCU_INIT_POINTER(p, v) p=(v)
+
 #endif
diff --git a/tools/include/linux/sched/clock.h b/tools/include/linux/sched/clock.h
new file mode 100644
index 000000000000..5837d17c4182
--- /dev/null
+++ b/tools/include/linux/sched/clock.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_PERF_LINUX_SCHED_CLOCK_H
+#define _TOOLS_PERF_LINUX_SCHED_CLOCK_H
+
+#endif  /* _TOOLS_PERF_LINUX_SCHED_CLOCK_H */
diff --git a/tools/include/linux/sched/mm.h b/tools/include/linux/sched/mm.h
new file mode 100644
index 000000000000..c8d9f19c1f35
--- /dev/null
+++ b/tools/include/linux/sched/mm.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_PERF_LINUX_SCHED_MM_H
+#define _TOOLS_PERF_LINUX_SCHED_MM_H
+
+#endif  /* _TOOLS_PERF_LINUX_SCHED_MM_H */
diff --git a/tools/include/linux/sched/task.h b/tools/include/linux/sched/task.h
new file mode 100644
index 000000000000..a97890eca110
--- /dev/null
+++ b/tools/include/linux/sched/task.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_PERF_LINUX_SCHED_TASK_H
+#define _TOOLS_PERF_LINUX_SCHED_TASK_H
+
+#endif  /* _TOOLS_PERF_LINUX_SCHED_TASK_H */
diff --git a/tools/include/linux/seq_file.h b/tools/include/linux/seq_file.h
new file mode 100644
index 000000000000..102fd9217f1f
--- /dev/null
+++ b/tools/include/linux/seq_file.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_SEQ_FILE_H
+#define _TOOLS_INCLUDE_LINUX_SEQ_FILE_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_SEQ_FILE_H */
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
index 58397dcb19d6..417cda4f793f 100644
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -1,5 +1,31 @@
+#ifndef __LINUX_SPINLOCK_H_
+#define __LINUX_SPINLOCK_H_
+
+#include <pthread.h>
+#include <stdbool.h>
+
 #define spinlock_t		pthread_mutex_t
 #define DEFINE_SPINLOCK(x)	pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER;
 
 #define spin_lock_irqsave(x, f)		(void)f, pthread_mutex_lock(x)
 #define spin_unlock_irqrestore(x, f)	(void)f, pthread_mutex_unlock(x)
+
+#define arch_spinlock_t pthread_mutex_t
+#define __ARCH_SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER
+
+static inline void arch_spin_lock(arch_spinlock_t *mutex)
+{
+	pthread_mutex_lock(mutex);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *mutex)
+{
+	pthread_mutex_unlock(mutex);
+}
+
+static inline bool arch_spin_is_locked(arch_spinlock_t *mutex)
+{
+	return true;
+}
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/stacktrace.h b/tools/include/linux/stacktrace.h
index 39aecc6b19d1..39aecc6b19d1 100644
--- a/tools/lib/lockdep/uinclude/linux/stacktrace.h
+++ b/tools/include/linux/stacktrace.h
diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
index f436d2420a18..d62b56cf8c12 100644
--- a/tools/include/linux/string.h
+++ b/tools/include/linux/string.h
@@ -18,4 +18,6 @@ extern size_t strlcpy(char *dest, const char *src, size_t size);
 
 char *str_error_r(int errnum, char *buf, size_t buflen);
 
+int prefixcmp(const char *str, const char *prefix);
+
 #endif /* _LINUX_STRING_H_ */
diff --git a/tools/include/linux/unaligned/packed_struct.h b/tools/include/linux/unaligned/packed_struct.h
new file mode 100644
index 000000000000..c0d817de4df2
--- /dev/null
+++ b/tools/include/linux/unaligned/packed_struct.h
@@ -0,0 +1,46 @@
+#ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H
+#define _LINUX_UNALIGNED_PACKED_STRUCT_H
+
+#include <linux/kernel.h>
+
+struct __una_u16 { u16 x; } __packed;
+struct __una_u32 { u32 x; } __packed;
+struct __una_u64 { u64 x; } __packed;
+
+static inline u16 __get_unaligned_cpu16(const void *p)
+{
+	const struct __una_u16 *ptr = (const struct __una_u16 *)p;
+	return ptr->x;
+}
+
+static inline u32 __get_unaligned_cpu32(const void *p)
+{
+	const struct __una_u32 *ptr = (const struct __una_u32 *)p;
+	return ptr->x;
+}
+
+static inline u64 __get_unaligned_cpu64(const void *p)
+{
+	const struct __una_u64 *ptr = (const struct __una_u64 *)p;
+	return ptr->x;
+}
+
+static inline void __put_unaligned_cpu16(u16 val, void *p)
+{
+	struct __una_u16 *ptr = (struct __una_u16 *)p;
+	ptr->x = val;
+}
+
+static inline void __put_unaligned_cpu32(u32 val, void *p)
+{
+	struct __una_u32 *ptr = (struct __una_u32 *)p;
+	ptr->x = val;
+}
+
+static inline void __put_unaligned_cpu64(u64 val, void *p)
+{
+	struct __una_u64 *ptr = (struct __una_u64 *)p;
+	ptr->x = val;
+}
+
+#endif /* _LINUX_UNALIGNED_PACKED_STRUCT_H */
diff --git a/tools/include/trace/events/lock.h b/tools/include/trace/events/lock.h
new file mode 100644
index 000000000000..5b15fd5ee1af
--- /dev/null
+++ b/tools/include/trace/events/lock.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_TRACE_EVENTS_LOCK_H
+#define _TOOLS_INCLUDE_TRACE_EVENTS_LOCK_H
+
+#endif /* _TOOLS_INCLUDE_TRACE_EVENTS_LOCK_H */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e553529929f6..94dfa9def355 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -132,6 +132,13 @@ enum bpf_attach_type {
  */
 #define BPF_F_ALLOW_OVERRIDE	(1U << 0)
 
+/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will perform strict alignment checking as if the kernel
+ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
+ * and NET_IP_ALIGN defined to 2.
+ */
+#define BPF_F_STRICT_ALIGNMENT	(1U << 0)
+
 #define BPF_PSEUDO_MAP_FD	1
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
@@ -177,6 +184,7 @@ union bpf_attr {
 		__u32		log_size;	/* size of user buffer */
 		__aligned_u64	log_buf;	/* user supplied buffer */
 		__u32		kern_version;	/* checked when prog_type=kprobe */
+		__u32		prog_flags;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -481,8 +489,7 @@ union bpf_attr {
  * u32 bpf_get_socket_uid(skb)
  *     Get the owner uid of the socket stored inside sk_buff.
  *     @skb: pointer to skb
- *     Return: uid of the socket owner on success or 0 if the socket pointer
- *     inside sk_buff is NULL
+ *     Return: uid of the socket owner on success or overflowuid if failed.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index d538897b8e08..17b10304c393 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -48,17 +48,13 @@
  * tv_sec holds the number of seconds before (negative) or after (positive)
  * 00:00:00 1st January 1970 UTC.
  *
- * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is
- * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time.
- *
- * Note that if both tv_sec and tv_nsec are non-zero, then the two values must
- * either be both positive or both negative.
+ * tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time.
  *
  * __reserved is held in case we need a yet finer resolution.
  */
 struct statx_timestamp {
 	__s64	tv_sec;
-	__s32	tv_nsec;
+	__u32	tv_nsec;
 	__s32	__reserved;
 };
 
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 809c7721cd24..a7ecf8f469f4 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -387,6 +387,22 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep)
 	return err;
 }
 
+int filename__write_int(const char *filename, int value)
+{
+	int fd = open(filename, O_WRONLY), err = -1;
+	char buf[64];
+
+	if (fd < 0)
+		return err;
+
+	sprintf(buf, "%d", value);
+	if (write(fd, buf, sizeof(buf)) == sizeof(buf))
+		err = 0;
+
+	close(fd);
+	return err;
+}
+
 int procfs__read_str(const char *entry, char **buf, size_t *sizep)
 {
 	char path[PATH_MAX];
@@ -480,3 +496,17 @@ int sysctl__read_int(const char *sysctl, int *value)
 
 	return filename__read_int(path, value);
 }
+
+int sysfs__write_int(const char *entry, int value)
+{
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
+
+	if (snprintf(path, sizeof(path), "%s/%s", sysfs, entry) >= PATH_MAX)
+		return -1;
+
+	return filename__write_int(path, value);
+}
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 956c21127d1e..45605348461e 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -31,6 +31,8 @@ int filename__read_int(const char *filename, int *value);
 int filename__read_ull(const char *filename, unsigned long long *value);
 int filename__read_str(const char *filename, char **buf, size_t *sizep);
 
+int filename__write_int(const char *filename, int value);
+
 int procfs__read_str(const char *entry, char **buf, size_t *sizep);
 
 int sysctl__read_int(const char *sysctl, int *value);
@@ -38,4 +40,6 @@ int sysfs__read_int(const char *entry, int *value);
 int sysfs__read_ull(const char *entry, unsigned long long *value);
 int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
 int sysfs__read_bool(const char *entry, bool *value);
+
+int sysfs__write_int(const char *entry, int value);
 #endif /* __API_FS__ */
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 4fe444b8092e..6e178987af8e 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -117,6 +117,28 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 	return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
 }
 
+int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+		       size_t insns_cnt, int strict_alignment,
+		       const char *license, __u32 kern_version,
+		       char *log_buf, size_t log_buf_sz)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.prog_type = type;
+	attr.insn_cnt = (__u32)insns_cnt;
+	attr.insns = ptr_to_u64(insns);
+	attr.license = ptr_to_u64(license);
+	attr.log_buf = ptr_to_u64(log_buf);
+	attr.log_size = log_buf_sz;
+	attr.log_level = 2;
+	log_buf[0] = 0;
+	attr.kern_version = kern_version;
+	attr.prog_flags = strict_alignment ? BPF_F_STRICT_ALIGNMENT : 0;
+
+	return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
 int bpf_map_update_elem(int fd, const void *key, const void *value,
 			__u64 flags)
 {
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index edb4daeff7a5..972bd8333eb7 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -35,6 +35,10 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 		     size_t insns_cnt, const char *license,
 		     __u32 kern_version, char *log_buf,
 		     size_t log_buf_sz);
+int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+		       size_t insns_cnt, int strict_alignment,
+		       const char *license, __u32 kern_version,
+		       char *log_buf, size_t log_buf_sz);
 
 int bpf_map_update_elem(int fd, const void *key, const void *value,
 			__u64 flags);
diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile
index 3bc0ef9f8923..ed9ace59d112 100644
--- a/tools/lib/lockdep/Makefile
+++ b/tools/lib/lockdep/Makefile
@@ -79,6 +79,7 @@ INCLUDES = -I. -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES)
 # Set compile option CFLAGS if not set elsewhere
 CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g
 CFLAGS += -fPIC
+CFLAGS += -Wall
 
 override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
 
@@ -100,7 +101,7 @@ include $(srctree)/tools/build/Makefile.include
 
 do_compile_shared_library =			\
 	($(print_shared_lib_compile)		\
-	$(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -sf $@ liblockdep.so))
+	$(CC) $(LDFLAGS) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='$(@F)';$(shell ln -sf $(@F) $(@D)/liblockdep.so))
 
 do_build_static_lib =				\
 	($(print_static_lib_build)		\
@@ -118,10 +119,10 @@ all_cmd: $(CMD_TARGETS)
 $(LIB_IN): force
 	$(Q)$(MAKE) $(build)=liblockdep
 
-liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN)
+$(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN)
 	$(Q)$(do_compile_shared_library)
 
-liblockdep.a: $(LIB_IN)
+$(OUTPUT)liblockdep.a: $(LIB_IN)
 	$(Q)$(do_build_static_lib)
 
 tags:	force
@@ -149,7 +150,7 @@ install_lib: all_cmd
 install: install_lib
 
 clean:
-	$(RM) *.o *~ $(TARGETS) *.a *liblockdep*.so* $(VERSION_FILES) .*.d .*.cmd
+	$(RM) $(OUTPUT)*.o *~ $(TARGETS) $(OUTPUT)*.a $(OUTPUT)*liblockdep*.so* $(VERSION_FILES) $(OUTPUT).*.d $(OUTPUT).*.cmd
 	$(RM) tags TAGS
 
 PHONY += force
diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c
index a0a2e3a266af..ced6d7443cea 100644
--- a/tools/lib/lockdep/lockdep.c
+++ b/tools/lib/lockdep/lockdep.c
@@ -1,8 +1,27 @@
 #include <linux/lockdep.h>
+#include <stdlib.h>
 
 /* Trivial API wrappers, we don't (yet) have RCU in user-space: */
 #define hlist_for_each_entry_rcu	hlist_for_each_entry
 #define hlist_add_head_rcu		hlist_add_head
 #define hlist_del_rcu			hlist_del
+#define list_for_each_entry_rcu		list_for_each_entry
+#define list_add_tail_rcu		list_add_tail
+
+u32 prandom_u32(void)
+{
+	/* Used only by lock_pin_lock() which is dead code */
+	abort();
+}
+
+static struct new_utsname *init_utsname(void)
+{
+	static struct new_utsname n = (struct new_utsname) {
+		.release = "liblockdep",
+		.version = LIBLOCKDEP_VERSION,
+	};
+
+	return &n;
+}
 
 #include "../../../kernel/locking/lockdep.c"
diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c
index 52844847569c..6a2d3c5d4e92 100644
--- a/tools/lib/lockdep/preload.c
+++ b/tools/lib/lockdep/preload.c
@@ -4,6 +4,7 @@
 #include <dlfcn.h>
 #include <stdlib.h>
 #include <sysexits.h>
+#include <unistd.h>
 #include "include/liblockdep/mutex.h"
 #include "../../include/linux/rbtree.h"
 
@@ -122,8 +123,6 @@ static struct rb_node **__get_lock_node(void *lock, struct rb_node **parent)
 #define LIBLOCKDEP_STATIC_ENTRIES	1024
 #endif
 
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
 static struct lock_lookup __locks[LIBLOCKDEP_STATIC_ENTRIES];
 static int __locks_nr;
 
@@ -149,7 +148,7 @@ static struct lock_lookup *alloc_lock(void)
 
 		int idx = __locks_nr++;
 		if (idx >= ARRAY_SIZE(__locks)) {
-			fprintf(stderr,
+			dprintf(STDERR_FILENO,
 		"LOCKDEP error: insufficient LIBLOCKDEP_STATIC_ENTRIES\n");
 			exit(EX_UNAVAILABLE);
 		}
diff --git a/tools/lib/lockdep/rbtree.c b/tools/lib/lockdep/rbtree.c
index f7f43033c8b7..297c304571f8 100644
--- a/tools/lib/lockdep/rbtree.c
+++ b/tools/lib/lockdep/rbtree.c
@@ -1 +1 @@
-#include "../../../lib/rbtree.c"
+#include "../../lib/rbtree.c"
diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
index 1069d96248c1..f9b94098fc98 100755
--- a/tools/lib/lockdep/run_tests.sh
+++ b/tools/lib/lockdep/run_tests.sh
@@ -4,9 +4,9 @@ make &> /dev/null
 
 for i in `ls tests/*.c`; do
 	testname=$(basename "$i" .c)
-	gcc -o tests/$testname -pthread -lpthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null
+	gcc -o tests/$testname -pthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null
 	echo -ne "$testname... "
-	if [ $(timeout 1 ./tests/$testname | wc -l) -gt 0 ]; then
+	if [ $(timeout 1 ./tests/$testname 2>&1 | wc -l) -gt 0 ]; then
 		echo "PASSED!"
 	else
 		echo "FAILED!"
@@ -18,9 +18,9 @@ done
 
 for i in `ls tests/*.c`; do
 	testname=$(basename "$i" .c)
-	gcc -o tests/$testname -pthread -lpthread -Iinclude $i &> /dev/null
+	gcc -o tests/$testname -pthread -Iinclude $i &> /dev/null
 	echo -ne "(PRELOAD) $testname... "
-	if [ $(timeout 1 ./lockdep ./tests/$testname | wc -l) -gt 0 ]; then
+	if [ $(timeout 1 ./lockdep ./tests/$testname 2>&1 | wc -l) -gt 0 ]; then
 		echo "PASSED!"
 	else
 		echo "FAILED!"
diff --git a/tools/lib/lockdep/uinclude/asm/hash.h b/tools/lib/lockdep/uinclude/asm/hash.h
deleted file mode 100644
index d82b170bb216..000000000000
--- a/tools/lib/lockdep/uinclude/asm/hash.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_GENERIC_HASH_H
-#define __ASM_GENERIC_HASH_H
-
-/* Stub */
-
-#endif /* __ASM_GENERIC_HASH_H */
diff --git a/tools/lib/lockdep/uinclude/asm/hweight.h b/tools/lib/lockdep/uinclude/asm/hweight.h
deleted file mode 100644
index fab00ff936d1..000000000000
--- a/tools/lib/lockdep/uinclude/asm/hweight.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/asm/sections.h b/tools/lib/lockdep/uinclude/asm/sections.h
deleted file mode 100644
index fab00ff936d1..000000000000
--- a/tools/lib/lockdep/uinclude/asm/sections.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/bitops.h b/tools/lib/lockdep/uinclude/linux/bitops.h
deleted file mode 100644
index fab00ff936d1..000000000000
--- a/tools/lib/lockdep/uinclude/linux/bitops.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/compiler.h b/tools/lib/lockdep/uinclude/linux/compiler.h
deleted file mode 100644
index fd3e56a83fc2..000000000000
--- a/tools/lib/lockdep/uinclude/linux/compiler.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _LIBLOCKDEP_LINUX_COMPILER_H_
-#define _LIBLOCKDEP_LINUX_COMPILER_H_
-
-#define __used		__attribute__((__unused__))
-#define unlikely
-#define READ_ONCE(x) (x)
-#define WRITE_ONCE(x, val) x=(val)
-#define RCU_INIT_POINTER(p, v) p=(v)
-
-#endif
diff --git a/tools/lib/lockdep/uinclude/linux/delay.h b/tools/lib/lockdep/uinclude/linux/delay.h
deleted file mode 100644
index fab00ff936d1..000000000000
--- a/tools/lib/lockdep/uinclude/linux/delay.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/ftrace.h b/tools/lib/lockdep/uinclude/linux/ftrace.h
deleted file mode 100644
index fab00ff936d1..000000000000
--- a/tools/lib/lockdep/uinclude/linux/ftrace.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/gfp.h b/tools/lib/lockdep/uinclude/linux/gfp.h
deleted file mode 100644
index fab00ff936d1..000000000000
--- a/tools/lib/lockdep/uinclude/linux/gfp.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/hash.h b/tools/lib/lockdep/uinclude/linux/hash.h
deleted file mode 100644
index 0f8479858dc0..000000000000
--- a/tools/lib/lockdep/uinclude/linux/hash.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../include/linux/hash.h"
diff --git a/tools/lib/lockdep/uinclude/linux/interrupt.h b/tools/lib/lockdep/uinclude/linux/interrupt.h
deleted file mode 100644
index fab00ff936d1..000000000000
--- a/tools/lib/lockdep/uinclude/linux/interrupt.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/kernel.h b/tools/lib/lockdep/uinclude/linux/kernel.h
deleted file mode 100644
index 276c7a8b2ed1..000000000000
--- a/tools/lib/lockdep/uinclude/linux/kernel.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef _LIBLOCKDEP_LINUX_KERNEL_H_
-#define _LIBLOCKDEP_LINUX_KERNEL_H_
-
-#include <linux/export.h>
-#include <linux/types.h>
-#include <linux/rcu.h>
-#include <linux/hardirq.h>
-#include <linux/kern_levels.h>
-
-#ifndef container_of
-#define container_of(ptr, type, member) ({			\
-	const typeof(((type *)0)->member) * __mptr = (ptr);	\
-	(type *)((char *)__mptr - offsetof(type, member)); })
-#endif
-
-#define max(x, y) ({				\
-	typeof(x) _max1 = (x);			\
-	typeof(y) _max2 = (y);			\
-	(void) (&_max1 == &_max2);		\
-	_max1 > _max2 ? _max1 : _max2; })
-
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
-#define WARN_ON(x) (x)
-#define WARN_ON_ONCE(x) (x)
-#define likely(x) (x)
-#define WARN(x, y...) (x)
-#define uninitialized_var(x) x
-#define __init
-#define noinline
-#define list_add_tail_rcu list_add_tail
-#define list_for_each_entry_rcu list_for_each_entry
-#define barrier() 
-#define synchronize_sched()
-
-#ifndef CALLER_ADDR0
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#endif
-
-#ifndef _RET_IP_
-#define _RET_IP_ CALLER_ADDR0
-#endif
-
-#ifndef _THIS_IP_
-#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
-#endif
-
-#endif
diff --git a/tools/lib/lockdep/uinclude/linux/linkage.h b/tools/lib/lockdep/uinclude/linux/linkage.h
deleted file mode 100644
index fab00ff936d1..000000000000
--- a/tools/lib/lockdep/uinclude/linux/linkage.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/list.h b/tools/lib/lockdep/uinclude/linux/list.h
deleted file mode 100644
index 6e9ef31ed82e..000000000000
--- a/tools/lib/lockdep/uinclude/linux/list.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../include/linux/list.h"
diff --git a/tools/lib/lockdep/uinclude/linux/mutex.h b/tools/lib/lockdep/uinclude/linux/mutex.h
deleted file mode 100644
index fab00ff936d1..000000000000
--- a/tools/lib/lockdep/uinclude/linux/mutex.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/poison.h b/tools/lib/lockdep/uinclude/linux/poison.h
deleted file mode 100644
index 0c27bdf14233..000000000000
--- a/tools/lib/lockdep/uinclude/linux/poison.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../include/linux/poison.h"
diff --git a/tools/lib/lockdep/uinclude/linux/prefetch.h b/tools/lib/lockdep/uinclude/linux/prefetch.h
deleted file mode 100644
index d73fe6f850ac..000000000000
--- a/tools/lib/lockdep/uinclude/linux/prefetch.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _LIBLOCKDEP_LINUX_PREFETCH_H_
-#define _LIBLOCKDEP_LINUX_PREFETCH_H
-
-static inline void prefetch(void *a __attribute__((unused))) { }
-
-#endif
diff --git a/tools/lib/lockdep/uinclude/linux/proc_fs.h b/tools/lib/lockdep/uinclude/linux/proc_fs.h
deleted file mode 100644
index fab00ff936d1..000000000000
--- a/tools/lib/lockdep/uinclude/linux/proc_fs.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h b/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h
deleted file mode 100644
index c3759477379c..000000000000
--- a/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define __always_inline
-#include "../../../include/linux/rbtree_augmented.h"
diff --git a/tools/lib/lockdep/uinclude/linux/seq_file.h b/tools/lib/lockdep/uinclude/linux/seq_file.h
deleted file mode 100644
index fab00ff936d1..000000000000
--- a/tools/lib/lockdep/uinclude/linux/seq_file.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/spinlock.h b/tools/lib/lockdep/uinclude/linux/spinlock.h
deleted file mode 100644
index 68c1aa2bcba5..000000000000
--- a/tools/lib/lockdep/uinclude/linux/spinlock.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _LIBLOCKDEP_SPINLOCK_H_
-#define _LIBLOCKDEP_SPINLOCK_H_
-
-#include <pthread.h>
-#include <stdbool.h>
-
-#define arch_spinlock_t pthread_mutex_t
-#define __ARCH_SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER
-
-static inline void arch_spin_lock(arch_spinlock_t *mutex)
-{
-	pthread_mutex_lock(mutex);
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *mutex)
-{
-	pthread_mutex_unlock(mutex);
-}
-
-static inline bool arch_spin_is_locked(arch_spinlock_t *mutex)
-{
-	return true;
-}
-
-#endif
diff --git a/tools/lib/lockdep/uinclude/linux/stringify.h b/tools/lib/lockdep/uinclude/linux/stringify.h
deleted file mode 100644
index 05dfcd1ac118..000000000000
--- a/tools/lib/lockdep/uinclude/linux/stringify.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _LIBLOCKDEP_LINUX_STRINGIFY_H_
-#define _LIBLOCKDEP_LINUX_STRINGIFY_H_
-
-#define __stringify_1(x...)	#x
-#define __stringify(x...)	__stringify_1(x)
-
-#endif
diff --git a/tools/lib/lockdep/uinclude/trace/events/lock.h b/tools/lib/lockdep/uinclude/trace/events/lock.h
deleted file mode 100644
index fab00ff936d1..000000000000
--- a/tools/lib/lockdep/uinclude/trace/events/lock.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/string.c b/tools/lib/string.c
index bd239bc1d557..8e678af1c6ee 100644
--- a/tools/lib/string.c
+++ b/tools/lib/string.c
@@ -87,3 +87,12 @@ size_t __weak strlcpy(char *dest, const char *src, size_t size)
 	}
 	return ret;
 }
+
+int prefixcmp(const char *str, const char *prefix)
+{
+	for (; ; str++, prefix++)
+		if (!*prefix)
+			return 0;
+		else if (*str != *prefix)
+			return (unsigned char)*prefix - (unsigned char)*str;
+}
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index e228c3cb3716..ba970a73d053 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -1,6 +1,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <linux/string.h>
 #include <termios.h>
 #include <sys/ioctl.h>
 #include <sys/types.h>
diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index 6bc24025d054..359bfa77f39c 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -1,4 +1,5 @@
 #include <linux/compiler.h>
+#include <linux/string.h>
 #include <linux/types.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h
index fc2e45d8aaf1..8fa5f036eff0 100644
--- a/tools/lib/subcmd/subcmd-util.h
+++ b/tools/lib/subcmd/subcmd-util.h
@@ -79,13 +79,4 @@ static inline void astrcat(char **out, const char *add)
 	free(tmp);
 }
 
-static inline int prefixcmp(const char *str, const char *prefix)
-{
-	for (; ; str++, prefix++)
-		if (!*prefix)
-			return 0;
-		else if (*str != *prefix)
-			return (unsigned char)*prefix - (unsigned char)*str;
-}
-
 #endif /* __SUBCMD_UTIL_H */
diff --git a/tools/objtool/Build b/tools/objtool/Build
index d6cdece5e58b..6f2e1987c4d9 100644
--- a/tools/objtool/Build
+++ b/tools/objtool/Build
@@ -1,5 +1,6 @@
 objtool-y += arch/$(SRCARCH)/
 objtool-y += builtin-check.o
+objtool-y += check.o
 objtool-y += elf.o
 objtool-y += special.o
 objtool-y += objtool.o
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index 55a60d331f47..17c1195f11f4 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -127,28 +127,13 @@ b) 100% reliable stack traces for DWARF enabled kernels
 
 c) Higher live patching compatibility rate
 
-   (NOTE: This is not yet implemented)
-
-   Currently with CONFIG_LIVEPATCH there's a basic live patching
-   framework which is safe for roughly 85-90% of "security" fixes.  But
-   patches can't have complex features like function dependency or
-   prototype changes, or data structure changes.
-
-   There's a strong need to support patches which have the more complex
-   features so that the patch compatibility rate for security fixes can
-   eventually approach something resembling 100%.  To achieve that, a
-   "consistency model" is needed, which allows tasks to be safely
-   transitioned from an unpatched state to a patched state.
-
-   One of the key requirements of the currently proposed livepatch
-   consistency model [*] is that it needs to walk the stack of each
-   sleeping task to determine if it can be transitioned to the patched
-   state.  If objtool can ensure that stack traces are reliable, this
-   consistency model can be used and the live patching compatibility
-   rate can be improved significantly.
-
-   [*] https://lkml.kernel.org/r/cover.1423499826.git.jpoimboe@redhat.com
+   Livepatch has an optional "consistency model", which is needed for
+   more complex patches.  In order for the consistency model to work,
+   stack traces need to be reliable (or an unreliable condition needs to
+   be detectable).  Objtool makes that possible.
 
+   For more details, see the livepatch documentation in the Linux kernel
+   source tree at Documentation/livepatch/livepatch.txt.
 
 Rules
 -----
@@ -201,80 +186,84 @@ To achieve the validation, objtool enforces the following rules:
    return normally.
 
 
-Errors in .S files
-------------------
+Objtool warnings
+----------------
 
-If you're getting an error in a compiled .S file which you don't
-understand, first make sure that the affected code follows the above
-rules.
+For asm files, if you're getting an error which doesn't make sense,
+first make sure that the affected code follows the above rules.
+
+For C files, the common culprits are inline asm statements and calls to
+"noreturn" functions.  See below for more details.
+
+Another possible cause for errors in C code is if the Makefile removes
+-fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
 
 Here are some examples of common warnings reported by objtool, what
 they mean, and suggestions for how to fix them.
 
 
-1. asm_file.o: warning: objtool: func()+0x128: call without frame pointer save/setup
+1. file.o: warning: objtool: func()+0x128: call without frame pointer save/setup
 
    The func() function made a function call without first saving and/or
-   updating the frame pointer.
-
-   If func() is indeed a callable function, add proper frame pointer
-   logic using the FRAME_BEGIN and FRAME_END macros.  Otherwise, remove
-   its ELF function annotation by changing ENDPROC to END.
+   updating the frame pointer, and CONFIG_FRAME_POINTER is enabled.
 
-   If you're getting this error in a .c file, see the "Errors in .c
-   files" section.
+   If the error is for an asm file, and func() is indeed a callable
+   function, add proper frame pointer logic using the FRAME_BEGIN and
+   FRAME_END macros.  Otherwise, if it's not a callable function, remove
+   its ELF function annotation by changing ENDPROC to END, and instead
+   use the manual CFI hint macros in asm/undwarf.h.
 
+   If it's a GCC-compiled .c file, the error may be because the function
+   uses an inline asm() statement which has a "call" instruction.  An
+   asm() statement with a call instruction must declare the use of the
+   stack pointer in its output operand.  For example, on x86_64:
 
-2. asm_file.o: warning: objtool: .text+0x53: return instruction outside of a callable function
-
-   A return instruction was detected, but objtool couldn't find a way
-   for a callable function to reach the instruction.
+     register void *__sp asm("rsp");
+     asm volatile("call func" : "+r" (__sp));
 
-   If the return instruction is inside (or reachable from) a callable
-   function, the function needs to be annotated with the ENTRY/ENDPROC
-   macros.
+   Otherwise the stack frame may not get created before the call.
 
-   If you _really_ need a return instruction outside of a function, and
-   are 100% sure that it won't affect stack traces, you can tell
-   objtool to ignore it.  See the "Adding exceptions" section below.
 
+2. file.o: warning: objtool: .text+0x53: unreachable instruction
 
-3. asm_file.o: warning: objtool: func()+0x9: function has unreachable instruction
+   Objtool couldn't find a code path to reach the instruction.
 
-   The instruction lives inside of a callable function, but there's no
-   possible control flow path from the beginning of the function to the
-   instruction.
+   If the error is for an asm file, and the instruction is inside (or
+   reachable from) a callable function, the function should be annotated
+   with the ENTRY/ENDPROC macros (ENDPROC is the important one).
+   Otherwise, the code should probably be annotated with the CFI hint
+   macros in asm/undwarf.h so objtool and the unwinder can know the
+   stack state associated with the code.
 
-   If the instruction is actually needed, and it's actually in a
-   callable function, ensure that its function is properly annotated
-   with ENTRY/ENDPROC.
+   If you're 100% sure the code won't affect stack traces, or if you're
+   a just a bad person, you can tell objtool to ignore it.  See the
+   "Adding exceptions" section below.
 
    If it's not actually in a callable function (e.g. kernel entry code),
    change ENDPROC to END.
 
 
-4. asm_file.o: warning: objtool: func(): can't find starting instruction
+4. file.o: warning: objtool: func(): can't find starting instruction
    or
-   asm_file.o: warning: objtool: func()+0x11dd: can't decode instruction
+   file.o: warning: objtool: func()+0x11dd: can't decode instruction
 
-   Did you put data in a text section?  If so, that can confuse
+   Does the file have data in a text section?  If so, that can confuse
    objtool's instruction decoder.  Move the data to a more appropriate
    section like .data or .rodata.
 
 
-5. asm_file.o: warning: objtool: func()+0x6: kernel entry/exit from callable instruction
-
-   This is a kernel entry/exit instruction like sysenter or sysret.
-   Such instructions aren't allowed in a callable function, and are most
-   likely part of the kernel entry code.
+5. file.o: warning: objtool: func()+0x6: unsupported instruction in callable function
 
-   If the instruction isn't actually in a callable function, change
-   ENDPROC to END.
+   This is a kernel entry/exit instruction like sysenter or iret.  Such
+   instructions aren't allowed in a callable function, and are most
+   likely part of the kernel entry code.  They should usually not have
+   the callable function annotation (ENDPROC) and should always be
+   annotated with the CFI hint macros in asm/undwarf.h.
 
 
-6. asm_file.o: warning: objtool: func()+0x26: sibling call from callable instruction with changed frame pointer
+6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame
 
-   This is a dynamic jump or a jump to an undefined symbol.  Stacktool
+   This is a dynamic jump or a jump to an undefined symbol.  Objtool
    assumed it's a sibling call and detected that the frame pointer
    wasn't first restored to its original state.
 
@@ -282,24 +271,28 @@ they mean, and suggestions for how to fix them.
    destination code to the local file.
 
    If the instruction is not actually in a callable function (e.g.
-   kernel entry code), change ENDPROC to END.
+   kernel entry code), change ENDPROC to END and annotate manually with
+   the CFI hint macros in asm/undwarf.h.
 
 
-7. asm_file: warning: objtool: func()+0x5c: frame pointer state mismatch
+7. file: warning: objtool: func()+0x5c: stack state mismatch
 
    The instruction's frame pointer state is inconsistent, depending on
    which execution path was taken to reach the instruction.
 
-   Make sure the function pushes and sets up the frame pointer (for
-   x86_64, this means rbp) at the beginning of the function and pops it
-   at the end of the function.  Also make sure that no other code in the
-   function touches the frame pointer.
+   Make sure that, when CONFIG_FRAME_POINTER is enabled, the function
+   pushes and sets up the frame pointer (for x86_64, this means rbp) at
+   the beginning of the function and pops it at the end of the function.
+   Also make sure that no other code in the function touches the frame
+   pointer.
 
+   Another possibility is that the code has some asm or inline asm which
+   does some unusual things to the stack or the frame pointer.  In such
+   cases it's probably appropriate to use the CFI hint macros in
+   asm/undwarf.h.
 
-Errors in .c files
-------------------
 
-1. c_file.o: warning: objtool: funcA() falls through to next function funcB()
+8. file.o: warning: objtool: funcA() falls through to next function funcB()
 
    This means that funcA() doesn't end with a return instruction or an
    unconditional jump, and that objtool has determined that the function
@@ -318,22 +311,6 @@ Errors in .c files
       might be corrupt due to a gcc bug.  For more details, see:
       https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
 
-2. If you're getting any other objtool error in a compiled .c file, it
-   may be because the file uses an asm() statement which has a "call"
-   instruction.  An asm() statement with a call instruction must declare
-   the use of the stack pointer in its output operand.  For example, on
-   x86_64:
-
-     register void *__sp asm("rsp");
-     asm volatile("call func" : "+r" (__sp));
-
-   Otherwise the stack frame may not get created before the call.
-
-3. Another possible cause for errors in C code is if the Makefile removes
-   -fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
-
-Also see the above section for .S file errors for more information what
-the individual error messages mean.
 
 If the error doesn't seem to make sense, it could be a bug in objtool.
 Feel free to ask the objtool maintainer for help.
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 27e019c09bd2..0e2765e243c0 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -25,7 +25,7 @@ OBJTOOL_IN := $(OBJTOOL)-in.o
 all: $(OBJTOOL)
 
 INCLUDES := -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi
-CFLAGS   += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
+CFLAGS   += -Wall -Werror $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -fomit-frame-pointer -O2 -g $(INCLUDES)
 LDFLAGS  += -lelf $(LIBSUBCMD)
 
 # Allow old libelf to be used:
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index a59e061c0b4a..21aeca874edb 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -19,25 +19,63 @@
 #define _ARCH_H
 
 #include <stdbool.h>
+#include <linux/list.h>
 #include "elf.h"
+#include "cfi.h"
 
-#define INSN_FP_SAVE		1
-#define INSN_FP_SETUP		2
-#define INSN_FP_RESTORE		3
-#define INSN_JUMP_CONDITIONAL	4
-#define INSN_JUMP_UNCONDITIONAL	5
-#define INSN_JUMP_DYNAMIC	6
-#define INSN_CALL		7
-#define INSN_CALL_DYNAMIC	8
-#define INSN_RETURN		9
-#define INSN_CONTEXT_SWITCH	10
-#define INSN_NOP		11
-#define INSN_OTHER		12
+#define INSN_JUMP_CONDITIONAL	1
+#define INSN_JUMP_UNCONDITIONAL	2
+#define INSN_JUMP_DYNAMIC	3
+#define INSN_CALL		4
+#define INSN_CALL_DYNAMIC	5
+#define INSN_RETURN		6
+#define INSN_CONTEXT_SWITCH	7
+#define INSN_STACK		8
+#define INSN_NOP		9
+#define INSN_OTHER		10
 #define INSN_LAST		INSN_OTHER
 
+enum op_dest_type {
+	OP_DEST_REG,
+	OP_DEST_REG_INDIRECT,
+	OP_DEST_MEM,
+	OP_DEST_PUSH,
+	OP_DEST_LEAVE,
+};
+
+struct op_dest {
+	enum op_dest_type type;
+	unsigned char reg;
+	int offset;
+};
+
+enum op_src_type {
+	OP_SRC_REG,
+	OP_SRC_REG_INDIRECT,
+	OP_SRC_CONST,
+	OP_SRC_POP,
+	OP_SRC_ADD,
+	OP_SRC_AND,
+};
+
+struct op_src {
+	enum op_src_type type;
+	unsigned char reg;
+	int offset;
+};
+
+struct stack_op {
+	struct op_dest dest;
+	struct op_src src;
+};
+
+void arch_initial_func_cfi_state(struct cfi_state *state);
+
 int arch_decode_instruction(struct elf *elf, struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
 			    unsigned int *len, unsigned char *type,
-			    unsigned long *displacement);
+			    unsigned long *immediate, struct stack_op *op);
+
+bool arch_callee_saved_reg(unsigned char reg);
 
 #endif /* _ARCH_H */
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 6ac99e3266eb..a36c2eba64e7 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -27,6 +27,17 @@
 #include "../../arch.h"
 #include "../../warn.h"
 
+static unsigned char op_to_cfi_reg[][2] = {
+	{CFI_AX, CFI_R8},
+	{CFI_CX, CFI_R9},
+	{CFI_DX, CFI_R10},
+	{CFI_BX, CFI_R11},
+	{CFI_SP, CFI_R12},
+	{CFI_BP, CFI_R13},
+	{CFI_SI, CFI_R14},
+	{CFI_DI, CFI_R15},
+};
+
 static int is_x86_64(struct elf *elf)
 {
 	switch (elf->ehdr.e_machine) {
@@ -40,24 +51,50 @@ static int is_x86_64(struct elf *elf)
 	}
 }
 
+bool arch_callee_saved_reg(unsigned char reg)
+{
+	switch (reg) {
+	case CFI_BP:
+	case CFI_BX:
+	case CFI_R12:
+	case CFI_R13:
+	case CFI_R14:
+	case CFI_R15:
+		return true;
+
+	case CFI_AX:
+	case CFI_CX:
+	case CFI_DX:
+	case CFI_SI:
+	case CFI_DI:
+	case CFI_SP:
+	case CFI_R8:
+	case CFI_R9:
+	case CFI_R10:
+	case CFI_R11:
+	case CFI_RA:
+	default:
+		return false;
+	}
+}
+
 int arch_decode_instruction(struct elf *elf, struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
 			    unsigned int *len, unsigned char *type,
-			    unsigned long *immediate)
+			    unsigned long *immediate, struct stack_op *op)
 {
 	struct insn insn;
-	int x86_64;
-	unsigned char op1, op2, ext;
+	int x86_64, sign;
+	unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
+		      modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
+		      sib = 0;
 
 	x86_64 = is_x86_64(elf);
 	if (x86_64 == -1)
 		return -1;
 
-	insn_init(&insn, (void *)(sec->data + offset), maxlen, x86_64);
+	insn_init(&insn, sec->data->d_buf + offset, maxlen, x86_64);
 	insn_get_length(&insn);
-	insn_get_opcode(&insn);
-	insn_get_modrm(&insn);
-	insn_get_immediate(&insn);
 
 	if (!insn_complete(&insn)) {
 		WARN_FUNC("can't decode instruction", sec, offset);
@@ -73,67 +110,323 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 	op1 = insn.opcode.bytes[0];
 	op2 = insn.opcode.bytes[1];
 
+	if (insn.rex_prefix.nbytes) {
+		rex = insn.rex_prefix.bytes[0];
+		rex_w = X86_REX_W(rex) >> 3;
+		rex_r = X86_REX_R(rex) >> 2;
+		rex_b = X86_REX_B(rex);
+	}
+
+	if (insn.modrm.nbytes) {
+		modrm = insn.modrm.bytes[0];
+		modrm_mod = X86_MODRM_MOD(modrm);
+		modrm_reg = X86_MODRM_REG(modrm);
+		modrm_rm = X86_MODRM_RM(modrm);
+	}
+
+	if (insn.sib.nbytes)
+		sib = insn.sib.bytes[0];
+
 	switch (op1) {
-	case 0x55:
-		if (!insn.rex_prefix.nbytes)
-			/* push rbp */
-			*type = INSN_FP_SAVE;
+
+	case 0x1:
+	case 0x29:
+		if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+
+			/* add/sub reg, %rsp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			op->dest.type = OP_SRC_REG;
+			op->dest.reg = CFI_SP;
+		}
+		break;
+
+	case 0x50 ... 0x57:
+
+		/* push reg */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_REG;
+		op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+		op->dest.type = OP_DEST_PUSH;
+
 		break;
 
-	case 0x5d:
-		if (!insn.rex_prefix.nbytes)
-			/* pop rbp */
-			*type = INSN_FP_RESTORE;
+	case 0x58 ... 0x5f:
+
+		/* pop reg */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_POP;
+		op->dest.type = OP_DEST_REG;
+		op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+
+		break;
+
+	case 0x68:
+	case 0x6a:
+		/* push immediate */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_CONST;
+		op->dest.type = OP_DEST_PUSH;
 		break;
 
 	case 0x70 ... 0x7f:
 		*type = INSN_JUMP_CONDITIONAL;
 		break;
 
+	case 0x81:
+	case 0x83:
+		if (rex != 0x48)
+			break;
+
+		if (modrm == 0xe4) {
+			/* and imm, %rsp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_AND;
+			op->src.reg = CFI_SP;
+			op->src.offset = insn.immediate.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+			break;
+		}
+
+		if (modrm == 0xc4)
+			sign = 1;
+		else if (modrm == 0xec)
+			sign = -1;
+		else
+			break;
+
+		/* add/sub imm, %rsp */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_ADD;
+		op->src.reg = CFI_SP;
+		op->src.offset = insn.immediate.value * sign;
+		op->dest.type = OP_DEST_REG;
+		op->dest.reg = CFI_SP;
+		break;
+
 	case 0x89:
-		if (insn.rex_prefix.nbytes == 1 &&
-		    insn.rex_prefix.bytes[0] == 0x48 &&
-		    insn.modrm.nbytes && insn.modrm.bytes[0] == 0xe5)
-			/* mov rsp, rbp */
-			*type = INSN_FP_SETUP;
+		if (rex == 0x48 && modrm == 0xe5) {
+
+			/* mov %rsp, %rbp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG;
+			op->src.reg = CFI_SP;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_BP;
+			break;
+		}
+		/* fallthrough */
+	case 0x88:
+		if (!rex_b &&
+		    (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {
+
+			/* mov reg, disp(%rbp) */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG;
+			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			op->dest.type = OP_DEST_REG_INDIRECT;
+			op->dest.reg = CFI_BP;
+			op->dest.offset = insn.displacement.value;
+
+		} else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
+
+			/* mov reg, disp(%rsp) */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG;
+			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			op->dest.type = OP_DEST_REG_INDIRECT;
+			op->dest.reg = CFI_SP;
+			op->dest.offset = insn.displacement.value;
+		}
+
+		break;
+
+	case 0x8b:
+		if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {
+
+			/* mov disp(%rbp), reg */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG_INDIRECT;
+			op->src.reg = CFI_BP;
+			op->src.offset = insn.displacement.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+
+		} else if (rex_w && !rex_b && sib == 0x24 &&
+			   modrm_mod != 3 && modrm_rm == 4) {
+
+			/* mov disp(%rsp), reg */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG_INDIRECT;
+			op->src.reg = CFI_SP;
+			op->src.offset = insn.displacement.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+		}
+
 		break;
 
 	case 0x8d:
-		if (insn.rex_prefix.nbytes &&
-		    insn.rex_prefix.bytes[0] == 0x48 &&
-		    insn.modrm.nbytes && insn.modrm.bytes[0] == 0x2c &&
-		    insn.sib.nbytes && insn.sib.bytes[0] == 0x24)
-			/* lea %(rsp), %rbp */
-			*type = INSN_FP_SETUP;
+		if (rex == 0x48 && modrm == 0x65) {
+
+			/* lea -disp(%rbp), %rsp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_BP;
+			op->src.offset = insn.displacement.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+			break;
+		}
+
+		if (rex == 0x4c && modrm == 0x54 && sib == 0x24 &&
+		    insn.displacement.value == 8) {
+
+			/*
+			 * lea 0x8(%rsp), %r10
+			 *
+			 * Here r10 is the "drap" pointer, used as a stack
+			 * pointer helper when the stack gets realigned.
+			 */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_SP;
+			op->src.offset = 8;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_R10;
+			break;
+		}
+
+		if (rex == 0x4c && modrm == 0x6c && sib == 0x24 &&
+		    insn.displacement.value == 16) {
+
+			/*
+			 * lea 0x10(%rsp), %r13
+			 *
+			 * Here r13 is the "drap" pointer, used as a stack
+			 * pointer helper when the stack gets realigned.
+			 */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_SP;
+			op->src.offset = 16;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_R13;
+			break;
+		}
+
+		if (rex == 0x49 && modrm == 0x62 &&
+		    insn.displacement.value == -8) {
+
+			/*
+			 * lea -0x8(%r10), %rsp
+			 *
+			 * Restoring rsp back to its original value after a
+			 * stack realignment.
+			 */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_R10;
+			op->src.offset = -8;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+			break;
+		}
+
+		if (rex == 0x49 && modrm == 0x65 &&
+		    insn.displacement.value == -16) {
+
+			/*
+			 * lea -0x10(%r13), %rsp
+			 *
+			 * Restoring rsp back to its original value after a
+			 * stack realignment.
+			 */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_R13;
+			op->src.offset = -16;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+			break;
+		}
+
+		break;
+
+	case 0x8f:
+		/* pop to mem */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_POP;
+		op->dest.type = OP_DEST_MEM;
 		break;
 
 	case 0x90:
 		*type = INSN_NOP;
 		break;
 
+	case 0x9c:
+		/* pushf */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_CONST;
+		op->dest.type = OP_DEST_PUSH;
+		break;
+
+	case 0x9d:
+		/* popf */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_POP;
+		op->dest.type = OP_DEST_MEM;
+		break;
+
 	case 0x0f:
+
 		if (op2 >= 0x80 && op2 <= 0x8f)
 			*type = INSN_JUMP_CONDITIONAL;
 		else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
 			 op2 == 0x35)
+
 			/* sysenter, sysret */
 			*type = INSN_CONTEXT_SWITCH;
+
 		else if (op2 == 0x0d || op2 == 0x1f)
+
 			/* nopl/nopw */
 			*type = INSN_NOP;
-		else if (op2 == 0x01 && insn.modrm.nbytes &&
-			 (insn.modrm.bytes[0] == 0xc2 ||
-			  insn.modrm.bytes[0] == 0xd8))
-			/* vmlaunch, vmrun */
-			*type = INSN_CONTEXT_SWITCH;
+
+		else if (op2 == 0xa0 || op2 == 0xa8) {
+
+			/* push fs/gs */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_CONST;
+			op->dest.type = OP_DEST_PUSH;
+
+		} else if (op2 == 0xa1 || op2 == 0xa9) {
+
+			/* pop fs/gs */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_POP;
+			op->dest.type = OP_DEST_MEM;
+		}
 
 		break;
 
-	case 0xc9: /* leave */
-		*type = INSN_FP_RESTORE;
+	case 0xc9:
+		/*
+		 * leave
+		 *
+		 * equivalent to:
+		 * mov bp, sp
+		 * pop bp
+		 */
+		*type = INSN_STACK;
+		op->dest.type = OP_DEST_LEAVE;
+
 		break;
 
-	case 0xe3: /* jecxz/jrcxz */
+	case 0xe3:
+		/* jecxz/jrcxz */
 		*type = INSN_JUMP_CONDITIONAL;
 		break;
 
@@ -158,14 +451,27 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 		break;
 
 	case 0xff:
-		ext = X86_MODRM_REG(insn.modrm.bytes[0]);
-		if (ext == 2 || ext == 3)
+		if (modrm_reg == 2 || modrm_reg == 3)
+
 			*type = INSN_CALL_DYNAMIC;
-		else if (ext == 4)
+
+		else if (modrm_reg == 4)
+
 			*type = INSN_JUMP_DYNAMIC;
-		else if (ext == 5) /*jmpf */
+
+		else if (modrm_reg == 5)
+
+			/* jmpf */
 			*type = INSN_CONTEXT_SWITCH;
 
+		else if (modrm_reg == 6) {
+
+			/* push from mem */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_CONST;
+			op->dest.type = OP_DEST_PUSH;
+		}
+
 		break;
 
 	default:
@@ -176,3 +482,21 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 
 	return 0;
 }
+
+void arch_initial_func_cfi_state(struct cfi_state *state)
+{
+	int i;
+
+	for (i = 0; i < CFI_NUM_REGS; i++) {
+		state->regs[i].base = CFI_UNDEFINED;
+		state->regs[i].offset = 0;
+	}
+
+	/* initial CFA (call frame address) */
+	state->cfa.base = CFI_SP;
+	state->cfa.offset = 8;
+
+	/* initial RA (return address) */
+	state->regs[16].base = CFI_CFA;
+	state->regs[16].offset = -8;
+}
diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
index 767be7c76034..12e377184ee4 100644
--- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt
+++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
@@ -1009,7 +1009,7 @@ GrpTable: Grp15
 1: fxstor | RDGSBASE Ry (F3),(11B)
 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
-4: XSAVE
+4: XSAVE | ptwrite Ey (F3),(11B)
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
 7: clflush | clflushopt (66) | sfence (11B)
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 282a60368b14..365c34ecab26 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -25,1286 +25,32 @@
  * For more information, see tools/objtool/Documentation/stack-validation.txt.
  */
 
-#include <string.h>
-#include <stdlib.h>
 #include <subcmd/parse-options.h>
-
 #include "builtin.h"
-#include "elf.h"
-#include "special.h"
-#include "arch.h"
-#include "warn.h"
-
-#include <linux/hashtable.h>
-#include <linux/kernel.h>
-
-#define STATE_FP_SAVED		0x1
-#define STATE_FP_SETUP		0x2
-#define STATE_FENTRY		0x4
-
-struct instruction {
-	struct list_head list;
-	struct hlist_node hash;
-	struct section *sec;
-	unsigned long offset;
-	unsigned int len, state;
-	unsigned char type;
-	unsigned long immediate;
-	bool alt_group, visited, dead_end;
-	struct symbol *call_dest;
-	struct instruction *jump_dest;
-	struct list_head alts;
-	struct symbol *func;
-};
-
-struct alternative {
-	struct list_head list;
-	struct instruction *insn;
-};
-
-struct objtool_file {
-	struct elf *elf;
-	struct list_head insn_list;
-	DECLARE_HASHTABLE(insn_hash, 16);
-	struct section *rodata, *whitelist;
-	bool ignore_unreachables, c_file;
-};
-
-const char *objname;
-static bool nofp;
-
-static struct instruction *find_insn(struct objtool_file *file,
-				     struct section *sec, unsigned long offset)
-{
-	struct instruction *insn;
-
-	hash_for_each_possible(file->insn_hash, insn, hash, offset)
-		if (insn->sec == sec && insn->offset == offset)
-			return insn;
-
-	return NULL;
-}
-
-static struct instruction *next_insn_same_sec(struct objtool_file *file,
-					      struct instruction *insn)
-{
-	struct instruction *next = list_next_entry(insn, list);
-
-	if (&next->list == &file->insn_list || next->sec != insn->sec)
-		return NULL;
-
-	return next;
-}
-
-static bool gcov_enabled(struct objtool_file *file)
-{
-	struct section *sec;
-	struct symbol *sym;
-
-	list_for_each_entry(sec, &file->elf->sections, list)
-		list_for_each_entry(sym, &sec->symbol_list, list)
-			if (!strncmp(sym->name, "__gcov_.", 8))
-				return true;
-
-	return false;
-}
-
-#define for_each_insn(file, insn)					\
-	list_for_each_entry(insn, &file->insn_list, list)
-
-#define func_for_each_insn(file, func, insn)				\
-	for (insn = find_insn(file, func->sec, func->offset);		\
-	     insn && &insn->list != &file->insn_list &&			\
-		insn->sec == func->sec &&				\
-		insn->offset < func->offset + func->len;		\
-	     insn = list_next_entry(insn, list))
-
-#define func_for_each_insn_continue_reverse(file, func, insn)		\
-	for (insn = list_prev_entry(insn, list);			\
-	     &insn->list != &file->insn_list &&				\
-		insn->sec == func->sec && insn->offset >= func->offset;	\
-	     insn = list_prev_entry(insn, list))
-
-#define sec_for_each_insn_from(file, insn)				\
-	for (; insn; insn = next_insn_same_sec(file, insn))
-
-
-/*
- * Check if the function has been manually whitelisted with the
- * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted
- * due to its use of a context switching instruction.
- */
-static bool ignore_func(struct objtool_file *file, struct symbol *func)
-{
-	struct rela *rela;
-	struct instruction *insn;
-
-	/* check for STACK_FRAME_NON_STANDARD */
-	if (file->whitelist && file->whitelist->rela)
-		list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) {
-			if (rela->sym->type == STT_SECTION &&
-			    rela->sym->sec == func->sec &&
-			    rela->addend == func->offset)
-				return true;
-			if (rela->sym->type == STT_FUNC && rela->sym == func)
-				return true;
-		}
-
-	/* check if it has a context switching instruction */
-	func_for_each_insn(file, func, insn)
-		if (insn->type == INSN_CONTEXT_SWITCH)
-			return true;
-
-	return false;
-}
-
-/*
- * This checks to see if the given function is a "noreturn" function.
- *
- * For global functions which are outside the scope of this object file, we
- * have to keep a manual list of them.
- *
- * For local functions, we have to detect them manually by simply looking for
- * the lack of a return instruction.
- *
- * Returns:
- *  -1: error
- *   0: no dead end
- *   1: dead end
- */
-static int __dead_end_function(struct objtool_file *file, struct symbol *func,
-			       int recursion)
-{
-	int i;
-	struct instruction *insn;
-	bool empty = true;
-
-	/*
-	 * Unfortunately these have to be hard coded because the noreturn
-	 * attribute isn't provided in ELF data.
-	 */
-	static const char * const global_noreturns[] = {
-		"__stack_chk_fail",
-		"panic",
-		"do_exit",
-		"do_task_dead",
-		"__module_put_and_exit",
-		"complete_and_exit",
-		"kvm_spurious_fault",
-		"__reiserfs_panic",
-		"lbug_with_loc"
-	};
-
-	if (func->bind == STB_WEAK)
-		return 0;
-
-	if (func->bind == STB_GLOBAL)
-		for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
-			if (!strcmp(func->name, global_noreturns[i]))
-				return 1;
-
-	if (!func->sec)
-		return 0;
-
-	func_for_each_insn(file, func, insn) {
-		empty = false;
-
-		if (insn->type == INSN_RETURN)
-			return 0;
-	}
-
-	if (empty)
-		return 0;
-
-	/*
-	 * A function can have a sibling call instead of a return.  In that
-	 * case, the function's dead-end status depends on whether the target
-	 * of the sibling call returns.
-	 */
-	func_for_each_insn(file, func, insn) {
-		if (insn->sec != func->sec ||
-		    insn->offset >= func->offset + func->len)
-			break;
-
-		if (insn->type == INSN_JUMP_UNCONDITIONAL) {
-			struct instruction *dest = insn->jump_dest;
-			struct symbol *dest_func;
-
-			if (!dest)
-				/* sibling call to another file */
-				return 0;
-
-			if (dest->sec != func->sec ||
-			    dest->offset < func->offset ||
-			    dest->offset >= func->offset + func->len) {
-				/* local sibling call */
-				dest_func = find_symbol_by_offset(dest->sec,
-								  dest->offset);
-				if (!dest_func)
-					continue;
-
-				if (recursion == 5) {
-					WARN_FUNC("infinite recursion (objtool bug!)",
-						  dest->sec, dest->offset);
-					return -1;
-				}
-
-				return __dead_end_function(file, dest_func,
-							   recursion + 1);
-			}
-		}
-
-		if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts))
-			/* sibling call */
-			return 0;
-	}
-
-	return 1;
-}
-
-static int dead_end_function(struct objtool_file *file, struct symbol *func)
-{
-	return __dead_end_function(file, func, 0);
-}
-
-/*
- * Call the arch-specific instruction decoder for all the instructions and add
- * them to the global instruction list.
- */
-static int decode_instructions(struct objtool_file *file)
-{
-	struct section *sec;
-	struct symbol *func;
-	unsigned long offset;
-	struct instruction *insn;
-	int ret;
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-
-		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
-			continue;
-
-		for (offset = 0; offset < sec->len; offset += insn->len) {
-			insn = malloc(sizeof(*insn));
-			memset(insn, 0, sizeof(*insn));
-
-			INIT_LIST_HEAD(&insn->alts);
-			insn->sec = sec;
-			insn->offset = offset;
-
-			ret = arch_decode_instruction(file->elf, sec, offset,
-						      sec->len - offset,
-						      &insn->len, &insn->type,
-						      &insn->immediate);
-			if (ret)
-				return ret;
-
-			if (!insn->type || insn->type > INSN_LAST) {
-				WARN_FUNC("invalid instruction type %d",
-					  insn->sec, insn->offset, insn->type);
-				return -1;
-			}
-
-			hash_add(file->insn_hash, &insn->hash, insn->offset);
-			list_add_tail(&insn->list, &file->insn_list);
-		}
-
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			if (!find_insn(file, sec, func->offset)) {
-				WARN("%s(): can't find starting instruction",
-				     func->name);
-				return -1;
-			}
-
-			func_for_each_insn(file, func, insn)
-				if (!insn->func)
-					insn->func = func;
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Find all uses of the unreachable() macro, which are code path dead ends.
- */
-static int add_dead_ends(struct objtool_file *file)
-{
-	struct section *sec;
-	struct rela *rela;
-	struct instruction *insn;
-	bool found;
-
-	sec = find_section_by_name(file->elf, ".rela.discard.unreachable");
-	if (!sec)
-		return 0;
-
-	list_for_each_entry(rela, &sec->rela_list, list) {
-		if (rela->sym->type != STT_SECTION) {
-			WARN("unexpected relocation symbol type in %s", sec->name);
-			return -1;
-		}
-		insn = find_insn(file, rela->sym->sec, rela->addend);
-		if (insn)
-			insn = list_prev_entry(insn, list);
-		else if (rela->addend == rela->sym->sec->len) {
-			found = false;
-			list_for_each_entry_reverse(insn, &file->insn_list, list) {
-				if (insn->sec == rela->sym->sec) {
-					found = true;
-					break;
-				}
-			}
-
-			if (!found) {
-				WARN("can't find unreachable insn at %s+0x%x",
-				     rela->sym->sec->name, rela->addend);
-				return -1;
-			}
-		} else {
-			WARN("can't find unreachable insn at %s+0x%x",
-			     rela->sym->sec->name, rela->addend);
-			return -1;
-		}
-
-		insn->dead_end = true;
-	}
-
-	return 0;
-}
-
-/*
- * Warnings shouldn't be reported for ignored functions.
- */
-static void add_ignores(struct objtool_file *file)
-{
-	struct instruction *insn;
-	struct section *sec;
-	struct symbol *func;
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			if (!ignore_func(file, func))
-				continue;
-
-			func_for_each_insn(file, func, insn)
-				insn->visited = true;
-		}
-	}
-}
-
-/*
- * Find the destination instructions for all jumps.
- */
-static int add_jump_destinations(struct objtool_file *file)
-{
-	struct instruction *insn;
-	struct rela *rela;
-	struct section *dest_sec;
-	unsigned long dest_off;
-
-	for_each_insn(file, insn) {
-		if (insn->type != INSN_JUMP_CONDITIONAL &&
-		    insn->type != INSN_JUMP_UNCONDITIONAL)
-			continue;
-
-		/* skip ignores */
-		if (insn->visited)
-			continue;
-
-		rela = find_rela_by_dest_range(insn->sec, insn->offset,
-					       insn->len);
-		if (!rela) {
-			dest_sec = insn->sec;
-			dest_off = insn->offset + insn->len + insn->immediate;
-		} else if (rela->sym->type == STT_SECTION) {
-			dest_sec = rela->sym->sec;
-			dest_off = rela->addend + 4;
-		} else if (rela->sym->sec->idx) {
-			dest_sec = rela->sym->sec;
-			dest_off = rela->sym->sym.st_value + rela->addend + 4;
-		} else {
-			/* sibling call */
-			insn->jump_dest = 0;
-			continue;
-		}
-
-		insn->jump_dest = find_insn(file, dest_sec, dest_off);
-		if (!insn->jump_dest) {
-
-			/*
-			 * This is a special case where an alt instruction
-			 * jumps past the end of the section.  These are
-			 * handled later in handle_group_alt().
-			 */
-			if (!strcmp(insn->sec->name, ".altinstr_replacement"))
-				continue;
-
-			WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
-				  insn->sec, insn->offset, dest_sec->name,
-				  dest_off);
-			return -1;
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Find the destination instructions for all calls.
- */
-static int add_call_destinations(struct objtool_file *file)
-{
-	struct instruction *insn;
-	unsigned long dest_off;
-	struct rela *rela;
-
-	for_each_insn(file, insn) {
-		if (insn->type != INSN_CALL)
-			continue;
-
-		rela = find_rela_by_dest_range(insn->sec, insn->offset,
-					       insn->len);
-		if (!rela) {
-			dest_off = insn->offset + insn->len + insn->immediate;
-			insn->call_dest = find_symbol_by_offset(insn->sec,
-								dest_off);
-			if (!insn->call_dest) {
-				WARN_FUNC("can't find call dest symbol at offset 0x%lx",
-					  insn->sec, insn->offset, dest_off);
-				return -1;
-			}
-		} else if (rela->sym->type == STT_SECTION) {
-			insn->call_dest = find_symbol_by_offset(rela->sym->sec,
-								rela->addend+4);
-			if (!insn->call_dest ||
-			    insn->call_dest->type != STT_FUNC) {
-				WARN_FUNC("can't find call dest symbol at %s+0x%x",
-					  insn->sec, insn->offset,
-					  rela->sym->sec->name,
-					  rela->addend + 4);
-				return -1;
-			}
-		} else
-			insn->call_dest = rela->sym;
-	}
-
-	return 0;
-}
-
-/*
- * The .alternatives section requires some extra special care, over and above
- * what other special sections require:
- *
- * 1. Because alternatives are patched in-place, we need to insert a fake jump
- *    instruction at the end so that validate_branch() skips all the original
- *    replaced instructions when validating the new instruction path.
- *
- * 2. An added wrinkle is that the new instruction length might be zero.  In
- *    that case the old instructions are replaced with noops.  We simulate that
- *    by creating a fake jump as the only new instruction.
- *
- * 3. In some cases, the alternative section includes an instruction which
- *    conditionally jumps to the _end_ of the entry.  We have to modify these
- *    jumps' destinations to point back to .text rather than the end of the
- *    entry in .altinstr_replacement.
- *
- * 4. It has been requested that we don't validate the !POPCNT feature path
- *    which is a "very very small percentage of machines".
- */
-static int handle_group_alt(struct objtool_file *file,
-			    struct special_alt *special_alt,
-			    struct instruction *orig_insn,
-			    struct instruction **new_insn)
-{
-	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
-	unsigned long dest_off;
-
-	last_orig_insn = NULL;
-	insn = orig_insn;
-	sec_for_each_insn_from(file, insn) {
-		if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
-			break;
-
-		if (special_alt->skip_orig)
-			insn->type = INSN_NOP;
-
-		insn->alt_group = true;
-		last_orig_insn = insn;
-	}
-
-	if (!next_insn_same_sec(file, last_orig_insn)) {
-		WARN("%s: don't know how to handle alternatives at end of section",
-		     special_alt->orig_sec->name);
-		return -1;
-	}
-
-	fake_jump = malloc(sizeof(*fake_jump));
-	if (!fake_jump) {
-		WARN("malloc failed");
-		return -1;
-	}
-	memset(fake_jump, 0, sizeof(*fake_jump));
-	INIT_LIST_HEAD(&fake_jump->alts);
-	fake_jump->sec = special_alt->new_sec;
-	fake_jump->offset = -1;
-	fake_jump->type = INSN_JUMP_UNCONDITIONAL;
-	fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
-
-	if (!special_alt->new_len) {
-		*new_insn = fake_jump;
-		return 0;
-	}
-
-	last_new_insn = NULL;
-	insn = *new_insn;
-	sec_for_each_insn_from(file, insn) {
-		if (insn->offset >= special_alt->new_off + special_alt->new_len)
-			break;
-
-		last_new_insn = insn;
-
-		if (insn->type != INSN_JUMP_CONDITIONAL &&
-		    insn->type != INSN_JUMP_UNCONDITIONAL)
-			continue;
-
-		if (!insn->immediate)
-			continue;
-
-		dest_off = insn->offset + insn->len + insn->immediate;
-		if (dest_off == special_alt->new_off + special_alt->new_len)
-			insn->jump_dest = fake_jump;
-
-		if (!insn->jump_dest) {
-			WARN_FUNC("can't find alternative jump destination",
-				  insn->sec, insn->offset);
-			return -1;
-		}
-	}
-
-	if (!last_new_insn) {
-		WARN_FUNC("can't find last new alternative instruction",
-			  special_alt->new_sec, special_alt->new_off);
-		return -1;
-	}
-
-	list_add(&fake_jump->list, &last_new_insn->list);
-
-	return 0;
-}
-
-/*
- * A jump table entry can either convert a nop to a jump or a jump to a nop.
- * If the original instruction is a jump, make the alt entry an effective nop
- * by just skipping the original instruction.
- */
-static int handle_jump_alt(struct objtool_file *file,
-			   struct special_alt *special_alt,
-			   struct instruction *orig_insn,
-			   struct instruction **new_insn)
-{
-	if (orig_insn->type == INSN_NOP)
-		return 0;
-
-	if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
-		WARN_FUNC("unsupported instruction at jump label",
-			  orig_insn->sec, orig_insn->offset);
-		return -1;
-	}
-
-	*new_insn = list_next_entry(orig_insn, list);
-	return 0;
-}
-
-/*
- * Read all the special sections which have alternate instructions which can be
- * patched in or redirected to at runtime.  Each instruction having alternate
- * instruction(s) has them added to its insn->alts list, which will be
- * traversed in validate_branch().
- */
-static int add_special_section_alts(struct objtool_file *file)
-{
-	struct list_head special_alts;
-	struct instruction *orig_insn, *new_insn;
-	struct special_alt *special_alt, *tmp;
-	struct alternative *alt;
-	int ret;
-
-	ret = special_get_alts(file->elf, &special_alts);
-	if (ret)
-		return ret;
-
-	list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
-		alt = malloc(sizeof(*alt));
-		if (!alt) {
-			WARN("malloc failed");
-			ret = -1;
-			goto out;
-		}
-
-		orig_insn = find_insn(file, special_alt->orig_sec,
-				      special_alt->orig_off);
-		if (!orig_insn) {
-			WARN_FUNC("special: can't find orig instruction",
-				  special_alt->orig_sec, special_alt->orig_off);
-			ret = -1;
-			goto out;
-		}
+#include "check.h"
 
-		new_insn = NULL;
-		if (!special_alt->group || special_alt->new_len) {
-			new_insn = find_insn(file, special_alt->new_sec,
-					     special_alt->new_off);
-			if (!new_insn) {
-				WARN_FUNC("special: can't find new instruction",
-					  special_alt->new_sec,
-					  special_alt->new_off);
-				ret = -1;
-				goto out;
-			}
-		}
+bool nofp;
 
-		if (special_alt->group) {
-			ret = handle_group_alt(file, special_alt, orig_insn,
-					       &new_insn);
-			if (ret)
-				goto out;
-		} else if (special_alt->jump_or_nop) {
-			ret = handle_jump_alt(file, special_alt, orig_insn,
-					      &new_insn);
-			if (ret)
-				goto out;
-		}
-
-		alt->insn = new_insn;
-		list_add_tail(&alt->list, &orig_insn->alts);
-
-		list_del(&special_alt->list);
-		free(special_alt);
-	}
-
-out:
-	return ret;
-}
-
-static int add_switch_table(struct objtool_file *file, struct symbol *func,
-			    struct instruction *insn, struct rela *table,
-			    struct rela *next_table)
-{
-	struct rela *rela = table;
-	struct instruction *alt_insn;
-	struct alternative *alt;
-
-	list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
-		if (rela == next_table)
-			break;
-
-		if (rela->sym->sec != insn->sec ||
-		    rela->addend <= func->offset ||
-		    rela->addend >= func->offset + func->len)
-			break;
-
-		alt_insn = find_insn(file, insn->sec, rela->addend);
-		if (!alt_insn) {
-			WARN("%s: can't find instruction at %s+0x%x",
-			     file->rodata->rela->name, insn->sec->name,
-			     rela->addend);
-			return -1;
-		}
-
-		alt = malloc(sizeof(*alt));
-		if (!alt) {
-			WARN("malloc failed");
-			return -1;
-		}
-
-		alt->insn = alt_insn;
-		list_add_tail(&alt->list, &insn->alts);
-	}
-
-	return 0;
-}
-
-/*
- * find_switch_table() - Given a dynamic jump, find the switch jump table in
- * .rodata associated with it.
- *
- * There are 3 basic patterns:
- *
- * 1. jmpq *[rodata addr](,%reg,8)
- *
- *    This is the most common case by far.  It jumps to an address in a simple
- *    jump table which is stored in .rodata.
- *
- * 2. jmpq *[rodata addr](%rip)
- *
- *    This is caused by a rare GCC quirk, currently only seen in three driver
- *    functions in the kernel, only with certain obscure non-distro configs.
- *
- *    As part of an optimization, GCC makes a copy of an existing switch jump
- *    table, modifies it, and then hard-codes the jump (albeit with an indirect
- *    jump) to use a single entry in the table.  The rest of the jump table and
- *    some of its jump targets remain as dead code.
- *
- *    In such a case we can just crudely ignore all unreachable instruction
- *    warnings for the entire object file.  Ideally we would just ignore them
- *    for the function, but that would require redesigning the code quite a
- *    bit.  And honestly that's just not worth doing: unreachable instruction
- *    warnings are of questionable value anyway, and this is such a rare issue.
- *
- * 3. mov [rodata addr],%reg1
- *    ... some instructions ...
- *    jmpq *(%reg1,%reg2,8)
- *
- *    This is a fairly uncommon pattern which is new for GCC 6.  As of this
- *    writing, there are 11 occurrences of it in the allmodconfig kernel.
- *
- *    TODO: Once we have DWARF CFI and smarter instruction decoding logic,
- *    ensure the same register is used in the mov and jump instructions.
- */
-static struct rela *find_switch_table(struct objtool_file *file,
-				      struct symbol *func,
-				      struct instruction *insn)
-{
-	struct rela *text_rela, *rodata_rela;
-	struct instruction *orig_insn = insn;
-
-	text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
-	if (text_rela && text_rela->sym == file->rodata->sym) {
-		/* case 1 */
-		rodata_rela = find_rela_by_dest(file->rodata,
-						text_rela->addend);
-		if (rodata_rela)
-			return rodata_rela;
-
-		/* case 2 */
-		rodata_rela = find_rela_by_dest(file->rodata,
-						text_rela->addend + 4);
-		if (!rodata_rela)
-			return NULL;
-		file->ignore_unreachables = true;
-		return rodata_rela;
-	}
-
-	/* case 3 */
-	func_for_each_insn_continue_reverse(file, func, insn) {
-		if (insn->type == INSN_JUMP_DYNAMIC)
-			break;
-
-		/* allow small jumps within the range */
-		if (insn->type == INSN_JUMP_UNCONDITIONAL &&
-		    insn->jump_dest &&
-		    (insn->jump_dest->offset <= insn->offset ||
-		     insn->jump_dest->offset > orig_insn->offset))
-		    break;
-
-		/* look for a relocation which references .rodata */
-		text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
-						    insn->len);
-		if (!text_rela || text_rela->sym != file->rodata->sym)
-			continue;
-
-		/*
-		 * Make sure the .rodata address isn't associated with a
-		 * symbol.  gcc jump tables are anonymous data.
-		 */
-		if (find_symbol_containing(file->rodata, text_rela->addend))
-			continue;
-
-		return find_rela_by_dest(file->rodata, text_rela->addend);
-	}
-
-	return NULL;
-}
-
-static int add_func_switch_tables(struct objtool_file *file,
-				  struct symbol *func)
-{
-	struct instruction *insn, *prev_jump = NULL;
-	struct rela *rela, *prev_rela = NULL;
-	int ret;
-
-	func_for_each_insn(file, func, insn) {
-		if (insn->type != INSN_JUMP_DYNAMIC)
-			continue;
-
-		rela = find_switch_table(file, func, insn);
-		if (!rela)
-			continue;
-
-		/*
-		 * We found a switch table, but we don't know yet how big it
-		 * is.  Don't add it until we reach the end of the function or
-		 * the beginning of another switch table in the same function.
-		 */
-		if (prev_jump) {
-			ret = add_switch_table(file, func, prev_jump, prev_rela,
-					       rela);
-			if (ret)
-				return ret;
-		}
-
-		prev_jump = insn;
-		prev_rela = rela;
-	}
-
-	if (prev_jump) {
-		ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-/*
- * For some switch statements, gcc generates a jump table in the .rodata
- * section which contains a list of addresses within the function to jump to.
- * This finds these jump tables and adds them to the insn->alts lists.
- */
-static int add_switch_table_alts(struct objtool_file *file)
-{
-	struct section *sec;
-	struct symbol *func;
-	int ret;
-
-	if (!file->rodata || !file->rodata->rela)
-		return 0;
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			ret = add_func_switch_tables(file, func);
-			if (ret)
-				return ret;
-		}
-	}
-
-	return 0;
-}
-
-static int decode_sections(struct objtool_file *file)
-{
-	int ret;
-
-	ret = decode_instructions(file);
-	if (ret)
-		return ret;
-
-	ret = add_dead_ends(file);
-	if (ret)
-		return ret;
-
-	add_ignores(file);
-
-	ret = add_jump_destinations(file);
-	if (ret)
-		return ret;
-
-	ret = add_call_destinations(file);
-	if (ret)
-		return ret;
-
-	ret = add_special_section_alts(file);
-	if (ret)
-		return ret;
-
-	ret = add_switch_table_alts(file);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static bool is_fentry_call(struct instruction *insn)
-{
-	if (insn->type == INSN_CALL &&
-	    insn->call_dest->type == STT_NOTYPE &&
-	    !strcmp(insn->call_dest->name, "__fentry__"))
-		return true;
-
-	return false;
-}
-
-static bool has_modified_stack_frame(struct instruction *insn)
-{
-	return (insn->state & STATE_FP_SAVED) ||
-	       (insn->state & STATE_FP_SETUP);
-}
-
-static bool has_valid_stack_frame(struct instruction *insn)
-{
-	return (insn->state & STATE_FP_SAVED) &&
-	       (insn->state & STATE_FP_SETUP);
-}
-
-static unsigned int frame_state(unsigned long state)
-{
-	return (state & (STATE_FP_SAVED | STATE_FP_SETUP));
-}
-
-/*
- * Follow the branch starting at the given instruction, and recursively follow
- * any other branches (jumps).  Meanwhile, track the frame pointer state at
- * each instruction and validate all the rules described in
- * tools/objtool/Documentation/stack-validation.txt.
- */
-static int validate_branch(struct objtool_file *file,
-			   struct instruction *first, unsigned char first_state)
-{
-	struct alternative *alt;
-	struct instruction *insn;
-	struct section *sec;
-	struct symbol *func = NULL;
-	unsigned char state;
-	int ret;
-
-	insn = first;
-	sec = insn->sec;
-	state = first_state;
-
-	if (insn->alt_group && list_empty(&insn->alts)) {
-		WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
-			  sec, insn->offset);
-		return 1;
-	}
-
-	while (1) {
-		if (file->c_file && insn->func) {
-			if (func && func != insn->func) {
-				WARN("%s() falls through to next function %s()",
-				     func->name, insn->func->name);
-				return 1;
-			}
-
-			func = insn->func;
-		}
-
-		if (insn->visited) {
-			if (frame_state(insn->state) != frame_state(state)) {
-				WARN_FUNC("frame pointer state mismatch",
-					  sec, insn->offset);
-				return 1;
-			}
-
-			return 0;
-		}
-
-		insn->visited = true;
-		insn->state = state;
-
-		list_for_each_entry(alt, &insn->alts, list) {
-			ret = validate_branch(file, alt->insn, state);
-			if (ret)
-				return 1;
-		}
-
-		switch (insn->type) {
-
-		case INSN_FP_SAVE:
-			if (!nofp) {
-				if (state & STATE_FP_SAVED) {
-					WARN_FUNC("duplicate frame pointer save",
-						  sec, insn->offset);
-					return 1;
-				}
-				state |= STATE_FP_SAVED;
-			}
-			break;
-
-		case INSN_FP_SETUP:
-			if (!nofp) {
-				if (state & STATE_FP_SETUP) {
-					WARN_FUNC("duplicate frame pointer setup",
-						  sec, insn->offset);
-					return 1;
-				}
-				state |= STATE_FP_SETUP;
-			}
-			break;
-
-		case INSN_FP_RESTORE:
-			if (!nofp) {
-				if (has_valid_stack_frame(insn))
-					state &= ~STATE_FP_SETUP;
-
-				state &= ~STATE_FP_SAVED;
-			}
-			break;
-
-		case INSN_RETURN:
-			if (!nofp && has_modified_stack_frame(insn)) {
-				WARN_FUNC("return without frame pointer restore",
-					  sec, insn->offset);
-				return 1;
-			}
-			return 0;
-
-		case INSN_CALL:
-			if (is_fentry_call(insn)) {
-				state |= STATE_FENTRY;
-				break;
-			}
-
-			ret = dead_end_function(file, insn->call_dest);
-			if (ret == 1)
-				return 0;
-			if (ret == -1)
-				return 1;
-
-			/* fallthrough */
-		case INSN_CALL_DYNAMIC:
-			if (!nofp && !has_valid_stack_frame(insn)) {
-				WARN_FUNC("call without frame pointer save/setup",
-					  sec, insn->offset);
-				return 1;
-			}
-			break;
-
-		case INSN_JUMP_CONDITIONAL:
-		case INSN_JUMP_UNCONDITIONAL:
-			if (insn->jump_dest) {
-				ret = validate_branch(file, insn->jump_dest,
-						      state);
-				if (ret)
-					return 1;
-			} else if (has_modified_stack_frame(insn)) {
-				WARN_FUNC("sibling call from callable instruction with changed frame pointer",
-					  sec, insn->offset);
-				return 1;
-			} /* else it's a sibling call */
-
-			if (insn->type == INSN_JUMP_UNCONDITIONAL)
-				return 0;
-
-			break;
-
-		case INSN_JUMP_DYNAMIC:
-			if (list_empty(&insn->alts) &&
-			    has_modified_stack_frame(insn)) {
-				WARN_FUNC("sibling call from callable instruction with changed frame pointer",
-					  sec, insn->offset);
-				return 1;
-			}
-
-			return 0;
-
-		default:
-			break;
-		}
-
-		if (insn->dead_end)
-			return 0;
-
-		insn = next_insn_same_sec(file, insn);
-		if (!insn) {
-			WARN("%s: unexpected end of section", sec->name);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-static bool is_kasan_insn(struct instruction *insn)
-{
-	return (insn->type == INSN_CALL &&
-		!strcmp(insn->call_dest->name, "__asan_handle_no_return"));
-}
-
-static bool is_ubsan_insn(struct instruction *insn)
-{
-	return (insn->type == INSN_CALL &&
-		!strcmp(insn->call_dest->name,
-			"__ubsan_handle_builtin_unreachable"));
-}
-
-static bool ignore_unreachable_insn(struct symbol *func,
-				    struct instruction *insn)
-{
-	int i;
-
-	if (insn->type == INSN_NOP)
-		return true;
-
-	/*
-	 * Check if this (or a subsequent) instruction is related to
-	 * CONFIG_UBSAN or CONFIG_KASAN.
-	 *
-	 * End the search at 5 instructions to avoid going into the weeds.
-	 */
-	for (i = 0; i < 5; i++) {
-
-		if (is_kasan_insn(insn) || is_ubsan_insn(insn))
-			return true;
-
-		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
-			insn = insn->jump_dest;
-			continue;
-		}
-
-		if (insn->offset + insn->len >= func->offset + func->len)
-			break;
-		insn = list_next_entry(insn, list);
-	}
-
-	return false;
-}
-
-static int validate_functions(struct objtool_file *file)
-{
-	struct section *sec;
-	struct symbol *func;
-	struct instruction *insn;
-	int ret, warnings = 0;
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			insn = find_insn(file, sec, func->offset);
-			if (!insn)
-				continue;
-
-			ret = validate_branch(file, insn, 0);
-			warnings += ret;
-		}
-	}
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			func_for_each_insn(file, func, insn) {
-				if (insn->visited)
-					continue;
-
-				insn->visited = true;
-
-				if (file->ignore_unreachables || warnings ||
-				    ignore_unreachable_insn(func, insn))
-					continue;
-
-				/*
-				 * gcov produces a lot of unreachable
-				 * instructions.  If we get an unreachable
-				 * warning and the file has gcov enabled, just
-				 * ignore it, and all other such warnings for
-				 * the file.
-				 */
-				if (!file->ignore_unreachables &&
-				    gcov_enabled(file)) {
-					file->ignore_unreachables = true;
-					continue;
-				}
-
-				WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset);
-				warnings++;
-			}
-		}
-	}
-
-	return warnings;
-}
-
-static int validate_uncallable_instructions(struct objtool_file *file)
-{
-	struct instruction *insn;
-	int warnings = 0;
-
-	for_each_insn(file, insn) {
-		if (!insn->visited && insn->type == INSN_RETURN) {
-			WARN_FUNC("return instruction outside of a callable function",
-				  insn->sec, insn->offset);
-			warnings++;
-		}
-	}
-
-	return warnings;
-}
-
-static void cleanup(struct objtool_file *file)
-{
-	struct instruction *insn, *tmpinsn;
-	struct alternative *alt, *tmpalt;
-
-	list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) {
-		list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) {
-			list_del(&alt->list);
-			free(alt);
-		}
-		list_del(&insn->list);
-		hash_del(&insn->hash);
-		free(insn);
-	}
-	elf_close(file->elf);
-}
-
-const char * const check_usage[] = {
+static const char * const check_usage[] = {
 	"objtool check [<options>] file.o",
 	NULL,
 };
 
+const struct option check_options[] = {
+	OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"),
+	OPT_END(),
+};
+
 int cmd_check(int argc, const char **argv)
 {
-	struct objtool_file file;
-	int ret, warnings = 0;
-
-	const struct option options[] = {
-		OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"),
-		OPT_END(),
-	};
+	const char *objname;
 
-	argc = parse_options(argc, argv, options, check_usage, 0);
+	argc = parse_options(argc, argv, check_options, check_usage, 0);
 
 	if (argc != 1)
-		usage_with_options(check_usage, options);
+		usage_with_options(check_usage, check_options);
 
 	objname = argv[0];
 
-	file.elf = elf_open(objname);
-	if (!file.elf) {
-		fprintf(stderr, "error reading elf file %s\n", objname);
-		return 1;
-	}
-
-	INIT_LIST_HEAD(&file.insn_list);
-	hash_init(file.insn_hash);
-	file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
-	file.rodata = find_section_by_name(file.elf, ".rodata");
-	file.ignore_unreachables = false;
-	file.c_file = find_section_by_name(file.elf, ".comment");
-
-	ret = decode_sections(&file);
-	if (ret < 0)
-		goto out;
-	warnings += ret;
-
-	ret = validate_functions(&file);
-	if (ret < 0)
-		goto out;
-	warnings += ret;
-
-	ret = validate_uncallable_instructions(&file);
-	if (ret < 0)
-		goto out;
-	warnings += ret;
-
-out:
-	cleanup(&file);
-
-	/* ignore warnings for now until we get all the code cleaned up */
-	if (ret || warnings)
-		return 0;
-	return 0;
+	return check(objname, nofp);
 }
diff --git a/tools/objtool/cfi.h b/tools/objtool/cfi.h
new file mode 100644
index 000000000000..443ab2c69992
--- /dev/null
+++ b/tools/objtool/cfi.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _OBJTOOL_CFI_H
+#define _OBJTOOL_CFI_H
+
+#define CFI_UNDEFINED		-1
+#define CFI_CFA			-2
+#define CFI_SP_INDIRECT		-3
+#define CFI_BP_INDIRECT		-4
+
+#define CFI_AX			0
+#define CFI_DX			1
+#define CFI_CX			2
+#define CFI_BX			3
+#define CFI_SI			4
+#define CFI_DI			5
+#define CFI_BP			6
+#define CFI_SP			7
+#define CFI_R8			8
+#define CFI_R9			9
+#define CFI_R10			10
+#define CFI_R11			11
+#define CFI_R12			12
+#define CFI_R13			13
+#define CFI_R14			14
+#define CFI_R15			15
+#define CFI_RA			16
+#define CFI_NUM_REGS	17
+
+struct cfi_reg {
+	int base;
+	int offset;
+};
+
+struct cfi_state {
+	struct cfi_reg cfa;
+	struct cfi_reg regs[CFI_NUM_REGS];
+};
+
+#endif /* _OBJTOOL_CFI_H */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
new file mode 100644
index 000000000000..fea222192c57
--- /dev/null
+++ b/tools/objtool/check.c
@@ -0,0 +1,1655 @@
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+
+#include "check.h"
+#include "elf.h"
+#include "special.h"
+#include "arch.h"
+#include "warn.h"
+
+#include <linux/hashtable.h>
+#include <linux/kernel.h>
+
+struct alternative {
+	struct list_head list;
+	struct instruction *insn;
+};
+
+const char *objname;
+static bool nofp;
+struct cfi_state initial_func_cfi;
+
+static struct instruction *find_insn(struct objtool_file *file,
+				     struct section *sec, unsigned long offset)
+{
+	struct instruction *insn;
+
+	hash_for_each_possible(file->insn_hash, insn, hash, offset)
+		if (insn->sec == sec && insn->offset == offset)
+			return insn;
+
+	return NULL;
+}
+
+static struct instruction *next_insn_same_sec(struct objtool_file *file,
+					      struct instruction *insn)
+{
+	struct instruction *next = list_next_entry(insn, list);
+
+	if (!next || &next->list == &file->insn_list || next->sec != insn->sec)
+		return NULL;
+
+	return next;
+}
+
+static bool gcov_enabled(struct objtool_file *file)
+{
+	struct section *sec;
+	struct symbol *sym;
+
+	for_each_sec(file, sec)
+		list_for_each_entry(sym, &sec->symbol_list, list)
+			if (!strncmp(sym->name, "__gcov_.", 8))
+				return true;
+
+	return false;
+}
+
+#define func_for_each_insn(file, func, insn)				\
+	for (insn = find_insn(file, func->sec, func->offset);		\
+	     insn && &insn->list != &file->insn_list &&			\
+		insn->sec == func->sec &&				\
+		insn->offset < func->offset + func->len;		\
+	     insn = list_next_entry(insn, list))
+
+#define func_for_each_insn_continue_reverse(file, func, insn)		\
+	for (insn = list_prev_entry(insn, list);			\
+	     &insn->list != &file->insn_list &&				\
+		insn->sec == func->sec && insn->offset >= func->offset;	\
+	     insn = list_prev_entry(insn, list))
+
+#define sec_for_each_insn_from(file, insn)				\
+	for (; insn; insn = next_insn_same_sec(file, insn))
+
+#define sec_for_each_insn_continue(file, insn)				\
+	for (insn = next_insn_same_sec(file, insn); insn;		\
+	     insn = next_insn_same_sec(file, insn))
+
+/*
+ * Check if the function has been manually whitelisted with the
+ * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted
+ * due to its use of a context switching instruction.
+ */
+static bool ignore_func(struct objtool_file *file, struct symbol *func)
+{
+	struct rela *rela;
+	struct instruction *insn;
+
+	/* check for STACK_FRAME_NON_STANDARD */
+	if (file->whitelist && file->whitelist->rela)
+		list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) {
+			if (rela->sym->type == STT_SECTION &&
+			    rela->sym->sec == func->sec &&
+			    rela->addend == func->offset)
+				return true;
+			if (rela->sym->type == STT_FUNC && rela->sym == func)
+				return true;
+		}
+
+	/* check if it has a context switching instruction */
+	func_for_each_insn(file, func, insn)
+		if (insn->type == INSN_CONTEXT_SWITCH)
+			return true;
+
+	return false;
+}
+
+/*
+ * This checks to see if the given function is a "noreturn" function.
+ *
+ * For global functions which are outside the scope of this object file, we
+ * have to keep a manual list of them.
+ *
+ * For local functions, we have to detect them manually by simply looking for
+ * the lack of a return instruction.
+ *
+ * Returns:
+ *  -1: error
+ *   0: no dead end
+ *   1: dead end
+ */
+static int __dead_end_function(struct objtool_file *file, struct symbol *func,
+			       int recursion)
+{
+	int i;
+	struct instruction *insn;
+	bool empty = true;
+
+	/*
+	 * Unfortunately these have to be hard coded because the noreturn
+	 * attribute isn't provided in ELF data.
+	 */
+	static const char * const global_noreturns[] = {
+		"__stack_chk_fail",
+		"panic",
+		"do_exit",
+		"do_task_dead",
+		"__module_put_and_exit",
+		"complete_and_exit",
+		"kvm_spurious_fault",
+		"__reiserfs_panic",
+		"lbug_with_loc",
+		"fortify_panic",
+	};
+
+	if (func->bind == STB_WEAK)
+		return 0;
+
+	if (func->bind == STB_GLOBAL)
+		for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
+			if (!strcmp(func->name, global_noreturns[i]))
+				return 1;
+
+	if (!func->sec)
+		return 0;
+
+	func_for_each_insn(file, func, insn) {
+		empty = false;
+
+		if (insn->type == INSN_RETURN)
+			return 0;
+	}
+
+	if (empty)
+		return 0;
+
+	/*
+	 * A function can have a sibling call instead of a return.  In that
+	 * case, the function's dead-end status depends on whether the target
+	 * of the sibling call returns.
+	 */
+	func_for_each_insn(file, func, insn) {
+		if (insn->sec != func->sec ||
+		    insn->offset >= func->offset + func->len)
+			break;
+
+		if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+			struct instruction *dest = insn->jump_dest;
+			struct symbol *dest_func;
+
+			if (!dest)
+				/* sibling call to another file */
+				return 0;
+
+			if (dest->sec != func->sec ||
+			    dest->offset < func->offset ||
+			    dest->offset >= func->offset + func->len) {
+				/* local sibling call */
+				dest_func = find_symbol_by_offset(dest->sec,
+								  dest->offset);
+				if (!dest_func)
+					continue;
+
+				if (recursion == 5) {
+					WARN_FUNC("infinite recursion (objtool bug!)",
+						  dest->sec, dest->offset);
+					return -1;
+				}
+
+				return __dead_end_function(file, dest_func,
+							   recursion + 1);
+			}
+		}
+
+		if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts))
+			/* sibling call */
+			return 0;
+	}
+
+	return 1;
+}
+
+static int dead_end_function(struct objtool_file *file, struct symbol *func)
+{
+	return __dead_end_function(file, func, 0);
+}
+
+static void clear_insn_state(struct insn_state *state)
+{
+	int i;
+
+	memset(state, 0, sizeof(*state));
+	state->cfa.base = CFI_UNDEFINED;
+	for (i = 0; i < CFI_NUM_REGS; i++)
+		state->regs[i].base = CFI_UNDEFINED;
+	state->drap_reg = CFI_UNDEFINED;
+}
+
+/*
+ * Call the arch-specific instruction decoder for all the instructions and add
+ * them to the global instruction list.
+ */
+static int decode_instructions(struct objtool_file *file)
+{
+	struct section *sec;
+	struct symbol *func;
+	unsigned long offset;
+	struct instruction *insn;
+	int ret;
+
+	for_each_sec(file, sec) {
+
+		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+			continue;
+
+		for (offset = 0; offset < sec->len; offset += insn->len) {
+			insn = malloc(sizeof(*insn));
+			if (!insn) {
+				WARN("malloc failed");
+				return -1;
+			}
+			memset(insn, 0, sizeof(*insn));
+			INIT_LIST_HEAD(&insn->alts);
+			clear_insn_state(&insn->state);
+
+			insn->sec = sec;
+			insn->offset = offset;
+
+			ret = arch_decode_instruction(file->elf, sec, offset,
+						      sec->len - offset,
+						      &insn->len, &insn->type,
+						      &insn->immediate,
+						      &insn->stack_op);
+			if (ret)
+				return ret;
+
+			if (!insn->type || insn->type > INSN_LAST) {
+				WARN_FUNC("invalid instruction type %d",
+					  insn->sec, insn->offset, insn->type);
+				return -1;
+			}
+
+			hash_add(file->insn_hash, &insn->hash, insn->offset);
+			list_add_tail(&insn->list, &file->insn_list);
+		}
+
+		list_for_each_entry(func, &sec->symbol_list, list) {
+			if (func->type != STT_FUNC)
+				continue;
+
+			if (!find_insn(file, sec, func->offset)) {
+				WARN("%s(): can't find starting instruction",
+				     func->name);
+				return -1;
+			}
+
+			func_for_each_insn(file, func, insn)
+				if (!insn->func)
+					insn->func = func;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Find all uses of the unreachable() macro, which are code path dead ends.
+ */
+static int add_dead_ends(struct objtool_file *file)
+{
+	struct section *sec;
+	struct rela *rela;
+	struct instruction *insn;
+	bool found;
+
+	sec = find_section_by_name(file->elf, ".rela.discard.unreachable");
+	if (!sec)
+		return 0;
+
+	list_for_each_entry(rela, &sec->rela_list, list) {
+		if (rela->sym->type != STT_SECTION) {
+			WARN("unexpected relocation symbol type in %s", sec->name);
+			return -1;
+		}
+		insn = find_insn(file, rela->sym->sec, rela->addend);
+		if (insn)
+			insn = list_prev_entry(insn, list);
+		else if (rela->addend == rela->sym->sec->len) {
+			found = false;
+			list_for_each_entry_reverse(insn, &file->insn_list, list) {
+				if (insn->sec == rela->sym->sec) {
+					found = true;
+					break;
+				}
+			}
+
+			if (!found) {
+				WARN("can't find unreachable insn at %s+0x%x",
+				     rela->sym->sec->name, rela->addend);
+				return -1;
+			}
+		} else {
+			WARN("can't find unreachable insn at %s+0x%x",
+			     rela->sym->sec->name, rela->addend);
+			return -1;
+		}
+
+		insn->dead_end = true;
+	}
+
+	return 0;
+}
+
+/*
+ * Warnings shouldn't be reported for ignored functions.
+ */
+static void add_ignores(struct objtool_file *file)
+{
+	struct instruction *insn;
+	struct section *sec;
+	struct symbol *func;
+
+	for_each_sec(file, sec) {
+		list_for_each_entry(func, &sec->symbol_list, list) {
+			if (func->type != STT_FUNC)
+				continue;
+
+			if (!ignore_func(file, func))
+				continue;
+
+			func_for_each_insn(file, func, insn)
+				insn->ignore = true;
+		}
+	}
+}
+
+/*
+ * Find the destination instructions for all jumps.
+ */
+static int add_jump_destinations(struct objtool_file *file)
+{
+	struct instruction *insn;
+	struct rela *rela;
+	struct section *dest_sec;
+	unsigned long dest_off;
+
+	for_each_insn(file, insn) {
+		if (insn->type != INSN_JUMP_CONDITIONAL &&
+		    insn->type != INSN_JUMP_UNCONDITIONAL)
+			continue;
+
+		if (insn->ignore)
+			continue;
+
+		rela = find_rela_by_dest_range(insn->sec, insn->offset,
+					       insn->len);
+		if (!rela) {
+			dest_sec = insn->sec;
+			dest_off = insn->offset + insn->len + insn->immediate;
+		} else if (rela->sym->type == STT_SECTION) {
+			dest_sec = rela->sym->sec;
+			dest_off = rela->addend + 4;
+		} else if (rela->sym->sec->idx) {
+			dest_sec = rela->sym->sec;
+			dest_off = rela->sym->sym.st_value + rela->addend + 4;
+		} else {
+			/* sibling call */
+			insn->jump_dest = 0;
+			continue;
+		}
+
+		insn->jump_dest = find_insn(file, dest_sec, dest_off);
+		if (!insn->jump_dest) {
+
+			/*
+			 * This is a special case where an alt instruction
+			 * jumps past the end of the section.  These are
+			 * handled later in handle_group_alt().
+			 */
+			if (!strcmp(insn->sec->name, ".altinstr_replacement"))
+				continue;
+
+			WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
+				  insn->sec, insn->offset, dest_sec->name,
+				  dest_off);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Find the destination instructions for all calls.
+ */
+static int add_call_destinations(struct objtool_file *file)
+{
+	struct instruction *insn;
+	unsigned long dest_off;
+	struct rela *rela;
+
+	for_each_insn(file, insn) {
+		if (insn->type != INSN_CALL)
+			continue;
+
+		rela = find_rela_by_dest_range(insn->sec, insn->offset,
+					       insn->len);
+		if (!rela) {
+			dest_off = insn->offset + insn->len + insn->immediate;
+			insn->call_dest = find_symbol_by_offset(insn->sec,
+								dest_off);
+			if (!insn->call_dest) {
+				WARN_FUNC("can't find call dest symbol at offset 0x%lx",
+					  insn->sec, insn->offset, dest_off);
+				return -1;
+			}
+		} else if (rela->sym->type == STT_SECTION) {
+			insn->call_dest = find_symbol_by_offset(rela->sym->sec,
+								rela->addend+4);
+			if (!insn->call_dest ||
+			    insn->call_dest->type != STT_FUNC) {
+				WARN_FUNC("can't find call dest symbol at %s+0x%x",
+					  insn->sec, insn->offset,
+					  rela->sym->sec->name,
+					  rela->addend + 4);
+				return -1;
+			}
+		} else
+			insn->call_dest = rela->sym;
+	}
+
+	return 0;
+}
+
+/*
+ * The .alternatives section requires some extra special care, over and above
+ * what other special sections require:
+ *
+ * 1. Because alternatives are patched in-place, we need to insert a fake jump
+ *    instruction at the end so that validate_branch() skips all the original
+ *    replaced instructions when validating the new instruction path.
+ *
+ * 2. An added wrinkle is that the new instruction length might be zero.  In
+ *    that case the old instructions are replaced with noops.  We simulate that
+ *    by creating a fake jump as the only new instruction.
+ *
+ * 3. In some cases, the alternative section includes an instruction which
+ *    conditionally jumps to the _end_ of the entry.  We have to modify these
+ *    jumps' destinations to point back to .text rather than the end of the
+ *    entry in .altinstr_replacement.
+ *
+ * 4. It has been requested that we don't validate the !POPCNT feature path
+ *    which is a "very very small percentage of machines".
+ */
+static int handle_group_alt(struct objtool_file *file,
+			    struct special_alt *special_alt,
+			    struct instruction *orig_insn,
+			    struct instruction **new_insn)
+{
+	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
+	unsigned long dest_off;
+
+	last_orig_insn = NULL;
+	insn = orig_insn;
+	sec_for_each_insn_from(file, insn) {
+		if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
+			break;
+
+		if (special_alt->skip_orig)
+			insn->type = INSN_NOP;
+
+		insn->alt_group = true;
+		last_orig_insn = insn;
+	}
+
+	if (!next_insn_same_sec(file, last_orig_insn)) {
+		WARN("%s: don't know how to handle alternatives at end of section",
+		     special_alt->orig_sec->name);
+		return -1;
+	}
+
+	fake_jump = malloc(sizeof(*fake_jump));
+	if (!fake_jump) {
+		WARN("malloc failed");
+		return -1;
+	}
+	memset(fake_jump, 0, sizeof(*fake_jump));
+	INIT_LIST_HEAD(&fake_jump->alts);
+	clear_insn_state(&fake_jump->state);
+
+	fake_jump->sec = special_alt->new_sec;
+	fake_jump->offset = -1;
+	fake_jump->type = INSN_JUMP_UNCONDITIONAL;
+	fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
+	fake_jump->ignore = true;
+
+	if (!special_alt->new_len) {
+		*new_insn = fake_jump;
+		return 0;
+	}
+
+	last_new_insn = NULL;
+	insn = *new_insn;
+	sec_for_each_insn_from(file, insn) {
+		if (insn->offset >= special_alt->new_off + special_alt->new_len)
+			break;
+
+		last_new_insn = insn;
+
+		if (insn->type != INSN_JUMP_CONDITIONAL &&
+		    insn->type != INSN_JUMP_UNCONDITIONAL)
+			continue;
+
+		if (!insn->immediate)
+			continue;
+
+		dest_off = insn->offset + insn->len + insn->immediate;
+		if (dest_off == special_alt->new_off + special_alt->new_len)
+			insn->jump_dest = fake_jump;
+
+		if (!insn->jump_dest) {
+			WARN_FUNC("can't find alternative jump destination",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+	}
+
+	if (!last_new_insn) {
+		WARN_FUNC("can't find last new alternative instruction",
+			  special_alt->new_sec, special_alt->new_off);
+		return -1;
+	}
+
+	list_add(&fake_jump->list, &last_new_insn->list);
+
+	return 0;
+}
+
+/*
+ * A jump table entry can either convert a nop to a jump or a jump to a nop.
+ * If the original instruction is a jump, make the alt entry an effective nop
+ * by just skipping the original instruction.
+ */
+static int handle_jump_alt(struct objtool_file *file,
+			   struct special_alt *special_alt,
+			   struct instruction *orig_insn,
+			   struct instruction **new_insn)
+{
+	if (orig_insn->type == INSN_NOP)
+		return 0;
+
+	if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
+		WARN_FUNC("unsupported instruction at jump label",
+			  orig_insn->sec, orig_insn->offset);
+		return -1;
+	}
+
+	*new_insn = list_next_entry(orig_insn, list);
+	return 0;
+}
+
+/*
+ * Read all the special sections which have alternate instructions which can be
+ * patched in or redirected to at runtime.  Each instruction having alternate
+ * instruction(s) has them added to its insn->alts list, which will be
+ * traversed in validate_branch().
+ */
+static int add_special_section_alts(struct objtool_file *file)
+{
+	struct list_head special_alts;
+	struct instruction *orig_insn, *new_insn;
+	struct special_alt *special_alt, *tmp;
+	struct alternative *alt;
+	int ret;
+
+	ret = special_get_alts(file->elf, &special_alts);
+	if (ret)
+		return ret;
+
+	list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
+		alt = malloc(sizeof(*alt));
+		if (!alt) {
+			WARN("malloc failed");
+			ret = -1;
+			goto out;
+		}
+
+		orig_insn = find_insn(file, special_alt->orig_sec,
+				      special_alt->orig_off);
+		if (!orig_insn) {
+			WARN_FUNC("special: can't find orig instruction",
+				  special_alt->orig_sec, special_alt->orig_off);
+			ret = -1;
+			goto out;
+		}
+
+		new_insn = NULL;
+		if (!special_alt->group || special_alt->new_len) {
+			new_insn = find_insn(file, special_alt->new_sec,
+					     special_alt->new_off);
+			if (!new_insn) {
+				WARN_FUNC("special: can't find new instruction",
+					  special_alt->new_sec,
+					  special_alt->new_off);
+				ret = -1;
+				goto out;
+			}
+		}
+
+		if (special_alt->group) {
+			ret = handle_group_alt(file, special_alt, orig_insn,
+					       &new_insn);
+			if (ret)
+				goto out;
+		} else if (special_alt->jump_or_nop) {
+			ret = handle_jump_alt(file, special_alt, orig_insn,
+					      &new_insn);
+			if (ret)
+				goto out;
+		}
+
+		alt->insn = new_insn;
+		list_add_tail(&alt->list, &orig_insn->alts);
+
+		list_del(&special_alt->list);
+		free(special_alt);
+	}
+
+out:
+	return ret;
+}
+
+static int add_switch_table(struct objtool_file *file, struct symbol *func,
+			    struct instruction *insn, struct rela *table,
+			    struct rela *next_table)
+{
+	struct rela *rela = table;
+	struct instruction *alt_insn;
+	struct alternative *alt;
+
+	list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
+		if (rela == next_table)
+			break;
+
+		if (rela->sym->sec != insn->sec ||
+		    rela->addend <= func->offset ||
+		    rela->addend >= func->offset + func->len)
+			break;
+
+		alt_insn = find_insn(file, insn->sec, rela->addend);
+		if (!alt_insn) {
+			WARN("%s: can't find instruction at %s+0x%x",
+			     file->rodata->rela->name, insn->sec->name,
+			     rela->addend);
+			return -1;
+		}
+
+		alt = malloc(sizeof(*alt));
+		if (!alt) {
+			WARN("malloc failed");
+			return -1;
+		}
+
+		alt->insn = alt_insn;
+		list_add_tail(&alt->list, &insn->alts);
+	}
+
+	return 0;
+}
+
+/*
+ * find_switch_table() - Given a dynamic jump, find the switch jump table in
+ * .rodata associated with it.
+ *
+ * There are 3 basic patterns:
+ *
+ * 1. jmpq *[rodata addr](,%reg,8)
+ *
+ *    This is the most common case by far.  It jumps to an address in a simple
+ *    jump table which is stored in .rodata.
+ *
+ * 2. jmpq *[rodata addr](%rip)
+ *
+ *    This is caused by a rare GCC quirk, currently only seen in three driver
+ *    functions in the kernel, only with certain obscure non-distro configs.
+ *
+ *    As part of an optimization, GCC makes a copy of an existing switch jump
+ *    table, modifies it, and then hard-codes the jump (albeit with an indirect
+ *    jump) to use a single entry in the table.  The rest of the jump table and
+ *    some of its jump targets remain as dead code.
+ *
+ *    In such a case we can just crudely ignore all unreachable instruction
+ *    warnings for the entire object file.  Ideally we would just ignore them
+ *    for the function, but that would require redesigning the code quite a
+ *    bit.  And honestly that's just not worth doing: unreachable instruction
+ *    warnings are of questionable value anyway, and this is such a rare issue.
+ *
+ * 3. mov [rodata addr],%reg1
+ *    ... some instructions ...
+ *    jmpq *(%reg1,%reg2,8)
+ *
+ *    This is a fairly uncommon pattern which is new for GCC 6.  As of this
+ *    writing, there are 11 occurrences of it in the allmodconfig kernel.
+ *
+ *    TODO: Once we have DWARF CFI and smarter instruction decoding logic,
+ *    ensure the same register is used in the mov and jump instructions.
+ */
+static struct rela *find_switch_table(struct objtool_file *file,
+				      struct symbol *func,
+				      struct instruction *insn)
+{
+	struct rela *text_rela, *rodata_rela;
+	struct instruction *orig_insn = insn;
+
+	text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
+	if (text_rela && text_rela->sym == file->rodata->sym) {
+		/* case 1 */
+		rodata_rela = find_rela_by_dest(file->rodata,
+						text_rela->addend);
+		if (rodata_rela)
+			return rodata_rela;
+
+		/* case 2 */
+		rodata_rela = find_rela_by_dest(file->rodata,
+						text_rela->addend + 4);
+		if (!rodata_rela)
+			return NULL;
+		file->ignore_unreachables = true;
+		return rodata_rela;
+	}
+
+	/* case 3 */
+	func_for_each_insn_continue_reverse(file, func, insn) {
+		if (insn->type == INSN_JUMP_DYNAMIC)
+			break;
+
+		/* allow small jumps within the range */
+		if (insn->type == INSN_JUMP_UNCONDITIONAL &&
+		    insn->jump_dest &&
+		    (insn->jump_dest->offset <= insn->offset ||
+		     insn->jump_dest->offset > orig_insn->offset))
+		    break;
+
+		/* look for a relocation which references .rodata */
+		text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
+						    insn->len);
+		if (!text_rela || text_rela->sym != file->rodata->sym)
+			continue;
+
+		/*
+		 * Make sure the .rodata address isn't associated with a
+		 * symbol.  gcc jump tables are anonymous data.
+		 */
+		if (find_symbol_containing(file->rodata, text_rela->addend))
+			continue;
+
+		return find_rela_by_dest(file->rodata, text_rela->addend);
+	}
+
+	return NULL;
+}
+
+static int add_func_switch_tables(struct objtool_file *file,
+				  struct symbol *func)
+{
+	struct instruction *insn, *prev_jump = NULL;
+	struct rela *rela, *prev_rela = NULL;
+	int ret;
+
+	func_for_each_insn(file, func, insn) {
+		if (insn->type != INSN_JUMP_DYNAMIC)
+			continue;
+
+		rela = find_switch_table(file, func, insn);
+		if (!rela)
+			continue;
+
+		/*
+		 * We found a switch table, but we don't know yet how big it
+		 * is.  Don't add it until we reach the end of the function or
+		 * the beginning of another switch table in the same function.
+		 */
+		if (prev_jump) {
+			ret = add_switch_table(file, func, prev_jump, prev_rela,
+					       rela);
+			if (ret)
+				return ret;
+		}
+
+		prev_jump = insn;
+		prev_rela = rela;
+	}
+
+	if (prev_jump) {
+		ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * For some switch statements, gcc generates a jump table in the .rodata
+ * section which contains a list of addresses within the function to jump to.
+ * This finds these jump tables and adds them to the insn->alts lists.
+ */
+static int add_switch_table_alts(struct objtool_file *file)
+{
+	struct section *sec;
+	struct symbol *func;
+	int ret;
+
+	if (!file->rodata || !file->rodata->rela)
+		return 0;
+
+	for_each_sec(file, sec) {
+		list_for_each_entry(func, &sec->symbol_list, list) {
+			if (func->type != STT_FUNC)
+				continue;
+
+			ret = add_func_switch_tables(file, func);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int decode_sections(struct objtool_file *file)
+{
+	int ret;
+
+	ret = decode_instructions(file);
+	if (ret)
+		return ret;
+
+	ret = add_dead_ends(file);
+	if (ret)
+		return ret;
+
+	add_ignores(file);
+
+	ret = add_jump_destinations(file);
+	if (ret)
+		return ret;
+
+	ret = add_call_destinations(file);
+	if (ret)
+		return ret;
+
+	ret = add_special_section_alts(file);
+	if (ret)
+		return ret;
+
+	ret = add_switch_table_alts(file);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static bool is_fentry_call(struct instruction *insn)
+{
+	if (insn->type == INSN_CALL &&
+	    insn->call_dest->type == STT_NOTYPE &&
+	    !strcmp(insn->call_dest->name, "__fentry__"))
+		return true;
+
+	return false;
+}
+
+static bool has_modified_stack_frame(struct insn_state *state)
+{
+	int i;
+
+	if (state->cfa.base != initial_func_cfi.cfa.base ||
+	    state->cfa.offset != initial_func_cfi.cfa.offset ||
+	    state->stack_size != initial_func_cfi.cfa.offset ||
+	    state->drap)
+		return true;
+
+	for (i = 0; i < CFI_NUM_REGS; i++)
+		if (state->regs[i].base != initial_func_cfi.regs[i].base ||
+		    state->regs[i].offset != initial_func_cfi.regs[i].offset)
+			return true;
+
+	return false;
+}
+
+static bool has_valid_stack_frame(struct insn_state *state)
+{
+	if (state->cfa.base == CFI_BP && state->regs[CFI_BP].base == CFI_CFA &&
+	    state->regs[CFI_BP].offset == -16)
+		return true;
+
+	if (state->drap && state->regs[CFI_BP].base == CFI_BP)
+		return true;
+
+	return false;
+}
+
+static void save_reg(struct insn_state *state, unsigned char reg, int base,
+		     int offset)
+{
+	if ((arch_callee_saved_reg(reg) ||
+	    (state->drap && reg == state->drap_reg)) &&
+	    state->regs[reg].base == CFI_UNDEFINED) {
+		state->regs[reg].base = base;
+		state->regs[reg].offset = offset;
+	}
+}
+
+static void restore_reg(struct insn_state *state, unsigned char reg)
+{
+	state->regs[reg].base = CFI_UNDEFINED;
+	state->regs[reg].offset = 0;
+}
+
+/*
+ * A note about DRAP stack alignment:
+ *
+ * GCC has the concept of a DRAP register, which is used to help keep track of
+ * the stack pointer when aligning the stack.  r10 or r13 is used as the DRAP
+ * register.  The typical DRAP pattern is:
+ *
+ *   4c 8d 54 24 08		lea    0x8(%rsp),%r10
+ *   48 83 e4 c0		and    $0xffffffffffffffc0,%rsp
+ *   41 ff 72 f8		pushq  -0x8(%r10)
+ *   55				push   %rbp
+ *   48 89 e5			mov    %rsp,%rbp
+ *				(more pushes)
+ *   41 52			push   %r10
+ *				...
+ *   41 5a			pop    %r10
+ *				(more pops)
+ *   5d				pop    %rbp
+ *   49 8d 62 f8		lea    -0x8(%r10),%rsp
+ *   c3				retq
+ *
+ * There are some variations in the epilogues, like:
+ *
+ *   5b				pop    %rbx
+ *   41 5a			pop    %r10
+ *   41 5c			pop    %r12
+ *   41 5d			pop    %r13
+ *   41 5e			pop    %r14
+ *   c9				leaveq
+ *   49 8d 62 f8		lea    -0x8(%r10),%rsp
+ *   c3				retq
+ *
+ * and:
+ *
+ *   4c 8b 55 e8		mov    -0x18(%rbp),%r10
+ *   48 8b 5d e0		mov    -0x20(%rbp),%rbx
+ *   4c 8b 65 f0		mov    -0x10(%rbp),%r12
+ *   4c 8b 6d f8		mov    -0x8(%rbp),%r13
+ *   c9				leaveq
+ *   49 8d 62 f8		lea    -0x8(%r10),%rsp
+ *   c3				retq
+ *
+ * Sometimes r13 is used as the DRAP register, in which case it's saved and
+ * restored beforehand:
+ *
+ *   41 55			push   %r13
+ *   4c 8d 6c 24 10		lea    0x10(%rsp),%r13
+ *   48 83 e4 f0		and    $0xfffffffffffffff0,%rsp
+ *				...
+ *   49 8d 65 f0		lea    -0x10(%r13),%rsp
+ *   41 5d			pop    %r13
+ *   c3				retq
+ */
+static int update_insn_state(struct instruction *insn, struct insn_state *state)
+{
+	struct stack_op *op = &insn->stack_op;
+	struct cfi_reg *cfa = &state->cfa;
+	struct cfi_reg *regs = state->regs;
+
+	/* stack operations don't make sense with an undefined CFA */
+	if (cfa->base == CFI_UNDEFINED) {
+		if (insn->func) {
+			WARN_FUNC("undefined stack state", insn->sec, insn->offset);
+			return -1;
+		}
+		return 0;
+	}
+
+	switch (op->dest.type) {
+
+	case OP_DEST_REG:
+		switch (op->src.type) {
+
+		case OP_SRC_REG:
+			if (cfa->base == op->src.reg && cfa->base == CFI_SP &&
+			    op->dest.reg == CFI_BP && regs[CFI_BP].base == CFI_CFA &&
+			    regs[CFI_BP].offset == -cfa->offset) {
+
+				/* mov %rsp, %rbp */
+				cfa->base = op->dest.reg;
+				state->bp_scratch = false;
+			} else if (state->drap) {
+
+				/* drap: mov %rsp, %rbp */
+				regs[CFI_BP].base = CFI_BP;
+				regs[CFI_BP].offset = -state->stack_size;
+				state->bp_scratch = false;
+			} else if (!nofp) {
+
+				WARN_FUNC("unknown stack-related register move",
+					  insn->sec, insn->offset);
+				return -1;
+			}
+
+			break;
+
+		case OP_SRC_ADD:
+			if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) {
+
+				/* add imm, %rsp */
+				state->stack_size -= op->src.offset;
+				if (cfa->base == CFI_SP)
+					cfa->offset -= op->src.offset;
+				break;
+			}
+
+			if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) {
+
+				/* lea disp(%rbp), %rsp */
+				state->stack_size = -(op->src.offset + regs[CFI_BP].offset);
+				break;
+			}
+
+			if (op->dest.reg != CFI_BP && op->src.reg == CFI_SP &&
+			    cfa->base == CFI_SP) {
+
+				/* drap: lea disp(%rsp), %drap */
+				state->drap_reg = op->dest.reg;
+				break;
+			}
+
+			if (state->drap && op->dest.reg == CFI_SP &&
+			    op->src.reg == state->drap_reg) {
+
+				 /* drap: lea disp(%drap), %rsp */
+				cfa->base = CFI_SP;
+				cfa->offset = state->stack_size = -op->src.offset;
+				state->drap_reg = CFI_UNDEFINED;
+				state->drap = false;
+				break;
+			}
+
+			if (op->dest.reg == state->cfa.base) {
+				WARN_FUNC("unsupported stack register modification",
+					  insn->sec, insn->offset);
+				return -1;
+			}
+
+			break;
+
+		case OP_SRC_AND:
+			if (op->dest.reg != CFI_SP ||
+			    (state->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) ||
+			    (state->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) {
+				WARN_FUNC("unsupported stack pointer realignment",
+					  insn->sec, insn->offset);
+				return -1;
+			}
+
+			if (state->drap_reg != CFI_UNDEFINED) {
+				/* drap: and imm, %rsp */
+				cfa->base = state->drap_reg;
+				cfa->offset = state->stack_size = 0;
+				state->drap = true;
+
+			}
+
+			/*
+			 * Older versions of GCC (4.8ish) realign the stack
+			 * without DRAP, with a frame pointer.
+			 */
+
+			break;
+
+		case OP_SRC_POP:
+			if (!state->drap && op->dest.type == OP_DEST_REG &&
+			    op->dest.reg == cfa->base) {
+
+				/* pop %rbp */
+				cfa->base = CFI_SP;
+			}
+
+			if (regs[op->dest.reg].offset == -state->stack_size) {
+
+				if (state->drap && cfa->base == CFI_BP_INDIRECT &&
+				    op->dest.type == OP_DEST_REG &&
+				    op->dest.reg == state->drap_reg) {
+
+					/* drap: pop %drap */
+					cfa->base = state->drap_reg;
+					cfa->offset = 0;
+				}
+
+				restore_reg(state, op->dest.reg);
+			}
+
+			state->stack_size -= 8;
+			if (cfa->base == CFI_SP)
+				cfa->offset -= 8;
+
+			break;
+
+		case OP_SRC_REG_INDIRECT:
+			if (state->drap && op->src.reg == CFI_BP &&
+			    op->src.offset == regs[op->dest.reg].offset) {
+
+				/* drap: mov disp(%rbp), %reg */
+				if (op->dest.reg == state->drap_reg) {
+					cfa->base = state->drap_reg;
+					cfa->offset = 0;
+				}
+
+				restore_reg(state, op->dest.reg);
+
+			} else if (op->src.reg == cfa->base &&
+			    op->src.offset == regs[op->dest.reg].offset + cfa->offset) {
+
+				/* mov disp(%rbp), %reg */
+				/* mov disp(%rsp), %reg */
+				restore_reg(state, op->dest.reg);
+			}
+
+			break;
+
+		default:
+			WARN_FUNC("unknown stack-related instruction",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+
+		break;
+
+	case OP_DEST_PUSH:
+		state->stack_size += 8;
+		if (cfa->base == CFI_SP)
+			cfa->offset += 8;
+
+		if (op->src.type != OP_SRC_REG)
+			break;
+
+		if (state->drap) {
+			if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+
+				/* drap: push %drap */
+				cfa->base = CFI_BP_INDIRECT;
+				cfa->offset = -state->stack_size;
+
+				/* save drap so we know when to undefine it */
+				save_reg(state, op->src.reg, CFI_CFA, -state->stack_size);
+
+			} else if (op->src.reg == CFI_BP && cfa->base == state->drap_reg) {
+
+				/* drap: push %rbp */
+				state->stack_size = 0;
+
+			} else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+
+				/* drap: push %reg */
+				save_reg(state, op->src.reg, CFI_BP, -state->stack_size);
+			}
+
+		} else {
+
+			/* push %reg */
+			save_reg(state, op->src.reg, CFI_CFA, -state->stack_size);
+		}
+
+		/* detect when asm code uses rbp as a scratch register */
+		if (!nofp && insn->func && op->src.reg == CFI_BP &&
+		    cfa->base != CFI_BP)
+			state->bp_scratch = true;
+		break;
+
+	case OP_DEST_REG_INDIRECT:
+
+		if (state->drap) {
+			if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+
+				/* drap: mov %drap, disp(%rbp) */
+				cfa->base = CFI_BP_INDIRECT;
+				cfa->offset = op->dest.offset;
+
+				/* save drap so we know when to undefine it */
+				save_reg(state, op->src.reg, CFI_CFA, op->dest.offset);
+			}
+
+			else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+
+				/* drap: mov reg, disp(%rbp) */
+				save_reg(state, op->src.reg, CFI_BP, op->dest.offset);
+			}
+
+		} else if (op->dest.reg == cfa->base) {
+
+			/* mov reg, disp(%rbp) */
+			/* mov reg, disp(%rsp) */
+			save_reg(state, op->src.reg, CFI_CFA,
+				 op->dest.offset - state->cfa.offset);
+		}
+
+		break;
+
+	case OP_DEST_LEAVE:
+		if ((!state->drap && cfa->base != CFI_BP) ||
+		    (state->drap && cfa->base != state->drap_reg)) {
+			WARN_FUNC("leave instruction with modified stack frame",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+
+		/* leave (mov %rbp, %rsp; pop %rbp) */
+
+		state->stack_size = -state->regs[CFI_BP].offset - 8;
+		restore_reg(state, CFI_BP);
+
+		if (!state->drap) {
+			cfa->base = CFI_SP;
+			cfa->offset -= 8;
+		}
+
+		break;
+
+	case OP_DEST_MEM:
+		if (op->src.type != OP_SRC_POP) {
+			WARN_FUNC("unknown stack-related memory operation",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+
+		/* pop mem */
+		state->stack_size -= 8;
+		if (cfa->base == CFI_SP)
+			cfa->offset -= 8;
+
+		break;
+
+	default:
+		WARN_FUNC("unknown stack-related instruction",
+			  insn->sec, insn->offset);
+		return -1;
+	}
+
+	return 0;
+}
+
+static bool insn_state_match(struct instruction *insn, struct insn_state *state)
+{
+	struct insn_state *state1 = &insn->state, *state2 = state;
+	int i;
+
+	if (memcmp(&state1->cfa, &state2->cfa, sizeof(state1->cfa))) {
+		WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d",
+			  insn->sec, insn->offset,
+			  state1->cfa.base, state1->cfa.offset,
+			  state2->cfa.base, state2->cfa.offset);
+
+	} else if (memcmp(&state1->regs, &state2->regs, sizeof(state1->regs))) {
+		for (i = 0; i < CFI_NUM_REGS; i++) {
+			if (!memcmp(&state1->regs[i], &state2->regs[i],
+				    sizeof(struct cfi_reg)))
+				continue;
+
+			WARN_FUNC("stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d",
+				  insn->sec, insn->offset,
+				  i, state1->regs[i].base, state1->regs[i].offset,
+				  i, state2->regs[i].base, state2->regs[i].offset);
+			break;
+		}
+
+	} else if (state1->drap != state2->drap ||
+		 (state1->drap && state1->drap_reg != state2->drap_reg)) {
+		WARN_FUNC("stack state mismatch: drap1=%d(%d) drap2=%d(%d)",
+			  insn->sec, insn->offset,
+			  state1->drap, state1->drap_reg,
+			  state2->drap, state2->drap_reg);
+
+	} else
+		return true;
+
+	return false;
+}
+
+/*
+ * Follow the branch starting at the given instruction, and recursively follow
+ * any other branches (jumps).  Meanwhile, track the frame pointer state at
+ * each instruction and validate all the rules described in
+ * tools/objtool/Documentation/stack-validation.txt.
+ */
+static int validate_branch(struct objtool_file *file, struct instruction *first,
+			   struct insn_state state)
+{
+	struct alternative *alt;
+	struct instruction *insn;
+	struct section *sec;
+	struct symbol *func = NULL;
+	int ret;
+
+	insn = first;
+	sec = insn->sec;
+
+	if (insn->alt_group && list_empty(&insn->alts)) {
+		WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
+			  sec, insn->offset);
+		return -1;
+	}
+
+	while (1) {
+		if (file->c_file && insn->func) {
+			if (func && func != insn->func) {
+				WARN("%s() falls through to next function %s()",
+				     func->name, insn->func->name);
+				return 1;
+			}
+		}
+
+		func = insn->func;
+
+		if (insn->visited) {
+			if (!!insn_state_match(insn, &state))
+				return 1;
+
+			return 0;
+		}
+
+		insn->state = state;
+
+		insn->visited = true;
+
+		list_for_each_entry(alt, &insn->alts, list) {
+			ret = validate_branch(file, alt->insn, state);
+			if (ret)
+				return 1;
+		}
+
+		switch (insn->type) {
+
+		case INSN_RETURN:
+			if (func && has_modified_stack_frame(&state)) {
+				WARN_FUNC("return with modified stack frame",
+					  sec, insn->offset);
+				return 1;
+			}
+
+			if (state.bp_scratch) {
+				WARN("%s uses BP as a scratch register",
+				     insn->func->name);
+				return 1;
+			}
+
+			return 0;
+
+		case INSN_CALL:
+			if (is_fentry_call(insn))
+				break;
+
+			ret = dead_end_function(file, insn->call_dest);
+			if (ret == 1)
+				return 0;
+			if (ret == -1)
+				return 1;
+
+			/* fallthrough */
+		case INSN_CALL_DYNAMIC:
+			if (!nofp && func && !has_valid_stack_frame(&state)) {
+				WARN_FUNC("call without frame pointer save/setup",
+					  sec, insn->offset);
+				return 1;
+			}
+			break;
+
+		case INSN_JUMP_CONDITIONAL:
+		case INSN_JUMP_UNCONDITIONAL:
+			if (insn->jump_dest) {
+				ret = validate_branch(file, insn->jump_dest,
+						      state);
+				if (ret)
+					return 1;
+			} else if (func && has_modified_stack_frame(&state)) {
+				WARN_FUNC("sibling call from callable instruction with modified stack frame",
+					  sec, insn->offset);
+				return 1;
+			} /* else it's a sibling call */
+
+			if (insn->type == INSN_JUMP_UNCONDITIONAL)
+				return 0;
+
+			break;
+
+		case INSN_JUMP_DYNAMIC:
+			if (func && list_empty(&insn->alts) &&
+			    has_modified_stack_frame(&state)) {
+				WARN_FUNC("sibling call from callable instruction with modified stack frame",
+					  sec, insn->offset);
+				return 1;
+			}
+
+			return 0;
+
+		case INSN_STACK:
+			if (update_insn_state(insn, &state))
+				return -1;
+
+			break;
+
+		default:
+			break;
+		}
+
+		if (insn->dead_end)
+			return 0;
+
+		insn = next_insn_same_sec(file, insn);
+		if (!insn) {
+			WARN("%s: unexpected end of section", sec->name);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static bool is_kasan_insn(struct instruction *insn)
+{
+	return (insn->type == INSN_CALL &&
+		!strcmp(insn->call_dest->name, "__asan_handle_no_return"));
+}
+
+static bool is_ubsan_insn(struct instruction *insn)
+{
+	return (insn->type == INSN_CALL &&
+		!strcmp(insn->call_dest->name,
+			"__ubsan_handle_builtin_unreachable"));
+}
+
+static bool ignore_unreachable_insn(struct instruction *insn)
+{
+	int i;
+
+	if (insn->ignore || insn->type == INSN_NOP)
+		return true;
+
+	/*
+	 * Ignore any unused exceptions.  This can happen when a whitelisted
+	 * function has an exception table entry.
+	 */
+	if (!strcmp(insn->sec->name, ".fixup"))
+		return true;
+
+	/*
+	 * Check if this (or a subsequent) instruction is related to
+	 * CONFIG_UBSAN or CONFIG_KASAN.
+	 *
+	 * End the search at 5 instructions to avoid going into the weeds.
+	 */
+	if (!insn->func)
+		return false;
+	for (i = 0; i < 5; i++) {
+
+		if (is_kasan_insn(insn) || is_ubsan_insn(insn))
+			return true;
+
+		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
+			insn = insn->jump_dest;
+			continue;
+		}
+
+		if (insn->offset + insn->len >= insn->func->offset + insn->func->len)
+			break;
+		insn = list_next_entry(insn, list);
+	}
+
+	return false;
+}
+
+static int validate_functions(struct objtool_file *file)
+{
+	struct section *sec;
+	struct symbol *func;
+	struct instruction *insn;
+	struct insn_state state;
+	int ret, warnings = 0;
+
+	clear_insn_state(&state);
+
+	state.cfa = initial_func_cfi.cfa;
+	memcpy(&state.regs, &initial_func_cfi.regs,
+	       CFI_NUM_REGS * sizeof(struct cfi_reg));
+	state.stack_size = initial_func_cfi.cfa.offset;
+
+	for_each_sec(file, sec) {
+		list_for_each_entry(func, &sec->symbol_list, list) {
+			if (func->type != STT_FUNC)
+				continue;
+
+			insn = find_insn(file, sec, func->offset);
+			if (!insn || insn->ignore)
+				continue;
+
+			ret = validate_branch(file, insn, state);
+			warnings += ret;
+		}
+	}
+
+	return warnings;
+}
+
+static int validate_reachable_instructions(struct objtool_file *file)
+{
+	struct instruction *insn;
+
+	if (file->ignore_unreachables)
+		return 0;
+
+	for_each_insn(file, insn) {
+		if (insn->visited || ignore_unreachable_insn(insn))
+			continue;
+
+		/*
+		 * gcov produces a lot of unreachable instructions.  If we get
+		 * an unreachable warning and the file has gcov enabled, just
+		 * ignore it, and all other such warnings for the file.  Do
+		 * this here because this is an expensive function.
+		 */
+		if (gcov_enabled(file))
+			return 0;
+
+		WARN_FUNC("unreachable instruction", insn->sec, insn->offset);
+		return 1;
+	}
+
+	return 0;
+}
+
+static void cleanup(struct objtool_file *file)
+{
+	struct instruction *insn, *tmpinsn;
+	struct alternative *alt, *tmpalt;
+
+	list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) {
+		list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) {
+			list_del(&alt->list);
+			free(alt);
+		}
+		list_del(&insn->list);
+		hash_del(&insn->hash);
+		free(insn);
+	}
+	elf_close(file->elf);
+}
+
+int check(const char *_objname, bool _nofp)
+{
+	struct objtool_file file;
+	int ret, warnings = 0;
+
+	objname = _objname;
+	nofp = _nofp;
+
+	file.elf = elf_open(objname);
+	if (!file.elf)
+		return 1;
+
+	INIT_LIST_HEAD(&file.insn_list);
+	hash_init(file.insn_hash);
+	file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
+	file.rodata = find_section_by_name(file.elf, ".rodata");
+	file.ignore_unreachables = false;
+	file.c_file = find_section_by_name(file.elf, ".comment");
+
+	arch_initial_func_cfi_state(&initial_func_cfi);
+
+	ret = decode_sections(&file);
+	if (ret < 0)
+		goto out;
+	warnings += ret;
+
+	if (list_empty(&file.insn_list))
+		goto out;
+
+	ret = validate_functions(&file);
+	if (ret < 0)
+		goto out;
+	warnings += ret;
+
+	if (!warnings) {
+		ret = validate_reachable_instructions(&file);
+		if (ret < 0)
+			goto out;
+		warnings += ret;
+	}
+
+out:
+	cleanup(&file);
+
+	/* ignore warnings for now until we get all the code cleaned up */
+	if (ret || warnings)
+		return 0;
+	return 0;
+}
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
new file mode 100644
index 000000000000..da85f5b00ec6
--- /dev/null
+++ b/tools/objtool/check.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _CHECK_H
+#define _CHECK_H
+
+#include <stdbool.h>
+#include "elf.h"
+#include "cfi.h"
+#include "arch.h"
+#include <linux/hashtable.h>
+
+struct insn_state {
+	struct cfi_reg cfa;
+	struct cfi_reg regs[CFI_NUM_REGS];
+	int stack_size;
+	bool bp_scratch;
+	bool drap;
+	int drap_reg;
+};
+
+struct instruction {
+	struct list_head list;
+	struct hlist_node hash;
+	struct section *sec;
+	unsigned long offset;
+	unsigned int len;
+	unsigned char type;
+	unsigned long immediate;
+	bool alt_group, visited, dead_end, ignore;
+	struct symbol *call_dest;
+	struct instruction *jump_dest;
+	struct list_head alts;
+	struct symbol *func;
+	struct stack_op stack_op;
+	struct insn_state state;
+};
+
+struct objtool_file {
+	struct elf *elf;
+	struct list_head insn_list;
+	DECLARE_HASHTABLE(insn_hash, 16);
+	struct section *rodata, *whitelist;
+	bool ignore_unreachables, c_file;
+};
+
+int check(const char *objname, bool nofp);
+
+#define for_each_insn(file, insn)					\
+	list_for_each_entry(insn, &file->insn_list, list)
+
+#endif /* _CHECK_H */
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index d897702ce742..1a7e8aa2af58 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -37,6 +37,9 @@
 #define ELF_C_READ_MMAP ELF_C_READ
 #endif
 
+#define WARN_ELF(format, ...)					\
+	WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1))
+
 struct section *find_section_by_name(struct elf *elf, const char *name)
 {
 	struct section *sec;
@@ -139,12 +142,12 @@ static int read_sections(struct elf *elf)
 	int i;
 
 	if (elf_getshdrnum(elf->elf, &sections_nr)) {
-		perror("elf_getshdrnum");
+		WARN_ELF("elf_getshdrnum");
 		return -1;
 	}
 
 	if (elf_getshdrstrndx(elf->elf, &shstrndx)) {
-		perror("elf_getshdrstrndx");
+		WARN_ELF("elf_getshdrstrndx");
 		return -1;
 	}
 
@@ -165,37 +168,36 @@ static int read_sections(struct elf *elf)
 
 		s = elf_getscn(elf->elf, i);
 		if (!s) {
-			perror("elf_getscn");
+			WARN_ELF("elf_getscn");
 			return -1;
 		}
 
 		sec->idx = elf_ndxscn(s);
 
 		if (!gelf_getshdr(s, &sec->sh)) {
-			perror("gelf_getshdr");
+			WARN_ELF("gelf_getshdr");
 			return -1;
 		}
 
 		sec->name = elf_strptr(elf->elf, shstrndx, sec->sh.sh_name);
 		if (!sec->name) {
-			perror("elf_strptr");
+			WARN_ELF("elf_strptr");
 			return -1;
 		}
 
-		sec->elf_data = elf_getdata(s, NULL);
-		if (!sec->elf_data) {
-			perror("elf_getdata");
+		sec->data = elf_getdata(s, NULL);
+		if (!sec->data) {
+			WARN_ELF("elf_getdata");
 			return -1;
 		}
 
-		if (sec->elf_data->d_off != 0 ||
-		    sec->elf_data->d_size != sec->sh.sh_size) {
+		if (sec->data->d_off != 0 ||
+		    sec->data->d_size != sec->sh.sh_size) {
 			WARN("unexpected data attributes for %s", sec->name);
 			return -1;
 		}
 
-		sec->data = (unsigned long)sec->elf_data->d_buf;
-		sec->len = sec->elf_data->d_size;
+		sec->len = sec->data->d_size;
 	}
 
 	/* sanity check, one more call to elf_nextscn() should return NULL */
@@ -232,15 +234,15 @@ static int read_symbols(struct elf *elf)
 
 		sym->idx = i;
 
-		if (!gelf_getsym(symtab->elf_data, i, &sym->sym)) {
-			perror("gelf_getsym");
+		if (!gelf_getsym(symtab->data, i, &sym->sym)) {
+			WARN_ELF("gelf_getsym");
 			goto err;
 		}
 
 		sym->name = elf_strptr(elf->elf, symtab->sh.sh_link,
 				       sym->sym.st_name);
 		if (!sym->name) {
-			perror("elf_strptr");
+			WARN_ELF("elf_strptr");
 			goto err;
 		}
 
@@ -322,8 +324,8 @@ static int read_relas(struct elf *elf)
 			}
 			memset(rela, 0, sizeof(*rela));
 
-			if (!gelf_getrela(sec->elf_data, i, &rela->rela)) {
-				perror("gelf_getrela");
+			if (!gelf_getrela(sec->data, i, &rela->rela)) {
+				WARN_ELF("gelf_getrela");
 				return -1;
 			}
 
@@ -362,12 +364,6 @@ struct elf *elf_open(const char *name)
 
 	INIT_LIST_HEAD(&elf->sections);
 
-	elf->name = strdup(name);
-	if (!elf->name) {
-		perror("strdup");
-		goto err;
-	}
-
 	elf->fd = open(name, O_RDONLY);
 	if (elf->fd == -1) {
 		perror("open");
@@ -376,12 +372,12 @@ struct elf *elf_open(const char *name)
 
 	elf->elf = elf_begin(elf->fd, ELF_C_READ_MMAP, NULL);
 	if (!elf->elf) {
-		perror("elf_begin");
+		WARN_ELF("elf_begin");
 		goto err;
 	}
 
 	if (!gelf_getehdr(elf->elf, &elf->ehdr)) {
-		perror("gelf_getehdr");
+		WARN_ELF("gelf_getehdr");
 		goto err;
 	}
 
@@ -407,6 +403,12 @@ void elf_close(struct elf *elf)
 	struct symbol *sym, *tmpsym;
 	struct rela *rela, *tmprela;
 
+	if (elf->elf)
+		elf_end(elf->elf);
+
+	if (elf->fd > 0)
+		close(elf->fd);
+
 	list_for_each_entry_safe(sec, tmpsec, &elf->sections, list) {
 		list_for_each_entry_safe(sym, tmpsym, &sec->symbol_list, list) {
 			list_del(&sym->list);
@@ -421,11 +423,6 @@ void elf_close(struct elf *elf)
 		list_del(&sec->list);
 		free(sec);
 	}
-	if (elf->name)
-		free(elf->name);
-	if (elf->fd > 0)
-		close(elf->fd);
-	if (elf->elf)
-		elf_end(elf->elf);
+
 	free(elf);
 }
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index 731973e1a3f5..343968b778cb 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -37,10 +37,9 @@ struct section {
 	DECLARE_HASHTABLE(rela_hash, 16);
 	struct section *base, *rela;
 	struct symbol *sym;
-	Elf_Data *elf_data;
+	Elf_Data *data;
 	char *name;
 	int idx;
-	unsigned long data;
 	unsigned int len;
 };
 
@@ -86,6 +85,7 @@ struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
 struct symbol *find_containing_func(struct section *sec, unsigned long offset);
 void elf_close(struct elf *elf);
 
-
+#define for_each_sec(file, sec)						\
+	list_for_each_entry(sec, &file->elf->sections, list)
 
 #endif /* _OBJTOOL_ELF_H */
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index bff8abb3a4aa..84f001d52322 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -91,16 +91,16 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
 	alt->jump_or_nop = entry->jump_or_nop;
 
 	if (alt->group) {
-		alt->orig_len = *(unsigned char *)(sec->data + offset +
+		alt->orig_len = *(unsigned char *)(sec->data->d_buf + offset +
 						   entry->orig_len);
-		alt->new_len = *(unsigned char *)(sec->data + offset +
+		alt->new_len = *(unsigned char *)(sec->data->d_buf + offset +
 						  entry->new_len);
 	}
 
 	if (entry->feature) {
 		unsigned short feature;
 
-		feature = *(unsigned short *)(sec->data + offset +
+		feature = *(unsigned short *)(sec->data->d_buf + offset +
 					      entry->feature);
 
 		/*
diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h
index ac7e07523e84..afd9f7a05f6d 100644
--- a/tools/objtool/warn.h
+++ b/tools/objtool/warn.h
@@ -18,6 +18,13 @@
 #ifndef _WARN_H
 #define _WARN_H
 
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "elf.h"
+
 extern const char *objname;
 
 static inline char *offstr(struct section *sec, unsigned long offset)
@@ -57,4 +64,7 @@ static inline char *offstr(struct section *sec, unsigned long offset)
 	free(_str);					\
 })
 
+#define WARN_ELF(format, ...)				\
+	WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1))
+
 #endif /* _WARN_H */
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index b0b3007d3c9c..4b6cdbf8f935 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -108,6 +108,9 @@ approach is available to export the data to a postgresql database.  Refer to
 script export-to-postgresql.py for more details, and to script
 call-graph-from-postgresql.py for an example of using the database.
 
+There is also script intel-pt-events.py which provides an example of how to
+unpack the raw data for power events and PTWRITE.
+
 As mentioned above, it is easy to capture too much data.  One way to limit the
 data captured is to use 'snapshot' mode which is explained further below.
 Refer to 'new snapshot option' and 'Intel PT modes of operation' further below.
@@ -364,6 +367,42 @@ cyc_thresh	Specifies how frequently CYC packets are produced - see cyc
 
 		CYC packets are not requested by default.
 
+pt		Specifies pass-through which enables the 'branch' config term.
+
+		The default config selects 'pt' if it is available, so a user will
+		never need to specify this term.
+
+branch		Enable branch tracing.  Branch tracing is enabled by default so to
+		disable branch tracing use 'branch=0'.
+
+		The default config selects 'branch' if it is available.
+
+ptw		Enable PTWRITE packets which are produced when a ptwrite instruction
+		is executed.
+
+		Support for this feature is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/ptwrite
+
+		which contains "1" if the feature is supported and
+		"0" otherwise.
+
+fup_on_ptw	Enable a FUP packet to follow the PTWRITE packet.  The FUP packet
+		provides the address of the ptwrite instruction.  In the absence of
+		fup_on_ptw, the decoder will use the address of the previous branch
+		if branch tracing is enabled, otherwise the address will be zero.
+		Note that fup_on_ptw will work even when branch tracing is disabled.
+
+pwr_evt		Enable power events.  The power events provide information about
+		changes to the CPU C-state.
+
+		Support for this feature is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/power_event_trace
+
+		which contains "1" if the feature is supported and
+		"0" otherwise.
+
 
 new snapshot option
 -------------------
@@ -674,13 +713,15 @@ Having no option is the same as
 
 which, in turn, is the same as
 
-	--itrace=ibxe
+	--itrace=ibxwpe
 
 The letters are:
 
 	i	synthesize "instructions" events
 	b	synthesize "branches" events
 	x	synthesize "transactions" events
+	w	synthesize "ptwrite" events
+	p	synthesize "power" events
 	c	synthesize branches events (calls only)
 	r	synthesize branches events (returns only)
 	e	synthesize tracing error events
@@ -699,7 +740,40 @@ and "r" can be combined to get calls and returns.
 'flags' field can be used in perf script to determine whether the event is a
 tranasaction start, commit or abort.
 
-Error events are new.  They show where the decoder lost the trace.  Error events
+Note that "instructions", "branches" and "transactions" events depend on code
+flow packets which can be disabled by using the config term "branch=0".  Refer
+to the config terms section above.
+
+"ptwrite" events record the payload of the ptwrite instruction and whether
+"fup_on_ptw" was used.  "ptwrite" events depend on PTWRITE packets which are
+recorded only if the "ptw" config term was used.  Refer to the config terms
+section above.  perf script "synth" field displays "ptwrite" information like
+this: "ip: 0 payload: 0x123456789abcdef0"  where "ip" is 1 if "fup_on_ptw" was
+used.
+
+"Power" events correspond to power event packets and CBR (core-to-bus ratio)
+packets.  While CBR packets are always recorded when tracing is enabled, power
+event packets are recorded only if the "pwr_evt" config term was used.  Refer to
+the config terms section above.  The power events record information about
+C-state changes, whereas CBR is indicative of CPU frequency.  perf script
+"event,synth" fields display information like this:
+	cbr:  cbr: 22 freq: 2189 MHz (200%)
+	mwait:  hints: 0x60 extensions: 0x1
+	pwre:  hw: 0 cstate: 2 sub-cstate: 0
+	exstop:  ip: 1
+	pwrx:  deepest cstate: 2 last cstate: 2 wake reason: 0x4
+Where:
+	"cbr" includes the frequency and the percentage of maximum non-turbo
+	"mwait" shows mwait hints and extensions
+	"pwre" shows C-state transitions (to a C-state deeper than C0) and
+	whether	initiated by hardware
+	"exstop" indicates execution stopped and whether the IP was recorded
+	exactly,
+	"pwrx" indicates return to C0
+For more details refer to the Intel 64 and IA-32 Architectures Software
+Developer Manuals.
+
+Error events show where the decoder lost the trace.  Error events
 are quite important.  Users must know if what they are seeing is a complete
 picture or not.
 
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index e2a4c5e0dbe5..a3abe04c779d 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -3,13 +3,15 @@
 		c	synthesize branches events (calls only)
 		r	synthesize branches events (returns only)
 		x	synthesize transactions events
+		w	synthesize ptwrite events
+		p	synthesize power events
 		e	synthesize error events
 		d	create a debug log
 		g	synthesize a call chain (use with i or x)
 		l	synthesize last branch entries (use with i or x)
 		s       skip initial number of events
 
-	The default is all events i.e. the same as --itrace=ibxe
+	The default is all events i.e. the same as --itrace=ibxwpe
 
 	In addition, the period (default 100000) for instructions events
 	can be specified in units of:
@@ -26,8 +28,8 @@
 	Also the number of last branch entries (default 64, max. 1024) for
 	instructions or transactions events can be specified.
 
-	It is also possible to skip events generated (instructions, branches, transactions)
-	at the beginning. This is useful to ignore initialization code.
+	It is also possible to skip events generated (instructions, branches, transactions,
+	ptwrite, power) at the beginning. This is useful to ignore initialization code.
 
 	--itrace=i0nss1000000
 
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index 2da07e51e119..822414235170 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -76,7 +76,7 @@ REPORT OPTIONS
 
 -c::
 --coalesce::
-	Specify sorintg fields for single cacheline display.
+	Specify sorting fields for single cacheline display.
 	Following fields are available: tid,pid,iaddr,dso
 	(see COALESCE)
 
@@ -106,7 +106,7 @@ REPORT OPTIONS
 
 -d::
 --display::
-	Siwtch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
+	Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
 
 C2C RECORD
 ----------
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 6e6a8b22c859..721a447f046e 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -48,6 +48,39 @@ OPTIONS
 	Ranges of CPUs are specified with -: 0-2.
 	Default is to trace on all online CPUs.
 
+-T::
+--trace-funcs=::
+	Only trace functions given by the argument.  Multiple functions
+	can be given by using this option more than once.  The function
+	argument also can be a glob pattern.  It will be passed to
+	'set_ftrace_filter' in tracefs.
+
+-N::
+--notrace-funcs=::
+	Do not trace functions given by the argument.  Like -T option,
+	this can be used more than once to specify multiple functions
+	(or glob patterns).  It will be passed to 'set_ftrace_notrace'
+	in tracefs.
+
+-G::
+--graph-funcs=::
+	Set graph filter on the given function (or a glob pattern).
+	This is useful for the function_graph tracer only and enables
+	tracing for functions executed from the given function.
+	This can be used more than once to specify multiple functions.
+	It will be passed to 'set_graph_function' in tracefs.
+
+-g::
+--nograph-funcs=::
+	Set graph notrace filter on the given function (or a glob pattern).
+	Like -G option, this is useful for the function_graph tracer only
+	and disables tracing for function executed from the given function.
+	This can be used more than once to specify multiple functions.
+	It will be passed to 'set_graph_notrace' in tracefs.
+
+-D::
+--graph-depth=::
+	Set max depth for function graph tracer to follow
 
 SEE ALSO
 --------
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index e6c9902c6d82..165c2b1d4317 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -240,9 +240,13 @@ Add a probe on schedule() function 12th line with recording cpu local variable:
  or
  ./perf probe --add='schedule:12 cpu'
 
- this will add one or more probes which has the name start with "schedule".
+Add one or more probes which has the name start with "schedule".
 
- Add probes on lines in schedule() function which calls update_rq_clock().
+ ./perf probe schedule*
+ or
+ ./perf probe --add='schedule*'
+
+Add probes on lines in schedule() function which calls update_rq_clock().
 
  ./perf probe 'schedule;update_rq_clock*'
  or
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index ea3789d05e5e..b0e9e921d534 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -225,7 +225,7 @@ OPTIONS
 	the libunwind or libdw library) should be used instead.
 	Using the "lbr" method doesn't require any compiler options. It
 	will produce call graphs from the hardware LBR registers. The
-	main limition is that it is only available on new Intel
+	main limitation is that it is only available on new Intel
 	platforms, such as Haswell. It can only get user call chain. It
 	doesn't work with branch stack sampling at the same time.
 
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 37a175914157..9fa84617181e 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -182,7 +182,7 @@ OPTIONS
 --parent=<regex>::
         A regex filter to identify parent. The parent is a caller of this
 	function and searched through the callchain, thus it requires callchain
-	information recorded. The pattern is in the exteneded regex format and
+	information recorded. The pattern is in the extended regex format and
 	defaults to "\^sys_|^do_page_fault", see '--sort parent'.
 
 -x::
@@ -207,8 +207,8 @@ OPTIONS
 -g::
 --call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>::
         Display call chains using type, min percent threshold, print limit,
-	call order, sort key, optional branch and value.  Note that ordering of
-	parameters is not fixed so any parement can be given in an arbitraty order.
+	call order, sort key, optional branch and value.  Note that ordering
+	is not fixed so any parameter can be given in an arbitrary order.
 	One exception is the print_limit which should be preceded by threshold.
 
 	print_type can be either:
diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt
index dfbb506d2c34..142606c0ec9c 100644
--- a/tools/perf/Documentation/perf-script-perl.txt
+++ b/tools/perf/Documentation/perf-script-perl.txt
@@ -39,7 +39,7 @@ EVENT HANDLERS
 When perf script is invoked using a trace script, a user-defined
 'handler function' is called for each event in the trace.  If there's
 no handler function defined for a given event type, the event is
-ignored (or passed to a 'trace_handled' function, see below) and the
+ignored (or passed to a 'trace_unhandled' function, see below) and the
 next event is processed.
 
 Most of the event's field values are passed as arguments to the
diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt
index 54acba221558..51ec2d20068a 100644
--- a/tools/perf/Documentation/perf-script-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -149,10 +149,8 @@ def raw_syscalls__sys_enter(event_name, context, common_cpu,
 		print "id=%d, args=%s\n" % \
 		(id, args),
 
-def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs,
-		common_pid, common_comm):
-		print_header(event_name, common_cpu, common_secs, common_nsecs,
-		common_pid, common_comm)
+def trace_unhandled(event_name, context, event_fields_dict):
+		print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])
 
 def print_header(event_name, cpu, secs, nsecs, pid, comm):
 	print "%-20s %5u %05u.%09u %8u %-20s " % \
@@ -321,7 +319,7 @@ So those are the essential steps in writing and running a script.  The
 process can be generalized to any tracepoint or set of tracepoints
 you're interested in - basically find the tracepoint(s) you're
 interested in by looking at the list of available events shown by
-'perf list' and/or look in /sys/kernel/debug/tracing events for
+'perf list' and/or look in /sys/kernel/debug/tracing/events/ for
 detailed event and field info, record the corresponding trace data
 using 'perf record', passing it the list of interesting events,
 generate a skeleton script using 'perf script -g python' and modify the
@@ -334,7 +332,7 @@ right place, you can have your script listed alongside the other
 scripts listed by the 'perf script -l' command e.g.:
 
 ----
-root@tropicana:~# perf script -l
+# perf script -l
 List of available trace scripts:
   wakeup-latency                       system-wide min/max/avg wakeup latency
   rw-by-file <comm>                    r/w activity for a program, by file
@@ -383,8 +381,6 @@ source tree:
 
 ----
 # ls -al kernel-source/tools/perf/scripts/python
-
-root@tropicana:/home/trz/src/tip# ls -al tools/perf/scripts/python
 total 32
 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
@@ -399,7 +395,7 @@ otherwise your script won't show up at run-time), 'perf script -l'
 should show a new entry for your script:
 
 ----
-root@tropicana:~# perf script -l
+# perf script -l
 List of available trace scripts:
   wakeup-latency                       system-wide min/max/avg wakeup latency
   rw-by-file <comm>                    r/w activity for a program, by file
@@ -437,7 +433,7 @@ EVENT HANDLERS
 When perf script is invoked using a trace script, a user-defined
 'handler function' is called for each event in the trace.  If there's
 no handler function defined for a given event type, the event is
-ignored (or passed to a 'trace_handled' function, see below) and the
+ignored (or passed to a 'trace_unhandled' function, see below) and the
 next event is processed.
 
 Most of the event's field values are passed as arguments to the
@@ -532,7 +528,7 @@ can implement a set of optional functions:
 gives scripts a chance to do setup tasks:
 
 ----
-def trace_begin:
+def trace_begin():
     pass
 ----
 
@@ -541,7 +537,7 @@ def trace_begin:
  as display results:
 
 ----
-def trace_end:
+def trace_end():
     pass
 ----
 
@@ -550,8 +546,7 @@ def trace_end:
  of common arguments are passed into it:
 
 ----
-def trace_unhandled(event_name, context, common_cpu, common_secs,
-        common_nsecs, common_pid, common_comm):
+def trace_unhandled(event_name, context, event_fields_dict):
     pass
 ----
 
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index cb0eda3925e6..5ee8796be96e 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,8 +116,9 @@ OPTIONS
 --fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-        srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
-        callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw,
+        srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff,
+        callindent, insn, insnlen, synth.
+        Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
 
@@ -130,6 +131,14 @@ OPTIONS
 	i.e., the specified fields apply to all event types if the type string
 	is not given.
 
+	In addition to overriding fields, it is also possible to add or remove
+	fields from the defaults. For example
+
+		-F -cpu,+insn
+
+	removes the cpu field and adds the insn field. Adding/removing fields
+	cannot be mixed with normal overriding.
+
 	The arguments are processed in the order received. A later usage can
 	reset a prior request. e.g.:
 
@@ -185,6 +194,9 @@ OPTIONS
 	instruction bytes and the instruction length of the current
 	instruction.
 
+	The synth field is used by synthesized events which may be created when
+	Instruction Trace decoding.
+
 	Finally, a user may not set fields to none for all event types.
 	i.e., -F "" is not allowed.
 
@@ -203,6 +215,8 @@ OPTIONS
 	is printed. This is the full execution path leading to the sample. This is only supported when the
 	sample was recorded with perf record -b or -j any.
 
+	The brstackoff field will print an offset into a specific dso/binary.
+
 -k::
 --vmlinux=<file>::
         vmlinux pathname
@@ -311,6 +325,10 @@ include::itrace.txt[]
 	Set the maximum number of program blocks to print with brstackasm for
 	each sample.
 
+--inline::
+	If a callgraph address belongs to an inlined function, the inline stack
+	will be printed. Each entry has function name and file/line.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index bd0e4417f2be..698076313606 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -239,6 +239,20 @@ taskset.
 --no-merge::
 Do not merge results from same PMUs.
 
+--smi-cost::
+Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
+
+During the measurement, the /sys/device/cpu/freeze_on_smi will be set to
+freeze core counters on SMI.
+The aperf counter will not be effected by the setting.
+The cost of SMI can be measured by (aperf - unhalted core cycles).
+
+In practice, the percentages of SMI cycles is very useful for performance
+oriented analysis. --metric_only will be applied by default.
+The output is SMI cycles%, equals to (aperf - unhalted core cycles) / aperf
+
+Users who wants to get the actual value can apply --no-metric-only.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index fa2a9132f0a9..de8b39dda7b8 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -270,7 +270,7 @@ When the event stream contains multiple events each event is identified
 by an ID. This can be either through the PERF_SAMPLE_ID or the
 PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is
 at a fixed offset from the event header, which allows reliable
-parsing of the header. Relying on ID may be ambigious.
+parsing of the header. Relying on ID may be ambiguous.
 IDENTIFIER is only supported by newer Linux kernels.
 
 Perf record specific events:
@@ -288,7 +288,7 @@ struct attr_event {
 	uint64_t id[];
 };
 
-	PERF_RECORD_HEADER_EVENT_TYPE		= 65, /* depreceated */
+	PERF_RECORD_HEADER_EVENT_TYPE		= 65, /* deprecated */
 
 #define MAX_EVENT_NAME 64
 
diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt
index 170b0289a7bc..db0ca3063eae 100644
--- a/tools/perf/Documentation/tips.txt
+++ b/tools/perf/Documentation/tips.txt
@@ -23,7 +23,7 @@ For memory address profiling, try: perf mem record / perf mem report
 For tracepoint events, try: perf report -s trace_fields
 To record callchains for each sample: perf record -g
 To record every process run by a user: perf record -u <user>
-Skip collecing build-id when recording: perf record -B
+Skip collecting build-id when recording: perf record -B
 To change sampling frequency to 100 Hz: perf record -F 100
 See assembly instructions with percentage: perf annotate <symbol>
 If you prefer Intel style assembly, try: perf annotate -M intel
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 8354d04b392f..bdf0e87f9b29 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -19,18 +19,18 @@ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
 
 include $(srctree)/tools/scripts/Makefile.arch
 
-$(call detected_var,ARCH)
+$(call detected_var,SRCARCH)
 
 NO_PERF_REGS := 1
 
 # Additional ARCH settings for ppc
-ifeq ($(ARCH),powerpc)
+ifeq ($(SRCARCH),powerpc)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
 endif
 
 # Additional ARCH settings for x86
-ifeq ($(ARCH),x86)
+ifeq ($(SRCARCH),x86)
   $(call detected,CONFIG_X86)
   ifeq (${IS_64_BIT}, 1)
     CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated
@@ -43,12 +43,12 @@ ifeq ($(ARCH),x86)
   NO_PERF_REGS := 0
 endif
 
-ifeq ($(ARCH),arm)
+ifeq ($(SRCARCH),arm)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS = -lunwind -lunwind-arm
 endif
 
-ifeq ($(ARCH),arm64)
+ifeq ($(SRCARCH),arm64)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
 endif
@@ -61,7 +61,7 @@ endif
 # Disable it on all other architectures in case libdw unwind
 # support is detected in system. Add supported architectures
 # to the check.
-ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
+ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc))
   NO_LIBDW_DWARF_UNWIND := 1
 endif
 
@@ -115,9 +115,9 @@ endif
 FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
 FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
 
-FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
+FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
 # include ARCH specific config
--include $(src-perf)/arch/$(ARCH)/Makefile
+-include $(src-perf)/arch/$(SRCARCH)/Makefile
 
 ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
   CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
@@ -228,12 +228,12 @@ ifeq ($(DEBUG),0)
 endif
 
 INC_FLAGS += -I$(src-perf)/util/include
-INC_FLAGS += -I$(src-perf)/arch/$(ARCH)/include
+INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
 INC_FLAGS += -I$(srctree)/tools/include/uapi
 INC_FLAGS += -I$(srctree)/tools/include/
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/
 
 # $(obj-perf)      for generated common-cmds.h
 # $(obj-perf)/util for generated bison/flex headers
@@ -355,7 +355,7 @@ ifndef NO_LIBELF
 
   ifndef NO_DWARF
     ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
-      msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
+      msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled);
       NO_DWARF := 1
     else
       CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
@@ -380,7 +380,7 @@ ifndef NO_LIBELF
         CFLAGS += -DHAVE_BPF_PROLOGUE
         $(call detected,CONFIG_BPF_PROLOGUE)
       else
-        msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset());
+        msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset());
       endif
     else
       msg := $(warning DWARF support is off, BPF prologue is disabled);
@@ -406,7 +406,7 @@ ifdef PERF_HAVE_JITDUMP
   endif
 endif
 
-ifeq ($(ARCH),powerpc)
+ifeq ($(SRCARCH),powerpc)
   ifndef NO_DWARF
     CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
   endif
@@ -487,7 +487,7 @@ else
 endif
 
 ifndef NO_LOCAL_LIBUNWIND
-  ifeq ($(ARCH),$(filter $(ARCH),arm arm64))
+  ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64))
     $(call feature_check,libunwind-debug-frame)
     ifneq ($(feature-libunwind-debug-frame), 1)
       msg := $(warning No debug_frame support found in libunwind);
@@ -740,7 +740,7 @@ ifeq (${IS_64_BIT}, 1)
       NO_PERF_READ_VDSO32 := 1
     endif
   endif
-  ifneq ($(ARCH), x86)
+  ifneq ($(SRCARCH), x86)
     NO_PERF_READ_VDSOX32 := 1
   endif
   ifndef NO_PERF_READ_VDSOX32
@@ -769,7 +769,7 @@ ifdef LIBBABELTRACE
 endif
 
 ifndef NO_AUXTRACE
-  ifeq ($(ARCH),x86)
+  ifeq ($(SRCARCH),x86)
     ifeq ($(feature-get_cpuid), 0)
       msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
       NO_AUXTRACE := 1
@@ -872,7 +872,7 @@ sysconfdir = $(prefix)/etc
 ETC_PERFCONFIG = etc/perfconfig
 endif
 ifndef lib
-ifeq ($(ARCH)$(IS_64_BIT), x861)
+ifeq ($(SRCARCH)$(IS_64_BIT), x861)
 lib = lib64
 else
 lib = lib
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 79fe31f20a17..5008f51a08a2 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -226,7 +226,7 @@ endif
 
 ifeq ($(config),0)
 include $(srctree)/tools/scripts/Makefile.arch
--include arch/$(ARCH)/Makefile
+-include arch/$(SRCARCH)/Makefile
 endif
 
 # The FEATURE_DUMP_EXPORT holds location of the actual
diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build
index 109eb75cf7de..d9b6af837c7d 100644
--- a/tools/perf/arch/Build
+++ b/tools/perf/arch/Build
@@ -1,2 +1,2 @@
 libperf-y += common.o
-libperf-y += $(ARCH)/
+libperf-y += $(SRCARCH)/
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 29361d9b635a..7ce3d1a25133 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -17,6 +17,7 @@
 
 #include <api/fs/fs.h>
 #include <linux/bitops.h>
+#include <linux/compiler.h>
 #include <linux/coresight-pmu.h>
 #include <linux/kernel.h>
 #include <linux/log2.h>
@@ -202,19 +203,18 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
 		pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME,
 			  opts->auxtrace_snapshot_size);
 
-	if (cs_etm_evsel) {
-		/*
-		 * To obtain the auxtrace buffer file descriptor, the auxtrace
-		 * event must come first.
-		 */
-		perf_evlist__to_front(evlist, cs_etm_evsel);
-		/*
-		 * In the case of per-cpu mmaps, we need the CPU on the
-		 * AUX event.
-		 */
-		if (!cpu_map__empty(cpus))
-			perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
-	}
+	/*
+	 * To obtain the auxtrace buffer file descriptor, the auxtrace
+	 * event must come first.
+	 */
+	perf_evlist__to_front(evlist, cs_etm_evsel);
+
+	/*
+	 * In the case of per-cpu mmaps, we need the CPU on the
+	 * AUX event.
+	 */
+	if (!cpu_map__empty(cpus))
+		perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
 
 	/* Add dummy event to keep tracking */
 	if (opts->full_auxtrace) {
@@ -583,8 +583,7 @@ static FILE *cs_device__open_file(const char *name)
 
 }
 
-static __attribute__((format(printf, 2, 3)))
-int cs_device__print_file(const char *name, const char *fmt, ...)
+static int __printf(2, 3) cs_device__print_file(const char *name, const char *fmt, ...)
 {
 	va_list args;
 	FILE *file;
diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c
index 44eafd6f2d50..8f1908756cb6 100644
--- a/tools/perf/arch/arm64/annotate/instructions.c
+++ b/tools/perf/arch/arm64/annotate/instructions.c
@@ -50,7 +50,7 @@ static int arm64__annotate_init(struct arch *arch)
 	arch->initialized = true;
 	arch->priv	  = arm;
 	arch->associate_instruction_ops   = arm64__associate_instruction_ops;
-	arch->objdump.comment_char	  = ';';
+	arch->objdump.comment_char	  = '/';
 	arch->objdump.skip_functions_char = '+';
 	return 0;
 
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index 837067f48a4c..6b40e9f01740 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -26,6 +26,7 @@ const char *const arm64_triplets[] = {
 
 const char *const powerpc_triplets[] = {
 	"powerpc-unknown-linux-gnu-",
+	"powerpc-linux-gnu-",
 	"powerpc64-unknown-linux-gnu-",
 	"powerpc64-linux-gnu-",
 	"powerpc64le-linux-gnu-",
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 90ad64b231cd..2e6595310420 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -5,4 +5,6 @@ libperf-y += perf_regs.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
+
 libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index 39dbe512b9fc..bf9a2594572c 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -52,6 +52,18 @@ int arch__compare_symbol_names(const char *namea, const char *nameb)
 
 	return strcmp(namea, nameb);
 }
+
+int arch__compare_symbol_names_n(const char *namea, const char *nameb,
+				 unsigned int n)
+{
+	/* Skip over initial dot */
+	if (*namea == '.')
+		namea++;
+	if (*nameb == '.')
+		nameb++;
+
+	return strncmp(namea, nameb, n);
+}
 #endif
 
 #if defined(_CALL_ELF) && _CALL_ELF == 2
diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c
new file mode 100644
index 000000000000..3a24b3c43273
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/unwind-libdw.c
@@ -0,0 +1,73 @@
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+
+/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils.  */
+static const int special_regs[3][2] = {
+	{ 65, PERF_REG_POWERPC_LINK },
+	{ 101, PERF_REG_POWERPC_XER },
+	{ 109, PERF_REG_POWERPC_CTR },
+};
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct regs_dump *user_regs = &ui->sample->user_regs;
+	Dwarf_Word dwarf_regs[32], dwarf_nip;
+	size_t i;
+
+#define REG(r) ({						\
+	Dwarf_Word val = 0;					\
+	perf_reg_value(&val, user_regs, PERF_REG_POWERPC_##r);	\
+	val;							\
+})
+
+	dwarf_regs[0]  = REG(R0);
+	dwarf_regs[1]  = REG(R1);
+	dwarf_regs[2]  = REG(R2);
+	dwarf_regs[3]  = REG(R3);
+	dwarf_regs[4]  = REG(R4);
+	dwarf_regs[5]  = REG(R5);
+	dwarf_regs[6]  = REG(R6);
+	dwarf_regs[7]  = REG(R7);
+	dwarf_regs[8]  = REG(R8);
+	dwarf_regs[9]  = REG(R9);
+	dwarf_regs[10] = REG(R10);
+	dwarf_regs[11] = REG(R11);
+	dwarf_regs[12] = REG(R12);
+	dwarf_regs[13] = REG(R13);
+	dwarf_regs[14] = REG(R14);
+	dwarf_regs[15] = REG(R15);
+	dwarf_regs[16] = REG(R16);
+	dwarf_regs[17] = REG(R17);
+	dwarf_regs[18] = REG(R18);
+	dwarf_regs[19] = REG(R19);
+	dwarf_regs[20] = REG(R20);
+	dwarf_regs[21] = REG(R21);
+	dwarf_regs[22] = REG(R22);
+	dwarf_regs[23] = REG(R23);
+	dwarf_regs[24] = REG(R24);
+	dwarf_regs[25] = REG(R25);
+	dwarf_regs[26] = REG(R26);
+	dwarf_regs[27] = REG(R27);
+	dwarf_regs[28] = REG(R28);
+	dwarf_regs[29] = REG(R29);
+	dwarf_regs[30] = REG(R30);
+	dwarf_regs[31] = REG(R31);
+	if (!dwfl_thread_state_registers(thread, 0, 32, dwarf_regs))
+		return false;
+
+	dwarf_nip = REG(NIP);
+	dwfl_thread_state_register_pc(thread, dwarf_nip);
+	for (i = 0; i < ARRAY_SIZE(special_regs); i++) {
+		Dwarf_Word val = 0;
+		perf_reg_value(&val, user_regs, special_regs[i][1]);
+		if (!dwfl_thread_state_registers(thread,
+						 special_regs[i][0], 1,
+						 &val))
+			return false;
+	}
+
+	return true;
+}
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
index 0f196eec9f48..3cbf6fad169f 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
@@ -1664,3 +1664,15 @@
 "0f c7 1d 78 56 34 12 \txrstors 0x12345678",},
 {{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20          \tptwritel (%eax)",},
+{{0xf3, 0x0f, 0xae, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f ae 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20          \tptwritel (%eax)",},
+{{0xf3, 0x0f, 0xae, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f ae 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%eax,%ecx,8)",},
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
index af25bc8240d0..aa512fa944dd 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
@@ -1696,3 +1696,33 @@
 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",},
 {{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
 "41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20          \tptwritel (%rax)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 41 0f ae 20       \tptwritel (%r8)",},
+{{0xf3, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae 24 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 41 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20          \tptwritel (%rax)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 41 0f ae 20       \tptwritel (%r8)",},
+{{0xf3, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae 24 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 41 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x48, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 48 0f ae 20       \tptwriteq (%rax)",},
+{{0xf3, 0x49, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 49 0f ae 20       \tptwriteq (%r8)",},
+{{0xf3, 0x48, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 48 0f ae 24 25 78 56 34 12 \tptwriteq 0x12345678",},
+{{0xf3, 0x48, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 48 0f ae a4 c8 78 56 34 12 \tptwriteq 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x49, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 49 0f ae a4 c8 78 56 34 12 \tptwriteq 0x12345678(%r8,%rcx,8)",},
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
index 979487dae8d4..6cdb65d25b79 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@@ -1343,6 +1343,26 @@ int main(void)
 	asm volatile("xrstors 0x12345678(%rax,%rcx,8)");
 	asm volatile("xrstors 0x12345678(%r8,%rcx,8)");
 
+	/* ptwrite */
+
+	asm volatile("ptwrite (%rax)");
+	asm volatile("ptwrite (%r8)");
+	asm volatile("ptwrite (0x12345678)");
+	asm volatile("ptwrite 0x12345678(%rax,%rcx,8)");
+	asm volatile("ptwrite 0x12345678(%r8,%rcx,8)");
+
+	asm volatile("ptwritel (%rax)");
+	asm volatile("ptwritel (%r8)");
+	asm volatile("ptwritel (0x12345678)");
+	asm volatile("ptwritel 0x12345678(%rax,%rcx,8)");
+	asm volatile("ptwritel 0x12345678(%r8,%rcx,8)");
+
+	asm volatile("ptwriteq (%rax)");
+	asm volatile("ptwriteq (%r8)");
+	asm volatile("ptwriteq (0x12345678)");
+	asm volatile("ptwriteq 0x12345678(%rax,%rcx,8)");
+	asm volatile("ptwriteq 0x12345678(%r8,%rcx,8)");
+
 #else  /* #ifdef __x86_64__ */
 
 	/* bound r32, mem (same op code as EVEX prefix) */
@@ -2653,6 +2673,16 @@ int main(void)
 	asm volatile("xrstors (0x12345678)");
 	asm volatile("xrstors 0x12345678(%eax,%ecx,8)");
 
+	/* ptwrite */
+
+	asm volatile("ptwrite (%eax)");
+	asm volatile("ptwrite (0x12345678)");
+	asm volatile("ptwrite 0x12345678(%eax,%ecx,8)");
+
+	asm volatile("ptwritel (%eax)");
+	asm volatile("ptwritel (0x12345678)");
+	asm volatile("ptwritel 0x12345678(%eax,%ecx,8)");
+
 #endif /* #ifndef __x86_64__ */
 
 	/* Following line is a marker for the awk script - do not change */
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index af2bce7a2cd6..781df40b2966 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -35,10 +35,6 @@
 #define KiB_MASK(x) (KiB(x) - 1)
 #define MiB_MASK(x) (MiB(x) - 1)
 
-#define INTEL_BTS_DFLT_SAMPLE_SIZE	KiB(4)
-
-#define INTEL_BTS_MAX_SAMPLE_SIZE	KiB(60)
-
 struct intel_bts_snapshot_ref {
 	void	*ref_buf;
 	size_t	ref_offset;
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index f630de0206a1..9535be57033f 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -40,10 +40,6 @@
 #define KiB_MASK(x) (KiB(x) - 1)
 #define MiB_MASK(x) (MiB(x) - 1)
 
-#define INTEL_PT_DEFAULT_SAMPLE_SIZE	KiB(4)
-
-#define INTEL_PT_MAX_SAMPLE_SIZE	KiB(60)
-
 #define INTEL_PT_PSB_PERIOD_NEAR	256
 
 struct intel_pt_snapshot_ref {
@@ -196,6 +192,7 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
 	int psb_cyc, psb_periods, psb_period;
 	int pos = 0;
 	u64 config;
+	char c;
 
 	pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");
 
@@ -229,6 +226,10 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
 		}
 	}
 
+	if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
+	    perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1)
+		pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch");
+
 	pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
 
 	intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 27de0c8c5c19..469d65b21122 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -700,7 +700,7 @@ static inline uint32_t lfsr_32(uint32_t lfsr)
  * kernel (KSM, zero page, etc.) cannot optimize away RAM
  * accesses:
  */
-static inline u64 access_data(u64 *data __attribute__((unused)), u64 val)
+static inline u64 access_data(u64 *data, u64 val)
 {
 	if (g->p.data_reads)
 		val += *data;
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 64b44e81c771..9eba7f1add1f 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -49,19 +49,22 @@ static bool same_kallsyms_reloc(const char *from_dir, char *to_dir)
 	char to[PATH_MAX];
 	const char *name;
 	u64 addr1 = 0, addr2 = 0;
-	int i;
+	int i, err = -1;
 
 	scnprintf(from, sizeof(from), "%s/kallsyms", from_dir);
 	scnprintf(to, sizeof(to), "%s/kallsyms", to_dir);
 
 	for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
-		addr1 = kallsyms__get_function_start(from, name);
-		if (addr1)
+		err = kallsyms__get_function_start(from, name, &addr1);
+		if (!err)
 			break;
 	}
 
-	if (name)
-		addr2 = kallsyms__get_function_start(to, name);
+	if (err)
+		return false;
+
+	if (kallsyms__get_function_start(to, name, &addr2))
+		return false;
 
 	return addr1 == addr2;
 }
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index e33b4acece90..475999e48f66 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -27,6 +27,7 @@
 #include "tool.h"
 #include "data.h"
 #include "sort.h"
+#include "event.h"
 #include "evlist.h"
 #include "evsel.h"
 #include <asm/bug.h>
@@ -1724,10 +1725,10 @@ static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name)
 				tok; tok = strtok_r(NULL, ", ", &tmp)) {	\
 			ret = _fn(hpp_list, tok);				\
 			if (ret == -EINVAL) {					\
-				error("Invalid --fields key: `%s'", tok);	\
+				pr_err("Invalid --fields key: `%s'", tok);	\
 				break;						\
 			} else if (ret == -ESRCH) {				\
-				error("Unknown --fields key: `%s'", tok);	\
+				pr_err("Unknown --fields key: `%s'", tok);	\
 				break;						\
 			}							\
 		}								\
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
index 55f04f85b049..ece45582a48d 100644
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c
@@ -156,9 +156,10 @@ static int parse_config_arg(char *arg, char **var, char **value)
 
 int cmd_config(int argc, const char **argv)
 {
-	int i, ret = 0;
+	int i, ret = -1;
 	struct perf_config_set *set;
 	char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
+	const char *config_filename;
 
 	argc = parse_options(argc, argv, config_options, config_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
@@ -175,15 +176,18 @@ int cmd_config(int argc, const char **argv)
 	else if (use_user_config)
 		config_exclusive_filename = user_config;
 
+	if (!config_exclusive_filename)
+		config_filename = user_config;
+	else
+		config_filename = config_exclusive_filename;
+
 	/*
 	 * At only 'config' sub-command, individually use the config set
 	 * because of reinitializing with options config file location.
 	 */
 	set = perf_config_set__new();
-	if (!set) {
-		ret = -1;
+	if (!set)
 		goto out_err;
-	}
 
 	switch (actions) {
 	case ACTION_LIST:
@@ -191,50 +195,54 @@ int cmd_config(int argc, const char **argv)
 			pr_err("Error: takes no arguments\n");
 			parse_options_usage(config_usage, config_options, "l", 1);
 		} else {
-			ret = show_config(set);
-			if (ret < 0) {
-				const char * config_filename = config_exclusive_filename;
-				if (!config_exclusive_filename)
-					config_filename = user_config;
+			if (show_config(set) < 0) {
 				pr_err("Nothing configured, "
 				       "please check your %s \n", config_filename);
+				goto out_err;
 			}
 		}
 		break;
 	default:
-		if (argc) {
-			for (i = 0; argv[i]; i++) {
-				char *var, *value;
-				char *arg = strdup(argv[i]);
-
-				if (!arg) {
-					pr_err("%s: strdup failed\n", __func__);
-					ret = -1;
-					break;
-				}
+		if (!argc) {
+			usage_with_options(config_usage, config_options);
+			break;
+		}
 
-				if (parse_config_arg(arg, &var, &value) < 0) {
-					free(arg);
-					ret = -1;
-					break;
-				}
+		for (i = 0; argv[i]; i++) {
+			char *var, *value;
+			char *arg = strdup(argv[i]);
 
-				if (value == NULL)
-					ret = show_spec_config(set, var);
-				else {
-					const char *config_filename = config_exclusive_filename;
+			if (!arg) {
+				pr_err("%s: strdup failed\n", __func__);
+				goto out_err;
+			}
 
-					if (!config_exclusive_filename)
-						config_filename = user_config;
-					ret = set_config(set, config_filename, var, value);
-				}
+			if (parse_config_arg(arg, &var, &value) < 0) {
 				free(arg);
+				goto out_err;
 			}
-		} else
-			usage_with_options(config_usage, config_options);
+
+			if (value == NULL) {
+				if (show_spec_config(set, var) < 0) {
+					pr_err("%s is not configured: %s\n",
+					       var, config_filename);
+					free(arg);
+					goto out_err;
+				}
+			} else {
+				if (set_config(set, config_filename, var, value) < 0) {
+					pr_err("Failed to set '%s=%s' on %s\n",
+					       var, value, config_filename);
+					free(arg);
+					goto out_err;
+				}
+			}
+			free(arg);
+		}
 	}
 
-	perf_config_set__delete(set);
+	ret = 0;
 out_err:
+	perf_config_set__delete(set);
 	return ret;
 }
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index eec5df80f5a3..0cd4cf6a344b 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1302,7 +1302,10 @@ static int diff__config(const char *var, const char *value,
 			void *cb __maybe_unused)
 {
 	if (!strcmp(var, "diff.order")) {
-		sort_compute = perf_config_int(var, value);
+		int ret;
+		if (perf_config_int(&ret, var, value) < 0)
+			return -1;
+		sort_compute = ret;
 		return 0;
 	}
 	if (!strcmp(var, "diff.compute")) {
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 9e0b35cd0eea..dd26c62c9893 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -28,9 +28,19 @@
 #define DEFAULT_TRACER  "function_graph"
 
 struct perf_ftrace {
-	struct perf_evlist *evlist;
-	struct target target;
-	const char *tracer;
+	struct perf_evlist	*evlist;
+	struct target		target;
+	const char		*tracer;
+	struct list_head	filters;
+	struct list_head	notrace;
+	struct list_head	graph_funcs;
+	struct list_head	nograph_funcs;
+	int			graph_depth;
+};
+
+struct filter_entry {
+	struct list_head	list;
+	char			name[];
 };
 
 static bool done;
@@ -61,6 +71,7 @@ static int __write_tracing_file(const char *name, const char *val, bool append)
 	int fd, ret = -1;
 	ssize_t size = strlen(val);
 	int flags = O_WRONLY;
+	char errbuf[512];
 
 	file = get_tracing_file(name);
 	if (!file) {
@@ -75,14 +86,16 @@ static int __write_tracing_file(const char *name, const char *val, bool append)
 
 	fd = open(file, flags);
 	if (fd < 0) {
-		pr_debug("cannot open tracing file: %s\n", name);
+		pr_debug("cannot open tracing file: %s: %s\n",
+			 name, str_error_r(errno, errbuf, sizeof(errbuf)));
 		goto out;
 	}
 
 	if (write(fd, val, size) == size)
 		ret = 0;
 	else
-		pr_debug("write '%s' to tracing/%s failed\n", val, name);
+		pr_debug("write '%s' to tracing/%s failed: %s\n",
+			 val, name, str_error_r(errno, errbuf, sizeof(errbuf)));
 
 	close(fd);
 out:
@@ -101,6 +114,7 @@ static int append_tracing_file(const char *name, const char *val)
 }
 
 static int reset_tracing_cpu(void);
+static void reset_tracing_filters(void);
 
 static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
 {
@@ -116,6 +130,10 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
 	if (reset_tracing_cpu() < 0)
 		return -1;
 
+	if (write_tracing_file("max_graph_depth", "0") < 0)
+		return -1;
+
+	reset_tracing_filters();
 	return 0;
 }
 
@@ -181,6 +199,68 @@ static int reset_tracing_cpu(void)
 	return ret;
 }
 
+static int __set_tracing_filter(const char *filter_file, struct list_head *funcs)
+{
+	struct filter_entry *pos;
+
+	list_for_each_entry(pos, funcs, list) {
+		if (append_tracing_file(filter_file, pos->name) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+static int set_tracing_filters(struct perf_ftrace *ftrace)
+{
+	int ret;
+
+	ret = __set_tracing_filter("set_ftrace_filter", &ftrace->filters);
+	if (ret < 0)
+		return ret;
+
+	ret = __set_tracing_filter("set_ftrace_notrace", &ftrace->notrace);
+	if (ret < 0)
+		return ret;
+
+	ret = __set_tracing_filter("set_graph_function", &ftrace->graph_funcs);
+	if (ret < 0)
+		return ret;
+
+	/* old kernels do not have this filter */
+	__set_tracing_filter("set_graph_notrace", &ftrace->nograph_funcs);
+
+	return ret;
+}
+
+static void reset_tracing_filters(void)
+{
+	write_tracing_file("set_ftrace_filter", " ");
+	write_tracing_file("set_ftrace_notrace", " ");
+	write_tracing_file("set_graph_function", " ");
+	write_tracing_file("set_graph_notrace", " ");
+}
+
+static int set_tracing_depth(struct perf_ftrace *ftrace)
+{
+	char buf[16];
+
+	if (ftrace->graph_depth == 0)
+		return 0;
+
+	if (ftrace->graph_depth < 0) {
+		pr_err("invalid graph depth: %d\n", ftrace->graph_depth);
+		return -1;
+	}
+
+	snprintf(buf, sizeof(buf), "%d", ftrace->graph_depth);
+
+	if (write_tracing_file("max_graph_depth", buf) < 0)
+		return -1;
+
+	return 0;
+}
+
 static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
 {
 	char *trace_file;
@@ -223,11 +303,23 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
 		goto out_reset;
 	}
 
+	if (set_tracing_filters(ftrace) < 0) {
+		pr_err("failed to set tracing filters\n");
+		goto out_reset;
+	}
+
+	if (set_tracing_depth(ftrace) < 0) {
+		pr_err("failed to set graph depth\n");
+		goto out_reset;
+	}
+
 	if (write_tracing_file("current_tracer", ftrace->tracer) < 0) {
 		pr_err("failed to set current_tracer to %s\n", ftrace->tracer);
 		goto out_reset;
 	}
 
+	setup_pager();
+
 	trace_file = get_tracing_file("trace_pipe");
 	if (!trace_file) {
 		pr_err("failed to open trace_pipe\n");
@@ -251,8 +343,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
 		goto out_close_fd;
 	}
 
-	setup_pager();
-
 	perf_evlist__start_workload(ftrace->evlist);
 
 	while (!done) {
@@ -307,6 +397,32 @@ static int perf_ftrace_config(const char *var, const char *value, void *cb)
 	return -1;
 }
 
+static int parse_filter_func(const struct option *opt, const char *str,
+			     int unset __maybe_unused)
+{
+	struct list_head *head = opt->value;
+	struct filter_entry *entry;
+
+	entry = malloc(sizeof(*entry) + strlen(str) + 1);
+	if (entry == NULL)
+		return -ENOMEM;
+
+	strcpy(entry->name, str);
+	list_add_tail(&entry->list, head);
+
+	return 0;
+}
+
+static void delete_filter_func(struct list_head *head)
+{
+	struct filter_entry *pos, *tmp;
+
+	list_for_each_entry_safe(pos, tmp, head, list) {
+		list_del(&pos->list);
+		free(pos);
+	}
+}
+
 int cmd_ftrace(int argc, const char **argv)
 {
 	int ret;
@@ -330,9 +446,24 @@ int cmd_ftrace(int argc, const char **argv)
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
 		    "list of cpus to monitor"),
+	OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
+		     "trace given functions only", parse_filter_func),
+	OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func",
+		     "do not trace given functions", parse_filter_func),
+	OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func",
+		     "Set graph filter on given functions", parse_filter_func),
+	OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func",
+		     "Set nograph filter on given functions", parse_filter_func),
+	OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth,
+		    "Max depth for function graph tracer"),
 	OPT_END()
 	};
 
+	INIT_LIST_HEAD(&ftrace.filters);
+	INIT_LIST_HEAD(&ftrace.notrace);
+	INIT_LIST_HEAD(&ftrace.graph_funcs);
+	INIT_LIST_HEAD(&ftrace.nograph_funcs);
+
 	ret = perf_config(perf_ftrace_config, &ftrace);
 	if (ret < 0)
 		return -1;
@@ -348,12 +479,14 @@ int cmd_ftrace(int argc, const char **argv)
 
 		target__strerror(&ftrace.target, ret, errbuf, 512);
 		pr_err("%s\n", errbuf);
-		return -EINVAL;
+		goto out_delete_filters;
 	}
 
 	ftrace.evlist = perf_evlist__new();
-	if (ftrace.evlist == NULL)
-		return -ENOMEM;
+	if (ftrace.evlist == NULL) {
+		ret = -ENOMEM;
+		goto out_delete_filters;
+	}
 
 	ret = perf_evlist__create_maps(ftrace.evlist, &ftrace.target);
 	if (ret < 0)
@@ -364,5 +497,11 @@ int cmd_ftrace(int argc, const char **argv)
 out_delete_evlist:
 	perf_evlist__delete(ftrace.evlist);
 
+out_delete_filters:
+	delete_filter_func(&ftrace.filters);
+	delete_filter_func(&ftrace.notrace);
+	delete_filter_func(&ftrace.graph_funcs);
+	delete_filter_func(&ftrace.nograph_funcs);
+
 	return ret;
 }
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 492f8e14ab09..530a7f2fa0f3 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -108,10 +108,14 @@ out:
 	return ret;
 }
 
-static void exec_woman_emacs(const char *path, const char *page)
+static void exec_failed(const char *cmd)
 {
 	char sbuf[STRERR_BUFSIZE];
+	pr_warning("failed to exec '%s': %s", cmd, str_error_r(errno, sbuf, sizeof(sbuf)));
+}
 
+static void exec_woman_emacs(const char *path, const char *page)
+{
 	if (!check_emacsclient_version()) {
 		/* This works only with emacsclient version >= 22. */
 		char *man_page;
@@ -122,8 +126,7 @@ static void exec_woman_emacs(const char *path, const char *page)
 			execlp(path, "emacsclient", "-e", man_page, NULL);
 			free(man_page);
 		}
-		warning("failed to exec '%s': %s", path,
-			str_error_r(errno, sbuf, sizeof(sbuf)));
+		exec_failed(path);
 	}
 }
 
@@ -134,7 +137,6 @@ static void exec_man_konqueror(const char *path, const char *page)
 	if (display && *display) {
 		char *man_page;
 		const char *filename = "kfmclient";
-		char sbuf[STRERR_BUFSIZE];
 
 		/* It's simpler to launch konqueror using kfmclient. */
 		if (path) {
@@ -155,33 +157,27 @@ static void exec_man_konqueror(const char *path, const char *page)
 			execlp(path, filename, "newTab", man_page, NULL);
 			free(man_page);
 		}
-		warning("failed to exec '%s': %s", path,
-			str_error_r(errno, sbuf, sizeof(sbuf)));
+		exec_failed(path);
 	}
 }
 
 static void exec_man_man(const char *path, const char *page)
 {
-	char sbuf[STRERR_BUFSIZE];
-
 	if (!path)
 		path = "man";
 	execlp(path, "man", page, NULL);
-	warning("failed to exec '%s': %s", path,
-		str_error_r(errno, sbuf, sizeof(sbuf)));
+	exec_failed(path);
 }
 
 static void exec_man_cmd(const char *cmd, const char *page)
 {
-	char sbuf[STRERR_BUFSIZE];
 	char *shell_cmd;
 
 	if (asprintf(&shell_cmd, "%s %s", cmd, page) > 0) {
 		execl("/bin/sh", "sh", "-c", shell_cmd, NULL);
 		free(shell_cmd);
 	}
-	warning("failed to exec '%s': %s", cmd,
-		str_error_r(errno, sbuf, sizeof(sbuf)));
+	exec_failed(cmd);
 }
 
 static void add_man_viewer(const char *name)
@@ -214,6 +210,12 @@ static void do_add_man_viewer_info(const char *name,
 	man_viewer_info_list = new;
 }
 
+static void unsupported_man_viewer(const char *name, const char *var)
+{
+	pr_warning("'%s': path for unsupported man viewer.\n"
+		   "Please consider using 'man.<tool>.%s' instead.", name, var);
+}
+
 static int add_man_viewer_path(const char *name,
 			       size_t len,
 			       const char *value)
@@ -221,9 +223,7 @@ static int add_man_viewer_path(const char *name,
 	if (supported_man_viewer(name, len))
 		do_add_man_viewer_info(name, len, value);
 	else
-		warning("'%s': path for unsupported man viewer.\n"
-			"Please consider using 'man.<tool>.cmd' instead.",
-			name);
+		unsupported_man_viewer(name, "cmd");
 
 	return 0;
 }
@@ -233,9 +233,7 @@ static int add_man_viewer_cmd(const char *name,
 			      const char *value)
 {
 	if (supported_man_viewer(name, len))
-		warning("'%s': cmd for supported man viewer.\n"
-			"Please consider using 'man.<tool>.path' instead.",
-			name);
+		unsupported_man_viewer(name, "path");
 	else
 		do_add_man_viewer_info(name, len, value);
 
@@ -247,8 +245,10 @@ static int add_man_viewer_info(const char *var, const char *value)
 	const char *name = var + 4;
 	const char *subkey = strrchr(name, '.');
 
-	if (!subkey)
-		return error("Config with no key for man viewer: %s", name);
+	if (!subkey) {
+		pr_err("Config with no key for man viewer: %s", name);
+		return -1;
+	}
 
 	if (!strcmp(subkey, ".path")) {
 		if (!value)
@@ -261,7 +261,7 @@ static int add_man_viewer_info(const char *var, const char *value)
 		return add_man_viewer_cmd(name, subkey - name, value);
 	}
 
-	warning("'%s': unsupported man viewer sub key.", subkey);
+	pr_warning("'%s': unsupported man viewer sub key.", subkey);
 	return 0;
 }
 
@@ -332,7 +332,7 @@ static void setup_man_path(void)
 		setenv("MANPATH", new_path, 1);
 		free(new_path);
 	} else {
-		error("Unable to setup man path");
+		pr_err("Unable to setup man path");
 	}
 }
 
@@ -349,7 +349,7 @@ static void exec_viewer(const char *name, const char *page)
 	else if (info)
 		exec_man_cmd(info, page);
 	else
-		warning("'%s': unknown man viewer.", name);
+		pr_warning("'%s': unknown man viewer.", name);
 }
 
 static int show_man_page(const char *perf_cmd)
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 9409c9464667..0a8a1c45af87 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1715,7 +1715,7 @@ static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
 		if (!tok)
 			break;
 		if (slab_sort_dimension__add(tok, sort_list) < 0) {
-			error("Unknown slab --sort key: '%s'", tok);
+			pr_err("Unknown slab --sort key: '%s'", tok);
 			free(str);
 			return -1;
 		}
@@ -1741,7 +1741,7 @@ static int setup_page_sorting(struct list_head *sort_list, const char *arg)
 		if (!tok)
 			break;
 		if (page_sort_dimension__add(tok, sort_list) < 0) {
-			error("Unknown page --sort key: '%s'", tok);
+			pr_err("Unknown page --sort key: '%s'", tok);
 			free(str);
 			return -1;
 		}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ee7d0a82ccd0..17a14bcce34a 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -453,7 +453,7 @@ try_again:
 	}
 
 	if (perf_evlist__apply_filters(evlist, &pos)) {
-		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
+		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
 			pos->filter, perf_evsel__name(pos), errno,
 			str_error_r(errno, msg, sizeof(msg)));
 		rc = -1;
@@ -461,7 +461,7 @@ try_again:
 	}
 
 	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
-		error("failed to set config \"%s\" on event %s with %d (%s)\n",
+		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
 		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
 		      str_error_r(errno, msg, sizeof(msg)));
 		rc = -1;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 22478ff2b706..79a33eb1a10d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -94,10 +94,9 @@ static int report__config(const char *var, const char *value, void *cb)
 		symbol_conf.cumulate_callchain = perf_config_bool(var, value);
 		return 0;
 	}
-	if (!strcmp(var, "report.queue-size")) {
-		rep->queue_size = perf_config_u64(var, value);
-		return 0;
-	}
+	if (!strcmp(var, "report.queue-size"))
+		return perf_config_u64(&rep->queue_size, var, value);
+
 	if (!strcmp(var, "report.sort_order")) {
 		default_sort_order = strdup(value);
 		return 0;
@@ -558,6 +557,7 @@ static int __cmd_report(struct report *rep)
 			ui__error("failed to set cpu bitmap\n");
 			return ret;
 		}
+		session->itrace_synth_opts->cpu_bitmap = rep->cpu_bitmap;
 	}
 
 	if (rep->show_threads) {
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 39996c53995a..322b4def8411 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -2066,7 +2066,7 @@ static void save_task_callchain(struct perf_sched *sched,
 	if (thread__resolve_callchain(thread, cursor, evsel, sample,
 				      NULL, NULL, sched->max_stack + 2) != 0) {
 		if (verbose > 0)
-			error("Failed to resolve callchain. Skipping\n");
+			pr_err("Failed to resolve callchain. Skipping\n");
 
 		return;
 	}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index d05aec491cff..83cdc0a61fd6 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -85,6 +85,8 @@ enum perf_output_field {
 	PERF_OUTPUT_INSN	    = 1U << 21,
 	PERF_OUTPUT_INSNLEN	    = 1U << 22,
 	PERF_OUTPUT_BRSTACKINSN	    = 1U << 23,
+	PERF_OUTPUT_BRSTACKOFF	    = 1U << 24,
+	PERF_OUTPUT_SYNTH           = 1U << 25,
 };
 
 struct output_option {
@@ -115,6 +117,13 @@ struct output_option {
 	{.str = "insn", .field = PERF_OUTPUT_INSN},
 	{.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
 	{.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
+	{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
+	{.str = "synth", .field = PERF_OUTPUT_SYNTH},
+};
+
+enum {
+	OUTPUT_TYPE_SYNTH = PERF_TYPE_MAX,
+	OUTPUT_TYPE_MAX
 };
 
 /* default set to maintain compatibility with current format */
@@ -124,7 +133,7 @@ static struct {
 	unsigned int print_ip_opts;
 	u64 fields;
 	u64 invalid_fields;
-} output[PERF_TYPE_MAX] = {
+} output[OUTPUT_TYPE_MAX] = {
 
 	[PERF_TYPE_HARDWARE] = {
 		.user_set = false,
@@ -182,12 +191,44 @@ static struct {
 
 		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
+
+	[OUTPUT_TYPE_SYNTH] = {
+		.user_set = false,
+
+		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
+			      PERF_OUTPUT_SYNTH,
+
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
+	},
 };
 
+static inline int output_type(unsigned int type)
+{
+	switch (type) {
+	case PERF_TYPE_SYNTH:
+		return OUTPUT_TYPE_SYNTH;
+	default:
+		return type;
+	}
+}
+
+static inline unsigned int attr_type(unsigned int type)
+{
+	switch (type) {
+	case OUTPUT_TYPE_SYNTH:
+		return PERF_TYPE_SYNTH;
+	default:
+		return type;
+	}
+}
+
 static bool output_set_by_user(void)
 {
 	int j;
-	for (j = 0; j < PERF_TYPE_MAX; ++j) {
+	for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
 		if (output[j].user_set)
 			return true;
 	}
@@ -208,7 +249,7 @@ static const char *output_field2str(enum perf_output_field field)
 	return str;
 }
 
-#define PRINT_FIELD(x)  (output[attr->type].fields & PERF_OUTPUT_##x)
+#define PRINT_FIELD(x)  (output[output_type(attr->type)].fields & PERF_OUTPUT_##x)
 
 static int perf_evsel__do_check_stype(struct perf_evsel *evsel,
 				      u64 sample_type, const char *sample_msg,
@@ -216,7 +257,7 @@ static int perf_evsel__do_check_stype(struct perf_evsel *evsel,
 				      bool allow_user_set)
 {
 	struct perf_event_attr *attr = &evsel->attr;
-	int type = attr->type;
+	int type = output_type(attr->type);
 	const char *evname;
 
 	if (attr->sample_type & sample_type)
@@ -298,10 +339,10 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 		       "selected.\n");
 		return -EINVAL;
 	}
-	if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
-		pr_err("Display of DSO requested but neither sample IP nor "
-			   "sample address\nis selected. Hence, no addresses to convert "
-		       "to DSO.\n");
+	if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) &&
+	    !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) {
+		pr_err("Display of DSO requested but no address to convert.  Select\n"
+		       "sample IP, sample address, brstack, brstacksym, or brstackoff.\n");
 		return -EINVAL;
 	}
 	if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) {
@@ -346,7 +387,7 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 
 static void set_print_ip_opts(struct perf_event_attr *attr)
 {
-	unsigned int type = attr->type;
+	unsigned int type = output_type(attr->type);
 
 	output[type].print_ip_opts = 0;
 	if (PRINT_FIELD(IP))
@@ -374,16 +415,17 @@ static int perf_session__check_output_opt(struct perf_session *session)
 	unsigned int j;
 	struct perf_evsel *evsel;
 
-	for (j = 0; j < PERF_TYPE_MAX; ++j) {
-		evsel = perf_session__find_first_evtype(session, j);
+	for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
+		evsel = perf_session__find_first_evtype(session, attr_type(j));
 
 		/*
 		 * even if fields is set to 0 (ie., show nothing) event must
 		 * exist if user explicitly includes it on the command line
 		 */
-		if (!evsel && output[j].user_set && !output[j].wildcard_set) {
+		if (!evsel && output[j].user_set && !output[j].wildcard_set &&
+		    j != OUTPUT_TYPE_SYNTH) {
 			pr_err("%s events do not exist. "
-			       "Remove corresponding -f option to proceed.\n",
+			       "Remove corresponding -F option to proceed.\n",
 			       event_type(j));
 			return -1;
 		}
@@ -514,18 +556,43 @@ mispred_str(struct branch_entry *br)
 	return br->flags.predicted ? 'P' : 'M';
 }
 
-static void print_sample_brstack(struct perf_sample *sample)
+static void print_sample_brstack(struct perf_sample *sample,
+				 struct thread *thread,
+				 struct perf_event_attr *attr)
 {
 	struct branch_stack *br = sample->branch_stack;
-	u64 i;
+	struct addr_location alf, alt;
+	u64 i, from, to;
 
 	if (!(br && br->nr))
 		return;
 
 	for (i = 0; i < br->nr; i++) {
-		printf(" 0x%"PRIx64"/0x%"PRIx64"/%c/%c/%c/%d ",
-			br->entries[i].from,
-			br->entries[i].to,
+		from = br->entries[i].from;
+		to   = br->entries[i].to;
+
+		if (PRINT_FIELD(DSO)) {
+			memset(&alf, 0, sizeof(alf));
+			memset(&alt, 0, sizeof(alt));
+			thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
+			thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
+		}
+
+		printf("0x%"PRIx64, from);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alf.map, stdout);
+			printf(")");
+		}
+
+		printf("/0x%"PRIx64, to);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alt.map, stdout);
+			printf(")");
+		}
+
+		printf("/%c/%c/%c/%d ",
 			mispred_str( br->entries + i),
 			br->entries[i].flags.in_tx? 'X' : '-',
 			br->entries[i].flags.abort? 'A' : '-',
@@ -534,7 +601,8 @@ static void print_sample_brstack(struct perf_sample *sample)
 }
 
 static void print_sample_brstacksym(struct perf_sample *sample,
-				    struct thread *thread)
+				    struct thread *thread,
+				    struct perf_event_attr *attr)
 {
 	struct branch_stack *br = sample->branch_stack;
 	struct addr_location alf, alt;
@@ -559,8 +627,18 @@ static void print_sample_brstacksym(struct perf_sample *sample,
 			alt.sym = map__find_symbol(alt.map, alt.addr);
 
 		symbol__fprintf_symname_offs(alf.sym, &alf, stdout);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alf.map, stdout);
+			printf(")");
+		}
 		putchar('/');
 		symbol__fprintf_symname_offs(alt.sym, &alt, stdout);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alt.map, stdout);
+			printf(")");
+		}
 		printf("/%c/%c/%c/%d ",
 			mispred_str( br->entries + i),
 			br->entries[i].flags.in_tx? 'X' : '-',
@@ -569,6 +647,51 @@ static void print_sample_brstacksym(struct perf_sample *sample,
 	}
 }
 
+static void print_sample_brstackoff(struct perf_sample *sample,
+				    struct thread *thread,
+				    struct perf_event_attr *attr)
+{
+	struct branch_stack *br = sample->branch_stack;
+	struct addr_location alf, alt;
+	u64 i, from, to;
+
+	if (!(br && br->nr))
+		return;
+
+	for (i = 0; i < br->nr; i++) {
+
+		memset(&alf, 0, sizeof(alf));
+		memset(&alt, 0, sizeof(alt));
+		from = br->entries[i].from;
+		to   = br->entries[i].to;
+
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
+		if (alf.map && !alf.map->dso->adjust_symbols)
+			from = map__map_ip(alf.map, from);
+
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
+		if (alt.map && !alt.map->dso->adjust_symbols)
+			to = map__map_ip(alt.map, to);
+
+		printf("0x%"PRIx64, from);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alf.map, stdout);
+			printf(")");
+		}
+		printf("/0x%"PRIx64, to);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alt.map, stdout);
+			printf(")");
+		}
+		printf("/%c/%c/%c/%d ",
+			mispred_str(br->entries + i),
+			br->entries[i].flags.in_tx ? 'X' : '-',
+			br->entries[i].flags.abort ? 'A' : '-',
+			br->entries[i].flags.cycles);
+	}
+}
 #define MAXBB 16384UL
 
 static int grab_bb(u8 *buffer, u64 start, u64 end,
@@ -906,6 +1029,7 @@ static void print_sample_bts(struct perf_sample *sample,
 			     struct machine *machine)
 {
 	struct perf_event_attr *attr = &evsel->attr;
+	unsigned int type = output_type(attr->type);
 	bool print_srcline_last = false;
 
 	if (PRINT_FIELD(CALLINDENT))
@@ -913,7 +1037,7 @@ static void print_sample_bts(struct perf_sample *sample,
 
 	/* print branch_from information */
 	if (PRINT_FIELD(IP)) {
-		unsigned int print_opts = output[attr->type].print_ip_opts;
+		unsigned int print_opts = output[type].print_ip_opts;
 		struct callchain_cursor *cursor = NULL;
 
 		if (symbol_conf.use_callchain && sample->callchain &&
@@ -936,7 +1060,7 @@ static void print_sample_bts(struct perf_sample *sample,
 	/* print branch_to information */
 	if (PRINT_FIELD(ADDR) ||
 	    ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
-	     !output[attr->type].user_set)) {
+	     !output[type].user_set)) {
 		printf(" => ");
 		print_sample_addr(sample, thread, attr);
 	}
@@ -1079,6 +1203,127 @@ static void print_sample_bpf_output(struct perf_sample *sample)
 		       (char *)(sample->raw_data));
 }
 
+static void print_sample_spacing(int len, int spacing)
+{
+	if (len > 0 && len < spacing)
+		printf("%*s", spacing - len, "");
+}
+
+static void print_sample_pt_spacing(int len)
+{
+	print_sample_spacing(len, 34);
+}
+
+static void print_sample_synth_ptwrite(struct perf_sample *sample)
+{
+	struct perf_synth_intel_ptwrite *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" IP: %u payload: %#" PRIx64 " ",
+		     data->ip, le64_to_cpu(data->payload));
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_mwait(struct perf_sample *sample)
+{
+	struct perf_synth_intel_mwait *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" hints: %#x extensions: %#x ",
+		     data->hints, data->extensions);
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_pwre(struct perf_sample *sample)
+{
+	struct perf_synth_intel_pwre *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" hw: %u cstate: %u sub-cstate: %u ",
+		     data->hw, data->cstate, data->subcstate);
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_exstop(struct perf_sample *sample)
+{
+	struct perf_synth_intel_exstop *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" IP: %u ", data->ip);
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_pwrx(struct perf_sample *sample)
+{
+	struct perf_synth_intel_pwrx *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" deepest cstate: %u last cstate: %u wake reason: %#x ",
+		     data->deepest_cstate, data->last_cstate,
+		     data->wake_reason);
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_cbr(struct perf_sample *sample)
+{
+	struct perf_synth_intel_cbr *data = perf_sample__synth_ptr(sample);
+	unsigned int percent, freq;
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	freq = (le32_to_cpu(data->freq) + 500) / 1000;
+	len = printf(" cbr: %2u freq: %4u MHz ", data->cbr, freq);
+	if (data->max_nonturbo) {
+		percent = (5 + (1000 * data->cbr) / data->max_nonturbo) / 10;
+		len += printf("(%3u%%) ", percent);
+	}
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth(struct perf_sample *sample,
+			       struct perf_evsel *evsel)
+{
+	switch (evsel->attr.config) {
+	case PERF_SYNTH_INTEL_PTWRITE:
+		print_sample_synth_ptwrite(sample);
+		break;
+	case PERF_SYNTH_INTEL_MWAIT:
+		print_sample_synth_mwait(sample);
+		break;
+	case PERF_SYNTH_INTEL_PWRE:
+		print_sample_synth_pwre(sample);
+		break;
+	case PERF_SYNTH_INTEL_EXSTOP:
+		print_sample_synth_exstop(sample);
+		break;
+	case PERF_SYNTH_INTEL_PWRX:
+		print_sample_synth_pwrx(sample);
+		break;
+	case PERF_SYNTH_INTEL_CBR:
+		print_sample_synth_cbr(sample);
+		break;
+	default:
+		break;
+	}
+}
+
 struct perf_script {
 	struct perf_tool	tool;
 	struct perf_session	*session;
@@ -1132,8 +1377,9 @@ static void process_event(struct perf_script *script,
 {
 	struct thread *thread = al->thread;
 	struct perf_event_attr *attr = &evsel->attr;
+	unsigned int type = output_type(attr->type);
 
-	if (output[attr->type].fields == 0)
+	if (output[type].fields == 0)
 		return;
 
 	print_sample_start(sample, thread, evsel);
@@ -1162,6 +1408,10 @@ static void process_event(struct perf_script *script,
 	if (PRINT_FIELD(TRACE))
 		event_format__print(evsel->tp_format, sample->cpu,
 				    sample->raw_data, sample->raw_size);
+
+	if (attr->type == PERF_TYPE_SYNTH && PRINT_FIELD(SYNTH))
+		print_sample_synth(sample, evsel);
+
 	if (PRINT_FIELD(ADDR))
 		print_sample_addr(sample, thread, attr);
 
@@ -1180,16 +1430,18 @@ static void process_event(struct perf_script *script,
 			cursor = &callchain_cursor;
 
 		putchar(cursor ? '\n' : ' ');
-		sample__fprintf_sym(sample, al, 0, output[attr->type].print_ip_opts, cursor, stdout);
+		sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, stdout);
 	}
 
 	if (PRINT_FIELD(IREGS))
 		print_sample_iregs(sample, attr);
 
 	if (PRINT_FIELD(BRSTACK))
-		print_sample_brstack(sample);
+		print_sample_brstack(sample, thread, attr);
 	else if (PRINT_FIELD(BRSTACKSYM))
-		print_sample_brstacksym(sample, thread);
+		print_sample_brstacksym(sample, thread, attr);
+	else if (PRINT_FIELD(BRSTACKOFF))
+		print_sample_brstackoff(sample, thread, attr);
 
 	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
 		print_sample_bpf_output(sample);
@@ -1325,7 +1577,8 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
 	evlist = *pevlist;
 	evsel = perf_evlist__last(*pevlist);
 
-	if (evsel->attr.type >= PERF_TYPE_MAX)
+	if (evsel->attr.type >= PERF_TYPE_MAX &&
+	    evsel->attr.type != PERF_TYPE_SYNTH)
 		return 0;
 
 	evlist__for_each_entry(evlist, pos) {
@@ -1727,6 +1980,7 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
 	int rc = 0;
 	char *str = strdup(arg);
 	int type = -1;
+	enum { DEFAULT, SET, ADD, REMOVE } change = DEFAULT;
 
 	if (!str)
 		return -ENOMEM;
@@ -1749,6 +2003,8 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
 			type = PERF_TYPE_RAW;
 		else if (!strcmp(str, "break"))
 			type = PERF_TYPE_BREAKPOINT;
+		else if (!strcmp(str, "synth"))
+			type = OUTPUT_TYPE_SYNTH;
 		else {
 			fprintf(stderr, "Invalid event type in field string.\n");
 			rc = -EINVAL;
@@ -1772,23 +2028,44 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
 			goto out;
 		}
 
+		/* Don't override defaults for +- */
+		if (strchr(str, '+') || strchr(str, '-'))
+			goto parse;
+
 		if (output_set_by_user())
 			pr_warning("Overriding previous field request for all events.\n");
 
-		for (j = 0; j < PERF_TYPE_MAX; ++j) {
+		for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
 			output[j].fields = 0;
 			output[j].user_set = true;
 			output[j].wildcard_set = true;
 		}
 	}
 
+parse:
 	for (tok = strtok_r(tok, ",", &strtok_saveptr); tok; tok = strtok_r(NULL, ",", &strtok_saveptr)) {
+		if (*tok == '+') {
+			if (change == SET)
+				goto out_badmix;
+			change = ADD;
+			tok++;
+		} else if (*tok == '-') {
+			if (change == SET)
+				goto out_badmix;
+			change = REMOVE;
+			tok++;
+		} else {
+			if (change != SET && change != DEFAULT)
+				goto out_badmix;
+			change = SET;
+		}
+
 		for (i = 0; i < imax; ++i) {
 			if (strcmp(tok, all_output_options[i].str) == 0)
 				break;
 		}
 		if (i == imax && strcmp(tok, "flags") == 0) {
-			print_flags = true;
+			print_flags = change == REMOVE ? false : true;
 			continue;
 		}
 		if (i == imax) {
@@ -1801,12 +2078,16 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
 			/* add user option to all events types for
 			 * which it is valid
 			 */
-			for (j = 0; j < PERF_TYPE_MAX; ++j) {
+			for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
 				if (output[j].invalid_fields & all_output_options[i].field) {
 					pr_warning("\'%s\' not valid for %s events. Ignoring.\n",
 						   all_output_options[i].str, event_type(j));
-				} else
-					output[j].fields |= all_output_options[i].field;
+				} else {
+					if (change == REMOVE)
+						output[j].fields &= ~all_output_options[i].field;
+					else
+						output[j].fields |= all_output_options[i].field;
+				}
 			}
 		} else {
 			if (output[type].invalid_fields & all_output_options[i].field) {
@@ -1826,7 +2107,11 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
 				 "Events will not be displayed.\n", event_type(type));
 		}
 	}
+	goto out;
 
+out_badmix:
+	fprintf(stderr, "Cannot mix +-field with overridden fields\n");
+	rc = -EINVAL;
 out:
 	free(str);
 	return rc;
@@ -2444,10 +2729,11 @@ int cmd_script(int argc, const char **argv)
 		     symbol__config_symfs),
 	OPT_CALLBACK('F', "fields", NULL, "str",
 		     "comma separated output fields prepend with 'type:'. "
-		     "Valid types: hw,sw,trace,raw. "
+		     "+field to add and -field to remove."
+		     "Valid types: hw,sw,trace,raw,synth. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
 		     "addr,symoff,period,iregs,brstack,brstacksym,flags,"
-		     "bpf-output,callindent,insn,insnlen,brstackinsn",
+		     "bpf-output,callindent,insn,insnlen,brstackinsn,synth",
 		     parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
@@ -2494,6 +2780,8 @@ int cmd_script(int argc, const char **argv)
 			"Enable kernel symbol demangling"),
 	OPT_STRING(0, "time", &script.time_str, "str",
 		   "Time span of interest (start,stop)"),
+	OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name,
+		    "Show inline function"),
 	OPT_END()
 	};
 	const char * const script_subcommands[] = { "record", "report", NULL };
@@ -2704,6 +2992,7 @@ int cmd_script(int argc, const char **argv)
 		err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
 		if (err < 0)
 			goto out_delete;
+		itrace_synth_opts.cpu_bitmap = cpu_bitmap;
 	}
 
 	if (!no_callchain)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a935b5023732..48ac53b199fc 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -86,6 +86,7 @@
 #define DEFAULT_SEPARATOR	" "
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
+#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
 
 static void print_counters(struct timespec *ts, int argc, const char **argv);
 
@@ -122,6 +123,14 @@ static const char * topdown_attrs[] = {
 	NULL,
 };
 
+static const char *smi_cost_attrs = {
+	"{"
+	"msr/aperf/,"
+	"msr/smi/,"
+	"cycles"
+	"}"
+};
+
 static struct perf_evlist	*evsel_list;
 
 static struct target target = {
@@ -137,6 +146,8 @@ static bool			null_run			=  false;
 static int			detailed_run			=  0;
 static bool			transaction_run;
 static bool			topdown_run			= false;
+static bool			smi_cost			= false;
+static bool			smi_reset			= false;
 static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
 static const char		*csv_sep			= NULL;
@@ -625,14 +636,14 @@ try_again:
 	}
 
 	if (perf_evlist__apply_filters(evsel_list, &counter)) {
-		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
+		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
 			counter->filter, perf_evsel__name(counter), errno,
 			str_error_r(errno, msg, sizeof(msg)));
 		return -1;
 	}
 
 	if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
-		error("failed to set config \"%s\" on event %s with %d (%s)\n",
+		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
 		      err_term->val.drv_cfg, perf_evsel__name(counter), errno,
 		      str_error_r(errno, msg, sizeof(msg)));
 		return -1;
@@ -1578,6 +1589,7 @@ static void print_header(int argc, const char **argv)
 static void print_footer(void)
 {
 	FILE *output = stat_config.output;
+	int n;
 
 	if (!null_run)
 		fprintf(output, "\n");
@@ -1590,7 +1602,9 @@ static void print_footer(void)
 	}
 	fprintf(output, "\n\n");
 
-	if (print_free_counters_hint)
+	if (print_free_counters_hint &&
+	    sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
+	    n > 0)
 		fprintf(output,
 "Some events weren't counted. Try disabling the NMI watchdog:\n"
 "	echo 0 > /proc/sys/kernel/nmi_watchdog\n"
@@ -1779,6 +1793,8 @@ static const struct option stat_options[] = {
 			"Only print computed metrics. No raw values", enable_metric_only),
 	OPT_BOOLEAN(0, "topdown", &topdown_run,
 			"measure topdown level 1 statistics"),
+	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
+			"measure SMI cost"),
 	OPT_END()
 };
 
@@ -2157,6 +2173,39 @@ static int add_default_attributes(void)
 		return 0;
 	}
 
+	if (smi_cost) {
+		int smi;
+
+		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
+			fprintf(stderr, "freeze_on_smi is not supported.\n");
+			return -1;
+		}
+
+		if (!smi) {
+			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
+				fprintf(stderr, "Failed to set freeze_on_smi.\n");
+				return -1;
+			}
+			smi_reset = true;
+		}
+
+		if (pmu_have_event("msr", "aperf") &&
+		    pmu_have_event("msr", "smi")) {
+			if (!force_metric_only)
+				metric_only = true;
+			err = parse_events(evsel_list, smi_cost_attrs, NULL);
+		} else {
+			fprintf(stderr, "To measure SMI cost, it needs "
+				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
+			return -1;
+		}
+		if (err) {
+			fprintf(stderr, "Cannot set up SMI cost events\n");
+			return -1;
+		}
+		return 0;
+	}
+
 	if (topdown_run) {
 		char *str = NULL;
 		bool warn = false;
@@ -2739,6 +2788,9 @@ int cmd_stat(int argc, const char **argv)
 	perf_stat__exit_aggr_mode();
 	perf_evlist__free_stats(evsel_list);
 out:
+	if (smi_cost && smi_reset)
+		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
+
 	perf_evlist__delete(evsel_list);
 	return status;
 }
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 7ab42b8311a1..6052376634c0 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -27,13 +27,13 @@
 #include "util/drv_configs.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include "util/event.h"
 #include "util/machine.h"
 #include "util/session.h"
 #include "util/symbol.h"
 #include "util/thread.h"
 #include "util/thread_map.h"
 #include "util/top.h"
-#include "util/util.h"
 #include <linux/rbtree.h>
 #include <subcmd/parse-options.h>
 #include "util/parse-events.h"
@@ -134,7 +134,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
 		return err;
 	}
 
-	err = symbol__disassemble(sym, map, NULL, 0);
+	err = symbol__disassemble(sym, map, NULL, 0, NULL);
 	if (err == 0) {
 out_assign:
 		top->sym_filter_entry = he;
@@ -958,7 +958,7 @@ static int __cmd_top(struct perf_top *top)
 
 	ret = perf_evlist__apply_drv_configs(evlist, &pos, &err_term);
 	if (ret) {
-		error("failed to set config \"%s\" on event %s with %d (%s)\n",
+		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
 			err_term->val.drv_cfg, perf_evsel__name(pos), errno,
 			str_error_r(errno, msg, sizeof(msg)));
 		goto out_delete;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index eaa66fb57347..4b2a5d298197 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -21,6 +21,7 @@
 #include "builtin.h"
 #include "util/color.h"
 #include "util/debug.h"
+#include "util/event.h"
 #include "util/evlist.h"
 #include <subcmd/exec-cmd.h>
 #include "util/machine.h"
@@ -680,6 +681,10 @@ static struct syscall_fmt {
 	{ .name	    = "mlockall",   .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 	{ .name	    = "mmap",	    .hexret = true,
+/* The standard mmap maps to old_mmap on s390x */
+#if defined(__s390x__)
+	.alias = "old_mmap",
+#endif
 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
 			     [2] = SCA_MMAP_PROT, /* prot */
 			     [3] = SCA_MMAP_FLAGS, /* flags */ }, },
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
index e9651a9d670e..cf36de7ea255 100644
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -304,7 +304,7 @@ jvmti_close(void *agent)
 	FILE *fp = agent;
 
 	if (!fp) {
-		warnx("jvmti: incalid fd in close_agent");
+		warnx("jvmti: invalid fd in close_agent");
 		return -1;
 	}
 
diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h
index bedf5d0ba9ff..c53a41f48b63 100644
--- a/tools/perf/jvmti/jvmti_agent.h
+++ b/tools/perf/jvmti/jvmti_agent.h
@@ -5,8 +5,6 @@
 #include <stdint.h>
 #include <jvmti.h>
 
-#define __unused __attribute__((unused))
-
 #if defined(__cplusplus)
 extern "C" {
 #endif
diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
index 5612641c69b4..6d710904c837 100644
--- a/tools/perf/jvmti/libjvmti.c
+++ b/tools/perf/jvmti/libjvmti.c
@@ -1,3 +1,4 @@
+#include <linux/compiler.h>
 #include <sys/types.h>
 #include <stdio.h>
 #include <string.h>
@@ -238,7 +239,7 @@ code_generated_cb(jvmtiEnv *jvmti,
 }
 
 JNIEXPORT jint JNICALL
-Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused)
+Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __maybe_unused)
 {
 	jvmtiEventCallbacks cb;
 	jvmtiCapabilities caps1;
@@ -313,7 +314,7 @@ Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused)
 }
 
 JNIEXPORT void JNICALL
-Agent_OnUnload(JavaVM *jvm __unused)
+Agent_OnUnload(JavaVM *jvm __maybe_unused)
 {
 	int ret;
 
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 4cc6960f6226..628a5e412cb1 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -17,6 +17,7 @@
 #include <subcmd/parse-options.h>
 #include "util/bpf-loader.h"
 #include "util/debug.h"
+#include "util/event.h"
 #include <api/fs/fs.h>
 #include <api/fs/tracing_path.h>
 #include <errno.h>
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
index 9213a1273697..999a4e878162 100644
--- a/tools/perf/pmu-events/Build
+++ b/tools/perf/pmu-events/Build
@@ -2,7 +2,7 @@ hostprogs := jevents
 
 jevents-y	+= json.o jsmn.o jevents.o
 pmu-events-y	+= pmu-events.o
-JDIR		=  pmu-events/arch/$(ARCH)
+JDIR		=  pmu-events/arch/$(SRCARCH)
 JSON		=  $(shell [ -d $(JDIR) ] &&				\
 			find $(JDIR) -name '*.json' -o -name 'mapfile.csv')
 #
@@ -10,4 +10,4 @@ JSON		=  $(shell [ -d $(JDIR) ] &&				\
 # directory and create tables in pmu-events.c.
 #
 $(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS)
-	$(Q)$(call echo-cmd,gen)$(JEVENTS) $(ARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
+	$(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index baa073f38334..bd0aabb2bd0f 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -48,10 +48,6 @@
 #include "json.h"
 #include "jevents.h"
 
-#ifndef __maybe_unused
-#define __maybe_unused                  __attribute__((unused))
-#endif
-
 int verbose;
 char *prog;
 
diff --git a/tools/perf/scripts/python/bin/intel-pt-events-record b/tools/perf/scripts/python/bin/intel-pt-events-record
new file mode 100644
index 000000000000..10fe2b6977d4
--- /dev/null
+++ b/tools/perf/scripts/python/bin/intel-pt-events-record
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+#
+# print Intel PT Power Events and PTWRITE. The intel_pt PMU event needs
+# to be specified with appropriate config terms.
+#
+if ! echo "$@" | grep -q intel_pt ; then
+	echo "Options must include the Intel PT event e.g. -e intel_pt/pwr_evt,ptw/"
+	echo "and for power events it probably needs to be system wide i.e. -a option"
+	echo "For example: -a -e intel_pt/pwr_evt,branch=0/ sleep 1"
+	exit 1
+fi
+perf record $@
diff --git a/tools/perf/scripts/python/bin/intel-pt-events-report b/tools/perf/scripts/python/bin/intel-pt-events-report
new file mode 100644
index 000000000000..9a9c92fcd026
--- /dev/null
+++ b/tools/perf/scripts/python/bin/intel-pt-events-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: print Intel PT Power Events and PTWRITE
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/intel-pt-events.py
+\ No newline at end of file
diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py
new file mode 100644
index 000000000000..b19172d673af
--- /dev/null
+++ b/tools/perf/scripts/python/intel-pt-events.py
@@ -0,0 +1,128 @@
+# intel-pt-events.py: Print Intel PT Power Events and PTWRITE
+# Copyright (c) 2017, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+
+import os
+import sys
+import struct
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+# These perf imports are not used at present
+#from perf_trace_context import *
+#from Core import *
+
+def trace_begin():
+	print "Intel PT Power Events and PTWRITE"
+
+def trace_end():
+	print "End"
+
+def trace_unhandled(event_name, context, event_fields_dict):
+		print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])
+
+def print_ptwrite(raw_buf):
+	data = struct.unpack_from("<IQ", raw_buf)
+	flags = data[0]
+	payload = data[1]
+	exact_ip = flags & 1
+	print "IP: %u payload: %#x" % (exact_ip, payload),
+
+def print_cbr(raw_buf):
+	data = struct.unpack_from("<BBBBII", raw_buf)
+	cbr = data[0]
+	f = (data[4] + 500) / 1000
+	p = ((cbr * 1000 / data[2]) + 5) / 10
+	print "%3u  freq: %4u MHz  (%3u%%)" % (cbr, f, p),
+
+def print_mwait(raw_buf):
+	data = struct.unpack_from("<IQ", raw_buf)
+	payload = data[1]
+	hints = payload & 0xff
+	extensions = (payload >> 32) & 0x3
+	print "hints: %#x extensions: %#x" % (hints, extensions),
+
+def print_pwre(raw_buf):
+	data = struct.unpack_from("<IQ", raw_buf)
+	payload = data[1]
+	hw = (payload >> 7) & 1
+	cstate = (payload >> 12) & 0xf
+	subcstate = (payload >> 8) & 0xf
+	print "hw: %u cstate: %u sub-cstate: %u" % (hw, cstate, subcstate),
+
+def print_exstop(raw_buf):
+	data = struct.unpack_from("<I", raw_buf)
+	flags = data[0]
+	exact_ip = flags & 1
+	print "IP: %u" % (exact_ip),
+
+def print_pwrx(raw_buf):
+	data = struct.unpack_from("<IQ", raw_buf)
+	payload = data[1]
+	deepest_cstate = payload & 0xf
+	last_cstate = (payload >> 4) & 0xf
+	wake_reason = (payload >> 8) & 0xf
+	print "deepest cstate: %u last cstate: %u wake reason: %#x" % (deepest_cstate, last_cstate, wake_reason),
+
+def print_common_start(comm, sample, name):
+	ts = sample["time"]
+	cpu = sample["cpu"]
+	pid = sample["pid"]
+	tid = sample["tid"]
+	print "%16s %5u/%-5u [%03u] %9u.%09u %7s:" % (comm, pid, tid, cpu, ts / 1000000000, ts %1000000000, name),
+
+def print_common_ip(sample, symbol, dso):
+	ip = sample["ip"]
+	print "%16x %s (%s)" % (ip, symbol, dso)
+
+def process_event(param_dict):
+        event_attr = param_dict["attr"]
+        sample     = param_dict["sample"]
+        raw_buf    = param_dict["raw_buf"]
+        comm       = param_dict["comm"]
+        name       = param_dict["ev_name"]
+
+        # Symbol and dso info are not always resolved
+        if (param_dict.has_key("dso")):
+                dso = param_dict["dso"]
+        else:
+                dso = "[unknown]"
+
+        if (param_dict.has_key("symbol")):
+                symbol = param_dict["symbol"]
+        else:
+                symbol = "[unknown]"
+
+	if name == "ptwrite":
+		print_common_start(comm, sample, name)
+		print_ptwrite(raw_buf)
+		print_common_ip(sample, symbol, dso)
+	elif name == "cbr":
+		print_common_start(comm, sample, name)
+		print_cbr(raw_buf)
+		print_common_ip(sample, symbol, dso)
+	elif name == "mwait":
+		print_common_start(comm, sample, name)
+		print_mwait(raw_buf)
+		print_common_ip(sample, symbol, dso)
+	elif name == "pwre":
+		print_common_start(comm, sample, name)
+		print_pwre(raw_buf)
+		print_common_ip(sample, symbol, dso)
+	elif name == "exstop":
+		print_common_start(comm, sample, name)
+		print_exstop(raw_buf)
+		print_common_ip(sample, symbol, dso)
+	elif name == "pwrx":
+		print_common_start(comm, sample, name)
+		print_pwrx(raw_buf)
+		print_common_ip(sample, symbol, dso)
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index af58ebc243ef..84222bdb8689 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -75,7 +75,7 @@ $(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/B
 	$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
 	$(Q)echo ';' >> $@
 
-ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc))
+ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
 perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 endif
 
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 0dd77494bb58..0e77b2cf61ec 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -18,6 +18,7 @@
  * permissions. All the event text files are stored there.
  */
 
+#include <debug.h>
 #include <errno.h>
 #include <inttypes.h>
 #include <stdlib.h>
@@ -29,14 +30,11 @@
 #include <sys/stat.h>
 #include <unistd.h>
 #include "../perf.h"
-#include "util.h"
 #include <subcmd/exec-cmd.h>
 #include "tests.h"
 
 #define ENV "PERF_TEST_ATTR"
 
-extern int verbose;
-
 static char *dir;
 
 void test_attr__init(void)
@@ -138,8 +136,10 @@ void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
 {
 	int errno_saved = errno;
 
-	if (store_event(attr, pid, cpu, fd, group_fd, flags))
-		die("test attr FAILED");
+	if (store_event(attr, pid, cpu, fd, group_fd, flags)) {
+		pr_err("test attr FAILED");
+		exit(128);
+	}
 
 	errno = errno_saved;
 }
diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
index 1091bd47adfd..cdf21a9d0c35 100644
--- a/tools/perf/tests/attr.py
+++ b/tools/perf/tests/attr.py
@@ -16,6 +16,13 @@ class Fail(Exception):
     def getMsg(self):
         return '\'%s\' - %s' % (self.test.path, self.msg)
 
+class Notest(Exception):
+    def __init__(self, test, arch):
+        self.arch = arch
+        self.test = test
+    def getMsg(self):
+        return '[%s] \'%s\'' % (self.arch, self.test.path)
+
 class Unsup(Exception):
     def __init__(self, test):
         self.test = test
@@ -112,6 +119,9 @@ class Event(dict):
 #     'command' - perf command name
 #     'args'    - special command arguments
 #     'ret'     - expected command return value (0 by default)
+#     'arch'    - architecture specific test (optional)
+#                 comma separated list, ! at the beginning
+#                 negates it.
 #
 # [eventX:base]
 #   - one or multiple instances in file
@@ -134,6 +144,12 @@ class Test(object):
         except:
             self.ret  = 0
 
+        try:
+            self.arch  = parser.get('config', 'arch')
+            log.warning("test limitation '%s'" % self.arch)
+        except:
+            self.arch  = ''
+
         self.expect   = {}
         self.result   = {}
         log.debug("  loading expected events");
@@ -145,6 +161,31 @@ class Test(object):
         else:
             return True
 
+    def skip_test(self, myarch):
+        # If architecture not set always run test
+        if self.arch == '':
+            # log.warning("test for arch %s is ok" % myarch)
+            return False
+
+        # Allow multiple values in assignment separated by ','
+        arch_list = self.arch.split(',')
+
+        # Handle negated list such as !s390x,ppc
+        if arch_list[0][0] == '!':
+            arch_list[0] = arch_list[0][1:]
+            log.warning("excluded architecture list %s" % arch_list)
+            for arch_item in arch_list:
+                # log.warning("test for %s arch is %s" % (arch_item, myarch))
+                if arch_item == myarch:
+                    return True
+            return False
+
+        for arch_item in arch_list:
+            # log.warning("test for architecture '%s' current '%s'" % (arch_item, myarch))
+            if arch_item == myarch:
+                return False
+        return True
+
     def load_events(self, path, events):
         parser_event = ConfigParser.SafeConfigParser()
         parser_event.read(path)
@@ -168,6 +209,11 @@ class Test(object):
             events[section] = e
 
     def run_cmd(self, tempdir):
+        junk1, junk2, junk3, junk4, myarch = (os.uname())
+
+        if self.skip_test(myarch):
+            raise Notest(self, myarch)
+
         cmd = "PERF_TEST_ATTR=%s %s %s -o %s/perf.data %s" % (tempdir,
               self.perf, self.command, tempdir, self.args)
         ret = os.WEXITSTATUS(os.system(cmd))
@@ -265,6 +311,8 @@ def run_tests(options):
             Test(f, options).run()
         except Unsup, obj:
             log.warning("unsupp  %s" % obj.getMsg())
+        except Notest, obj:
+            log.warning("skipped %s" % obj.getMsg())
 
 def setup_log(verbose):
     global log
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index e7664fe3bd33..39bbb97cd30a 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -62,8 +62,7 @@ static void __test_function(volatile long *ptr)
 }
 #endif
 
-__attribute__ ((noinline))
-static int test_function(void)
+static noinline int test_function(void)
 {
 	__test_function(&the_var);
 	the_var++;
@@ -288,3 +287,17 @@ int test__bp_signal(int subtest __maybe_unused)
 	return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ?
 		TEST_OK : TEST_FAIL;
 }
+
+bool test__bp_signal_is_supported(void)
+{
+/*
+ * The powerpc so far does not have support to even create
+ * instruction breakpoint using the perf event interface.
+ * Once it's there we can release this.
+ */
+#ifdef __powerpc__
+	return false;
+#else
+	return true;
+#endif
+}
diff --git a/tools/perf/tests/bp_signal_overflow.c b/tools/perf/tests/bp_signal_overflow.c
index 89f92fa67cc4..3b1ac6f31b15 100644
--- a/tools/perf/tests/bp_signal_overflow.c
+++ b/tools/perf/tests/bp_signal_overflow.c
@@ -28,8 +28,7 @@
 
 static int overflows;
 
-__attribute__ ((noinline))
-static int test_function(void)
+static noinline int test_function(void)
 {
 	return time(NULL);
 }
diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c
index 7230e62c70fc..b4ebc75e25ae 100644
--- a/tools/perf/tests/bpf-script-test-prologue.c
+++ b/tools/perf/tests/bpf-script-test-prologue.c
@@ -10,6 +10,15 @@
 
 #include <uapi/linux/fs.h>
 
+/*
+ * If CONFIG_PROFILE_ALL_BRANCHES is selected,
+ * 'if' is redefined after include kernel header.
+ * Recover 'if' for BPF object code.
+ */
+#ifdef if
+# undef if
+#endif
+
 #define FMODE_READ		0x1
 #define FMODE_WRITE		0x2
 
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 9e08d297f1a9..3ccfd58a8c3c 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -97,10 +97,12 @@ static struct test generic_tests[] = {
 	{
 		.desc = "Breakpoint overflow signal handler",
 		.func = test__bp_signal,
+		.is_supported = test__bp_signal_is_supported,
 	},
 	{
 		.desc = "Breakpoint overflow sampling",
 		.func = test__bp_signal_overflow,
+		.is_supported = test__bp_signal_is_supported,
 	},
 	{
 		.desc = "Number of exit events of a simple workload",
@@ -401,6 +403,11 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
 		if (!perf_test__matches(t, curr, argc, argv))
 			continue;
 
+		if (t->is_supported && !t->is_supported()) {
+			pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc);
+			continue;
+		}
+
 		pr_info("%2d: %-*s:", i, width, t->desc);
 
 		if (intlist__find(skiplist, i)) {
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 1f14e7612cbb..94b7c7b02bde 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -229,6 +229,8 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 	unsigned char buf2[BUFSZ];
 	size_t ret_len;
 	u64 objdump_addr;
+	const char *objdump_name;
+	char decomp_name[KMOD_DECOMP_LEN];
 	int ret;
 
 	pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
@@ -289,9 +291,25 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 		state->done[state->done_cnt++] = al.map->start;
 	}
 
+	objdump_name = al.map->dso->long_name;
+	if (dso__needs_decompress(al.map->dso)) {
+		if (dso__decompress_kmodule_path(al.map->dso, objdump_name,
+						 decomp_name,
+						 sizeof(decomp_name)) < 0) {
+			pr_debug("decompression failed\n");
+			return -1;
+		}
+
+		objdump_name = decomp_name;
+	}
+
 	/* Read the object code using objdump */
 	objdump_addr = map__rip_2objdump(al.map, al.addr);
-	ret = read_via_objdump(al.map->dso->long_name, objdump_addr, buf2, len);
+	ret = read_via_objdump(objdump_name, objdump_addr, buf2, len);
+
+	if (dso__needs_decompress(al.map->dso))
+		unlink(objdump_name);
+
 	if (ret > 0) {
 		/*
 		 * The kernel maps are inaccurate - assume objdump is right in
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index dfe5c89e2049..3e56d08f7995 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -76,8 +76,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
 	return strcmp((const char *) symbol, funcs[idx]);
 }
 
-__attribute__ ((noinline))
-static int unwind_thread(struct thread *thread)
+static noinline int unwind_thread(struct thread *thread)
 {
 	struct perf_sample sample;
 	unsigned long cnt = 0;
@@ -108,8 +107,7 @@ static int unwind_thread(struct thread *thread)
 
 static int global_unwind_retval = -INT_MAX;
 
-__attribute__ ((noinline))
-static int compare(void *p1, void *p2)
+static noinline int compare(void *p1, void *p2)
 {
 	/* Any possible value should be 'thread' */
 	struct thread *thread = *(struct thread **)p1;
@@ -128,8 +126,7 @@ static int compare(void *p1, void *p2)
 	return p1 - p2;
 }
 
-__attribute__ ((noinline))
-static int krava_3(struct thread *thread)
+static noinline int krava_3(struct thread *thread)
 {
 	struct thread *array[2] = {thread, thread};
 	void *fp = &bsearch;
@@ -147,14 +144,12 @@ static int krava_3(struct thread *thread)
 	return global_unwind_retval;
 }
 
-__attribute__ ((noinline))
-static int krava_2(struct thread *thread)
+static noinline int krava_2(struct thread *thread)
 {
 	return krava_3(thread);
 }
 
-__attribute__ ((noinline))
-static int krava_1(struct thread *thread)
+static noinline int krava_1(struct thread *thread)
 {
 	return krava_2(thread);
 }
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 70918b986568..d549a9f2c41b 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -1,5 +1,6 @@
 #include "perf.h"
 #include "util/debug.h"
+#include "util/event.h"
 #include "util/symbol.h"
 #include "util/sort.h"
 #include "util/evsel.h"
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index f171b2da4899..df9c91f49af1 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -3,6 +3,7 @@
 #include "util/symbol.h"
 #include "util/sort.h"
 #include "util/evsel.h"
+#include "util/event.h"
 #include "util/evlist.h"
 #include "util/machine.h"
 #include "util/thread.h"
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index cdf0dde5fe97..06e5080182d3 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -1,5 +1,6 @@
 #include "perf.h"
 #include "util/debug.h"
+#include "util/event.h"
 #include "util/symbol.h"
 #include "util/sort.h"
 #include "util/evsel.h"
diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c
index 76f41f249944..6cd9e5107f77 100644
--- a/tools/perf/tests/kmod-path.c
+++ b/tools/perf/tests/kmod-path.c
@@ -61,6 +61,7 @@ int test__kmod_path__parse(int subtest __maybe_unused)
 	M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_KERNEL, true);
 	M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_USER, false);
 
+#ifdef HAVE_ZLIB_SUPPORT
 	/* path                alloc_name  alloc_ext   kmod  comp  name   ext */
 	T("/xxxx/xxxx/x.ko.gz", true     , true      , true, true, "[x]", "gz");
 	T("/xxxx/xxxx/x.ko.gz", false    , true      , true, true, NULL , "gz");
@@ -96,6 +97,7 @@ int test__kmod_path__parse(int subtest __maybe_unused)
 	M("x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
 	M("x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
 	M("x.ko.gz", PERF_RECORD_MISC_USER, false);
+#endif
 
 	/* path            alloc_name  alloc_ext  kmod  comp   name             ext */
 	T("[test_module]", true      , true     , true, false, "[test_module]", NULL);
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 7fad885491c5..812a053d1941 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1810,17 +1810,6 @@ static int test_pmu_events(void)
 	return ret;
 }
 
-static void debug_warn(const char *warn, va_list params)
-{
-	char msg[1024];
-
-	if (verbose <= 0)
-		return;
-
-	vsnprintf(msg, sizeof(msg), warn, params);
-	fprintf(stderr, " Warning: %s\n", msg);
-}
-
 int test__parse_events(int subtest __maybe_unused)
 {
 	int ret1, ret2 = 0;
@@ -1832,8 +1821,6 @@ do {							\
 		ret2 = ret1;				\
 } while (0)
 
-	set_warning_routine(debug_warn);
-
 	TEST_EVENTS(test__events);
 
 	if (test_pmu())
diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c
index f73b3c5e125d..06eda675ae2c 100644
--- a/tools/perf/tests/sdt.c
+++ b/tools/perf/tests/sdt.c
@@ -1,7 +1,6 @@
 #include <errno.h>
 #include <stdio.h>
 #include <sys/epoll.h>
-#include <util/util.h>
 #include <util/evlist.h>
 #include <linux/filter.h>
 #include "tests.h"
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index 32873ec91a4e..cf00ebad2ef5 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -83,7 +83,7 @@ int test__task_exit(int subtest __maybe_unused)
 
 	evsel = perf_evlist__first(evlist);
 	evsel->attr.task = 1;
-	evsel->attr.sample_freq = 0;
+	evsel->attr.sample_freq = 1;
 	evsel->attr.inherit = 0;
 	evsel->attr.watermark = 0;
 	evsel->attr.wakeup_events = 1;
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 631859629403..577363809c9b 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -34,6 +34,7 @@ struct test {
 		int (*get_nr)(void);
 		const char *(*get_desc)(int subtest);
 	} subtest;
+	bool (*is_supported)(void);
 };
 
 /* Tests */
@@ -99,6 +100,8 @@ const char *test__clang_subtest_get_desc(int subtest);
 int test__clang_subtest_get_nr(void);
 int test__unit_number__scnprint(int subtest);
 
+bool test__bp_signal_is_supported(void);
+
 #if defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 struct thread;
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index d990ad08a3c6..27f41f28dcb4 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -46,12 +46,15 @@ static struct annotate_browser_opt {
 	.jump_arrows	= true,
 };
 
+struct arch;
+
 struct annotate_browser {
 	struct ui_browser b;
 	struct rb_root	  entries;
 	struct rb_node	  *curr_hot;
 	struct disasm_line  *selection;
 	struct disasm_line  **offsets;
+	struct arch	    *arch;
 	int		    nr_events;
 	u64		    start;
 	int		    nr_asm_entries;
@@ -125,43 +128,57 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 	int i, pcnt_width = annotate_browser__pcnt_width(ab);
 	double percent_max = 0.0;
 	char bf[256];
+	bool show_title = false;
 
 	for (i = 0; i < ab->nr_events; i++) {
 		if (bdl->samples[i].percent > percent_max)
 			percent_max = bdl->samples[i].percent;
 	}
 
+	if ((row == 0) && (dl->offset == -1 || percent_max == 0.0)) {
+		if (ab->have_cycles) {
+			if (dl->ipc == 0.0 && dl->cycles == 0)
+				show_title = true;
+		} else
+			show_title = true;
+	}
+
 	if (dl->offset != -1 && percent_max != 0.0) {
-		if (percent_max != 0.0) {
-			for (i = 0; i < ab->nr_events; i++) {
-				ui_browser__set_percent_color(browser,
-							bdl->samples[i].percent,
-							current_entry);
-				if (annotate_browser__opts.show_total_period) {
-					ui_browser__printf(browser, "%6" PRIu64 " ",
-							   bdl->samples[i].nr);
-				} else {
-					ui_browser__printf(browser, "%6.2f ",
-							   bdl->samples[i].percent);
-				}
+		for (i = 0; i < ab->nr_events; i++) {
+			ui_browser__set_percent_color(browser,
+						bdl->samples[i].percent,
+						current_entry);
+			if (annotate_browser__opts.show_total_period) {
+				ui_browser__printf(browser, "%6" PRIu64 " ",
+						   bdl->samples[i].nr);
+			} else {
+				ui_browser__printf(browser, "%6.2f ",
+						   bdl->samples[i].percent);
 			}
-		} else {
-			ui_browser__write_nstring(browser, " ", 7 * ab->nr_events);
 		}
 	} else {
 		ui_browser__set_percent_color(browser, 0, current_entry);
-		ui_browser__write_nstring(browser, " ", 7 * ab->nr_events);
+
+		if (!show_title)
+			ui_browser__write_nstring(browser, " ", 7 * ab->nr_events);
+		else
+			ui_browser__printf(browser, "%*s", 7, "Percent");
 	}
 	if (ab->have_cycles) {
 		if (dl->ipc)
 			ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->ipc);
-		else
+		else if (!show_title)
 			ui_browser__write_nstring(browser, " ", IPC_WIDTH);
+		else
+			ui_browser__printf(browser, "%*s ", IPC_WIDTH - 1, "IPC");
+
 		if (dl->cycles)
 			ui_browser__printf(browser, "%*" PRIu64 " ",
 					   CYCLES_WIDTH - 1, dl->cycles);
-		else
+		else if (!show_title)
 			ui_browser__write_nstring(browser, " ", CYCLES_WIDTH);
+		else
+			ui_browser__printf(browser, "%*s ", CYCLES_WIDTH - 1, "Cycle");
 	}
 
 	SLsmg_write_char(' ');
@@ -1056,7 +1073,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
 		  (nr_pcnt - 1);
 	}
 
-	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), sizeof_bdl);
+	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
+				  sizeof_bdl, &browser.arch);
 	if (err) {
 		char msg[BUFSIZ];
 		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index e99ba86158d2..d903fd493416 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -168,7 +168,8 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
 	if (map->dso->annotate_warned)
 		return -1;
 
-	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0);
+	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
+				  0, NULL);
 	if (err) {
 		char msg[BUFSIZ];
 		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 59addd52d9cd..ddb2c6fbdf91 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -210,6 +210,8 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b,
 			return 0;
 
 		ret = b->callchain->max_depth - a->callchain->max_depth;
+		if (callchain_param.order == ORDER_CALLER)
+			ret = -ret;
 	}
 	return ret;
 }
diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c
index 5ea0b40c4fc2..caf1ce6f5152 100644
--- a/tools/perf/ui/setup.c
+++ b/tools/perf/ui/setup.c
@@ -10,7 +10,10 @@ pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
 void *perf_gtk_handle;
 int use_browser = -1;
 
+#define PERF_GTK_DSO "libperf-gtk.so"
+
 #ifdef HAVE_GTK2_SUPPORT
+
 static int setup_gtk_browser(void)
 {
 	int (*perf_ui_init)(void);
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 069583bdc670..79dea95a7f68 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -13,6 +13,7 @@ libperf-y += find_bit.o
 libperf-y += kallsyms.o
 libperf-y += levenshtein.o
 libperf-y += llvm-utils.o
+libperf-y += memswap.o
 libperf-y += parse-events.o
 libperf-y += perf_regs.o
 libperf-y += path.o
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 683f8340460c..be1caabb9290 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -239,10 +239,20 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op
 	const char *s = strchr(ops->raw, '+');
 	const char *c = strchr(ops->raw, ',');
 
-	if (c++ != NULL)
+	/*
+	 * skip over possible up to 2 operands to get to address, e.g.:
+	 * tbnz	 w0, #26, ffff0000083cd190 <security_file_permission+0xd0>
+	 */
+	if (c++ != NULL) {
 		ops->target.addr = strtoull(c, NULL, 16);
-	else
+		if (!ops->target.addr) {
+			c = strchr(c, ',');
+			if (c++ != NULL)
+				ops->target.addr = strtoull(c, NULL, 16);
+		}
+	} else {
 		ops->target.addr = strtoull(ops->raw, NULL, 16);
+	}
 
 	if (s++ != NULL) {
 		ops->target.offset = strtoull(s, NULL, 16);
@@ -257,10 +267,27 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op
 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
 			   struct ins_operands *ops)
 {
+	const char *c = strchr(ops->raw, ',');
+
 	if (!ops->target.addr || ops->target.offset < 0)
 		return ins__raw_scnprintf(ins, bf, size, ops);
 
-	return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset);
+	if (c != NULL) {
+		const char *c2 = strchr(c + 1, ',');
+
+		/* check for 3-op insn */
+		if (c2 != NULL)
+			c = c2;
+		c++;
+
+		/* mirror arch objdump's space-after-comma style */
+		if (*c == ' ')
+			c++;
+	}
+
+	return scnprintf(bf, size, "%-6.6s %.*s%" PRIx64,
+			 ins->name, c ? c - ops->raw : 0, ops->raw,
+			 ops->target.offset);
 }
 
 static struct ins_ops jump_ops = {
@@ -1294,6 +1321,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
 	char linkname[PATH_MAX];
 	char *build_id_filename;
 	char *build_id_path = NULL;
+	char *pos;
 
 	if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
 	    !dso__is_kcore(dso))
@@ -1313,7 +1341,14 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
 	if (!build_id_path)
 		return -1;
 
-	dirname(build_id_path);
+	/*
+	 * old style build-id cache has name of XX/XXXXXXX.. while
+	 * new style has XX/XXXXXXX../{elf,kallsyms,vdso}.
+	 * extract the build-id part of dirname in the new style only.
+	 */
+	pos = strrchr(build_id_path, '/');
+	if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
+		dirname(build_id_path);
 
 	if (dso__is_kcore(dso) ||
 	    readlink(build_id_path, linkname, sizeof(linkname)) < 0 ||
@@ -1344,7 +1379,9 @@ static const char *annotate__norm_arch(const char *arch_name)
 	return normalize_arch((char *)arch_name);
 }
 
-int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize)
+int symbol__disassemble(struct symbol *sym, struct map *map,
+			const char *arch_name, size_t privsize,
+			struct arch **parch)
 {
 	struct dso *dso = map->dso;
 	char command[PATH_MAX * 2];
@@ -1370,6 +1407,9 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na
 	if (arch == NULL)
 		return -ENOTSUP;
 
+	if (parch)
+		*parch = arch;
+
 	if (arch->init) {
 		err = arch->init(arch);
 		if (err) {
@@ -1396,31 +1436,10 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na
 				sizeof(symfs_filename));
 		}
 	} else if (dso__needs_decompress(dso)) {
-		char tmp[PATH_MAX];
-		struct kmod_path m;
-		int fd;
-		bool ret;
-
-		if (kmod_path__parse_ext(&m, symfs_filename))
-			goto out;
-
-		snprintf(tmp, PATH_MAX, "/tmp/perf-kmod-XXXXXX");
-
-		fd = mkstemp(tmp);
-		if (fd < 0) {
-			free(m.ext);
-			goto out;
-		}
-
-		ret = decompress_to_file(m.ext, symfs_filename, fd);
-
-		if (ret)
-			pr_err("Cannot decompress %s %s\n", m.ext, symfs_filename);
-
-		free(m.ext);
-		close(fd);
+		char tmp[KMOD_DECOMP_LEN];
 
-		if (!ret)
+		if (dso__decompress_kmodule_path(dso, symfs_filename,
+						 tmp, sizeof(tmp)) < 0)
 			goto out;
 
 		strcpy(symfs_filename, tmp);
@@ -1429,7 +1448,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na
 	snprintf(command, sizeof(command),
 		 "%s %s%s --start-address=0x%016" PRIx64
 		 " --stop-address=0x%016" PRIx64
-		 " -l -d %s %s -C %s 2>/dev/null|grep -v %s:|expand",
+		 " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
 		 objdump_path ? objdump_path : "objdump",
 		 disassembler_style ? "-M " : "",
 		 disassembler_style ? disassembler_style : "",
@@ -1887,7 +1906,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
 	struct rb_root source_line = RB_ROOT;
 	u64 len;
 
-	if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0) < 0)
+	if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
+				0, NULL) < 0)
 		return -1;
 
 	len = symbol__size(sym);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 948aa8e6fd39..21055034aedd 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -158,7 +158,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
 int symbol__alloc_hist(struct symbol *sym);
 void symbol__annotate_zero_histograms(struct symbol *sym);
 
-int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize);
+int symbol__disassemble(struct symbol *sym, struct map *map,
+			const char *arch_name, size_t privsize,
+			struct arch **parch);
 
 enum symbol_disassemble_errno {
 	SYMBOL_ANNOTATE_ERRNO__SUCCESS		= 0,
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 0daf63b9ee3e..5547457566a7 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -322,6 +322,13 @@ static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues,
 	return auxtrace_queues__add_buffer(queues, idx, buffer);
 }
 
+static bool filter_cpu(struct perf_session *session, int cpu)
+{
+	unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap;
+
+	return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap);
+}
+
 int auxtrace_queues__add_event(struct auxtrace_queues *queues,
 			       struct perf_session *session,
 			       union perf_event *event, off_t data_offset,
@@ -331,6 +338,9 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues,
 	unsigned int idx;
 	int err;
 
+	if (filter_cpu(session, event->auxtrace.cpu))
+		return 0;
+
 	buffer = zalloc(sizeof(struct auxtrace_buffer));
 	if (!buffer)
 		return -ENOMEM;
@@ -947,6 +957,8 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
 	synth_opts->instructions = true;
 	synth_opts->branches = true;
 	synth_opts->transactions = true;
+	synth_opts->ptwrites = true;
+	synth_opts->pwr_events = true;
 	synth_opts->errors = true;
 	synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
 	synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
@@ -1030,6 +1042,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 		case 'x':
 			synth_opts->transactions = true;
 			break;
+		case 'w':
+			synth_opts->ptwrites = true;
+			break;
+		case 'p':
+			synth_opts->pwr_events = true;
+			break;
 		case 'e':
 			synth_opts->errors = true;
 			break;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 9f0de72d58e2..33b5e6cdf38c 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -59,6 +59,8 @@ enum itrace_period_type {
  * @instructions: whether to synthesize 'instructions' events
  * @branches: whether to synthesize 'branches' events
  * @transactions: whether to synthesize events for transactions
+ * @ptwrites: whether to synthesize events for ptwrites
+ * @pwr_events: whether to synthesize power events
  * @errors: whether to synthesize decoder error events
  * @dont_decode: whether to skip decoding entirely
  * @log: write a decoding log
@@ -72,6 +74,7 @@ enum itrace_period_type {
  * @period: 'instructions' events period
  * @period_type: 'instructions' events period type
  * @initial_skip: skip N events at the beginning.
+ * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all
  */
 struct itrace_synth_opts {
 	bool			set;
@@ -79,6 +82,8 @@ struct itrace_synth_opts {
 	bool			instructions;
 	bool			branches;
 	bool			transactions;
+	bool			ptwrites;
+	bool			pwr_events;
 	bool			errors;
 	bool			dont_decode;
 	bool			log;
@@ -92,6 +97,7 @@ struct itrace_synth_opts {
 	unsigned long long	period;
 	enum itrace_period_type	period_type;
 	unsigned long		initial_skip;
+	unsigned long		*cpu_bitmap;
 };
 
 /**
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 168cc49654e7..e0148b081bdf 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -278,51 +278,6 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size)
 	return bf;
 }
 
-bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size)
-{
-	char *id_name = NULL, *ch;
-	struct stat sb;
-	char sbuild_id[SBUILD_ID_SIZE];
-
-	if (!dso->has_build_id)
-		goto err;
-
-	build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
-	id_name = build_id_cache__linkname(sbuild_id, NULL, 0);
-	if (!id_name)
-		goto err;
-	if (access(id_name, F_OK))
-		goto err;
-	if (lstat(id_name, &sb) == -1)
-		goto err;
-	if ((size_t)sb.st_size > size - 1)
-		goto err;
-	if (readlink(id_name, bf, size - 1) < 0)
-		goto err;
-
-	bf[sb.st_size] = '\0';
-
-	/*
-	 * link should be:
-	 * ../../lib/modules/4.4.0-rc4/kernel/net/ipv4/netfilter/nf_nat_ipv4.ko/a09fe3eb3147dafa4e3b31dbd6257e4d696bdc92
-	 */
-	ch = strrchr(bf, '/');
-	if (!ch)
-		goto err;
-	if (ch - 3 < bf)
-		goto err;
-
-	free(id_name);
-	return strncmp(".ko", ch - 3, 3) == 0;
-err:
-	pr_err("Invalid build id: %s\n", id_name ? :
-					 dso->long_name ? :
-					 dso->short_name ? :
-					 "[unknown]");
-	free(id_name);
-	return false;
-}
-
 #define dsos__for_each_with_build_id(pos, head)	\
 	list_for_each_entry(pos, head, node)	\
 		if (!pos->has_build_id)		\
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index a96081121179..96690a55c62c 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -17,7 +17,6 @@ char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
 				    size_t size);
 
 char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size);
-bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size);
 
 int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
 			   struct perf_sample *sample, struct perf_evsel *evsel,
@@ -44,6 +43,10 @@ bool build_id_cache__cached(const char *sbuild_id);
 int build_id_cache__add_s(const char *sbuild_id,
 			  const char *name, bool is_kallsyms, bool is_vdso);
 int build_id_cache__remove_s(const char *sbuild_id);
+
+extern char buildid_dir[];
+
+void set_buildid_dir(const char *dir);
 void disable_buildid_cache(void);
 
 #endif
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 0328f297a748..0175765c05b9 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -5,6 +5,7 @@
 #include <subcmd/pager.h>
 #include "../ui/ui.h"
 
+#include <linux/compiler.h>
 #include <linux/string.h>
 
 #define CMD_EXEC_PATH "--exec-path"
@@ -24,6 +25,6 @@ static inline int is_absolute_path(const char *path)
 	return path[0] == '/';
 }
 
-char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+char *mkpath(const char *fmt, ...) __printf(1, 2);
 
 #endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 81fc29ac798f..b4204b43ed58 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -621,14 +621,19 @@ enum match_result {
 static enum match_result match_chain_srcline(struct callchain_cursor_node *node,
 					     struct callchain_list *cnode)
 {
-	char *left = get_srcline(cnode->ms.map->dso,
+	char *left = NULL;
+	char *right = NULL;
+	enum match_result ret = MATCH_EQ;
+	int cmp;
+
+	if (cnode->ms.map)
+		left = get_srcline(cnode->ms.map->dso,
 				 map__rip_2objdump(cnode->ms.map, cnode->ip),
 				 cnode->ms.sym, true, false);
-	char *right = get_srcline(node->map->dso,
+	if (node->map)
+		right = get_srcline(node->map->dso,
 				  map__rip_2objdump(node->map, node->ip),
 				  node->sym, true, false);
-	enum match_result ret = MATCH_EQ;
-	int cmp;
 
 	if (left && right)
 		cmp = strcmp(left, right);
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 8d724f0fa5a8..31a7dea248d0 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -335,32 +335,42 @@ static int perf_parse_long(const char *value, long *ret)
 	return 0;
 }
 
-static void die_bad_config(const char *name)
+static void bad_config(const char *name)
 {
 	if (config_file_name)
-		die("bad config value for '%s' in %s", name, config_file_name);
-	die("bad config value for '%s'", name);
+		pr_warning("bad config value for '%s' in %s, ignoring...\n", name, config_file_name);
+	else
+		pr_warning("bad config value for '%s', ignoring...\n", name);
 }
 
-u64 perf_config_u64(const char *name, const char *value)
+int perf_config_u64(u64 *dest, const char *name, const char *value)
 {
 	long long ret = 0;
 
-	if (!perf_parse_llong(value, &ret))
-		die_bad_config(name);
-	return (u64) ret;
+	if (!perf_parse_llong(value, &ret)) {
+		bad_config(name);
+		return -1;
+	}
+
+	*dest = ret;
+	return 0;
 }
 
-int perf_config_int(const char *name, const char *value)
+int perf_config_int(int *dest, const char *name, const char *value)
 {
 	long ret = 0;
-	if (!perf_parse_long(value, &ret))
-		die_bad_config(name);
-	return ret;
+	if (!perf_parse_long(value, &ret)) {
+		bad_config(name);
+		return -1;
+	}
+	*dest = ret;
+	return 0;
 }
 
 static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
 {
+	int ret;
+
 	*is_bool = 1;
 	if (!value)
 		return 1;
@@ -371,7 +381,7 @@ static int perf_config_bool_or_int(const char *name, const char *value, int *is_
 	if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off"))
 		return 0;
 	*is_bool = 0;
-	return perf_config_int(name, value);
+	return perf_config_int(&ret, name, value) < 0 ? -1 : ret;
 }
 
 int perf_config_bool(const char *name, const char *value)
@@ -657,8 +667,7 @@ static int perf_config_set__init(struct perf_config_set *set)
 
 	user_config = strdup(mkpath("%s/.perfconfig", home));
 	if (user_config == NULL) {
-		warning("Not enough memory to process %s/.perfconfig, "
-			"ignoring it.", home);
+		pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home);
 		goto out;
 	}
 
@@ -671,8 +680,7 @@ static int perf_config_set__init(struct perf_config_set *set)
 	ret = 0;
 
 	if (st.st_uid && (st.st_uid != geteuid())) {
-		warning("File %s not owned by current user or root, "
-			"ignoring it.", user_config);
+		pr_warning("File %s not owned by current user or root, ignoring it.", user_config);
 		goto out_free;
 	}
 
@@ -795,7 +803,8 @@ void perf_config_set__delete(struct perf_config_set *set)
  */
 int config_error_nonbool(const char *var)
 {
-	return error("Missing value for '%s'", var);
+	pr_err("Missing value for '%s'", var);
+	return -1;
 }
 
 void set_buildid_dir(const char *dir)
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index 1a59a6b43f8b..b6bb11f3f165 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -27,8 +27,8 @@ extern const char *config_exclusive_filename;
 typedef int (*config_fn_t)(const char *, const char *, void *);
 int perf_default_config(const char *, const char *, void *);
 int perf_config(config_fn_t fn, void *);
-int perf_config_int(const char *, const char *);
-u64 perf_config_u64(const char *, const char *);
+int perf_config_int(int *dest, const char *, const char *);
+int perf_config_u64(u64 *dest, const char *, const char *);
 int perf_config_bool(const char *, const char *);
 int config_error_nonbool(const char *);
 const char *perf_etc_perfconfig(void);
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 89d50318833d..3149b70799fd 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -1444,10 +1444,8 @@ static int convert__config(const char *var, const char *value, void *cb)
 {
 	struct convert *c = cb;
 
-	if (!strcmp(var, "convert.queue-size")) {
-		c->queue_size = perf_config_u64(var, value);
-		return 0;
-	}
+	if (!strcmp(var, "convert.queue-size"))
+		return perf_config_u64(&c->queue_size, var, value);
 
 	return 0;
 }
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 8a23ea1a71c7..c818bdb1c1ab 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -4,6 +4,7 @@
 
 #include <stdbool.h>
 #include <string.h>
+#include <linux/compiler.h>
 #include "event.h"
 #include "../ui/helpline.h"
 #include "../ui/progress.h"
@@ -40,16 +41,16 @@ extern int debug_data_convert;
 
 #define STRERR_BUFSIZE	128	/* For the buffer size of str_error_r */
 
-int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+int dump_printf(const char *fmt, ...) __printf(1, 2);
 void trace_event(union perf_event *event);
 
-int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2)));
-int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
+int ui__error(const char *format, ...) __printf(1, 2);
+int ui__warning(const char *format, ...) __printf(1, 2);
 
 void pr_stat(const char *fmt, ...);
 
-int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4)));
-int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5)));
+int eprintf(int level, int var, const char *fmt, ...) __printf(3, 4);
+int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5);
 int veprintf(int level, int var, const char *fmt, va_list args);
 
 int perf_debug_option(const char *str);
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index a96a99d2369f..4e7ab611377a 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -248,6 +248,64 @@ bool dso__needs_decompress(struct dso *dso)
 		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
 }
 
+static int decompress_kmodule(struct dso *dso, const char *name, char *tmpbuf)
+{
+	int fd = -1;
+	struct kmod_path m;
+
+	if (!dso__needs_decompress(dso))
+		return -1;
+
+	if (kmod_path__parse_ext(&m, dso->long_name))
+		return -1;
+
+	if (!m.comp)
+		goto out;
+
+	fd = mkstemp(tmpbuf);
+	if (fd < 0) {
+		dso->load_errno = errno;
+		goto out;
+	}
+
+	if (!decompress_to_file(m.ext, name, fd)) {
+		dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
+		close(fd);
+		fd = -1;
+	}
+
+out:
+	free(m.ext);
+	return fd;
+}
+
+int dso__decompress_kmodule_fd(struct dso *dso, const char *name)
+{
+	char tmpbuf[] = KMOD_DECOMP_NAME;
+	int fd;
+
+	fd = decompress_kmodule(dso, name, tmpbuf);
+	unlink(tmpbuf);
+	return fd;
+}
+
+int dso__decompress_kmodule_path(struct dso *dso, const char *name,
+				 char *pathname, size_t len)
+{
+	char tmpbuf[] = KMOD_DECOMP_NAME;
+	int fd;
+
+	fd = decompress_kmodule(dso, name, tmpbuf);
+	if (fd < 0) {
+		unlink(tmpbuf);
+		return -1;
+	}
+
+	strncpy(pathname, tmpbuf, len);
+	close(fd);
+	return 0;
+}
+
 /*
  * Parses kernel module specified in @path and updates
  * @m argument like:
@@ -335,6 +393,21 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 	return 0;
 }
 
+void dso__set_module_info(struct dso *dso, struct kmod_path *m,
+			  struct machine *machine)
+{
+	if (machine__is_host(machine))
+		dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
+	else
+		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
+
+	/* _KMODULE_COMP should be next to _KMODULE */
+	if (m->kmod && m->comp)
+		dso->symtab_type++;
+
+	dso__set_short_name(dso, strdup(m->name), true);
+}
+
 /*
  * Global list of open DSOs and the counter.
  */
@@ -381,7 +454,7 @@ static int do_open(char *name)
 
 static int __open_dso(struct dso *dso, struct machine *machine)
 {
-	int fd;
+	int fd = -EINVAL;
 	char *root_dir = (char *)"";
 	char *name = malloc(PATH_MAX);
 
@@ -392,15 +465,30 @@ static int __open_dso(struct dso *dso, struct machine *machine)
 		root_dir = machine->root_dir;
 
 	if (dso__read_binary_type_filename(dso, dso->binary_type,
-					    root_dir, name, PATH_MAX)) {
-		free(name);
-		return -EINVAL;
-	}
+					    root_dir, name, PATH_MAX))
+		goto out;
 
 	if (!is_regular_file(name))
-		return -EINVAL;
+		goto out;
+
+	if (dso__needs_decompress(dso)) {
+		char newpath[KMOD_DECOMP_LEN];
+		size_t len = sizeof(newpath);
+
+		if (dso__decompress_kmodule_path(dso, name, newpath, len) < 0) {
+			fd = -dso->load_errno;
+			goto out;
+		}
+
+		strcpy(name, newpath);
+	}
 
 	fd = do_open(name);
+
+	if (dso__needs_decompress(dso))
+		unlink(name);
+
+out:
 	free(name);
 	return fd;
 }
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 12350b171727..bd061ba7b47c 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -244,6 +244,12 @@ bool is_supported_compression(const char *ext);
 bool is_kernel_module(const char *pathname, int cpumode);
 bool decompress_to_file(const char *ext, const char *filename, int output_fd);
 bool dso__needs_decompress(struct dso *dso);
+int dso__decompress_kmodule_fd(struct dso *dso, const char *name);
+int dso__decompress_kmodule_path(struct dso *dso, const char *name,
+				 char *pathname, size_t len);
+
+#define KMOD_DECOMP_NAME  "/tmp/perf-kmod-XXXXXX"
+#define KMOD_DECOMP_LEN   sizeof(KMOD_DECOMP_NAME)
 
 struct kmod_path {
 	char *name;
@@ -259,6 +265,9 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 #define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true , false)
 #define kmod_path__parse_ext(__m, __p)  __kmod_path__parse(__m, __p, false, true)
 
+void dso__set_module_info(struct dso *dso, struct kmod_path *m,
+			  struct machine *machine);
+
 /*
  * The dso__data_* external interface provides following functions:
  *   dso__data_get_fd
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 142835c0ca0a..dc5c3bb69d73 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -8,6 +8,7 @@
 #include <unistd.h>
 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
 #include <api/fs/fs.h>
+#include <linux/perf_event.h>
 #include "event.h"
 #include "debug.h"
 #include "hist.h"
@@ -767,15 +768,16 @@ static int find_symbol_cb(void *arg, const char *name, char type,
 	return 1;
 }
 
-u64 kallsyms__get_function_start(const char *kallsyms_filename,
-				 const char *symbol_name)
+int kallsyms__get_function_start(const char *kallsyms_filename,
+				 const char *symbol_name, u64 *addr)
 {
 	struct process_symbol_args args = { .name = symbol_name, };
 
 	if (kallsyms__parse(kallsyms_filename, &args, find_symbol_cb) <= 0)
-		return 0;
+		return -1;
 
-	return args.start;
+	*addr = args.start;
+	return 0;
 }
 
 int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index db2de6413518..9967c87af7a6 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -229,7 +229,7 @@ struct build_id_event {
 enum perf_user_event_type { /* above any possible kernel type */
 	PERF_RECORD_USER_TYPE_START		= 64,
 	PERF_RECORD_HEADER_ATTR			= 64,
-	PERF_RECORD_HEADER_EVENT_TYPE		= 65, /* depreceated */
+	PERF_RECORD_HEADER_EVENT_TYPE		= 65, /* deprecated */
 	PERF_RECORD_HEADER_TRACING_DATA		= 66,
 	PERF_RECORD_HEADER_BUILD_ID		= 67,
 	PERF_RECORD_FINISHED_ROUND		= 68,
@@ -252,6 +252,127 @@ enum auxtrace_error_type {
 	PERF_AUXTRACE_ERROR_MAX
 };
 
+/* Attribute type for custom synthesized events */
+#define PERF_TYPE_SYNTH		(INT_MAX + 1U)
+
+/* Attribute config for custom synthesized events */
+enum perf_synth_id {
+	PERF_SYNTH_INTEL_PTWRITE,
+	PERF_SYNTH_INTEL_MWAIT,
+	PERF_SYNTH_INTEL_PWRE,
+	PERF_SYNTH_INTEL_EXSTOP,
+	PERF_SYNTH_INTEL_PWRX,
+	PERF_SYNTH_INTEL_CBR,
+};
+
+/*
+ * Raw data formats for synthesized events. Note that 4 bytes of padding are
+ * present to match the 'size' member of PERF_SAMPLE_RAW data which is always
+ * 8-byte aligned. That means we must dereference raw_data with an offset of 4.
+ * Refer perf_sample__synth_ptr() and perf_synth__raw_data().  It also means the
+ * structure sizes are 4 bytes bigger than the raw_size, refer
+ * perf_synth__raw_size().
+ */
+
+struct perf_synth_intel_ptwrite {
+	u32 padding;
+	union {
+		struct {
+			u32	ip		:  1,
+				reserved	: 31;
+		};
+		u32	flags;
+	};
+	u64	payload;
+};
+
+struct perf_synth_intel_mwait {
+	u32 padding;
+	u32 reserved;
+	union {
+		struct {
+			u64	hints		:  8,
+				reserved1	: 24,
+				extensions	:  2,
+				reserved2	: 30;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_pwre {
+	u32 padding;
+	u32 reserved;
+	union {
+		struct {
+			u64	reserved1	:  7,
+				hw		:  1,
+				subcstate	:  4,
+				cstate		:  4,
+				reserved2	: 48;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_exstop {
+	u32 padding;
+	union {
+		struct {
+			u32	ip		:  1,
+				reserved	: 31;
+		};
+		u32	flags;
+	};
+};
+
+struct perf_synth_intel_pwrx {
+	u32 padding;
+	u32 reserved;
+	union {
+		struct {
+			u64	deepest_cstate	:  4,
+				last_cstate	:  4,
+				wake_reason	:  4,
+				reserved1	: 52;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_cbr {
+	u32 padding;
+	union {
+		struct {
+			u32	cbr		:  8,
+				reserved1	:  8,
+				max_nonturbo	:  8,
+				reserved2	:  8;
+		};
+		u32	flags;
+	};
+	u32 freq;
+	u32 reserved3;
+};
+
+/*
+ * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
+ * 8-byte alignment.
+ */
+static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
+{
+	return sample->raw_data - 4;
+}
+
+static inline void *perf_synth__raw_data(void *p)
+{
+	return p + 4;
+}
+
+#define perf_synth__raw_size(d) (sizeof(d) - 4)
+
+#define perf_sample__bad_synth_size(s, d) ((s)->raw_size < sizeof(d) - 4)
+
 /*
  * The kernel collects the number of events it couldn't send in a stretch and
  * when possible sends this number in a PERF_RECORD_LOST event. The number of
@@ -675,10 +796,18 @@ size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf(union perf_event *event, FILE *fp);
 
-u64 kallsyms__get_function_start(const char *kallsyms_filename,
-				 const char *symbol_name);
+int kallsyms__get_function_start(const char *kallsyms_filename,
+				 const char *symbol_name, u64 *addr);
 
 void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max);
 void  cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
 			       u16 type, int max);
+
+void event_attr_init(struct perf_event_attr *attr);
+
+int perf_event_paranoid(void);
+
+extern int sysctl_perf_event_max_stack;
+extern int sysctl_perf_event_max_contexts_per_stack;
+
 #endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 94cea4398a13..8d601fbdd8d6 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -1,6 +1,7 @@
 #ifndef __PERF_EVLIST_H
 #define __PERF_EVLIST_H 1
 
+#include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/refcount.h>
 #include <linux/list.h>
@@ -34,7 +35,7 @@ struct perf_mmap {
 	refcount_t	 refcnt;
 	u64		 prev;
 	struct auxtrace_mmap auxtrace_mmap;
-	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));
+	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
 };
 
 static inline size_t
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 0e879097adfb..6f4882f8d61f 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -11,16 +11,21 @@
 #include <errno.h>
 #include <inttypes.h>
 #include <linux/bitops.h>
+#include <api/fs/fs.h>
 #include <api/fs/tracing_path.h>
 #include <traceevent/event-parse.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/perf_event.h>
+#include <linux/compiler.h>
 #include <linux/err.h>
 #include <sys/ioctl.h>
 #include <sys/resource.h>
+#include <sys/types.h>
+#include <dirent.h>
 #include "asm/bug.h"
 #include "callchain.h"
 #include "cgroup.h"
+#include "event.h"
 #include "evsel.h"
 #include "evlist.h"
 #include "util.h"
@@ -272,8 +277,20 @@ struct perf_evsel *perf_evsel__new_cycles(void)
 	struct perf_evsel *evsel;
 
 	event_attr_init(&attr);
+	/*
+	 * Unnamed union member, not supported as struct member named
+	 * initializer in older compilers such as gcc 4.4.7
+	 *
+	 * Just for probing the precise_ip:
+	 */
+	attr.sample_period = 1;
 
 	perf_event_attr__set_max_precise_ip(&attr);
+	/*
+	 * Now let the usual logic to set up the perf_event_attr defaults
+	 * to kick in when we return and before perf_evsel__open() is called.
+	 */
+	attr.sample_period = 0;
 
 	evsel = perf_evsel__new(&attr);
 	if (evsel == NULL)
@@ -1428,7 +1445,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 }
 
 static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
-				void *priv __attribute__((unused)))
+				void *priv __maybe_unused)
 {
 	return fprintf(fp, "  %-32s %s\n", name, val);
 }
@@ -2458,6 +2475,42 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
 	return false;
 }
 
+static bool find_process(const char *name)
+{
+	size_t len = strlen(name);
+	DIR *dir;
+	struct dirent *d;
+	int ret = -1;
+
+	dir = opendir(procfs__mountpoint());
+	if (!dir)
+		return false;
+
+	/* Walk through the directory. */
+	while (ret && (d = readdir(dir)) != NULL) {
+		char path[PATH_MAX];
+		char *data;
+		size_t size;
+
+		if ((d->d_type != DT_DIR) ||
+		     !strcmp(".", d->d_name) ||
+		     !strcmp("..", d->d_name))
+			continue;
+
+		scnprintf(path, sizeof(path), "%s/%s/comm",
+			  procfs__mountpoint(), d->d_name);
+
+		if (filename__read_str(path, &data, &size))
+			continue;
+
+		ret = strncmp(name, data, len);
+		free(data);
+	}
+
+	closedir(dir);
+	return ret ? false : true;
+}
+
 int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
 			      int err, char *msg, size_t size)
 {
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index e415aee6a245..583f3a602506 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -7,6 +7,7 @@
 #include "map.h"
 #include "strlist.h"
 #include "symbol.h"
+#include "srcline.h"
 
 static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
 {
@@ -168,6 +169,38 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
 			if (!print_oneline)
 				printed += fprintf(fp, "\n");
 
+			if (symbol_conf.inline_name && node->map) {
+				struct inline_node *inode;
+
+				addr = map__rip_2objdump(node->map, node->ip),
+				inode = dso__parse_addr_inlines(node->map->dso, addr);
+
+				if (inode) {
+					struct inline_list *ilist;
+
+					list_for_each_entry(ilist, &inode->val, list) {
+						if (print_arrow)
+							printed += fprintf(fp, " <-");
+
+						/* IP is same, just skip it */
+						if (print_ip)
+							printed += fprintf(fp, "%c%16s",
+									   s, "");
+						if (print_sym)
+							printed += fprintf(fp, " %s",
+									   ilist->funcname);
+						if (print_srcline)
+							printed += fprintf(fp, "\n  %s:%d",
+									   ilist->filename,
+									   ilist->line_nr);
+						if (!print_oneline)
+							printed += fprintf(fp, "\n");
+					}
+
+					inline_node__delete(inode);
+				}
+			}
+
 			if (symbol_conf.bt_stop_list &&
 			    node->sym &&
 			    strlist__has_entry(symbol_conf.bt_stop_list,
diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c
index 5980f7d256b1..40789d8603d0 100644
--- a/tools/perf/util/genelf_debug.c
+++ b/tools/perf/util/genelf_debug.c
@@ -11,6 +11,7 @@
  * @remark Copyright 2007 OProfile authors
  * @author Philippe Elie
  */
+#include <linux/compiler.h>
 #include <sys/types.h>
 #include <stdio.h>
 #include <getopt.h>
@@ -125,7 +126,7 @@ struct debug_line_header {
 	 * and filesize, last entry is followed by en empty string.
 	 */
 	/* follow the first program statement */
-} __attribute__((packed));
+} __packed;
 
 /* DWARF 2 spec talk only about one possible compilation unit header while
  * binutils can handle two flavours of dwarf 2, 32 and 64 bits, this is not
@@ -138,7 +139,7 @@ struct compilation_unit_header {
 	uhalf version;
 	uword debug_abbrev_offset;
 	ubyte pointer_size;
-} __attribute__((packed));
+} __packed;
 
 #define DW_LNS_num_opcode (DW_LNS_set_isa + 1)
 
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 948b2c5efb65..76ed7d03e500 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -8,6 +8,7 @@
 #include <unistd.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <linux/compiler.h>
 #include <linux/list.h>
 #include <linux/kernel.h>
 #include <linux/bitops.h>
@@ -19,6 +20,7 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "header.h"
+#include "memswap.h"
 #include "../perf.h"
 #include "trace-event.h"
 #include "session.h"
@@ -840,7 +842,7 @@ static int write_group_desc(int fd, struct perf_header *h __maybe_unused,
 
 /*
  * default get_cpuid(): nothing gets recorded
- * actual implementation must be in arch/$(ARCH)/util/header.c
+ * actual implementation must be in arch/$(SRCARCH)/util/header.c
  */
 int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused)
 {
@@ -1273,7 +1275,7 @@ error:
 }
 
 static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val,
-				void *priv __attribute__((unused)))
+				void *priv __maybe_unused)
 {
 	return fprintf(fp, ", %s = %s", name, val);
 }
@@ -1468,8 +1470,16 @@ static int __event_process_build_id(struct build_id_event *bev,
 
 		dso__set_build_id(dso, &bev->build_id);
 
-		if (!is_kernel_module(filename, cpumode))
-			dso->kernel = dso_type;
+		if (dso_type != DSO_TYPE_USER) {
+			struct kmod_path m = { .name = NULL, };
+
+			if (!kmod_path__parse_name(&m, filename) && m.kmod)
+				dso__set_module_info(dso, &m, machine);
+			else
+				dso->kernel = dso_type;
+
+			free(m.name);
+		}
 
 		build_id__sprintf(dso->build_id, sizeof(dso->build_id),
 				  sbuild_id);
diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
index 1c88ad6425b8..15b95300d7f3 100644
--- a/tools/perf/util/help-unknown-cmd.c
+++ b/tools/perf/util/help-unknown-cmd.c
@@ -12,7 +12,7 @@ static int perf_unknown_cmd_config(const char *var, const char *value,
 				   void *cb __maybe_unused)
 {
 	if (!strcmp(var, "help.autocorrect"))
-		autocorrect = perf_config_int(var,value);
+		return perf_config_int(&autocorrect, var,value);
 
 	return 0;
 }
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index b2834ac7b1f5..218ee2bac9a5 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -866,8 +866,6 @@ static void intel_bts_print_info(u64 *arr, int start, int finish)
 		fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
 }
 
-u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE];
-
 int intel_bts_process_auxtrace_info(union perf_event *event,
 				    struct perf_session *session)
 {
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 7cf7f7aca4d2..aa1593ce551d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -64,6 +64,25 @@ enum intel_pt_pkt_state {
 	INTEL_PT_STATE_FUP_NO_TIP,
 };
 
+static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
+{
+	switch (pkt_state) {
+	case INTEL_PT_STATE_NO_PSB:
+	case INTEL_PT_STATE_NO_IP:
+	case INTEL_PT_STATE_ERR_RESYNC:
+	case INTEL_PT_STATE_IN_SYNC:
+	case INTEL_PT_STATE_TNT:
+		return true;
+	case INTEL_PT_STATE_TIP:
+	case INTEL_PT_STATE_TIP_PGD:
+	case INTEL_PT_STATE_FUP:
+	case INTEL_PT_STATE_FUP_NO_TIP:
+		return false;
+	default:
+		return true;
+	};
+}
+
 #ifdef INTEL_PT_STRICT
 #define INTEL_PT_STATE_ERR1	INTEL_PT_STATE_NO_PSB
 #define INTEL_PT_STATE_ERR2	INTEL_PT_STATE_NO_PSB
@@ -87,11 +106,13 @@ struct intel_pt_decoder {
 	const unsigned char *buf;
 	size_t len;
 	bool return_compression;
+	bool branch_enable;
 	bool mtc_insn;
 	bool pge;
 	bool have_tma;
 	bool have_cyc;
 	bool fixup_last_mtc;
+	bool have_last_ip;
 	uint64_t pos;
 	uint64_t last_ip;
 	uint64_t ip;
@@ -99,6 +120,7 @@ struct intel_pt_decoder {
 	uint64_t timestamp;
 	uint64_t tsc_timestamp;
 	uint64_t ref_timestamp;
+	uint64_t sample_timestamp;
 	uint64_t ret_addr;
 	uint64_t ctc_timestamp;
 	uint64_t ctc_delta;
@@ -119,6 +141,7 @@ struct intel_pt_decoder {
 	int pkt_len;
 	int last_packet_type;
 	unsigned int cbr;
+	unsigned int cbr_seen;
 	unsigned int max_non_turbo_ratio;
 	double max_non_turbo_ratio_fp;
 	double cbr_cyc_to_tsc;
@@ -136,9 +159,18 @@ struct intel_pt_decoder {
 	bool continuous_period;
 	bool overflow;
 	bool set_fup_tx_flags;
+	bool set_fup_ptw;
+	bool set_fup_mwait;
+	bool set_fup_pwre;
+	bool set_fup_exstop;
 	unsigned int fup_tx_flags;
 	unsigned int tx_flags;
+	uint64_t fup_ptw_payload;
+	uint64_t fup_mwait_payload;
+	uint64_t fup_pwre_payload;
+	uint64_t cbr_payload;
 	uint64_t timestamp_insn_cnt;
+	uint64_t sample_insn_cnt;
 	uint64_t stuck_ip;
 	int no_progress;
 	int stuck_ip_prd;
@@ -192,6 +224,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
 	decoder->pgd_ip             = params->pgd_ip;
 	decoder->data               = params->data;
 	decoder->return_compression = params->return_compression;
+	decoder->branch_enable      = params->branch_enable;
 
 	decoder->period             = params->period;
 	decoder->period_type        = params->period_type;
@@ -398,6 +431,7 @@ static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
 static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
 {
 	decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
+	decoder->have_last_ip = true;
 }
 
 static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
@@ -635,6 +669,8 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
 	case INTEL_PT_PAD:
 	case INTEL_PT_VMCS:
 	case INTEL_PT_MNT:
+	case INTEL_PT_PTWRITE:
+	case INTEL_PT_PTWRITE_IP:
 		return 0;
 
 	case INTEL_PT_MTC:
@@ -675,6 +711,12 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
 		break;
 
 	case INTEL_PT_TSC:
+		/*
+		 * For now, do not support using TSC packets - refer
+		 * intel_pt_calc_cyc_to_tsc().
+		 */
+		if (data->from_mtc)
+			return 1;
 		timestamp = pkt_info->packet.payload |
 			    (data->timestamp & (0xffULL << 56));
 		if (data->from_mtc && timestamp < data->timestamp &&
@@ -733,6 +775,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
 
 	case INTEL_PT_TIP_PGD:
 	case INTEL_PT_TRACESTOP:
+	case INTEL_PT_EXSTOP:
+	case INTEL_PT_EXSTOP_IP:
+	case INTEL_PT_MWAIT:
+	case INTEL_PT_PWRE:
+	case INTEL_PT_PWRX:
 	case INTEL_PT_OVF:
 	case INTEL_PT_BAD: /* Does not happen */
 	default:
@@ -787,6 +834,14 @@ static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
 		.cbr_cyc_to_tsc = 0,
 	};
 
+	/*
+	 * For now, do not support using TSC packets for at least the reasons:
+	 * 1) timing might have stopped
+	 * 2) TSC packets within PSB+ can slip against CYC packets
+	 */
+	if (!from_mtc)
+		return;
+
 	intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data);
 }
 
@@ -898,6 +953,7 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
 
 	decoder->tot_insn_cnt += insn_cnt;
 	decoder->timestamp_insn_cnt += insn_cnt;
+	decoder->sample_insn_cnt += insn_cnt;
 	decoder->period_insn_cnt += insn_cnt;
 
 	if (err) {
@@ -990,6 +1046,57 @@ out_no_progress:
 	return err;
 }
 
+static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
+{
+	bool ret = false;
+
+	if (decoder->set_fup_tx_flags) {
+		decoder->set_fup_tx_flags = false;
+		decoder->tx_flags = decoder->fup_tx_flags;
+		decoder->state.type = INTEL_PT_TRANSACTION;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		decoder->state.flags = decoder->fup_tx_flags;
+		return true;
+	}
+	if (decoder->set_fup_ptw) {
+		decoder->set_fup_ptw = false;
+		decoder->state.type = INTEL_PT_PTW;
+		decoder->state.flags |= INTEL_PT_FUP_IP;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		decoder->state.ptw_payload = decoder->fup_ptw_payload;
+		return true;
+	}
+	if (decoder->set_fup_mwait) {
+		decoder->set_fup_mwait = false;
+		decoder->state.type = INTEL_PT_MWAIT_OP;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		decoder->state.mwait_payload = decoder->fup_mwait_payload;
+		ret = true;
+	}
+	if (decoder->set_fup_pwre) {
+		decoder->set_fup_pwre = false;
+		decoder->state.type |= INTEL_PT_PWR_ENTRY;
+		decoder->state.type &= ~INTEL_PT_BRANCH;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		decoder->state.pwre_payload = decoder->fup_pwre_payload;
+		ret = true;
+	}
+	if (decoder->set_fup_exstop) {
+		decoder->set_fup_exstop = false;
+		decoder->state.type |= INTEL_PT_EX_STOP;
+		decoder->state.type &= ~INTEL_PT_BRANCH;
+		decoder->state.flags |= INTEL_PT_FUP_IP;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		ret = true;
+	}
+	return ret;
+}
+
 static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
 {
 	struct intel_pt_insn intel_pt_insn;
@@ -1003,15 +1110,8 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
 		if (err == INTEL_PT_RETURN)
 			return 0;
 		if (err == -EAGAIN) {
-			if (decoder->set_fup_tx_flags) {
-				decoder->set_fup_tx_flags = false;
-				decoder->tx_flags = decoder->fup_tx_flags;
-				decoder->state.type = INTEL_PT_TRANSACTION;
-				decoder->state.from_ip = decoder->ip;
-				decoder->state.to_ip = 0;
-				decoder->state.flags = decoder->fup_tx_flags;
+			if (intel_pt_fup_event(decoder))
 				return 0;
-			}
 			return err;
 		}
 		decoder->set_fup_tx_flags = false;
@@ -1360,7 +1460,9 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
 
 static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
 {
-	unsigned int cbr = decoder->packet.payload;
+	unsigned int cbr = decoder->packet.payload & 0xff;
+
+	decoder->cbr_payload = decoder->packet.payload;
 
 	if (decoder->cbr == cbr)
 		return;
@@ -1417,6 +1519,13 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
 		case INTEL_PT_TRACESTOP:
 		case INTEL_PT_BAD:
 		case INTEL_PT_PSB:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
 			decoder->have_tma = false;
 			intel_pt_log("ERROR: Unexpected packet\n");
 			return -EAGAIN;
@@ -1446,7 +1555,8 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
 
 		case INTEL_PT_FUP:
 			decoder->pge = true;
-			intel_pt_set_last_ip(decoder);
+			if (decoder->packet.count)
+				intel_pt_set_last_ip(decoder);
 			break;
 
 		case INTEL_PT_MODE_TSX:
@@ -1497,6 +1607,13 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
 		case INTEL_PT_MODE_TSX:
 		case INTEL_PT_BAD:
 		case INTEL_PT_PSBEND:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
 			intel_pt_log("ERROR: Missing TIP after FUP\n");
 			decoder->pkt_state = INTEL_PT_STATE_ERR3;
 			return -ENOENT;
@@ -1625,6 +1742,15 @@ next:
 				break;
 			}
 			intel_pt_set_last_ip(decoder);
+			if (!decoder->branch_enable) {
+				decoder->ip = decoder->last_ip;
+				if (intel_pt_fup_event(decoder))
+					return 0;
+				no_tip = false;
+				break;
+			}
+			if (decoder->set_fup_mwait)
+				no_tip = true;
 			err = intel_pt_walk_fup(decoder);
 			if (err != -EAGAIN) {
 				if (err)
@@ -1650,6 +1776,8 @@ next:
 			break;
 
 		case INTEL_PT_PSB:
+			decoder->last_ip = 0;
+			decoder->have_last_ip = true;
 			intel_pt_clear_stack(&decoder->stack);
 			err = intel_pt_walk_psbend(decoder);
 			if (err == -EAGAIN)
@@ -1696,6 +1824,16 @@ next:
 
 		case INTEL_PT_CBR:
 			intel_pt_calc_cbr(decoder);
+			if (!decoder->branch_enable &&
+			    decoder->cbr != decoder->cbr_seen) {
+				decoder->cbr_seen = decoder->cbr;
+				decoder->state.type = INTEL_PT_CBR_CHG;
+				decoder->state.from_ip = decoder->ip;
+				decoder->state.to_ip = 0;
+				decoder->state.cbr_payload =
+							decoder->packet.payload;
+				return 0;
+			}
 			break;
 
 		case INTEL_PT_MODE_EXEC:
@@ -1722,6 +1860,71 @@ next:
 		case INTEL_PT_PAD:
 			break;
 
+		case INTEL_PT_PTWRITE_IP:
+			decoder->fup_ptw_payload = decoder->packet.payload;
+			err = intel_pt_get_next_packet(decoder);
+			if (err)
+				return err;
+			if (decoder->packet.type == INTEL_PT_FUP) {
+				decoder->set_fup_ptw = true;
+				no_tip = true;
+			} else {
+				intel_pt_log_at("ERROR: Missing FUP after PTWRITE",
+						decoder->pos);
+			}
+			goto next;
+
+		case INTEL_PT_PTWRITE:
+			decoder->state.type = INTEL_PT_PTW;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			decoder->state.ptw_payload = decoder->packet.payload;
+			return 0;
+
+		case INTEL_PT_MWAIT:
+			decoder->fup_mwait_payload = decoder->packet.payload;
+			decoder->set_fup_mwait = true;
+			break;
+
+		case INTEL_PT_PWRE:
+			if (decoder->set_fup_mwait) {
+				decoder->fup_pwre_payload =
+							decoder->packet.payload;
+				decoder->set_fup_pwre = true;
+				break;
+			}
+			decoder->state.type = INTEL_PT_PWR_ENTRY;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			decoder->state.pwrx_payload = decoder->packet.payload;
+			return 0;
+
+		case INTEL_PT_EXSTOP_IP:
+			err = intel_pt_get_next_packet(decoder);
+			if (err)
+				return err;
+			if (decoder->packet.type == INTEL_PT_FUP) {
+				decoder->set_fup_exstop = true;
+				no_tip = true;
+			} else {
+				intel_pt_log_at("ERROR: Missing FUP after EXSTOP",
+						decoder->pos);
+			}
+			goto next;
+
+		case INTEL_PT_EXSTOP:
+			decoder->state.type = INTEL_PT_EX_STOP;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			return 0;
+
+		case INTEL_PT_PWRX:
+			decoder->state.type = INTEL_PT_PWR_EXIT;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			decoder->state.pwrx_payload = decoder->packet.payload;
+			return 0;
+
 		default:
 			return intel_pt_bug(decoder);
 		}
@@ -1730,8 +1933,9 @@ next:
 
 static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
 {
-	return decoder->last_ip || decoder->packet.count == 0 ||
-	       decoder->packet.count == 3 || decoder->packet.count == 6;
+	return decoder->packet.count &&
+	       (decoder->have_last_ip || decoder->packet.count == 3 ||
+		decoder->packet.count == 6);
 }
 
 /* Walk PSB+ packets to get in sync. */
@@ -1750,6 +1954,13 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
 			__fallthrough;
 		case INTEL_PT_TIP_PGE:
 		case INTEL_PT_TIP:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
 			intel_pt_log("ERROR: Unexpected packet\n");
 			return -ENOENT;
 
@@ -1854,14 +2065,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
 			break;
 
 		case INTEL_PT_FUP:
-			if (decoder->overflow) {
-				if (intel_pt_have_ip(decoder))
-					intel_pt_set_ip(decoder);
-				if (decoder->ip)
-					return 0;
-			}
-			if (decoder->packet.count)
-				intel_pt_set_last_ip(decoder);
+			if (intel_pt_have_ip(decoder))
+				intel_pt_set_ip(decoder);
+			if (decoder->ip)
+				return 0;
 			break;
 
 		case INTEL_PT_MTC:
@@ -1910,6 +2117,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
 			break;
 
 		case INTEL_PT_PSB:
+			decoder->last_ip = 0;
+			decoder->have_last_ip = true;
+			intel_pt_clear_stack(&decoder->stack);
 			err = intel_pt_walk_psb(decoder);
 			if (err)
 				return err;
@@ -1925,6 +2135,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
 		case INTEL_PT_VMCS:
 		case INTEL_PT_MNT:
 		case INTEL_PT_PAD:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
 		default:
 			break;
 		}
@@ -1935,6 +2152,19 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
 {
 	int err;
 
+	decoder->set_fup_tx_flags = false;
+	decoder->set_fup_ptw = false;
+	decoder->set_fup_mwait = false;
+	decoder->set_fup_pwre = false;
+	decoder->set_fup_exstop = false;
+
+	if (!decoder->branch_enable) {
+		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+		decoder->overflow = false;
+		decoder->state.type = 0; /* Do not have a sample */
+		return 0;
+	}
+
 	intel_pt_log("Scanning for full IP\n");
 	err = intel_pt_walk_to_ip(decoder);
 	if (err)
@@ -2043,6 +2273,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
 
 	decoder->pge = false;
 	decoder->continuous_period = false;
+	decoder->have_last_ip = false;
 	decoder->last_ip = 0;
 	decoder->ip = 0;
 	intel_pt_clear_stack(&decoder->stack);
@@ -2051,6 +2282,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
 	if (err)
 		return err;
 
+	decoder->have_last_ip = true;
 	decoder->pkt_state = INTEL_PT_STATE_NO_IP;
 
 	err = intel_pt_walk_psb(decoder);
@@ -2069,7 +2301,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
 
 static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
 {
-	uint64_t est = decoder->timestamp_insn_cnt << 1;
+	uint64_t est = decoder->sample_insn_cnt << 1;
 
 	if (!decoder->cbr || !decoder->max_non_turbo_ratio)
 		goto out;
@@ -2077,7 +2309,7 @@ static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
 	est *= decoder->max_non_turbo_ratio;
 	est /= decoder->cbr;
 out:
-	return decoder->timestamp + est;
+	return decoder->sample_timestamp + est;
 }
 
 const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
@@ -2093,8 +2325,10 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
 			err = intel_pt_sync(decoder);
 			break;
 		case INTEL_PT_STATE_NO_IP:
+			decoder->have_last_ip = false;
 			decoder->last_ip = 0;
-			/* Fall through */
+			decoder->ip = 0;
+			__fallthrough;
 		case INTEL_PT_STATE_ERR_RESYNC:
 			err = intel_pt_sync_ip(decoder);
 			break;
@@ -2130,15 +2364,29 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
 		}
 	} while (err == -ENOLINK);
 
-	decoder->state.err = err ? intel_pt_ext_err(err) : 0;
-	decoder->state.timestamp = decoder->timestamp;
+	if (err) {
+		decoder->state.err = intel_pt_ext_err(err);
+		decoder->state.from_ip = decoder->ip;
+		decoder->sample_timestamp = decoder->timestamp;
+		decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+	} else {
+		decoder->state.err = 0;
+		if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
+			decoder->cbr_seen = decoder->cbr;
+			decoder->state.type |= INTEL_PT_CBR_CHG;
+			decoder->state.cbr_payload = decoder->cbr_payload;
+		}
+		if (intel_pt_sample_time(decoder->pkt_state)) {
+			decoder->sample_timestamp = decoder->timestamp;
+			decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+		}
+	}
+
+	decoder->state.timestamp = decoder->sample_timestamp;
 	decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
 	decoder->state.cr3 = decoder->cr3;
 	decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
 
-	if (err)
-		decoder->state.from_ip = decoder->ip;
-
 	return &decoder->state;
 }
 
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index e90619a43c0c..921b22e8ca0e 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -25,11 +25,18 @@
 #define INTEL_PT_IN_TX		(1 << 0)
 #define INTEL_PT_ABORT_TX	(1 << 1)
 #define INTEL_PT_ASYNC		(1 << 2)
+#define INTEL_PT_FUP_IP		(1 << 3)
 
 enum intel_pt_sample_type {
 	INTEL_PT_BRANCH		= 1 << 0,
 	INTEL_PT_INSTRUCTION	= 1 << 1,
 	INTEL_PT_TRANSACTION	= 1 << 2,
+	INTEL_PT_PTW		= 1 << 3,
+	INTEL_PT_MWAIT_OP	= 1 << 4,
+	INTEL_PT_PWR_ENTRY	= 1 << 5,
+	INTEL_PT_EX_STOP	= 1 << 6,
+	INTEL_PT_PWR_EXIT	= 1 << 7,
+	INTEL_PT_CBR_CHG	= 1 << 8,
 };
 
 enum intel_pt_period_type {
@@ -63,6 +70,11 @@ struct intel_pt_state {
 	uint64_t timestamp;
 	uint64_t est_timestamp;
 	uint64_t trace_nr;
+	uint64_t ptw_payload;
+	uint64_t mwait_payload;
+	uint64_t pwre_payload;
+	uint64_t pwrx_payload;
+	uint64_t cbr_payload;
 	uint32_t flags;
 	enum intel_pt_insn_op insn_op;
 	int insn_len;
@@ -87,6 +99,7 @@ struct intel_pt_params {
 	bool (*pgd_ip)(uint64_t ip, void *data);
 	void *data;
 	bool return_compression;
+	bool branch_enable;
 	uint64_t period;
 	enum intel_pt_period_type period_type;
 	unsigned max_non_turbo_ratio;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
index debe751dc3d6..45b64f93f358 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -16,6 +16,7 @@
 #ifndef INCLUDE__INTEL_PT_LOG_H__
 #define INCLUDE__INTEL_PT_LOG_H__
 
+#include <linux/compiler.h>
 #include <stdint.h>
 #include <inttypes.h>
 
@@ -34,8 +35,7 @@ void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
 void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
 				 uint64_t ip);
 
-__attribute__((format(printf, 1, 2)))
-void __intel_pt_log(const char *fmt, ...);
+void __intel_pt_log(const char *fmt, ...) __printf(1, 2);
 
 #define intel_pt_log(fmt, ...) \
 	do { \
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 7528ae4f7e28..ba4c9dd18643 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -64,6 +64,13 @@ static const char * const packet_name[] = {
 	[INTEL_PT_PIP]		= "PIP",
 	[INTEL_PT_OVF]		= "OVF",
 	[INTEL_PT_MNT]		= "MNT",
+	[INTEL_PT_PTWRITE]	= "PTWRITE",
+	[INTEL_PT_PTWRITE_IP]	= "PTWRITE",
+	[INTEL_PT_EXSTOP]	= "EXSTOP",
+	[INTEL_PT_EXSTOP_IP]	= "EXSTOP",
+	[INTEL_PT_MWAIT]	= "MWAIT",
+	[INTEL_PT_PWRE]		= "PWRE",
+	[INTEL_PT_PWRX]		= "PWRX",
 };
 
 const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
@@ -123,7 +130,7 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
 	if (len < 4)
 		return INTEL_PT_NEED_MORE_BYTES;
 	packet->type = INTEL_PT_CBR;
-	packet->payload = buf[2];
+	packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2));
 	return 4;
 }
 
@@ -217,12 +224,80 @@ static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
 	}
 }
 
+static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len,
+				struct intel_pt_pkt *packet)
+{
+	packet->count = (buf[1] >> 5) & 0x3;
+	packet->type = buf[1] & BIT(7) ? INTEL_PT_PTWRITE_IP :
+					 INTEL_PT_PTWRITE;
+
+	switch (packet->count) {
+	case 0:
+		if (len < 6)
+			return INTEL_PT_NEED_MORE_BYTES;
+		packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2));
+		return 6;
+	case 1:
+		if (len < 10)
+			return INTEL_PT_NEED_MORE_BYTES;
+		packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
+		return 10;
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+}
+
+static int intel_pt_get_exstop(struct intel_pt_pkt *packet)
+{
+	packet->type = INTEL_PT_EXSTOP;
+	return 2;
+}
+
+static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet)
+{
+	packet->type = INTEL_PT_EXSTOP_IP;
+	return 2;
+}
+
+static int intel_pt_get_mwait(const unsigned char *buf, size_t len,
+			      struct intel_pt_pkt *packet)
+{
+	if (len < 10)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_MWAIT;
+	packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
+	return 10;
+}
+
+static int intel_pt_get_pwre(const unsigned char *buf, size_t len,
+			     struct intel_pt_pkt *packet)
+{
+	if (len < 4)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_PWRE;
+	memcpy_le64(&packet->payload, buf + 2, 2);
+	return 4;
+}
+
+static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
+			     struct intel_pt_pkt *packet)
+{
+	if (len < 7)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_PWRX;
+	memcpy_le64(&packet->payload, buf + 2, 5);
+	return 7;
+}
+
 static int intel_pt_get_ext(const unsigned char *buf, size_t len,
 			    struct intel_pt_pkt *packet)
 {
 	if (len < 2)
 		return INTEL_PT_NEED_MORE_BYTES;
 
+	if ((buf[1] & 0x1f) == 0x12)
+		return intel_pt_get_ptwrite(buf, len, packet);
+
 	switch (buf[1]) {
 	case 0xa3: /* Long TNT */
 		return intel_pt_get_long_tnt(buf, len, packet);
@@ -244,6 +319,16 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
 		return intel_pt_get_tma(buf, len, packet);
 	case 0xC3: /* 3-byte header */
 		return intel_pt_get_3byte(buf, len, packet);
+	case 0x62: /* EXSTOP no IP */
+		return intel_pt_get_exstop(packet);
+	case 0xE2: /* EXSTOP with IP */
+		return intel_pt_get_exstop_ip(packet);
+	case 0xC2: /* MWAIT */
+		return intel_pt_get_mwait(buf, len, packet);
+	case 0x22: /* PWRE */
+		return intel_pt_get_pwre(buf, len, packet);
+	case 0xA2: /* PWRX */
+		return intel_pt_get_pwrx(buf, len, packet);
 	default:
 		return INTEL_PT_BAD_PACKET;
 	}
@@ -522,6 +607,29 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
 		ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
 			       name, payload, nr);
 		return ret;
+	case INTEL_PT_PTWRITE:
+		return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
+	case INTEL_PT_PTWRITE_IP:
+		return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
+	case INTEL_PT_EXSTOP:
+		return snprintf(buf, buf_len, "%s IP:0", name);
+	case INTEL_PT_EXSTOP_IP:
+		return snprintf(buf, buf_len, "%s IP:1", name);
+	case INTEL_PT_MWAIT:
+		return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x",
+				name, payload, (unsigned int)(payload & 0xff),
+				(unsigned int)((payload >> 32) & 0x3));
+	case INTEL_PT_PWRE:
+		return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u",
+				name, payload, !!(payload & 0x80),
+				(unsigned int)((payload >> 12) & 0xf),
+				(unsigned int)((payload >> 8) & 0xf));
+	case INTEL_PT_PWRX:
+		return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x",
+				name, payload,
+				(unsigned int)((payload >> 4) & 0xf),
+				(unsigned int)(payload & 0xf),
+				(unsigned int)((payload >> 8) & 0xf));
 	default:
 		break;
 	}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
index 781bb79883bd..73ddc3a88d07 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -52,6 +52,13 @@ enum intel_pt_pkt_type {
 	INTEL_PT_PIP,
 	INTEL_PT_OVF,
 	INTEL_PT_MNT,
+	INTEL_PT_PTWRITE,
+	INTEL_PT_PTWRITE_IP,
+	INTEL_PT_EXSTOP,
+	INTEL_PT_EXSTOP_IP,
+	INTEL_PT_MWAIT,
+	INTEL_PT_PWRE,
+	INTEL_PT_PWRX,
 };
 
 struct intel_pt_pkt {
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
index 767be7c76034..12e377184ee4 100644
--- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -1009,7 +1009,7 @@ GrpTable: Grp15
 1: fxstor | RDGSBASE Ry (F3),(11B)
 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
-4: XSAVE
+4: XSAVE | ptwrite Ey (F3),(11B)
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
 7: clflush | clflushopt (66) | sfence (11B)
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index bdd4a28c6cee..b58f9fd1e2ee 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -23,6 +23,7 @@
 #include "../perf.h"
 #include "session.h"
 #include "machine.h"
+#include "memswap.h"
 #include "sort.h"
 #include "tool.h"
 #include "event.h"
@@ -80,7 +81,6 @@ struct intel_pt {
 
 	bool sample_instructions;
 	u64 instructions_sample_type;
-	u64 instructions_sample_period;
 	u64 instructions_id;
 
 	bool sample_branches;
@@ -92,6 +92,18 @@ struct intel_pt {
 	u64 transactions_sample_type;
 	u64 transactions_id;
 
+	bool sample_ptwrites;
+	u64 ptwrites_sample_type;
+	u64 ptwrites_id;
+
+	bool sample_pwr_events;
+	u64 pwr_events_sample_type;
+	u64 mwait_id;
+	u64 pwre_id;
+	u64 exstop_id;
+	u64 pwrx_id;
+	u64 cbr_id;
+
 	bool synth_needs_swap;
 
 	u64 tsc_bit;
@@ -102,6 +114,7 @@ struct intel_pt {
 	u64 cyc_bit;
 	u64 noretcomp_bit;
 	unsigned max_non_turbo_ratio;
+	unsigned cbr2khz;
 
 	unsigned long num_events;
 
@@ -667,6 +680,19 @@ static bool intel_pt_return_compression(struct intel_pt *pt)
 	return true;
 }
 
+static bool intel_pt_branch_enable(struct intel_pt *pt)
+{
+	struct perf_evsel *evsel;
+	u64 config;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (intel_pt_get_config(pt, &evsel->attr, &config) &&
+		    (config & 1) && !(config & 0x2000))
+			return false;
+	}
+	return true;
+}
+
 static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
 {
 	struct perf_evsel *evsel;
@@ -798,6 +824,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 	params.walk_insn = intel_pt_walk_next_insn;
 	params.data = ptq;
 	params.return_compression = intel_pt_return_compression(pt);
+	params.branch_enable = intel_pt_branch_enable(pt);
 	params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
 	params.mtc_period = intel_pt_mtc_period(pt);
 	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
@@ -1043,6 +1070,36 @@ static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
 		bs->nr += 1;
 }
 
+static inline bool intel_pt_skip_event(struct intel_pt *pt)
+{
+	return pt->synth_opts.initial_skip &&
+	       pt->num_events++ < pt->synth_opts.initial_skip;
+}
+
+static void intel_pt_prep_b_sample(struct intel_pt *pt,
+				   struct intel_pt_queue *ptq,
+				   union perf_event *event,
+				   struct perf_sample *sample)
+{
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.misc = PERF_RECORD_MISC_USER;
+	event->sample.header.size = sizeof(struct perf_event_header);
+
+	if (!pt->timeless_decoding)
+		sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+
+	sample->cpumode = PERF_RECORD_MISC_USER;
+	sample->ip = ptq->state->from_ip;
+	sample->pid = ptq->pid;
+	sample->tid = ptq->tid;
+	sample->addr = ptq->state->to_ip;
+	sample->period = 1;
+	sample->cpu = ptq->cpu;
+	sample->flags = ptq->flags;
+	sample->insn_len = ptq->insn_len;
+	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+}
+
 static int intel_pt_inject_event(union perf_event *event,
 				 struct perf_sample *sample, u64 type,
 				 bool swapped)
@@ -1051,9 +1108,35 @@ static int intel_pt_inject_event(union perf_event *event,
 	return perf_event__synthesize_sample(event, type, 0, sample, swapped);
 }
 
-static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
+static inline int intel_pt_opt_inject(struct intel_pt *pt,
+				      union perf_event *event,
+				      struct perf_sample *sample, u64 type)
+{
+	if (!pt->synth_opts.inject)
+		return 0;
+
+	return intel_pt_inject_event(event, sample, type, pt->synth_needs_swap);
+}
+
+static int intel_pt_deliver_synth_b_event(struct intel_pt *pt,
+					  union perf_event *event,
+					  struct perf_sample *sample, u64 type)
 {
 	int ret;
+
+	ret = intel_pt_opt_inject(pt, event, sample, type);
+	if (ret)
+		return ret;
+
+	ret = perf_session__deliver_synth_event(pt->session, event, sample);
+	if (ret)
+		pr_err("Intel PT: failed to deliver event, error %d\n", ret);
+
+	return ret;
+}
+
+static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
+{
 	struct intel_pt *pt = ptq->pt;
 	union perf_event *event = ptq->event_buf;
 	struct perf_sample sample = { .ip = 0, };
@@ -1065,29 +1148,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
 		return 0;
 
-	if (pt->synth_opts.initial_skip &&
-	    pt->num_events++ < pt->synth_opts.initial_skip)
+	if (intel_pt_skip_event(pt))
 		return 0;
 
-	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
-	event->sample.header.size = sizeof(struct perf_event_header);
+	intel_pt_prep_b_sample(pt, ptq, event, &sample);
 
-	if (!pt->timeless_decoding)
-		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
-
-	sample.cpumode = PERF_RECORD_MISC_USER;
-	sample.ip = ptq->state->from_ip;
-	sample.pid = ptq->pid;
-	sample.tid = ptq->tid;
-	sample.addr = ptq->state->to_ip;
 	sample.id = ptq->pt->branches_id;
 	sample.stream_id = ptq->pt->branches_id;
-	sample.period = 1;
-	sample.cpu = ptq->cpu;
-	sample.flags = ptq->flags;
-	sample.insn_len = ptq->insn_len;
-	memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
 
 	/*
 	 * perf report cannot handle events without a branch stack when using
@@ -1104,144 +1171,251 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 		sample.branch_stack = (struct branch_stack *)&dummy_bs;
 	}
 
-	if (pt->synth_opts.inject) {
-		ret = intel_pt_inject_event(event, &sample,
-					    pt->branches_sample_type,
-					    pt->synth_needs_swap);
-		if (ret)
-			return ret;
+	return intel_pt_deliver_synth_b_event(pt, event, &sample,
+					      pt->branches_sample_type);
+}
+
+static void intel_pt_prep_sample(struct intel_pt *pt,
+				 struct intel_pt_queue *ptq,
+				 union perf_event *event,
+				 struct perf_sample *sample)
+{
+	intel_pt_prep_b_sample(pt, ptq, event, sample);
+
+	if (pt->synth_opts.callchain) {
+		thread_stack__sample(ptq->thread, ptq->chain,
+				     pt->synth_opts.callchain_sz, sample->ip);
+		sample->callchain = ptq->chain;
 	}
 
-	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
-	if (ret)
-		pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
-		       ret);
+	if (pt->synth_opts.last_branch) {
+		intel_pt_copy_last_branch_rb(ptq);
+		sample->branch_stack = ptq->last_branch;
+	}
+}
+
+static inline int intel_pt_deliver_synth_event(struct intel_pt *pt,
+					       struct intel_pt_queue *ptq,
+					       union perf_event *event,
+					       struct perf_sample *sample,
+					       u64 type)
+{
+	int ret;
+
+	ret = intel_pt_deliver_synth_b_event(pt, event, sample, type);
+
+	if (pt->synth_opts.last_branch)
+		intel_pt_reset_last_branch_rb(ptq);
 
 	return ret;
 }
 
 static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
 {
-	int ret;
 	struct intel_pt *pt = ptq->pt;
 	union perf_event *event = ptq->event_buf;
 	struct perf_sample sample = { .ip = 0, };
 
-	if (pt->synth_opts.initial_skip &&
-	    pt->num_events++ < pt->synth_opts.initial_skip)
+	if (intel_pt_skip_event(pt))
 		return 0;
 
-	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
-	event->sample.header.size = sizeof(struct perf_event_header);
-
-	if (!pt->timeless_decoding)
-		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+	intel_pt_prep_sample(pt, ptq, event, &sample);
 
-	sample.cpumode = PERF_RECORD_MISC_USER;
-	sample.ip = ptq->state->from_ip;
-	sample.pid = ptq->pid;
-	sample.tid = ptq->tid;
-	sample.addr = ptq->state->to_ip;
 	sample.id = ptq->pt->instructions_id;
 	sample.stream_id = ptq->pt->instructions_id;
 	sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
-	sample.cpu = ptq->cpu;
-	sample.flags = ptq->flags;
-	sample.insn_len = ptq->insn_len;
-	memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
 
 	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
 
-	if (pt->synth_opts.callchain) {
-		thread_stack__sample(ptq->thread, ptq->chain,
-				     pt->synth_opts.callchain_sz, sample.ip);
-		sample.callchain = ptq->chain;
-	}
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->instructions_sample_type);
+}
 
-	if (pt->synth_opts.last_branch) {
-		intel_pt_copy_last_branch_rb(ptq);
-		sample.branch_stack = ptq->last_branch;
-	}
+static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
 
-	if (pt->synth_opts.inject) {
-		ret = intel_pt_inject_event(event, &sample,
-					    pt->instructions_sample_type,
-					    pt->synth_needs_swap);
-		if (ret)
-			return ret;
-	}
+	if (intel_pt_skip_event(pt))
+		return 0;
 
-	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
-	if (ret)
-		pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
-		       ret);
+	intel_pt_prep_sample(pt, ptq, event, &sample);
 
-	if (pt->synth_opts.last_branch)
-		intel_pt_reset_last_branch_rb(ptq);
+	sample.id = ptq->pt->transactions_id;
+	sample.stream_id = ptq->pt->transactions_id;
 
-	return ret;
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->transactions_sample_type);
 }
 
-static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
+static void intel_pt_prep_p_sample(struct intel_pt *pt,
+				   struct intel_pt_queue *ptq,
+				   union perf_event *event,
+				   struct perf_sample *sample)
+{
+	intel_pt_prep_sample(pt, ptq, event, sample);
+
+	/*
+	 * Zero IP is used to mean "trace start" but that is not the case for
+	 * power or PTWRITE events with no IP, so clear the flags.
+	 */
+	if (!sample->ip)
+		sample->flags = 0;
+}
+
+static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
 {
-	int ret;
 	struct intel_pt *pt = ptq->pt;
 	union perf_event *event = ptq->event_buf;
 	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_ptwrite raw;
 
-	if (pt->synth_opts.initial_skip &&
-	    pt->num_events++ < pt->synth_opts.initial_skip)
+	if (intel_pt_skip_event(pt))
 		return 0;
 
-	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
-	event->sample.header.size = sizeof(struct perf_event_header);
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
 
-	if (!pt->timeless_decoding)
-		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+	sample.id = ptq->pt->ptwrites_id;
+	sample.stream_id = ptq->pt->ptwrites_id;
 
-	sample.cpumode = PERF_RECORD_MISC_USER;
-	sample.ip = ptq->state->from_ip;
-	sample.pid = ptq->pid;
-	sample.tid = ptq->tid;
-	sample.addr = ptq->state->to_ip;
-	sample.id = ptq->pt->transactions_id;
-	sample.stream_id = ptq->pt->transactions_id;
-	sample.period = 1;
-	sample.cpu = ptq->cpu;
-	sample.flags = ptq->flags;
-	sample.insn_len = ptq->insn_len;
-	memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+	raw.flags = 0;
+	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+	raw.payload = cpu_to_le64(ptq->state->ptw_payload);
 
-	if (pt->synth_opts.callchain) {
-		thread_stack__sample(ptq->thread, ptq->chain,
-				     pt->synth_opts.callchain_sz, sample.ip);
-		sample.callchain = ptq->chain;
-	}
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
 
-	if (pt->synth_opts.last_branch) {
-		intel_pt_copy_last_branch_rb(ptq);
-		sample.branch_stack = ptq->last_branch;
-	}
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->ptwrites_sample_type);
+}
 
-	if (pt->synth_opts.inject) {
-		ret = intel_pt_inject_event(event, &sample,
-					    pt->transactions_sample_type,
-					    pt->synth_needs_swap);
-		if (ret)
-			return ret;
-	}
+static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_cbr raw;
+	u32 flags;
 
-	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
-	if (ret)
-		pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
-		       ret);
+	if (intel_pt_skip_event(pt))
+		return 0;
 
-	if (pt->synth_opts.last_branch)
-		intel_pt_reset_last_branch_rb(ptq);
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
 
-	return ret;
+	sample.id = ptq->pt->cbr_id;
+	sample.stream_id = ptq->pt->cbr_id;
+
+	flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
+	raw.flags = cpu_to_le32(flags);
+	raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
+	raw.reserved3 = 0;
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_mwait raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->mwait_id;
+	sample.stream_id = ptq->pt->mwait_id;
+
+	raw.reserved = 0;
+	raw.payload = cpu_to_le64(ptq->state->mwait_payload);
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_pwre raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->pwre_id;
+	sample.stream_id = ptq->pt->pwre_id;
+
+	raw.reserved = 0;
+	raw.payload = cpu_to_le64(ptq->state->pwre_payload);
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_exstop raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->exstop_id;
+	sample.stream_id = ptq->pt->exstop_id;
+
+	raw.flags = 0;
+	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_pwrx raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->pwrx_id;
+	sample.stream_id = ptq->pt->pwrx_id;
+
+	raw.reserved = 0;
+	raw.payload = cpu_to_le64(ptq->state->pwrx_payload);
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
 }
 
 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
@@ -1295,6 +1469,10 @@ static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
 			       PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
 }
 
+#define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
+			  INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \
+			  INTEL_PT_CBR_CHG)
+
 static int intel_pt_sample(struct intel_pt_queue *ptq)
 {
 	const struct intel_pt_state *state = ptq->state;
@@ -1306,24 +1484,52 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 
 	ptq->have_sample = false;
 
-	if (pt->sample_instructions &&
-	    (state->type & INTEL_PT_INSTRUCTION) &&
-	    (!pt->synth_opts.initial_skip ||
-	     pt->num_events++ >= pt->synth_opts.initial_skip)) {
+	if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
+		if (state->type & INTEL_PT_CBR_CHG) {
+			err = intel_pt_synth_cbr_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (state->type & INTEL_PT_MWAIT_OP) {
+			err = intel_pt_synth_mwait_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (state->type & INTEL_PT_PWR_ENTRY) {
+			err = intel_pt_synth_pwre_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (state->type & INTEL_PT_EX_STOP) {
+			err = intel_pt_synth_exstop_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (state->type & INTEL_PT_PWR_EXIT) {
+			err = intel_pt_synth_pwrx_sample(ptq);
+			if (err)
+				return err;
+		}
+	}
+
+	if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
 		err = intel_pt_synth_instruction_sample(ptq);
 		if (err)
 			return err;
 	}
 
-	if (pt->sample_transactions &&
-	    (state->type & INTEL_PT_TRANSACTION) &&
-	    (!pt->synth_opts.initial_skip ||
-	     pt->num_events++ >= pt->synth_opts.initial_skip)) {
+	if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
 		err = intel_pt_synth_transaction_sample(ptq);
 		if (err)
 			return err;
 	}
 
+	if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
+		err = intel_pt_synth_ptwrite_sample(ptq);
+		if (err)
+			return err;
+	}
+
 	if (!(state->type & INTEL_PT_BRANCH))
 		return 0;
 
@@ -1924,36 +2130,65 @@ static int intel_pt_event_synth(struct perf_tool *tool,
 						 NULL);
 }
 
-static int intel_pt_synth_event(struct perf_session *session,
+static int intel_pt_synth_event(struct perf_session *session, const char *name,
 				struct perf_event_attr *attr, u64 id)
 {
 	struct intel_pt_synth intel_pt_synth;
+	int err;
+
+	pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+		 name, id, (u64)attr->sample_type);
 
 	memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
 	intel_pt_synth.session = session;
 
-	return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
-					   &id, intel_pt_event_synth);
+	err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
+					  &id, intel_pt_event_synth);
+	if (err)
+		pr_err("%s: failed to synthesize '%s' event type\n",
+		       __func__, name);
+
+	return err;
 }
 
-static int intel_pt_synth_events(struct intel_pt *pt,
-				 struct perf_session *session)
+static void intel_pt_set_event_name(struct perf_evlist *evlist, u64 id,
+				    const char *name)
 {
-	struct perf_evlist *evlist = session->evlist;
 	struct perf_evsel *evsel;
-	struct perf_event_attr attr;
-	bool found = false;
-	u64 id;
-	int err;
 
 	evlist__for_each_entry(evlist, evsel) {
-		if (evsel->attr.type == pt->pmu_type && evsel->ids) {
-			found = true;
+		if (evsel->id && evsel->id[0] == id) {
+			if (evsel->name)
+				zfree(&evsel->name);
+			evsel->name = strdup(name);
 			break;
 		}
 	}
+}
 
-	if (!found) {
+static struct perf_evsel *intel_pt_evsel(struct intel_pt *pt,
+					 struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type == pt->pmu_type && evsel->ids)
+			return evsel;
+	}
+
+	return NULL;
+}
+
+static int intel_pt_synth_events(struct intel_pt *pt,
+				 struct perf_session *session)
+{
+	struct perf_evlist *evlist = session->evlist;
+	struct perf_evsel *evsel = intel_pt_evsel(pt, evlist);
+	struct perf_event_attr attr;
+	u64 id;
+	int err;
+
+	if (!evsel) {
 		pr_debug("There are no selected events with Intel Processor Trace data\n");
 		return 0;
 	}
@@ -1982,6 +2217,25 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 	if (!id)
 		id = 1;
 
+	if (pt->synth_opts.branches) {
+		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+		attr.sample_period = 1;
+		attr.sample_type |= PERF_SAMPLE_ADDR;
+		err = intel_pt_synth_event(session, "branches", &attr, id);
+		if (err)
+			return err;
+		pt->sample_branches = true;
+		pt->branches_sample_type = attr.sample_type;
+		pt->branches_id = id;
+		id += 1;
+		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
+	}
+
+	if (pt->synth_opts.callchain)
+		attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+	if (pt->synth_opts.last_branch)
+		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+
 	if (pt->synth_opts.instructions) {
 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
 		if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
@@ -1989,70 +2243,90 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 				intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
 		else
 			attr.sample_period = pt->synth_opts.period;
-		pt->instructions_sample_period = attr.sample_period;
-		if (pt->synth_opts.callchain)
-			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
-		if (pt->synth_opts.last_branch)
-			attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
-		pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
-			 id, (u64)attr.sample_type);
-		err = intel_pt_synth_event(session, &attr, id);
-		if (err) {
-			pr_err("%s: failed to synthesize 'instructions' event type\n",
-			       __func__);
+		err = intel_pt_synth_event(session, "instructions", &attr, id);
+		if (err)
 			return err;
-		}
 		pt->sample_instructions = true;
 		pt->instructions_sample_type = attr.sample_type;
 		pt->instructions_id = id;
 		id += 1;
 	}
 
+	attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
+	attr.sample_period = 1;
+
 	if (pt->synth_opts.transactions) {
 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
-		attr.sample_period = 1;
-		if (pt->synth_opts.callchain)
-			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
-		if (pt->synth_opts.last_branch)
-			attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
-		pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
-			 id, (u64)attr.sample_type);
-		err = intel_pt_synth_event(session, &attr, id);
-		if (err) {
-			pr_err("%s: failed to synthesize 'transactions' event type\n",
-			       __func__);
+		err = intel_pt_synth_event(session, "transactions", &attr, id);
+		if (err)
 			return err;
-		}
 		pt->sample_transactions = true;
+		pt->transactions_sample_type = attr.sample_type;
 		pt->transactions_id = id;
+		intel_pt_set_event_name(evlist, id, "transactions");
 		id += 1;
-		evlist__for_each_entry(evlist, evsel) {
-			if (evsel->id && evsel->id[0] == pt->transactions_id) {
-				if (evsel->name)
-					zfree(&evsel->name);
-				evsel->name = strdup("transactions");
-				break;
-			}
-		}
 	}
 
-	if (pt->synth_opts.branches) {
-		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
-		attr.sample_period = 1;
-		attr.sample_type |= PERF_SAMPLE_ADDR;
-		attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
-		attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
-		pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
-			 id, (u64)attr.sample_type);
-		err = intel_pt_synth_event(session, &attr, id);
-		if (err) {
-			pr_err("%s: failed to synthesize 'branches' event type\n",
-			       __func__);
+	attr.type = PERF_TYPE_SYNTH;
+	attr.sample_type |= PERF_SAMPLE_RAW;
+
+	if (pt->synth_opts.ptwrites) {
+		attr.config = PERF_SYNTH_INTEL_PTWRITE;
+		err = intel_pt_synth_event(session, "ptwrite", &attr, id);
+		if (err)
 			return err;
-		}
-		pt->sample_branches = true;
-		pt->branches_sample_type = attr.sample_type;
-		pt->branches_id = id;
+		pt->sample_ptwrites = true;
+		pt->ptwrites_sample_type = attr.sample_type;
+		pt->ptwrites_id = id;
+		intel_pt_set_event_name(evlist, id, "ptwrite");
+		id += 1;
+	}
+
+	if (pt->synth_opts.pwr_events) {
+		pt->sample_pwr_events = true;
+		pt->pwr_events_sample_type = attr.sample_type;
+
+		attr.config = PERF_SYNTH_INTEL_CBR;
+		err = intel_pt_synth_event(session, "cbr", &attr, id);
+		if (err)
+			return err;
+		pt->cbr_id = id;
+		intel_pt_set_event_name(evlist, id, "cbr");
+		id += 1;
+	}
+
+	if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) {
+		attr.config = PERF_SYNTH_INTEL_MWAIT;
+		err = intel_pt_synth_event(session, "mwait", &attr, id);
+		if (err)
+			return err;
+		pt->mwait_id = id;
+		intel_pt_set_event_name(evlist, id, "mwait");
+		id += 1;
+
+		attr.config = PERF_SYNTH_INTEL_PWRE;
+		err = intel_pt_synth_event(session, "pwre", &attr, id);
+		if (err)
+			return err;
+		pt->pwre_id = id;
+		intel_pt_set_event_name(evlist, id, "pwre");
+		id += 1;
+
+		attr.config = PERF_SYNTH_INTEL_EXSTOP;
+		err = intel_pt_synth_event(session, "exstop", &attr, id);
+		if (err)
+			return err;
+		pt->exstop_id = id;
+		intel_pt_set_event_name(evlist, id, "exstop");
+		id += 1;
+
+		attr.config = PERF_SYNTH_INTEL_PWRX;
+		err = intel_pt_synth_event(session, "pwrx", &attr, id);
+		if (err)
+			return err;
+		pt->pwrx_id = id;
+		intel_pt_set_event_name(evlist, id, "pwrx");
+		id += 1;
 	}
 
 	pt->synth_needs_swap = evsel->needs_swap;
@@ -2321,6 +2595,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
 		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
 		intel_pt_log("Maximum non-turbo ratio %u\n",
 			     pt->max_non_turbo_ratio);
+		pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
 	}
 
 	if (pt->synth_opts.calls)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 7a47f52ccfcc..5de2b86b9880 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -572,16 +572,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
 		if (dso == NULL)
 			goto out_unlock;
 
-		if (machine__is_host(machine))
-			dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
-		else
-			dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
-
-		/* _KMODULE_COMP should be next to _KMODULE */
-		if (m->kmod && m->comp)
-			dso->symtab_type++;
-
-		dso__set_short_name(dso, strdup(m->name), true);
+		dso__set_module_info(dso, m, machine);
 		dso__set_long_name(dso, strdup(filename), true);
 	}
 
@@ -796,11 +787,11 @@ const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL};
  * Returns the name of the start symbol in *symbol_name. Pass in NULL as
  * symbol_name if it's not that important.
  */
-static u64 machine__get_running_kernel_start(struct machine *machine,
-					     const char **symbol_name)
+static int machine__get_running_kernel_start(struct machine *machine,
+					     const char **symbol_name, u64 *start)
 {
 	char filename[PATH_MAX];
-	int i;
+	int i, err = -1;
 	const char *name;
 	u64 addr = 0;
 
@@ -810,21 +801,28 @@ static u64 machine__get_running_kernel_start(struct machine *machine,
 		return 0;
 
 	for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
-		addr = kallsyms__get_function_start(filename, name);
-		if (addr)
+		err = kallsyms__get_function_start(filename, name, &addr);
+		if (!err)
 			break;
 	}
 
+	if (err)
+		return -1;
+
 	if (symbol_name)
 		*symbol_name = name;
 
-	return addr;
+	*start = addr;
+	return 0;
 }
 
 int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
 {
 	int type;
-	u64 start = machine__get_running_kernel_start(machine, NULL);
+	u64 start = 0;
+
+	if (machine__get_running_kernel_start(machine, NULL, &start))
+		return -1;
 
 	/* In case of renewal the kernel map, destroy previous one */
 	machine__destroy_kernel_maps(machine);
@@ -1185,8 +1183,8 @@ static int machine__create_modules(struct machine *machine)
 int machine__create_kernel_maps(struct machine *machine)
 {
 	struct dso *kernel = machine__get_kernel(machine);
-	const char *name;
-	u64 addr;
+	const char *name = NULL;
+	u64 addr = 0;
 	int ret;
 
 	if (kernel == NULL)
@@ -1211,11 +1209,12 @@ int machine__create_kernel_maps(struct machine *machine)
 	 */
 	map_groups__fixup_end(&machine->kmaps);
 
-	addr = machine__get_running_kernel_start(machine, &name);
-	if (!addr) {
-	} else if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
-		machine__destroy_kernel_maps(machine);
-		return -1;
+	if (!machine__get_running_kernel_start(machine, &name, &addr)) {
+		if (name &&
+		    maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
+			machine__destroy_kernel_maps(machine);
+			return -1;
+		}
 	}
 
 	return 0;
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index ebfa5d92358a..2179b2deb730 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -325,11 +325,6 @@ int map__load(struct map *map)
 	return 0;
 }
 
-int __weak arch__compare_symbol_names(const char *namea, const char *nameb)
-{
-	return strcmp(namea, nameb);
-}
-
 struct symbol *map__find_symbol(struct map *map, u64 addr)
 {
 	if (map__load(map) < 0)
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index c8a5a644c0a9..f9e8ac8a52cd 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -130,13 +130,14 @@ struct thread;
  */
 #define __map__for_each_symbol_by_name(map, sym_name, pos)	\
 	for (pos = map__find_symbol_by_name(map, sym_name);	\
-	     pos && arch__compare_symbol_names(pos->name, sym_name) == 0;	\
+	     pos &&						\
+	     !symbol__match_symbol_name(pos->name, sym_name,	\
+					SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); \
 	     pos = symbol__next_by_name(pos))
 
 #define map__for_each_symbol_by_name(map, sym_name, pos)		\
 	__map__for_each_symbol_by_name(map, sym_name, (pos))
 
-int arch__compare_symbol_names(const char *namea, const char *nameb);
 void map__init(struct map *map, enum map_type type,
 	       u64 start, u64 end, u64 pgoff, struct dso *dso);
 struct map *map__new(struct machine *machine, u64 start, u64 len,
diff --git a/tools/perf/util/memswap.c b/tools/perf/util/memswap.c
new file mode 100644
index 000000000000..55f7faa8d9ec
--- /dev/null
+++ b/tools/perf/util/memswap.c
@@ -0,0 +1,24 @@
+#include <byteswap.h>
+#include "memswap.h"
+#include <linux/types.h>
+
+void mem_bswap_32(void *src, int byte_size)
+{
+	u32 *m = src;
+	while (byte_size > 0) {
+		*m = bswap_32(*m);
+		byte_size -= sizeof(u32);
+		++m;
+	}
+}
+
+void mem_bswap_64(void *src, int byte_size)
+{
+	u64 *m = src;
+
+	while (byte_size > 0) {
+		*m = bswap_64(*m);
+		byte_size -= sizeof(u64);
+		++m;
+	}
+}
diff --git a/tools/perf/util/memswap.h b/tools/perf/util/memswap.h
new file mode 100644
index 000000000000..7d1b1c34bb57
--- /dev/null
+++ b/tools/perf/util/memswap.h
@@ -0,0 +1,7 @@
+#ifndef PERF_MEMSWAP_H_
+#define PERF_MEMSWAP_H_
+
+void mem_bswap_64(void *src, int byte_size);
+void mem_bswap_32(void *src, int byte_size);
+
+#endif /* PERF_MEMSWAP_H_ */
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index ea7f450dc609..389e9729331f 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -2,6 +2,7 @@
 #define __PMU_H
 
 #include <linux/bitmap.h>
+#include <linux/compiler.h>
 #include <linux/perf_event.h>
 #include <stdbool.h>
 #include "evsel.h"
@@ -83,8 +84,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet,
 		      bool long_desc, bool details_flag);
 bool pmu_have_event(const char *pname, const char *name);
 
-int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
-			...) __attribute__((format(scanf, 3, 4)));
+int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4);
 
 int perf_pmu__test(void);
 
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 84e7e698411e..a2670e9d652d 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -619,7 +619,7 @@ static int post_process_probe_trace_point(struct probe_trace_point *tp,
 					   struct map *map, unsigned long offs)
 {
 	struct symbol *sym;
-	u64 addr = tp->address + tp->offset - offs;
+	u64 addr = tp->address - offs;
 
 	sym = map__find_symbol(map, addr);
 	if (!sym)
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 373842656fb6..5812947418dd 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -1,6 +1,7 @@
 #ifndef _PROBE_EVENT_H
 #define _PROBE_EVENT_H
 
+#include <linux/compiler.h>
 #include <stdbool.h>
 #include "intlist.h"
 
@@ -171,8 +172,7 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
 			     struct symbol *sym);
 
 /* If there is no space to write, returns -E2BIG. */
-int e_snprintf(char *str, size_t size, const char *format, ...)
-	__attribute__((format(printf, 3, 4)));
+int e_snprintf(char *str, size_t size, const char *format, ...) __printf(3, 4);
 
 /* Maximum index number of event-name postfix */
 #define MAX_EVENT_INDEX	1024
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 2b12bdb3ce33..7b79c413486b 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -28,7 +28,9 @@
 #include <linux/bitmap.h>
 #include <linux/time64.h>
 
-#include "../util.h"
+#include <stdbool.h>
+/* perl needs the following define, right after including stdbool.h */
+#define HAS_BOOL
 #include <EXTERN.h>
 #include <perl.h>
 
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 9d92af7d0718..57b7a00e6f16 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -28,6 +28,7 @@
 #include <stdbool.h>
 #include <errno.h>
 #include <linux/bitmap.h>
+#include <linux/compiler.h>
 #include <linux/time64.h>
 
 #include "../../perf.h"
@@ -84,7 +85,7 @@ struct tables {
 
 static struct tables tables_global;
 
-static void handler_call_die(const char *handler_name) NORETURN;
+static void handler_call_die(const char *handler_name) __noreturn;
 static void handler_call_die(const char *handler_name)
 {
 	PyErr_Print();
@@ -1219,7 +1220,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 	fprintf(ofp, "# be retrieved using Python functions of the form "
 		"common_*(context).\n");
 
-	fprintf(ofp, "# See the perf-trace-python Documentation for the list "
+	fprintf(ofp, "# See the perf-script-python Documentation for the list "
 		"of available functions.\n\n");
 
 	fprintf(ofp, "import os\n");
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 3041c6b98191..d19c40a81040 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -11,6 +11,7 @@
 
 #include "evlist.h"
 #include "evsel.h"
+#include "memswap.h"
 #include "session.h"
 #include "tool.h"
 #include "sort.h"
@@ -2034,7 +2035,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
 
 		if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
 			pr_err("File does not contain CPU events. "
-			       "Remove -c option to proceed.\n");
+			       "Remove -C option to proceed.\n");
 			return -1;
 		}
 	}
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 5762ae4e9e91..8b327c955a4f 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2532,12 +2532,12 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
 			ret = sort_dimension__add(list, tok, evlist, level);
 			if (ret == -EINVAL) {
 				if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok)))
-					error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
+					pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
 				else
-					error("Invalid --sort key: `%s'", tok);
+					pr_err("Invalid --sort key: `%s'", tok);
 				break;
 			} else if (ret == -ESRCH) {
-				error("Unknown --sort key: `%s'", tok);
+				pr_err("Unknown --sort key: `%s'", tok);
 				break;
 			}
 		}
@@ -2594,7 +2594,7 @@ static int setup_sort_order(struct perf_evlist *evlist)
 		return 0;
 
 	if (sort_order[1] == '\0') {
-		error("Invalid --sort key: `+'");
+		pr_err("Invalid --sort key: `+'");
 		return -EINVAL;
 	}
 
@@ -2604,7 +2604,7 @@ static int setup_sort_order(struct perf_evlist *evlist)
 	 */
 	if (asprintf(&new_sort_order, "%s,%s",
 		     get_default_sort_order(evlist), sort_order + 1) < 0) {
-		error("Not enough memory to set up --sort");
+		pr_err("Not enough memory to set up --sort");
 		return -ENOMEM;
 	}
 
@@ -2668,7 +2668,7 @@ static int __setup_sorting(struct perf_evlist *evlist)
 
 	str = strdup(sort_keys);
 	if (str == NULL) {
-		error("Not enough memory to setup sort keys");
+		pr_err("Not enough memory to setup sort keys");
 		return -ENOMEM;
 	}
 
@@ -2678,7 +2678,7 @@ static int __setup_sorting(struct perf_evlist *evlist)
 	if (!is_strict_order(field_order)) {
 		str = setup_overhead(str);
 		if (str == NULL) {
-			error("Not enough memory to setup overhead keys");
+			pr_err("Not enough memory to setup overhead keys");
 			return -ENOMEM;
 		}
 	}
@@ -2834,10 +2834,10 @@ static int setup_output_list(struct perf_hpp_list *list, char *str)
 			tok; tok = strtok_r(NULL, ", ", &tmp)) {
 		ret = output_field_add(list, tok);
 		if (ret == -EINVAL) {
-			error("Invalid --fields key: `%s'", tok);
+			pr_err("Invalid --fields key: `%s'", tok);
 			break;
 		} else if (ret == -ESRCH) {
-			error("Unknown --fields key: `%s'", tok);
+			pr_err("Unknown --fields key: `%s'", tok);
 			break;
 		}
 	}
@@ -2877,7 +2877,7 @@ static int __setup_output_field(void)
 
 	strp = str = strdup(field_order);
 	if (str == NULL) {
-		error("Not enough memory to setup output fields");
+		pr_err("Not enough memory to setup output fields");
 		return -ENOMEM;
 	}
 
@@ -2885,7 +2885,7 @@ static int __setup_output_field(void)
 		strp++;
 
 	if (!strlen(strp)) {
-		error("Invalid --fields key: `+'");
+		pr_err("Invalid --fields key: `+'");
 		goto out;
 	}
 
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index df051a52393c..ebc88a74e67b 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -56,7 +56,10 @@ static int inline_list__append(char *filename, char *funcname, int line_nr,
 		}
 	}
 
-	list_add_tail(&ilist->list, &node->val);
+	if (callchain_param.order == ORDER_CALLEE)
+		list_add_tail(&ilist->list, &node->val);
+	else
+		list_add(&ilist->list, &node->val);
 
 	return 0;
 }
@@ -200,12 +203,14 @@ static void addr2line_cleanup(struct a2l_data *a2l)
 
 #define MAX_INLINE_NEST 1024
 
-static void inline_list__reverse(struct inline_node *node)
+static int inline_list__append_dso_a2l(struct dso *dso,
+				       struct inline_node *node)
 {
-	struct inline_list *ilist, *n;
+	struct a2l_data *a2l = dso->a2l;
+	char *funcname = a2l->funcname ? strdup(a2l->funcname) : NULL;
+	char *filename = a2l->filename ? strdup(a2l->filename) : NULL;
 
-	list_for_each_entry_safe_reverse(ilist, n, &node->val, list)
-		list_move_tail(&ilist->list, &node->val);
+	return inline_list__append(filename, funcname, a2l->line, node, dso);
 }
 
 static int addr2line(const char *dso_name, u64 addr,
@@ -230,36 +235,36 @@ static int addr2line(const char *dso_name, u64 addr,
 
 	bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l);
 
-	if (a2l->found && unwind_inlines) {
+	if (!a2l->found)
+		return 0;
+
+	if (unwind_inlines) {
 		int cnt = 0;
 
+		if (node && inline_list__append_dso_a2l(dso, node))
+			return 0;
+
 		while (bfd_find_inliner_info(a2l->abfd, &a2l->filename,
 					     &a2l->funcname, &a2l->line) &&
 		       cnt++ < MAX_INLINE_NEST) {
 
 			if (node != NULL) {
-				if (inline_list__append(strdup(a2l->filename),
-							strdup(a2l->funcname),
-							a2l->line, node,
-							dso) != 0)
+				if (inline_list__append_dso_a2l(dso, node))
 					return 0;
+				// found at least one inline frame
+				ret = 1;
 			}
 		}
+	}
 
-		if ((node != NULL) &&
-		    (callchain_param.order != ORDER_CALLEE)) {
-			inline_list__reverse(node);
-		}
+	if (file) {
+		*file = a2l->filename ? strdup(a2l->filename) : NULL;
+		ret = *file ? 1 : 0;
 	}
 
-	if (a2l->found && a2l->filename) {
-		*file = strdup(a2l->filename);
+	if (line)
 		*line = a2l->line;
 
-		if (*file)
-			ret = 1;
-	}
-
 	return ret;
 }
 
@@ -278,8 +283,6 @@ void dso__free_a2l(struct dso *dso)
 static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
 	struct dso *dso)
 {
-	char *file = NULL;
-	unsigned int line = 0;
 	struct inline_node *node;
 
 	node = zalloc(sizeof(*node));
@@ -291,7 +294,7 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
 	INIT_LIST_HEAD(&node->val);
 	node->addr = addr;
 
-	if (!addr2line(dso_name, addr, &file, &line, dso, TRUE, node))
+	if (!addr2line(dso_name, addr, NULL, NULL, dso, TRUE, node))
 		goto out_free_inline_node;
 
 	if (list_empty(&node->val))
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index ac10cc675d39..719d6cb86952 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
 static struct rblist runtime_saved_values;
 static bool have_frontend_stalled;
 
@@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
 	memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
 	memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
 	memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
+	memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
+	memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
 
 	next = rb_first(&runtime_saved_values.entries);
 	while (next) {
@@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
 		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, SMI_NUM))
+		update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, APERF))
+		update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
 
 	if (counter->collect_stat) {
 		struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
@@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
 	return sanitize_val(1.0 - sum);
 }
 
+static void print_smi_cost(int cpu, struct perf_evsel *evsel,
+			   struct perf_stat_output_ctx *out)
+{
+	double smi_num, aperf, cycles, cost = 0.0;
+	int ctx = evsel_context(evsel);
+	const char *color = NULL;
+
+	smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
+	aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
+	cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+	if ((cycles == 0) || (aperf == 0))
+		return;
+
+	if (smi_num)
+		cost = (aperf - cycles) / aperf * 100.00;
+
+	if (cost > 10)
+		color = PERF_COLOR_RED;
+	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
+	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
+}
+
 void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 				   double avg, int cpu,
 				   struct perf_stat_output_ctx *out)
@@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		}
 		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
 		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
+	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
+		print_smi_cost(cpu, evsel, out);
 	} else {
 		print_metric(ctxp, NULL, NULL, NULL, 0);
 	}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c58174443dc1..53b9a994a3dc 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
 	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
 	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
 	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+	ID(SMI_NUM, msr/smi/),
+	ID(APERF, msr/aperf/),
 };
 #undef ID
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 0a65ae23f495..7522bf10b03e 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -22,6 +22,8 @@ enum perf_stat_evsel_id {
 	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
 	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
 	PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+	PERF_STAT_EVSEL_ID__SMI_NUM,
+	PERF_STAT_EVSEL_ID__APERF,
 	PERF_STAT_EVSEL_ID__MAX,
 };
 
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index e91b5e86f027..aafe908b82b5 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c
@@ -3,15 +3,6 @@
 #include <linux/kernel.h>
 #include <errno.h>
 
-int prefixcmp(const char *str, const char *prefix)
-{
-	for (; ; str++, prefix++)
-		if (!*prefix)
-			return 0;
-		else if (*str != *prefix)
-			return (unsigned char)*prefix - (unsigned char)*str;
-}
-
 /*
  * Used as the default ->buf value, so that people can always assume
  * buf is non NULL and ->buf is NUL terminated even for a freshly
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
index 318424ea561d..802d743378af 100644
--- a/tools/perf/util/strbuf.h
+++ b/tools/perf/util/strbuf.h
@@ -42,6 +42,7 @@
 #include <stdarg.h>
 #include <stddef.h>
 #include <string.h>
+#include <linux/compiler.h>
 #include <sys/types.h>
 
 extern char strbuf_slopbuf[];
@@ -85,8 +86,7 @@ static inline int strbuf_addstr(struct strbuf *sb, const char *s) {
 	return strbuf_add(sb, s, strlen(s));
 }
 
-__attribute__((format(printf,2,3)))
-int strbuf_addf(struct strbuf *sb, const char *fmt, ...);
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...) __printf(2, 3);
 
 /* XXX: if read fails, any partial read is undone */
 ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index e7ee47f7377a..502505cf236a 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -637,43 +637,6 @@ static int dso__swap_init(struct dso *dso, unsigned char eidata)
 	return 0;
 }
 
-static int decompress_kmodule(struct dso *dso, const char *name,
-			      enum dso_binary_type type)
-{
-	int fd = -1;
-	char tmpbuf[] = "/tmp/perf-kmod-XXXXXX";
-	struct kmod_path m;
-
-	if (type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP &&
-	    type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP &&
-	    type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
-		return -1;
-
-	if (type == DSO_BINARY_TYPE__BUILD_ID_CACHE)
-		name = dso->long_name;
-
-	if (kmod_path__parse_ext(&m, name) || !m.comp)
-		return -1;
-
-	fd = mkstemp(tmpbuf);
-	if (fd < 0) {
-		dso->load_errno = errno;
-		goto out;
-	}
-
-	if (!decompress_to_file(m.ext, name, fd)) {
-		dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
-		close(fd);
-		fd = -1;
-	}
-
-	unlink(tmpbuf);
-
-out:
-	free(m.ext);
-	return fd;
-}
-
 bool symsrc__possibly_runtime(struct symsrc *ss)
 {
 	return ss->dynsym || ss->opdsec;
@@ -705,9 +668,11 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
 	int fd;
 
 	if (dso__needs_decompress(dso)) {
-		fd = decompress_kmodule(dso, name, type);
+		fd = dso__decompress_kmodule_fd(dso, name);
 		if (fd < 0)
 			return -1;
+
+		type = dso->symtab_type;
 	} else {
 		fd = open(name, O_RDONLY);
 		if (fd < 0) {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 2cb7665e9973..e7a98dbd2aed 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -90,6 +90,17 @@ static int prefix_underscores_count(const char *str)
 	return tail - str;
 }
 
+int __weak arch__compare_symbol_names(const char *namea, const char *nameb)
+{
+	return strcmp(namea, nameb);
+}
+
+int __weak arch__compare_symbol_names_n(const char *namea, const char *nameb,
+					unsigned int n)
+{
+	return strncmp(namea, nameb, n);
+}
+
 int __weak arch__choose_best_symbol(struct symbol *syma,
 				    struct symbol *symb __maybe_unused)
 {
@@ -399,8 +410,26 @@ static void symbols__sort_by_name(struct rb_root *symbols,
 	}
 }
 
+int symbol__match_symbol_name(const char *name, const char *str,
+			      enum symbol_tag_include includes)
+{
+	const char *versioning;
+
+	if (includes == SYMBOL_TAG_INCLUDE__DEFAULT_ONLY &&
+	    (versioning = strstr(name, "@@"))) {
+		int len = strlen(str);
+
+		if (len < versioning - name)
+			len = versioning - name;
+
+		return arch__compare_symbol_names_n(name, str, len);
+	} else
+		return arch__compare_symbol_names(name, str);
+}
+
 static struct symbol *symbols__find_by_name(struct rb_root *symbols,
-					    const char *name)
+					    const char *name,
+					    enum symbol_tag_include includes)
 {
 	struct rb_node *n;
 	struct symbol_name_rb_node *s = NULL;
@@ -414,11 +443,11 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
 		int cmp;
 
 		s = rb_entry(n, struct symbol_name_rb_node, rb_node);
-		cmp = arch__compare_symbol_names(name, s->sym.name);
+		cmp = symbol__match_symbol_name(s->sym.name, name, includes);
 
-		if (cmp < 0)
+		if (cmp > 0)
 			n = n->rb_left;
-		else if (cmp > 0)
+		else if (cmp < 0)
 			n = n->rb_right;
 		else
 			break;
@@ -427,16 +456,17 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
 	if (n == NULL)
 		return NULL;
 
-	/* return first symbol that has same name (if any) */
-	for (n = rb_prev(n); n; n = rb_prev(n)) {
-		struct symbol_name_rb_node *tmp;
+	if (includes != SYMBOL_TAG_INCLUDE__DEFAULT_ONLY)
+		/* return first symbol that has same name (if any) */
+		for (n = rb_prev(n); n; n = rb_prev(n)) {
+			struct symbol_name_rb_node *tmp;
 
-		tmp = rb_entry(n, struct symbol_name_rb_node, rb_node);
-		if (arch__compare_symbol_names(tmp->sym.name, s->sym.name))
-			break;
+			tmp = rb_entry(n, struct symbol_name_rb_node, rb_node);
+			if (arch__compare_symbol_names(tmp->sym.name, s->sym.name))
+				break;
 
-		s = tmp;
-	}
+			s = tmp;
+		}
 
 	return &s->sym;
 }
@@ -466,7 +496,7 @@ void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym)
 struct symbol *dso__find_symbol(struct dso *dso,
 				enum map_type type, u64 addr)
 {
-	if (dso->last_find_result[type].addr != addr) {
+	if (dso->last_find_result[type].addr != addr || dso->last_find_result[type].symbol == NULL) {
 		dso->last_find_result[type].addr   = addr;
 		dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr);
 	}
@@ -503,7 +533,12 @@ struct symbol *symbol__next_by_name(struct symbol *sym)
 struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
 					const char *name)
 {
-	return symbols__find_by_name(&dso->symbol_names[type], name);
+	struct symbol *s = symbols__find_by_name(&dso->symbol_names[type], name,
+						 SYMBOL_TAG_INCLUDE__NONE);
+	if (!s)
+		s = symbols__find_by_name(&dso->symbol_names[type], name,
+					  SYMBOL_TAG_INCLUDE__DEFAULT_ONLY);
+	return s;
 }
 
 void dso__sort_by_name(struct dso *dso, enum map_type type)
@@ -1075,8 +1110,9 @@ static int validate_kcore_addresses(const char *kallsyms_filename,
 	if (kmap->ref_reloc_sym && kmap->ref_reloc_sym->name) {
 		u64 start;
 
-		start = kallsyms__get_function_start(kallsyms_filename,
-						     kmap->ref_reloc_sym->name);
+		if (kallsyms__get_function_start(kallsyms_filename,
+						 kmap->ref_reloc_sym->name, &start))
+			return -ENOENT;
 		if (start != kmap->ref_reloc_sym->addr)
 			return -EINVAL;
 	}
@@ -1248,9 +1284,7 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta)
 	if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name)
 		return 0;
 
-	addr = kallsyms__get_function_start(filename,
-					    kmap->ref_reloc_sym->name);
-	if (!addr)
+	if (kallsyms__get_function_start(filename, kmap->ref_reloc_sym->name, &addr))
 		return -1;
 
 	*delta = addr - kmap->ref_reloc_sym->addr;
@@ -1528,10 +1562,6 @@ int dso__load(struct dso *dso, struct map *map)
 	if (!runtime_ss && syms_ss)
 		runtime_ss = syms_ss;
 
-	if (syms_ss && syms_ss->type == DSO_BINARY_TYPE__BUILD_ID_CACHE)
-		if (dso__build_id_is_kmod(dso, name, PATH_MAX))
-			kmod = true;
-
 	if (syms_ss)
 		ret = dso__load_sym(dso, map, syms_ss, runtime_ss, kmod);
 	else
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 7acd70fce68e..41ebba9a2eb2 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -348,8 +348,19 @@ void arch__sym_update(struct symbol *s, GElf_Sym *sym);
 #define SYMBOL_A 0
 #define SYMBOL_B 1
 
+int arch__compare_symbol_names(const char *namea, const char *nameb);
+int arch__compare_symbol_names_n(const char *namea, const char *nameb,
+				 unsigned int n);
 int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb);
 
+enum symbol_tag_include {
+	SYMBOL_TAG_INCLUDE__NONE = 0,
+	SYMBOL_TAG_INCLUDE__DEFAULT_ONLY
+};
+
+int symbol__match_symbol_name(const char *namea, const char *nameb,
+			      enum symbol_tag_include includes);
+
 /* structure containing an SDT note's info */
 struct sdt_note {
 	char *name;			/* name of the note*/
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 746bbee645d9..e0a6e9a6a053 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -24,7 +24,7 @@
 #include <errno.h>
 
 #include "../perf.h"
-#include "util.h"
+#include "debug.h"
 #include "trace-event.h"
 
 #include "sane_ctype.h"
@@ -150,7 +150,7 @@ void parse_ftrace_printk(struct pevent *pevent,
 	while (line) {
 		addr_str = strtok_r(line, ":", &fmt);
 		if (!addr_str) {
-			warning("printk format with empty entry");
+			pr_warning("printk format with empty entry");
 			break;
 		}
 		addr = strtoull(addr_str, NULL, 16);
diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
index f6a2a3d117d5..4767ec2c5ef6 100644
--- a/tools/perf/util/units.c
+++ b/tools/perf/util/units.c
@@ -1,8 +1,37 @@
 #include "units.h"
 #include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
 #include <linux/kernel.h>
 #include <linux/time64.h>
 
+unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
+{
+	struct parse_tag *i = tags;
+
+	while (i->tag) {
+		char *s = strchr(str, i->tag);
+
+		if (s) {
+			unsigned long int value;
+			char *endptr;
+
+			value = strtoul(str, &endptr, 10);
+			if (s != endptr)
+				break;
+
+			if (value > ULONG_MAX / i->mult)
+				break;
+			value *= i->mult;
+			return value;
+		}
+		i++;
+	}
+
+	return (unsigned long) -1;
+}
+
 unsigned long convert_unit(unsigned long value, char *unit)
 {
 	*unit = ' ';
diff --git a/tools/perf/util/units.h b/tools/perf/util/units.h
index 3ed7774afaa9..f02c87317150 100644
--- a/tools/perf/util/units.h
+++ b/tools/perf/util/units.h
@@ -4,6 +4,13 @@
 #include <stddef.h>
 #include <linux/types.h>
 
+struct parse_tag {
+	char tag;
+	int  mult;
+};
+
+unsigned long parse_tag_value(const char *str, struct parse_tag *tags);
+
 unsigned long convert_unit(unsigned long value, char *unit);
 int unit_number__scnprintf(char *buf, size_t size, u64 n);
 
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index f90e11a555b2..7755a5e0fe5e 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -39,6 +39,14 @@ static int __report_module(struct addr_location *al, u64 ip,
 		return 0;
 
 	mod = dwfl_addrmodule(ui->dwfl, ip);
+	if (mod) {
+		Dwarf_Addr s;
+
+		dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
+		if (s != al->map->start)
+			mod = 0;
+	}
+
 	if (!mod)
 		mod = dwfl_report_elf(ui->dwfl, dso->short_name,
 				      dso->long_name, -1, al->map->start,
@@ -168,12 +176,24 @@ frame_callback(Dwfl_Frame *state, void *arg)
 {
 	struct unwind_info *ui = arg;
 	Dwarf_Addr pc;
+	bool isactivation;
 
 	if (!dwfl_frame_pc(state, &pc, NULL)) {
 		pr_err("%s", dwfl_errmsg(-1));
 		return DWARF_CB_ABORT;
 	}
 
+	// report the module before we query for isactivation
+	report_module(pc, ui);
+
+	if (!dwfl_frame_pc(state, &pc, &isactivation)) {
+		pr_err("%s", dwfl_errmsg(-1));
+		return DWARF_CB_ABORT;
+	}
+
+	if (!isactivation)
+		--pc;
+
 	return entry(pc, ui) || !(--ui->max_stack) ?
 	       DWARF_CB_ABORT : DWARF_CB_OK;
 }
@@ -220,7 +240,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 
 	err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui);
 
-	if (err && !ui->max_stack)
+	if (err && ui->max_stack != max_stack)
 		err = 0;
 
 	/*
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index f8455bed6e65..672c2ada9357 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -692,6 +692,17 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
 
 		while (!ret && (unw_step(&c) > 0) && i < max_stack) {
 			unw_get_reg(&c, UNW_REG_IP, &ips[i]);
+
+			/*
+			 * Decrement the IP for any non-activation frames.
+			 * this is required to properly find the srcline
+			 * for caller frames.
+			 * See also the documentation for dwfl_frame_pc(),
+			 * which this code tries to replicate.
+			 */
+			if (unw_is_signal_frame(&c) <= 0)
+				--ips[i];
+
 			++i;
 		}
 
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index 996046a66fe5..6cc9d9888ce0 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c
@@ -9,75 +9,17 @@
 #include "util.h"
 #include "debug.h"
 
-static void report(const char *prefix, const char *err, va_list params)
-{
-	char msg[1024];
-	vsnprintf(msg, sizeof(msg), err, params);
-	fprintf(stderr, " %s%s\n", prefix, msg);
-}
-
-static NORETURN void usage_builtin(const char *err)
+static __noreturn void usage_builtin(const char *err)
 {
 	fprintf(stderr, "\n Usage: %s\n", err);
 	exit(129);
 }
 
-static NORETURN void die_builtin(const char *err, va_list params)
-{
-	report(" Fatal: ", err, params);
-	exit(128);
-}
-
-static void error_builtin(const char *err, va_list params)
-{
-	report(" Error: ", err, params);
-}
-
-static void warn_builtin(const char *warn, va_list params)
-{
-	report(" Warning: ", warn, params);
-}
-
 /* If we are in a dlopen()ed .so write to a global variable would segfault
  * (ugh), so keep things static. */
-static void (*usage_routine)(const char *err) NORETURN = usage_builtin;
-static void (*error_routine)(const char *err, va_list params) = error_builtin;
-static void (*warn_routine)(const char *err, va_list params) = warn_builtin;
-
-void set_warning_routine(void (*routine)(const char *err, va_list params))
-{
-	warn_routine = routine;
-}
+static void (*usage_routine)(const char *err) __noreturn = usage_builtin;
 
 void usage(const char *err)
 {
 	usage_routine(err);
 }
-
-void die(const char *err, ...)
-{
-	va_list params;
-
-	va_start(params, err);
-	die_builtin(err, params);
-	va_end(params);
-}
-
-int error(const char *err, ...)
-{
-	va_list params;
-
-	va_start(params, err);
-	error_routine(err, params);
-	va_end(params);
-	return -1;
-}
-
-void warning(const char *warn, ...)
-{
-	va_list params;
-
-	va_start(params, warn);
-	warn_routine(warn, params);
-	va_end(params);
-}
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 6450c75a6f5b..988111e0bab5 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -13,7 +13,6 @@
 #include <string.h>
 #include <errno.h>
 #include <limits.h>
-#include <byteswap.h>
 #include <linux/kernel.h>
 #include <linux/log2.h>
 #include <linux/time64.h>
@@ -335,33 +334,6 @@ int hex2u64(const char *ptr, u64 *long_val)
 	return p - ptr;
 }
 
-unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
-{
-	struct parse_tag *i = tags;
-
-	while (i->tag) {
-		char *s;
-
-		s = strchr(str, i->tag);
-		if (s) {
-			unsigned long int value;
-			char *endptr;
-
-			value = strtoul(str, &endptr, 10);
-			if (s != endptr)
-				break;
-
-			if (value > ULONG_MAX / i->mult)
-				break;
-			value *= i->mult;
-			return value;
-		}
-		i++;
-	}
-
-	return (unsigned long) -1;
-}
-
 int perf_event_paranoid(void)
 {
 	int value;
@@ -371,64 +343,6 @@ int perf_event_paranoid(void)
 
 	return value;
 }
-
-void mem_bswap_32(void *src, int byte_size)
-{
-	u32 *m = src;
-	while (byte_size > 0) {
-		*m = bswap_32(*m);
-		byte_size -= sizeof(u32);
-		++m;
-	}
-}
-
-void mem_bswap_64(void *src, int byte_size)
-{
-	u64 *m = src;
-
-	while (byte_size > 0) {
-		*m = bswap_64(*m);
-		byte_size -= sizeof(u64);
-		++m;
-	}
-}
-
-bool find_process(const char *name)
-{
-	size_t len = strlen(name);
-	DIR *dir;
-	struct dirent *d;
-	int ret = -1;
-
-	dir = opendir(procfs__mountpoint());
-	if (!dir)
-		return false;
-
-	/* Walk through the directory. */
-	while (ret && (d = readdir(dir)) != NULL) {
-		char path[PATH_MAX];
-		char *data;
-		size_t size;
-
-		if ((d->d_type != DT_DIR) ||
-		     !strcmp(".", d->d_name) ||
-		     !strcmp("..", d->d_name))
-			continue;
-
-		scnprintf(path, sizeof(path), "%s/%s/comm",
-			  procfs__mountpoint(), d->d_name);
-
-		if (filename__read_str(path, &data, &size))
-			continue;
-
-		ret = strncmp(name, data, len);
-		free(data);
-	}
-
-	closedir(dir);
-	return ret ? false : true;
-}
-
 static int
 fetch_ubuntu_kernel_version(unsigned int *puint)
 {
@@ -436,8 +350,12 @@ fetch_ubuntu_kernel_version(unsigned int *puint)
 	size_t line_len = 0;
 	char *ptr, *line = NULL;
 	int version, patchlevel, sublevel, err;
-	FILE *vsig = fopen("/proc/version_signature", "r");
+	FILE *vsig;
+
+	if (!puint)
+		return 0;
 
+	vsig = fopen("/proc/version_signature", "r");
 	if (!vsig) {
 		pr_debug("Open /proc/version_signature failed: %s\n",
 			 strerror(errno));
@@ -467,8 +385,7 @@ fetch_ubuntu_kernel_version(unsigned int *puint)
 		goto errout;
 	}
 
-	if (puint)
-		*puint = (version << 16) + (patchlevel << 8) + sublevel;
+	*puint = (version << 16) + (patchlevel << 8) + sublevel;
 	err = 0;
 errout:
 	free(line);
@@ -495,6 +412,9 @@ fetch_kernel_version(unsigned int *puint, char *str,
 		str[str_size - 1] = '\0';
 	}
 
+	if (!puint || int_ver_ready)
+		return 0;
+
 	err = sscanf(utsname.release, "%d.%d.%d",
 		     &version, &patchlevel, &sublevel);
 
@@ -504,8 +424,7 @@ fetch_kernel_version(unsigned int *puint, char *str,
 		return -1;
 	}
 
-	if (puint && !int_ver_ready)
-		*puint = (version << 16) + (patchlevel << 8) + sublevel;
+	*puint = (version << 16) + (patchlevel << 8) + sublevel;
 	return 0;
 }
 
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 3852b6d3270a..2c9e58a45310 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -1,42 +1,21 @@
 #ifndef GIT_COMPAT_UTIL_H
 #define GIT_COMPAT_UTIL_H
 
-#define _ALL_SOURCE 1
 #define _BSD_SOURCE 1
 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
 #define _DEFAULT_SOURCE 1
-#define HAS_BOOL
 
 #include <fcntl.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdlib.h>
 #include <stdarg.h>
+#include <linux/compiler.h>
 #include <linux/types.h>
 
-extern char buildid_dir[];
-
-#ifdef __GNUC__
-#define NORETURN __attribute__((__noreturn__))
-#else
-#define NORETURN
-#ifndef __attribute__
-#define __attribute__(x)
-#endif
-#endif
-
-#define PERF_GTK_DSO  "libperf-gtk.so"
-
 /* General helper functions */
-void usage(const char *err) NORETURN;
-void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
-int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
-void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
-
-void set_warning_routine(void (*routine)(const char *err, va_list params));
-
-int prefixcmp(const char *str, const char *prefix);
-void set_buildid_dir(const char *dir);
+void usage(const char *err) __noreturn;
+void die(const char *err, ...) __noreturn __printf(1, 2);
 
 static inline void *zalloc(size_t size)
 {
@@ -59,31 +38,11 @@ int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 si
 ssize_t readn(int fd, void *buf, size_t n);
 ssize_t writen(int fd, void *buf, size_t n);
 
-struct perf_event_attr;
-
-void event_attr_init(struct perf_event_attr *attr);
-
 size_t hex_width(u64 v);
 int hex2u64(const char *ptr, u64 *val);
 
 extern unsigned int page_size;
 extern int cacheline_size;
-extern int sysctl_perf_event_max_stack;
-extern int sysctl_perf_event_max_contexts_per_stack;
-
-struct parse_tag {
-	char tag;
-	int mult;
-};
-
-unsigned long parse_tag_value(const char *str, struct parse_tag *tags);
-
-int perf_event_paranoid(void);
-
-void mem_bswap_64(void *src, int byte_size);
-void mem_bswap_32(void *src, int byte_size);
-
-bool find_process(const char *name);
 
 int fetch_kernel_version(unsigned int *puint,
 			 char *str, size_t str_sz);
diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore
new file mode 100644
index 000000000000..cba3d994995c
--- /dev/null
+++ b/tools/power/acpi/.gitignore
@@ -0,0 +1,4 @@
+acpidbg
+acpidump
+ec
+include
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 64cae1a5deff..e1f75a1914a1 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -370,7 +370,7 @@ acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path,
 }
 EXPORT_SYMBOL(__wrap_acpi_evaluate_object);
 
-union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
+union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
 		u64 rev, u64 func, union acpi_object *argv4)
 {
 	union acpi_object *obj = ERR_PTR(-ENXIO);
@@ -379,11 +379,11 @@ union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
 	rcu_read_lock();
 	ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list);
 	if (ops)
-		obj = ops->evaluate_dsm(handle, uuid, rev, func, argv4);
+		obj = ops->evaluate_dsm(handle, guid, rev, func, argv4);
 	rcu_read_unlock();
 
 	if (IS_ERR(obj))
-		return acpi_evaluate_dsm(handle, uuid, rev, func, argv4);
+		return acpi_evaluate_dsm(handle, guid, rev, func, argv4);
 	return obj;
 }
 EXPORT_SYMBOL(__wrap_acpi_evaluate_dsm);
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index c2187178fb13..28859da78edf 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1559,7 +1559,7 @@ static unsigned long nfit_ctl_handle;
 union acpi_object *result;
 
 static union acpi_object *nfit_test_evaluate_dsm(acpi_handle handle,
-		const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4)
+		const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4)
 {
 	if (handle != &nfit_ctl_handle)
 		return ERR_PTR(-ENXIO);
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index f54c0032c6ff..d3d63dd5ed38 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -13,6 +13,7 @@
 #ifndef __NFIT_TEST_H__
 #define __NFIT_TEST_H__
 #include <linux/list.h>
+#include <linux/uuid.h>
 #include <linux/ioport.h>
 #include <linux/spinlock_types.h>
 
@@ -36,7 +37,8 @@ typedef void *acpi_handle;
 
 typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t);
 typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle,
-		const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4);
+		 const guid_t *guid, u64 rev, u64 func,
+		 union acpi_object *argv4);
 void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
 		unsigned long size);
 void __wrap_iounmap(volatile void __iomem *addr);
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 91edd0566237..f389b02d43a0 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -11,7 +11,8 @@ endif
 CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
 LDLIBS += -lcap -lelf
 
-TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs
+TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
+	test_align
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o
 
@@ -34,6 +35,7 @@ $(BPFOBJ): force
 CLANG ?= clang
 
 %.o: %.c
-	$(CLANG) -I. -I../../../include/uapi -I../../../../samples/bpf/ \
+	$(CLANG) -I. -I./include/uapi -I../../../include/uapi \
+		-I../../../../samples/bpf/ \
 		-Wno-compare-distinct-pointer-types \
 		-O2 -target bpf -c $< -o $@
diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h
index 19d0604f8694..487cbfb89beb 100644
--- a/tools/testing/selftests/bpf/bpf_endian.h
+++ b/tools/testing/selftests/bpf/bpf_endian.h
@@ -1,23 +1,42 @@
 #ifndef __BPF_ENDIAN__
 #define __BPF_ENDIAN__
 
-#include <asm/byteorder.h>
+#include <linux/swab.h>
 
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-# define __bpf_ntohs(x)		__builtin_bswap16(x)
-# define __bpf_htons(x)		__builtin_bswap16(x)
-#elif __BYTE_ORDER == __BIG_ENDIAN
-# define __bpf_ntohs(x)		(x)
-# define __bpf_htons(x)		(x)
+/* LLVM's BPF target selects the endianness of the CPU
+ * it compiles on, or the user specifies (bpfel/bpfeb),
+ * respectively. The used __BYTE_ORDER__ is defined by
+ * the compiler, we cannot rely on __BYTE_ORDER from
+ * libc headers, since it doesn't reflect the actual
+ * requested byte order.
+ *
+ * Note, LLVM's BPF target has different __builtin_bswapX()
+ * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE
+ * in bpfel and bpfeb case, which means below, that we map
+ * to cpu_to_be16(). We could use it unconditionally in BPF
+ * case, but better not rely on it, so that this header here
+ * can be used from application and BPF program side, which
+ * use different targets.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define __bpf_ntohs(x)			__builtin_bswap16(x)
+# define __bpf_htons(x)			__builtin_bswap16(x)
+# define __bpf_constant_ntohs(x)	___constant_swab16(x)
+# define __bpf_constant_htons(x)	___constant_swab16(x)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define __bpf_ntohs(x)			(x)
+# define __bpf_htons(x)			(x)
+# define __bpf_constant_ntohs(x)	(x)
+# define __bpf_constant_htons(x)	(x)
 #else
-# error "Fix your __BYTE_ORDER?!"
+# error "Fix your compiler's __BYTE_ORDER__?!"
 #endif
 
 #define bpf_htons(x)				\
 	(__builtin_constant_p(x) ?		\
-	 __constant_htons(x) : __bpf_htons(x))
+	 __bpf_constant_htons(x) : __bpf_htons(x))
 #define bpf_ntohs(x)				\
 	(__builtin_constant_p(x) ?		\
-	 __constant_ntohs(x) : __bpf_ntohs(x))
+	 __bpf_constant_ntohs(x) : __bpf_ntohs(x))
 
-#endif
+#endif /* __BPF_ENDIAN__ */
diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/testing/selftests/bpf/include/uapi/linux/types.h
new file mode 100644
index 000000000000..51841848fbfe
--- /dev/null
+++ b/tools/testing/selftests/bpf/include/uapi/linux/types.h
@@ -0,0 +1,22 @@
+#ifndef _UAPI_LINUX_TYPES_H
+#define _UAPI_LINUX_TYPES_H
+
+#include <asm-generic/int-ll64.h>
+
+/* copied from linux:include/uapi/linux/types.h */
+#define __bitwise
+typedef __u16 __bitwise __le16;
+typedef __u16 __bitwise __be16;
+typedef __u32 __bitwise __le32;
+typedef __u32 __bitwise __be32;
+typedef __u64 __bitwise __le64;
+typedef __u64 __bitwise __be64;
+
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+
+#define __aligned_u64 __u64 __attribute__((aligned(8)))
+#define __aligned_be64 __be64 __attribute__((aligned(8)))
+#define __aligned_le64 __le64 __attribute__((aligned(8)))
+
+#endif /* _UAPI_LINUX_TYPES_H */
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
new file mode 100644
index 000000000000..9644d4e069de
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -0,0 +1,453 @@
+#include <asm/types.h>
+#include <linux/types.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#include <linux/unistd.h>
+#include <linux/filter.h>
+#include <linux/bpf_perf_event.h>
+#include <linux/bpf.h>
+
+#include <bpf/bpf.h>
+
+#include "../../../include/linux/filter.h"
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#define MAX_INSNS	512
+#define MAX_MATCHES	16
+
+struct bpf_align_test {
+	const char *descr;
+	struct bpf_insn	insns[MAX_INSNS];
+	enum {
+		UNDEF,
+		ACCEPT,
+		REJECT
+	} result;
+	enum bpf_prog_type prog_type;
+	const char *matches[MAX_MATCHES];
+};
+
+static struct bpf_align_test tests[] = {
+	{
+		.descr = "mov",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 16),
+			BPF_MOV64_IMM(BPF_REG_3, 32),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			"1: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
+			"2: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
+			"3: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
+			"4: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
+			"5: R1=ctx R3=imm32,min_value=32,max_value=32,min_align=32 R10=fp",
+		},
+	},
+	{
+		.descr = "shift",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_3, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 32),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			"1: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp",
+			"2: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
+			"3: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
+			"4: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
+			"5: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
+			"6: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp",
+			"7: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm32,min_value=32,max_value=32,min_align=32 R10=fp",
+			"8: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
+			"9: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
+			"10: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
+			"11: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
+		},
+	},
+	{
+		.descr = "addsub",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_4, 8),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			"1: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
+			"2: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=4 R10=fp",
+			"3: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R10=fp",
+			"4: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
+			"5: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm12,min_value=12,max_value=12,min_align=4 R10=fp",
+			"6: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm14,min_value=14,max_value=14,min_align=2 R10=fp",
+		},
+	},
+	{
+		.descr = "mul",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 7),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 2),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			"1: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp",
+			"2: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp",
+			"3: R1=ctx R3=imm14,min_value=14,max_value=14,min_align=2 R10=fp",
+			"4: R1=ctx R3=imm56,min_value=56,max_value=56,min_align=4 R10=fp",
+		},
+	},
+
+#define PREP_PKT_POINTERS \
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \
+		    offsetof(struct __sk_buff, data)), \
+	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, \
+		    offsetof(struct __sk_buff, data_end))
+
+#define LOAD_UNKNOWN(DST_REG) \
+	PREP_PKT_POINTERS, \
+	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), \
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), \
+	BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 1), \
+	BPF_EXIT_INSN(), \
+	BPF_LDX_MEM(BPF_B, DST_REG, BPF_REG_2, 0)
+
+	{
+		.descr = "unknown shift",
+		.insns = {
+			LOAD_UNKNOWN(BPF_REG_3),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			LOAD_UNKNOWN(BPF_REG_4),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 5),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			"7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp",
+			"8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv55,min_align=2 R10=fp",
+			"9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv54,min_align=4 R10=fp",
+			"10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv53,min_align=8 R10=fp",
+			"11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv52,min_align=16 R10=fp",
+			"18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv56 R10=fp",
+			"19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv51,min_align=32 R10=fp",
+			"20: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv52,min_align=16 R10=fp",
+			"21: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv53,min_align=8 R10=fp",
+			"22: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv54,min_align=4 R10=fp",
+			"23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv55,min_align=2 R10=fp",
+		},
+	},
+	{
+		.descr = "unknown mul",
+		.insns = {
+			LOAD_UNKNOWN(BPF_REG_3),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 1),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 4),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 8),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			"7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp",
+			"8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
+			"9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv55,min_align=1 R10=fp",
+			"10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
+			"11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv54,min_align=2 R10=fp",
+			"12: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
+			"13: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv53,min_align=4 R10=fp",
+			"14: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
+			"15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv52,min_align=8 R10=fp",
+			"16: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv50,min_align=8 R10=fp"
+		},
+	},
+	{
+		.descr = "packet const offset",
+		.insns = {
+			PREP_PKT_POINTERS,
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+
+			/* Skip over ethernet header.  */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 3),
+			BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 0),
+			BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 2),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			"4: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=0,r=0) R10=fp",
+			"5: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=14,r=0) R10=fp",
+			"6: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R4=pkt(id=0,off=14,r=0) R5=pkt(id=0,off=14,r=0) R10=fp",
+			"10: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv56 R5=pkt(id=0,off=14,r=18) R10=fp",
+			"14: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp",
+			"15: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp",
+		},
+	},
+	{
+		.descr = "packet variable offset",
+		.insns = {
+			LOAD_UNKNOWN(BPF_REG_6),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+
+			/* First, add a constant to the R5 packet pointer,
+			 * then a variable with a known alignment.
+			 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			/* Now, test in the other direction.  Adding first
+			 * the variable offset to R5, then the constant.
+			 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			/* Test multiple accumulations of unknown values
+			 * into a packet pointer.
+			 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			/* Calculated offset in R6 has unknown value, but known
+			 * alignment of 4.
+			 */
+			"8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R6=inv54,min_align=4 R10=fp",
+
+			/* Offset is added to packet pointer R5, resulting in known
+			 * auxiliary alignment and offset.
+			 */
+			"11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R5=pkt(id=1,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+
+			/* At the time the word size load is performed from R5,
+			 * it's total offset is NET_IP_ALIGN + reg->off (0) +
+			 * reg->aux_off (14) which is 16.  Then the variable
+			 * offset is considered using reg->aux_off_align which
+			 * is 4 and meets the load's requirements.
+			 */
+			"15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=1,off=4,r=4),aux_off=14,aux_off_align=4 R5=pkt(id=1,off=0,r=4),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+
+
+			/* Variable offset is added to R5 packet pointer,
+			 * resulting in auxiliary alignment of 4.
+			 */
+			"18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=0,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+
+			/* Constant offset is added to R5, resulting in
+			 * reg->off of 14.
+			 */
+			"19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=14,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+
+			/* At the time the word size load is performed from R5,
+			 * it's total offset is NET_IP_ALIGN + reg->off (14) which
+			 * is 16.  Then the variable offset is considered using
+			 * reg->aux_off_align which is 4 and meets the load's
+			 * requirements.
+			 */
+			"23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=2,off=18,r=18),aux_off_align=4 R5=pkt(id=2,off=14,r=18),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+
+			/* Constant offset is added to R5 packet pointer,
+			 * resulting in reg->off value of 14.
+			 */
+			"26: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=0,off=14,r=8) R6=inv54,min_align=4 R10=fp",
+			/* Variable offset is added to R5, resulting in an
+			 * auxiliary offset of 14, and an auxiliary alignment of 4.
+			 */
+			"27: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+			/* Constant is added to R5 again, setting reg->off to 4. */
+			"28: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=4,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+			/* And once more we add a variable, which causes an accumulation
+			 * of reg->off into reg->aux_off_align, with resulting value of
+			 * 18.  The auxiliary alignment stays at 4.
+			 */
+			"29: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=4,off=0,r=0),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+			/* At the time the word size load is performed from R5,
+			 * it's total offset is NET_IP_ALIGN + reg->off (0) +
+			 * reg->aux_off (18) which is 20.  Then the variable offset
+			 * is considered using reg->aux_off_align which is 4 and meets
+			 * the load's requirements.
+			 */
+			"33: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=4,off=4,r=4),aux_off=18,aux_off_align=4 R5=pkt(id=4,off=0,r=4),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+		},
+	},
+};
+
+static int probe_filter_length(const struct bpf_insn *fp)
+{
+	int len;
+
+	for (len = MAX_INSNS - 1; len > 0; --len)
+		if (fp[len].code != 0 || fp[len].imm != 0)
+			break;
+	return len + 1;
+}
+
+static char bpf_vlog[32768];
+
+static int do_test_single(struct bpf_align_test *test)
+{
+	struct bpf_insn *prog = test->insns;
+	int prog_type = test->prog_type;
+	int prog_len, i;
+	int fd_prog;
+	int ret;
+
+	prog_len = probe_filter_length(prog);
+	fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
+				     prog, prog_len, 1, "GPL", 0,
+				     bpf_vlog, sizeof(bpf_vlog));
+	if (fd_prog < 0) {
+		printf("Failed to load program.\n");
+		printf("%s", bpf_vlog);
+		ret = 1;
+	} else {
+		ret = 0;
+		for (i = 0; i < MAX_MATCHES; i++) {
+			const char *t, *m = test->matches[i];
+
+			if (!m)
+				break;
+			t = strstr(bpf_vlog, m);
+			if (!t) {
+				printf("Failed to find match: %s\n", m);
+				ret = 1;
+				printf("%s", bpf_vlog);
+				break;
+			}
+		}
+		close(fd_prog);
+	}
+	return ret;
+}
+
+static int do_test(unsigned int from, unsigned int to)
+{
+	int all_pass = 0;
+	int all_fail = 0;
+	unsigned int i;
+
+	for (i = from; i < to; i++) {
+		struct bpf_align_test *test = &tests[i];
+		int fail;
+
+		printf("Test %3d: %s ... ",
+		       i, test->descr);
+		fail = do_test_single(test);
+		if (fail) {
+			all_fail++;
+			printf("FAIL\n");
+		} else {
+			all_pass++;
+			printf("PASS\n");
+		}
+	}
+	printf("Results: %d pass %d fail\n",
+	       all_pass, all_fail);
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	unsigned int from = 0, to = ARRAY_SIZE(tests);
+
+	if (argc == 3) {
+		unsigned int l = atoi(argv[argc - 2]);
+		unsigned int u = atoi(argv[argc - 1]);
+
+		if (l < to && u < to) {
+			from = l;
+			to   = u + 1;
+		}
+	} else if (argc == 2) {
+		unsigned int t = atoi(argv[argc - 1]);
+
+		if (t < to) {
+			from = t;
+			to   = t + 1;
+		}
+	}
+	return do_test(from, to);
+}
diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/test_pkt_access.c
index 39387bb7e08c..6e11ba11709e 100644
--- a/tools/testing/selftests/bpf/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/test_pkt_access.c
@@ -5,6 +5,7 @@
  * License as published by the Free Software Foundation.
  */
 #include <stddef.h>
+#include <string.h>
 #include <linux/bpf.h>
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3773562056da..0ff8c55c0464 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -49,6 +49,7 @@
 #define MAX_NR_MAPS	4
 
 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS	(1 << 0)
+#define F_LOAD_WITH_STRICT_ALIGNMENT		(1 << 1)
 
 struct bpf_test {
 	const char *descr;
@@ -2615,6 +2616,30 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
+		"direct packet access: test17 (pruning, alignment)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+			BPF_JMP_A(-6),
+		},
+		.errstr = "misaligned packet access off 2+15+-4 size 4",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+	},
+	{
 		"helper access to packet: test1, valid packet_ptr range",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -3341,6 +3366,70 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
 	{
+		"alu ops on ptr_to_map_value_or_null, 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"alu ops on ptr_to_map_value_or_null, 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"alu ops on ptr_to_map_value_or_null, 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
 		"invalid memory access with multiple map_lookup_elem calls",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_1, 10),
@@ -3660,6 +3749,72 @@ static struct bpf_test tests[] = {
 		.errstr = "invalid bpf_context access",
 	},
 	{
+		"leak pointer into ctx 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2,
+				      offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 2 },
+		.errstr_unpriv = "R2 leaks addr into mem",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"leak pointer into ctx 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10,
+				      offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R10 leaks addr into mem",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"leak pointer into ctx 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
+				      offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 1 },
+		.errstr_unpriv = "R2 leaks addr into ctx",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"leak pointer into map val",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+			BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr_unpriv = "R6 leaks addr into mem",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
 		"helper access to map: full range",
 		.insns = {
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -4937,7 +5092,149 @@ static struct bpf_test tests[] = {
 		.fixup_map_in_map = { 3 },
 		.errstr = "R1 type=map_value_or_null expected=map_ptr",
 		.result = REJECT,
-	}
+	},
+	{
+		"ld_abs: check calling conv, r1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R3 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R4 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R5 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r7",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_7, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"ld_ind: check calling conv, r1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_1, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_3, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R3 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_4, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R4 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R5 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r7",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_7, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -5059,9 +5356,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 
 	do_test_fixup(test, prog, map_fds);
 
-	fd_prog = bpf_load_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
-				   prog, prog_len, "GPL", 0, bpf_vlog,
-				   sizeof(bpf_vlog));
+	fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
+				     prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
+				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog));
 
 	expected_ret = unpriv && test->result_unpriv != UNDEF ?
 		       test->result_unpriv : test->result;
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index 32e6211e1c6e..717581145cfc 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -58,7 +58,7 @@ parse_opts() { # opts
     ;;
     --verbose|-v|-vv)
       VERBOSE=$((VERBOSE + 1))
-      [ $1 == '-vv' ] && VERBOSE=$((VERBOSE + 1))
+      [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1))
       shift 1
     ;;
     --debug|-d)
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
index 07bb3e5930b4..aa31368851c9 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
@@ -48,7 +48,7 @@ test_event_enabled() {
     e=`cat $EVENT_ENABLE`
     if [ "$e" != $val ]; then
 	echo "Expected $val but found $e"
-	exit -1
+	exit 1
     fi
 }
 
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index 9aec6fcb7729..f2019b37370d 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -34,10 +34,10 @@ reset_ftrace_filter() { # reset all triggers in set_ftrace_filter
     echo > set_ftrace_filter
     grep -v '^#' set_ftrace_filter | while read t; do
 	tr=`echo $t | cut -d: -f2`
-	if [ "$tr" == "" ]; then
+	if [ "$tr" = "" ]; then
 	    continue
 	fi
-	if [ $tr == "enable_event" -o $tr == "disable_event" ]; then
+	if [ $tr = "enable_event" -o $tr = "disable_event" ]; then
 	    tr=`echo $t | cut -d: -f1-4`
 	    limit=`echo $t | cut -d: -f5`
 	else
diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
index 4c5a061a5b4e..c73db7863adb 100644
--- a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
@@ -75,9 +75,13 @@ rmdir foo
 if [ -d foo ]; then
         fail "foo still exists"
 fi
-exit 0
-
 
+mkdir foo
+echo "schedule:enable_event:sched:sched_switch" > foo/set_ftrace_filter
+rmdir foo
+if [ -d foo ]; then
+        fail "foo still exists"
+fi
 
 
 instance_slam() {
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
new file mode 100644
index 000000000000..f4d1ff785d67
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -0,0 +1,21 @@
+#!/bin/sh
+# description: Register/unregister many kprobe events
+
+# ftrace fentry skip size depends on the machine architecture.
+# Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc
+case `uname -m` in
+  x86_64|i[3456]86) OFFS=5;;
+  ppc*) OFFS=4;;
+  *) OFFS=0;;
+esac
+
+echo "Setup up to 256 kprobes"
+grep t /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \
+head -n 256 | while read i; do echo p ${i}+${OFFS} ; done > kprobe_events ||:
+
+echo 1 > events/kprobes/enable
+echo 0 > events/kprobes/enable
+echo > kprobe_events
+echo "Waiting for unoptimizing & freeing"
+sleep 5
+echo "Done"
diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index a676d3eefefb..13f5198ba0ee 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -305,7 +305,7 @@ function perf_test()
 	echo "Running remote perf test $WITH DMA"
 	write_file "" $REMOTE_PERF/run
 	echo -n "  "
-	read_file $LOCAL_PERF/run
+	read_file $REMOTE_PERF/run
 	echo "  Passed"
 
 	_modprobe -r ntb_perf
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index 427621792229..2f1f7b013293 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -11,3 +11,4 @@ tm-signal-context-chk-fpu
 tm-signal-context-chk-gpr
 tm-signal-context-chk-vmx
 tm-signal-context-chk-vsx
+tm-vmx-unavail
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 5576ee6a51f2..958c11c14acd 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -2,7 +2,8 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
 	tm-signal-context-chk-vmx tm-signal-context-chk-vsx
 
 TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
-	tm-vmxcopy tm-fork tm-tar tm-tmspr $(SIGNAL_CONTEXT_CHK_TESTS)
+	tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail \
+	$(SIGNAL_CONTEXT_CHK_TESTS)
 
 include ../../lib.mk
 
@@ -13,6 +14,7 @@ CFLAGS += -mhtm
 $(OUTPUT)/tm-syscall: tm-syscall-asm.S
 $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
 $(OUTPUT)/tm-tmspr: CFLAGS += -pthread
+$(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
 
 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
 $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
index d9c49f41515e..e79ccd6aada1 100644
--- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
@@ -42,12 +42,12 @@ int test_body(void)
 	printf("Check DSCR TM context switch: ");
 	fflush(stdout);
 	for (;;) {
-		rv = 1;
 		asm __volatile__ (
 			/* set a known value into the DSCR */
 			"ld      3, %[dscr1];"
 			"mtspr   %[sprn_dscr], 3;"
 
+			"li      %[rv], 1;"
 			/* start and suspend a transaction */
 			"tbegin.;"
 			"beq     1f;"
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
new file mode 100644
index 000000000000..137185ba4937
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2017, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ * Original: Breno Leitao <brenohl@br.ibm.com> &
+ *           Gustavo Bueno Romero <gromero@br.ibm.com>
+ * Edited: Michael Neuling
+ *
+ * Force VMX unavailable during a transaction and see if it corrupts
+ * the checkpointed VMX register state after the abort.
+ */
+
+#include <inttypes.h>
+#include <htmintrin.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include "tm.h"
+#include "utils.h"
+
+int passed;
+
+void *worker(void *unused)
+{
+	__int128 vmx0;
+	uint64_t texasr;
+
+	asm goto (
+		"li       3, 1;"  /* Stick non-zero value in VMX0 */
+		"std      3, 0(%[vmx0_ptr]);"
+		"lvx      0, 0, %[vmx0_ptr];"
+
+		/* Wait here a bit so we get scheduled out 255 times */
+		"lis      3, 0x3fff;"
+		"1: ;"
+		"addi     3, 3, -1;"
+		"cmpdi    3, 0;"
+		"bne      1b;"
+
+		/* Kernel will hopefully turn VMX off now */
+
+		"tbegin. ;"
+		"beq      failure;"
+
+		/* Cause VMX unavail. Any VMX instruction */
+		"vaddcuw  0,0,0;"
+
+		"tend. ;"
+		"b        %l[success];"
+
+		/* Check VMX0 sanity after abort */
+		"failure: ;"
+		"lvx       1,  0, %[vmx0_ptr];"
+		"vcmpequb. 2,  0, 1;"
+		"bc        4, 24, %l[value_mismatch];"
+		"b        %l[value_match];"
+		:
+		: [vmx0_ptr] "r"(&vmx0)
+		: "r3"
+		: success, value_match, value_mismatch
+		);
+
+	/* HTM aborted and VMX0 is corrupted */
+value_mismatch:
+	texasr = __builtin_get_texasr();
+
+	printf("\n\n==============\n\n");
+	printf("Failure with error: %lx\n",   _TEXASR_FAILURE_CODE(texasr));
+	printf("Summary error     : %lx\n",   _TEXASR_FAILURE_SUMMARY(texasr));
+	printf("TFIAR exact       : %lx\n\n", _TEXASR_TFIAR_EXACT(texasr));
+
+	passed = 0;
+	return NULL;
+
+	/* HTM aborted but VMX0 is correct */
+value_match:
+//	printf("!");
+	return NULL;
+
+success:
+//	printf(".");
+	return NULL;
+}
+
+int tm_vmx_unavail_test()
+{
+	int threads;
+	pthread_t *thread;
+
+	SKIP_IF(!have_htm());
+
+	passed = 1;
+
+	threads = sysconf(_SC_NPROCESSORS_ONLN) * 4;
+	thread = malloc(sizeof(pthread_t)*threads);
+	if (!thread)
+		return EXIT_FAILURE;
+
+	for (uint64_t i = 0; i < threads; i++)
+		pthread_create(&thread[i], NULL, &worker, NULL);
+
+	for (uint64_t i = 0; i < threads; i++)
+		pthread_join(thread[i], NULL);
+
+	free(thread);
+
+	return passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+
+int main(int argc, char **argv)
+{
+	return test_harness(tm_vmx_unavail_test, "tm_vmx_unavail_test");
+}
diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh
index eee31e261bf7..70fca318a82b 100755
--- a/tools/testing/selftests/rcutorture/bin/configcheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh
@@ -27,7 +27,7 @@ cat $1 > $T/.config
 
 cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' |
 awk	'
-BEGIN	{
+{
 		print "if grep -q \"" $0 "\" < '"$T/.config"'";
 		print "then";
 		print "\t:";
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index 00cb0db2643d..c29f2ec0bf9f 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -45,7 +45,7 @@ T=/tmp/test-linux.sh.$$
 trap 'rm -rf $T' 0
 mkdir $T
 
-grep -v 'CONFIG_[A-Z]*_TORTURE_TEST' < ${config_template} > $T/config
+grep -v 'CONFIG_[A-Z]*_TORTURE_TEST=' < ${config_template} > $T/config
 cat << ___EOF___ >> $T/config
 CONFIG_INITRAMFS_SOURCE="$TORTURE_INITRD"
 CONFIG_VIRTIO_PCI=y
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index ea6e373edc27..93eede4e8fbe 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -170,7 +170,7 @@ qemu_append="`identify_qemu_append "$QEMU"`"
 # Pull in Kconfig-fragment boot parameters
 boot_args="`configfrag_boot_params "$boot_args" "$config_template"`"
 # Generate kernel-version-specific boot parameters
-boot_args="`per_version_boot_params "$boot_args" $builddir/.config $seconds`"
+boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`"
 
 if test -n "$TORTURE_BUILDONLY"
 then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 3b3c1b693ee1..50091de3a911 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -296,10 +296,7 @@ if test -d .git
 then
 	git status >> $resdir/$ds/testid.txt
 	git rev-parse HEAD >> $resdir/$ds/testid.txt
-	if ! git diff HEAD > $T/git-diff 2>&1
-	then
-		cp $T/git-diff $resdir/$ds
-	fi
+	git diff HEAD >> $resdir/$ds/testid.txt
 fi
 ___EOF___
 awk < $T/cfgcpu.pack \
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index a3a1a05a2b5c..6a0b9f69faad 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -9,6 +9,8 @@ TREE08
 TREE09
 SRCU-N
 SRCU-P
+SRCU-t
+SRCU-u
 TINY01
 TINY02
 TASKS01
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot
new file mode 100644
index 000000000000..84a7d51b7481
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
index 1a087c3c8bb8..2da8b49589a0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
@@ -5,4 +5,4 @@ CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
-CONFIG_RCU_EXPERT=y
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
index 4837430a71c0..ab7ccd38232b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
@@ -2,7 +2,11 @@ CONFIG_RCU_TRACE=n
 CONFIG_SMP=y
 CONFIG_NR_CPUS=8
 CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
 CONFIG_PREEMPT_NONE=n
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=y
-#CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
new file mode 100644
index 000000000000..6c78022c8cd8
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
@@ -0,0 +1,10 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_SRCU=y
+CONFIG_RCU_TRACE=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+#CHECK#CONFIG_PREEMPT_COUNT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
new file mode 100644
index 000000000000..238bfe3bd0cc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcu
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
new file mode 100644
index 000000000000..6bc24e99862f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
@@ -0,0 +1,9 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_SRCU=y
+CONFIG_RCU_TRACE=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_PREEMPT_COUNT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
new file mode 100644
index 000000000000..84a7d51b7481
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
index a59f7686e219..d8674264318d 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
@@ -6,10 +6,9 @@ CONFIG_PREEMPT=n
 CONFIG_HZ_PERIODIC=y
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_TRACE=y
 CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU_REPEATEDLY=y
 #CHECK#CONFIG_PROVE_RCU=y
 CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_DEBUG_OBJECTS=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
-CONFIG_PREEMPT_COUNT=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
index 359cb258f639..b5b53973c01e 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -10,12 +10,9 @@ CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_RCU_TRACE=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_MAXSMP=y
+CONFIG_CPUMASK_OFFSTACK=y
 CONFIG_RCU_NOCB_CPU=y
-CONFIG_RCU_NOCB_CPU_ZERO=y
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
index adc3abc82fb8..1d14e1383016 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -1 +1,5 @@
 rcutorture.torture_type=rcu_bh maxcpus=8
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
+rcu_nocbs=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
index c1ab5926568b..35e639e39366 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
@@ -18,9 +18,6 @@ CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=n
 CONFIG_RCU_BOOST=n
-CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
+CONFIG_DEBUG_OBJECTS=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
index 3b93ee544e70..2dc31b16e506 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
@@ -14,9 +14,5 @@ CONFIG_RCU_FANOUT_LEAF=2
 CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_RCU_BOOST=y
-CONFIG_RCU_KTHREAD_PRIO=2
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
index 120c0c88d100..5d2cc0bd50a0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -1 +1,5 @@
 rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
+rcutree.kthread_prio=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index 5af758e783c7..27d22695d64c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -15,11 +15,7 @@ CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
 CONFIG_RCU_FANOUT=4
 CONFIG_RCU_FANOUT_LEAF=3
-CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
 CONFIG_RCU_EQS_DEBUG=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
index d4cdc0d74e16..2dde0d9964e3 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -13,12 +13,8 @@ CONFIG_HOTPLUG_CPU=y
 CONFIG_RCU_FANOUT=6
 CONFIG_RCU_FANOUT_LEAF=6
 CONFIG_RCU_NOCB_CPU=y
-CONFIG_RCU_NOCB_CPU_NONE=y
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
index 15b3e1a86f74..c7fd050dfcd9 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
@@ -1,2 +1,5 @@
 rcutorture.torture_type=sched
 rcupdate.rcu_self_test_sched=1
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
index 4cb02bd28f08..05a4eec3f27b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
@@ -18,8 +18,6 @@ CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
+CONFIG_DEBUG_OBJECTS=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
 CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
index dd90f28ed700..ad18b52a2cad 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
@@ -2,3 +2,6 @@ rcupdate.rcu_self_test=1
 rcupdate.rcu_self_test_bh=1
 rcupdate.rcu_self_test_sched=1
 rcutree.rcu_fanout_exact=1
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index b12a3ea1867e..0f4759f4232e 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -1,6 +1,5 @@
 CONFIG_SMP=y
 CONFIG_NR_CPUS=16
-CONFIG_CPUMASK_OFFSTACK=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
@@ -9,16 +8,11 @@ CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
 CONFIG_NO_HZ_FULL_ALL=n
-CONFIG_NO_HZ_FULL_SYSIDLE=y
 CONFIG_RCU_FAST_NO_HZ=n
 CONFIG_RCU_TRACE=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_RCU_FANOUT=2
 CONFIG_RCU_FANOUT_LEAF=2
-CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
index 099cc63c6a3b..fb1c763c10c5 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
@@ -15,7 +15,6 @@ CONFIG_HIBERNATION=n
 CONFIG_RCU_FANOUT=3
 CONFIG_RCU_FANOUT_LEAF=2
 CONFIG_RCU_NOCB_CPU=y
-CONFIG_RCU_NOCB_CPU_ALL=y
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_PROVE_LOCKING=n
 CONFIG_RCU_BOOST=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
deleted file mode 100644
index 2ad13f0d29cc..000000000000
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
+++ /dev/null
@@ -1,21 +0,0 @@
-CONFIG_SMP=y
-CONFIG_NR_CPUS=16
-CONFIG_PREEMPT_NONE=n
-CONFIG_PREEMPT_VOLUNTARY=n
-CONFIG_PREEMPT=y
-#CHECK#CONFIG_PREEMPT_RCU=y
-CONFIG_HZ_PERIODIC=n
-CONFIG_NO_HZ_IDLE=y
-CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_RCU_TRACE=y
-CONFIG_HOTPLUG_CPU=n
-CONFIG_SUSPEND=n
-CONFIG_HIBERNATION=n
-CONFIG_RCU_FANOUT=3
-CONFIG_RCU_FANOUT_LEAF=2
-CONFIG_RCU_NOCB_CPU=y
-CONFIG_RCU_NOCB_CPU_ALL=y
-CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_BOOST=n
-CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
index fb066dc82769..1bd8efc4141e 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
@@ -2,3 +2,4 @@ rcutorture.torture_type=sched
 rcupdate.rcu_self_test=1
 rcupdate.rcu_self_test_sched=1
 rcutree.rcu_fanout_exact=1
+rcu_nocbs=0-7
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
index 917d2517b5b5..fb05ef5279b4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
@@ -1,21 +1,16 @@
-CONFIG_SMP=y
-CONFIG_NR_CPUS=8
-CONFIG_PREEMPT_NONE=n
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
-CONFIG_PREEMPT=y
-#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_RCU=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=y
 CONFIG_NO_HZ_FULL=n
 CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_RCU_TRACE=y
-CONFIG_HOTPLUG_CPU=n
-CONFIG_SUSPEND=n
-CONFIG_HIBERNATION=n
-CONFIG_RCU_FANOUT=3
-CONFIG_RCU_FANOUT_LEAF=3
 CONFIG_RCU_NOCB_CPU=n
-CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_PROVE_LOCKING=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
index a312f671a29a..721cfda76ab2 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
@@ -7,7 +7,6 @@ CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=y
 CONFIG_NO_HZ_FULL=n
 CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_RCU_TRACE=n
 CONFIG_HOTPLUG_CPU=n
 CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
index 985fb170d13c..7629f5dd73b2 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
@@ -8,7 +8,6 @@ CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=y
 CONFIG_NO_HZ_FULL=n
 CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_RCU_TRACE=n
 CONFIG_HOTPLUG_CPU=n
 CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
diff --git a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
index 24396ae8355b..a75b16991a92 100644
--- a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
+++ b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
@@ -18,7 +18,6 @@ CONFIG_PROVE_RCU
 
 	In common code tested by TREE_RCU test cases.
 
-CONFIG_NO_HZ_FULL_SYSIDLE
 CONFIG_RCU_NOCB_CPU
 
 	Meaningless for TINY_RCU.
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index 364801b1a230..9ad3f89c8dc7 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -9,28 +9,20 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD -- Do one.
 CONFIG_HOTPLUG_CPU -- Do half.  (Every second.)
 CONFIG_HZ_PERIODIC -- Do one.
 CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.)
-CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE.
-CONFIG_NO_HZ_FULL_SYSIDLE -- Do one.
+CONFIG_NO_HZ_FULL -- Do two, one with partial CPU enablement.
 CONFIG_PREEMPT -- Do half.  (First three and #8.)
 CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
 CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
-CONFIG_PROVE_RCU_REPEATEDLY -- Do one.
 CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
-CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing.
 CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
 CONFIG_RCU_FANOUT_LEAF -- Do one non-default.
-CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL.
-CONFIG_RCU_NOCB_CPU -- Do three, see below.
-CONFIG_RCU_NOCB_CPU_ALL -- Do one.
-CONFIG_RCU_NOCB_CPU_NONE -- Do one.
-CONFIG_RCU_NOCB_CPU_ZERO -- Do one.
+CONFIG_RCU_FAST_NO_HZ -- Do one, but not with all nohz_full CPUs.
+CONFIG_RCU_NOCB_CPU -- Do three, one with no rcu_nocbs CPUs, one with
+	rcu_nocbs=0, and one with all rcu_nocbs CPUs.
 CONFIG_RCU_TRACE -- Do half.
 CONFIG_SMP -- Need one !SMP for PREEMPT_RCU.
 CONFIG_RCU_EXPERT=n -- Do a few, but these have to be vanilla configurations.
 CONFIG_RCU_EQS_DEBUG -- Do at least one for CONFIG_NO_HZ_FULL and not.
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP -- Do for all but a couple TREE scenarios.
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT -- Do for all but a couple TREE scenarios.
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT -- Do for all but a couple TREE scenarios.
 
 RCU-bh: Do one with PREEMPT and one with !PREEMPT.
 RCU-sched: Do one with PREEMPT but not BOOST.
@@ -52,10 +44,6 @@ CONFIG_64BIT
 
 	Used only to check CONFIG_RCU_FANOUT value, inspection suffices.
 
-CONFIG_NO_HZ_FULL_SYSIDLE_SMALL
-
-	Defer until Frederic uses this.
-
 CONFIG_PREEMPT_COUNT
 CONFIG_PREEMPT_RCU
 
@@ -78,30 +66,16 @@ CONFIG_RCU_TORTURE_TEST_RUNNABLE
 
 	Always used in KVM testing.
 
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY
-
-	Inspection suffices, ignore.
-
 CONFIG_PREEMPT_RCU
 CONFIG_TREE_RCU
 CONFIG_TINY_RCU
 
 	These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP.
 
-CONFIG_SPARSE_RCU_POINTER
-
-	Makes sense only for sparse runs, not for kernel builds.
-
 CONFIG_SRCU
 CONFIG_TASKS_RCU
 
 	Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable.
 
-CONFIG_RCU_TRACE
-
-	Implied by CONFIG_RCU_TRACE for Tree RCU.
-
 
 boot parameters ignored: TBD
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
index 8ff89043d0a9..c9e8bc5082a7 100755
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
@@ -1,4 +1,4 @@
-#!/bin/awk -f
+#!/usr/bin/awk -f
 
 # Modify SRCU for formal verification. The first argument should be srcu.h and
 # the second should be srcu.c. Outputs modified srcu.h and srcu.c into the
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index 9377c8b4ac16..d8f534025b7f 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -57,6 +57,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
 				      bool weak_barriers,
+				      bool ctx,
 				      void *pages,
 				      bool (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
index f31353fac541..453ca3c21193 100644
--- a/tools/virtio/ringtest/main.c
+++ b/tools/virtio/ringtest/main.c
@@ -20,6 +20,7 @@
 int runcycles = 10000000;
 int max_outstanding = INT_MAX;
 int batch = 1;
+int param = 0;
 
 bool do_sleep = false;
 bool do_relax = false;
@@ -86,7 +87,7 @@ void set_affinity(const char *arg)
 	cpu = strtol(arg, &endptr, 0);
 	assert(!*endptr);
 
-	assert(cpu >= 0 || cpu < CPU_SETSIZE);
+	assert(cpu >= 0 && cpu < CPU_SETSIZE);
 
 	self = pthread_self();
 	CPU_ZERO(&cpuset);
@@ -247,6 +248,11 @@ static const struct option longopts[] = {
 		.val = 'b',
 	},
 	{
+		.name = "param",
+		.has_arg = required_argument,
+		.val = 'p',
+	},
+	{
 		.name = "sleep",
 		.has_arg = no_argument,
 		.val = 's',
@@ -274,6 +280,7 @@ static void help(void)
 		" [--run-cycles C (default: %d)]"
 		" [--batch b]"
 		" [--outstanding o]"
+		" [--param p]"
 		" [--sleep]"
 		" [--relax]"
 		" [--exit]"
@@ -328,6 +335,12 @@ int main(int argc, char **argv)
 			assert(c > 0 && c < INT_MAX);
 			max_outstanding = c;
 			break;
+		case 'p':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			param = c;
+			break;
 		case 'b':
 			c = strtol(optarg, &endptr, 0);
 			assert(!*endptr);
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
index 14142faf040b..90b0133004e1 100644
--- a/tools/virtio/ringtest/main.h
+++ b/tools/virtio/ringtest/main.h
@@ -10,6 +10,8 @@
 
 #include <stdbool.h>
 
+extern int param;
+
 extern bool do_exit;
 
 #if defined(__x86_64__) || defined(__i386__)
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
index 635b07b4fdd3..7b22f1b20652 100644
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -97,6 +97,9 @@ void alloc_ring(void)
 {
 	int ret = ptr_ring_init(&array, ring_size, 0);
 	assert(!ret);
+	/* Hacky way to poke at ring internals. Useful for testing though. */
+	if (param)
+		array.batch = param;
 }
 
 /* guest side */
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index e0445898f08f..0fecaec90d0d 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -100,7 +100,7 @@ static void vq_info_add(struct vdev_info *dev, int num)
 	vring_init(&info->vring, num, info->ring, 4096);
 	info->vq = vring_new_virtqueue(info->idx,
 				       info->vring.num, 4096, &dev->vdev,
-				       true, info->ring,
+				       true, false, info->ring,
 				       vq_notify, vq_callback, "test");
 	assert(info->vq);
 	info->vq->priv = info;
@@ -202,7 +202,7 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq,
 	test = 0;
 	r = ioctl(dev->control, VHOST_TEST_RUN, &test);
 	assert(r >= 0);
-	fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious);
+	fprintf(stderr, "spurious wakeups: 0x%llx\n", spurious);
 }
 
 const char optstring[] = "h";
diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c
index 5f94f5105678..9476c616d064 100644
--- a/tools/virtio/vringh_test.c
+++ b/tools/virtio/vringh_test.c
@@ -314,7 +314,8 @@ static int parallel_test(u64 features,
 			err(1, "Could not set affinity to cpu %u", first_cpu);
 
 		vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true,
-					 guest_map, fast_vringh ? no_notify_host
+					 false, guest_map,
+					 fast_vringh ? no_notify_host
 					 : parallel_notify_host,
 					 never_callback_guest, "guest vq");
 
@@ -479,7 +480,7 @@ int main(int argc, char *argv[])
 	memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN));
 
 	/* Set up guest side. */
-	vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
+	vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, false,
 				 __user_addr_min,
 				 never_notify_host, never_callback_guest,
 				 "guest vq");
@@ -663,7 +664,7 @@ int main(int argc, char *argv[])
 		/* Force creation of direct, which we modify. */
 		__virtio_clear_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
 		vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
-					 __user_addr_min,
+					 false, __user_addr_min,
 					 never_notify_host,
 					 never_callback_guest,
 					 "guest vq");
diff --git a/usr/Kconfig b/usr/Kconfig
index c0c48507e44e..ad0543e21760 100644
--- a/usr/Kconfig
+++ b/usr/Kconfig
@@ -220,6 +220,7 @@ config INITRAMFS_COMPRESSION_LZ4
 endchoice
 
 config INITRAMFS_COMPRESSION
+	depends on INITRAMFS_SOURCE!=""
 	string
 	default ""      if INITRAMFS_COMPRESSION_NONE
 	default ".gz"   if INITRAMFS_COMPRESSION_GZIP
diff --git a/arch/arm/kvm/arm.c b/virt/kvm/arm/arm.c
index 8a31906bdc9b..3417e184c8e1 100644
--- a/arch/arm/kvm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -332,7 +332,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 	kvm_arm_reset_debug_ptr(vcpu);
 
-	return 0;
+	return kvm_vgic_vcpu_init(vcpu);
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index bce6037cf01d..87940364570b 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -22,7 +22,7 @@
 #include <asm/kvm_hyp.h>
 
 #define vtr_to_max_lr_idx(v)		((v) & 0xf)
-#define vtr_to_nr_pri_bits(v)		(((u32)(v) >> 29) + 1)
+#define vtr_to_nr_pre_bits(v)		((((u32)(v) >> 26) & 7) + 1)
 
 static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
 {
@@ -135,13 +135,13 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
 
 	if (used_lrs) {
 		int i;
-		u32 nr_pri_bits;
+		u32 nr_pre_bits;
 
 		cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
 
 		write_gicreg(0, ICH_HCR_EL2);
 		val = read_gicreg(ICH_VTR_EL2);
-		nr_pri_bits = vtr_to_nr_pri_bits(val);
+		nr_pre_bits = vtr_to_nr_pre_bits(val);
 
 		for (i = 0; i < used_lrs; i++) {
 			if (cpu_if->vgic_elrsr & (1 << i))
@@ -152,7 +152,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
 			__gic_v3_set_lr(0, i);
 		}
 
-		switch (nr_pri_bits) {
+		switch (nr_pre_bits) {
 		case 7:
 			cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
 			cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
@@ -162,7 +162,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
 			cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
 		}
 
-		switch (nr_pri_bits) {
+		switch (nr_pre_bits) {
 		case 7:
 			cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
 			cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
@@ -198,7 +198,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
 	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
 	u64 val;
-	u32 nr_pri_bits;
+	u32 nr_pre_bits;
 	int i;
 
 	/*
@@ -217,12 +217,12 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
 	}
 
 	val = read_gicreg(ICH_VTR_EL2);
-	nr_pri_bits = vtr_to_nr_pri_bits(val);
+	nr_pre_bits = vtr_to_nr_pre_bits(val);
 
 	if (used_lrs) {
 		write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
 
-		switch (nr_pri_bits) {
+		switch (nr_pre_bits) {
 		case 7:
 			write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
 			write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
@@ -232,7 +232,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
 			write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
 		}
 
-		switch (nr_pri_bits) {
+		switch (nr_pre_bits) {
 		case 7:
 			write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
 			write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
diff --git a/arch/arm/kvm/mmio.c b/virt/kvm/arm/mmio.c
index b6e715fd3c90..b6e715fd3c90 100644
--- a/arch/arm/kvm/mmio.c
+++ b/virt/kvm/arm/mmio.c
diff --git a/arch/arm/kvm/mmu.c b/virt/kvm/arm/mmu.c
index 313ee646480f..e2e5effba2a9 100644
--- a/arch/arm/kvm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -295,6 +295,13 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 	assert_spin_locked(&kvm->mmu_lock);
 	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
+		/*
+		 * Make sure the page table is still active, as another thread
+		 * could have possibly freed the page table, while we released
+		 * the lock.
+		 */
+		if (!READ_ONCE(kvm->arch.pgd))
+			break;
 		next = stage2_pgd_addr_end(addr, end);
 		if (!stage2_pgd_none(*pgd))
 			unmap_stage2_puds(kvm, pgd, addr, next);
@@ -829,22 +836,22 @@ void stage2_unmap_vm(struct kvm *kvm)
  * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
  * underlying level-2 and level-3 tables before freeing the actual level-1 table
  * and setting the struct pointer to NULL.
- *
- * Note we don't need locking here as this is only called when the VM is
- * destroyed, which can only be done once.
  */
 void kvm_free_stage2_pgd(struct kvm *kvm)
 {
-	if (kvm->arch.pgd == NULL)
-		return;
+	void *pgd = NULL;
 
 	spin_lock(&kvm->mmu_lock);
-	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
+	if (kvm->arch.pgd) {
+		unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
+		pgd = READ_ONCE(kvm->arch.pgd);
+		kvm->arch.pgd = NULL;
+	}
 	spin_unlock(&kvm->mmu_lock);
 
 	/* Free the HW pgd, one page at a time */
-	free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
-	kvm->arch.pgd = NULL;
+	if (pgd)
+		free_pages_exact(pgd, S2_PGD_SIZE);
 }
 
 static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
@@ -872,6 +879,9 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	pmd_t *pmd;
 
 	pud = stage2_get_pud(kvm, cache, addr);
+	if (!pud)
+		return NULL;
+
 	if (stage2_pud_none(*pud)) {
 		if (!cache)
 			return NULL;
@@ -1170,11 +1180,13 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 		 * large. Otherwise, we may see kernel panics with
 		 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
 		 * CONFIG_LOCKDEP. Additionally, holding the lock too long
-		 * will also starve other vCPUs.
+		 * will also starve other vCPUs. We have to also make sure
+		 * that the page tables are not freed while we released
+		 * the lock.
 		 */
-		if (need_resched() || spin_needbreak(&kvm->mmu_lock))
-			cond_resched_lock(&kvm->mmu_lock);
-
+		cond_resched_lock(&kvm->mmu_lock);
+		if (!READ_ONCE(kvm->arch.pgd))
+			break;
 		next = stage2_pgd_addr_end(addr, end);
 		if (stage2_pgd_present(*pgd))
 			stage2_wp_puds(pgd, addr, next);
diff --git a/arch/arm/kvm/perf.c b/virt/kvm/arm/perf.c
index 1a3849da0b4b..1a3849da0b4b 100644
--- a/arch/arm/kvm/perf.c
+++ b/virt/kvm/arm/perf.c
diff --git a/arch/arm/kvm/psci.c b/virt/kvm/arm/psci.c
index a08d7a93aebb..a08d7a93aebb 100644
--- a/arch/arm/kvm/psci.c
+++ b/virt/kvm/arm/psci.c
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
index 37d8b98867d5..f7dc5ddd6847 100644
--- a/virt/kvm/arm/trace.h
+++ b/virt/kvm/arm/trace.h
@@ -7,26 +7,250 @@
 #define TRACE_SYSTEM kvm
 
 /*
- * Tracepoints for vgic
+ * Tracepoints for entry/exit to guest
  */
-TRACE_EVENT(vgic_update_irq_pending,
-	TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level),
-	TP_ARGS(vcpu_id, irq, level),
+TRACE_EVENT(kvm_entry,
+	TP_PROTO(unsigned long vcpu_pc),
+	TP_ARGS(vcpu_pc),
 
 	TP_STRUCT__entry(
-		__field(	unsigned long,	vcpu_id	)
-		__field(	__u32,		irq	)
-		__field(	bool,		level	)
+		__field(	unsigned long,	vcpu_pc		)
 	),
 
 	TP_fast_assign(
-		__entry->vcpu_id	= vcpu_id;
-		__entry->irq		= irq;
+		__entry->vcpu_pc		= vcpu_pc;
+	),
+
+	TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
+);
+
+TRACE_EVENT(kvm_exit,
+	TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc),
+	TP_ARGS(idx, exit_reason, vcpu_pc),
+
+	TP_STRUCT__entry(
+		__field(	int,		idx		)
+		__field(	unsigned int,	exit_reason	)
+		__field(	unsigned long,	vcpu_pc		)
+	),
+
+	TP_fast_assign(
+		__entry->idx			= idx;
+		__entry->exit_reason		= exit_reason;
+		__entry->vcpu_pc		= vcpu_pc;
+	),
+
+	TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
+		  __print_symbolic(__entry->idx, kvm_arm_exception_type),
+		  __entry->exit_reason,
+		  __print_symbolic(__entry->exit_reason, kvm_arm_exception_class),
+		  __entry->vcpu_pc)
+);
+
+TRACE_EVENT(kvm_guest_fault,
+	TP_PROTO(unsigned long vcpu_pc, unsigned long hsr,
+		 unsigned long hxfar,
+		 unsigned long long ipa),
+	TP_ARGS(vcpu_pc, hsr, hxfar, ipa),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_pc		)
+		__field(	unsigned long,	hsr		)
+		__field(	unsigned long,	hxfar		)
+		__field(   unsigned long long,	ipa		)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_pc		= vcpu_pc;
+		__entry->hsr			= hsr;
+		__entry->hxfar			= hxfar;
+		__entry->ipa			= ipa;
+	),
+
+	TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
+		  __entry->ipa, __entry->hsr,
+		  __entry->hxfar, __entry->vcpu_pc)
+);
+
+TRACE_EVENT(kvm_access_fault,
+	TP_PROTO(unsigned long ipa),
+	TP_ARGS(ipa),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	ipa		)
+	),
+
+	TP_fast_assign(
+		__entry->ipa		= ipa;
+	),
+
+	TP_printk("IPA: %lx", __entry->ipa)
+);
+
+TRACE_EVENT(kvm_irq_line,
+	TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
+	TP_ARGS(type, vcpu_idx, irq_num, level),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	type		)
+		__field(	int,		vcpu_idx	)
+		__field(	int,		irq_num		)
+		__field(	int,		level		)
+	),
+
+	TP_fast_assign(
+		__entry->type		= type;
+		__entry->vcpu_idx	= vcpu_idx;
+		__entry->irq_num	= irq_num;
 		__entry->level		= level;
 	),
 
-	TP_printk("VCPU: %ld, IRQ %d, level: %d",
-		  __entry->vcpu_id, __entry->irq, __entry->level)
+	TP_printk("Inject %s interrupt (%d), vcpu->idx: %d, num: %d, level: %d",
+		  (__entry->type == KVM_ARM_IRQ_TYPE_CPU) ? "CPU" :
+		  (__entry->type == KVM_ARM_IRQ_TYPE_PPI) ? "VGIC PPI" :
+		  (__entry->type == KVM_ARM_IRQ_TYPE_SPI) ? "VGIC SPI" : "UNKNOWN",
+		  __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level)
+);
+
+TRACE_EVENT(kvm_mmio_emulate,
+	TP_PROTO(unsigned long vcpu_pc, unsigned long instr,
+		 unsigned long cpsr),
+	TP_ARGS(vcpu_pc, instr, cpsr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_pc		)
+		__field(	unsigned long,	instr		)
+		__field(	unsigned long,	cpsr		)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_pc		= vcpu_pc;
+		__entry->instr			= instr;
+		__entry->cpsr			= cpsr;
+	),
+
+	TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)",
+		  __entry->vcpu_pc, __entry->instr, __entry->cpsr)
+);
+
+TRACE_EVENT(kvm_unmap_hva,
+	TP_PROTO(unsigned long hva),
+	TP_ARGS(hva),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	hva		)
+	),
+
+	TP_fast_assign(
+		__entry->hva		= hva;
+	),
+
+	TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva)
+);
+
+TRACE_EVENT(kvm_unmap_hva_range,
+	TP_PROTO(unsigned long start, unsigned long end),
+	TP_ARGS(start, end),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	start		)
+		__field(	unsigned long,	end		)
+	),
+
+	TP_fast_assign(
+		__entry->start		= start;
+		__entry->end		= end;
+	),
+
+	TP_printk("mmu notifier unmap range: %#08lx -- %#08lx",
+		  __entry->start, __entry->end)
+);
+
+TRACE_EVENT(kvm_set_spte_hva,
+	TP_PROTO(unsigned long hva),
+	TP_ARGS(hva),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	hva		)
+	),
+
+	TP_fast_assign(
+		__entry->hva		= hva;
+	),
+
+	TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
+);
+
+TRACE_EVENT(kvm_age_hva,
+	TP_PROTO(unsigned long start, unsigned long end),
+	TP_ARGS(start, end),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	start		)
+		__field(	unsigned long,	end		)
+	),
+
+	TP_fast_assign(
+		__entry->start		= start;
+		__entry->end		= end;
+	),
+
+	TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
+		  __entry->start, __entry->end)
+);
+
+TRACE_EVENT(kvm_test_age_hva,
+	TP_PROTO(unsigned long hva),
+	TP_ARGS(hva),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	hva		)
+	),
+
+	TP_fast_assign(
+		__entry->hva		= hva;
+	),
+
+	TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
+);
+
+TRACE_EVENT(kvm_set_way_flush,
+	    TP_PROTO(unsigned long vcpu_pc, bool cache),
+	    TP_ARGS(vcpu_pc, cache),
+
+	    TP_STRUCT__entry(
+		    __field(	unsigned long,	vcpu_pc		)
+		    __field(	bool,		cache		)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->vcpu_pc		= vcpu_pc;
+		    __entry->cache		= cache;
+	    ),
+
+	    TP_printk("S/W flush at 0x%016lx (cache %s)",
+		      __entry->vcpu_pc, __entry->cache ? "on" : "off")
+);
+
+TRACE_EVENT(kvm_toggle_cache,
+	    TP_PROTO(unsigned long vcpu_pc, bool was, bool now),
+	    TP_ARGS(vcpu_pc, was, now),
+
+	    TP_STRUCT__entry(
+		    __field(	unsigned long,	vcpu_pc		)
+		    __field(	bool,		was		)
+		    __field(	bool,		now		)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->vcpu_pc		= vcpu_pc;
+		    __entry->was		= was;
+		    __entry->now		= now;
+	    ),
+
+	    TP_printk("VM op at 0x%016lx (cache was %s, now %s)",
+		      __entry->vcpu_pc, __entry->was ? "on" : "off",
+		      __entry->now ? "on" : "off")
 );
 
 /*
diff --git a/virt/kvm/arm/vgic/trace.h b/virt/kvm/arm/vgic/trace.h
new file mode 100644
index 000000000000..ed3229282888
--- /dev/null
+++ b/virt/kvm/arm/vgic/trace.h
@@ -0,0 +1,37 @@
+#if !defined(_TRACE_VGIC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_VGIC_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+TRACE_EVENT(vgic_update_irq_pending,
+	TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level),
+	TP_ARGS(vcpu_id, irq, level),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_id	)
+		__field(	__u32,		irq	)
+		__field(	bool,		level	)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu_id;
+		__entry->irq		= irq;
+		__entry->level		= level;
+	),
+
+	TP_printk("VCPU: %ld, IRQ %d, level: %d",
+		  __entry->vcpu_id, __entry->irq, __entry->level)
+);
+
+#endif /* _TRACE_VGIC_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm/vgic
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 25fd1b942c11..3a0b8999f011 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -227,10 +227,30 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
 }
 
 /**
- * kvm_vgic_vcpu_init() - Enable the VCPU interface
- * @vcpu: the VCPU which's VGIC should be enabled
+ * kvm_vgic_vcpu_init() - Register VCPU-specific KVM iodevs
+ * @vcpu: pointer to the VCPU being created and initialized
  */
-static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	int ret = 0;
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return 0;
+
+	/*
+	 * If we are creating a VCPU with a GICv3 we must also register the
+	 * KVM io device for the redistributor that belongs to this VCPU.
+	 */
+	if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+		mutex_lock(&vcpu->kvm->lock);
+		ret = vgic_register_redist_iodev(vcpu);
+		mutex_unlock(&vcpu->kvm->lock);
+	}
+	return ret;
+}
+
+static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
 {
 	if (kvm_vgic_global_state.type == VGIC_V2)
 		vgic_v2_enable(vcpu);
@@ -269,7 +289,7 @@ int vgic_init(struct kvm *kvm)
 		dist->msis_require_devid = true;
 
 	kvm_for_each_vcpu(i, vcpu, kvm)
-		kvm_vgic_vcpu_init(vcpu);
+		kvm_vgic_vcpu_enable(vcpu);
 
 	ret = kvm_vgic_setup_default_irq_routing(kvm);
 	if (ret)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 8d1da1af4b09..2dff288b3a66 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -23,6 +23,7 @@
 #include <linux/interrupt.h>
 #include <linux/list.h>
 #include <linux/uaccess.h>
+#include <linux/list_sort.h>
 
 #include <linux/irqchip/arm-gic-v3.h>
 
@@ -33,6 +34,12 @@
 #include "vgic.h"
 #include "vgic-mmio.h"
 
+static int vgic_its_save_tables_v0(struct vgic_its *its);
+static int vgic_its_restore_tables_v0(struct vgic_its *its);
+static int vgic_its_commit_v0(struct vgic_its *its);
+static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
+			     struct kvm_vcpu *filter_vcpu);
+
 /*
  * Creates a new (reference to a) struct vgic_irq for a given LPI.
  * If this LPI is already mapped on another ITS, we increase its refcount
@@ -40,10 +47,12 @@
  * If this is a "new" LPI, we allocate and initialize a new struct vgic_irq.
  * This function returns a pointer to the _unlocked_ structure.
  */
-static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid)
+static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
+				     struct kvm_vcpu *vcpu)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq;
+	int ret;
 
 	/* In this case there is no put, since we keep the reference. */
 	if (irq)
@@ -60,6 +69,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid)
 	irq->config = VGIC_CONFIG_EDGE;
 	kref_init(&irq->refcount);
 	irq->intid = intid;
+	irq->target_vcpu = vcpu;
 
 	spin_lock(&dist->lpi_list_lock);
 
@@ -91,6 +101,19 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid)
 out_unlock:
 	spin_unlock(&dist->lpi_list_lock);
 
+	/*
+	 * We "cache" the configuration table entries in our struct vgic_irq's.
+	 * However we only have those structs for mapped IRQs, so we read in
+	 * the respective config data from memory here upon mapping the LPI.
+	 */
+	ret = update_lpi_config(kvm, irq, NULL);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ret = vgic_v3_lpi_sync_pending_status(kvm, irq);
+	if (ret)
+		return ERR_PTR(ret);
+
 	return irq;
 }
 
@@ -99,6 +122,8 @@ struct its_device {
 
 	/* the head for the list of ITTEs */
 	struct list_head itt_head;
+	u32 num_eventid_bits;
+	gpa_t itt_addr;
 	u32 device_id;
 };
 
@@ -114,8 +139,8 @@ struct its_collection {
 #define its_is_collection_mapped(coll) ((coll) && \
 				((coll)->target_addr != COLLECTION_NOT_MAPPED))
 
-struct its_itte {
-	struct list_head itte_list;
+struct its_ite {
+	struct list_head ite_list;
 
 	struct vgic_irq *irq;
 	struct its_collection *collection;
@@ -123,6 +148,50 @@ struct its_itte {
 	u32 event_id;
 };
 
+/**
+ * struct vgic_its_abi - ITS abi ops and settings
+ * @cte_esz: collection table entry size
+ * @dte_esz: device table entry size
+ * @ite_esz: interrupt translation table entry size
+ * @save tables: save the ITS tables into guest RAM
+ * @restore_tables: restore the ITS internal structs from tables
+ *  stored in guest RAM
+ * @commit: initialize the registers which expose the ABI settings,
+ *  especially the entry sizes
+ */
+struct vgic_its_abi {
+	int cte_esz;
+	int dte_esz;
+	int ite_esz;
+	int (*save_tables)(struct vgic_its *its);
+	int (*restore_tables)(struct vgic_its *its);
+	int (*commit)(struct vgic_its *its);
+};
+
+static const struct vgic_its_abi its_table_abi_versions[] = {
+	[0] = {.cte_esz = 8, .dte_esz = 8, .ite_esz = 8,
+	 .save_tables = vgic_its_save_tables_v0,
+	 .restore_tables = vgic_its_restore_tables_v0,
+	 .commit = vgic_its_commit_v0,
+	},
+};
+
+#define NR_ITS_ABIS	ARRAY_SIZE(its_table_abi_versions)
+
+inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its)
+{
+	return &its_table_abi_versions[its->abi_rev];
+}
+
+int vgic_its_set_abi(struct vgic_its *its, int rev)
+{
+	const struct vgic_its_abi *abi;
+
+	its->abi_rev = rev;
+	abi = vgic_its_get_abi(its);
+	return abi->commit(its);
+}
+
 /*
  * Find and returns a device in the device table for an ITS.
  * Must be called with the its_lock mutex held.
@@ -143,27 +212,27 @@ static struct its_device *find_its_device(struct vgic_its *its, u32 device_id)
  * Device ID/Event ID pair on an ITS.
  * Must be called with the its_lock mutex held.
  */
-static struct its_itte *find_itte(struct vgic_its *its, u32 device_id,
+static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
 				  u32 event_id)
 {
 	struct its_device *device;
-	struct its_itte *itte;
+	struct its_ite *ite;
 
 	device = find_its_device(its, device_id);
 	if (device == NULL)
 		return NULL;
 
-	list_for_each_entry(itte, &device->itt_head, itte_list)
-		if (itte->event_id == event_id)
-			return itte;
+	list_for_each_entry(ite, &device->itt_head, ite_list)
+		if (ite->event_id == event_id)
+			return ite;
 
 	return NULL;
 }
 
 /* To be used as an iterator this macro misses the enclosing parentheses */
-#define for_each_lpi_its(dev, itte, its) \
+#define for_each_lpi_its(dev, ite, its) \
 	list_for_each_entry(dev, &(its)->device_list, dev_list) \
-		list_for_each_entry(itte, &(dev)->itt_head, itte_list)
+		list_for_each_entry(ite, &(dev)->itt_head, ite_list)
 
 /*
  * We only implement 48 bits of PA at the moment, although the ITS
@@ -171,11 +240,14 @@ static struct its_itte *find_itte(struct vgic_its *its, u32 device_id,
  */
 #define BASER_ADDRESS(x)	((x) & GENMASK_ULL(47, 16))
 #define CBASER_ADDRESS(x)	((x) & GENMASK_ULL(47, 12))
-#define PENDBASER_ADDRESS(x)	((x) & GENMASK_ULL(47, 16))
-#define PROPBASER_ADDRESS(x)	((x) & GENMASK_ULL(47, 12))
 
 #define GIC_LPI_OFFSET 8192
 
+#define VITS_TYPER_IDBITS 16
+#define VITS_TYPER_DEVBITS 16
+#define VITS_DTE_MAX_DEVID_OFFSET	(BIT(14) - 1)
+#define VITS_ITE_MAX_EVENTID_OFFSET	(BIT(16) - 1)
+
 /*
  * Finds and returns a collection in the ITS collection table.
  * Must be called with the its_lock mutex held.
@@ -204,7 +276,7 @@ static struct its_collection *find_collection(struct vgic_its *its, int coll_id)
 static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
 			     struct kvm_vcpu *filter_vcpu)
 {
-	u64 propbase = PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
+	u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
 	u8 prop;
 	int ret;
 
@@ -229,13 +301,13 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
 }
 
 /*
- * Create a snapshot of the current LPI list, so that we can enumerate all
- * LPIs without holding any lock.
- * Returns the array length and puts the kmalloc'ed array into intid_ptr.
+ * Create a snapshot of the current LPIs targeting @vcpu, so that we can
+ * enumerate those LPIs without holding any lock.
+ * Returns their number and puts the kmalloc'ed array into intid_ptr.
  */
-static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr)
+static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
 {
-	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	struct vgic_irq *irq;
 	u32 *intids;
 	int irq_count = dist->lpi_list_count, i = 0;
@@ -254,14 +326,14 @@ static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr)
 	spin_lock(&dist->lpi_list_lock);
 	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
 		/* We don't need to "get" the IRQ, as we hold the list lock. */
-		intids[i] = irq->intid;
-		if (++i == irq_count)
-			break;
+		if (irq->target_vcpu != vcpu)
+			continue;
+		intids[i++] = irq->intid;
 	}
 	spin_unlock(&dist->lpi_list_lock);
 
 	*intid_ptr = intids;
-	return irq_count;
+	return i;
 }
 
 /*
@@ -270,18 +342,18 @@ static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr)
  * Needs to be called whenever either the collection for a LPIs has
  * changed or the collection itself got retargeted.
  */
-static void update_affinity_itte(struct kvm *kvm, struct its_itte *itte)
+static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite)
 {
 	struct kvm_vcpu *vcpu;
 
-	if (!its_is_collection_mapped(itte->collection))
+	if (!its_is_collection_mapped(ite->collection))
 		return;
 
-	vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
+	vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
 
-	spin_lock(&itte->irq->irq_lock);
-	itte->irq->target_vcpu = vcpu;
-	spin_unlock(&itte->irq->irq_lock);
+	spin_lock(&ite->irq->irq_lock);
+	ite->irq->target_vcpu = vcpu;
+	spin_unlock(&ite->irq->irq_lock);
 }
 
 /*
@@ -292,13 +364,13 @@ static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its,
 				       struct its_collection *coll)
 {
 	struct its_device *device;
-	struct its_itte *itte;
+	struct its_ite *ite;
 
-	for_each_lpi_its(device, itte, its) {
-		if (!itte->collection || coll != itte->collection)
+	for_each_lpi_its(device, ite, its) {
+		if (!ite->collection || coll != ite->collection)
 			continue;
 
-		update_affinity_itte(kvm, itte);
+		update_affinity_ite(kvm, ite);
 	}
 }
 
@@ -310,20 +382,20 @@ static u32 max_lpis_propbaser(u64 propbaser)
 }
 
 /*
- * Scan the whole LPI pending table and sync the pending bit in there
+ * Sync the pending table pending bit of LPIs targeting @vcpu
  * with our own data structures. This relies on the LPI being
  * mapped before.
  */
 static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
 {
-	gpa_t pendbase = PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+	gpa_t pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
 	struct vgic_irq *irq;
 	int last_byte_offset = -1;
 	int ret = 0;
 	u32 *intids;
 	int nr_irqs, i;
 
-	nr_irqs = vgic_copy_lpi_list(vcpu->kvm, &intids);
+	nr_irqs = vgic_copy_lpi_list(vcpu, &intids);
 	if (nr_irqs < 0)
 		return nr_irqs;
 
@@ -364,6 +436,7 @@ static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
 					      struct vgic_its *its,
 					      gpa_t addr, unsigned int len)
 {
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
 	u64 reg = GITS_TYPER_PLPIS;
 
 	/*
@@ -374,8 +447,9 @@ static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
 	 * To avoid memory waste in the guest, we keep the number of IDBits and
 	 * DevBits low - as least for the time being.
 	 */
-	reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT;
-	reg |= 0x0f << GITS_TYPER_IDBITS_SHIFT;
+	reg |= GIC_ENCODE_SZ(VITS_TYPER_DEVBITS, 5) << GITS_TYPER_DEVBITS_SHIFT;
+	reg |= GIC_ENCODE_SZ(VITS_TYPER_IDBITS, 5) << GITS_TYPER_IDBITS_SHIFT;
+	reg |= GIC_ENCODE_SZ(abi->ite_esz, 4) << GITS_TYPER_ITT_ENTRY_SIZE_SHIFT;
 
 	return extract_bytes(reg, addr & 7, len);
 }
@@ -384,7 +458,23 @@ static unsigned long vgic_mmio_read_its_iidr(struct kvm *kvm,
 					     struct vgic_its *its,
 					     gpa_t addr, unsigned int len)
 {
-	return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+	u32 val;
+
+	val = (its->abi_rev << GITS_IIDR_REV_SHIFT) & GITS_IIDR_REV_MASK;
+	val |= (PRODUCT_ID_KVM << GITS_IIDR_PRODUCTID_SHIFT) | IMPLEMENTER_ARM;
+	return val;
+}
+
+static int vgic_mmio_uaccess_write_its_iidr(struct kvm *kvm,
+					    struct vgic_its *its,
+					    gpa_t addr, unsigned int len,
+					    unsigned long val)
+{
+	u32 rev = GITS_IIDR_REV(val);
+
+	if (rev >= NR_ITS_ABIS)
+		return -EINVAL;
+	return vgic_its_set_abi(its, rev);
 }
 
 static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
@@ -425,25 +515,25 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
 				u32 devid, u32 eventid)
 {
 	struct kvm_vcpu *vcpu;
-	struct its_itte *itte;
+	struct its_ite *ite;
 
 	if (!its->enabled)
 		return -EBUSY;
 
-	itte = find_itte(its, devid, eventid);
-	if (!itte || !its_is_collection_mapped(itte->collection))
+	ite = find_ite(its, devid, eventid);
+	if (!ite || !its_is_collection_mapped(ite->collection))
 		return E_ITS_INT_UNMAPPED_INTERRUPT;
 
-	vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
+	vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
 	if (!vcpu)
 		return E_ITS_INT_UNMAPPED_INTERRUPT;
 
 	if (!vcpu->arch.vgic_cpu.lpis_enabled)
 		return -EBUSY;
 
-	spin_lock(&itte->irq->irq_lock);
-	itte->irq->pending_latch = true;
-	vgic_queue_irq_unlock(kvm, itte->irq);
+	spin_lock(&ite->irq->irq_lock);
+	ite->irq->pending_latch = true;
+	vgic_queue_irq_unlock(kvm, ite->irq);
 
 	return 0;
 }
@@ -511,15 +601,15 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
 }
 
 /* Requires the its_lock to be held. */
-static void its_free_itte(struct kvm *kvm, struct its_itte *itte)
+static void its_free_ite(struct kvm *kvm, struct its_ite *ite)
 {
-	list_del(&itte->itte_list);
+	list_del(&ite->ite_list);
 
 	/* This put matches the get in vgic_add_lpi. */
-	if (itte->irq)
-		vgic_put_irq(kvm, itte->irq);
+	if (ite->irq)
+		vgic_put_irq(kvm, ite->irq);
 
-	kfree(itte);
+	kfree(ite);
 }
 
 static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size)
@@ -529,9 +619,11 @@ static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size)
 
 #define its_cmd_get_command(cmd)	its_cmd_mask_field(cmd, 0,  0,  8)
 #define its_cmd_get_deviceid(cmd)	its_cmd_mask_field(cmd, 0, 32, 32)
+#define its_cmd_get_size(cmd)		(its_cmd_mask_field(cmd, 1,  0,  5) + 1)
 #define its_cmd_get_id(cmd)		its_cmd_mask_field(cmd, 1,  0, 32)
 #define its_cmd_get_physical_id(cmd)	its_cmd_mask_field(cmd, 1, 32, 32)
 #define its_cmd_get_collection(cmd)	its_cmd_mask_field(cmd, 2,  0, 16)
+#define its_cmd_get_ittaddr(cmd)	(its_cmd_mask_field(cmd, 2,  8, 44) << 8)
 #define its_cmd_get_target_addr(cmd)	its_cmd_mask_field(cmd, 2, 16, 32)
 #define its_cmd_get_validbit(cmd)	its_cmd_mask_field(cmd, 2, 63,  1)
 
@@ -544,17 +636,17 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its,
 {
 	u32 device_id = its_cmd_get_deviceid(its_cmd);
 	u32 event_id = its_cmd_get_id(its_cmd);
-	struct its_itte *itte;
+	struct its_ite *ite;
 
 
-	itte = find_itte(its, device_id, event_id);
-	if (itte && itte->collection) {
+	ite = find_ite(its, device_id, event_id);
+	if (ite && ite->collection) {
 		/*
 		 * Though the spec talks about removing the pending state, we
 		 * don't bother here since we clear the ITTE anyway and the
 		 * pending state is a property of the ITTE struct.
 		 */
-		its_free_itte(kvm, itte);
+		its_free_ite(kvm, ite);
 		return 0;
 	}
 
@@ -572,26 +664,26 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
 	u32 event_id = its_cmd_get_id(its_cmd);
 	u32 coll_id = its_cmd_get_collection(its_cmd);
 	struct kvm_vcpu *vcpu;
-	struct its_itte *itte;
+	struct its_ite *ite;
 	struct its_collection *collection;
 
-	itte = find_itte(its, device_id, event_id);
-	if (!itte)
+	ite = find_ite(its, device_id, event_id);
+	if (!ite)
 		return E_ITS_MOVI_UNMAPPED_INTERRUPT;
 
-	if (!its_is_collection_mapped(itte->collection))
+	if (!its_is_collection_mapped(ite->collection))
 		return E_ITS_MOVI_UNMAPPED_COLLECTION;
 
 	collection = find_collection(its, coll_id);
 	if (!its_is_collection_mapped(collection))
 		return E_ITS_MOVI_UNMAPPED_COLLECTION;
 
-	itte->collection = collection;
+	ite->collection = collection;
 	vcpu = kvm_get_vcpu(kvm, collection->target_addr);
 
-	spin_lock(&itte->irq->irq_lock);
-	itte->irq->target_vcpu = vcpu;
-	spin_unlock(&itte->irq->irq_lock);
+	spin_lock(&ite->irq->irq_lock);
+	ite->irq->target_vcpu = vcpu;
+	spin_unlock(&ite->irq->irq_lock);
 
 	return 0;
 }
@@ -600,16 +692,31 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
  * Check whether an ID can be stored into the corresponding guest table.
  * For a direct table this is pretty easy, but gets a bit nasty for
  * indirect tables. We check whether the resulting guest physical address
- * is actually valid (covered by a memslot and guest accessbible).
+ * is actually valid (covered by a memslot and guest accessible).
  * For this we have to read the respective first level entry.
  */
-static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id)
+static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
+			      gpa_t *eaddr)
 {
 	int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
+	u64 indirect_ptr, type = GITS_BASER_TYPE(baser);
+	int esz = GITS_BASER_ENTRY_SIZE(baser);
 	int index;
-	u64 indirect_ptr;
 	gfn_t gfn;
-	int esz = GITS_BASER_ENTRY_SIZE(baser);
+
+	switch (type) {
+	case GITS_BASER_TYPE_DEVICE:
+		if (id >= BIT_ULL(VITS_TYPER_DEVBITS))
+			return false;
+		break;
+	case GITS_BASER_TYPE_COLLECTION:
+		/* as GITS_TYPER.CIL == 0, ITS supports 16-bit collection ID */
+		if (id >= BIT_ULL(16))
+			return false;
+		break;
+	default:
+		return false;
+	}
 
 	if (!(baser & GITS_BASER_INDIRECT)) {
 		phys_addr_t addr;
@@ -620,6 +727,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id)
 		addr = BASER_ADDRESS(baser) + id * esz;
 		gfn = addr >> PAGE_SHIFT;
 
+		if (eaddr)
+			*eaddr = addr;
 		return kvm_is_visible_gfn(its->dev->kvm, gfn);
 	}
 
@@ -652,6 +761,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id)
 	indirect_ptr += index * esz;
 	gfn = indirect_ptr >> PAGE_SHIFT;
 
+	if (eaddr)
+		*eaddr = indirect_ptr;
 	return kvm_is_visible_gfn(its->dev->kvm, gfn);
 }
 
@@ -661,7 +772,7 @@ static int vgic_its_alloc_collection(struct vgic_its *its,
 {
 	struct its_collection *collection;
 
-	if (!vgic_its_check_id(its, its->baser_coll_table, coll_id))
+	if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
 		return E_ITS_MAPC_COLLECTION_OOR;
 
 	collection = kzalloc(sizeof(*collection), GFP_KERNEL);
@@ -679,7 +790,7 @@ static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id)
 {
 	struct its_collection *collection;
 	struct its_device *device;
-	struct its_itte *itte;
+	struct its_ite *ite;
 
 	/*
 	 * Clearing the mapping for that collection ID removes the
@@ -690,15 +801,34 @@ static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id)
 	if (!collection)
 		return;
 
-	for_each_lpi_its(device, itte, its)
-		if (itte->collection &&
-		    itte->collection->collection_id == coll_id)
-			itte->collection = NULL;
+	for_each_lpi_its(device, ite, its)
+		if (ite->collection &&
+		    ite->collection->collection_id == coll_id)
+			ite->collection = NULL;
 
 	list_del(&collection->coll_list);
 	kfree(collection);
 }
 
+/* Must be called with its_lock mutex held */
+static struct its_ite *vgic_its_alloc_ite(struct its_device *device,
+					  struct its_collection *collection,
+					  u32 lpi_id, u32 event_id)
+{
+	struct its_ite *ite;
+
+	ite = kzalloc(sizeof(*ite), GFP_KERNEL);
+	if (!ite)
+		return ERR_PTR(-ENOMEM);
+
+	ite->event_id	= event_id;
+	ite->collection = collection;
+	ite->lpi = lpi_id;
+
+	list_add_tail(&ite->ite_list, &device->itt_head);
+	return ite;
+}
+
 /*
  * The MAPTI and MAPI commands map LPIs to ITTEs.
  * Must be called with its_lock mutex held.
@@ -709,16 +839,20 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 	u32 device_id = its_cmd_get_deviceid(its_cmd);
 	u32 event_id = its_cmd_get_id(its_cmd);
 	u32 coll_id = its_cmd_get_collection(its_cmd);
-	struct its_itte *itte;
+	struct its_ite *ite;
+	struct kvm_vcpu *vcpu = NULL;
 	struct its_device *device;
 	struct its_collection *collection, *new_coll = NULL;
-	int lpi_nr;
 	struct vgic_irq *irq;
+	int lpi_nr;
 
 	device = find_its_device(its, device_id);
 	if (!device)
 		return E_ITS_MAPTI_UNMAPPED_DEVICE;
 
+	if (event_id >= BIT_ULL(device->num_eventid_bits))
+		return E_ITS_MAPTI_ID_OOR;
+
 	if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI)
 		lpi_nr = its_cmd_get_physical_id(its_cmd);
 	else
@@ -728,7 +862,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 		return E_ITS_MAPTI_PHYSICALID_OOR;
 
 	/* If there is an existing mapping, behavior is UNPREDICTABLE. */
-	if (find_itte(its, device_id, event_id))
+	if (find_ite(its, device_id, event_id))
 		return 0;
 
 	collection = find_collection(its, coll_id);
@@ -739,36 +873,24 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 		new_coll = collection;
 	}
 
-	itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
-	if (!itte) {
+	ite = vgic_its_alloc_ite(device, collection, lpi_nr, event_id);
+	if (IS_ERR(ite)) {
 		if (new_coll)
 			vgic_its_free_collection(its, coll_id);
-		return -ENOMEM;
+		return PTR_ERR(ite);
 	}
 
-	itte->event_id	= event_id;
-	list_add_tail(&itte->itte_list, &device->itt_head);
-
-	itte->collection = collection;
-	itte->lpi = lpi_nr;
+	if (its_is_collection_mapped(collection))
+		vcpu = kvm_get_vcpu(kvm, collection->target_addr);
 
-	irq = vgic_add_lpi(kvm, lpi_nr);
+	irq = vgic_add_lpi(kvm, lpi_nr, vcpu);
 	if (IS_ERR(irq)) {
 		if (new_coll)
 			vgic_its_free_collection(its, coll_id);
-		its_free_itte(kvm, itte);
+		its_free_ite(kvm, ite);
 		return PTR_ERR(irq);
 	}
-	itte->irq = irq;
-
-	update_affinity_itte(kvm, itte);
-
-	/*
-	 * We "cache" the configuration table entries in out struct vgic_irq's.
-	 * However we only have those structs for mapped IRQs, so we read in
-	 * the respective config data from memory here upon mapping the LPI.
-	 */
-	update_lpi_config(kvm, itte->irq, NULL);
+	ite->irq = irq;
 
 	return 0;
 }
@@ -776,20 +898,40 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 /* Requires the its_lock to be held. */
 static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device)
 {
-	struct its_itte *itte, *temp;
+	struct its_ite *ite, *temp;
 
 	/*
 	 * The spec says that unmapping a device with still valid
 	 * ITTEs associated is UNPREDICTABLE. We remove all ITTEs,
 	 * since we cannot leave the memory unreferenced.
 	 */
-	list_for_each_entry_safe(itte, temp, &device->itt_head, itte_list)
-		its_free_itte(kvm, itte);
+	list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list)
+		its_free_ite(kvm, ite);
 
 	list_del(&device->dev_list);
 	kfree(device);
 }
 
+/* Must be called with its_lock mutex held */
+static struct its_device *vgic_its_alloc_device(struct vgic_its *its,
+						u32 device_id, gpa_t itt_addr,
+						u8 num_eventid_bits)
+{
+	struct its_device *device;
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (!device)
+		return ERR_PTR(-ENOMEM);
+
+	device->device_id = device_id;
+	device->itt_addr = itt_addr;
+	device->num_eventid_bits = num_eventid_bits;
+	INIT_LIST_HEAD(&device->itt_head);
+
+	list_add_tail(&device->dev_list, &its->device_list);
+	return device;
+}
+
 /*
  * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs).
  * Must be called with the its_lock mutex held.
@@ -799,11 +941,16 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
 {
 	u32 device_id = its_cmd_get_deviceid(its_cmd);
 	bool valid = its_cmd_get_validbit(its_cmd);
+	u8 num_eventid_bits = its_cmd_get_size(its_cmd);
+	gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd);
 	struct its_device *device;
 
-	if (!vgic_its_check_id(its, its->baser_device_table, device_id))
+	if (!vgic_its_check_id(its, its->baser_device_table, device_id, NULL))
 		return E_ITS_MAPD_DEVICE_OOR;
 
+	if (valid && num_eventid_bits > VITS_TYPER_IDBITS)
+		return E_ITS_MAPD_ITTSIZE_OOR;
+
 	device = find_its_device(its, device_id);
 
 	/*
@@ -821,14 +968,10 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
 	if (!valid)
 		return 0;
 
-	device = kzalloc(sizeof(struct its_device), GFP_KERNEL);
-	if (!device)
-		return -ENOMEM;
-
-	device->device_id = device_id;
-	INIT_LIST_HEAD(&device->itt_head);
-
-	list_add_tail(&device->dev_list, &its->device_list);
+	device = vgic_its_alloc_device(its, device_id, itt_addr,
+				       num_eventid_bits);
+	if (IS_ERR(device))
+		return PTR_ERR(device);
 
 	return 0;
 }
@@ -883,14 +1026,14 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
 {
 	u32 device_id = its_cmd_get_deviceid(its_cmd);
 	u32 event_id = its_cmd_get_id(its_cmd);
-	struct its_itte *itte;
+	struct its_ite *ite;
 
 
-	itte = find_itte(its, device_id, event_id);
-	if (!itte)
+	ite = find_ite(its, device_id, event_id);
+	if (!ite)
 		return E_ITS_CLEAR_UNMAPPED_INTERRUPT;
 
-	itte->irq->pending_latch = false;
+	ite->irq->pending_latch = false;
 
 	return 0;
 }
@@ -904,14 +1047,14 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
 {
 	u32 device_id = its_cmd_get_deviceid(its_cmd);
 	u32 event_id = its_cmd_get_id(its_cmd);
-	struct its_itte *itte;
+	struct its_ite *ite;
 
 
-	itte = find_itte(its, device_id, event_id);
-	if (!itte)
+	ite = find_ite(its, device_id, event_id);
+	if (!ite)
 		return E_ITS_INV_UNMAPPED_INTERRUPT;
 
-	return update_lpi_config(kvm, itte->irq, NULL);
+	return update_lpi_config(kvm, ite->irq, NULL);
 }
 
 /*
@@ -938,7 +1081,7 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
 
 	vcpu = kvm_get_vcpu(kvm, collection->target_addr);
 
-	irq_count = vgic_copy_lpi_list(kvm, &intids);
+	irq_count = vgic_copy_lpi_list(vcpu, &intids);
 	if (irq_count < 0)
 		return irq_count;
 
@@ -1213,6 +1356,33 @@ static unsigned long vgic_mmio_read_its_creadr(struct kvm *kvm,
 	return extract_bytes(its->creadr, addr & 0x7, len);
 }
 
+static int vgic_mmio_uaccess_write_its_creadr(struct kvm *kvm,
+					      struct vgic_its *its,
+					      gpa_t addr, unsigned int len,
+					      unsigned long val)
+{
+	u32 cmd_offset;
+	int ret = 0;
+
+	mutex_lock(&its->cmd_lock);
+
+	if (its->enabled) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	cmd_offset = ITS_CMD_OFFSET(val);
+	if (cmd_offset >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	its->creadr = cmd_offset;
+out:
+	mutex_unlock(&its->cmd_lock);
+	return ret;
+}
+
 #define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7)
 static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm,
 					      struct vgic_its *its,
@@ -1241,6 +1411,7 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
 				      gpa_t addr, unsigned int len,
 				      unsigned long val)
 {
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
 	u64 entry_size, device_type;
 	u64 reg, *regptr, clearbits = 0;
 
@@ -1251,12 +1422,12 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
 	switch (BASER_INDEX(addr)) {
 	case 0:
 		regptr = &its->baser_device_table;
-		entry_size = 8;
+		entry_size = abi->dte_esz;
 		device_type = GITS_BASER_TYPE_DEVICE;
 		break;
 	case 1:
 		regptr = &its->baser_coll_table;
-		entry_size = 8;
+		entry_size = abi->cte_esz;
 		device_type = GITS_BASER_TYPE_COLLECTION;
 		clearbits = GITS_BASER_INDIRECT;
 		break;
@@ -1317,6 +1488,16 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
 	.its_write = wr,					\
 }
 
+#define REGISTER_ITS_DESC_UACCESS(off, rd, wr, uwr, length, acc)\
+{								\
+	.reg_offset = off,					\
+	.len = length,						\
+	.access_flags = acc,					\
+	.its_read = rd,						\
+	.its_write = wr,					\
+	.uaccess_its_write = uwr,				\
+}
+
 static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its,
 			      gpa_t addr, unsigned int len, unsigned long val)
 {
@@ -1327,8 +1508,9 @@ static struct vgic_register_region its_registers[] = {
 	REGISTER_ITS_DESC(GITS_CTLR,
 		vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_ITS_DESC(GITS_IIDR,
-		vgic_mmio_read_its_iidr, its_mmio_write_wi, 4,
+	REGISTER_ITS_DESC_UACCESS(GITS_IIDR,
+		vgic_mmio_read_its_iidr, its_mmio_write_wi,
+		vgic_mmio_uaccess_write_its_iidr, 4,
 		VGIC_ACCESS_32bit),
 	REGISTER_ITS_DESC(GITS_TYPER,
 		vgic_mmio_read_its_typer, its_mmio_write_wi, 8,
@@ -1339,8 +1521,9 @@ static struct vgic_register_region its_registers[] = {
 	REGISTER_ITS_DESC(GITS_CWRITER,
 		vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8,
 		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
-	REGISTER_ITS_DESC(GITS_CREADR,
-		vgic_mmio_read_its_creadr, its_mmio_write_wi, 8,
+	REGISTER_ITS_DESC_UACCESS(GITS_CREADR,
+		vgic_mmio_read_its_creadr, its_mmio_write_wi,
+		vgic_mmio_uaccess_write_its_creadr, 8,
 		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
 	REGISTER_ITS_DESC(GITS_BASER,
 		vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40,
@@ -1357,17 +1540,19 @@ void vgic_enable_lpis(struct kvm_vcpu *vcpu)
 		its_sync_lpi_pending_table(vcpu);
 }
 
-static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
+static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its,
+				   u64 addr)
 {
 	struct vgic_io_device *iodev = &its->iodev;
 	int ret;
 
-	if (!its->initialized)
-		return -EBUSY;
-
-	if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base))
-		return -ENXIO;
+	mutex_lock(&kvm->slots_lock);
+	if (!IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) {
+		ret = -EBUSY;
+		goto out;
+	}
 
+	its->vgic_its_base = addr;
 	iodev->regions = its_registers;
 	iodev->nr_regions = ARRAY_SIZE(its_registers);
 	kvm_iodevice_init(&iodev->dev, &kvm_io_gic_ops);
@@ -1375,9 +1560,9 @@ static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
 	iodev->base_addr = its->vgic_its_base;
 	iodev->iodev_type = IODEV_ITS;
 	iodev->its = its;
-	mutex_lock(&kvm->slots_lock);
 	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr,
 				      KVM_VGIC_V3_ITS_SIZE, &iodev->dev);
+out:
 	mutex_unlock(&kvm->slots_lock);
 
 	return ret;
@@ -1387,7 +1572,6 @@ static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
 	(GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)		| \
 	 GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner)		| \
 	 GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)		| \
-	 ((8ULL - 1) << GITS_BASER_ENTRY_SIZE_SHIFT)			| \
 	 GITS_BASER_PAGE_SIZE_64K)
 
 #define INITIAL_PROPBASER_VALUE						  \
@@ -1415,7 +1599,6 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
 	INIT_LIST_HEAD(&its->collection_list);
 
 	dev->kvm->arch.vgic.has_its = true;
-	its->initialized = false;
 	its->enabled = false;
 	its->dev = dev;
 
@@ -1427,16 +1610,23 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
 
 	dev->private = its;
 
-	return 0;
+	return vgic_its_set_abi(its, NR_ITS_ABIS - 1);
+}
+
+static void vgic_its_free_device(struct kvm *kvm, struct its_device *dev)
+{
+	struct its_ite *ite, *tmp;
+
+	list_for_each_entry_safe(ite, tmp, &dev->itt_head, ite_list)
+		its_free_ite(kvm, ite);
+	list_del(&dev->dev_list);
+	kfree(dev);
 }
 
 static void vgic_its_destroy(struct kvm_device *kvm_dev)
 {
 	struct kvm *kvm = kvm_dev->kvm;
 	struct vgic_its *its = kvm_dev->private;
-	struct its_device *dev;
-	struct its_itte *itte;
-	struct list_head *dev_cur, *dev_temp;
 	struct list_head *cur, *temp;
 
 	/*
@@ -1447,25 +1637,710 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
 		return;
 
 	mutex_lock(&its->its_lock);
-	list_for_each_safe(dev_cur, dev_temp, &its->device_list) {
-		dev = container_of(dev_cur, struct its_device, dev_list);
-		list_for_each_safe(cur, temp, &dev->itt_head) {
-			itte = (container_of(cur, struct its_itte, itte_list));
-			its_free_itte(kvm, itte);
-		}
-		list_del(dev_cur);
-		kfree(dev);
+	list_for_each_safe(cur, temp, &its->device_list) {
+		struct its_device *dev;
+
+		dev = list_entry(cur, struct its_device, dev_list);
+		vgic_its_free_device(kvm, dev);
 	}
 
 	list_for_each_safe(cur, temp, &its->collection_list) {
+		struct its_collection *coll;
+
+		coll = list_entry(cur, struct its_collection, coll_list);
 		list_del(cur);
-		kfree(container_of(cur, struct its_collection, coll_list));
+		kfree(coll);
 	}
 	mutex_unlock(&its->its_lock);
 
 	kfree(its);
 }
 
+int vgic_its_has_attr_regs(struct kvm_device *dev,
+			   struct kvm_device_attr *attr)
+{
+	const struct vgic_register_region *region;
+	gpa_t offset = attr->attr;
+	int align;
+
+	align = (offset < GITS_TYPER) || (offset >= GITS_PIDR4) ? 0x3 : 0x7;
+
+	if (offset & align)
+		return -EINVAL;
+
+	region = vgic_find_mmio_region(its_registers,
+				       ARRAY_SIZE(its_registers),
+				       offset);
+	if (!region)
+		return -ENXIO;
+
+	return 0;
+}
+
+int vgic_its_attr_regs_access(struct kvm_device *dev,
+			      struct kvm_device_attr *attr,
+			      u64 *reg, bool is_write)
+{
+	const struct vgic_register_region *region;
+	struct vgic_its *its;
+	gpa_t addr, offset;
+	unsigned int len;
+	int align, ret = 0;
+
+	its = dev->private;
+	offset = attr->attr;
+
+	/*
+	 * Although the spec supports upper/lower 32-bit accesses to
+	 * 64-bit ITS registers, the userspace ABI requires 64-bit
+	 * accesses to all 64-bit wide registers. We therefore only
+	 * support 32-bit accesses to GITS_CTLR, GITS_IIDR and GITS ID
+	 * registers
+	 */
+	if ((offset < GITS_TYPER) || (offset >= GITS_PIDR4))
+		align = 0x3;
+	else
+		align = 0x7;
+
+	if (offset & align)
+		return -EINVAL;
+
+	mutex_lock(&dev->kvm->lock);
+
+	if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+	region = vgic_find_mmio_region(its_registers,
+				       ARRAY_SIZE(its_registers),
+				       offset);
+	if (!region) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+	if (!lock_all_vcpus(dev->kvm)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	addr = its->vgic_its_base + offset;
+
+	len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4;
+
+	if (is_write) {
+		if (region->uaccess_its_write)
+			ret = region->uaccess_its_write(dev->kvm, its, addr,
+							len, *reg);
+		else
+			region->its_write(dev->kvm, its, addr, len, *reg);
+	} else {
+		*reg = region->its_read(dev->kvm, its, addr, len);
+	}
+	unlock_all_vcpus(dev->kvm);
+out:
+	mutex_unlock(&dev->kvm->lock);
+	return ret;
+}
+
+static u32 compute_next_devid_offset(struct list_head *h,
+				     struct its_device *dev)
+{
+	struct its_device *next;
+	u32 next_offset;
+
+	if (list_is_last(&dev->dev_list, h))
+		return 0;
+	next = list_next_entry(dev, dev_list);
+	next_offset = next->device_id - dev->device_id;
+
+	return min_t(u32, next_offset, VITS_DTE_MAX_DEVID_OFFSET);
+}
+
+static u32 compute_next_eventid_offset(struct list_head *h, struct its_ite *ite)
+{
+	struct its_ite *next;
+	u32 next_offset;
+
+	if (list_is_last(&ite->ite_list, h))
+		return 0;
+	next = list_next_entry(ite, ite_list);
+	next_offset = next->event_id - ite->event_id;
+
+	return min_t(u32, next_offset, VITS_ITE_MAX_EVENTID_OFFSET);
+}
+
+/**
+ * entry_fn_t - Callback called on a table entry restore path
+ * @its: its handle
+ * @id: id of the entry
+ * @entry: pointer to the entry
+ * @opaque: pointer to an opaque data
+ *
+ * Return: < 0 on error, 0 if last element was identified, id offset to next
+ * element otherwise
+ */
+typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry,
+			  void *opaque);
+
+/**
+ * scan_its_table - Scan a contiguous table in guest RAM and applies a function
+ * to each entry
+ *
+ * @its: its handle
+ * @base: base gpa of the table
+ * @size: size of the table in bytes
+ * @esz: entry size in bytes
+ * @start_id: the ID of the first entry in the table
+ * (non zero for 2d level tables)
+ * @fn: function to apply on each entry
+ *
+ * Return: < 0 on error, 0 if last element was identified, 1 otherwise
+ * (the last element may not be found on second level tables)
+ */
+static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz,
+			  int start_id, entry_fn_t fn, void *opaque)
+{
+	void *entry = kzalloc(esz, GFP_KERNEL);
+	struct kvm *kvm = its->dev->kvm;
+	unsigned long len = size;
+	int id = start_id;
+	gpa_t gpa = base;
+	int ret;
+
+	while (len > 0) {
+		int next_offset;
+		size_t byte_offset;
+
+		ret = kvm_read_guest(kvm, gpa, entry, esz);
+		if (ret)
+			goto out;
+
+		next_offset = fn(its, id, entry, opaque);
+		if (next_offset <= 0) {
+			ret = next_offset;
+			goto out;
+		}
+
+		byte_offset = next_offset * esz;
+		id += next_offset;
+		gpa += byte_offset;
+		len -= byte_offset;
+	}
+	ret =  1;
+
+out:
+	kfree(entry);
+	return ret;
+}
+
+/**
+ * vgic_its_save_ite - Save an interrupt translation entry at @gpa
+ */
+static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev,
+			      struct its_ite *ite, gpa_t gpa, int ite_esz)
+{
+	struct kvm *kvm = its->dev->kvm;
+	u32 next_offset;
+	u64 val;
+
+	next_offset = compute_next_eventid_offset(&dev->itt_head, ite);
+	val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) |
+	       ((u64)ite->lpi << KVM_ITS_ITE_PINTID_SHIFT) |
+		ite->collection->collection_id;
+	val = cpu_to_le64(val);
+	return kvm_write_guest(kvm, gpa, &val, ite_esz);
+}
+
+/**
+ * vgic_its_restore_ite - restore an interrupt translation entry
+ * @event_id: id used for indexing
+ * @ptr: pointer to the ITE entry
+ * @opaque: pointer to the its_device
+ */
+static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
+				void *ptr, void *opaque)
+{
+	struct its_device *dev = (struct its_device *)opaque;
+	struct its_collection *collection;
+	struct kvm *kvm = its->dev->kvm;
+	struct kvm_vcpu *vcpu = NULL;
+	u64 val;
+	u64 *p = (u64 *)ptr;
+	struct vgic_irq *irq;
+	u32 coll_id, lpi_id;
+	struct its_ite *ite;
+	u32 offset;
+
+	val = *p;
+
+	val = le64_to_cpu(val);
+
+	coll_id = val & KVM_ITS_ITE_ICID_MASK;
+	lpi_id = (val & KVM_ITS_ITE_PINTID_MASK) >> KVM_ITS_ITE_PINTID_SHIFT;
+
+	if (!lpi_id)
+		return 1; /* invalid entry, no choice but to scan next entry */
+
+	if (lpi_id < VGIC_MIN_LPI)
+		return -EINVAL;
+
+	offset = val >> KVM_ITS_ITE_NEXT_SHIFT;
+	if (event_id + offset >= BIT_ULL(dev->num_eventid_bits))
+		return -EINVAL;
+
+	collection = find_collection(its, coll_id);
+	if (!collection)
+		return -EINVAL;
+
+	ite = vgic_its_alloc_ite(dev, collection, lpi_id, event_id);
+	if (IS_ERR(ite))
+		return PTR_ERR(ite);
+
+	if (its_is_collection_mapped(collection))
+		vcpu = kvm_get_vcpu(kvm, collection->target_addr);
+
+	irq = vgic_add_lpi(kvm, lpi_id, vcpu);
+	if (IS_ERR(irq))
+		return PTR_ERR(irq);
+	ite->irq = irq;
+
+	return offset;
+}
+
+static int vgic_its_ite_cmp(void *priv, struct list_head *a,
+			    struct list_head *b)
+{
+	struct its_ite *itea = container_of(a, struct its_ite, ite_list);
+	struct its_ite *iteb = container_of(b, struct its_ite, ite_list);
+
+	if (itea->event_id < iteb->event_id)
+		return -1;
+	else
+		return 1;
+}
+
+static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device)
+{
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	gpa_t base = device->itt_addr;
+	struct its_ite *ite;
+	int ret;
+	int ite_esz = abi->ite_esz;
+
+	list_sort(NULL, &device->itt_head, vgic_its_ite_cmp);
+
+	list_for_each_entry(ite, &device->itt_head, ite_list) {
+		gpa_t gpa = base + ite->event_id * ite_esz;
+
+		ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev)
+{
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	gpa_t base = dev->itt_addr;
+	int ret;
+	int ite_esz = abi->ite_esz;
+	size_t max_size = BIT_ULL(dev->num_eventid_bits) * ite_esz;
+
+	ret = scan_its_table(its, base, max_size, ite_esz, 0,
+			     vgic_its_restore_ite, dev);
+
+	return ret;
+}
+
+/**
+ * vgic_its_save_dte - Save a device table entry at a given GPA
+ *
+ * @its: ITS handle
+ * @dev: ITS device
+ * @ptr: GPA
+ */
+static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev,
+			     gpa_t ptr, int dte_esz)
+{
+	struct kvm *kvm = its->dev->kvm;
+	u64 val, itt_addr_field;
+	u32 next_offset;
+
+	itt_addr_field = dev->itt_addr >> 8;
+	next_offset = compute_next_devid_offset(&its->device_list, dev);
+	val = (1ULL << KVM_ITS_DTE_VALID_SHIFT |
+	       ((u64)next_offset << KVM_ITS_DTE_NEXT_SHIFT) |
+	       (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) |
+		(dev->num_eventid_bits - 1));
+	val = cpu_to_le64(val);
+	return kvm_write_guest(kvm, ptr, &val, dte_esz);
+}
+
+/**
+ * vgic_its_restore_dte - restore a device table entry
+ *
+ * @its: its handle
+ * @id: device id the DTE corresponds to
+ * @ptr: kernel VA where the 8 byte DTE is located
+ * @opaque: unused
+ *
+ * Return: < 0 on error, 0 if the dte is the last one, id offset to the
+ * next dte otherwise
+ */
+static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
+				void *ptr, void *opaque)
+{
+	struct its_device *dev;
+	gpa_t itt_addr;
+	u8 num_eventid_bits;
+	u64 entry = *(u64 *)ptr;
+	bool valid;
+	u32 offset;
+	int ret;
+
+	entry = le64_to_cpu(entry);
+
+	valid = entry >> KVM_ITS_DTE_VALID_SHIFT;
+	num_eventid_bits = (entry & KVM_ITS_DTE_SIZE_MASK) + 1;
+	itt_addr = ((entry & KVM_ITS_DTE_ITTADDR_MASK)
+			>> KVM_ITS_DTE_ITTADDR_SHIFT) << 8;
+
+	if (!valid)
+		return 1;
+
+	/* dte entry is valid */
+	offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT;
+
+	dev = vgic_its_alloc_device(its, id, itt_addr, num_eventid_bits);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+
+	ret = vgic_its_restore_itt(its, dev);
+	if (ret) {
+		vgic_its_free_device(its->dev->kvm, dev);
+		return ret;
+	}
+
+	return offset;
+}
+
+static int vgic_its_device_cmp(void *priv, struct list_head *a,
+			       struct list_head *b)
+{
+	struct its_device *deva = container_of(a, struct its_device, dev_list);
+	struct its_device *devb = container_of(b, struct its_device, dev_list);
+
+	if (deva->device_id < devb->device_id)
+		return -1;
+	else
+		return 1;
+}
+
+/**
+ * vgic_its_save_device_tables - Save the device table and all ITT
+ * into guest RAM
+ *
+ * L1/L2 handling is hidden by vgic_its_check_id() helper which directly
+ * returns the GPA of the device entry
+ */
+static int vgic_its_save_device_tables(struct vgic_its *its)
+{
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	struct its_device *dev;
+	int dte_esz = abi->dte_esz;
+	u64 baser;
+
+	baser = its->baser_device_table;
+
+	list_sort(NULL, &its->device_list, vgic_its_device_cmp);
+
+	list_for_each_entry(dev, &its->device_list, dev_list) {
+		int ret;
+		gpa_t eaddr;
+
+		if (!vgic_its_check_id(its, baser,
+				       dev->device_id, &eaddr))
+			return -EINVAL;
+
+		ret = vgic_its_save_itt(its, dev);
+		if (ret)
+			return ret;
+
+		ret = vgic_its_save_dte(its, dev, eaddr, dte_esz);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/**
+ * handle_l1_dte - callback used for L1 device table entries (2 stage case)
+ *
+ * @its: its handle
+ * @id: index of the entry in the L1 table
+ * @addr: kernel VA
+ * @opaque: unused
+ *
+ * L1 table entries are scanned by steps of 1 entry
+ * Return < 0 if error, 0 if last dte was found when scanning the L2
+ * table, +1 otherwise (meaning next L1 entry must be scanned)
+ */
+static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr,
+			 void *opaque)
+{
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	int l2_start_id = id * (SZ_64K / abi->dte_esz);
+	u64 entry = *(u64 *)addr;
+	int dte_esz = abi->dte_esz;
+	gpa_t gpa;
+	int ret;
+
+	entry = le64_to_cpu(entry);
+
+	if (!(entry & KVM_ITS_L1E_VALID_MASK))
+		return 1;
+
+	gpa = entry & KVM_ITS_L1E_ADDR_MASK;
+
+	ret = scan_its_table(its, gpa, SZ_64K, dte_esz,
+			     l2_start_id, vgic_its_restore_dte, NULL);
+
+	if (ret <= 0)
+		return ret;
+
+	return 1;
+}
+
+/**
+ * vgic_its_restore_device_tables - Restore the device table and all ITT
+ * from guest RAM to internal data structs
+ */
+static int vgic_its_restore_device_tables(struct vgic_its *its)
+{
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	u64 baser = its->baser_device_table;
+	int l1_esz, ret;
+	int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
+	gpa_t l1_gpa;
+
+	if (!(baser & GITS_BASER_VALID))
+		return 0;
+
+	l1_gpa = BASER_ADDRESS(baser);
+
+	if (baser & GITS_BASER_INDIRECT) {
+		l1_esz = GITS_LVL1_ENTRY_SIZE;
+		ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0,
+				     handle_l1_dte, NULL);
+	} else {
+		l1_esz = abi->dte_esz;
+		ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0,
+				     vgic_its_restore_dte, NULL);
+	}
+
+	if (ret > 0)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+static int vgic_its_save_cte(struct vgic_its *its,
+			     struct its_collection *collection,
+			     gpa_t gpa, int esz)
+{
+	u64 val;
+
+	val = (1ULL << KVM_ITS_CTE_VALID_SHIFT |
+	       ((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) |
+	       collection->collection_id);
+	val = cpu_to_le64(val);
+	return kvm_write_guest(its->dev->kvm, gpa, &val, esz);
+}
+
+static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
+{
+	struct its_collection *collection;
+	struct kvm *kvm = its->dev->kvm;
+	u32 target_addr, coll_id;
+	u64 val;
+	int ret;
+
+	BUG_ON(esz > sizeof(val));
+	ret = kvm_read_guest(kvm, gpa, &val, esz);
+	if (ret)
+		return ret;
+	val = le64_to_cpu(val);
+	if (!(val & KVM_ITS_CTE_VALID_MASK))
+		return 0;
+
+	target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT);
+	coll_id = val & KVM_ITS_CTE_ICID_MASK;
+
+	if (target_addr >= atomic_read(&kvm->online_vcpus))
+		return -EINVAL;
+
+	collection = find_collection(its, coll_id);
+	if (collection)
+		return -EEXIST;
+	ret = vgic_its_alloc_collection(its, &collection, coll_id);
+	if (ret)
+		return ret;
+	collection->target_addr = target_addr;
+	return 1;
+}
+
+/**
+ * vgic_its_save_collection_table - Save the collection table into
+ * guest RAM
+ */
+static int vgic_its_save_collection_table(struct vgic_its *its)
+{
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	struct its_collection *collection;
+	u64 val;
+	gpa_t gpa;
+	size_t max_size, filled = 0;
+	int ret, cte_esz = abi->cte_esz;
+
+	gpa = BASER_ADDRESS(its->baser_coll_table);
+	if (!gpa)
+		return 0;
+
+	max_size = GITS_BASER_NR_PAGES(its->baser_coll_table) * SZ_64K;
+
+	list_for_each_entry(collection, &its->collection_list, coll_list) {
+		ret = vgic_its_save_cte(its, collection, gpa, cte_esz);
+		if (ret)
+			return ret;
+		gpa += cte_esz;
+		filled += cte_esz;
+	}
+
+	if (filled == max_size)
+		return 0;
+
+	/*
+	 * table is not fully filled, add a last dummy element
+	 * with valid bit unset
+	 */
+	val = 0;
+	BUG_ON(cte_esz > sizeof(val));
+	ret = kvm_write_guest(its->dev->kvm, gpa, &val, cte_esz);
+	return ret;
+}
+
+/**
+ * vgic_its_restore_collection_table - reads the collection table
+ * in guest memory and restores the ITS internal state. Requires the
+ * BASER registers to be restored before.
+ */
+static int vgic_its_restore_collection_table(struct vgic_its *its)
+{
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	int cte_esz = abi->cte_esz;
+	size_t max_size, read = 0;
+	gpa_t gpa;
+	int ret;
+
+	if (!(its->baser_coll_table & GITS_BASER_VALID))
+		return 0;
+
+	gpa = BASER_ADDRESS(its->baser_coll_table);
+
+	max_size = GITS_BASER_NR_PAGES(its->baser_coll_table) * SZ_64K;
+
+	while (read < max_size) {
+		ret = vgic_its_restore_cte(its, gpa, cte_esz);
+		if (ret <= 0)
+			break;
+		gpa += cte_esz;
+		read += cte_esz;
+	}
+	return ret;
+}
+
+/**
+ * vgic_its_save_tables_v0 - Save the ITS tables into guest ARM
+ * according to v0 ABI
+ */
+static int vgic_its_save_tables_v0(struct vgic_its *its)
+{
+	struct kvm *kvm = its->dev->kvm;
+	int ret;
+
+	mutex_lock(&kvm->lock);
+	mutex_lock(&its->its_lock);
+
+	if (!lock_all_vcpus(kvm)) {
+		mutex_unlock(&its->its_lock);
+		mutex_unlock(&kvm->lock);
+		return -EBUSY;
+	}
+
+	ret = vgic_its_save_device_tables(its);
+	if (ret)
+		goto out;
+
+	ret = vgic_its_save_collection_table(its);
+
+out:
+	unlock_all_vcpus(kvm);
+	mutex_unlock(&its->its_lock);
+	mutex_unlock(&kvm->lock);
+	return ret;
+}
+
+/**
+ * vgic_its_restore_tables_v0 - Restore the ITS tables from guest RAM
+ * to internal data structs according to V0 ABI
+ *
+ */
+static int vgic_its_restore_tables_v0(struct vgic_its *its)
+{
+	struct kvm *kvm = its->dev->kvm;
+	int ret;
+
+	mutex_lock(&kvm->lock);
+	mutex_lock(&its->its_lock);
+
+	if (!lock_all_vcpus(kvm)) {
+		mutex_unlock(&its->its_lock);
+		mutex_unlock(&kvm->lock);
+		return -EBUSY;
+	}
+
+	ret = vgic_its_restore_collection_table(its);
+	if (ret)
+		goto out;
+
+	ret = vgic_its_restore_device_tables(its);
+out:
+	unlock_all_vcpus(kvm);
+	mutex_unlock(&its->its_lock);
+	mutex_unlock(&kvm->lock);
+
+	return ret;
+}
+
+static int vgic_its_commit_v0(struct vgic_its *its)
+{
+	const struct vgic_its_abi *abi;
+
+	abi = vgic_its_get_abi(its);
+	its->baser_coll_table &= ~GITS_BASER_ENTRY_SIZE_MASK;
+	its->baser_device_table &= ~GITS_BASER_ENTRY_SIZE_MASK;
+
+	its->baser_coll_table |= (GIC_ENCODE_SZ(abi->cte_esz, 5)
+					<< GITS_BASER_ENTRY_SIZE_SHIFT);
+
+	its->baser_device_table |= (GIC_ENCODE_SZ(abi->dte_esz, 5)
+					<< GITS_BASER_ENTRY_SIZE_SHIFT);
+	return 0;
+}
+
 static int vgic_its_has_attr(struct kvm_device *dev,
 			     struct kvm_device_attr *attr)
 {
@@ -1480,8 +2355,14 @@ static int vgic_its_has_attr(struct kvm_device *dev,
 		switch (attr->attr) {
 		case KVM_DEV_ARM_VGIC_CTRL_INIT:
 			return 0;
+		case KVM_DEV_ARM_ITS_SAVE_TABLES:
+			return 0;
+		case KVM_DEV_ARM_ITS_RESTORE_TABLES:
+			return 0;
 		}
 		break;
+	case KVM_DEV_ARM_VGIC_GRP_ITS_REGS:
+		return vgic_its_has_attr_regs(dev, attr);
 	}
 	return -ENXIO;
 }
@@ -1509,18 +2390,30 @@ static int vgic_its_set_attr(struct kvm_device *dev,
 		if (ret)
 			return ret;
 
-		its->vgic_its_base = addr;
-
-		return 0;
+		return vgic_register_its_iodev(dev->kvm, its, addr);
 	}
-	case KVM_DEV_ARM_VGIC_GRP_CTRL:
+	case KVM_DEV_ARM_VGIC_GRP_CTRL: {
+		const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+
 		switch (attr->attr) {
 		case KVM_DEV_ARM_VGIC_CTRL_INIT:
-			its->initialized = true;
-
+			/* Nothing to do */
 			return 0;
+		case KVM_DEV_ARM_ITS_SAVE_TABLES:
+			return abi->save_tables(its);
+		case KVM_DEV_ARM_ITS_RESTORE_TABLES:
+			return abi->restore_tables(its);
 		}
-		break;
+	}
+	case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
+		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+		u64 reg;
+
+		if (get_user(reg, uaddr))
+			return -EFAULT;
+
+		return vgic_its_attr_regs_access(dev, attr, &reg, true);
+	}
 	}
 	return -ENXIO;
 }
@@ -1541,10 +2434,20 @@ static int vgic_its_get_attr(struct kvm_device *dev,
 		if (copy_to_user(uaddr, &addr, sizeof(addr)))
 			return -EFAULT;
 		break;
+	}
+	case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
+		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+		u64 reg;
+		int ret;
+
+		ret = vgic_its_attr_regs_access(dev, attr, &reg, false);
+		if (ret)
+			return ret;
+		return put_user(reg, uaddr);
+	}
 	default:
 		return -ENXIO;
 	}
-	}
 
 	return 0;
 }
@@ -1563,30 +2466,3 @@ int kvm_vgic_register_its_device(void)
 	return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
 				       KVM_DEV_TYPE_ARM_VGIC_ITS);
 }
-
-/*
- * Registers all ITSes with the kvm_io_bus framework.
- * To follow the existing VGIC initialization sequence, this has to be
- * done as late as possible, just before the first VCPU runs.
- */
-int vgic_register_its_iodevs(struct kvm *kvm)
-{
-	struct kvm_device *dev;
-	int ret = 0;
-
-	list_for_each_entry(dev, &kvm->devices, vm_node) {
-		if (dev->ops != &kvm_arm_vgic_its_ops)
-			continue;
-
-		ret = vgic_register_its_iodev(kvm, dev->private);
-		if (ret)
-			return ret;
-		/*
-		 * We don't need to care about tearing down previously
-		 * registered ITSes, as the kvm_io_bus framework removes
-		 * them for us if the VM gets destroyed.
-		 */
-	}
-
-	return ret;
-}
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
index d181d2baee9c..10ae6f394b71 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -37,6 +37,14 @@ int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
 	return 0;
 }
 
+static int vgic_check_type(struct kvm *kvm, int type_needed)
+{
+	if (kvm->arch.vgic.vgic_model != type_needed)
+		return -ENODEV;
+	else
+		return 0;
+}
+
 /**
  * kvm_vgic_addr - set or get vgic VM base addresses
  * @kvm:   pointer to the vm struct
@@ -57,40 +65,41 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
 {
 	int r = 0;
 	struct vgic_dist *vgic = &kvm->arch.vgic;
-	int type_needed;
 	phys_addr_t *addr_ptr, alignment;
 
 	mutex_lock(&kvm->lock);
 	switch (type) {
 	case KVM_VGIC_V2_ADDR_TYPE_DIST:
-		type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
+		r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
 		addr_ptr = &vgic->vgic_dist_base;
 		alignment = SZ_4K;
 		break;
 	case KVM_VGIC_V2_ADDR_TYPE_CPU:
-		type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
+		r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
 		addr_ptr = &vgic->vgic_cpu_base;
 		alignment = SZ_4K;
 		break;
 	case KVM_VGIC_V3_ADDR_TYPE_DIST:
-		type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
+		r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
 		addr_ptr = &vgic->vgic_dist_base;
 		alignment = SZ_64K;
 		break;
 	case KVM_VGIC_V3_ADDR_TYPE_REDIST:
-		type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
+		r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
+		if (r)
+			break;
+		if (write) {
+			r = vgic_v3_set_redist_base(kvm, *addr);
+			goto out;
+		}
 		addr_ptr = &vgic->vgic_redist_base;
-		alignment = SZ_64K;
 		break;
 	default:
 		r = -ENODEV;
-		goto out;
 	}
 
-	if (vgic->vgic_model != type_needed) {
-		r = -ENODEV;
+	if (r)
 		goto out;
-	}
 
 	if (write) {
 		r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment);
@@ -259,13 +268,13 @@ static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
 	}
 }
 
-static void unlock_all_vcpus(struct kvm *kvm)
+void unlock_all_vcpus(struct kvm *kvm)
 {
 	unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
 }
 
 /* Returns true if all vcpus were locked, false otherwise */
-static bool lock_all_vcpus(struct kvm *kvm)
+bool lock_all_vcpus(struct kvm *kvm)
 {
 	struct kvm_vcpu *tmp_vcpu;
 	int c;
@@ -580,6 +589,24 @@ static int vgic_v3_set_attr(struct kvm_device *dev,
 		reg = tmp32;
 		return vgic_v3_attr_regs_access(dev, attr, &reg, true);
 	}
+	case KVM_DEV_ARM_VGIC_GRP_CTRL: {
+		int ret;
+
+		switch (attr->attr) {
+		case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES:
+			mutex_lock(&dev->kvm->lock);
+
+			if (!lock_all_vcpus(dev->kvm)) {
+				mutex_unlock(&dev->kvm->lock);
+				return -EBUSY;
+			}
+			ret = vgic_v3_save_pending_tables(dev->kvm);
+			unlock_all_vcpus(dev->kvm);
+			mutex_unlock(&dev->kvm->lock);
+			return ret;
+		}
+		break;
+	}
 	}
 	return -ENXIO;
 }
@@ -658,6 +685,8 @@ static int vgic_v3_has_attr(struct kvm_device *dev,
 		switch (attr->attr) {
 		case KVM_DEV_ARM_VGIC_CTRL_INIT:
 			return 0;
+		case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES:
+			return 0;
 		}
 	}
 	return -ENXIO;
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index 0a4283ed9aa7..63e0bbdcddcc 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -226,7 +226,13 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
 
 	switch (addr & 0xff) {
 	case GIC_CPU_CTRL:
-		val = vmcr.ctlr;
+		val = vmcr.grpen0 << GIC_CPU_CTRL_EnableGrp0_SHIFT;
+		val |= vmcr.grpen1 << GIC_CPU_CTRL_EnableGrp1_SHIFT;
+		val |= vmcr.ackctl << GIC_CPU_CTRL_AckCtl_SHIFT;
+		val |= vmcr.fiqen << GIC_CPU_CTRL_FIQEn_SHIFT;
+		val |= vmcr.cbpr << GIC_CPU_CTRL_CBPR_SHIFT;
+		val |= vmcr.eoim << GIC_CPU_CTRL_EOImodeNS_SHIFT;
+
 		break;
 	case GIC_CPU_PRIMASK:
 		/*
@@ -267,7 +273,13 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
 
 	switch (addr & 0xff) {
 	case GIC_CPU_CTRL:
-		vmcr.ctlr = val;
+		vmcr.grpen0 = !!(val & GIC_CPU_CTRL_EnableGrp0);
+		vmcr.grpen1 = !!(val & GIC_CPU_CTRL_EnableGrp1);
+		vmcr.ackctl = !!(val & GIC_CPU_CTRL_AckCtl);
+		vmcr.fiqen = !!(val & GIC_CPU_CTRL_FIQEn);
+		vmcr.cbpr = !!(val & GIC_CPU_CTRL_CBPR);
+		vmcr.eoim = !!(val & GIC_CPU_CTRL_EOImodeNS);
+
 		break;
 	case GIC_CPU_PRIMASK:
 		/*
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 6afb3b484886..201d5e2e973d 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -556,67 +556,136 @@ unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev)
 	return SZ_64K;
 }
 
-int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address)
+/**
+ * vgic_register_redist_iodev - register a single redist iodev
+ * @vcpu:    The VCPU to which the redistributor belongs
+ *
+ * Register a KVM iodev for this VCPU's redistributor using the address
+ * provided.
+ *
+ * Return 0 on success, -ERRNO otherwise.
+ */
+int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *vcpu;
-	int c, ret = 0;
+	struct kvm *kvm = vcpu->kvm;
+	struct vgic_dist *vgic = &kvm->arch.vgic;
+	struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
+	struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
+	gpa_t rd_base, sgi_base;
+	int ret;
 
-	kvm_for_each_vcpu(c, vcpu, kvm) {
-		gpa_t rd_base = redist_base_address + c * SZ_64K * 2;
-		gpa_t sgi_base = rd_base + SZ_64K;
-		struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
-		struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
-
-		kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
-		rd_dev->base_addr = rd_base;
-		rd_dev->iodev_type = IODEV_REDIST;
-		rd_dev->regions = vgic_v3_rdbase_registers;
-		rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
-		rd_dev->redist_vcpu = vcpu;
+	/*
+	 * We may be creating VCPUs before having set the base address for the
+	 * redistributor region, in which case we will come back to this
+	 * function for all VCPUs when the base address is set.  Just return
+	 * without doing any work for now.
+	 */
+	if (IS_VGIC_ADDR_UNDEF(vgic->vgic_redist_base))
+		return 0;
 
-		mutex_lock(&kvm->slots_lock);
-		ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
-					      SZ_64K, &rd_dev->dev);
-		mutex_unlock(&kvm->slots_lock);
+	if (!vgic_v3_check_base(kvm))
+		return -EINVAL;
 
-		if (ret)
-			break;
+	rd_base = vgic->vgic_redist_base + vgic->vgic_redist_free_offset;
+	sgi_base = rd_base + SZ_64K;
 
-		kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
-		sgi_dev->base_addr = sgi_base;
-		sgi_dev->iodev_type = IODEV_REDIST;
-		sgi_dev->regions = vgic_v3_sgibase_registers;
-		sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
-		sgi_dev->redist_vcpu = vcpu;
+	kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
+	rd_dev->base_addr = rd_base;
+	rd_dev->iodev_type = IODEV_REDIST;
+	rd_dev->regions = vgic_v3_rdbase_registers;
+	rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
+	rd_dev->redist_vcpu = vcpu;
 
-		mutex_lock(&kvm->slots_lock);
-		ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
-					      SZ_64K, &sgi_dev->dev);
-		mutex_unlock(&kvm->slots_lock);
-		if (ret) {
-			kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
-						  &rd_dev->dev);
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
+				      SZ_64K, &rd_dev->dev);
+	mutex_unlock(&kvm->slots_lock);
+
+	if (ret)
+		return ret;
+
+	kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
+	sgi_dev->base_addr = sgi_base;
+	sgi_dev->iodev_type = IODEV_REDIST;
+	sgi_dev->regions = vgic_v3_sgibase_registers;
+	sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
+	sgi_dev->redist_vcpu = vcpu;
+
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
+				      SZ_64K, &sgi_dev->dev);
+	if (ret) {
+		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+					  &rd_dev->dev);
+		goto out;
+	}
+
+	vgic->vgic_redist_free_offset += 2 * SZ_64K;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return ret;
+}
+
+static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
+{
+	struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
+	struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
+
+	kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev);
+	kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &sgi_dev->dev);
+}
+
+static int vgic_register_all_redist_iodevs(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu;
+	int c, ret = 0;
+
+	kvm_for_each_vcpu(c, vcpu, kvm) {
+		ret = vgic_register_redist_iodev(vcpu);
+		if (ret)
 			break;
-		}
 	}
 
 	if (ret) {
 		/* The current c failed, so we start with the previous one. */
+		mutex_lock(&kvm->slots_lock);
 		for (c--; c >= 0; c--) {
-			struct vgic_cpu *vgic_cpu;
-
 			vcpu = kvm_get_vcpu(kvm, c);
-			vgic_cpu = &vcpu->arch.vgic_cpu;
-			kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
-						  &vgic_cpu->rd_iodev.dev);
-			kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
-						  &vgic_cpu->sgi_iodev.dev);
+			vgic_unregister_redist_iodev(vcpu);
 		}
+		mutex_unlock(&kvm->slots_lock);
 	}
 
 	return ret;
 }
 
+int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr)
+{
+	struct vgic_dist *vgic = &kvm->arch.vgic;
+	int ret;
+
+	/* vgic_check_ioaddr makes sure we don't do this twice */
+	ret = vgic_check_ioaddr(kvm, &vgic->vgic_redist_base, addr, SZ_64K);
+	if (ret)
+		return ret;
+
+	vgic->vgic_redist_base = addr;
+	if (!vgic_v3_check_base(kvm)) {
+		vgic->vgic_redist_base = VGIC_ADDR_UNDEF;
+		return -EINVAL;
+	}
+
+	/*
+	 * Register iodevs for each existing VCPU.  Adding more VCPUs
+	 * afterwards will register the iodevs when needed.
+	 */
+	ret = vgic_register_all_redist_iodevs(kvm);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 	const struct vgic_register_region *region;
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 2a5db1352722..1c17b2a2f105 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -446,13 +446,12 @@ static int match_region(const void *key, const void *elt)
 	return 0;
 }
 
-/* Find the proper register handler entry given a certain address offset. */
-static const struct vgic_register_region *
-vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions,
-		      unsigned int offset)
+const struct vgic_register_region *
+vgic_find_mmio_region(const struct vgic_register_region *regions,
+		      int nr_regions, unsigned int offset)
 {
-	return bsearch((void *)(uintptr_t)offset, region, nr_regions,
-		       sizeof(region[0]), match_region);
+	return bsearch((void *)(uintptr_t)offset, regions, nr_regions,
+		       sizeof(regions[0]), match_region);
 }
 
 void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
index 98bb566b660a..ea4171acdef3 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -36,8 +36,13 @@ struct vgic_register_region {
 	};
 	unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr,
 				      unsigned int len);
-	void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
-			      unsigned int len, unsigned long val);
+	union {
+		void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
+				      unsigned int len, unsigned long val);
+		int (*uaccess_its_write)(struct kvm *kvm, struct vgic_its *its,
+					 gpa_t addr, unsigned int len,
+					 unsigned long val);
+	};
 };
 
 extern struct kvm_io_device_ops kvm_io_gic_ops;
@@ -192,4 +197,9 @@ u64 vgic_sanitise_shareability(u64 reg);
 u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift,
 			u64 (*sanitise_fn)(u64));
 
+/* Find the proper register handler entry given a certain address offset */
+const struct vgic_register_region *
+vgic_find_mmio_region(const struct vgic_register_region *regions,
+		      int nr_regions, unsigned int offset);
+
 #endif
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index a65757aab6d3..e4187e52bb26 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -149,6 +149,13 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 	if (irq->hw) {
 		val |= GICH_LR_HW;
 		val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
+		/*
+		 * Never set pending+active on a HW interrupt, as the
+		 * pending state is kept at the physical distributor
+		 * level.
+		 */
+		if (irq->active && irq_is_pending(irq))
+			val &= ~GICH_LR_PENDING_BIT;
 	} else {
 		if (irq->config == VGIC_CONFIG_LEVEL)
 			val |= GICH_LR_EOI;
@@ -170,7 +177,18 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
 	u32 vmcr;
 
-	vmcr  = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
+	vmcr = (vmcrp->grpen0 << GICH_VMCR_ENABLE_GRP0_SHIFT) &
+		GICH_VMCR_ENABLE_GRP0_MASK;
+	vmcr |= (vmcrp->grpen1 << GICH_VMCR_ENABLE_GRP1_SHIFT) &
+		GICH_VMCR_ENABLE_GRP1_MASK;
+	vmcr |= (vmcrp->ackctl << GICH_VMCR_ACK_CTL_SHIFT) &
+		GICH_VMCR_ACK_CTL_MASK;
+	vmcr |= (vmcrp->fiqen << GICH_VMCR_FIQ_EN_SHIFT) &
+		GICH_VMCR_FIQ_EN_MASK;
+	vmcr |= (vmcrp->cbpr << GICH_VMCR_CBPR_SHIFT) &
+		GICH_VMCR_CBPR_MASK;
+	vmcr |= (vmcrp->eoim << GICH_VMCR_EOI_MODE_SHIFT) &
+		GICH_VMCR_EOI_MODE_MASK;
 	vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) &
 		GICH_VMCR_ALIAS_BINPOINT_MASK;
 	vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
@@ -188,8 +206,19 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 
 	vmcr = cpu_if->vgic_vmcr;
 
-	vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >>
-			GICH_VMCR_CTRL_SHIFT;
+	vmcrp->grpen0 = (vmcr & GICH_VMCR_ENABLE_GRP0_MASK) >>
+		GICH_VMCR_ENABLE_GRP0_SHIFT;
+	vmcrp->grpen1 = (vmcr & GICH_VMCR_ENABLE_GRP1_MASK) >>
+		GICH_VMCR_ENABLE_GRP1_SHIFT;
+	vmcrp->ackctl = (vmcr & GICH_VMCR_ACK_CTL_MASK) >>
+		GICH_VMCR_ACK_CTL_SHIFT;
+	vmcrp->fiqen = (vmcr & GICH_VMCR_FIQ_EN_MASK) >>
+		GICH_VMCR_FIQ_EN_SHIFT;
+	vmcrp->cbpr = (vmcr & GICH_VMCR_CBPR_MASK) >>
+		GICH_VMCR_CBPR_SHIFT;
+	vmcrp->eoim = (vmcr & GICH_VMCR_EOI_MODE_MASK) >>
+		GICH_VMCR_EOI_MODE_SHIFT;
+
 	vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >>
 			GICH_VMCR_ALIAS_BINPOINT_SHIFT;
 	vmcrp->bpr  = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index df1503650300..030248e669f6 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -127,6 +127,13 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 	if (irq->hw) {
 		val |= ICH_LR_HW;
 		val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
+		/*
+		 * Never set pending+active on a HW interrupt, as the
+		 * pending state is kept at the physical distributor
+		 * level.
+		 */
+		if (irq->active && irq_is_pending(irq))
+			val &= ~ICH_LR_PENDING_BIT;
 	} else {
 		if (irq->config == VGIC_CONFIG_LEVEL)
 			val |= ICH_LR_EOI;
@@ -152,15 +159,24 @@ void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
 void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 {
 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u32 model = vcpu->kvm->arch.vgic.vgic_model;
 	u32 vmcr;
 
-	/*
-	 * Ignore the FIQen bit, because GIC emulation always implies
-	 * SRE=1 which means the vFIQEn bit is also RES1.
-	 */
-	vmcr = ((vmcrp->ctlr >> ICC_CTLR_EL1_EOImode_SHIFT) <<
-		 ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK;
-	vmcr |= (vmcrp->ctlr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK;
+	if (model == KVM_DEV_TYPE_ARM_VGIC_V2) {
+		vmcr = (vmcrp->ackctl << ICH_VMCR_ACK_CTL_SHIFT) &
+			ICH_VMCR_ACK_CTL_MASK;
+		vmcr |= (vmcrp->fiqen << ICH_VMCR_FIQ_EN_SHIFT) &
+			ICH_VMCR_FIQ_EN_MASK;
+	} else {
+		/*
+		 * When emulating GICv3 on GICv3 with SRE=1 on the
+		 * VFIQEn bit is RES1 and the VAckCtl bit is RES0.
+		 */
+		vmcr = ICH_VMCR_FIQ_EN_MASK;
+	}
+
+	vmcr |= (vmcrp->cbpr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK;
+	vmcr |= (vmcrp->eoim << ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK;
 	vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
 	vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
 	vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
@@ -173,17 +189,27 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 {
 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u32 model = vcpu->kvm->arch.vgic.vgic_model;
 	u32 vmcr;
 
 	vmcr = cpu_if->vgic_vmcr;
 
-	/*
-	 * Ignore the FIQen bit, because GIC emulation always implies
-	 * SRE=1 which means the vFIQEn bit is also RES1.
-	 */
-	vmcrp->ctlr = ((vmcr >> ICH_VMCR_EOIM_SHIFT) <<
-			ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK;
-	vmcrp->ctlr |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT;
+	if (model == KVM_DEV_TYPE_ARM_VGIC_V2) {
+		vmcrp->ackctl = (vmcr & ICH_VMCR_ACK_CTL_MASK) >>
+			ICH_VMCR_ACK_CTL_SHIFT;
+		vmcrp->fiqen = (vmcr & ICH_VMCR_FIQ_EN_MASK) >>
+			ICH_VMCR_FIQ_EN_SHIFT;
+	} else {
+		/*
+		 * When emulating GICv3 on GICv3 with SRE=1 on the
+		 * VFIQEn bit is RES1 and the VAckCtl bit is RES0.
+		 */
+		vmcrp->fiqen = 1;
+		vmcrp->ackctl = 0;
+	}
+
+	vmcrp->cbpr = (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT;
+	vmcrp->eoim = (vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT;
 	vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
 	vmcrp->bpr  = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
 	vmcrp->pmr  = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
@@ -234,19 +260,125 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
 	vgic_v3->vgic_hcr = ICH_HCR_EN;
 }
 
-/* check for overlapping regions and for regions crossing the end of memory */
-static bool vgic_v3_check_base(struct kvm *kvm)
+int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
+{
+	struct kvm_vcpu *vcpu;
+	int byte_offset, bit_nr;
+	gpa_t pendbase, ptr;
+	bool status;
+	u8 val;
+	int ret;
+
+retry:
+	vcpu = irq->target_vcpu;
+	if (!vcpu)
+		return 0;
+
+	pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+
+	byte_offset = irq->intid / BITS_PER_BYTE;
+	bit_nr = irq->intid % BITS_PER_BYTE;
+	ptr = pendbase + byte_offset;
+
+	ret = kvm_read_guest(kvm, ptr, &val, 1);
+	if (ret)
+		return ret;
+
+	status = val & (1 << bit_nr);
+
+	spin_lock(&irq->irq_lock);
+	if (irq->target_vcpu != vcpu) {
+		spin_unlock(&irq->irq_lock);
+		goto retry;
+	}
+	irq->pending_latch = status;
+	vgic_queue_irq_unlock(vcpu->kvm, irq);
+
+	if (status) {
+		/* clear consumed data */
+		val &= ~(1 << bit_nr);
+		ret = kvm_write_guest(kvm, ptr, &val, 1);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/**
+ * vgic_its_save_pending_tables - Save the pending tables into guest RAM
+ * kvm lock and all vcpu lock must be held
+ */
+int vgic_v3_save_pending_tables(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	int last_byte_offset = -1;
+	struct vgic_irq *irq;
+	int ret;
+
+	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
+		int byte_offset, bit_nr;
+		struct kvm_vcpu *vcpu;
+		gpa_t pendbase, ptr;
+		bool stored;
+		u8 val;
+
+		vcpu = irq->target_vcpu;
+		if (!vcpu)
+			continue;
+
+		pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+
+		byte_offset = irq->intid / BITS_PER_BYTE;
+		bit_nr = irq->intid % BITS_PER_BYTE;
+		ptr = pendbase + byte_offset;
+
+		if (byte_offset != last_byte_offset) {
+			ret = kvm_read_guest(kvm, ptr, &val, 1);
+			if (ret)
+				return ret;
+			last_byte_offset = byte_offset;
+		}
+
+		stored = val & (1U << bit_nr);
+		if (stored == irq->pending_latch)
+			continue;
+
+		if (irq->pending_latch)
+			val |= 1 << bit_nr;
+		else
+			val &= ~(1 << bit_nr);
+
+		ret = kvm_write_guest(kvm, ptr, &val, 1);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Check for overlapping regions and for regions crossing the end of memory
+ * for base addresses which have already been set.
+ */
+bool vgic_v3_check_base(struct kvm *kvm)
 {
 	struct vgic_dist *d = &kvm->arch.vgic;
 	gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE;
 
 	redist_size *= atomic_read(&kvm->online_vcpus);
 
-	if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base)
+	if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) &&
+	    d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base)
 		return false;
-	if (d->vgic_redist_base + redist_size < d->vgic_redist_base)
+
+	if (!IS_VGIC_ADDR_UNDEF(d->vgic_redist_base) &&
+	    d->vgic_redist_base + redist_size < d->vgic_redist_base)
 		return false;
 
+	/* Both base addresses must be set to check if they overlap */
+	if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) ||
+	    IS_VGIC_ADDR_UNDEF(d->vgic_redist_base))
+		return true;
+
 	if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base)
 		return true;
 	if (d->vgic_redist_base + redist_size <= d->vgic_dist_base)
@@ -291,20 +423,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
 		goto out;
 	}
 
-	ret = vgic_register_redist_iodevs(kvm, dist->vgic_redist_base);
-	if (ret) {
-		kvm_err("Unable to register VGICv3 redist MMIO regions\n");
-		goto out;
-	}
-
-	if (vgic_has_its(kvm)) {
-		ret = vgic_register_its_iodevs(kvm);
-		if (ret) {
-			kvm_err("Unable to register VGIC ITS MMIO regions\n");
-			goto out;
-		}
-	}
-
 	dist->ready = true;
 
 out:
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 4346bc7d08dc..83b24d20ff8f 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -21,7 +21,7 @@
 #include "vgic.h"
 
 #define CREATE_TRACE_POINTS
-#include "../trace.h"
+#include "trace.h"
 
 #ifdef CONFIG_DEBUG_SPINLOCK
 #define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 799fd651b260..bba7fa22a7f7 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -73,6 +73,29 @@
 				      KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \
 				      KVM_REG_ARM_VGIC_SYSREG_OP2_MASK)
 
+/*
+ * As per Documentation/virtual/kvm/devices/arm-vgic-its.txt,
+ * below macros are defined for ITS table entry encoding.
+ */
+#define KVM_ITS_CTE_VALID_SHIFT		63
+#define KVM_ITS_CTE_VALID_MASK		BIT_ULL(63)
+#define KVM_ITS_CTE_RDBASE_SHIFT	16
+#define KVM_ITS_CTE_ICID_MASK		GENMASK_ULL(15, 0)
+#define KVM_ITS_ITE_NEXT_SHIFT		48
+#define KVM_ITS_ITE_PINTID_SHIFT	16
+#define KVM_ITS_ITE_PINTID_MASK		GENMASK_ULL(47, 16)
+#define KVM_ITS_ITE_ICID_MASK		GENMASK_ULL(15, 0)
+#define KVM_ITS_DTE_VALID_SHIFT		63
+#define KVM_ITS_DTE_VALID_MASK		BIT_ULL(63)
+#define KVM_ITS_DTE_NEXT_SHIFT		49
+#define KVM_ITS_DTE_NEXT_MASK		GENMASK_ULL(62, 49)
+#define KVM_ITS_DTE_ITTADDR_SHIFT	5
+#define KVM_ITS_DTE_ITTADDR_MASK	GENMASK_ULL(48, 5)
+#define KVM_ITS_DTE_SIZE_MASK		GENMASK_ULL(4, 0)
+#define KVM_ITS_L1E_VALID_MASK		BIT_ULL(63)
+/* we only support 64 kB translation table page size */
+#define KVM_ITS_L1E_ADDR_MASK		GENMASK_ULL(51, 16)
+
 static inline bool irq_is_pending(struct vgic_irq *irq)
 {
 	if (irq->config == VGIC_CONFIG_EDGE)
@@ -88,14 +111,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq)
  * registers regardless of the hardware backed GIC used.
  */
 struct vgic_vmcr {
-	u32	ctlr;
+	u32	grpen0;
+	u32	grpen1;
+
+	u32	ackctl;
+	u32	fiqen;
+	u32	cbpr;
+	u32	eoim;
+
 	u32	abpr;
 	u32	bpr;
 	u32	pmr;  /* Priority mask field in the GICC_PMR and
 		       * ICC_PMR_EL1 priority field format */
-	/* Below member variable are valid only for GICv3 */
-	u32	grpen0;
-	u32	grpen1;
 };
 
 struct vgic_reg_attr {
@@ -157,12 +184,15 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v3_enable(struct kvm_vcpu *vcpu);
 int vgic_v3_probe(const struct gic_kvm_info *info);
 int vgic_v3_map_resources(struct kvm *kvm);
-int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
+int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
+int vgic_v3_save_pending_tables(struct kvm *kvm);
+int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr);
+int vgic_register_redist_iodev(struct kvm_vcpu *vcpu);
+bool vgic_v3_check_base(struct kvm *kvm);
 
 void vgic_v3_load(struct kvm_vcpu *vcpu);
 void vgic_v3_put(struct kvm_vcpu *vcpu);
 
-int vgic_register_its_iodevs(struct kvm *kvm);
 bool vgic_has_its(struct kvm *kvm);
 int kvm_vgic_register_its_device(void);
 void vgic_enable_lpis(struct kvm_vcpu *vcpu);
@@ -187,4 +217,7 @@ int vgic_init(struct kvm *kvm);
 int vgic_debug_init(struct kvm *kvm);
 int vgic_debug_destroy(struct kvm *kvm);
 
+bool lock_all_vcpus(struct kvm *kvm);
+void unlock_all_vcpus(struct kvm *kvm);
+
 #endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a8d540398bbd..9120edf3c94b 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -184,7 +184,7 @@ int __attribute__((weak)) kvm_arch_set_irq_inatomic(
  * Called with wqh->lock held and interrupts disabled
  */
 static int
-irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
+irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct kvm_kernel_irqfd *irqfd =
 		container_of(wait, struct kvm_kernel_irqfd, wait);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b3d151ee2a67..f0fe9d02f6bb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2836,10 +2836,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 	[KVM_DEV_TYPE_FSL_MPIC_20]	= &kvm_mpic_ops,
 	[KVM_DEV_TYPE_FSL_MPIC_42]	= &kvm_mpic_ops,
 #endif
-
-#ifdef CONFIG_KVM_XICS
-	[KVM_DEV_TYPE_XICS]		= &kvm_xics_ops,
-#endif
 };
 
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
@@ -3715,7 +3711,7 @@ static const struct file_operations vm_stat_get_per_vm_fops = {
 	.release = kvm_debugfs_release,
 	.read    = simple_attr_read,
 	.write   = simple_attr_write,
-	.llseek  = generic_file_llseek,
+	.llseek  = no_llseek,
 };
 
 static int vcpu_stat_get_per_vm(void *data, u64 *val)
@@ -3760,7 +3756,7 @@ static const struct file_operations vcpu_stat_get_per_vm_fops = {
 	.release = kvm_debugfs_release,
 	.read    = simple_attr_read,
 	.write   = simple_attr_write,
-	.llseek  = generic_file_llseek,
+	.llseek  = no_llseek,
 };
 
 static const struct file_operations *stat_fops_per_vm[] = {